Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'oprofile/ring_buffer' into oprofile/oprofile-for-tip

+575 -474
+2
arch/Kconfig
··· 6 6 tristate "OProfile system profiling (EXPERIMENTAL)" 7 7 depends on PROFILING 8 8 depends on HAVE_OPROFILE 9 + select TRACING 10 + select RING_BUFFER 9 11 help 10 12 OProfile is a profiling system capable of profiling the 11 13 whole system, include the kernel, kernel modules, libraries,
+90 -132
arch/x86/oprofile/op_model_amd.c
··· 2 2 * @file op_model_amd.c 3 3 * athlon / K7 / K8 / Family 10h model-specific MSR operations 4 4 * 5 - * @remark Copyright 2002-2008 OProfile authors 5 + * @remark Copyright 2002-2009 OProfile authors 6 6 * @remark Read the file COPYING 7 7 * 8 8 * @author John Levon ··· 10 10 * @author Graydon Hoare 11 11 * @author Robert Richter <robert.richter@amd.com> 12 12 * @author Barry Kasindorf 13 - */ 13 + */ 14 14 15 15 #include <linux/oprofile.h> 16 16 #include <linux/device.h> ··· 60 60 #define IBS_OP_LOW_VALID_BIT (1ULL<<18) /* bit 18 */ 61 61 #define IBS_OP_LOW_ENABLE (1ULL<<17) /* bit 17 */ 62 62 63 - /* Codes used in cpu_buffer.c */ 64 - /* This produces duplicate code, need to be fixed */ 65 - #define IBS_FETCH_BEGIN 3 66 - #define IBS_OP_BEGIN 4 63 + #define IBS_FETCH_SIZE 6 64 + #define IBS_OP_SIZE 12 67 65 68 - /* The function interface needs to be fixed, something like add 69 - data. Should then be added to linux/oprofile.h. */ 70 - extern void 71 - oprofile_add_ibs_sample(struct pt_regs *const regs, 72 - unsigned int *const ibs_sample, int ibs_code); 73 - 74 - struct ibs_fetch_sample { 75 - /* MSRC001_1031 IBS Fetch Linear Address Register */ 76 - unsigned int ibs_fetch_lin_addr_low; 77 - unsigned int ibs_fetch_lin_addr_high; 78 - /* MSRC001_1030 IBS Fetch Control Register */ 79 - unsigned int ibs_fetch_ctl_low; 80 - unsigned int ibs_fetch_ctl_high; 81 - /* MSRC001_1032 IBS Fetch Physical Address Register */ 82 - unsigned int ibs_fetch_phys_addr_low; 83 - unsigned int ibs_fetch_phys_addr_high; 84 - }; 85 - 86 - struct ibs_op_sample { 87 - /* MSRC001_1034 IBS Op Logical Address Register (IbsRIP) */ 88 - unsigned int ibs_op_rip_low; 89 - unsigned int ibs_op_rip_high; 90 - /* MSRC001_1035 IBS Op Data Register */ 91 - unsigned int ibs_op_data1_low; 92 - unsigned int ibs_op_data1_high; 93 - /* MSRC001_1036 IBS Op Data 2 Register */ 94 - unsigned int ibs_op_data2_low; 95 - unsigned int ibs_op_data2_high; 96 - /* MSRC001_1037 IBS Op Data 3 Register */ 97 - unsigned int ibs_op_data3_low; 98 - unsigned int ibs_op_data3_high; 99 - /* MSRC001_1038 IBS DC Linear Address Register (IbsDcLinAd) */ 100 - unsigned int ibs_dc_linear_low; 101 - unsigned int ibs_dc_linear_high; 102 - /* MSRC001_1039 IBS DC Physical Address Register (IbsDcPhysAd) */ 103 - unsigned int ibs_dc_phys_low; 104 - unsigned int ibs_dc_phys_high; 105 - }; 106 - 107 - /* 108 - * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+ 109 - */ 110 - static void clear_ibs_nmi(void); 111 - 112 - static int ibs_allowed; /* AMD Family10h and later */ 66 + static int has_ibs; /* AMD Family10h and later */ 113 67 114 68 struct op_ibs_config { 115 69 unsigned long op_enabled; ··· 154 200 op_amd_handle_ibs(struct pt_regs * const regs, 155 201 struct op_msrs const * const msrs) 156 202 { 157 - unsigned int low, high; 158 - struct ibs_fetch_sample ibs_fetch; 159 - struct ibs_op_sample ibs_op; 203 + u32 low, high; 204 + u64 msr; 205 + struct op_entry entry; 160 206 161 - if (!ibs_allowed) 207 + if (!has_ibs) 162 208 return 1; 163 209 164 210 if (ibs_config.fetch_enabled) { 165 211 rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 166 212 if (high & IBS_FETCH_HIGH_VALID_BIT) { 167 - ibs_fetch.ibs_fetch_ctl_high = high; 168 - ibs_fetch.ibs_fetch_ctl_low = low; 169 - rdmsr(MSR_AMD64_IBSFETCHLINAD, low, high); 170 - ibs_fetch.ibs_fetch_lin_addr_high = high; 171 - ibs_fetch.ibs_fetch_lin_addr_low = low; 172 - rdmsr(MSR_AMD64_IBSFETCHPHYSAD, low, high); 173 - ibs_fetch.ibs_fetch_phys_addr_high = high; 174 - ibs_fetch.ibs_fetch_phys_addr_low = low; 213 + rdmsrl(MSR_AMD64_IBSFETCHLINAD, msr); 214 + oprofile_write_reserve(&entry, regs, msr, 215 + IBS_FETCH_CODE, IBS_FETCH_SIZE); 216 + oprofile_add_data(&entry, (u32)msr); 217 + oprofile_add_data(&entry, (u32)(msr >> 32)); 218 + oprofile_add_data(&entry, low); 219 + oprofile_add_data(&entry, high); 220 + rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, msr); 221 + oprofile_add_data(&entry, (u32)msr); 222 + oprofile_add_data(&entry, (u32)(msr >> 32)); 223 + oprofile_write_commit(&entry); 175 224 176 - oprofile_add_ibs_sample(regs, 177 - (unsigned int *)&ibs_fetch, 178 - IBS_FETCH_BEGIN); 179 - 180 - /*reenable the IRQ */ 181 - rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); 225 + /* reenable the IRQ */ 182 226 high &= ~IBS_FETCH_HIGH_VALID_BIT; 183 227 high |= IBS_FETCH_HIGH_ENABLE; 184 228 low &= IBS_FETCH_LOW_MAX_CNT_MASK; ··· 187 235 if (ibs_config.op_enabled) { 188 236 rdmsr(MSR_AMD64_IBSOPCTL, low, high); 189 237 if (low & IBS_OP_LOW_VALID_BIT) { 190 - rdmsr(MSR_AMD64_IBSOPRIP, low, high); 191 - ibs_op.ibs_op_rip_low = low; 192 - ibs_op.ibs_op_rip_high = high; 193 - rdmsr(MSR_AMD64_IBSOPDATA, low, high); 194 - ibs_op.ibs_op_data1_low = low; 195 - ibs_op.ibs_op_data1_high = high; 196 - rdmsr(MSR_AMD64_IBSOPDATA2, low, high); 197 - ibs_op.ibs_op_data2_low = low; 198 - ibs_op.ibs_op_data2_high = high; 199 - rdmsr(MSR_AMD64_IBSOPDATA3, low, high); 200 - ibs_op.ibs_op_data3_low = low; 201 - ibs_op.ibs_op_data3_high = high; 202 - rdmsr(MSR_AMD64_IBSDCLINAD, low, high); 203 - ibs_op.ibs_dc_linear_low = low; 204 - ibs_op.ibs_dc_linear_high = high; 205 - rdmsr(MSR_AMD64_IBSDCPHYSAD, low, high); 206 - ibs_op.ibs_dc_phys_low = low; 207 - ibs_op.ibs_dc_phys_high = high; 238 + rdmsrl(MSR_AMD64_IBSOPRIP, msr); 239 + oprofile_write_reserve(&entry, regs, msr, 240 + IBS_OP_CODE, IBS_OP_SIZE); 241 + oprofile_add_data(&entry, (u32)msr); 242 + oprofile_add_data(&entry, (u32)(msr >> 32)); 243 + rdmsrl(MSR_AMD64_IBSOPDATA, msr); 244 + oprofile_add_data(&entry, (u32)msr); 245 + oprofile_add_data(&entry, (u32)(msr >> 32)); 246 + rdmsrl(MSR_AMD64_IBSOPDATA2, msr); 247 + oprofile_add_data(&entry, (u32)msr); 248 + oprofile_add_data(&entry, (u32)(msr >> 32)); 249 + rdmsrl(MSR_AMD64_IBSOPDATA3, msr); 250 + oprofile_add_data(&entry, (u32)msr); 251 + oprofile_add_data(&entry, (u32)(msr >> 32)); 252 + rdmsrl(MSR_AMD64_IBSDCLINAD, msr); 253 + oprofile_add_data(&entry, (u32)msr); 254 + oprofile_add_data(&entry, (u32)(msr >> 32)); 255 + rdmsrl(MSR_AMD64_IBSDCPHYSAD, msr); 256 + oprofile_add_data(&entry, (u32)msr); 257 + oprofile_add_data(&entry, (u32)(msr >> 32)); 258 + oprofile_write_commit(&entry); 208 259 209 260 /* reenable the IRQ */ 210 - oprofile_add_ibs_sample(regs, 211 - (unsigned int *)&ibs_op, 212 - IBS_OP_BEGIN); 213 - rdmsr(MSR_AMD64_IBSOPCTL, low, high); 214 261 high = 0; 215 262 low &= ~IBS_OP_LOW_VALID_BIT; 216 263 low |= IBS_OP_LOW_ENABLE; ··· 259 308 } 260 309 261 310 #ifdef CONFIG_OPROFILE_IBS 262 - if (ibs_allowed && ibs_config.fetch_enabled) { 311 + if (has_ibs && ibs_config.fetch_enabled) { 263 312 low = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; 264 313 high = ((ibs_config.rand_en & 0x1) << 25) /* bit 57 */ 265 314 + IBS_FETCH_HIGH_ENABLE; 266 315 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); 267 316 } 268 317 269 - if (ibs_allowed && ibs_config.op_enabled) { 318 + if (has_ibs && ibs_config.op_enabled) { 270 319 low = ((ibs_config.max_cnt_op >> 4) & 0xFFFF) 271 320 + ((ibs_config.dispatched_ops & 0x1) << 19) /* bit 19 */ 272 321 + IBS_OP_LOW_ENABLE; ··· 282 331 unsigned int low, high; 283 332 int i; 284 333 285 - /* Subtle: stop on all counters to avoid race with 286 - * setting our pm callback */ 334 + /* 335 + * Subtle: stop on all counters to avoid race with setting our 336 + * pm callback 337 + */ 287 338 for (i = 0 ; i < NUM_COUNTERS ; ++i) { 288 339 if (!reset_value[i]) 289 340 continue; ··· 295 342 } 296 343 297 344 #ifdef CONFIG_OPROFILE_IBS 298 - if (ibs_allowed && ibs_config.fetch_enabled) { 299 - low = 0; /* clear max count and enable */ 345 + if (has_ibs && ibs_config.fetch_enabled) { 346 + /* clear max count and enable */ 347 + low = 0; 300 348 high = 0; 301 349 wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); 302 350 } 303 351 304 - if (ibs_allowed && ibs_config.op_enabled) { 305 - low = 0; /* clear max count and enable */ 352 + if (has_ibs && ibs_config.op_enabled) { 353 + /* clear max count and enable */ 354 + low = 0; 306 355 high = 0; 307 356 wrmsr(MSR_AMD64_IBSOPCTL, low, high); 308 357 } ··· 325 370 } 326 371 } 327 372 328 - #ifndef CONFIG_OPROFILE_IBS 329 - 330 - /* no IBS support */ 331 - 332 - static int op_amd_init(struct oprofile_operations *ops) 333 - { 334 - return 0; 335 - } 336 - 337 - static void op_amd_exit(void) {} 338 - 339 - #else 373 + #ifdef CONFIG_OPROFILE_IBS 340 374 341 375 static u8 ibs_eilvt_off; 342 376 ··· 339 395 setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); 340 396 } 341 397 342 - static int pfm_amd64_setup_eilvt(void) 398 + static int init_ibs_nmi(void) 343 399 { 344 400 #define IBSCTL_LVTOFFSETVAL (1 << 8) 345 401 #define IBSCTL 0x1cc ··· 363 419 | IBSCTL_LVTOFFSETVAL); 364 420 pci_read_config_dword(cpu_cfg, IBSCTL, &value); 365 421 if (value != (ibs_eilvt_off | IBSCTL_LVTOFFSETVAL)) { 422 + pci_dev_put(cpu_cfg); 366 423 printk(KERN_DEBUG "Failed to setup IBS LVT offset, " 367 424 "IBSCTL = 0x%08x", value); 368 425 return 1; ··· 388 443 return 0; 389 444 } 390 445 391 - /* 392 - * initialize the APIC for the IBS interrupts 393 - * if available (AMD Family10h rev B0 and later) 394 - */ 395 - static void setup_ibs(void) 446 + /* uninitialize the APIC for the IBS interrupts if needed */ 447 + static void clear_ibs_nmi(void) 396 448 { 397 - ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); 449 + if (has_ibs) 450 + on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 451 + } 398 452 399 - if (!ibs_allowed) 453 + /* initialize the APIC for the IBS interrupts if available */ 454 + static void ibs_init(void) 455 + { 456 + has_ibs = boot_cpu_has(X86_FEATURE_IBS); 457 + 458 + if (!has_ibs) 400 459 return; 401 460 402 - if (pfm_amd64_setup_eilvt()) { 403 - ibs_allowed = 0; 461 + if (init_ibs_nmi()) { 462 + has_ibs = 0; 404 463 return; 405 464 } 406 465 407 466 printk(KERN_INFO "oprofile: AMD IBS detected\n"); 408 467 } 409 468 410 - 411 - /* 412 - * unitialize the APIC for the IBS interrupts if needed on AMD Family10h 413 - * rev B0 and later */ 414 - static void clear_ibs_nmi(void) 469 + static void ibs_exit(void) 415 470 { 416 - if (ibs_allowed) 417 - on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); 471 + if (!has_ibs) 472 + return; 473 + 474 + clear_ibs_nmi(); 418 475 } 419 476 420 477 static int (*create_arch_files)(struct super_block *sb, struct dentry *root); ··· 433 486 if (ret) 434 487 return ret; 435 488 436 - if (!ibs_allowed) 489 + if (!has_ibs) 437 490 return ret; 438 491 439 492 /* model specific files */ ··· 466 519 467 520 static int op_amd_init(struct oprofile_operations *ops) 468 521 { 469 - setup_ibs(); 522 + ibs_init(); 470 523 create_arch_files = ops->create_files; 471 524 ops->create_files = setup_ibs_files; 472 525 return 0; ··· 474 527 475 528 static void op_amd_exit(void) 476 529 { 477 - clear_ibs_nmi(); 530 + ibs_exit(); 478 531 } 479 532 480 - #endif 533 + #else 534 + 535 + /* no IBS support */ 536 + 537 + static int op_amd_init(struct oprofile_operations *ops) 538 + { 539 + return 0; 540 + } 541 + 542 + static void op_amd_exit(void) {} 543 + 544 + #endif /* CONFIG_OPROFILE_IBS */ 481 545 482 546 struct op_x86_model_spec const op_amd_spec = { 483 547 .init = op_amd_init,
+82 -149
drivers/oprofile/buffer_sync.c
··· 1 1 /** 2 2 * @file buffer_sync.c 3 3 * 4 - * @remark Copyright 2002 OProfile authors 4 + * @remark Copyright 2002-2009 OProfile authors 5 5 * @remark Read the file COPYING 6 6 * 7 7 * @author John Levon <levon@movementarian.org> 8 8 * @author Barry Kasindorf 9 + * @author Robert Richter <robert.richter@amd.com> 9 10 * 10 11 * This is the core of the buffer management. Each 11 12 * CPU buffer is processed and entered into the ··· 269 268 return cookie; 270 269 } 271 270 272 - static void increment_tail(struct oprofile_cpu_buffer *b) 273 - { 274 - unsigned long new_tail = b->tail_pos + 1; 275 - 276 - rmb(); /* be sure fifo pointers are synchromized */ 277 - 278 - if (new_tail < b->buffer_size) 279 - b->tail_pos = new_tail; 280 - else 281 - b->tail_pos = 0; 282 - } 283 - 284 271 static unsigned long last_cookie = INVALID_COOKIE; 285 272 286 273 static void add_cpu_switch(int i) ··· 316 327 add_event_entry(TRACE_BEGIN_CODE); 317 328 } 318 329 319 - #ifdef CONFIG_OPROFILE_IBS 320 - 321 - #define IBS_FETCH_CODE_SIZE 2 322 - #define IBS_OP_CODE_SIZE 5 323 - #define IBS_EIP(offset) \ 324 - (((struct op_sample *)&cpu_buf->buffer[(offset)])->eip) 325 - #define IBS_EVENT(offset) \ 326 - (((struct op_sample *)&cpu_buf->buffer[(offset)])->event) 327 - 328 - /* 329 - * Add IBS fetch and op entries to event buffer 330 - */ 331 - static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code, 332 - struct mm_struct *mm) 330 + static void add_data(struct op_entry *entry, struct mm_struct *mm) 333 331 { 334 - unsigned long rip; 335 - int i, count; 336 - unsigned long ibs_cookie = 0; 332 + unsigned long code, pc, val; 333 + unsigned long cookie; 337 334 off_t offset; 338 335 339 - increment_tail(cpu_buf); /* move to RIP entry */ 340 - 341 - rip = IBS_EIP(cpu_buf->tail_pos); 342 - 343 - #ifdef __LP64__ 344 - rip += IBS_EVENT(cpu_buf->tail_pos) << 32; 345 - #endif 336 + if (!op_cpu_buffer_get_data(entry, &code)) 337 + return; 338 + if (!op_cpu_buffer_get_data(entry, &pc)) 339 + return; 340 + if (!op_cpu_buffer_get_size(entry)) 341 + return; 346 342 347 343 if (mm) { 348 - ibs_cookie = lookup_dcookie(mm, rip, &offset); 344 + cookie = lookup_dcookie(mm, pc, &offset); 349 345 350 - if (ibs_cookie == NO_COOKIE) 351 - offset = rip; 352 - if (ibs_cookie == INVALID_COOKIE) { 346 + if (cookie == NO_COOKIE) 347 + offset = pc; 348 + if (cookie == INVALID_COOKIE) { 353 349 atomic_inc(&oprofile_stats.sample_lost_no_mapping); 354 - offset = rip; 350 + offset = pc; 355 351 } 356 - if (ibs_cookie != last_cookie) { 357 - add_cookie_switch(ibs_cookie); 358 - last_cookie = ibs_cookie; 352 + if (cookie != last_cookie) { 353 + add_cookie_switch(cookie); 354 + last_cookie = cookie; 359 355 } 360 356 } else 361 - offset = rip; 357 + offset = pc; 362 358 363 359 add_event_entry(ESCAPE_CODE); 364 360 add_event_entry(code); 365 361 add_event_entry(offset); /* Offset from Dcookie */ 366 362 367 - /* we send the Dcookie offset, but send the raw Linear Add also*/ 368 - add_event_entry(IBS_EIP(cpu_buf->tail_pos)); 369 - add_event_entry(IBS_EVENT(cpu_buf->tail_pos)); 370 - 371 - if (code == IBS_FETCH_CODE) 372 - count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/ 373 - else 374 - count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/ 375 - 376 - for (i = 0; i < count; i++) { 377 - increment_tail(cpu_buf); 378 - add_event_entry(IBS_EIP(cpu_buf->tail_pos)); 379 - add_event_entry(IBS_EVENT(cpu_buf->tail_pos)); 380 - } 363 + while (op_cpu_buffer_get_data(entry, &val)) 364 + add_event_entry(val); 381 365 } 382 366 383 - #endif 384 - 385 - static void add_sample_entry(unsigned long offset, unsigned long event) 367 + static inline void add_sample_entry(unsigned long offset, unsigned long event) 386 368 { 387 369 add_event_entry(offset); 388 370 add_event_entry(event); 389 371 } 390 372 391 373 392 - static int add_us_sample(struct mm_struct *mm, struct op_sample *s) 374 + /* 375 + * Add a sample to the global event buffer. If possible the 376 + * sample is converted into a persistent dentry/offset pair 377 + * for later lookup from userspace. Return 0 on failure. 378 + */ 379 + static int 380 + add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) 393 381 { 394 382 unsigned long cookie; 395 383 off_t offset; 384 + 385 + if (in_kernel) { 386 + add_sample_entry(s->eip, s->event); 387 + return 1; 388 + } 389 + 390 + /* add userspace sample */ 391 + 392 + if (!mm) { 393 + atomic_inc(&oprofile_stats.sample_lost_no_mm); 394 + return 0; 395 + } 396 396 397 397 cookie = lookup_dcookie(mm, s->eip, &offset); 398 398 ··· 398 420 add_sample_entry(offset, s->event); 399 421 400 422 return 1; 401 - } 402 - 403 - 404 - /* Add a sample to the global event buffer. If possible the 405 - * sample is converted into a persistent dentry/offset pair 406 - * for later lookup from userspace. 407 - */ 408 - static int 409 - add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) 410 - { 411 - if (in_kernel) { 412 - add_sample_entry(s->eip, s->event); 413 - return 1; 414 - } else if (mm) { 415 - return add_us_sample(mm, s); 416 - } else { 417 - atomic_inc(&oprofile_stats.sample_lost_no_mm); 418 - } 419 - return 0; 420 423 } 421 424 422 425 ··· 422 463 static inline int is_code(unsigned long val) 423 464 { 424 465 return val == ESCAPE_CODE; 425 - } 426 - 427 - 428 - /* "acquire" as many cpu buffer slots as we can */ 429 - static unsigned long get_slots(struct oprofile_cpu_buffer *b) 430 - { 431 - unsigned long head = b->head_pos; 432 - unsigned long tail = b->tail_pos; 433 - 434 - /* 435 - * Subtle. This resets the persistent last_task 436 - * and in_kernel values used for switching notes. 437 - * BUT, there is a small window between reading 438 - * head_pos, and this call, that means samples 439 - * can appear at the new head position, but not 440 - * be prefixed with the notes for switching 441 - * kernel mode or a task switch. This small hole 442 - * can lead to mis-attribution or samples where 443 - * we don't know if it's in the kernel or not, 444 - * at the start of an event buffer. 445 - */ 446 - cpu_buffer_reset(b); 447 - 448 - if (head >= tail) 449 - return head - tail; 450 - 451 - return head + (b->buffer_size - tail); 452 466 } 453 467 454 468 ··· 491 559 */ 492 560 void sync_buffer(int cpu) 493 561 { 494 - struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); 495 562 struct mm_struct *mm = NULL; 563 + struct mm_struct *oldmm; 564 + unsigned long val; 496 565 struct task_struct *new; 497 566 unsigned long cookie = 0; 498 567 int in_kernel = 1; 499 568 sync_buffer_state state = sb_buffer_start; 500 - #ifndef CONFIG_OPROFILE_IBS 501 569 unsigned int i; 502 570 unsigned long available; 503 - #endif 571 + unsigned long flags; 572 + struct op_entry entry; 573 + struct op_sample *sample; 504 574 505 575 mutex_lock(&buffer_mutex); 506 576 507 577 add_cpu_switch(cpu); 508 578 509 - /* Remember, only we can modify tail_pos */ 510 - 511 - #ifndef CONFIG_OPROFILE_IBS 512 - available = get_slots(cpu_buf); 579 + op_cpu_buffer_reset(cpu); 580 + available = op_cpu_buffer_entries(cpu); 513 581 514 582 for (i = 0; i < available; ++i) { 515 - #else 516 - while (get_slots(cpu_buf)) { 517 - #endif 518 - struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos]; 583 + sample = op_cpu_buffer_read_entry(&entry, cpu); 584 + if (!sample) 585 + break; 519 586 520 - if (is_code(s->eip)) { 521 - if (s->event <= CPU_IS_KERNEL) { 522 - /* kernel/userspace switch */ 523 - in_kernel = s->event; 524 - if (state == sb_buffer_start) 525 - state = sb_sample_start; 526 - add_kernel_ctx_switch(s->event); 527 - } else if (s->event == CPU_TRACE_BEGIN) { 587 + if (is_code(sample->eip)) { 588 + flags = sample->event; 589 + if (flags & TRACE_BEGIN) { 528 590 state = sb_bt_start; 529 591 add_trace_begin(); 530 - #ifdef CONFIG_OPROFILE_IBS 531 - } else if (s->event == IBS_FETCH_BEGIN) { 532 - state = sb_bt_start; 533 - add_ibs_begin(cpu_buf, IBS_FETCH_CODE, mm); 534 - } else if (s->event == IBS_OP_BEGIN) { 535 - state = sb_bt_start; 536 - add_ibs_begin(cpu_buf, IBS_OP_CODE, mm); 537 - #endif 538 - } else { 539 - struct mm_struct *oldmm = mm; 540 - 592 + } 593 + if (flags & KERNEL_CTX_SWITCH) { 594 + /* kernel/userspace switch */ 595 + in_kernel = flags & IS_KERNEL; 596 + if (state == sb_buffer_start) 597 + state = sb_sample_start; 598 + add_kernel_ctx_switch(flags & IS_KERNEL); 599 + } 600 + if (flags & USER_CTX_SWITCH 601 + && op_cpu_buffer_get_data(&entry, &val)) { 541 602 /* userspace context switch */ 542 - new = (struct task_struct *)s->event; 543 - 603 + new = (struct task_struct *)val; 604 + oldmm = mm; 544 605 release_mm(oldmm); 545 606 mm = take_tasks_mm(new); 546 607 if (mm != oldmm) 547 608 cookie = get_exec_dcookie(mm); 548 609 add_user_ctx_switch(new, cookie); 549 610 } 550 - } else if (state >= sb_bt_start && 551 - !add_sample(mm, s, in_kernel)) { 552 - if (state == sb_bt_start) { 553 - state = sb_bt_ignore; 554 - atomic_inc(&oprofile_stats.bt_lost_no_mapping); 555 - } 611 + if (op_cpu_buffer_get_size(&entry)) 612 + add_data(&entry, mm); 613 + continue; 556 614 } 557 615 558 - increment_tail(cpu_buf); 616 + if (state < sb_bt_start) 617 + /* ignore sample */ 618 + continue; 619 + 620 + if (add_sample(mm, sample, in_kernel)) 621 + continue; 622 + 623 + /* ignore backtraces if failed to add a sample */ 624 + if (state == sb_bt_start) { 625 + state = sb_bt_ignore; 626 + atomic_inc(&oprofile_stats.bt_lost_no_mapping); 627 + } 559 628 } 560 629 release_mm(mm); 561 630
+249 -162
drivers/oprofile/cpu_buffer.c
··· 1 1 /** 2 2 * @file cpu_buffer.c 3 3 * 4 - * @remark Copyright 2002 OProfile authors 4 + * @remark Copyright 2002-2009 OProfile authors 5 5 * @remark Read the file COPYING 6 6 * 7 7 * @author John Levon <levon@movementarian.org> 8 8 * @author Barry Kasindorf <barry.kasindorf@amd.com> 9 + * @author Robert Richter <robert.richter@amd.com> 9 10 * 10 11 * Each CPU has a local buffer that stores PC value/event 11 12 * pairs. We also log context switches when we notice them. ··· 29 28 #include "buffer_sync.h" 30 29 #include "oprof.h" 31 30 31 + #define OP_BUFFER_FLAGS 0 32 + 33 + /* 34 + * Read and write access is using spin locking. Thus, writing to the 35 + * buffer by NMI handler (x86) could occur also during critical 36 + * sections when reading the buffer. To avoid this, there are 2 37 + * buffers for independent read and write access. Read access is in 38 + * process context only, write access only in the NMI handler. If the 39 + * read buffer runs empty, both buffers are swapped atomically. There 40 + * is potentially a small window during swapping where the buffers are 41 + * disabled and samples could be lost. 42 + * 43 + * Using 2 buffers is a little bit overhead, but the solution is clear 44 + * and does not require changes in the ring buffer implementation. It 45 + * can be changed to a single buffer solution when the ring buffer 46 + * access is implemented as non-locking atomic code. 47 + */ 48 + static struct ring_buffer *op_ring_buffer_read; 49 + static struct ring_buffer *op_ring_buffer_write; 32 50 DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 33 51 34 52 static void wq_sync_buffer(struct work_struct *work); ··· 55 35 #define DEFAULT_TIMER_EXPIRE (HZ / 10) 56 36 static int work_enabled; 57 37 58 - void free_cpu_buffers(void) 59 - { 60 - int i; 61 - 62 - for_each_possible_cpu(i) { 63 - vfree(per_cpu(cpu_buffer, i).buffer); 64 - per_cpu(cpu_buffer, i).buffer = NULL; 65 - } 66 - } 67 - 68 38 unsigned long oprofile_get_cpu_buffer_size(void) 69 39 { 70 - return fs_cpu_buffer_size; 40 + return oprofile_cpu_buffer_size; 71 41 } 72 42 73 43 void oprofile_cpu_buffer_inc_smpl_lost(void) ··· 68 58 cpu_buf->sample_lost_overflow++; 69 59 } 70 60 61 + void free_cpu_buffers(void) 62 + { 63 + if (op_ring_buffer_read) 64 + ring_buffer_free(op_ring_buffer_read); 65 + op_ring_buffer_read = NULL; 66 + if (op_ring_buffer_write) 67 + ring_buffer_free(op_ring_buffer_write); 68 + op_ring_buffer_write = NULL; 69 + } 70 + 71 71 int alloc_cpu_buffers(void) 72 72 { 73 73 int i; 74 74 75 - unsigned long buffer_size = fs_cpu_buffer_size; 75 + unsigned long buffer_size = oprofile_cpu_buffer_size; 76 + 77 + op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); 78 + if (!op_ring_buffer_read) 79 + goto fail; 80 + op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS); 81 + if (!op_ring_buffer_write) 82 + goto fail; 76 83 77 84 for_each_possible_cpu(i) { 78 85 struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); 79 - 80 - b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size, 81 - cpu_to_node(i)); 82 - if (!b->buffer) 83 - goto fail; 84 86 85 87 b->last_task = NULL; 86 88 b->last_is_kernel = -1; 87 89 b->tracing = 0; 88 90 b->buffer_size = buffer_size; 89 - b->tail_pos = 0; 90 - b->head_pos = 0; 91 91 b->sample_received = 0; 92 92 b->sample_lost_overflow = 0; 93 93 b->backtrace_aborted = 0; ··· 144 124 flush_scheduled_work(); 145 125 } 146 126 147 - /* Resets the cpu buffer to a sane state. */ 148 - void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf) 127 + /* 128 + * This function prepares the cpu buffer to write a sample. 129 + * 130 + * Struct op_entry is used during operations on the ring buffer while 131 + * struct op_sample contains the data that is stored in the ring 132 + * buffer. Struct entry can be uninitialized. The function reserves a 133 + * data array that is specified by size. Use 134 + * op_cpu_buffer_write_commit() after preparing the sample. In case of 135 + * errors a null pointer is returned, otherwise the pointer to the 136 + * sample. 137 + * 138 + */ 139 + struct op_sample 140 + *op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size) 149 141 { 150 - /* reset these to invalid values; the next sample 151 - * collected will populate the buffer with proper 152 - * values to initialize the buffer 153 - */ 154 - cpu_buf->last_is_kernel = -1; 155 - cpu_buf->last_task = NULL; 156 - } 157 - 158 - /* compute number of available slots in cpu_buffer queue */ 159 - static unsigned long nr_available_slots(struct oprofile_cpu_buffer const *b) 160 - { 161 - unsigned long head = b->head_pos; 162 - unsigned long tail = b->tail_pos; 163 - 164 - if (tail > head) 165 - return (tail - head) - 1; 166 - 167 - return tail + (b->buffer_size - head) - 1; 168 - } 169 - 170 - static void increment_head(struct oprofile_cpu_buffer *b) 171 - { 172 - unsigned long new_head = b->head_pos + 1; 173 - 174 - /* Ensure anything written to the slot before we 175 - * increment is visible */ 176 - wmb(); 177 - 178 - if (new_head < b->buffer_size) 179 - b->head_pos = new_head; 142 + entry->event = ring_buffer_lock_reserve 143 + (op_ring_buffer_write, sizeof(struct op_sample) + 144 + size * sizeof(entry->sample->data[0]), &entry->irq_flags); 145 + if (entry->event) 146 + entry->sample = ring_buffer_event_data(entry->event); 180 147 else 181 - b->head_pos = 0; 148 + entry->sample = NULL; 149 + 150 + if (!entry->sample) 151 + return NULL; 152 + 153 + entry->size = size; 154 + entry->data = entry->sample->data; 155 + 156 + return entry->sample; 182 157 } 183 158 184 - static inline void 185 - add_sample(struct oprofile_cpu_buffer *cpu_buf, 186 - unsigned long pc, unsigned long event) 159 + int op_cpu_buffer_write_commit(struct op_entry *entry) 187 160 { 188 - struct op_sample *entry = &cpu_buf->buffer[cpu_buf->head_pos]; 189 - entry->eip = pc; 190 - entry->event = event; 191 - increment_head(cpu_buf); 161 + return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, 162 + entry->irq_flags); 192 163 } 193 164 194 - static inline void 195 - add_code(struct oprofile_cpu_buffer *buffer, unsigned long value) 165 + struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) 196 166 { 197 - add_sample(buffer, ESCAPE_CODE, value); 167 + struct ring_buffer_event *e; 168 + e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 169 + if (e) 170 + goto event; 171 + if (ring_buffer_swap_cpu(op_ring_buffer_read, 172 + op_ring_buffer_write, 173 + cpu)) 174 + return NULL; 175 + e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); 176 + if (e) 177 + goto event; 178 + return NULL; 179 + 180 + event: 181 + entry->event = e; 182 + entry->sample = ring_buffer_event_data(e); 183 + entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample)) 184 + / sizeof(entry->sample->data[0]); 185 + entry->data = entry->sample->data; 186 + return entry->sample; 198 187 } 199 188 200 - /* This must be safe from any context. It's safe writing here 201 - * because of the head/tail separation of the writer and reader 202 - * of the CPU buffer. 189 + unsigned long op_cpu_buffer_entries(int cpu) 190 + { 191 + return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) 192 + + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); 193 + } 194 + 195 + static int 196 + op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace, 197 + int is_kernel, struct task_struct *task) 198 + { 199 + struct op_entry entry; 200 + struct op_sample *sample; 201 + unsigned long flags; 202 + int size; 203 + 204 + flags = 0; 205 + 206 + if (backtrace) 207 + flags |= TRACE_BEGIN; 208 + 209 + /* notice a switch from user->kernel or vice versa */ 210 + is_kernel = !!is_kernel; 211 + if (cpu_buf->last_is_kernel != is_kernel) { 212 + cpu_buf->last_is_kernel = is_kernel; 213 + flags |= KERNEL_CTX_SWITCH; 214 + if (is_kernel) 215 + flags |= IS_KERNEL; 216 + } 217 + 218 + /* notice a task switch */ 219 + if (cpu_buf->last_task != task) { 220 + cpu_buf->last_task = task; 221 + flags |= USER_CTX_SWITCH; 222 + } 223 + 224 + if (!flags) 225 + /* nothing to do */ 226 + return 0; 227 + 228 + if (flags & USER_CTX_SWITCH) 229 + size = 1; 230 + else 231 + size = 0; 232 + 233 + sample = op_cpu_buffer_write_reserve(&entry, size); 234 + if (!sample) 235 + return -ENOMEM; 236 + 237 + sample->eip = ESCAPE_CODE; 238 + sample->event = flags; 239 + 240 + if (size) 241 + op_cpu_buffer_add_data(&entry, (unsigned long)task); 242 + 243 + op_cpu_buffer_write_commit(&entry); 244 + 245 + return 0; 246 + } 247 + 248 + static inline int 249 + op_add_sample(struct oprofile_cpu_buffer *cpu_buf, 250 + unsigned long pc, unsigned long event) 251 + { 252 + struct op_entry entry; 253 + struct op_sample *sample; 254 + 255 + sample = op_cpu_buffer_write_reserve(&entry, 0); 256 + if (!sample) 257 + return -ENOMEM; 258 + 259 + sample->eip = pc; 260 + sample->event = event; 261 + 262 + return op_cpu_buffer_write_commit(&entry); 263 + } 264 + 265 + /* 266 + * This must be safe from any context. 203 267 * 204 268 * is_kernel is needed because on some architectures you cannot 205 269 * tell if you are in kernel or user space simply by looking at 206 270 * pc. We tag this in the buffer by generating kernel enter/exit 207 271 * events whenever is_kernel changes 208 272 */ 209 - static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, 210 - int is_kernel, unsigned long event) 273 + static int 274 + log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, 275 + unsigned long backtrace, int is_kernel, unsigned long event) 211 276 { 212 - struct task_struct *task; 213 - 214 277 cpu_buf->sample_received++; 215 278 216 279 if (pc == ESCAPE_CODE) { ··· 301 198 return 0; 302 199 } 303 200 304 - if (nr_available_slots(cpu_buf) < 3) { 305 - cpu_buf->sample_lost_overflow++; 306 - return 0; 307 - } 201 + if (op_add_code(cpu_buf, backtrace, is_kernel, current)) 202 + goto fail; 308 203 309 - is_kernel = !!is_kernel; 204 + if (op_add_sample(cpu_buf, pc, event)) 205 + goto fail; 310 206 311 - task = current; 312 - 313 - /* notice a switch from user->kernel or vice versa */ 314 - if (cpu_buf->last_is_kernel != is_kernel) { 315 - cpu_buf->last_is_kernel = is_kernel; 316 - add_code(cpu_buf, is_kernel); 317 - } 318 - 319 - /* notice a task switch */ 320 - if (cpu_buf->last_task != task) { 321 - cpu_buf->last_task = task; 322 - add_code(cpu_buf, (unsigned long)task); 323 - } 324 - 325 - add_sample(cpu_buf, pc, event); 326 207 return 1; 208 + 209 + fail: 210 + cpu_buf->sample_lost_overflow++; 211 + return 0; 327 212 } 328 213 329 - static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) 214 + static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) 330 215 { 331 - if (nr_available_slots(cpu_buf) < 4) { 332 - cpu_buf->sample_lost_overflow++; 333 - return 0; 334 - } 335 - 336 - add_code(cpu_buf, CPU_TRACE_BEGIN); 337 216 cpu_buf->tracing = 1; 338 - return 1; 339 217 } 340 218 341 - static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) 219 + static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) 342 220 { 343 221 cpu_buf->tracing = 0; 344 222 } 345 223 346 - void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, 347 - unsigned long event, int is_kernel) 224 + static inline void 225 + __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, 226 + unsigned long event, int is_kernel) 348 227 { 349 228 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 229 + unsigned long backtrace = oprofile_backtrace_depth; 350 230 351 - if (!backtrace_depth) { 352 - log_sample(cpu_buf, pc, is_kernel, event); 231 + /* 232 + * if log_sample() fail we can't backtrace since we lost the 233 + * source of this event 234 + */ 235 + if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event)) 236 + /* failed */ 353 237 return; 354 - } 355 238 356 - if (!oprofile_begin_trace(cpu_buf)) 239 + if (!backtrace) 357 240 return; 358 241 359 - /* if log_sample() fail we can't backtrace since we lost the source 360 - * of this event */ 361 - if (log_sample(cpu_buf, pc, is_kernel, event)) 362 - oprofile_ops.backtrace(regs, backtrace_depth); 242 + oprofile_begin_trace(cpu_buf); 243 + oprofile_ops.backtrace(regs, backtrace); 363 244 oprofile_end_trace(cpu_buf); 245 + } 246 + 247 + void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, 248 + unsigned long event, int is_kernel) 249 + { 250 + __oprofile_add_ext_sample(pc, regs, event, is_kernel); 364 251 } 365 252 366 253 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) ··· 358 265 int is_kernel = !user_mode(regs); 359 266 unsigned long pc = profile_pc(regs); 360 267 361 - oprofile_add_ext_sample(pc, regs, event, is_kernel); 268 + __oprofile_add_ext_sample(pc, regs, event, is_kernel); 362 269 } 363 270 364 - #ifdef CONFIG_OPROFILE_IBS 365 - 366 - #define MAX_IBS_SAMPLE_SIZE 14 367 - 368 - void oprofile_add_ibs_sample(struct pt_regs *const regs, 369 - unsigned int *const ibs_sample, int ibs_code) 271 + /* 272 + * Add samples with data to the ring buffer. 273 + * 274 + * Use oprofile_add_data(&entry, val) to add data and 275 + * oprofile_write_commit(&entry) to commit the sample. 276 + */ 277 + void 278 + oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, 279 + unsigned long pc, int code, int size) 370 280 { 281 + struct op_sample *sample; 371 282 int is_kernel = !user_mode(regs); 372 283 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 373 - struct task_struct *task; 374 284 375 285 cpu_buf->sample_received++; 376 286 377 - if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) { 378 - /* we can't backtrace since we lost the source of this event */ 379 - cpu_buf->sample_lost_overflow++; 380 - return; 381 - } 287 + /* no backtraces for samples with data */ 288 + if (op_add_code(cpu_buf, 0, is_kernel, current)) 289 + goto fail; 382 290 383 - /* notice a switch from user->kernel or vice versa */ 384 - if (cpu_buf->last_is_kernel != is_kernel) { 385 - cpu_buf->last_is_kernel = is_kernel; 386 - add_code(cpu_buf, is_kernel); 387 - } 291 + sample = op_cpu_buffer_write_reserve(entry, size + 2); 292 + if (!sample) 293 + goto fail; 294 + sample->eip = ESCAPE_CODE; 295 + sample->event = 0; /* no flags */ 388 296 389 - /* notice a task switch */ 390 - if (!is_kernel) { 391 - task = current; 392 - if (cpu_buf->last_task != task) { 393 - cpu_buf->last_task = task; 394 - add_code(cpu_buf, (unsigned long)task); 395 - } 396 - } 297 + op_cpu_buffer_add_data(entry, code); 298 + op_cpu_buffer_add_data(entry, pc); 397 299 398 - add_code(cpu_buf, ibs_code); 399 - add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]); 400 - add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]); 401 - add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]); 300 + return; 402 301 403 - if (ibs_code == IBS_OP_BEGIN) { 404 - add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]); 405 - add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]); 406 - add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]); 407 - } 408 - 409 - if (backtrace_depth) 410 - oprofile_ops.backtrace(regs, backtrace_depth); 302 + fail: 303 + cpu_buf->sample_lost_overflow++; 411 304 } 412 305 413 - #endif 306 + int oprofile_add_data(struct op_entry *entry, unsigned long val) 307 + { 308 + return op_cpu_buffer_add_data(entry, val); 309 + } 310 + 311 + int oprofile_write_commit(struct op_entry *entry) 312 + { 313 + return op_cpu_buffer_write_commit(entry); 314 + } 414 315 415 316 void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) 416 317 { 417 318 struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); 418 - log_sample(cpu_buf, pc, is_kernel, event); 319 + log_sample(cpu_buf, pc, 0, is_kernel, event); 419 320 } 420 321 421 322 void oprofile_add_trace(unsigned long pc) ··· 419 332 if (!cpu_buf->tracing) 420 333 return; 421 334 422 - if (nr_available_slots(cpu_buf) < 1) { 423 - cpu_buf->tracing = 0; 424 - cpu_buf->sample_lost_overflow++; 425 - return; 426 - } 335 + /* 336 + * broken frame can give an eip with the same value as an 337 + * escape code, abort the trace if we get it 338 + */ 339 + if (pc == ESCAPE_CODE) 340 + goto fail; 427 341 428 - /* broken frame can give an eip with the same value as an escape code, 429 - * abort the trace if we get it */ 430 - if (pc == ESCAPE_CODE) { 431 - cpu_buf->tracing = 0; 432 - cpu_buf->backtrace_aborted++; 433 - return; 434 - } 342 + if (op_add_sample(cpu_buf, pc, 0)) 343 + goto fail; 435 344 436 - add_sample(cpu_buf, pc, 0); 345 + return; 346 + fail: 347 + cpu_buf->tracing = 0; 348 + cpu_buf->backtrace_aborted++; 349 + return; 437 350 } 438 351 439 352 /*
+62 -10
drivers/oprofile/cpu_buffer.h
··· 1 1 /** 2 2 * @file cpu_buffer.h 3 3 * 4 - * @remark Copyright 2002 OProfile authors 4 + * @remark Copyright 2002-2009 OProfile authors 5 5 * @remark Read the file COPYING 6 6 * 7 7 * @author John Levon <levon@movementarian.org> 8 + * @author Robert Richter <robert.richter@amd.com> 8 9 */ 9 10 10 11 #ifndef OPROFILE_CPU_BUFFER_H ··· 16 15 #include <linux/workqueue.h> 17 16 #include <linux/cache.h> 18 17 #include <linux/sched.h> 18 + #include <linux/ring_buffer.h> 19 19 20 20 struct task_struct; 21 21 ··· 32 30 struct op_sample { 33 31 unsigned long eip; 34 32 unsigned long event; 33 + unsigned long data[0]; 35 34 }; 36 35 36 + struct op_entry; 37 + 37 38 struct oprofile_cpu_buffer { 38 - volatile unsigned long head_pos; 39 - volatile unsigned long tail_pos; 40 39 unsigned long buffer_size; 41 40 struct task_struct *last_task; 42 41 int last_is_kernel; 43 42 int tracing; 44 - struct op_sample *buffer; 45 43 unsigned long sample_received; 46 44 unsigned long sample_lost_overflow; 47 45 unsigned long backtrace_aborted; ··· 52 50 53 51 DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); 54 52 55 - void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf); 53 + /* 54 + * Resets the cpu buffer to a sane state. 55 + * 56 + * reset these to invalid values; the next sample collected will 57 + * populate the buffer with proper values to initialize the buffer 58 + */ 59 + static inline void op_cpu_buffer_reset(int cpu) 60 + { 61 + struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); 56 62 57 - /* transient events for the CPU buffer -> event buffer */ 58 - #define CPU_IS_KERNEL 1 59 - #define CPU_TRACE_BEGIN 2 60 - #define IBS_FETCH_BEGIN 3 61 - #define IBS_OP_BEGIN 4 63 + cpu_buf->last_is_kernel = -1; 64 + cpu_buf->last_task = NULL; 65 + } 66 + 67 + struct op_sample 68 + *op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size); 69 + int op_cpu_buffer_write_commit(struct op_entry *entry); 70 + struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu); 71 + unsigned long op_cpu_buffer_entries(int cpu); 72 + 73 + /* returns the remaining free size of data in the entry */ 74 + static inline 75 + int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val) 76 + { 77 + if (!entry->size) 78 + return 0; 79 + *entry->data = val; 80 + entry->size--; 81 + entry->data++; 82 + return entry->size; 83 + } 84 + 85 + /* returns the size of data in the entry */ 86 + static inline 87 + int op_cpu_buffer_get_size(struct op_entry *entry) 88 + { 89 + return entry->size; 90 + } 91 + 92 + /* returns 0 if empty or the size of data including the current value */ 93 + static inline 94 + int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val) 95 + { 96 + int size = entry->size; 97 + if (!size) 98 + return 0; 99 + *val = *entry->data; 100 + entry->size--; 101 + entry->data++; 102 + return size; 103 + } 104 + 105 + /* extra data flags */ 106 + #define KERNEL_CTX_SWITCH (1UL << 0) 107 + #define IS_KERNEL (1UL << 1) 108 + #define TRACE_BEGIN (1UL << 2) 109 + #define USER_CTX_SWITCH (1UL << 3) 62 110 63 111 #endif /* OPROFILE_CPU_BUFFER_H */
+2 -2
drivers/oprofile/event_buffer.c
··· 73 73 unsigned long flags; 74 74 75 75 spin_lock_irqsave(&oprofilefs_lock, flags); 76 - buffer_size = fs_buffer_size; 77 - buffer_watershed = fs_buffer_watershed; 76 + buffer_size = oprofile_buffer_size; 77 + buffer_watershed = oprofile_buffer_watershed; 78 78 spin_unlock_irqrestore(&oprofilefs_lock, flags); 79 79 80 80 if (buffer_watershed >= buffer_size)
+2 -2
drivers/oprofile/oprof.c
··· 23 23 struct oprofile_operations oprofile_ops; 24 24 25 25 unsigned long oprofile_started; 26 - unsigned long backtrace_depth; 26 + unsigned long oprofile_backtrace_depth; 27 27 static unsigned long is_setup; 28 28 static DEFINE_MUTEX(start_mutex); 29 29 ··· 172 172 goto out; 173 173 } 174 174 175 - backtrace_depth = val; 175 + oprofile_backtrace_depth = val; 176 176 177 177 out: 178 178 mutex_unlock(&start_mutex);
+4 -4
drivers/oprofile/oprof.h
··· 21 21 22 22 struct oprofile_operations; 23 23 24 - extern unsigned long fs_buffer_size; 25 - extern unsigned long fs_cpu_buffer_size; 26 - extern unsigned long fs_buffer_watershed; 24 + extern unsigned long oprofile_buffer_size; 25 + extern unsigned long oprofile_cpu_buffer_size; 26 + extern unsigned long oprofile_buffer_watershed; 27 27 extern struct oprofile_operations oprofile_ops; 28 28 extern unsigned long oprofile_started; 29 - extern unsigned long backtrace_depth; 29 + extern unsigned long oprofile_backtrace_depth; 30 30 31 31 struct super_block; 32 32 struct dentry;
+17 -7
drivers/oprofile/oprofile_files.c
··· 14 14 #include "oprofile_stats.h" 15 15 #include "oprof.h" 16 16 17 - unsigned long fs_buffer_size = 131072; 18 - unsigned long fs_cpu_buffer_size = 8192; 19 - unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ 17 + #define BUFFER_SIZE_DEFAULT 131072 18 + #define CPU_BUFFER_SIZE_DEFAULT 8192 19 + #define BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */ 20 + 21 + unsigned long oprofile_buffer_size; 22 + unsigned long oprofile_cpu_buffer_size; 23 + unsigned long oprofile_buffer_watershed; 20 24 21 25 static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) 22 26 { 23 - return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); 27 + return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, 28 + offset); 24 29 } 25 30 26 31 ··· 125 120 126 121 void oprofile_create_files(struct super_block *sb, struct dentry *root) 127 122 { 123 + /* reinitialize default values */ 124 + oprofile_buffer_size = BUFFER_SIZE_DEFAULT; 125 + oprofile_cpu_buffer_size = CPU_BUFFER_SIZE_DEFAULT; 126 + oprofile_buffer_watershed = BUFFER_WATERSHED_DEFAULT; 127 + 128 128 oprofilefs_create_file(sb, root, "enable", &enable_fops); 129 129 oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); 130 130 oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); 131 - oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); 132 - oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); 133 - oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size); 131 + oprofilefs_create_ulong(sb, root, "buffer_size", &oprofile_buffer_size); 132 + oprofilefs_create_ulong(sb, root, "buffer_watershed", &oprofile_buffer_watershed); 133 + oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &oprofile_cpu_buffer_size); 134 134 oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); 135 135 oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); 136 136 oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops);
+19 -2
include/linux/oprofile.h
··· 86 86 void oprofile_arch_exit(void); 87 87 88 88 /** 89 - * Add a sample. This may be called from any context. Pass 90 - * smp_processor_id() as cpu. 89 + * Add a sample. This may be called from any context. 91 90 */ 92 91 void oprofile_add_sample(struct pt_regs * const regs, unsigned long event); 93 92 ··· 164 165 unsigned long oprofile_get_cpu_buffer_size(void); 165 166 void oprofile_cpu_buffer_inc_smpl_lost(void); 166 167 168 + /* cpu buffer functions */ 169 + 170 + struct op_sample; 171 + 172 + struct op_entry { 173 + struct ring_buffer_event *event; 174 + struct op_sample *sample; 175 + unsigned long irq_flags; 176 + unsigned long size; 177 + unsigned long *data; 178 + }; 179 + 180 + void oprofile_write_reserve(struct op_entry *entry, 181 + struct pt_regs * const regs, 182 + unsigned long pc, int code, int size); 183 + int oprofile_add_data(struct op_entry *entry, unsigned long val); 184 + int oprofile_write_commit(struct op_entry *entry); 185 + 167 186 #endif /* OPROFILE_H */
+2
include/linux/ring_buffer.h
··· 116 116 117 117 unsigned long ring_buffer_entries(struct ring_buffer *buffer); 118 118 unsigned long ring_buffer_overruns(struct ring_buffer *buffer); 119 + unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); 120 + unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); 119 121 120 122 u64 ring_buffer_time_stamp(int cpu); 121 123 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts);
+42 -2
kernel/trace/ring_buffer.c
··· 31 31 { 32 32 ring_buffers_off = 0; 33 33 } 34 + EXPORT_SYMBOL_GPL(tracing_on); 34 35 35 36 /** 36 37 * tracing_off - turn off all tracing buffers ··· 45 44 { 46 45 ring_buffers_off = 1; 47 46 } 47 + EXPORT_SYMBOL_GPL(tracing_off); 48 48 49 49 /* Up this if you want to test the TIME_EXTENTS and normalization */ 50 50 #define DEBUG_SHIFT 0 ··· 62 60 63 61 return time; 64 62 } 63 + EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); 65 64 66 65 void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) 67 66 { 68 67 /* Just stupid testing the normalize function and deltas */ 69 68 *ts >>= DEBUG_SHIFT; 70 69 } 70 + EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); 71 71 72 72 #define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) 73 73 #define RB_ALIGNMENT_SHIFT 2 ··· 117 113 */ 118 114 unsigned ring_buffer_event_length(struct ring_buffer_event *event) 119 115 { 120 - return rb_event_length(event); 116 + unsigned length = rb_event_length(event); 117 + if (event->type != RINGBUF_TYPE_DATA) 118 + return length; 119 + length -= RB_EVNT_HDR_SIZE; 120 + if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0])) 121 + length -= sizeof(event->array[0]); 122 + return length; 121 123 } 124 + EXPORT_SYMBOL_GPL(ring_buffer_event_length); 122 125 123 126 /* inline for ring buffer fast paths */ 124 127 static inline void * ··· 147 136 { 148 137 return rb_event_data(event); 149 138 } 139 + EXPORT_SYMBOL_GPL(ring_buffer_event_data); 150 140 151 141 #define for_each_buffer_cpu(buffer, cpu) \ 152 142 for_each_cpu_mask(cpu, buffer->cpumask) ··· 393 381 394 382 /** 395 383 * ring_buffer_alloc - allocate a new ring_buffer 396 - * @size: the size in bytes that is needed. 384 + * @size: the size in bytes per cpu that is needed. 397 385 * @flags: attributes to set for the ring buffer. 398 386 * 399 387 * Currently the only flag that is available is the RB_FL_OVERWRITE ··· 456 444 kfree(buffer); 457 445 return NULL; 458 446 } 447 + EXPORT_SYMBOL_GPL(ring_buffer_alloc); 459 448 460 449 /** 461 450 * ring_buffer_free - free a ring buffer. ··· 472 459 473 460 kfree(buffer); 474 461 } 462 + EXPORT_SYMBOL_GPL(ring_buffer_free); 475 463 476 464 static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); 477 465 ··· 634 620 mutex_unlock(&buffer->mutex); 635 621 return -ENOMEM; 636 622 } 623 + EXPORT_SYMBOL_GPL(ring_buffer_resize); 637 624 638 625 static inline int rb_null_event(struct ring_buffer_event *event) 639 626 { ··· 1235 1220 preempt_enable_notrace(); 1236 1221 return NULL; 1237 1222 } 1223 + EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve); 1238 1224 1239 1225 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer, 1240 1226 struct ring_buffer_event *event) ··· 1285 1269 1286 1270 return 0; 1287 1271 } 1272 + EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit); 1288 1273 1289 1274 /** 1290 1275 * ring_buffer_write - write data to the buffer without reserving ··· 1351 1334 1352 1335 return ret; 1353 1336 } 1337 + EXPORT_SYMBOL_GPL(ring_buffer_write); 1354 1338 1355 1339 static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) 1356 1340 { ··· 1378 1360 { 1379 1361 atomic_inc(&buffer->record_disabled); 1380 1362 } 1363 + EXPORT_SYMBOL_GPL(ring_buffer_record_disable); 1381 1364 1382 1365 /** 1383 1366 * ring_buffer_record_enable - enable writes to the buffer ··· 1391 1372 { 1392 1373 atomic_dec(&buffer->record_disabled); 1393 1374 } 1375 + EXPORT_SYMBOL_GPL(ring_buffer_record_enable); 1394 1376 1395 1377 /** 1396 1378 * ring_buffer_record_disable_cpu - stop all writes into the cpu_buffer ··· 1413 1393 cpu_buffer = buffer->buffers[cpu]; 1414 1394 atomic_inc(&cpu_buffer->record_disabled); 1415 1395 } 1396 + EXPORT_SYMBOL_GPL(ring_buffer_record_disable_cpu); 1416 1397 1417 1398 /** 1418 1399 * ring_buffer_record_enable_cpu - enable writes to the buffer ··· 1433 1412 cpu_buffer = buffer->buffers[cpu]; 1434 1413 atomic_dec(&cpu_buffer->record_disabled); 1435 1414 } 1415 + EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); 1436 1416 1437 1417 /** 1438 1418 * ring_buffer_entries_cpu - get the number of entries in a cpu buffer ··· 1450 1428 cpu_buffer = buffer->buffers[cpu]; 1451 1429 return cpu_buffer->entries; 1452 1430 } 1431 + EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); 1453 1432 1454 1433 /** 1455 1434 * ring_buffer_overrun_cpu - get the number of overruns in a cpu_buffer ··· 1467 1444 cpu_buffer = buffer->buffers[cpu]; 1468 1445 return cpu_buffer->overrun; 1469 1446 } 1447 + EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); 1470 1448 1471 1449 /** 1472 1450 * ring_buffer_entries - get the number of entries in a buffer ··· 1490 1466 1491 1467 return entries; 1492 1468 } 1469 + EXPORT_SYMBOL_GPL(ring_buffer_entries); 1493 1470 1494 1471 /** 1495 1472 * ring_buffer_overrun_cpu - get the number of overruns in buffer ··· 1513 1488 1514 1489 return overruns; 1515 1490 } 1491 + EXPORT_SYMBOL_GPL(ring_buffer_overruns); 1516 1492 1517 1493 /** 1518 1494 * ring_buffer_iter_reset - reset an iterator ··· 1539 1513 else 1540 1514 iter->read_stamp = iter->head_page->time_stamp; 1541 1515 } 1516 + EXPORT_SYMBOL_GPL(ring_buffer_iter_reset); 1542 1517 1543 1518 /** 1544 1519 * ring_buffer_iter_empty - check if an iterator has no more to read ··· 1554 1527 return iter->head_page == cpu_buffer->commit_page && 1555 1528 iter->head == rb_commit_index(cpu_buffer); 1556 1529 } 1530 + EXPORT_SYMBOL_GPL(ring_buffer_iter_empty); 1557 1531 1558 1532 static void 1559 1533 rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer, ··· 1825 1797 1826 1798 return NULL; 1827 1799 } 1800 + EXPORT_SYMBOL_GPL(ring_buffer_peek); 1828 1801 1829 1802 /** 1830 1803 * ring_buffer_iter_peek - peek at the next event to be read ··· 1896 1867 1897 1868 return NULL; 1898 1869 } 1870 + EXPORT_SYMBOL_GPL(ring_buffer_iter_peek); 1899 1871 1900 1872 /** 1901 1873 * ring_buffer_consume - return an event and consume it ··· 1924 1894 1925 1895 return event; 1926 1896 } 1897 + EXPORT_SYMBOL_GPL(ring_buffer_consume); 1927 1898 1928 1899 /** 1929 1900 * ring_buffer_read_start - start a non consuming read of the buffer ··· 1965 1934 1966 1935 return iter; 1967 1936 } 1937 + EXPORT_SYMBOL_GPL(ring_buffer_read_start); 1968 1938 1969 1939 /** 1970 1940 * ring_buffer_finish - finish reading the iterator of the buffer ··· 1982 1950 atomic_dec(&cpu_buffer->record_disabled); 1983 1951 kfree(iter); 1984 1952 } 1953 + EXPORT_SYMBOL_GPL(ring_buffer_read_finish); 1985 1954 1986 1955 /** 1987 1956 * ring_buffer_read - read the next item in the ring buffer by the iterator ··· 2004 1971 2005 1972 return event; 2006 1973 } 1974 + EXPORT_SYMBOL_GPL(ring_buffer_read); 2007 1975 2008 1976 /** 2009 1977 * ring_buffer_size - return the size of the ring buffer (in bytes) ··· 2014 1980 { 2015 1981 return BUF_PAGE_SIZE * buffer->pages; 2016 1982 } 1983 + EXPORT_SYMBOL_GPL(ring_buffer_size); 2017 1984 2018 1985 static void 2019 1986 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) ··· 2057 2022 2058 2023 spin_unlock_irqrestore(&cpu_buffer->lock, flags); 2059 2024 } 2025 + EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu); 2060 2026 2061 2027 /** 2062 2028 * ring_buffer_reset - reset a ring buffer ··· 2070 2034 for_each_buffer_cpu(buffer, cpu) 2071 2035 ring_buffer_reset_cpu(buffer, cpu); 2072 2036 } 2037 + EXPORT_SYMBOL_GPL(ring_buffer_reset); 2073 2038 2074 2039 /** 2075 2040 * rind_buffer_empty - is the ring buffer empty? ··· 2089 2052 } 2090 2053 return 1; 2091 2054 } 2055 + EXPORT_SYMBOL_GPL(ring_buffer_empty); 2092 2056 2093 2057 /** 2094 2058 * ring_buffer_empty_cpu - is a cpu buffer of a ring buffer empty? ··· 2106 2068 cpu_buffer = buffer->buffers[cpu]; 2107 2069 return rb_per_cpu_empty(cpu_buffer); 2108 2070 } 2071 + EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); 2109 2072 2110 2073 /** 2111 2074 * ring_buffer_swap_cpu - swap a CPU buffer between two ring buffers ··· 2156 2117 2157 2118 return 0; 2158 2119 } 2120 + EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); 2159 2121 2160 2122 static ssize_t 2161 2123 rb_simple_read(struct file *filp, char __user *ubuf,
+2 -2
kernel/trace/trace.c
··· 914 914 TRACE_FILE_LAT_FMT = 1, 915 915 }; 916 916 917 - static void trace_iterator_increment(struct trace_iterator *iter, int cpu) 917 + static void trace_iterator_increment(struct trace_iterator *iter) 918 918 { 919 919 /* Don't allow ftrace to trace into the ring buffers */ 920 920 ftrace_disable_cpu(); ··· 993 993 iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts); 994 994 995 995 if (iter->ent) 996 - trace_iterator_increment(iter, iter->cpu); 996 + trace_iterator_increment(iter); 997 997 998 998 return iter->ent ? iter : NULL; 999 999 }