Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'trace-ring-buffer-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull trace ring-buffer updates from Steven Rostedt:

- Limit time interrupts are disabled in rb_check_pages()

rb_check_pages() is called after the ring buffer size is updated to
make sure that the ring buffer has not been corrupted. Commit
c2274b908db0 ("ring-buffer: Fix a race between readers and resize
checks") fixed a race with the check pages and simultaneous resizes
to the ring buffer by adding a raw_spin_lock_irqsave() around the
check operation. Although this was a simple fix, it would hold
interrupts disabled for a non-deterministic amount of time. This
could harm PREEMPT_RT operations.

Instead, modify the logic by adding a counter that is incremented
whenever the page list is modified, and by releasing the
raw_spin_lock() at each iteration. The counter is checked under the
lock to see if a modification happened during the loop; if it did,
the loop is restarted, up to 3 times. After 3 attempts, the check
simply exits, as it is unlikely this would ever happen since buffer
resizes are rare occurrences.

- Replace some open-coded "low"/"high" ternaries with the str_low_high() helper

- Fix some documentation/comments

* tag 'trace-ring-buffer-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
ring-buffer: Correct a grammatical error in a comment
ring-buffer: Use str_low_high() helper in ring_buffer_producer()
ring-buffer: Reorganize kerneldoc parameter names
ring-buffer: Limit time with disabled interrupts in rb_check_pages()

+76 -30
+74 -28
kernel/trace/ring_buffer.c
··· 482 482 unsigned long nr_pages; 483 483 unsigned int current_context; 484 484 struct list_head *pages; 485 + /* pages generation counter, incremented when the list changes */ 486 + unsigned long cnt; 485 487 struct buffer_page *head_page; /* read from head */ 486 488 struct buffer_page *tail_page; /* write to tail */ 487 489 struct buffer_page *commit_page; /* committed pages */ ··· 1477 1475 RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK); 1478 1476 } 1479 1477 1478 + static bool rb_check_links(struct ring_buffer_per_cpu *cpu_buffer, 1479 + struct list_head *list) 1480 + { 1481 + if (RB_WARN_ON(cpu_buffer, 1482 + rb_list_head(rb_list_head(list->next)->prev) != list)) 1483 + return false; 1484 + 1485 + if (RB_WARN_ON(cpu_buffer, 1486 + rb_list_head(rb_list_head(list->prev)->next) != list)) 1487 + return false; 1488 + 1489 + return true; 1490 + } 1491 + 1480 1492 /** 1481 1493 * rb_check_pages - integrity check of buffer pages 1482 1494 * @cpu_buffer: CPU buffer with pages to test 1483 1495 * 1484 1496 * As a safety measure we check to make sure the data pages have not 1485 1497 * been corrupted. 1486 - * 1487 - * Callers of this function need to guarantee that the list of pages doesn't get 1488 - * modified during the check. In particular, if it's possible that the function 1489 - * is invoked with concurrent readers which can swap in a new reader page then 1490 - * the caller should take cpu_buffer->reader_lock. 1491 1498 */ 1492 1499 static void rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) 1493 1500 { 1494 - struct list_head *head = rb_list_head(cpu_buffer->pages); 1495 - struct list_head *tmp; 1501 + struct list_head *head, *tmp; 1502 + unsigned long buffer_cnt; 1503 + unsigned long flags; 1504 + int nr_loops = 0; 1496 1505 1497 - if (RB_WARN_ON(cpu_buffer, 1498 - rb_list_head(rb_list_head(head->next)->prev) != head)) 1506 + /* 1507 + * Walk the linked list underpinning the ring buffer and validate all 1508 + * its next and prev links. 
1509 + * 1510 + * The check acquires the reader_lock to avoid concurrent processing 1511 + * with code that could be modifying the list. However, the lock cannot 1512 + * be held for the entire duration of the walk, as this would make the 1513 + * time when interrupts are disabled non-deterministic, dependent on the 1514 + * ring buffer size. Therefore, the code releases and re-acquires the 1515 + * lock after checking each page. The ring_buffer_per_cpu.cnt variable 1516 + * is then used to detect if the list was modified while the lock was 1517 + * not held, in which case the check needs to be restarted. 1518 + * 1519 + * The code attempts to perform the check at most three times before 1520 + * giving up. This is acceptable because this is only a self-validation 1521 + * to detect problems early on. In practice, the list modification 1522 + * operations are fairly spaced, and so this check typically succeeds at 1523 + * most on the second try. 1524 + */ 1525 + again: 1526 + if (++nr_loops > 3) 1499 1527 return; 1500 1528 1501 - if (RB_WARN_ON(cpu_buffer, 1502 - rb_list_head(rb_list_head(head->prev)->next) != head)) 1503 - return; 1529 + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 1530 + head = rb_list_head(cpu_buffer->pages); 1531 + if (!rb_check_links(cpu_buffer, head)) 1532 + goto out_locked; 1533 + buffer_cnt = cpu_buffer->cnt; 1534 + tmp = head; 1535 + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 1504 1536 1505 - for (tmp = rb_list_head(head->next); tmp != head; tmp = rb_list_head(tmp->next)) { 1506 - if (RB_WARN_ON(cpu_buffer, 1507 - rb_list_head(rb_list_head(tmp->next)->prev) != tmp)) 1508 - return; 1537 + while (true) { 1538 + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 1509 1539 1510 - if (RB_WARN_ON(cpu_buffer, 1511 - rb_list_head(rb_list_head(tmp->prev)->next) != tmp)) 1512 - return; 1540 + if (buffer_cnt != cpu_buffer->cnt) { 1541 + /* The list was updated, try again. 
*/ 1542 + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 1543 + goto again; 1544 + } 1545 + 1546 + tmp = rb_list_head(tmp->next); 1547 + if (tmp == head) 1548 + /* The iteration circled back, all is done. */ 1549 + goto out_locked; 1550 + 1551 + if (!rb_check_links(cpu_buffer, tmp)) 1552 + goto out_locked; 1553 + 1554 + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 1513 1555 } 1556 + 1557 + out_locked: 1558 + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 1514 1559 } 1515 1560 1516 1561 /* ··· 2433 2384 * __ring_buffer_alloc_range - allocate a new ring_buffer from existing memory 2434 2385 * @size: the size in bytes per cpu that is needed. 2435 2386 * @flags: attributes to set for the ring buffer. 2387 + * @order: sub-buffer order 2436 2388 * @start: start of allocated range 2437 2389 * @range_size: size of allocated range 2438 - * @order: sub-buffer order 2439 2390 * @key: ring buffer reader_lock_key. 2440 2391 * 2441 2392 * Currently the only flag that is available is the RB_FL_OVERWRITE ··· 2581 2532 2582 2533 /* make sure pages points to a valid page in the ring buffer */ 2583 2534 cpu_buffer->pages = next_page; 2535 + cpu_buffer->cnt++; 2584 2536 2585 2537 /* update head page */ 2586 2538 if (head_bit) ··· 2688 2638 * pointer to point to end of list 2689 2639 */ 2690 2640 head_page->prev = last_page; 2641 + cpu_buffer->cnt++; 2691 2642 success = true; 2692 2643 break; 2693 2644 } ··· 2924 2873 */ 2925 2874 synchronize_rcu(); 2926 2875 for_each_buffer_cpu(buffer, cpu) { 2927 - unsigned long flags; 2928 - 2929 2876 cpu_buffer = buffer->buffers[cpu]; 2930 - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 2931 2877 rb_check_pages(cpu_buffer); 2932 - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 2933 2878 } 2934 2879 atomic_dec(&buffer->record_disabled); 2935 2880 } ··· 4057 4010 return type[bits]; 4058 4011 } 4059 4012 4060 - /* Assume this is an trace event */ 4013 + /* Assume this is a trace event 
*/ 4061 4014 static const char *show_flags(struct ring_buffer_event *event) 4062 4015 { 4063 4016 struct trace_entry *entry; ··· 5343 5296 rb_list_head(reader->list.next)->prev = &cpu_buffer->reader_page->list; 5344 5297 rb_inc_page(&cpu_buffer->head_page); 5345 5298 5299 + cpu_buffer->cnt++; 5346 5300 local_inc(&cpu_buffer->pages_read); 5347 5301 5348 5302 /* Finally update the reader page to the new head */ ··· 5883 5835 ring_buffer_read_finish(struct ring_buffer_iter *iter) 5884 5836 { 5885 5837 struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; 5886 - unsigned long flags; 5887 5838 5888 5839 /* Use this opportunity to check the integrity of the ring buffer. */ 5889 - raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); 5890 5840 rb_check_pages(cpu_buffer); 5891 - raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); 5892 5841 5893 5842 atomic_dec(&cpu_buffer->resize_disabled); 5894 5843 kfree(iter->event); ··· 6802 6757 /* Install the new pages, remove the head from the list */ 6803 6758 cpu_buffer->pages = cpu_buffer->new_pages.next; 6804 6759 list_del_init(&cpu_buffer->new_pages); 6760 + cpu_buffer->cnt++; 6805 6761 6806 6762 cpu_buffer->head_page 6807 6763 = list_entry(cpu_buffer->pages, struct buffer_page, list);
+2 -2
kernel/trace/ring_buffer_benchmark.c
··· 307 307 if (!disable_reader) { 308 308 if (consumer_fifo) 309 309 trace_printk("Running Consumer at SCHED_FIFO %s\n", 310 - consumer_fifo == 1 ? "low" : "high"); 310 + str_low_high(consumer_fifo == 1)); 311 311 else 312 312 trace_printk("Running Consumer at nice: %d\n", 313 313 consumer_nice); 314 314 } 315 315 if (producer_fifo) 316 316 trace_printk("Running Producer at SCHED_FIFO %s\n", 317 - producer_fifo == 1 ? "low" : "high"); 317 + str_low_high(producer_fifo == 1)); 318 318 else 319 319 trace_printk("Running Producer at nice: %d\n", 320 320 producer_nice);