Merge tag 'printk-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux

Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

kernel os linux

Pull printk updates from Petr Mladek:
"Improve the behavior during panic. The issues were found when testing
the ongoing changes introducing atomic consoles and printk kthreads:

- pr_flush() has to wait for the last reserved record instead of the
last finalized one. Note that records are finalized in random order
when generated by multiple CPUs in parallel.

- Ignore non-finalized records during panic(). Messages printed on the
panic CPU are always finalized. Messages printed by other CPUs
might never be finalized when the CPUs get stopped.

- Block new printk() calls on non-panic CPUs completely. Backtraces
are printed before entering the panic mode. Later messages would
just mess up the information printed by the panic CPU.

- Do not take console_lock in console_flush_on_panic() at all. The
original code did try_lock()/console_unlock(). The unlock part
might cause a deadlock when panic() happened in scheduler code.

- Fix conversion of 64-bit sequence number for 32-bit atomic
operations"

* tag 'printk-for-6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux:
dump_stack: Do not get cpu_sync for panic CPU
panic: Flush kernel log buffer at the end
printk: Avoid non-panic CPUs writing to ringbuffer
printk: Disable passing console lock owner completely during panic()
printk: ringbuffer: Skip non-finalized records in panic
printk: Wait for all reserved records with pr_flush()
printk: ringbuffer: Cleanup reader terminology
printk: Add this_cpu_in_panic()
printk: For @suppress_panic_printk check for other CPU in panic
printk: ringbuffer: Clarify special lpos values
printk: ringbuffer: Do not skip non-finalized records with prb_next_seq()
printk: Use prb_first_seq() as base for 32bit seq macros
printk: Adjust mapping for 32bit seq macros
printk: nbcon: Relocate 32bit seq macros

Linus Torvalds 2 years ago b0546776 f88c3fb8

+420 -139

7 changed files

expand all

include

linux

printk.h

kernel

panic.c

printk

nbcon.c

printk.c

printk_ringbuffer.c

printk_ringbuffer.h

lib

dump_stack.c

include/linux/printk.h

··· 273 273 } 274 274 #endif 275 275 276 + bool this_cpu_in_panic(void); 277 + 276 278 #ifdef CONFIG_SMP 277 279 extern int __printk_cpu_sync_try_get(void); 278 280 extern void __printk_cpu_sync_wait(void);

kernel/panic.c

··· 446 446 447 447 /* Do not scroll important messages printed above */ 448 448 suppress_printk = 1; 449 + 450 + /* 451 + * The final messages may not have been printed if in a context that 452 + * defers printing (such as NMI) and irq_work is not available. 453 + * Explicitly flush the kernel log buffer one last time. 454 + */ 455 + console_flush_on_panic(CONSOLE_FLUSH_PENDING); 456 + 449 457 local_irq_enable(); 450 458 for (i = 0; ; i += PANIC_TIMER_STEP) { 451 459 touch_softlockup_watchdog();

+4 -37

kernel/printk/nbcon.c

··· 140 140 return atomic_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_state), &cur->atom, new->atom); 141 141 } 142 142 143 - #ifdef CONFIG_64BIT 144 - 145 - #define __seq_to_nbcon_seq(seq) (seq) 146 - #define __nbcon_seq_to_seq(seq) (seq) 147 - 148 - #else /* CONFIG_64BIT */ 149 - 150 - #define __seq_to_nbcon_seq(seq) ((u32)seq) 151 - 152 - static inline u64 __nbcon_seq_to_seq(u32 nbcon_seq) 153 - { 154 - u64 seq; 155 - u64 rb_next_seq; 156 - 157 - /* 158 - * The provided sequence is only the lower 32 bits of the ringbuffer 159 - * sequence. It needs to be expanded to 64bit. Get the next sequence 160 - * number from the ringbuffer and fold it. 161 - * 162 - * Having a 32bit representation in the console is sufficient. 163 - * If a console ever gets more than 2^31 records behind 164 - * the ringbuffer then this is the least of the problems. 165 - * 166 - * Also the access to the ring buffer is always safe. 167 - */ 168 - rb_next_seq = prb_next_seq(prb); 169 - seq = rb_next_seq - ((u32)rb_next_seq - nbcon_seq); 170 - 171 - return seq; 172 - } 173 - 174 - #endif /* CONFIG_64BIT */ 175 - 176 143 /** 177 144 * nbcon_seq_read - Read the current console sequence 178 145 * @con: Console to read the sequence of ··· 150 183 { 151 184 unsigned long nbcon_seq = atomic_long_read(&ACCESS_PRIVATE(con, nbcon_seq)); 152 185 153 - return __nbcon_seq_to_seq(nbcon_seq); 186 + return __ulseq_to_u64seq(prb, nbcon_seq); 154 187 } 155 188 156 189 /** ··· 171 204 */ 172 205 u64 valid_seq = max_t(u64, seq, prb_first_valid_seq(prb)); 173 206 174 - atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __seq_to_nbcon_seq(valid_seq)); 207 + atomic_long_set(&ACCESS_PRIVATE(con, nbcon_seq), __u64seq_to_ulseq(valid_seq)); 175 208 176 209 /* Clear con->seq since nbcon consoles use con->nbcon_seq instead. 
*/ 177 210 con->seq = 0; ··· 190 223 */ 191 224 static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq) 192 225 { 193 - unsigned long nbcon_seq = __seq_to_nbcon_seq(ctxt->seq); 226 + unsigned long nbcon_seq = __u64seq_to_ulseq(ctxt->seq); 194 227 struct console *con = ctxt->console; 195 228 196 229 if (atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, nbcon_seq), &nbcon_seq, 197 - __seq_to_nbcon_seq(new_seq))) { 230 + __u64seq_to_ulseq(new_seq))) { 198 231 ctxt->seq = new_seq; 199 232 } else { 200 233 ctxt->seq = nbcon_seq_read(con);

+59 -42

kernel/printk/printk.c

··· 347 347 return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID); 348 348 } 349 349 350 + /* Return true if a panic is in progress on the current CPU. */ 351 + bool this_cpu_in_panic(void) 352 + { 353 + /* 354 + * We can use raw_smp_processor_id() here because it is impossible for 355 + * the task to be migrated to the panic_cpu, or away from it. If 356 + * panic_cpu has already been set, and we're not currently executing on 357 + * that CPU, then we never will be. 358 + */ 359 + return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id()); 360 + } 361 + 362 + /* 363 + * Return true if a panic is in progress on a remote CPU. 364 + * 365 + * On true, the local CPU should immediately release any printing resources 366 + * that may be needed by the panic CPU. 367 + */ 368 + bool other_cpu_in_panic(void) 369 + { 370 + return (panic_in_progress() && !this_cpu_in_panic()); 371 + } 372 + 350 373 /* 351 374 * This is used for debugging the mess that is the VT code by 352 375 * keeping track if we have the console semaphore held. It's ··· 462 439 static DEFINE_MUTEX(syslog_lock); 463 440 464 441 #ifdef CONFIG_PRINTK 465 - /* 466 - * During panic, heavy printk by other CPUs can delay the 467 - * panic and risk deadlock on console resources. 468 - */ 469 - static int __read_mostly suppress_panic_printk; 470 - 471 442 DECLARE_WAIT_QUEUE_HEAD(log_wait); 472 443 /* All 3 protected by @syslog_lock. */ 473 444 /* the next printk record to read by syslog(READ) or /proc/kmsg */ ··· 1852 1835 */ 1853 1836 static void console_lock_spinning_enable(void) 1854 1837 { 1838 + /* 1839 + * Do not use spinning in panic(). The panic CPU wants to keep the lock. 1840 + * Non-panic CPUs abandon the flush anyway. 1841 + * 1842 + * Just keep the lockdep annotation. The panic-CPU should avoid 1843 + * taking console_owner_lock because it might cause a deadlock. 1844 + * This looks like the easiest way how to prevent false lockdep 1845 + * reports without handling races a lockless way. 
1846 + */ 1847 + if (panic_in_progress()) 1848 + goto lockdep; 1849 + 1855 1850 raw_spin_lock(&console_owner_lock); 1856 1851 console_owner = current; 1857 1852 raw_spin_unlock(&console_owner_lock); 1858 1853 1854 + lockdep: 1859 1855 /* The waiter may spin on us after setting console_owner */ 1860 1856 spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_); 1861 1857 } ··· 1892 1862 static int console_lock_spinning_disable_and_check(int cookie) 1893 1863 { 1894 1864 int waiter; 1865 + 1866 + /* 1867 + * Ignore spinning waiters during panic() because they might get stopped 1868 + * or blocked at any time, 1869 + * 1870 + * It is safe because nobody is allowed to start spinning during panic 1871 + * in the first place. If there has been a waiter then non panic CPUs 1872 + * might stay spinning. They would get stopped anyway. The panic context 1873 + * will never start spinning and an interrupted spin on panic CPU will 1874 + * never continue. 1875 + */ 1876 + if (panic_in_progress()) { 1877 + /* Keep lockdep happy. */ 1878 + spin_release(&console_owner_dep_map, _THIS_IP_); 1879 + return 0; 1880 + } 1895 1881 1896 1882 raw_spin_lock(&console_owner_lock); 1897 1883 waiter = READ_ONCE(console_waiter); ··· 2305 2259 if (unlikely(suppress_printk)) 2306 2260 return 0; 2307 2261 2308 - if (unlikely(suppress_panic_printk) && 2309 - atomic_read(&panic_cpu) != raw_smp_processor_id()) 2262 + /* 2263 + * The messages on the panic CPU are the most important. If 2264 + * non-panic CPUs are generating any messages, they will be 2265 + * silently dropped. 2266 + */ 2267 + if (other_cpu_in_panic()) 2310 2268 return 0; 2311 2269 2312 2270 if (level == LOGLEVEL_SCHED) { ··· 2640 2590 return 0; 2641 2591 } 2642 2592 2643 - /* 2644 - * Return true if a panic is in progress on a remote CPU. 2645 - * 2646 - * On true, the local CPU should immediately release any printing resources 2647 - * that may be needed by the panic CPU. 
2648 - */ 2649 - bool other_cpu_in_panic(void) 2650 - { 2651 - if (!panic_in_progress()) 2652 - return false; 2653 - 2654 - /* 2655 - * We can use raw_smp_processor_id() here because it is impossible for 2656 - * the task to be migrated to the panic_cpu, or away from it. If 2657 - * panic_cpu has already been set, and we're not currently executing on 2658 - * that CPU, then we never will be. 2659 - */ 2660 - return atomic_read(&panic_cpu) != raw_smp_processor_id(); 2661 - } 2662 - 2663 2593 /** 2664 2594 * console_lock - block the console subsystem from printing 2665 2595 * ··· 2795 2765 bool printk_get_next_message(struct printk_message *pmsg, u64 seq, 2796 2766 bool is_extended, bool may_suppress) 2797 2767 { 2798 - static int panic_console_dropped; 2799 - 2800 2768 struct printk_buffers *pbufs = pmsg->pbufs; 2801 2769 const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf); 2802 2770 const size_t outbuf_sz = sizeof(pbufs->outbuf); ··· 2821 2793 2822 2794 pmsg->seq = r.info->seq; 2823 2795 pmsg->dropped = r.info->seq - seq; 2824 - 2825 - /* 2826 - * Check for dropped messages in panic here so that printk 2827 - * suppression can occur as early as possible if necessary. 2828 - */ 2829 - if (pmsg->dropped && 2830 - panic_in_progress() && 2831 - panic_console_dropped++ > 10) { 2832 - suppress_panic_printk = 1; 2833 - pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); 2834 - } 2835 2796 2836 2797 /* Skip record that has level above the console loglevel. */ 2837 2798 if (may_suppress && suppress_message_printing(r.info->level)) ··· 3767 3750 3768 3751 might_sleep(); 3769 3752 3770 - seq = prb_next_seq(prb); 3753 + seq = prb_next_reserve_seq(prb); 3771 3754 3772 3755 /* Flush the consoles so that records up to @seq are printed. */ 3773 3756 console_lock();

+283 -54

kernel/printk/printk_ringbuffer.c

··· 6 6 #include <linux/errno.h> 7 7 #include <linux/bug.h> 8 8 #include "printk_ringbuffer.h" 9 + #include "internal.h" 9 10 10 11 /** 11 12 * DOC: printk_ringbuffer overview ··· 304 303 * 305 304 * desc_push_tail:B / desc_reserve:D 306 305 * set descriptor reusable (state), then push descriptor tail (id) 306 + * 307 + * desc_update_last_finalized:A / desc_last_finalized_seq:A 308 + * store finalized record, then set new highest finalized sequence number 307 309 */ 308 310 309 311 #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) ··· 1034 1030 unsigned long next_lpos; 1035 1031 1036 1032 if (size == 0) { 1037 - /* Specify a data-less block. */ 1038 - blk_lpos->begin = NO_LPOS; 1039 - blk_lpos->next = NO_LPOS; 1033 + /* 1034 + * Data blocks are not created for empty lines. Instead, the 1035 + * reader will recognize these special lpos values and handle 1036 + * it appropriately. 1037 + */ 1038 + blk_lpos->begin = EMPTY_LINE_LPOS; 1039 + blk_lpos->next = EMPTY_LINE_LPOS; 1040 1040 return NULL; 1041 1041 } 1042 1042 ··· 1218 1210 1219 1211 /* Data-less data block description. */ 1220 1212 if (BLK_DATALESS(blk_lpos)) { 1221 - if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { 1213 + /* 1214 + * Records that are just empty lines are also valid, even 1215 + * though they do not have a data block. For such records 1216 + * explicitly return empty string data to signify success. 1217 + */ 1218 + if (blk_lpos->begin == EMPTY_LINE_LPOS && 1219 + blk_lpos->next == EMPTY_LINE_LPOS) { 1222 1220 *data_size = 0; 1223 1221 return ""; 1224 1222 } 1223 + 1224 + /* Data lost, invalid, or otherwise unavailable. */ 1225 1225 return NULL; 1226 1226 } 1227 1227 ··· 1458 1442 } 1459 1443 1460 1444 /* 1445 + * @last_finalized_seq value guarantees that all records up to and including 1446 + * this sequence number are finalized and can be read. The only exception are 1447 + * too old records which have already been overwritten. 
1448 + * 1449 + * It is also guaranteed that @last_finalized_seq only increases. 1450 + * 1451 + * Be aware that finalized records following non-finalized records are not 1452 + * reported because they are not yet available to the reader. For example, 1453 + * a new record stored via printk() will not be available to a printer if 1454 + * it follows a record that has not been finalized yet. However, once that 1455 + * non-finalized record becomes finalized, @last_finalized_seq will be 1456 + * appropriately updated and the full set of finalized records will be 1457 + * available to the printer. And since each printk() caller will either 1458 + * directly print or trigger deferred printing of all available unprinted 1459 + * records, all printk() messages will get printed. 1460 + */ 1461 + static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) 1462 + { 1463 + struct prb_desc_ring *desc_ring = &rb->desc_ring; 1464 + unsigned long ulseq; 1465 + 1466 + /* 1467 + * Guarantee the sequence number is loaded before loading the 1468 + * associated record in order to guarantee that the record can be 1469 + * seen by this CPU. This pairs with desc_update_last_finalized:A. 1470 + */ 1471 + ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq 1472 + ); /* LMM(desc_last_finalized_seq:A) */ 1473 + 1474 + return __ulseq_to_u64seq(rb, ulseq); 1475 + } 1476 + 1477 + static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, 1478 + struct printk_record *r, unsigned int *line_count); 1479 + 1480 + /* 1481 + * Check if there are records directly following @last_finalized_seq that are 1482 + * finalized. If so, update @last_finalized_seq to the latest of these 1483 + * records. It is not allowed to skip over records that are not yet finalized. 
1484 + */ 1485 + static void desc_update_last_finalized(struct printk_ringbuffer *rb) 1486 + { 1487 + struct prb_desc_ring *desc_ring = &rb->desc_ring; 1488 + u64 old_seq = desc_last_finalized_seq(rb); 1489 + unsigned long oldval; 1490 + unsigned long newval; 1491 + u64 finalized_seq; 1492 + u64 try_seq; 1493 + 1494 + try_again: 1495 + finalized_seq = old_seq; 1496 + try_seq = finalized_seq + 1; 1497 + 1498 + /* Try to find later finalized records. */ 1499 + while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { 1500 + finalized_seq = try_seq; 1501 + try_seq++; 1502 + } 1503 + 1504 + /* No update needed if no later finalized record was found. */ 1505 + if (finalized_seq == old_seq) 1506 + return; 1507 + 1508 + oldval = __u64seq_to_ulseq(old_seq); 1509 + newval = __u64seq_to_ulseq(finalized_seq); 1510 + 1511 + /* 1512 + * Set the sequence number of a later finalized record that has been 1513 + * seen. 1514 + * 1515 + * Guarantee the record data is visible to other CPUs before storing 1516 + * its sequence number. This pairs with desc_last_finalized_seq:A. 1517 + * 1518 + * Memory barrier involvement: 1519 + * 1520 + * If desc_last_finalized_seq:A reads from 1521 + * desc_update_last_finalized:A, then desc_read:A reads from 1522 + * _prb_commit:B. 1523 + * 1524 + * Relies on: 1525 + * 1526 + * RELEASE from _prb_commit:B to desc_update_last_finalized:A 1527 + * matching 1528 + * ACQUIRE from desc_last_finalized_seq:A to desc_read:A 1529 + * 1530 + * Note: _prb_commit:B and desc_update_last_finalized:A can be 1531 + * different CPUs. However, the desc_update_last_finalized:A 1532 + * CPU (which performs the release) must have previously seen 1533 + * _prb_commit:B. 
1534 + */ 1535 + if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, 1536 + &oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ 1537 + old_seq = __ulseq_to_u64seq(rb, oldval); 1538 + goto try_again; 1539 + } 1540 + } 1541 + 1542 + /* 1461 1543 * Attempt to finalize a specified descriptor. If this fails, the descriptor 1462 1544 * is either already final or it will finalize itself when the writer commits. 1463 1545 */ 1464 - static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) 1546 + static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id) 1465 1547 { 1548 + struct prb_desc_ring *desc_ring = &rb->desc_ring; 1466 1549 unsigned long prev_state_val = DESC_SV(id, desc_committed); 1467 1550 struct prb_desc *d = to_desc(desc_ring, id); 1468 1551 1469 - atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, 1470 - DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ 1471 - 1472 - /* Best effort to remember the last finalized @id. */ 1473 - atomic_long_set(&desc_ring->last_finalized_id, id); 1552 + if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, 1553 + DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ 1554 + desc_update_last_finalized(rb); 1555 + } 1474 1556 } 1475 1557 1476 1558 /** ··· 1664 1550 * readers. (For seq==0 there is no previous descriptor.) 1665 1551 */ 1666 1552 if (info->seq > 0) 1667 - desc_make_final(desc_ring, DESC_ID(id - 1)); 1553 + desc_make_final(rb, DESC_ID(id - 1)); 1668 1554 1669 1555 r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id); 1670 1556 /* If text data allocation fails, a data-less record is committed. 
*/ ··· 1757 1643 */ 1758 1644 head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */ 1759 1645 if (head_id != e->id) 1760 - desc_make_final(desc_ring, e->id); 1646 + desc_make_final(e->rb, e->id); 1761 1647 } 1762 1648 1763 1649 /** ··· 1777 1663 */ 1778 1664 void prb_final_commit(struct prb_reserved_entry *e) 1779 1665 { 1780 - struct prb_desc_ring *desc_ring = &e->rb->desc_ring; 1781 - 1782 1666 _prb_commit(e, desc_finalized); 1783 1667 1784 - /* Best effort to remember the last finalized @id. */ 1785 - atomic_long_set(&desc_ring->last_finalized_id, e->id); 1668 + desc_update_last_finalized(e->rb); 1786 1669 } 1787 1670 1788 1671 /* ··· 1943 1832 } 1944 1833 1945 1834 /* Get the sequence number of the tail descriptor. */ 1946 - static u64 prb_first_seq(struct printk_ringbuffer *rb) 1835 + u64 prb_first_seq(struct printk_ringbuffer *rb) 1947 1836 { 1948 1837 struct prb_desc_ring *desc_ring = &rb->desc_ring; 1949 1838 enum desc_state d_state; ··· 1986 1875 return seq; 1987 1876 } 1988 1877 1989 - /* 1990 - * Non-blocking read of a record. Updates @seq to the last finalized record 1991 - * (which may have no data available). 1878 + /** 1879 + * prb_next_reserve_seq() - Get the sequence number after the most recently 1880 + * reserved record. 1992 1881 * 1993 - * See the description of prb_read_valid() and prb_read_valid_info() 1994 - * for details. 1882 + * @rb: The ringbuffer to get the sequence number from. 1883 + * 1884 + * This is the public function available to readers to see what sequence 1885 + * number will be assigned to the next reserved record. 1886 + * 1887 + * Note that depending on the situation, this value can be equal to or 1888 + * higher than the sequence number returned by prb_next_seq(). 1889 + * 1890 + * Context: Any context. 1891 + * Return: The sequence number that will be assigned to the next record 1892 + * reserved. 
1893 + */ 1894 + u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) 1895 + { 1896 + struct prb_desc_ring *desc_ring = &rb->desc_ring; 1897 + unsigned long last_finalized_id; 1898 + atomic_long_t *state_var; 1899 + u64 last_finalized_seq; 1900 + unsigned long head_id; 1901 + struct prb_desc desc; 1902 + unsigned long diff; 1903 + struct prb_desc *d; 1904 + int err; 1905 + 1906 + /* 1907 + * It may not be possible to read a sequence number for @head_id. 1908 + * So the ID of @last_finailzed_seq is used to calculate what the 1909 + * sequence number of @head_id will be. 1910 + */ 1911 + 1912 + try_again: 1913 + last_finalized_seq = desc_last_finalized_seq(rb); 1914 + 1915 + /* 1916 + * @head_id is loaded after @last_finalized_seq to ensure that 1917 + * it points to the record with @last_finalized_seq or newer. 1918 + * 1919 + * Memory barrier involvement: 1920 + * 1921 + * If desc_last_finalized_seq:A reads from 1922 + * desc_update_last_finalized:A, then 1923 + * prb_next_reserve_seq:A reads from desc_reserve:D. 1924 + * 1925 + * Relies on: 1926 + * 1927 + * RELEASE from desc_reserve:D to desc_update_last_finalized:A 1928 + * matching 1929 + * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A 1930 + * 1931 + * Note: desc_reserve:D and desc_update_last_finalized:A can be 1932 + * different CPUs. However, the desc_update_last_finalized:A CPU 1933 + * (which performs the release) must have previously seen 1934 + * desc_read:C, which implies desc_reserve:D can be seen. 1935 + */ 1936 + head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ 1937 + 1938 + d = to_desc(desc_ring, last_finalized_seq); 1939 + state_var = &d->state_var; 1940 + 1941 + /* Extract the ID, used to specify the descriptor to read. */ 1942 + last_finalized_id = DESC_ID(atomic_long_read(state_var)); 1943 + 1944 + /* Ensure @last_finalized_id is correct. 
*/ 1945 + err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); 1946 + 1947 + if (err == -EINVAL) { 1948 + if (last_finalized_seq == 0) { 1949 + /* 1950 + * No record has been finalized or even reserved yet. 1951 + * 1952 + * The @head_id is initialized such that the first 1953 + * increment will yield the first record (seq=0). 1954 + * Handle it separately to avoid a negative @diff 1955 + * below. 1956 + */ 1957 + if (head_id == DESC0_ID(desc_ring->count_bits)) 1958 + return 0; 1959 + 1960 + /* 1961 + * One or more descriptors are already reserved. Use 1962 + * the descriptor ID of the first one (@seq=0) for 1963 + * the @diff below. 1964 + */ 1965 + last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; 1966 + } else { 1967 + /* Record must have been overwritten. Try again. */ 1968 + goto try_again; 1969 + } 1970 + } 1971 + 1972 + /* Diff of known descriptor IDs to compute related sequence numbers. */ 1973 + diff = head_id - last_finalized_id; 1974 + 1975 + /* 1976 + * @head_id points to the most recently reserved record, but this 1977 + * function returns the sequence number that will be assigned to the 1978 + * next (not yet reserved) record. Thus +1 is needed. 1979 + */ 1980 + return (last_finalized_seq + diff + 1); 1981 + } 1982 + 1983 + /* 1984 + * Non-blocking read of a record. 1985 + * 1986 + * On success @seq is updated to the record that was read and (if provided) 1987 + * @r and @line_count will contain the read/calculated data. 1988 + * 1989 + * On failure @seq is updated to a record that is not yet available to the 1990 + * reader, but it will be the next record available to the reader. 1991 + * 1992 + * Note: When the current CPU is in panic, this function will skip over any 1993 + * non-existent/non-finalized records in order to allow the panic CPU 1994 + * to print any and all records that have been finalized. 
1995 1995 */ 1996 1996 static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, 1997 1997 struct printk_record *r, unsigned int *line_count) ··· 2121 1899 *seq = tail_seq; 2122 1900 2123 1901 } else if (err == -ENOENT) { 2124 - /* Record exists, but no data available. Skip. */ 1902 + /* Record exists, but the data was lost. Skip. */ 2125 1903 (*seq)++; 2126 1904 2127 1905 } else { 2128 - /* Non-existent/non-finalized record. Must stop. */ 2129 - return false; 1906 + /* 1907 + * Non-existent/non-finalized record. Must stop. 1908 + * 1909 + * For panic situations it cannot be expected that 1910 + * non-finalized records will become finalized. But 1911 + * there may be other finalized records beyond that 1912 + * need to be printed for a panic situation. If this 1913 + * is the panic CPU, skip this 1914 + * non-existent/non-finalized record unless it is 1915 + * at or beyond the head, in which case it is not 1916 + * possible to continue. 1917 + * 1918 + * Note that new messages printed on panic CPU are 1919 + * finalized when we are here. The only exception 1920 + * might be the last message without trailing newline. 1921 + * But it would have the sequence number returned 1922 + * by "prb_next_reserve_seq() - 1". 1923 + */ 1924 + if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) 1925 + (*seq)++; 1926 + else 1927 + return false; 2130 1928 } 2131 1929 } 2132 1930 ··· 2174 1932 * On success, the reader must check r->info.seq to see which record was 2175 1933 * actually read. This allows the reader to detect dropped records. 2176 1934 * 2177 - * Failure means @seq refers to a not yet written record. 1935 + * Failure means @seq refers to a record not yet available to the reader. 2178 1936 */ 2179 1937 bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, 2180 1938 struct printk_record *r) ··· 2204 1962 * On success, the reader must check info->seq to see which record meta data 2205 1963 * was actually read. 
This allows the reader to detect dropped records. 2206 1964 * 2207 - * Failure means @seq refers to a not yet written record. 1965 + * Failure means @seq refers to a record not yet available to the reader. 2208 1966 */ 2209 1967 bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, 2210 1968 struct printk_info *info, unsigned int *line_count) ··· 2250 2008 * newest sequence number available to readers will be. 2251 2009 * 2252 2010 * This provides readers a sequence number to jump to if all currently 2253 - * available records should be skipped. 2011 + * available records should be skipped. It is guaranteed that all records 2012 + * previous to the returned value have been finalized and are (or were) 2013 + * available to the reader. 2254 2014 * 2255 2015 * Context: Any context. 2256 2016 * Return: The sequence number of the next newest (not yet available) record ··· 2260 2016 */ 2261 2017 u64 prb_next_seq(struct printk_ringbuffer *rb) 2262 2018 { 2263 - struct prb_desc_ring *desc_ring = &rb->desc_ring; 2264 - enum desc_state d_state; 2265 - unsigned long id; 2266 2019 u64 seq; 2267 2020 2268 - /* Check if the cached @id still points to a valid @seq. */ 2269 - id = atomic_long_read(&desc_ring->last_finalized_id); 2270 - d_state = desc_read(desc_ring, id, NULL, &seq, NULL); 2021 + seq = desc_last_finalized_seq(rb); 2271 2022 2272 - if (d_state == desc_finalized || d_state == desc_reusable) { 2273 - /* 2274 - * Begin searching after the last finalized record. 2275 - * 2276 - * On 0, the search must begin at 0 because of hack#2 2277 - * of the bootstrapping phase it is not known if a 2278 - * record at index 0 exists. 2279 - */ 2280 - if (seq != 0) 2281 - seq++; 2282 - } else { 2283 - /* 2284 - * The information about the last finalized sequence number 2285 - * has gone. It should happen only when there is a flood of 2286 - * new messages and the ringbuffer is rapidly recycled. 2287 - * Give up and start from the beginning. 
2288 - */ 2289 - seq = 0; 2290 - } 2023 + /* 2024 + * Begin searching after the last finalized record. 2025 + * 2026 + * On 0, the search must begin at 0 because of hack#2 2027 + * of the bootstrapping phase it is not known if a 2028 + * record at index 0 exists. 2029 + */ 2030 + if (seq != 0) 2031 + seq++; 2291 2032 2292 2033 /* 2293 2034 * The information about the last finalized @seq might be inaccurate. ··· 2314 2085 rb->desc_ring.infos = infos; 2315 2086 atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits)); 2316 2087 atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); 2317 - atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits)); 2088 + atomic_long_set(&rb->desc_ring.last_finalized_seq, 0); 2318 2089 2319 2090 rb->text_data_ring.size_bits = textbits; 2320 2091 rb->text_data_ring.data = text_buf;

+51 -3

kernel/printk/printk_ringbuffer.h

··· 75 75 struct printk_info *infos; 76 76 atomic_long_t head_id; 77 77 atomic_long_t tail_id; 78 - atomic_long_t last_finalized_id; 78 + atomic_long_t last_finalized_seq; 79 79 }; 80 80 81 81 /* ··· 127 127 #define DESC_SV(id, state) (((unsigned long)state << DESC_FLAGS_SHIFT) | id) 128 128 #define DESC_ID_MASK (~DESC_FLAGS_MASK) 129 129 #define DESC_ID(sv) ((sv) & DESC_ID_MASK) 130 + 131 + /* 132 + * Special data block logical position values (for fields of 133 + * @prb_desc.text_blk_lpos). 134 + * 135 + * - Bit0 is used to identify if the record has no data block. (Implemented in 136 + * the LPOS_DATALESS() macro.) 137 + * 138 + * - Bit1 specifies the reason for not having a data block. 139 + * 140 + * These special values could never be real lpos values because of the 141 + * meta data and alignment padding of data blocks. (See to_blk_size() for 142 + * details.) 143 + */ 130 144 #define FAILED_LPOS 0x1 131 - #define NO_LPOS 0x3 145 + #define EMPTY_LINE_LPOS 0x3 132 146 133 147 #define FAILED_BLK_LPOS \ 134 148 { \ ··· 273 259 .infos = &_##name##_infos[0], \ 274 260 .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ 275 261 .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ 276 - .last_finalized_id = ATOMIC_INIT(DESC0_ID(descbits)), \ 262 + .last_finalized_seq = ATOMIC_INIT(0), \ 277 263 }, \ 278 264 .text_data_ring = { \ 279 265 .size_bits = (avgtextbits) + (descbits), \ ··· 392 378 bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, 393 379 struct printk_info *info, unsigned int *line_count); 394 380 381 + u64 prb_first_seq(struct printk_ringbuffer *rb); 395 382 u64 prb_first_valid_seq(struct printk_ringbuffer *rb); 396 383 u64 prb_next_seq(struct printk_ringbuffer *rb); 384 + u64 prb_next_reserve_seq(struct printk_ringbuffer *rb); 385 + 386 + #ifdef CONFIG_64BIT 387 + 388 + #define __u64seq_to_ulseq(u64seq) (u64seq) 389 + #define __ulseq_to_u64seq(rb, ulseq) (ulseq) 390 + 391 + #else /* CONFIG_64BIT */ 392 + 393 + #define __u64seq_to_ulseq(u64seq) 
((u32)u64seq) 394 + 395 + static inline u64 __ulseq_to_u64seq(struct printk_ringbuffer *rb, u32 ulseq) 396 + { 397 + u64 rb_first_seq = prb_first_seq(rb); 398 + u64 seq; 399 + 400 + /* 401 + * The provided sequence is only the lower 32 bits of the ringbuffer 402 + * sequence. It needs to be expanded to 64bit. Get the first sequence 403 + * number from the ringbuffer and fold it. 404 + * 405 + * Having a 32bit representation in the console is sufficient. 406 + * If a console ever gets more than 2^31 records behind 407 + * the ringbuffer then this is the least of the problems. 408 + * 409 + * Also the access to the ring buffer is always safe. 410 + */ 411 + seq = rb_first_seq - (s32)((u32)rb_first_seq - ulseq); 412 + 413 + return seq; 414 + } 415 + 416 + #endif /* CONFIG_64BIT */ 397 417 398 418 #endif /* _KERNEL_PRINTK_RINGBUFFER_H */

+13 -3

lib/dump_stack.c

··· 96 96 */ 97 97 asmlinkage __visible void dump_stack_lvl(const char *log_lvl) 98 98 { 99 + bool in_panic = this_cpu_in_panic(); 99 100 unsigned long flags; 100 101 101 102 /* 102 103 * Permit this cpu to perform nested stack dumps while serialising 103 - * against other CPUs 104 + * against other CPUs, unless this CPU is in panic. 105 + * 106 + * When in panic, non-panic CPUs are not permitted to store new 107 + * printk messages so there is no need to synchronize the output. 108 + * This avoids potential deadlock in panic() if another CPU is 109 + * holding and unable to release the printk_cpu_sync. 104 110 */ 105 - printk_cpu_sync_get_irqsave(flags); 111 + if (!in_panic) 112 + printk_cpu_sync_get_irqsave(flags); 113 + 106 114 __dump_stack(log_lvl); 107 - printk_cpu_sync_put_irqrestore(flags); 115 + 116 + if (!in_panic) 117 + printk_cpu_sync_put_irqrestore(flags); 108 118 } 109 119 EXPORT_SYMBOL(dump_stack_lvl); 110 120