Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

printk: queue wake_up_klogd irq_work only if per-CPU areas are ready

printk_deferred(), similarly to printk_safe/printk_nmi, does not
immediately attempt to print a new message on the consoles, avoiding
calls into non-reentrant kernel paths, e.g. scheduler or timekeeping,
which potentially can deadlock the system.

Those printk() flavors, instead, rely on per-CPU flush irq_work to print
messages from safer contexts. For same reasons (recursive scheduler or
timekeeping calls) printk() uses per-CPU irq_work in order to wake up
user space syslog/kmsg readers.

However, only printk_safe/printk_nmi do make sure that per-CPU areas
have been initialised and that it's safe to modify per-CPU irq_work.
This means that, for instance, should printk_deferred() be invoked "too
early", that is before per-CPU areas are initialised, printk_deferred()
will perform illegal per-CPU access.

Lech Perczak [0] reports that after commit 1b710b1b10ef ("char/random:
silence a lockdep splat with printk()") user-space syslog/kmsg readers
are not able to read new kernel messages.

The reason is printk_deferred() being called too early (as was pointed
out by Petr and John).

Fix printk_deferred() and do not queue per-CPU irq_work before per-CPU
areas are initialized.

Link: https://lore.kernel.org/lkml/aa0732c6-5c4e-8a8b-a1c1-75ebe3dca05b@camlintechnologies.com/
Reported-by: Lech Perczak <l.perczak@camlintechnologies.com>
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Tested-by: Jann Horn <jannh@google.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Sergey Senozhatsky and committed by
Linus Torvalds
ab6f762f 87ad46e6

+40 -16
-5
include/linux/printk.h
··· 202 202 void dump_stack_print_info(const char *log_lvl); 203 203 void show_regs_print_info(const char *log_lvl); 204 204 extern asmlinkage void dump_stack(void) __cold; 205 - extern void printk_safe_init(void); 206 205 extern void printk_safe_flush(void); 207 206 extern void printk_safe_flush_on_panic(void); 208 207 #else ··· 265 266 } 266 267 267 268 static inline void dump_stack(void) 268 - { 269 - } 270 - 271 - static inline void printk_safe_init(void) 272 269 { 273 270 } 274 271
-1
init/main.c
··· 913 913 boot_init_stack_canary(); 914 914 915 915 time_init(); 916 - printk_safe_init(); 917 916 perf_event_init(); 918 917 profile_init(); 919 918 call_function_init();
+5
kernel/printk/internal.h
··· 23 23 void __printk_safe_enter(void); 24 24 void __printk_safe_exit(void); 25 25 26 + void printk_safe_init(void); 27 + bool printk_percpu_data_ready(void); 28 + 26 29 #define printk_safe_enter_irqsave(flags) \ 27 30 do { \ 28 31 local_irq_save(flags); \ ··· 67 64 #define printk_safe_enter_irq() local_irq_disable() 68 65 #define printk_safe_exit_irq() local_irq_enable() 69 66 67 + static inline void printk_safe_init(void) { } 68 + static inline bool printk_percpu_data_ready(void) { return false; } 70 69 #endif /* CONFIG_PRINTK */
+34
kernel/printk/printk.c
··· 460 460 static char *log_buf = __log_buf; 461 461 static u32 log_buf_len = __LOG_BUF_LEN; 462 462 463 + /* 464 + * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before 465 + * per_cpu_areas are initialised. This variable is set to true when 466 + * it's safe to access per-CPU data. 467 + */ 468 + static bool __printk_percpu_data_ready __read_mostly; 469 + 470 + bool printk_percpu_data_ready(void) 471 + { 472 + return __printk_percpu_data_ready; 473 + } 474 + 463 475 /* Return log buffer address */ 464 476 char *log_buf_addr_get(void) 465 477 { ··· 1158 1146 static inline void log_buf_add_cpu(void) {} 1159 1147 #endif /* CONFIG_SMP */ 1160 1148 1149 + static void __init set_percpu_data_ready(void) 1150 + { 1151 + printk_safe_init(); 1152 + /* Make sure we set this flag only after printk_safe() init is done */ 1153 + barrier(); 1154 + __printk_percpu_data_ready = true; 1155 + } 1156 + 1161 1157 void __init setup_log_buf(int early) 1162 1158 { 1163 1159 unsigned long flags; 1164 1160 char *new_log_buf; 1165 1161 unsigned int free; 1162 + 1163 + /* 1164 + * Some archs call setup_log_buf() multiple times - first is very 1165 + * early, e.g. from setup_arch(), and second - when percpu_areas 1166 + * are initialised. 1167 + */ 1168 + if (!early) 1169 + set_percpu_data_ready(); 1166 1170 1167 1171 if (log_buf != __log_buf) 1168 1172 return; ··· 3003 2975 3004 2976 void wake_up_klogd(void) 3005 2977 { 2978 + if (!printk_percpu_data_ready()) 2979 + return; 2980 + 3006 2981 preempt_disable(); 3007 2982 if (waitqueue_active(&log_wait)) { 3008 2983 this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); ··· 3016 2985 3017 2986 void defer_console_output(void) 3018 2987 { 2988 + if (!printk_percpu_data_ready()) 2989 + return; 2990 + 3019 2991 preempt_disable(); 3020 2992 __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); 3021 2993 irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+1 -10
kernel/printk/printk_safe.c
··· 27 27 * There are situations when we want to make sure that all buffers 28 28 * were handled or when IRQs are blocked. 29 29 */ 30 - static int printk_safe_irq_ready __read_mostly; 31 30 32 31 #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ 33 32 sizeof(atomic_t) - \ ··· 50 51 /* Get flushed in a more safe context. */ 51 52 static void queue_flush_work(struct printk_safe_seq_buf *s) 52 53 { 53 - if (printk_safe_irq_ready) 54 + if (printk_percpu_data_ready()) 54 55 irq_work_queue(&s->work); 55 56 } 56 57 ··· 400 401 init_irq_work(&s->work, __printk_safe_flush); 401 402 #endif 402 403 } 403 - 404 - /* 405 - * In the highly unlikely event that a NMI were to trigger at 406 - * this moment. Make sure IRQ work is set up before this 407 - * variable is set. 408 - */ 409 - barrier(); 410 - printk_safe_irq_ready = 1; 411 404 412 405 /* Flush pending messages that did not have scheduled IRQ works. */ 413 406 printk_safe_flush();