Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'printk-for-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux

Pull printk updates from Petr Mladek:
"The big new thing is the fully lockless ringbuffer implementation,
including the support for continuous lines. It will allow storing and
reading messages in any situation without the risk of deadlocks and
without the need of temporary per-CPU buffers.

The access is still serialized by logbuf_lock. It synchronizes a few
more operations, for example, temporary buffer for formatting the
message, syslog and kmsg_dump operations. The lock removal is being
discussed and should be ready for the next release.

The continuous lines are handled exactly the same way as before to
avoid regressions in user space. It means that they are appended to
the last message when the caller is the same. Only the last message
can be extended.

The data ring includes plain text of the messages, except for an
integer at the beginning of each message that points back to the
descriptor ring with other metadata.

The dictionary has to stay. journalctl uses it to filter the log. It
allows showing messages related to a given device. The dictionary
values are stored in the descriptor ring with the other metadata.

This is the first part of the printk rework as discussed at Plumbers
2019, see https://lore.kernel.org/r/87k1acz5rx.fsf@linutronix.de. The
next big step will be handling consoles by kthreads during the normal
system operation. It will require special handling of situations when
the kthreads could not get scheduled, for example, early boot,
suspend, panic.

Other changes:

- Add John Ogness as a reviewer for printk subsystem. He is author of
the rework and is familiar with the code and history.

- Fix locking in serial8250_do_startup() to prevent lockdep report.

- Few code cleanups"

* tag 'printk-for-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/printk/linux: (27 commits)
printk: Use fallthrough pseudo-keyword
printk: reduce setup_text_buf size to LOG_LINE_MAX
printk: avoid and/or handle record truncation
printk: remove dict ring
printk: move dictionary keys to dev_printk_info
printk: move printk_info into separate array
printk: reimplement log_cont using record extension
printk: ringbuffer: add finalization/extension support
printk: ringbuffer: change representation of states
printk: ringbuffer: clear initial reserved fields
printk: ringbuffer: add BLK_DATALESS() macro
printk: ringbuffer: relocate get_data()
printk: ringbuffer: avoid memcpy() on state_var
printk: ringbuffer: fix setting state in desc_read()
kernel.h: Move oops_in_progress to printk.h
scripts/gdb: update for lockless printk ringbuffer
scripts/gdb: add utils.read_ulong()
docs: vmcoreinfo: add lockless printk ringbuffer vmcoreinfo
printk: reduce LOG_BUF_SHIFT range for H8300
printk: ringbuffer: support dataless records
...

+3416 -737
+111 -52
Documentation/admin-guide/kdump/gdbmacros.txt
··· 170 170 address the kernel panicked. 171 171 end 172 172 173 - define dump_log_idx 174 - set $idx = $arg0 175 - if ($argc > 1) 176 - set $prev_flags = $arg1 173 + define dump_record 174 + set var $desc = $arg0 175 + set var $info = $arg1 176 + if ($argc > 2) 177 + set var $prev_flags = $arg2 177 178 else 178 - set $prev_flags = 0 179 - end 180 - set $msg = ((struct printk_log *) (log_buf + $idx)) 181 - set $prefix = 1 182 - set $newline = 1 183 - set $log = log_buf + $idx + sizeof(*$msg) 184 - 185 - # prev & LOG_CONT && !(msg->flags & LOG_PREIX) 186 - if (($prev_flags & 8) && !($msg->flags & 4)) 187 - set $prefix = 0 179 + set var $prev_flags = 0 188 180 end 189 181 190 - # msg->flags & LOG_CONT 191 - if ($msg->flags & 8) 182 + set var $prefix = 1 183 + set var $newline = 1 184 + 185 + set var $begin = $desc->text_blk_lpos.begin % (1U << prb->text_data_ring.size_bits) 186 + set var $next = $desc->text_blk_lpos.next % (1U << prb->text_data_ring.size_bits) 187 + 188 + # handle data-less record 189 + if ($begin & 1) 190 + set var $text_len = 0 191 + set var $log = "" 192 + else 193 + # handle wrapping data block 194 + if ($begin > $next) 195 + set var $begin = 0 196 + end 197 + 198 + # skip over descriptor id 199 + set var $begin = $begin + sizeof(long) 200 + 201 + # handle truncated message 202 + if ($next - $begin < $info->text_len) 203 + set var $text_len = $next - $begin 204 + else 205 + set var $text_len = $info->text_len 206 + end 207 + 208 + set var $log = &prb->text_data_ring.data[$begin] 209 + end 210 + 211 + # prev & LOG_CONT && !(info->flags & LOG_PREIX) 212 + if (($prev_flags & 8) && !($info->flags & 4)) 213 + set var $prefix = 0 214 + end 215 + 216 + # info->flags & LOG_CONT 217 + if ($info->flags & 8) 192 218 # (prev & LOG_CONT && !(prev & LOG_NEWLINE)) 193 219 if (($prev_flags & 8) && !($prev_flags & 2)) 194 - set $prefix = 0 220 + set var $prefix = 0 195 221 end 196 - # (!(msg->flags & LOG_NEWLINE)) 197 - if (!($msg->flags & 2)) 198 - set $newline 
= 0 222 + # (!(info->flags & LOG_NEWLINE)) 223 + if (!($info->flags & 2)) 224 + set var $newline = 0 199 225 end 200 226 end 201 227 202 228 if ($prefix) 203 - printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 229 + printf "[%5lu.%06lu] ", $info->ts_nsec / 1000000000, $info->ts_nsec % 1000000000 204 230 end 205 - if ($msg->text_len != 0) 206 - eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len 231 + if ($text_len) 232 + eval "printf \"%%%d.%ds\", $log", $text_len, $text_len 207 233 end 208 234 if ($newline) 209 235 printf "\n" 210 236 end 211 - if ($msg->dict_len > 0) 212 - set $dict = $log + $msg->text_len 213 - set $idx = 0 214 - set $line = 1 215 - while ($idx < $msg->dict_len) 216 - if ($line) 217 - printf " " 218 - set $line = 0 219 - end 220 - set $c = $dict[$idx] 237 + 238 + # handle dictionary data 239 + 240 + set var $dict = &$info->dev_info.subsystem[0] 241 + set var $dict_len = sizeof($info->dev_info.subsystem) 242 + if ($dict[0] != '\0') 243 + printf " SUBSYSTEM=" 244 + set var $idx = 0 245 + while ($idx < $dict_len) 246 + set var $c = $dict[$idx] 221 247 if ($c == '\0') 222 - printf "\n" 223 - set $line = 1 248 + loop_break 224 249 else 225 250 if ($c < ' ' || $c >= 127 || $c == '\\') 226 251 printf "\\x%02x", $c ··· 253 228 printf "%c", $c 254 229 end 255 230 end 256 - set $idx = $idx + 1 231 + set var $idx = $idx + 1 232 + end 233 + printf "\n" 234 + end 235 + 236 + set var $dict = &$info->dev_info.device[0] 237 + set var $dict_len = sizeof($info->dev_info.device) 238 + if ($dict[0] != '\0') 239 + printf " DEVICE=" 240 + set var $idx = 0 241 + while ($idx < $dict_len) 242 + set var $c = $dict[$idx] 243 + if ($c == '\0') 244 + loop_break 245 + else 246 + if ($c < ' ' || $c >= 127 || $c == '\\') 247 + printf "\\x%02x", $c 248 + else 249 + printf "%c", $c 250 + end 251 + end 252 + set var $idx = $idx + 1 257 253 end 258 254 printf "\n" 259 255 end 260 256 end 261 - document dump_log_idx 262 - Dump a single log 
given its index in the log buffer. The first 263 - parameter is the index into log_buf, the second is optional and 264 - specified the previous log buffer's flags, used for properly 265 - formatting continued lines. 257 + document dump_record 258 + Dump a single record. The first parameter is the descriptor, 259 + the second parameter is the info, the third parameter is 260 + optional and specifies the previous record's flags, used for 261 + properly formatting continued lines. 266 262 end 267 263 268 264 define dmesg 269 - set $i = log_first_idx 270 - set $end_idx = log_first_idx 271 - set $prev_flags = 0 265 + # definitions from kernel/printk/printk_ringbuffer.h 266 + set var $desc_committed = 1 267 + set var $desc_finalized = 2 268 + set var $desc_sv_bits = sizeof(long) * 8 269 + set var $desc_flags_shift = $desc_sv_bits - 2 270 + set var $desc_flags_mask = 3 << $desc_flags_shift 271 + set var $id_mask = ~$desc_flags_mask 272 + 273 + set var $desc_count = 1U << prb->desc_ring.count_bits 274 + set var $prev_flags = 0 275 + 276 + set var $id = prb->desc_ring.tail_id.counter 277 + set var $end_id = prb->desc_ring.head_id.counter 272 278 273 279 while (1) 274 - set $msg = ((struct printk_log *) (log_buf + $i)) 275 - if ($msg->len == 0) 276 - set $i = 0 277 - else 278 - dump_log_idx $i $prev_flags 279 - set $i = $i + $msg->len 280 - set $prev_flags = $msg->flags 280 + set var $desc = &prb->desc_ring.descs[$id % $desc_count] 281 + set var $info = &prb->desc_ring.infos[$id % $desc_count] 282 + 283 + # skip non-committed record 284 + set var $state = 3 & ($desc->state_var.counter >> $desc_flags_shift) 285 + if ($state == $desc_committed || $state == $desc_finalized) 286 + dump_record $desc $info $prev_flags 287 + set var $prev_flags = $info->flags 281 288 end 282 - if ($i == $end_idx) 289 + 290 + set var $id = ($id + 1) & $id_mask 291 + if ($id == $end_id) 283 292 loop_break 284 293 end 285 294 end
+105 -32
Documentation/admin-guide/kdump/vmcoreinfo.rst
··· 189 189 Free areas descriptor. User-space tools use this value to iterate the 190 190 free_area ranges. MAX_ORDER is used by the zone buddy allocator. 191 191 192 - log_first_idx 193 - ------------- 192 + prb 193 + --- 194 194 195 - Index of the first record stored in the buffer log_buf. Used by 196 - user-space tools to read the strings in the log_buf. 195 + A pointer to the printk ringbuffer (struct printk_ringbuffer). This 196 + may be pointing to the static boot ringbuffer or the dynamically 197 + allocated ringbuffer, depending on when the the core dump occurred. 198 + Used by user-space tools to read the active kernel log buffer. 197 199 198 - log_buf 199 - ------- 200 + printk_rb_static 201 + ---------------- 200 202 201 - Console output is written to the ring buffer log_buf at index 202 - log_first_idx. Used to get the kernel log. 203 + A pointer to the static boot printk ringbuffer. If @prb has a 204 + different value, this is useful for viewing the initial boot messages, 205 + which may have been overwritten in the dynamically allocated 206 + ringbuffer. 203 207 204 - log_buf_len 205 - ----------- 206 - 207 - log_buf's length. 208 - 209 - clear_idx 208 + clear_seq 210 209 --------- 211 210 212 - The index that the next printk() record to read after the last clear 213 - command. It indicates the first record after the last SYSLOG_ACTION 214 - _CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump 215 - the dmesg log. 211 + The sequence number of the printk() record after the last clear 212 + command. It indicates the first record after the last 213 + SYSLOG_ACTION_CLEAR, like issued by 'dmesg -c'. Used by user-space 214 + tools to dump a subset of the dmesg log. 216 215 217 - log_next_idx 218 - ------------ 216 + printk_ringbuffer 217 + ----------------- 219 218 220 - The index of the next record to store in the buffer log_buf. Used to 221 - compute the index of the current buffer position. 219 + The size of a printk_ringbuffer structure. 
This structure contains all 220 + information required for accessing the various components of the 221 + kernel log buffer. 222 222 223 - printk_log 224 - ---------- 223 + (printk_ringbuffer, desc_ring|text_data_ring|dict_data_ring|fail) 224 + ----------------------------------------------------------------- 225 225 226 - The size of a structure printk_log. Used to compute the size of 227 - messages, and extract dmesg log. It encapsulates header information for 228 - log_buf, such as timestamp, syslog level, etc. 226 + Offsets for the various components of the printk ringbuffer. Used by 227 + user-space tools to view the kernel log buffer without requiring the 228 + declaration of the structure. 229 229 230 - (printk_log, ts_nsec|len|text_len|dict_len) 231 - ------------------------------------------- 230 + prb_desc_ring 231 + ------------- 232 232 233 - It represents field offsets in struct printk_log. User space tools 234 - parse it and check whether the values of printk_log's members have been 235 - changed. 233 + The size of the prb_desc_ring structure. This structure contains 234 + information about the set of record descriptors. 235 + 236 + (prb_desc_ring, count_bits|descs|head_id|tail_id) 237 + ------------------------------------------------- 238 + 239 + Offsets for the fields describing the set of record descriptors. Used 240 + by user-space tools to be able to traverse the descriptors without 241 + requiring the declaration of the structure. 242 + 243 + prb_desc 244 + -------- 245 + 246 + The size of the prb_desc structure. This structure contains 247 + information about a single record descriptor. 248 + 249 + (prb_desc, info|state_var|text_blk_lpos|dict_blk_lpos) 250 + ------------------------------------------------------ 251 + 252 + Offsets for the fields describing a record descriptors. Used by 253 + user-space tools to be able to read descriptors without requiring 254 + the declaration of the structure. 
255 + 256 + prb_data_blk_lpos 257 + ----------------- 258 + 259 + The size of the prb_data_blk_lpos structure. This structure contains 260 + information about where the text or dictionary data (data block) is 261 + located within the respective data ring. 262 + 263 + (prb_data_blk_lpos, begin|next) 264 + ------------------------------- 265 + 266 + Offsets for the fields describing the location of a data block. Used 267 + by user-space tools to be able to locate data blocks without 268 + requiring the declaration of the structure. 269 + 270 + printk_info 271 + ----------- 272 + 273 + The size of the printk_info structure. This structure contains all 274 + the meta-data for a record. 275 + 276 + (printk_info, seq|ts_nsec|text_len|dict_len|caller_id) 277 + ------------------------------------------------------ 278 + 279 + Offsets for the fields providing the meta-data for a record. Used by 280 + user-space tools to be able to read the information without requiring 281 + the declaration of the structure. 282 + 283 + prb_data_ring 284 + ------------- 285 + 286 + The size of the prb_data_ring structure. This structure contains 287 + information about a set of data blocks. 288 + 289 + (prb_data_ring, size_bits|data|head_lpos|tail_lpos) 290 + --------------------------------------------------- 291 + 292 + Offsets for the fields describing a set of data blocks. Used by 293 + user-space tools to be able to access the data blocks without 294 + requiring the declaration of the structure. 295 + 296 + atomic_long_t 297 + ------------- 298 + 299 + The size of the atomic_long_t structure. Used by user-space tools to 300 + be able to copy the full structure, regardless of its 301 + architecture-specific implementation. 302 + 303 + (atomic_long_t, counter) 304 + ------------------------ 305 + 306 + Offset for the long value of an atomic_long_t variable. Used by 307 + user-space tools to access the long value without requiring the 308 + architecture-specific declaration. 
236 309 237 310 (free_area.free_list, MIGRATE_TYPES) 238 311 ------------------------------------
+1
MAINTAINERS
··· 13970 13970 M: Petr Mladek <pmladek@suse.com> 13971 13971 M: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> 13972 13972 R: Steven Rostedt <rostedt@goodmis.org> 13973 + R: John Ogness <john.ogness@linutronix.de> 13973 13974 S: Maintained 13974 13975 F: include/linux/printk.h 13975 13976 F: kernel/printk/
+16 -30
drivers/base/core.c
··· 4061 4061 */ 4062 4062 4063 4063 #ifdef CONFIG_PRINTK 4064 - static int 4065 - create_syslog_header(const struct device *dev, char *hdr, size_t hdrlen) 4064 + static void 4065 + set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) 4066 4066 { 4067 4067 const char *subsys; 4068 - size_t pos = 0; 4068 + 4069 + memset(dev_info, 0, sizeof(*dev_info)); 4069 4070 4070 4071 if (dev->class) 4071 4072 subsys = dev->class->name; 4072 4073 else if (dev->bus) 4073 4074 subsys = dev->bus->name; 4074 4075 else 4075 - return 0; 4076 + return; 4076 4077 4077 - pos += snprintf(hdr + pos, hdrlen - pos, "SUBSYSTEM=%s", subsys); 4078 - if (pos >= hdrlen) 4079 - goto overflow; 4078 + strscpy(dev_info->subsystem, subsys, sizeof(dev_info->subsystem)); 4080 4079 4081 4080 /* 4082 4081 * Add device identifier DEVICE=: ··· 4091 4092 c = 'b'; 4092 4093 else 4093 4094 c = 'c'; 4094 - pos++; 4095 - pos += snprintf(hdr + pos, hdrlen - pos, 4096 - "DEVICE=%c%u:%u", 4097 - c, MAJOR(dev->devt), MINOR(dev->devt)); 4095 + 4096 + snprintf(dev_info->device, sizeof(dev_info->device), 4097 + "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); 4098 4098 } else if (strcmp(subsys, "net") == 0) { 4099 4099 struct net_device *net = to_net_dev(dev); 4100 4100 4101 - pos++; 4102 - pos += snprintf(hdr + pos, hdrlen - pos, 4103 - "DEVICE=n%u", net->ifindex); 4101 + snprintf(dev_info->device, sizeof(dev_info->device), 4102 + "n%u", net->ifindex); 4104 4103 } else { 4105 - pos++; 4106 - pos += snprintf(hdr + pos, hdrlen - pos, 4107 - "DEVICE=+%s:%s", subsys, dev_name(dev)); 4104 + snprintf(dev_info->device, sizeof(dev_info->device), 4105 + "+%s:%s", subsys, dev_name(dev)); 4108 4106 } 4109 - 4110 - if (pos >= hdrlen) 4111 - goto overflow; 4112 - 4113 - return pos; 4114 - 4115 - overflow: 4116 - dev_WARN(dev, "device/subsystem name too long"); 4117 - return 0; 4118 4107 } 4119 4108 4120 4109 int dev_vprintk_emit(int level, const struct device *dev, 4121 4110 const char *fmt, va_list args) 
4122 4111 { 4123 - char hdr[128]; 4124 - size_t hdrlen; 4112 + struct dev_printk_info dev_info; 4125 4113 4126 - hdrlen = create_syslog_header(dev, hdr, sizeof(hdr)); 4114 + set_dev_info(dev, &dev_info); 4127 4115 4128 - return vprintk_emit(0, level, hdrlen ? hdr : NULL, hdrlen, fmt, args); 4116 + return vprintk_emit(0, level, &dev_info, fmt, args); 4129 4117 } 4130 4118 EXPORT_SYMBOL(dev_vprintk_emit); 4131 4119
+3
include/linux/crash_core.h
··· 55 55 #define VMCOREINFO_OFFSET(name, field) \ 56 56 vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ 57 57 (unsigned long)offsetof(struct name, field)) 58 + #define VMCOREINFO_TYPE_OFFSET(name, field) \ 59 + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ 60 + (unsigned long)offsetof(name, field)) 58 61 #define VMCOREINFO_LENGTH(name, value) \ 59 62 vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) 60 63 #define VMCOREINFO_NUMBER(name) \
+1 -1
include/linux/debug_locks.h
··· 2 2 #ifndef __LINUX_DEBUG_LOCKING_H 3 3 #define __LINUX_DEBUG_LOCKING_H 4 4 5 - #include <linux/kernel.h> 6 5 #include <linux/atomic.h> 7 6 #include <linux/bug.h> 7 + #include <linux/printk.h> 8 8 9 9 struct task_struct; 10 10
+8
include/linux/dev_printk.h
··· 21 21 22 22 struct device; 23 23 24 + #define PRINTK_INFO_SUBSYSTEM_LEN 16 25 + #define PRINTK_INFO_DEVICE_LEN 48 26 + 27 + struct dev_printk_info { 28 + char subsystem[PRINTK_INFO_SUBSYSTEM_LEN]; 29 + char device[PRINTK_INFO_DEVICE_LEN]; 30 + }; 31 + 24 32 #ifdef CONFIG_PRINTK 25 33 26 34 __printf(3, 0) __cold
-1
include/linux/kernel.h
··· 526 526 #endif /* CONFIG_SMP */ 527 527 528 528 extern void bust_spinlocks(int yes); 529 - extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ 530 529 extern int panic_timeout; 531 530 extern unsigned long panic_print; 532 531 extern int panic_on_oops;
+6 -2
include/linux/printk.h
··· 12 12 extern const char linux_banner[]; 13 13 extern const char linux_proc_banner[]; 14 14 15 + extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */ 16 + 15 17 #define PRINTK_MAX_SINGLE_HEADER_LEN 2 16 18 17 19 static inline int printk_get_level(const char *buffer) ··· 161 159 static inline void printk_nmi_direct_exit(void) { } 162 160 #endif /* PRINTK_NMI */ 163 161 162 + struct dev_printk_info; 163 + 164 164 #ifdef CONFIG_PRINTK 165 - asmlinkage __printf(5, 0) 165 + asmlinkage __printf(4, 0) 166 166 int vprintk_emit(int facility, int level, 167 - const char *dict, size_t dictlen, 167 + const struct dev_printk_info *dev_info, 168 168 const char *fmt, va_list args); 169 169 170 170 asmlinkage __printf(1, 0)
+2 -1
init/Kconfig
··· 682 682 683 683 config LOG_BUF_SHIFT 684 684 int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" 685 - range 12 25 685 + range 12 25 if !H8300 686 + range 12 19 if H8300 686 687 default 17 687 688 depends on PRINTK 688 689 help
+1
kernel/printk/Makefile
··· 2 2 obj-y = printk.o 3 3 obj-$(CONFIG_PRINTK) += printk_safe.o 4 4 obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o 5 + obj-$(CONFIG_PRINTK) += printk_ringbuffer.o
+2 -2
kernel/printk/internal.h
··· 14 14 15 15 extern raw_spinlock_t logbuf_lock; 16 16 17 - __printf(5, 0) 17 + __printf(4, 0) 18 18 int vprintk_store(int facility, int level, 19 - const char *dict, size_t dictlen, 19 + const struct dev_printk_info *dev_info, 20 20 const char *fmt, va_list args); 21 21 22 22 __printf(1, 0) int vprintk_default(const char *fmt, va_list args);
+579 -580
kernel/printk/printk.c
··· 55 55 #define CREATE_TRACE_POINTS 56 56 #include <trace/events/printk.h> 57 57 58 + #include "printk_ringbuffer.h" 58 59 #include "console_cmdline.h" 59 60 #include "braille.h" 60 61 #include "internal.h" ··· 295 294 static int console_msg_format = MSG_FORMAT_DEFAULT; 296 295 297 296 /* 298 - * The printk log buffer consists of a chain of concatenated variable 299 - * length records. Every record starts with a record header, containing 300 - * the overall length of the record. 297 + * The printk log buffer consists of a sequenced collection of records, each 298 + * containing variable length message text. Every record also contains its 299 + * own meta-data (@info). 301 300 * 302 - * The heads to the first and last entry in the buffer, as well as the 303 - * sequence numbers of these entries are maintained when messages are 304 - * stored. 301 + * Every record meta-data carries the timestamp in microseconds, as well as 302 + * the standard userspace syslog level and syslog facility. The usual kernel 303 + * messages use LOG_KERN; userspace-injected messages always carry a matching 304 + * syslog facility, by default LOG_USER. The origin of every message can be 305 + * reliably determined that way. 305 306 * 306 - * If the heads indicate available messages, the length in the header 307 - * tells the start next message. A length == 0 for the next message 308 - * indicates a wrap-around to the beginning of the buffer. 307 + * The human readable log message of a record is available in @text, the 308 + * length of the message text in @text_len. The stored message is not 309 + * terminated. 309 310 * 310 - * Every record carries the monotonic timestamp in microseconds, as well as 311 - * the standard userspace syslog level and syslog facility. The usual 312 - * kernel messages use LOG_KERN; userspace-injected messages always carry 313 - * a matching syslog facility, by default LOG_USER. The origin of every 314 - * message can be reliably determined that way. 
315 - * 316 - * The human readable log message directly follows the message header. The 317 - * length of the message text is stored in the header, the stored message 318 - * is not terminated. 319 - * 320 - * Optionally, a message can carry a dictionary of properties (key/value pairs), 321 - * to provide userspace with a machine-readable message context. 311 + * Optionally, a record can carry a dictionary of properties (key/value 312 + * pairs), to provide userspace with a machine-readable message context. 322 313 * 323 314 * Examples for well-defined, commonly used property names are: 324 315 * DEVICE=b12:8 device identifier ··· 320 327 * +sound:card0 subsystem:devname 321 328 * SUBSYSTEM=pci driver-core subsystem name 322 329 * 323 - * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value 324 - * follows directly after a '=' character. Every property is terminated by 325 - * a '\0' character. The last property is not terminated. 330 + * Valid characters in property names are [a-zA-Z0-9.-_]. Property names 331 + * and values are terminated by a '\0' character. 
326 332 * 327 - * Example of a message structure: 328 - * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec 329 - * 0008 34 00 record is 52 bytes long 330 - * 000a 0b 00 text is 11 bytes long 331 - * 000c 1f 00 dictionary is 23 bytes long 332 - * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) 333 - * 0010 69 74 27 73 20 61 20 6c "it's a l" 334 - * 69 6e 65 "ine" 335 - * 001b 44 45 56 49 43 "DEVIC" 336 - * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" 337 - * 52 49 56 45 52 3d 62 75 "RIVER=bu" 338 - * 67 "g" 339 - * 0032 00 00 00 padding to next message header 333 + * Example of record values: 334 + * record.text_buf = "it's a line" (unterminated) 335 + * record.info.seq = 56 336 + * record.info.ts_nsec = 36863 337 + * record.info.text_len = 11 338 + * record.info.facility = 0 (LOG_KERN) 339 + * record.info.flags = 0 340 + * record.info.level = 3 (LOG_ERR) 341 + * record.info.caller_id = 299 (task 299) 342 + * record.info.dev_info.subsystem = "pci" (terminated) 343 + * record.info.dev_info.device = "+pci:0000:00:01.0" (terminated) 340 344 * 341 - * The 'struct printk_log' buffer header must never be directly exported to 345 + * The 'struct printk_info' buffer must never be directly exported to 342 346 * userspace, it is a kernel-private implementation detail that might 343 347 * need to be changed in the future, when the requirements change. 
344 348 * ··· 354 364 LOG_NEWLINE = 2, /* text ended with a newline */ 355 365 LOG_CONT = 8, /* text is a fragment of a continuation line */ 356 366 }; 357 - 358 - struct printk_log { 359 - u64 ts_nsec; /* timestamp in nanoseconds */ 360 - u16 len; /* length of entire record */ 361 - u16 text_len; /* length of text buffer */ 362 - u16 dict_len; /* length of dictionary buffer */ 363 - u8 facility; /* syslog facility */ 364 - u8 flags:5; /* internal record flags */ 365 - u8 level:3; /* syslog level */ 366 - #ifdef CONFIG_PRINTK_CALLER 367 - u32 caller_id; /* thread id or processor id */ 368 - #endif 369 - } 370 - #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 371 - __packed __aligned(4) 372 - #endif 373 - ; 374 367 375 368 /* 376 369 * The logbuf_lock protects kmsg buffer, indices, counters. This can be taken ··· 394 421 DECLARE_WAIT_QUEUE_HEAD(log_wait); 395 422 /* the next printk record to read by syslog(READ) or /proc/kmsg */ 396 423 static u64 syslog_seq; 397 - static u32 syslog_idx; 398 424 static size_t syslog_partial; 399 425 static bool syslog_time; 400 426 401 - /* index and sequence number of the first record stored in the buffer */ 402 - static u64 log_first_seq; 403 - static u32 log_first_idx; 404 - 405 - /* index and sequence number of the next record to store in the buffer */ 406 - static u64 log_next_seq; 407 - static u32 log_next_idx; 408 - 409 427 /* the next printk record to write to the console */ 410 428 static u64 console_seq; 411 - static u32 console_idx; 412 429 static u64 exclusive_console_stop_seq; 430 + static unsigned long console_dropped; 413 431 414 432 /* the next printk record to read after the last 'clear' command */ 415 433 static u64 clear_seq; 416 - static u32 clear_idx; 417 434 418 435 #ifdef CONFIG_PRINTK_CALLER 419 436 #define PREFIX_MAX 48 ··· 416 453 #define LOG_FACILITY(v) ((v) >> 3 & 0xff) 417 454 418 455 /* record buffer */ 419 - #define LOG_ALIGN __alignof__(struct printk_log) 456 + #define LOG_ALIGN __alignof__(unsigned 
long) 420 457 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) 421 458 #define LOG_BUF_LEN_MAX (u32)(1 << 31) 422 459 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); 423 460 static char *log_buf = __log_buf; 424 461 static u32 log_buf_len = __LOG_BUF_LEN; 462 + 463 + /* 464 + * Define the average message size. This only affects the number of 465 + * descriptors that will be available. Underestimating is better than 466 + * overestimating (too many available descriptors is better than not enough). 467 + */ 468 + #define PRB_AVGBITS 5 /* 32 character average length */ 469 + 470 + #if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS 471 + #error CONFIG_LOG_BUF_SHIFT value too small. 472 + #endif 473 + _DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS, 474 + PRB_AVGBITS, &__log_buf[0]); 475 + 476 + static struct printk_ringbuffer printk_rb_dynamic; 477 + 478 + static struct printk_ringbuffer *prb = &printk_rb_static; 425 479 426 480 /* 427 481 * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before ··· 464 484 return log_buf_len; 465 485 } 466 486 467 - /* human readable text of the record */ 468 - static char *log_text(const struct printk_log *msg) 469 - { 470 - return (char *)msg + sizeof(struct printk_log); 471 - } 472 - 473 - /* optional key/value pair dictionary attached to the record */ 474 - static char *log_dict(const struct printk_log *msg) 475 - { 476 - return (char *)msg + sizeof(struct printk_log) + msg->text_len; 477 - } 478 - 479 - /* get record by index; idx must point to valid msg */ 480 - static struct printk_log *log_from_idx(u32 idx) 481 - { 482 - struct printk_log *msg = (struct printk_log *)(log_buf + idx); 483 - 484 - /* 485 - * A length == 0 record is the end of buffer marker. Wrap around and 486 - * read the message at the start of the buffer. 
487 - */ 488 - if (!msg->len) 489 - return (struct printk_log *)log_buf; 490 - return msg; 491 - } 492 - 493 - /* get next record; idx must point to valid msg */ 494 - static u32 log_next(u32 idx) 495 - { 496 - struct printk_log *msg = (struct printk_log *)(log_buf + idx); 497 - 498 - /* length == 0 indicates the end of the buffer; wrap */ 499 - /* 500 - * A length == 0 record is the end of buffer marker. Wrap around and 501 - * read the message at the start of the buffer as *this* one, and 502 - * return the one after that. 503 - */ 504 - if (!msg->len) { 505 - msg = (struct printk_log *)log_buf; 506 - return msg->len; 507 - } 508 - return idx + msg->len; 509 - } 510 - 511 - /* 512 - * Check whether there is enough free space for the given message. 513 - * 514 - * The same values of first_idx and next_idx mean that the buffer 515 - * is either empty or full. 516 - * 517 - * If the buffer is empty, we must respect the position of the indexes. 518 - * They cannot be reset to the beginning of the buffer. 519 - */ 520 - static int logbuf_has_space(u32 msg_size, bool empty) 521 - { 522 - u32 free; 523 - 524 - if (log_next_idx > log_first_idx || empty) 525 - free = max(log_buf_len - log_next_idx, log_first_idx); 526 - else 527 - free = log_first_idx - log_next_idx; 528 - 529 - /* 530 - * We need space also for an empty header that signalizes wrapping 531 - * of the buffer. 
532 - */ 533 - return free >= msg_size + sizeof(struct printk_log); 534 - } 535 - 536 - static int log_make_free_space(u32 msg_size) 537 - { 538 - while (log_first_seq < log_next_seq && 539 - !logbuf_has_space(msg_size, false)) { 540 - /* drop old messages until we have enough contiguous space */ 541 - log_first_idx = log_next(log_first_idx); 542 - log_first_seq++; 543 - } 544 - 545 - if (clear_seq < log_first_seq) { 546 - clear_seq = log_first_seq; 547 - clear_idx = log_first_idx; 548 - } 549 - 550 - /* sequence numbers are equal, so the log buffer is empty */ 551 - if (logbuf_has_space(msg_size, log_first_seq == log_next_seq)) 552 - return 0; 553 - 554 - return -ENOMEM; 555 - } 556 - 557 - /* compute the message size including the padding bytes */ 558 - static u32 msg_used_size(u16 text_len, u16 dict_len, u32 *pad_len) 559 - { 560 - u32 size; 561 - 562 - size = sizeof(struct printk_log) + text_len + dict_len; 563 - *pad_len = (-size) & (LOG_ALIGN - 1); 564 - size += *pad_len; 565 - 566 - return size; 567 - } 568 - 569 487 /* 570 488 * Define how much of the log buffer we could take at maximum. The value 571 489 * must be greater than two. Note that only half of the buffer is available ··· 472 594 #define MAX_LOG_TAKE_PART 4 473 595 static const char trunc_msg[] = "<truncated>"; 474 596 475 - static u32 truncate_msg(u16 *text_len, u16 *trunc_msg_len, 476 - u16 *dict_len, u32 *pad_len) 597 + static void truncate_msg(u16 *text_len, u16 *trunc_msg_len) 477 598 { 478 599 /* 479 600 * The message should not take the whole buffer. Otherwise, it might 480 601 * get removed too soon. 
481 602 */ 482 603 u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART; 604 + 483 605 if (*text_len > max_text_len) 484 606 *text_len = max_text_len; 485 - /* enable the warning message */ 607 + 608 + /* enable the warning message (if there is room) */ 486 609 *trunc_msg_len = strlen(trunc_msg); 487 - /* disable the "dict" completely */ 488 - *dict_len = 0; 489 - /* compute the size again, count also the warning message */ 490 - return msg_used_size(*text_len + *trunc_msg_len, 0, pad_len); 610 + if (*text_len >= *trunc_msg_len) 611 + *text_len -= *trunc_msg_len; 612 + else 613 + *trunc_msg_len = 0; 491 614 } 492 615 493 616 /* insert record into the buffer, discard old ones, update heads */ 494 617 static int log_store(u32 caller_id, int facility, int level, 495 618 enum log_flags flags, u64 ts_nsec, 496 - const char *dict, u16 dict_len, 619 + const struct dev_printk_info *dev_info, 497 620 const char *text, u16 text_len) 498 621 { 499 - struct printk_log *msg; 500 - u32 size, pad_len; 622 + struct prb_reserved_entry e; 623 + struct printk_record r; 501 624 u16 trunc_msg_len = 0; 502 625 503 - /* number of '\0' padding bytes to next message */ 504 - size = msg_used_size(text_len, dict_len, &pad_len); 626 + prb_rec_init_wr(&r, text_len); 505 627 506 - if (log_make_free_space(size)) { 628 + if (!prb_reserve(&e, prb, &r)) { 507 629 /* truncate the message if it is too long for empty buffer */ 508 - size = truncate_msg(&text_len, &trunc_msg_len, 509 - &dict_len, &pad_len); 630 + truncate_msg(&text_len, &trunc_msg_len); 631 + prb_rec_init_wr(&r, text_len + trunc_msg_len); 510 632 /* survive when the log buffer is too small for trunc_msg */ 511 - if (log_make_free_space(size)) 633 + if (!prb_reserve(&e, prb, &r)) 512 634 return 0; 513 635 } 514 636 515 - if (log_next_idx + size + sizeof(struct printk_log) > log_buf_len) { 516 - /* 517 - * This message + an additional empty header does not fit 518 - * at the end of the buffer. 
Add an empty header with len == 0 519 - * to signify a wrap around. 520 - */ 521 - memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); 522 - log_next_idx = 0; 523 - } 524 - 525 637 /* fill message */ 526 - msg = (struct printk_log *)(log_buf + log_next_idx); 527 - memcpy(log_text(msg), text, text_len); 528 - msg->text_len = text_len; 529 - if (trunc_msg_len) { 530 - memcpy(log_text(msg) + text_len, trunc_msg, trunc_msg_len); 531 - msg->text_len += trunc_msg_len; 532 - } 533 - memcpy(log_dict(msg), dict, dict_len); 534 - msg->dict_len = dict_len; 535 - msg->facility = facility; 536 - msg->level = level & 7; 537 - msg->flags = flags & 0x1f; 638 + memcpy(&r.text_buf[0], text, text_len); 639 + if (trunc_msg_len) 640 + memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len); 641 + r.info->text_len = text_len + trunc_msg_len; 642 + r.info->facility = facility; 643 + r.info->level = level & 7; 644 + r.info->flags = flags & 0x1f; 538 645 if (ts_nsec > 0) 539 - msg->ts_nsec = ts_nsec; 646 + r.info->ts_nsec = ts_nsec; 540 647 else 541 - msg->ts_nsec = local_clock(); 542 - #ifdef CONFIG_PRINTK_CALLER 543 - msg->caller_id = caller_id; 544 - #endif 545 - memset(log_dict(msg) + dict_len, 0, pad_len); 546 - msg->len = size; 648 + r.info->ts_nsec = local_clock(); 649 + r.info->caller_id = caller_id; 650 + if (dev_info) 651 + memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info)); 547 652 548 653 /* insert message */ 549 - log_next_idx += msg->len; 550 - log_next_seq++; 654 + if ((flags & LOG_CONT) || !(flags & LOG_NEWLINE)) 655 + prb_commit(&e); 656 + else 657 + prb_final_commit(&e); 551 658 552 - return msg->text_len; 659 + return (text_len + trunc_msg_len); 553 660 } 554 661 555 662 int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT); ··· 586 723 *(*pp)++ = c; 587 724 } 588 725 589 - static ssize_t msg_print_ext_header(char *buf, size_t size, 590 - struct printk_log *msg, u64 seq) 726 + static ssize_t info_print_ext_header(char *buf, size_t size, 727 
+ struct printk_info *info) 591 728 { 592 - u64 ts_usec = msg->ts_nsec; 729 + u64 ts_usec = info->ts_nsec; 593 730 char caller[20]; 594 731 #ifdef CONFIG_PRINTK_CALLER 595 - u32 id = msg->caller_id; 732 + u32 id = info->caller_id; 596 733 597 734 snprintf(caller, sizeof(caller), ",caller=%c%u", 598 735 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000); ··· 603 740 do_div(ts_usec, 1000); 604 741 605 742 return scnprintf(buf, size, "%u,%llu,%llu,%c%s;", 606 - (msg->facility << 3) | msg->level, seq, ts_usec, 607 - msg->flags & LOG_CONT ? 'c' : '-', caller); 743 + (info->facility << 3) | info->level, info->seq, 744 + ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller); 608 745 } 609 746 610 - static ssize_t msg_print_ext_body(char *buf, size_t size, 611 - char *dict, size_t dict_len, 612 - char *text, size_t text_len) 747 + static ssize_t msg_add_ext_text(char *buf, size_t size, 748 + const char *text, size_t text_len, 749 + unsigned char endc) 613 750 { 614 751 char *p = buf, *e = buf + size; 615 752 size_t i; ··· 623 760 else 624 761 append_char(&p, e, c); 625 762 } 626 - append_char(&p, e, '\n'); 627 - 628 - if (dict_len) { 629 - bool line = true; 630 - 631 - for (i = 0; i < dict_len; i++) { 632 - unsigned char c = dict[i]; 633 - 634 - if (line) { 635 - append_char(&p, e, ' '); 636 - line = false; 637 - } 638 - 639 - if (c == '\0') { 640 - append_char(&p, e, '\n'); 641 - line = true; 642 - continue; 643 - } 644 - 645 - if (c < ' ' || c >= 127 || c == '\\') { 646 - p += scnprintf(p, e - p, "\\x%02x", c); 647 - continue; 648 - } 649 - 650 - append_char(&p, e, c); 651 - } 652 - append_char(&p, e, '\n'); 653 - } 763 + append_char(&p, e, endc); 654 764 655 765 return p - buf; 766 + } 767 + 768 + static ssize_t msg_add_dict_text(char *buf, size_t size, 769 + const char *key, const char *val) 770 + { 771 + size_t val_len = strlen(val); 772 + ssize_t len; 773 + 774 + if (!val_len) 775 + return 0; 776 + 777 + len = msg_add_ext_text(buf, size, "", 0, ' '); /* dict prefix */ 778 
+ len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '='); 779 + len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n'); 780 + 781 + return len; 782 + } 783 + 784 + static ssize_t msg_print_ext_body(char *buf, size_t size, 785 + char *text, size_t text_len, 786 + struct dev_printk_info *dev_info) 787 + { 788 + ssize_t len; 789 + 790 + len = msg_add_ext_text(buf, size, text, text_len, '\n'); 791 + 792 + if (!dev_info) 793 + goto out; 794 + 795 + len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM", 796 + dev_info->subsystem); 797 + len += msg_add_dict_text(buf + len, size - len, "DEVICE", 798 + dev_info->device); 799 + out: 800 + return len; 656 801 } 657 802 658 803 /* /dev/kmsg - userspace message inject/listen interface */ 659 804 struct devkmsg_user { 660 805 u64 seq; 661 - u32 idx; 662 806 struct ratelimit_state rs; 663 807 struct mutex lock; 664 808 char buf[CONSOLE_EXT_LOG_MAX]; 809 + 810 + struct printk_info info; 811 + char text_buf[CONSOLE_EXT_LOG_MAX]; 812 + struct printk_record record; 665 813 }; 666 814 667 815 static __printf(3, 4) __cold ··· 682 808 int r; 683 809 684 810 va_start(args, fmt); 685 - r = vprintk_emit(facility, level, NULL, 0, fmt, args); 811 + r = vprintk_emit(facility, level, NULL, fmt, args); 686 812 va_end(args); 687 813 688 814 return r; ··· 755 881 size_t count, loff_t *ppos) 756 882 { 757 883 struct devkmsg_user *user = file->private_data; 758 - struct printk_log *msg; 884 + struct printk_record *r = &user->record; 759 885 size_t len; 760 886 ssize_t ret; 761 887 ··· 767 893 return ret; 768 894 769 895 logbuf_lock_irq(); 770 - while (user->seq == log_next_seq) { 896 + if (!prb_read_valid(prb, user->seq, r)) { 771 897 if (file->f_flags & O_NONBLOCK) { 772 898 ret = -EAGAIN; 773 899 logbuf_unlock_irq(); ··· 776 902 777 903 logbuf_unlock_irq(); 778 904 ret = wait_event_interruptible(log_wait, 779 - user->seq != log_next_seq); 905 + prb_read_valid(prb, user->seq, r)); 780 906 if (ret) 781 907 goto out; 
782 908 logbuf_lock_irq(); 783 909 } 784 910 785 - if (user->seq < log_first_seq) { 911 + if (user->seq < prb_first_valid_seq(prb)) { 786 912 /* our last seen message is gone, return error and reset */ 787 - user->idx = log_first_idx; 788 - user->seq = log_first_seq; 913 + user->seq = prb_first_valid_seq(prb); 789 914 ret = -EPIPE; 790 915 logbuf_unlock_irq(); 791 916 goto out; 792 917 } 793 918 794 - msg = log_from_idx(user->idx); 795 - len = msg_print_ext_header(user->buf, sizeof(user->buf), 796 - msg, user->seq); 919 + len = info_print_ext_header(user->buf, sizeof(user->buf), r->info); 797 920 len += msg_print_ext_body(user->buf + len, sizeof(user->buf) - len, 798 - log_dict(msg), msg->dict_len, 799 - log_text(msg), msg->text_len); 921 + &r->text_buf[0], r->info->text_len, 922 + &r->info->dev_info); 800 923 801 - user->idx = log_next(user->idx); 802 - user->seq++; 924 + user->seq = r->info->seq + 1; 803 925 logbuf_unlock_irq(); 804 926 805 927 if (len > count) { ··· 835 965 switch (whence) { 836 966 case SEEK_SET: 837 967 /* the first record */ 838 - user->idx = log_first_idx; 839 - user->seq = log_first_seq; 968 + user->seq = prb_first_valid_seq(prb); 840 969 break; 841 970 case SEEK_DATA: 842 971 /* ··· 843 974 * like issued by 'dmesg -c'. Reading /dev/kmsg itself 844 975 * changes no global state, and does not clear anything. 
845 976 */ 846 - user->idx = clear_idx; 847 977 user->seq = clear_seq; 848 978 break; 849 979 case SEEK_END: 850 980 /* after the last record */ 851 - user->idx = log_next_idx; 852 - user->seq = log_next_seq; 981 + user->seq = prb_next_seq(prb); 853 982 break; 854 983 default: 855 984 ret = -EINVAL; ··· 867 1000 poll_wait(file, &log_wait, wait); 868 1001 869 1002 logbuf_lock_irq(); 870 - if (user->seq < log_next_seq) { 1003 + if (prb_read_valid(prb, user->seq, NULL)) { 871 1004 /* return error when data has vanished underneath us */ 872 - if (user->seq < log_first_seq) 1005 + if (user->seq < prb_first_valid_seq(prb)) 873 1006 ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; 874 1007 else 875 1008 ret = EPOLLIN|EPOLLRDNORM; ··· 904 1037 905 1038 mutex_init(&user->lock); 906 1039 1040 + prb_rec_init_rd(&user->record, &user->info, 1041 + &user->text_buf[0], sizeof(user->text_buf)); 1042 + 907 1043 logbuf_lock_irq(); 908 - user->idx = log_first_idx; 909 - user->seq = log_first_seq; 1044 + user->seq = prb_first_valid_seq(prb); 910 1045 logbuf_unlock_irq(); 911 1046 912 1047 file->private_data = user; ··· 949 1080 */ 950 1081 void log_buf_vmcoreinfo_setup(void) 951 1082 { 952 - VMCOREINFO_SYMBOL(log_buf); 953 - VMCOREINFO_SYMBOL(log_buf_len); 954 - VMCOREINFO_SYMBOL(log_first_idx); 955 - VMCOREINFO_SYMBOL(clear_idx); 956 - VMCOREINFO_SYMBOL(log_next_idx); 1083 + struct dev_printk_info *dev_info = NULL; 1084 + 1085 + VMCOREINFO_SYMBOL(prb); 1086 + VMCOREINFO_SYMBOL(printk_rb_static); 1087 + VMCOREINFO_SYMBOL(clear_seq); 1088 + 957 1089 /* 958 - * Export struct printk_log size and field offsets. User space tools can 1090 + * Export struct size and field offsets. User space tools can 959 1091 * parse it and detect any changes to structure down the line. 
960 1092 */ 961 - VMCOREINFO_STRUCT_SIZE(printk_log); 962 - VMCOREINFO_OFFSET(printk_log, ts_nsec); 963 - VMCOREINFO_OFFSET(printk_log, len); 964 - VMCOREINFO_OFFSET(printk_log, text_len); 965 - VMCOREINFO_OFFSET(printk_log, dict_len); 966 - #ifdef CONFIG_PRINTK_CALLER 967 - VMCOREINFO_OFFSET(printk_log, caller_id); 968 - #endif 1093 + 1094 + VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); 1095 + VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); 1096 + VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); 1097 + VMCOREINFO_OFFSET(printk_ringbuffer, fail); 1098 + 1099 + VMCOREINFO_STRUCT_SIZE(prb_desc_ring); 1100 + VMCOREINFO_OFFSET(prb_desc_ring, count_bits); 1101 + VMCOREINFO_OFFSET(prb_desc_ring, descs); 1102 + VMCOREINFO_OFFSET(prb_desc_ring, infos); 1103 + VMCOREINFO_OFFSET(prb_desc_ring, head_id); 1104 + VMCOREINFO_OFFSET(prb_desc_ring, tail_id); 1105 + 1106 + VMCOREINFO_STRUCT_SIZE(prb_desc); 1107 + VMCOREINFO_OFFSET(prb_desc, state_var); 1108 + VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); 1109 + 1110 + VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); 1111 + VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); 1112 + VMCOREINFO_OFFSET(prb_data_blk_lpos, next); 1113 + 1114 + VMCOREINFO_STRUCT_SIZE(printk_info); 1115 + VMCOREINFO_OFFSET(printk_info, seq); 1116 + VMCOREINFO_OFFSET(printk_info, ts_nsec); 1117 + VMCOREINFO_OFFSET(printk_info, text_len); 1118 + VMCOREINFO_OFFSET(printk_info, caller_id); 1119 + VMCOREINFO_OFFSET(printk_info, dev_info); 1120 + 1121 + VMCOREINFO_STRUCT_SIZE(dev_printk_info); 1122 + VMCOREINFO_OFFSET(dev_printk_info, subsystem); 1123 + VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); 1124 + VMCOREINFO_OFFSET(dev_printk_info, device); 1125 + VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); 1126 + 1127 + VMCOREINFO_STRUCT_SIZE(prb_data_ring); 1128 + VMCOREINFO_OFFSET(prb_data_ring, size_bits); 1129 + VMCOREINFO_OFFSET(prb_data_ring, data); 1130 + VMCOREINFO_OFFSET(prb_data_ring, head_lpos); 1131 + 
VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); 1132 + 1133 + VMCOREINFO_SIZE(atomic_long_t); 1134 + VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); 969 1135 } 970 1136 #endif 971 1137 ··· 1078 1174 __printk_percpu_data_ready = true; 1079 1175 } 1080 1176 1177 + static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, 1178 + struct printk_record *r) 1179 + { 1180 + struct prb_reserved_entry e; 1181 + struct printk_record dest_r; 1182 + 1183 + prb_rec_init_wr(&dest_r, r->info->text_len); 1184 + 1185 + if (!prb_reserve(&e, rb, &dest_r)) 1186 + return 0; 1187 + 1188 + memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); 1189 + dest_r.info->text_len = r->info->text_len; 1190 + dest_r.info->facility = r->info->facility; 1191 + dest_r.info->level = r->info->level; 1192 + dest_r.info->flags = r->info->flags; 1193 + dest_r.info->ts_nsec = r->info->ts_nsec; 1194 + dest_r.info->caller_id = r->info->caller_id; 1195 + memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); 1196 + 1197 + prb_final_commit(&e); 1198 + 1199 + return prb_record_text_space(&e); 1200 + } 1201 + 1202 + static char setup_text_buf[LOG_LINE_MAX] __initdata; 1203 + 1081 1204 void __init setup_log_buf(int early) 1082 1205 { 1206 + struct printk_info *new_infos; 1207 + unsigned int new_descs_count; 1208 + struct prb_desc *new_descs; 1209 + struct printk_info info; 1210 + struct printk_record r; 1211 + size_t new_descs_size; 1212 + size_t new_infos_size; 1083 1213 unsigned long flags; 1084 1214 char *new_log_buf; 1085 1215 unsigned int free; 1216 + u64 seq; 1086 1217 1087 1218 /* 1088 1219 * Some archs call setup_log_buf() multiple times - first is very ··· 1136 1197 if (!new_log_buf_len) 1137 1198 return; 1138 1199 1139 - new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); 1140 - if (unlikely(!new_log_buf)) { 1141 - pr_err("log_buf_len: %lu bytes not available\n", 1142 - new_log_buf_len); 1200 + new_descs_count = new_log_buf_len >> PRB_AVGBITS; 1201 + if 
(new_descs_count == 0) { 1202 + pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); 1143 1203 return; 1144 1204 } 1145 1205 1206 + new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); 1207 + if (unlikely(!new_log_buf)) { 1208 + pr_err("log_buf_len: %lu text bytes not available\n", 1209 + new_log_buf_len); 1210 + return; 1211 + } 1212 + 1213 + new_descs_size = new_descs_count * sizeof(struct prb_desc); 1214 + new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); 1215 + if (unlikely(!new_descs)) { 1216 + pr_err("log_buf_len: %zu desc bytes not available\n", 1217 + new_descs_size); 1218 + goto err_free_log_buf; 1219 + } 1220 + 1221 + new_infos_size = new_descs_count * sizeof(struct printk_info); 1222 + new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); 1223 + if (unlikely(!new_infos)) { 1224 + pr_err("log_buf_len: %zu info bytes not available\n", 1225 + new_infos_size); 1226 + goto err_free_descs; 1227 + } 1228 + 1229 + prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); 1230 + 1231 + prb_init(&printk_rb_dynamic, 1232 + new_log_buf, ilog2(new_log_buf_len), 1233 + new_descs, ilog2(new_descs_count), 1234 + new_infos); 1235 + 1146 1236 logbuf_lock_irqsave(flags); 1237 + 1147 1238 log_buf_len = new_log_buf_len; 1148 1239 log_buf = new_log_buf; 1149 1240 new_log_buf_len = 0; 1150 - free = __LOG_BUF_LEN - log_next_idx; 1151 - memcpy(log_buf, __log_buf, __LOG_BUF_LEN); 1241 + 1242 + free = __LOG_BUF_LEN; 1243 + prb_for_each_record(0, &printk_rb_static, seq, &r) 1244 + free -= add_to_rb(&printk_rb_dynamic, &r); 1245 + 1246 + /* 1247 + * This is early enough that everything is still running on the 1248 + * boot CPU and interrupts are disabled. So no new messages will 1249 + * appear during the transition to the dynamic buffer. 
1250 + */ 1251 + prb = &printk_rb_dynamic; 1252 + 1152 1253 logbuf_unlock_irqrestore(flags); 1254 + 1255 + if (seq != prb_next_seq(&printk_rb_static)) { 1256 + pr_err("dropped %llu messages\n", 1257 + prb_next_seq(&printk_rb_static) - seq); 1258 + } 1153 1259 1154 1260 pr_info("log_buf_len: %u bytes\n", log_buf_len); 1155 1261 pr_info("early log buf free: %u(%u%%)\n", 1156 1262 free, (free * 100) / __LOG_BUF_LEN); 1263 + return; 1264 + 1265 + err_free_descs: 1266 + memblock_free(__pa(new_descs), new_descs_size); 1267 + err_free_log_buf: 1268 + memblock_free(__pa(new_log_buf), new_log_buf_len); 1157 1269 } 1158 1270 1159 1271 static bool __read_mostly ignore_loglevel; ··· 1311 1321 #define print_caller(id, buf) 0 1312 1322 #endif 1313 1323 1314 - static size_t print_prefix(const struct printk_log *msg, bool syslog, 1315 - bool time, char *buf) 1324 + static size_t info_print_prefix(const struct printk_info *info, bool syslog, 1325 + bool time, char *buf) 1316 1326 { 1317 1327 size_t len = 0; 1318 1328 1319 1329 if (syslog) 1320 - len = print_syslog((msg->facility << 3) | msg->level, buf); 1330 + len = print_syslog((info->facility << 3) | info->level, buf); 1321 1331 1322 1332 if (time) 1323 - len += print_time(msg->ts_nsec, buf + len); 1333 + len += print_time(info->ts_nsec, buf + len); 1324 1334 1325 - len += print_caller(msg->caller_id, buf + len); 1335 + len += print_caller(info->caller_id, buf + len); 1326 1336 1327 1337 if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { 1328 1338 buf[len++] = ' '; ··· 1332 1342 return len; 1333 1343 } 1334 1344 1335 - static size_t msg_print_text(const struct printk_log *msg, bool syslog, 1336 - bool time, char *buf, size_t size) 1345 + /* 1346 + * Prepare the record for printing. The text is shifted within the given 1347 + * buffer to avoid a need for another one. The following operations are 1348 + * done: 1349 + * 1350 + * - Add prefix for each line. 1351 + * - Add the trailing newline that has been removed in vprintk_store(). 
1352 + * - Drop truncated lines that do not longer fit into the buffer. 1353 + * 1354 + * Return: The length of the updated/prepared text, including the added 1355 + * prefixes and the newline. The dropped line(s) are not counted. 1356 + */ 1357 + static size_t record_print_text(struct printk_record *r, bool syslog, 1358 + bool time) 1337 1359 { 1338 - const char *text = log_text(msg); 1339 - size_t text_size = msg->text_len; 1340 - size_t len = 0; 1360 + size_t text_len = r->info->text_len; 1361 + size_t buf_size = r->text_buf_size; 1362 + char *text = r->text_buf; 1341 1363 char prefix[PREFIX_MAX]; 1342 - const size_t prefix_len = print_prefix(msg, syslog, time, prefix); 1364 + bool truncated = false; 1365 + size_t prefix_len; 1366 + size_t line_len; 1367 + size_t len = 0; 1368 + char *next; 1343 1369 1344 - do { 1345 - const char *next = memchr(text, '\n', text_size); 1346 - size_t text_len; 1370 + /* 1371 + * If the message was truncated because the buffer was not large 1372 + * enough, treat the available text as if it were the full text. 1373 + */ 1374 + if (text_len > buf_size) 1375 + text_len = buf_size; 1347 1376 1377 + prefix_len = info_print_prefix(r->info, syslog, time, prefix); 1378 + 1379 + /* 1380 + * @text_len: bytes of unprocessed text 1381 + * @line_len: bytes of current line _without_ newline 1382 + * @text: pointer to beginning of current line 1383 + * @len: number of bytes prepared in r->text_buf 1384 + */ 1385 + for (;;) { 1386 + next = memchr(text, '\n', text_len); 1348 1387 if (next) { 1349 - text_len = next - text; 1350 - next++; 1351 - text_size -= next - text; 1388 + line_len = next - text; 1352 1389 } else { 1353 - text_len = text_size; 1390 + /* Drop truncated line(s). 
*/ 1391 + if (truncated) 1392 + break; 1393 + line_len = text_len; 1354 1394 } 1355 1395 1356 - if (buf) { 1357 - if (prefix_len + text_len + 1 >= size - len) 1396 + /* 1397 + * Truncate the text if there is not enough space to add the 1398 + * prefix and a trailing newline. 1399 + */ 1400 + if (len + prefix_len + text_len + 1 > buf_size) { 1401 + /* Drop even the current line if no space. */ 1402 + if (len + prefix_len + line_len + 1 > buf_size) 1358 1403 break; 1359 1404 1360 - memcpy(buf + len, prefix, prefix_len); 1361 - len += prefix_len; 1362 - memcpy(buf + len, text, text_len); 1363 - len += text_len; 1364 - buf[len++] = '\n'; 1365 - } else { 1366 - /* SYSLOG_ACTION_* buffer size only calculation */ 1367 - len += prefix_len + text_len + 1; 1405 + text_len = buf_size - len - prefix_len - 1; 1406 + truncated = true; 1368 1407 } 1369 1408 1370 - text = next; 1371 - } while (text); 1409 + memmove(text + prefix_len, text, text_len); 1410 + memcpy(text, prefix, prefix_len); 1411 + 1412 + len += prefix_len + line_len + 1; 1413 + 1414 + if (text_len == line_len) { 1415 + /* 1416 + * Add the trailing newline removed in 1417 + * vprintk_store(). 1418 + */ 1419 + text[prefix_len + line_len] = '\n'; 1420 + break; 1421 + } 1422 + 1423 + /* 1424 + * Advance beyond the added prefix and the related line with 1425 + * its newline. 1426 + */ 1427 + text += prefix_len + line_len + 1; 1428 + 1429 + /* 1430 + * The remaining text has only decreased by the line with its 1431 + * newline. 1432 + * 1433 + * Note that @text_len can become zero. It happens when @text 1434 + * ended with a newline (either due to truncation or the 1435 + * original string ending with "\n\n"). The loop is correctly 1436 + * repeated and (if not truncated) an empty line with a prefix 1437 + * will be prepared. 
1438 + */ 1439 + text_len -= line_len + 1; 1440 + } 1372 1441 1373 1442 return len; 1374 1443 } 1375 1444 1445 + static size_t get_record_print_text_size(struct printk_info *info, 1446 + unsigned int line_count, 1447 + bool syslog, bool time) 1448 + { 1449 + char prefix[PREFIX_MAX]; 1450 + size_t prefix_len; 1451 + 1452 + prefix_len = info_print_prefix(info, syslog, time, prefix); 1453 + 1454 + /* 1455 + * Each line will be preceded with a prefix. The intermediate 1456 + * newlines are already within the text, but a final trailing 1457 + * newline will be added. 1458 + */ 1459 + return ((prefix_len * line_count) + info->text_len + 1); 1460 + } 1461 + 1376 1462 static int syslog_print(char __user *buf, int size) 1377 1463 { 1464 + struct printk_info info; 1465 + struct printk_record r; 1378 1466 char *text; 1379 - struct printk_log *msg; 1380 1467 int len = 0; 1381 1468 1382 1469 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); 1383 1470 if (!text) 1384 1471 return -ENOMEM; 1385 1472 1473 + prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); 1474 + 1386 1475 while (size > 0) { 1387 1476 size_t n; 1388 1477 size_t skip; 1389 1478 1390 1479 logbuf_lock_irq(); 1391 - if (syslog_seq < log_first_seq) { 1392 - /* messages are gone, move to first one */ 1393 - syslog_seq = log_first_seq; 1394 - syslog_idx = log_first_idx; 1395 - syslog_partial = 0; 1396 - } 1397 - if (syslog_seq == log_next_seq) { 1480 + if (!prb_read_valid(prb, syslog_seq, &r)) { 1398 1481 logbuf_unlock_irq(); 1399 1482 break; 1483 + } 1484 + if (r.info->seq != syslog_seq) { 1485 + /* message is gone, move to next valid one */ 1486 + syslog_seq = r.info->seq; 1487 + syslog_partial = 0; 1400 1488 } 1401 1489 1402 1490 /* ··· 1485 1417 syslog_time = printk_time; 1486 1418 1487 1419 skip = syslog_partial; 1488 - msg = log_from_idx(syslog_idx); 1489 - n = msg_print_text(msg, true, syslog_time, text, 1490 - LOG_LINE_MAX + PREFIX_MAX); 1420 + n = record_print_text(&r, true, syslog_time); 1491 
1421 if (n - syslog_partial <= size) { 1492 1422 /* message fits into buffer, move forward */ 1493 - syslog_idx = log_next(syslog_idx); 1494 - syslog_seq++; 1423 + syslog_seq = r.info->seq + 1; 1495 1424 n -= syslog_partial; 1496 1425 syslog_partial = 0; 1497 1426 } else if (!len){ ··· 1519 1454 1520 1455 static int syslog_print_all(char __user *buf, int size, bool clear) 1521 1456 { 1457 + struct printk_info info; 1458 + unsigned int line_count; 1459 + struct printk_record r; 1522 1460 char *text; 1523 1461 int len = 0; 1524 - u64 next_seq; 1525 1462 u64 seq; 1526 - u32 idx; 1527 1463 bool time; 1528 1464 1529 1465 text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); ··· 1537 1471 * Find first record that fits, including all following records, 1538 1472 * into the user-provided buffer for this dump. 1539 1473 */ 1540 - seq = clear_seq; 1541 - idx = clear_idx; 1542 - while (seq < log_next_seq) { 1543 - struct printk_log *msg = log_from_idx(idx); 1544 - 1545 - len += msg_print_text(msg, true, time, NULL, 0); 1546 - idx = log_next(idx); 1547 - seq++; 1548 - } 1474 + prb_for_each_info(clear_seq, prb, seq, &info, &line_count) 1475 + len += get_record_print_text_size(&info, line_count, true, time); 1549 1476 1550 1477 /* move first record forward until length fits into the buffer */ 1551 - seq = clear_seq; 1552 - idx = clear_idx; 1553 - while (len > size && seq < log_next_seq) { 1554 - struct printk_log *msg = log_from_idx(idx); 1555 - 1556 - len -= msg_print_text(msg, true, time, NULL, 0); 1557 - idx = log_next(idx); 1558 - seq++; 1478 + prb_for_each_info(clear_seq, prb, seq, &info, &line_count) { 1479 + if (len <= size) 1480 + break; 1481 + len -= get_record_print_text_size(&info, line_count, true, time); 1559 1482 } 1560 1483 1561 - /* last message fitting into this dump */ 1562 - next_seq = log_next_seq; 1484 + prb_rec_init_rd(&r, &info, text, LOG_LINE_MAX + PREFIX_MAX); 1563 1485 1564 1486 len = 0; 1565 - while (len >= 0 && seq < next_seq) { 1566 - struct 
printk_log *msg = log_from_idx(idx); 1567 - int textlen = msg_print_text(msg, true, time, text, 1568 - LOG_LINE_MAX + PREFIX_MAX); 1487 + prb_for_each_record(seq, prb, seq, &r) { 1488 + int textlen; 1569 1489 1570 - idx = log_next(idx); 1571 - seq++; 1490 + textlen = record_print_text(&r, true, time); 1491 + 1492 + if (len + textlen > size) { 1493 + seq--; 1494 + break; 1495 + } 1572 1496 1573 1497 logbuf_unlock_irq(); 1574 1498 if (copy_to_user(buf + len, text, textlen)) ··· 1567 1511 len += textlen; 1568 1512 logbuf_lock_irq(); 1569 1513 1570 - if (seq < log_first_seq) { 1571 - /* messages are gone, move to next one */ 1572 - seq = log_first_seq; 1573 - idx = log_first_idx; 1574 - } 1514 + if (len < 0) 1515 + break; 1575 1516 } 1576 1517 1577 - if (clear) { 1578 - clear_seq = log_next_seq; 1579 - clear_idx = log_next_idx; 1580 - } 1518 + if (clear) 1519 + clear_seq = seq; 1581 1520 logbuf_unlock_irq(); 1582 1521 1583 1522 kfree(text); ··· 1582 1531 static void syslog_clear(void) 1583 1532 { 1584 1533 logbuf_lock_irq(); 1585 - clear_seq = log_next_seq; 1586 - clear_idx = log_next_idx; 1534 + clear_seq = prb_next_seq(prb); 1587 1535 logbuf_unlock_irq(); 1588 1536 } 1589 1537 ··· 1609 1559 if (!access_ok(buf, len)) 1610 1560 return -EFAULT; 1611 1561 error = wait_event_interruptible(log_wait, 1612 - syslog_seq != log_next_seq); 1562 + prb_read_valid(prb, syslog_seq, NULL)); 1613 1563 if (error) 1614 1564 return error; 1615 1565 error = syslog_print(buf, len); ··· 1617 1567 /* Read/clear last kernel messages */ 1618 1568 case SYSLOG_ACTION_READ_CLEAR: 1619 1569 clear = true; 1620 - /* FALL THRU */ 1570 + fallthrough; 1621 1571 /* Read last kernel messages */ 1622 1572 case SYSLOG_ACTION_READ_ALL: 1623 1573 if (!buf || len < 0) ··· 1658 1608 /* Number of chars in the log buffer */ 1659 1609 case SYSLOG_ACTION_SIZE_UNREAD: 1660 1610 logbuf_lock_irq(); 1661 - if (syslog_seq < log_first_seq) { 1611 + if (syslog_seq < prb_first_valid_seq(prb)) { 1662 1612 /* messages are 
gone, move to first one */ 1663 - syslog_seq = log_first_seq; 1664 - syslog_idx = log_first_idx; 1613 + syslog_seq = prb_first_valid_seq(prb); 1665 1614 syslog_partial = 0; 1666 1615 } 1667 1616 if (source == SYSLOG_FROM_PROC) { ··· 1669 1620 * for pending data, not the size; return the count of 1670 1621 * records, not the length. 1671 1622 */ 1672 - error = log_next_seq - syslog_seq; 1623 + error = prb_next_seq(prb) - syslog_seq; 1673 1624 } else { 1674 - u64 seq = syslog_seq; 1675 - u32 idx = syslog_idx; 1676 1625 bool time = syslog_partial ? syslog_time : printk_time; 1626 + struct printk_info info; 1627 + unsigned int line_count; 1628 + u64 seq; 1677 1629 1678 - while (seq < log_next_seq) { 1679 - struct printk_log *msg = log_from_idx(idx); 1680 - 1681 - error += msg_print_text(msg, true, time, NULL, 1682 - 0); 1630 + prb_for_each_info(syslog_seq, prb, seq, &info, 1631 + &line_count) { 1632 + error += get_record_print_text_size(&info, line_count, 1633 + true, time); 1683 1634 time = printk_time; 1684 - idx = log_next(idx); 1685 - seq++; 1686 1635 } 1687 1636 error -= syslog_partial; 1688 1637 } ··· 1851 1804 static void call_console_drivers(const char *ext_text, size_t ext_len, 1852 1805 const char *text, size_t len) 1853 1806 { 1807 + static char dropped_text[64]; 1808 + size_t dropped_len = 0; 1854 1809 struct console *con; 1855 1810 1856 1811 trace_console_rcuidle(text, len); 1812 + 1813 + if (!console_drivers) 1814 + return; 1815 + 1816 + if (console_dropped) { 1817 + dropped_len = snprintf(dropped_text, sizeof(dropped_text), 1818 + "** %lu printk messages dropped **\n", 1819 + console_dropped); 1820 + console_dropped = 0; 1821 + } 1857 1822 1858 1823 for_each_console(con) { 1859 1824 if (exclusive_console && con != exclusive_console) ··· 1879 1820 continue; 1880 1821 if (con->flags & CON_EXTENDED) 1881 1822 con->write(con, ext_text, ext_len); 1882 - else 1823 + else { 1824 + if (dropped_len) 1825 + con->write(con, dropped_text, dropped_len); 1883 1826 
con->write(con, text, len); 1827 + } 1884 1828 } 1885 1829 } 1886 1830 ··· 1907 1845 0x80000000 + raw_smp_processor_id(); 1908 1846 } 1909 1847 1910 - /* 1911 - * Continuation lines are buffered, and not committed to the record buffer 1912 - * until the line is complete, or a race forces it. The line fragments 1913 - * though, are printed immediately to the consoles to ensure everything has 1914 - * reached the console in case of a kernel crash. 1915 - */ 1916 - static struct cont { 1917 - char buf[LOG_LINE_MAX]; 1918 - size_t len; /* length == 0 means unused buffer */ 1919 - u32 caller_id; /* printk_caller_id() of first print */ 1920 - u64 ts_nsec; /* time of first print */ 1921 - u8 level; /* log level of first message */ 1922 - u8 facility; /* log facility of first message */ 1923 - enum log_flags flags; /* prefix, newline flags */ 1924 - } cont; 1925 - 1926 - static void cont_flush(void) 1927 - { 1928 - if (cont.len == 0) 1929 - return; 1930 - 1931 - log_store(cont.caller_id, cont.facility, cont.level, cont.flags, 1932 - cont.ts_nsec, NULL, 0, cont.buf, cont.len); 1933 - cont.len = 0; 1934 - } 1935 - 1936 - static bool cont_add(u32 caller_id, int facility, int level, 1937 - enum log_flags flags, const char *text, size_t len) 1938 - { 1939 - /* If the line gets too long, split it up in separate records. */ 1940 - if (cont.len + len > sizeof(cont.buf)) { 1941 - cont_flush(); 1942 - return false; 1943 - } 1944 - 1945 - if (!cont.len) { 1946 - cont.facility = facility; 1947 - cont.level = level; 1948 - cont.caller_id = caller_id; 1949 - cont.ts_nsec = local_clock(); 1950 - cont.flags = flags; 1951 - } 1952 - 1953 - memcpy(cont.buf + cont.len, text, len); 1954 - cont.len += len; 1955 - 1956 - // The original flags come from the first line, 1957 - // but later continuations can add a newline. 
1958 - if (flags & LOG_NEWLINE) { 1959 - cont.flags |= LOG_NEWLINE; 1960 - cont_flush(); 1961 - } 1962 - 1963 - return true; 1964 - } 1965 - 1966 - static size_t log_output(int facility, int level, enum log_flags lflags, const char *dict, size_t dictlen, char *text, size_t text_len) 1848 + static size_t log_output(int facility, int level, enum log_flags lflags, 1849 + const struct dev_printk_info *dev_info, 1850 + char *text, size_t text_len) 1967 1851 { 1968 1852 const u32 caller_id = printk_caller_id(); 1969 1853 1970 - /* 1971 - * If an earlier line was buffered, and we're a continuation 1972 - * write from the same context, try to add it to the buffer. 1973 - */ 1974 - if (cont.len) { 1975 - if (cont.caller_id == caller_id && (lflags & LOG_CONT)) { 1976 - if (cont_add(caller_id, facility, level, lflags, text, text_len)) 1977 - return text_len; 1978 - } 1979 - /* Otherwise, make sure it's flushed */ 1980 - cont_flush(); 1981 - } 1854 + if (lflags & LOG_CONT) { 1855 + struct prb_reserved_entry e; 1856 + struct printk_record r; 1982 1857 1983 - /* Skip empty continuation lines that couldn't be added - they just flush */ 1984 - if (!text_len && (lflags & LOG_CONT)) 1985 - return 0; 1986 - 1987 - /* If it doesn't end in a newline, try to buffer the current line */ 1988 - if (!(lflags & LOG_NEWLINE)) { 1989 - if (cont_add(caller_id, facility, level, lflags, text, text_len)) 1858 + prb_rec_init_wr(&r, text_len); 1859 + if (prb_reserve_in_last(&e, prb, &r, caller_id, LOG_LINE_MAX)) { 1860 + memcpy(&r.text_buf[r.info->text_len], text, text_len); 1861 + r.info->text_len += text_len; 1862 + if (lflags & LOG_NEWLINE) { 1863 + r.info->flags |= LOG_NEWLINE; 1864 + prb_final_commit(&e); 1865 + } else { 1866 + prb_commit(&e); 1867 + } 1990 1868 return text_len; 1869 + } 1991 1870 } 1992 1871 1993 1872 /* Store it in the record log */ 1994 1873 return log_store(caller_id, facility, level, lflags, 0, 1995 - dict, dictlen, text, text_len); 1874 + dev_info, text, text_len); 1996 
1875 } 1997 1876 1998 1877 /* Must be called under logbuf_lock. */ 1999 1878 int vprintk_store(int facility, int level, 2000 - const char *dict, size_t dictlen, 1879 + const struct dev_printk_info *dev_info, 2001 1880 const char *fmt, va_list args) 2002 1881 { 2003 1882 static char textbuf[LOG_LINE_MAX]; ··· 1980 1977 if (level == LOGLEVEL_DEFAULT) 1981 1978 level = default_message_loglevel; 1982 1979 1983 - if (dict) 1980 + if (dev_info) 1984 1981 lflags |= LOG_NEWLINE; 1985 1982 1986 - return log_output(facility, level, lflags, 1987 - dict, dictlen, text, text_len); 1983 + return log_output(facility, level, lflags, dev_info, text, text_len); 1988 1984 } 1989 1985 1990 1986 asmlinkage int vprintk_emit(int facility, int level, 1991 - const char *dict, size_t dictlen, 1987 + const struct dev_printk_info *dev_info, 1992 1988 const char *fmt, va_list args) 1993 1989 { 1994 1990 int printed_len; 1995 - bool in_sched = false, pending_output; 1991 + bool in_sched = false; 1996 1992 unsigned long flags; 1997 - u64 curr_log_seq; 1998 1993 1999 1994 /* Suppress unimportant messages after panic happens */ 2000 1995 if (unlikely(suppress_printk)) ··· 2008 2007 2009 2008 /* This stops the holder of console_sem just where we want him */ 2010 2009 logbuf_lock_irqsave(flags); 2011 - curr_log_seq = log_next_seq; 2012 - printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args); 2013 - pending_output = (curr_log_seq != log_next_seq); 2010 + printed_len = vprintk_store(facility, level, dev_info, fmt, args); 2014 2011 logbuf_unlock_irqrestore(flags); 2015 2012 2016 2013 /* If called from the scheduler, we can not call up(). 
*/ 2017 - if (!in_sched && pending_output) { 2014 + if (!in_sched) { 2018 2015 /* 2019 2016 * Disable preemption to avoid being preempted while holding 2020 2017 * console_sem which would prevent anyone from printing to ··· 2029 2030 preempt_enable(); 2030 2031 } 2031 2032 2032 - if (pending_output) 2033 - wake_up_klogd(); 2033 + wake_up_klogd(); 2034 2034 return printed_len; 2035 2035 } 2036 2036 EXPORT_SYMBOL(vprintk_emit); ··· 2042 2044 2043 2045 int vprintk_default(const char *fmt, va_list args) 2044 2046 { 2045 - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); 2047 + return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); 2046 2048 } 2047 2049 EXPORT_SYMBOL_GPL(vprintk_default); 2048 2050 ··· 2086 2088 #define PREFIX_MAX 0 2087 2089 #define printk_time false 2088 2090 2091 + #define prb_read_valid(rb, seq, r) false 2092 + #define prb_first_valid_seq(rb) 0 2093 + 2089 2094 static u64 syslog_seq; 2090 - static u32 syslog_idx; 2091 2095 static u64 console_seq; 2092 - static u32 console_idx; 2093 2096 static u64 exclusive_console_stop_seq; 2094 - static u64 log_first_seq; 2095 - static u32 log_first_idx; 2096 - static u64 log_next_seq; 2097 - static char *log_text(const struct printk_log *msg) { return NULL; } 2098 - static char *log_dict(const struct printk_log *msg) { return NULL; } 2099 - static struct printk_log *log_from_idx(u32 idx) { return NULL; } 2100 - static u32 log_next(u32 idx) { return 0; } 2101 - static ssize_t msg_print_ext_header(char *buf, size_t size, 2102 - struct printk_log *msg, 2103 - u64 seq) { return 0; } 2097 + static unsigned long console_dropped; 2098 + 2099 + static size_t record_print_text(const struct printk_record *r, 2100 + bool syslog, bool time) 2101 + { 2102 + return 0; 2103 + } 2104 + static ssize_t info_print_ext_header(char *buf, size_t size, 2105 + struct printk_info *info) 2106 + { 2107 + return 0; 2108 + } 2104 2109 static ssize_t msg_print_ext_body(char *buf, size_t size, 2105 - char *dict, size_t 
dict_len, 2106 - char *text, size_t text_len) { return 0; } 2110 + char *text, size_t text_len, 2111 + struct dev_printk_info *dev_info) { return 0; } 2107 2112 static void console_lock_spinning_enable(void) { } 2108 2113 static int console_lock_spinning_disable_and_check(void) { return 0; } 2109 2114 static void call_console_drivers(const char *ext_text, size_t ext_len, 2110 2115 const char *text, size_t len) {} 2111 - static size_t msg_print_text(const struct printk_log *msg, bool syslog, 2112 - bool time, char *buf, size_t size) { return 0; } 2113 2116 static bool suppress_message_printing(int level) { return false; } 2114 2117 2115 2118 #endif /* CONFIG_PRINTK */ ··· 2397 2398 static char text[LOG_LINE_MAX + PREFIX_MAX]; 2398 2399 unsigned long flags; 2399 2400 bool do_cond_resched, retry; 2401 + struct printk_info info; 2402 + struct printk_record r; 2400 2403 2401 2404 if (console_suspended) { 2402 2405 up_console_sem(); 2403 2406 return; 2404 2407 } 2408 + 2409 + prb_rec_init_rd(&r, &info, text, sizeof(text)); 2405 2410 2406 2411 /* 2407 2412 * Console drivers are called with interrupts disabled, so ··· 2419 2416 * 2420 2417 * console_trylock() is not able to detect the preemptive 2421 2418 * context reliably. Therefore the value must be stored before 2422 - * and cleared after the the "again" goto label. 2419 + * and cleared after the "again" goto label. 
2423 2420 */ 2424 2421 do_cond_resched = console_may_schedule; 2425 2422 again: ··· 2437 2434 } 2438 2435 2439 2436 for (;;) { 2440 - struct printk_log *msg; 2441 2437 size_t ext_len = 0; 2442 2438 size_t len; 2443 2439 2444 2440 printk_safe_enter_irqsave(flags); 2445 2441 raw_spin_lock(&logbuf_lock); 2446 - if (console_seq < log_first_seq) { 2447 - len = snprintf(text, sizeof(text), 2448 - "** %llu printk messages dropped **\n", 2449 - log_first_seq - console_seq); 2450 - 2451 - /* messages are gone, move to first one */ 2452 - console_seq = log_first_seq; 2453 - console_idx = log_first_idx; 2454 - } else { 2455 - len = 0; 2456 - } 2457 2442 skip: 2458 - if (console_seq == log_next_seq) 2443 + if (!prb_read_valid(prb, console_seq, &r)) 2459 2444 break; 2460 2445 2461 - msg = log_from_idx(console_idx); 2462 - if (suppress_message_printing(msg->level)) { 2446 + if (console_seq != r.info->seq) { 2447 + console_dropped += r.info->seq - console_seq; 2448 + console_seq = r.info->seq; 2449 + } 2450 + 2451 + if (suppress_message_printing(r.info->level)) { 2463 2452 /* 2464 2453 * Skip record we have buffered and already printed 2465 2454 * directly to the console when we received it, and 2466 2455 * record that has level above the console loglevel. 2467 2456 */ 2468 - console_idx = log_next(console_idx); 2469 2457 console_seq++; 2470 2458 goto skip; 2471 2459 } ··· 2467 2473 exclusive_console = NULL; 2468 2474 } 2469 2475 2470 - len += msg_print_text(msg, 2471 - console_msg_format & MSG_FORMAT_SYSLOG, 2472 - printk_time, text + len, sizeof(text) - len); 2476 + /* 2477 + * Handle extended console text first because later 2478 + * record_print_text() will modify the record buffer in-place. 
2479 + */ 2473 2480 if (nr_ext_console_drivers) { 2474 - ext_len = msg_print_ext_header(ext_text, 2481 + ext_len = info_print_ext_header(ext_text, 2475 2482 sizeof(ext_text), 2476 - msg, console_seq); 2483 + r.info); 2477 2484 ext_len += msg_print_ext_body(ext_text + ext_len, 2478 2485 sizeof(ext_text) - ext_len, 2479 - log_dict(msg), msg->dict_len, 2480 - log_text(msg), msg->text_len); 2486 + &r.text_buf[0], 2487 + r.info->text_len, 2488 + &r.info->dev_info); 2481 2489 } 2482 - console_idx = log_next(console_idx); 2490 + len = record_print_text(&r, 2491 + console_msg_format & MSG_FORMAT_SYSLOG, 2492 + printk_time); 2483 2493 console_seq++; 2484 2494 raw_spin_unlock(&logbuf_lock); 2485 2495 ··· 2523 2525 * flush, no worries. 2524 2526 */ 2525 2527 raw_spin_lock(&logbuf_lock); 2526 - retry = console_seq != log_next_seq; 2528 + retry = prb_read_valid(prb, console_seq, NULL); 2527 2529 raw_spin_unlock(&logbuf_lock); 2528 2530 printk_safe_exit_irqrestore(flags); 2529 2531 ··· 2592 2594 unsigned long flags; 2593 2595 2594 2596 logbuf_lock_irqsave(flags); 2595 - console_seq = log_first_seq; 2596 - console_idx = log_first_idx; 2597 + console_seq = prb_first_valid_seq(prb); 2597 2598 logbuf_unlock_irqrestore(flags); 2598 2599 } 2599 2600 console_unlock(); ··· 2835 2838 exclusive_console = newcon; 2836 2839 exclusive_console_stop_seq = console_seq; 2837 2840 console_seq = syslog_seq; 2838 - console_idx = syslog_idx; 2839 2841 logbuf_unlock_irqrestore(flags); 2840 2842 } 2841 2843 console_unlock(); ··· 3058 3062 { 3059 3063 int r; 3060 3064 3061 - r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); 3065 + r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, fmt, args); 3062 3066 defer_console_output(); 3063 3067 3064 3068 return r; ··· 3223 3227 3224 3228 logbuf_lock_irqsave(flags); 3225 3229 dumper->cur_seq = clear_seq; 3226 - dumper->cur_idx = clear_idx; 3227 - dumper->next_seq = log_next_seq; 3228 - dumper->next_idx = log_next_idx; 3230 + dumper->next_seq = prb_next_seq(prb); 
3229 3231 logbuf_unlock_irqrestore(flags); 3230 3232 3231 3233 /* invoke dumper which will iterate over records */ ··· 3257 3263 bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, 3258 3264 char *line, size_t size, size_t *len) 3259 3265 { 3260 - struct printk_log *msg; 3266 + struct printk_info info; 3267 + unsigned int line_count; 3268 + struct printk_record r; 3261 3269 size_t l = 0; 3262 3270 bool ret = false; 3271 + 3272 + prb_rec_init_rd(&r, &info, line, size); 3263 3273 3264 3274 if (!dumper->active) 3265 3275 goto out; 3266 3276 3267 - if (dumper->cur_seq < log_first_seq) { 3268 - /* messages are gone, move to first available one */ 3269 - dumper->cur_seq = log_first_seq; 3270 - dumper->cur_idx = log_first_idx; 3277 + /* Read text or count text lines? */ 3278 + if (line) { 3279 + if (!prb_read_valid(prb, dumper->cur_seq, &r)) 3280 + goto out; 3281 + l = record_print_text(&r, syslog, printk_time); 3282 + } else { 3283 + if (!prb_read_valid_info(prb, dumper->cur_seq, 3284 + &info, &line_count)) { 3285 + goto out; 3286 + } 3287 + l = get_record_print_text_size(&info, line_count, syslog, 3288 + printk_time); 3289 + 3271 3290 } 3272 3291 3273 - /* last entry */ 3274 - if (dumper->cur_seq >= log_next_seq) 3275 - goto out; 3276 - 3277 - msg = log_from_idx(dumper->cur_idx); 3278 - l = msg_print_text(msg, syslog, printk_time, line, size); 3279 - 3280 - dumper->cur_idx = log_next(dumper->cur_idx); 3281 - dumper->cur_seq++; 3292 + dumper->cur_seq = r.info->seq + 1; 3282 3293 ret = true; 3283 3294 out: 3284 3295 if (len) ··· 3331 3332 * @len: length of line placed into buffer 3332 3333 * 3333 3334 * Start at the end of the kmsg buffer and fill the provided buffer 3334 - * with as many of the the *youngest* kmsg records that fit into it. 3335 + * with as many of the *youngest* kmsg records that fit into it. 3335 3336 * If the buffer is large enough, all available kmsg records will be 3336 3337 * copied with a single call. 
3337 3338 * ··· 3344 3345 bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, 3345 3346 char *buf, size_t size, size_t *len) 3346 3347 { 3348 + struct printk_info info; 3349 + unsigned int line_count; 3350 + struct printk_record r; 3347 3351 unsigned long flags; 3348 3352 u64 seq; 3349 - u32 idx; 3350 3353 u64 next_seq; 3351 - u32 next_idx; 3352 3354 size_t l = 0; 3353 3355 bool ret = false; 3354 3356 bool time = printk_time; 3355 3357 3356 - if (!dumper->active) 3358 + prb_rec_init_rd(&r, &info, buf, size); 3359 + 3360 + if (!dumper->active || !buf || !size) 3357 3361 goto out; 3358 3362 3359 3363 logbuf_lock_irqsave(flags); 3360 - if (dumper->cur_seq < log_first_seq) { 3364 + if (dumper->cur_seq < prb_first_valid_seq(prb)) { 3361 3365 /* messages are gone, move to first available one */ 3362 - dumper->cur_seq = log_first_seq; 3363 - dumper->cur_idx = log_first_idx; 3366 + dumper->cur_seq = prb_first_valid_seq(prb); 3364 3367 } 3365 3368 3366 3369 /* last entry */ ··· 3373 3372 3374 3373 /* calculate length of entire buffer */ 3375 3374 seq = dumper->cur_seq; 3376 - idx = dumper->cur_idx; 3377 - while (seq < dumper->next_seq) { 3378 - struct printk_log *msg = log_from_idx(idx); 3379 - 3380 - l += msg_print_text(msg, true, time, NULL, 0); 3381 - idx = log_next(idx); 3382 - seq++; 3375 + while (prb_read_valid_info(prb, seq, &info, &line_count)) { 3376 + if (r.info->seq >= dumper->next_seq) 3377 + break; 3378 + l += get_record_print_text_size(&info, line_count, true, time); 3379 + seq = r.info->seq + 1; 3383 3380 } 3384 3381 3385 3382 /* move first record forward until length fits into the buffer */ 3386 3383 seq = dumper->cur_seq; 3387 - idx = dumper->cur_idx; 3388 - while (l >= size && seq < dumper->next_seq) { 3389 - struct printk_log *msg = log_from_idx(idx); 3390 - 3391 - l -= msg_print_text(msg, true, time, NULL, 0); 3392 - idx = log_next(idx); 3393 - seq++; 3384 + while (l >= size && prb_read_valid_info(prb, seq, 3385 + &info, &line_count)) { 
3386 + if (r.info->seq >= dumper->next_seq) 3387 + break; 3388 + l -= get_record_print_text_size(&info, line_count, true, time); 3389 + seq = r.info->seq + 1; 3394 3390 } 3395 3391 3396 3392 /* last message in next interation */ 3397 3393 next_seq = seq; 3398 - next_idx = idx; 3399 3394 3395 + /* actually read text into the buffer now */ 3400 3396 l = 0; 3401 - while (seq < dumper->next_seq) { 3402 - struct printk_log *msg = log_from_idx(idx); 3397 + while (prb_read_valid(prb, seq, &r)) { 3398 + if (r.info->seq >= dumper->next_seq) 3399 + break; 3403 3400 3404 - l += msg_print_text(msg, syslog, time, buf + l, size - l); 3405 - idx = log_next(idx); 3406 - seq++; 3401 + l += record_print_text(&r, syslog, time); 3402 + 3403 + /* adjust record to store to remaining buffer space */ 3404 + prb_rec_init_rd(&r, &info, buf + l, size - l); 3405 + 3406 + seq = r.info->seq + 1; 3407 3407 } 3408 3408 3409 3409 dumper->next_seq = next_seq; 3410 - dumper->next_idx = next_idx; 3411 3410 ret = true; 3412 3411 logbuf_unlock_irqrestore(flags); 3413 3412 out: ··· 3430 3429 void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) 3431 3430 { 3432 3431 dumper->cur_seq = clear_seq; 3433 - dumper->cur_idx = clear_idx; 3434 - dumper->next_seq = log_next_seq; 3435 - dumper->next_idx = log_next_idx; 3432 + dumper->next_seq = prb_next_seq(prb); 3436 3433 } 3437 3434 3438 3435 /**
+2083
kernel/printk/printk_ringbuffer.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <linux/kernel.h> 4 + #include <linux/irqflags.h> 5 + #include <linux/string.h> 6 + #include <linux/errno.h> 7 + #include <linux/bug.h> 8 + #include "printk_ringbuffer.h" 9 + 10 + /** 11 + * DOC: printk_ringbuffer overview 12 + * 13 + * Data Structure 14 + * -------------- 15 + * The printk_ringbuffer is made up of 3 internal ringbuffers: 16 + * 17 + * desc_ring 18 + * A ring of descriptors and their meta data (such as sequence number, 19 + * timestamp, loglevel, etc.) as well as internal state information about 20 + * the record and logical positions specifying where in the other 21 + * ringbuffer the text strings are located. 22 + * 23 + * text_data_ring 24 + * A ring of data blocks. A data block consists of an unsigned long 25 + * integer (ID) that maps to a desc_ring index followed by the text 26 + * string of the record. 27 + * 28 + * The internal state information of a descriptor is the key element to allow 29 + * readers and writers to locklessly synchronize access to the data. 30 + * 31 + * Implementation 32 + * -------------- 33 + * 34 + * Descriptor Ring 35 + * ~~~~~~~~~~~~~~~ 36 + * The descriptor ring is an array of descriptors. A descriptor contains 37 + * essential meta data to track the data of a printk record using 38 + * blk_lpos structs pointing to associated text data blocks (see 39 + * "Data Rings" below). Each descriptor is assigned an ID that maps 40 + * directly to index values of the descriptor array and has a state. The ID 41 + * and the state are bitwise combined into a single descriptor field named 42 + * @state_var, allowing ID and state to be synchronously and atomically 43 + * updated. 44 + * 45 + * Descriptors have four states: 46 + * 47 + * reserved 48 + * A writer is modifying the record. 49 + * 50 + * committed 51 + * The record and all its data are written. 
A writer can reopen the 52 + * descriptor (transitioning it back to reserved), but in the committed 53 + * state the data is consistent. 54 + * 55 + * finalized 56 + * The record and all its data are complete and available for reading. A 57 + * writer cannot reopen the descriptor. 58 + * 59 + * reusable 60 + * The record exists, but its text and/or meta data may no longer be 61 + * available. 62 + * 63 + * Querying the @state_var of a record requires providing the ID of the 64 + * descriptor to query. This can yield a possible fifth (pseudo) state: 65 + * 66 + * miss 67 + * The descriptor being queried has an unexpected ID. 68 + * 69 + * The descriptor ring has a @tail_id that contains the ID of the oldest 70 + * descriptor and @head_id that contains the ID of the newest descriptor. 71 + * 72 + * When a new descriptor should be created (and the ring is full), the tail 73 + * descriptor is invalidated by first transitioning to the reusable state and 74 + * then invalidating all tail data blocks up to and including the data blocks 75 + * associated with the tail descriptor (for the text ring). Then 76 + * @tail_id is advanced, followed by advancing @head_id. And finally the 77 + * @state_var of the new descriptor is initialized to the new ID and reserved 78 + * state. 79 + * 80 + * The @tail_id can only be advanced if the new @tail_id would be in the 81 + * committed or reusable queried state. This makes it possible that a valid 82 + * sequence number of the tail is always available. 83 + * 84 + * Descriptor Finalization 85 + * ~~~~~~~~~~~~~~~~~~~~~~~ 86 + * When a writer calls the commit function prb_commit(), record data is 87 + * fully stored and is consistent within the ringbuffer. However, a writer can 88 + * reopen that record, claiming exclusive access (as with prb_reserve()), and 89 + * modify that record. When finished, the writer must again commit the record. 
90 + * 91 + * In order for a record to be made available to readers (and also become 92 + * recyclable for writers), it must be finalized. A finalized record cannot be 93 + * reopened and can never become "unfinalized". Record finalization can occur 94 + * in three different scenarios: 95 + * 96 + * 1) A writer can simultaneously commit and finalize its record by calling 97 + * prb_final_commit() instead of prb_commit(). 98 + * 99 + * 2) When a new record is reserved and the previous record has been 100 + * committed via prb_commit(), that previous record is automatically 101 + * finalized. 102 + * 103 + * 3) When a record is committed via prb_commit() and a newer record 104 + * already exists, the record being committed is automatically finalized. 105 + * 106 + * Data Ring 107 + * ~~~~~~~~~ 108 + * The text data ring is a byte array composed of data blocks. Data blocks are 109 + * referenced by blk_lpos structs that point to the logical position of the 110 + * beginning of a data block and the beginning of the next adjacent data 111 + * block. Logical positions are mapped directly to index values of the byte 112 + * array ringbuffer. 113 + * 114 + * Each data block consists of an ID followed by the writer data. The ID is 115 + * the identifier of a descriptor that is associated with the data block. A 116 + * given data block is considered valid if all of the following conditions 117 + * are met: 118 + * 119 + * 1) The descriptor associated with the data block is in the committed 120 + * or finalized queried state. 121 + * 122 + * 2) The blk_lpos struct within the descriptor associated with the data 123 + * block references back to the same data block. 124 + * 125 + * 3) The data block is within the head/tail logical position range. 
126 + * 127 + * If the writer data of a data block would extend beyond the end of the 128 + * byte array, only the ID of the data block is stored at the logical 129 + * position and the full data block (ID and writer data) is stored at the 130 + * beginning of the byte array. The referencing blk_lpos will point to the 131 + * ID before the wrap and the next data block will be at the logical 132 + * position adjacent the full data block after the wrap. 133 + * 134 + * Data rings have a @tail_lpos that points to the beginning of the oldest 135 + * data block and a @head_lpos that points to the logical position of the 136 + * next (not yet existing) data block. 137 + * 138 + * When a new data block should be created (and the ring is full), tail data 139 + * blocks will first be invalidated by putting their associated descriptors 140 + * into the reusable state and then pushing the @tail_lpos forward beyond 141 + * them. Then the @head_lpos is pushed forward and is associated with a new 142 + * descriptor. If a data block is not valid, the @tail_lpos cannot be 143 + * advanced beyond it. 144 + * 145 + * Info Array 146 + * ~~~~~~~~~~ 147 + * The general meta data of printk records are stored in printk_info structs, 148 + * stored in an array with the same number of elements as the descriptor ring. 149 + * Each info corresponds to the descriptor of the same index in the 150 + * descriptor ring. Info validity is confirmed by evaluating the corresponding 151 + * descriptor before and after loading the info. 152 + * 153 + * Usage 154 + * ----- 155 + * Here are some simple examples demonstrating writers and readers. For the 156 + * examples a global ringbuffer (test_rb) is available (which is not the 157 + * actual ringbuffer used by printk):: 158 + * 159 + * DEFINE_PRINTKRB(test_rb, 15, 5); 160 + * 161 + * This ringbuffer allows up to 32768 records (2 ^ 15) and has a size of 162 + * 1 MiB (2 ^ (15 + 5)) for text data. 
163 + * 164 + * Sample writer code:: 165 + * 166 + * const char *textstr = "message text"; 167 + * struct prb_reserved_entry e; 168 + * struct printk_record r; 169 + * 170 + * // specify how much to allocate 171 + * prb_rec_init_wr(&r, strlen(textstr) + 1); 172 + * 173 + * if (prb_reserve(&e, &test_rb, &r)) { 174 + * snprintf(r.text_buf, r.text_buf_size, "%s", textstr); 175 + * 176 + * r.info->text_len = strlen(textstr); 177 + * r.info->ts_nsec = local_clock(); 178 + * r.info->caller_id = printk_caller_id(); 179 + * 180 + * // commit and finalize the record 181 + * prb_final_commit(&e); 182 + * } 183 + * 184 + * Note that additional writer functions are available to extend a record 185 + * after it has been committed but not yet finalized. This can be done as 186 + * long as no new records have been reserved and the caller is the same. 187 + * 188 + * Sample writer code (record extending):: 189 + * 190 + * // alternate rest of previous example 191 + * 192 + * r.info->text_len = strlen(textstr); 193 + * r.info->ts_nsec = local_clock(); 194 + * r.info->caller_id = printk_caller_id(); 195 + * 196 + * // commit the record (but do not finalize yet) 197 + * prb_commit(&e); 198 + * } 199 + * 200 + * ... 
201 + * 202 + * // specify additional 5 bytes text space to extend 203 + * prb_rec_init_wr(&r, 5); 204 + * 205 + * // try to extend, but only if it does not exceed 32 bytes 206 + * if (prb_reserve_in_last(&e, &test_rb, &r, printk_caller_id()), 32) { 207 + * snprintf(&r.text_buf[r.info->text_len], 208 + * r.text_buf_size - r.info->text_len, "hello"); 209 + * 210 + * r.info->text_len += 5; 211 + * 212 + * // commit and finalize the record 213 + * prb_final_commit(&e); 214 + * } 215 + * 216 + * Sample reader code:: 217 + * 218 + * struct printk_info info; 219 + * struct printk_record r; 220 + * char text_buf[32]; 221 + * u64 seq; 222 + * 223 + * prb_rec_init_rd(&r, &info, &text_buf[0], sizeof(text_buf)); 224 + * 225 + * prb_for_each_record(0, &test_rb, &seq, &r) { 226 + * if (info.seq != seq) 227 + * pr_warn("lost %llu records\n", info.seq - seq); 228 + * 229 + * if (info.text_len > r.text_buf_size) { 230 + * pr_warn("record %llu text truncated\n", info.seq); 231 + * text_buf[r.text_buf_size - 1] = 0; 232 + * } 233 + * 234 + * pr_info("%llu: %llu: %s\n", info.seq, info.ts_nsec, 235 + * &text_buf[0]); 236 + * } 237 + * 238 + * Note that additional less convenient reader functions are available to 239 + * allow complex record access. 240 + * 241 + * ABA Issues 242 + * ~~~~~~~~~~ 243 + * To help avoid ABA issues, descriptors are referenced by IDs (array index 244 + * values combined with tagged bits counting array wraps) and data blocks are 245 + * referenced by logical positions (array index values combined with tagged 246 + * bits counting array wraps). However, on 32-bit systems the number of 247 + * tagged bits is relatively small such that an ABA incident is (at least 248 + * theoretically) possible. 
For example, if 4 million maximally sized (1KiB) 249 + * printk messages were to occur in NMI context on a 32-bit system, the 250 + * interrupted context would not be able to recognize that the 32-bit integer 251 + * completely wrapped and thus represents a different data block than the one 252 + * the interrupted context expects. 253 + * 254 + * To help combat this possibility, additional state checking is performed 255 + * (such as using cmpxchg() even though set() would suffice). These extra 256 + * checks are commented as such and will hopefully catch any ABA issue that 257 + * a 32-bit system might experience. 258 + * 259 + * Memory Barriers 260 + * ~~~~~~~~~~~~~~~ 261 + * Multiple memory barriers are used. To simplify proving correctness and 262 + * generating litmus tests, lines of code related to memory barriers 263 + * (loads, stores, and the associated memory barriers) are labeled:: 264 + * 265 + * LMM(function:letter) 266 + * 267 + * Comments reference the labels using only the "function:letter" part. 
268 + * 269 + * The memory barrier pairs and their ordering are: 270 + * 271 + * desc_reserve:D / desc_reserve:B 272 + * push descriptor tail (id), then push descriptor head (id) 273 + * 274 + * desc_reserve:D / data_push_tail:B 275 + * push data tail (lpos), then set new descriptor reserved (state) 276 + * 277 + * desc_reserve:D / desc_push_tail:C 278 + * push descriptor tail (id), then set new descriptor reserved (state) 279 + * 280 + * desc_reserve:D / prb_first_seq:C 281 + * push descriptor tail (id), then set new descriptor reserved (state) 282 + * 283 + * desc_reserve:F / desc_read:D 284 + * set new descriptor id and reserved (state), then allow writer changes 285 + * 286 + * data_alloc:A (or data_realloc:A) / desc_read:D 287 + * set old descriptor reusable (state), then modify new data block area 288 + * 289 + * data_alloc:A (or data_realloc:A) / data_push_tail:B 290 + * push data tail (lpos), then modify new data block area 291 + * 292 + * _prb_commit:B / desc_read:B 293 + * store writer changes, then set new descriptor committed (state) 294 + * 295 + * desc_reopen_last:A / _prb_commit:B 296 + * set descriptor reserved (state), then read descriptor data 297 + * 298 + * _prb_commit:B / desc_reserve:D 299 + * set new descriptor committed (state), then check descriptor head (id) 300 + * 301 + * data_push_tail:D / data_push_tail:A 302 + * set descriptor reusable (state), then push data tail (lpos) 303 + * 304 + * desc_push_tail:B / desc_reserve:D 305 + * set descriptor reusable (state), then push descriptor tail (id) 306 + */ 307 + 308 + #define DATA_SIZE(data_ring) _DATA_SIZE((data_ring)->size_bits) 309 + #define DATA_SIZE_MASK(data_ring) (DATA_SIZE(data_ring) - 1) 310 + 311 + #define DESCS_COUNT(desc_ring) _DESCS_COUNT((desc_ring)->count_bits) 312 + #define DESCS_COUNT_MASK(desc_ring) (DESCS_COUNT(desc_ring) - 1) 313 + 314 + /* Determine the data array index from a logical position. 
*/ 315 + #define DATA_INDEX(data_ring, lpos) ((lpos) & DATA_SIZE_MASK(data_ring)) 316 + 317 + /* Determine the desc array index from an ID or sequence number. */ 318 + #define DESC_INDEX(desc_ring, n) ((n) & DESCS_COUNT_MASK(desc_ring)) 319 + 320 + /* Determine how many times the data array has wrapped. */ 321 + #define DATA_WRAPS(data_ring, lpos) ((lpos) >> (data_ring)->size_bits) 322 + 323 + /* Determine if a logical position refers to a data-less block. */ 324 + #define LPOS_DATALESS(lpos) ((lpos) & 1UL) 325 + #define BLK_DATALESS(blk) (LPOS_DATALESS((blk)->begin) && \ 326 + LPOS_DATALESS((blk)->next)) 327 + 328 + /* Get the logical position at index 0 of the current wrap. */ 329 + #define DATA_THIS_WRAP_START_LPOS(data_ring, lpos) \ 330 + ((lpos) & ~DATA_SIZE_MASK(data_ring)) 331 + 332 + /* Get the ID for the same index of the previous wrap as the given ID. */ 333 + #define DESC_ID_PREV_WRAP(desc_ring, id) \ 334 + DESC_ID((id) - DESCS_COUNT(desc_ring)) 335 + 336 + /* 337 + * A data block: mapped directly to the beginning of the data block area 338 + * specified as a logical position within the data ring. 339 + * 340 + * @id: the ID of the associated descriptor 341 + * @data: the writer data 342 + * 343 + * Note that the size of a data block is only known by its associated 344 + * descriptor. 345 + */ 346 + struct prb_data_block { 347 + unsigned long id; 348 + char data[0]; 349 + }; 350 + 351 + /* 352 + * Return the descriptor associated with @n. @n can be either a 353 + * descriptor ID or a sequence number. 354 + */ 355 + static struct prb_desc *to_desc(struct prb_desc_ring *desc_ring, u64 n) 356 + { 357 + return &desc_ring->descs[DESC_INDEX(desc_ring, n)]; 358 + } 359 + 360 + /* 361 + * Return the printk_info associated with @n. @n can be either a 362 + * descriptor ID or a sequence number. 
363 + */ 364 + static struct printk_info *to_info(struct prb_desc_ring *desc_ring, u64 n) 365 + { 366 + return &desc_ring->infos[DESC_INDEX(desc_ring, n)]; 367 + } 368 + 369 + static struct prb_data_block *to_block(struct prb_data_ring *data_ring, 370 + unsigned long begin_lpos) 371 + { 372 + return (void *)&data_ring->data[DATA_INDEX(data_ring, begin_lpos)]; 373 + } 374 + 375 + /* 376 + * Increase the data size to account for data block meta data plus any 377 + * padding so that the adjacent data block is aligned on the ID size. 378 + */ 379 + static unsigned int to_blk_size(unsigned int size) 380 + { 381 + struct prb_data_block *db = NULL; 382 + 383 + size += sizeof(*db); 384 + size = ALIGN(size, sizeof(db->id)); 385 + return size; 386 + } 387 + 388 + /* 389 + * Sanity checker for reserve size. The ringbuffer code assumes that a data 390 + * block does not exceed the maximum possible size that could fit within the 391 + * ringbuffer. This function provides that basic size check so that the 392 + * assumption is safe. 393 + */ 394 + static bool data_check_size(struct prb_data_ring *data_ring, unsigned int size) 395 + { 396 + struct prb_data_block *db = NULL; 397 + 398 + if (size == 0) 399 + return true; 400 + 401 + /* 402 + * Ensure the alignment padded size could possibly fit in the data 403 + * array. The largest possible data block must still leave room for 404 + * at least the ID of the next block. 405 + */ 406 + size = to_blk_size(size); 407 + if (size > DATA_SIZE(data_ring) - sizeof(db->id)) 408 + return false; 409 + 410 + return true; 411 + } 412 + 413 + /* Query the state of a descriptor. */ 414 + static enum desc_state get_desc_state(unsigned long id, 415 + unsigned long state_val) 416 + { 417 + if (id != DESC_ID(state_val)) 418 + return desc_miss; 419 + 420 + return DESC_STATE(state_val); 421 + } 422 + 423 + /* 424 + * Get a copy of a specified descriptor and return its queried state. 
If the 425 + * descriptor is in an inconsistent state (miss or reserved), the caller can 426 + * only expect the descriptor's @state_var field to be valid. 427 + * 428 + * The sequence number and caller_id can be optionally retrieved. Like all 429 + * non-state_var data, they are only valid if the descriptor is in a 430 + * consistent state. 431 + */ 432 + static enum desc_state desc_read(struct prb_desc_ring *desc_ring, 433 + unsigned long id, struct prb_desc *desc_out, 434 + u64 *seq_out, u32 *caller_id_out) 435 + { 436 + struct printk_info *info = to_info(desc_ring, id); 437 + struct prb_desc *desc = to_desc(desc_ring, id); 438 + atomic_long_t *state_var = &desc->state_var; 439 + enum desc_state d_state; 440 + unsigned long state_val; 441 + 442 + /* Check the descriptor state. */ 443 + state_val = atomic_long_read(state_var); /* LMM(desc_read:A) */ 444 + d_state = get_desc_state(id, state_val); 445 + if (d_state == desc_miss || d_state == desc_reserved) { 446 + /* 447 + * The descriptor is in an inconsistent state. Set at least 448 + * @state_var so that the caller can see the details of 449 + * the inconsistent state. 450 + */ 451 + goto out; 452 + } 453 + 454 + /* 455 + * Guarantee the state is loaded before copying the descriptor 456 + * content. This avoids copying obsolete descriptor content that might 457 + * not apply to the descriptor state. This pairs with _prb_commit:B. 458 + * 459 + * Memory barrier involvement: 460 + * 461 + * If desc_read:A reads from _prb_commit:B, then desc_read:C reads 462 + * from _prb_commit:A. 463 + * 464 + * Relies on: 465 + * 466 + * WMB from _prb_commit:A to _prb_commit:B 467 + * matching 468 + * RMB from desc_read:A to desc_read:C 469 + */ 470 + smp_rmb(); /* LMM(desc_read:B) */ 471 + 472 + /* 473 + * Copy the descriptor data. The data is not valid until the 474 + * state has been re-checked. A memcpy() for all of @desc 475 + * cannot be used because of the atomic_t @state_var field. 
476 + */ 477 + memcpy(&desc_out->text_blk_lpos, &desc->text_blk_lpos, 478 + sizeof(desc_out->text_blk_lpos)); /* LMM(desc_read:C) */ 479 + if (seq_out) 480 + *seq_out = info->seq; /* also part of desc_read:C */ 481 + if (caller_id_out) 482 + *caller_id_out = info->caller_id; /* also part of desc_read:C */ 483 + 484 + /* 485 + * 1. Guarantee the descriptor content is loaded before re-checking 486 + * the state. This avoids reading an obsolete descriptor state 487 + * that may not apply to the copied content. This pairs with 488 + * desc_reserve:F. 489 + * 490 + * Memory barrier involvement: 491 + * 492 + * If desc_read:C reads from desc_reserve:G, then desc_read:E 493 + * reads from desc_reserve:F. 494 + * 495 + * Relies on: 496 + * 497 + * WMB from desc_reserve:F to desc_reserve:G 498 + * matching 499 + * RMB from desc_read:C to desc_read:E 500 + * 501 + * 2. Guarantee the record data is loaded before re-checking the 502 + * state. This avoids reading an obsolete descriptor state that may 503 + * not apply to the copied data. This pairs with data_alloc:A and 504 + * data_realloc:A. 505 + * 506 + * Memory barrier involvement: 507 + * 508 + * If copy_data:A reads from data_alloc:B, then desc_read:E 509 + * reads from desc_make_reusable:A. 510 + * 511 + * Relies on: 512 + * 513 + * MB from desc_make_reusable:A to data_alloc:B 514 + * matching 515 + * RMB from desc_read:C to desc_read:E 516 + * 517 + * Note: desc_make_reusable:A and data_alloc:B can be different 518 + * CPUs. However, the data_alloc:B CPU (which performs the 519 + * full memory barrier) must have previously seen 520 + * desc_make_reusable:A. 521 + */ 522 + smp_rmb(); /* LMM(desc_read:D) */ 523 + 524 + /* 525 + * The data has been copied. Return the current descriptor state, 526 + * which may have changed since the load above. 
527 + */ 528 + state_val = atomic_long_read(state_var); /* LMM(desc_read:E) */ 529 + d_state = get_desc_state(id, state_val); 530 + out: 531 + atomic_long_set(&desc_out->state_var, state_val); 532 + return d_state; 533 + } 534 + 535 + /* 536 + * Take a specified descriptor out of the finalized state by attempting 537 + * the transition from finalized to reusable. Either this context or some 538 + * other context will have been successful. 539 + */ 540 + static void desc_make_reusable(struct prb_desc_ring *desc_ring, 541 + unsigned long id) 542 + { 543 + unsigned long val_finalized = DESC_SV(id, desc_finalized); 544 + unsigned long val_reusable = DESC_SV(id, desc_reusable); 545 + struct prb_desc *desc = to_desc(desc_ring, id); 546 + atomic_long_t *state_var = &desc->state_var; 547 + 548 + atomic_long_cmpxchg_relaxed(state_var, val_finalized, 549 + val_reusable); /* LMM(desc_make_reusable:A) */ 550 + } 551 + 552 + /* 553 + * Given the text data ring, put the associated descriptor of each 554 + * data block from @lpos_begin until @lpos_end into the reusable state. 555 + * 556 + * If there is any problem making the associated descriptor reusable, either 557 + * the descriptor has not yet been finalized or another writer context has 558 + * already pushed the tail lpos past the problematic data block. Regardless, 559 + * on error the caller can re-load the tail lpos to determine the situation. 560 + */ 561 + static bool data_make_reusable(struct printk_ringbuffer *rb, 562 + struct prb_data_ring *data_ring, 563 + unsigned long lpos_begin, 564 + unsigned long lpos_end, 565 + unsigned long *lpos_out) 566 + { 567 + struct prb_desc_ring *desc_ring = &rb->desc_ring; 568 + struct prb_data_block *blk; 569 + enum desc_state d_state; 570 + struct prb_desc desc; 571 + struct prb_data_blk_lpos *blk_lpos = &desc.text_blk_lpos; 572 + unsigned long id; 573 + 574 + /* Loop until @lpos_begin has advanced to or beyond @lpos_end. 
*/ 575 + while ((lpos_end - lpos_begin) - 1 < DATA_SIZE(data_ring)) { 576 + blk = to_block(data_ring, lpos_begin); 577 + 578 + /* 579 + * Load the block ID from the data block. This is a data race 580 + * against a writer that may have newly reserved this data 581 + * area. If the loaded value matches a valid descriptor ID, 582 + * the blk_lpos of that descriptor will be checked to make 583 + * sure it points back to this data block. If the check fails, 584 + * the data area has been recycled by another writer. 585 + */ 586 + id = blk->id; /* LMM(data_make_reusable:A) */ 587 + 588 + d_state = desc_read(desc_ring, id, &desc, 589 + NULL, NULL); /* LMM(data_make_reusable:B) */ 590 + 591 + switch (d_state) { 592 + case desc_miss: 593 + case desc_reserved: 594 + case desc_committed: 595 + return false; 596 + case desc_finalized: 597 + /* 598 + * This data block is invalid if the descriptor 599 + * does not point back to it. 600 + */ 601 + if (blk_lpos->begin != lpos_begin) 602 + return false; 603 + desc_make_reusable(desc_ring, id); 604 + break; 605 + case desc_reusable: 606 + /* 607 + * This data block is invalid if the descriptor 608 + * does not point back to it. 609 + */ 610 + if (blk_lpos->begin != lpos_begin) 611 + return false; 612 + break; 613 + } 614 + 615 + /* Advance @lpos_begin to the next data block. */ 616 + lpos_begin = blk_lpos->next; 617 + } 618 + 619 + *lpos_out = lpos_begin; 620 + return true; 621 + } 622 + 623 + /* 624 + * Advance the data ring tail to at least @lpos. This function puts 625 + * descriptors into the reusable state if the tail is pushed beyond 626 + * their associated data block. 627 + */ 628 + static bool data_push_tail(struct printk_ringbuffer *rb, 629 + struct prb_data_ring *data_ring, 630 + unsigned long lpos) 631 + { 632 + unsigned long tail_lpos_new; 633 + unsigned long tail_lpos; 634 + unsigned long next_lpos; 635 + 636 + /* If @lpos is from a data-less block, there is nothing to do. 
*/ 637 + if (LPOS_DATALESS(lpos)) 638 + return true; 639 + 640 + /* 641 + * Any descriptor states that have transitioned to reusable due to the 642 + * data tail being pushed to this loaded value will be visible to this 643 + * CPU. This pairs with data_push_tail:D. 644 + * 645 + * Memory barrier involvement: 646 + * 647 + * If data_push_tail:A reads from data_push_tail:D, then this CPU can 648 + * see desc_make_reusable:A. 649 + * 650 + * Relies on: 651 + * 652 + * MB from desc_make_reusable:A to data_push_tail:D 653 + * matches 654 + * READFROM from data_push_tail:D to data_push_tail:A 655 + * thus 656 + * READFROM from desc_make_reusable:A to this CPU 657 + */ 658 + tail_lpos = atomic_long_read(&data_ring->tail_lpos); /* LMM(data_push_tail:A) */ 659 + 660 + /* 661 + * Loop until the tail lpos is at or beyond @lpos. This condition 662 + * may already be satisfied, resulting in no full memory barrier 663 + * from data_push_tail:D being performed. However, since this CPU 664 + * sees the new tail lpos, any descriptor states that transitioned to 665 + * the reusable state must already be visible. 666 + */ 667 + while ((lpos - tail_lpos) - 1 < DATA_SIZE(data_ring)) { 668 + /* 669 + * Make all descriptors reusable that are associated with 670 + * data blocks before @lpos. 671 + */ 672 + if (!data_make_reusable(rb, data_ring, tail_lpos, lpos, 673 + &next_lpos)) { 674 + /* 675 + * 1. Guarantee the block ID loaded in 676 + * data_make_reusable() is performed before 677 + * reloading the tail lpos. The failed 678 + * data_make_reusable() may be due to a newly 679 + * recycled data area causing the tail lpos to 680 + * have been previously pushed. This pairs with 681 + * data_alloc:A and data_realloc:A. 682 + * 683 + * Memory barrier involvement: 684 + * 685 + * If data_make_reusable:A reads from data_alloc:B, 686 + * then data_push_tail:C reads from 687 + * data_push_tail:D. 
688 + * 689 + * Relies on: 690 + * 691 + * MB from data_push_tail:D to data_alloc:B 692 + * matching 693 + * RMB from data_make_reusable:A to 694 + * data_push_tail:C 695 + * 696 + * Note: data_push_tail:D and data_alloc:B can be 697 + * different CPUs. However, the data_alloc:B 698 + * CPU (which performs the full memory 699 + * barrier) must have previously seen 700 + * data_push_tail:D. 701 + * 702 + * 2. Guarantee the descriptor state loaded in 703 + * data_make_reusable() is performed before 704 + * reloading the tail lpos. The failed 705 + * data_make_reusable() may be due to a newly 706 + * recycled descriptor causing the tail lpos to 707 + * have been previously pushed. This pairs with 708 + * desc_reserve:D. 709 + * 710 + * Memory barrier involvement: 711 + * 712 + * If data_make_reusable:B reads from 713 + * desc_reserve:F, then data_push_tail:C reads 714 + * from data_push_tail:D. 715 + * 716 + * Relies on: 717 + * 718 + * MB from data_push_tail:D to desc_reserve:F 719 + * matching 720 + * RMB from data_make_reusable:B to 721 + * data_push_tail:C 722 + * 723 + * Note: data_push_tail:D and desc_reserve:F can 724 + * be different CPUs. However, the 725 + * desc_reserve:F CPU (which performs the 726 + * full memory barrier) must have previously 727 + * seen data_push_tail:D. 728 + */ 729 + smp_rmb(); /* LMM(data_push_tail:B) */ 730 + 731 + tail_lpos_new = atomic_long_read(&data_ring->tail_lpos 732 + ); /* LMM(data_push_tail:C) */ 733 + if (tail_lpos_new == tail_lpos) 734 + return false; 735 + 736 + /* Another CPU pushed the tail. Try again. */ 737 + tail_lpos = tail_lpos_new; 738 + continue; 739 + } 740 + 741 + /* 742 + * Guarantee any descriptor states that have transitioned to 743 + * reusable are stored before pushing the tail lpos. A full 744 + * memory barrier is needed since other CPUs may have made 745 + * the descriptor states reusable. This pairs with 746 + * data_push_tail:A. 
747 + */ 748 + if (atomic_long_try_cmpxchg(&data_ring->tail_lpos, &tail_lpos, 749 + next_lpos)) { /* LMM(data_push_tail:D) */ 750 + break; 751 + } 752 + } 753 + 754 + return true; 755 + } 756 + 757 + /* 758 + * Advance the desc ring tail. This function advances the tail by one 759 + * descriptor, thus invalidating the oldest descriptor. Before advancing 760 + * the tail, the tail descriptor is made reusable and all data blocks up to 761 + * and including the descriptor's data block are invalidated (i.e. the data 762 + * ring tail is pushed past the data block of the descriptor being made 763 + * reusable). 764 + */ 765 + static bool desc_push_tail(struct printk_ringbuffer *rb, 766 + unsigned long tail_id) 767 + { 768 + struct prb_desc_ring *desc_ring = &rb->desc_ring; 769 + enum desc_state d_state; 770 + struct prb_desc desc; 771 + 772 + d_state = desc_read(desc_ring, tail_id, &desc, NULL, NULL); 773 + 774 + switch (d_state) { 775 + case desc_miss: 776 + /* 777 + * If the ID is exactly 1 wrap behind the expected, it is 778 + * in the process of being reserved by another writer and 779 + * must be considered reserved. 780 + */ 781 + if (DESC_ID(atomic_long_read(&desc.state_var)) == 782 + DESC_ID_PREV_WRAP(desc_ring, tail_id)) { 783 + return false; 784 + } 785 + 786 + /* 787 + * The ID has changed. Another writer must have pushed the 788 + * tail and recycled the descriptor already. Success is 789 + * returned because the caller is only interested in the 790 + * specified tail being pushed, which it was. 791 + */ 792 + return true; 793 + case desc_reserved: 794 + case desc_committed: 795 + return false; 796 + case desc_finalized: 797 + desc_make_reusable(desc_ring, tail_id); 798 + break; 799 + case desc_reusable: 800 + break; 801 + } 802 + 803 + /* 804 + * Data blocks must be invalidated before their associated 805 + * descriptor can be made available for recycling. 
Invalidating 806 + * them later is not possible because there is no way to trust 807 + * data blocks once their associated descriptor is gone. 808 + */ 809 + 810 + if (!data_push_tail(rb, &rb->text_data_ring, desc.text_blk_lpos.next)) 811 + return false; 812 + 813 + /* 814 + * Check the next descriptor after @tail_id before pushing the tail 815 + * to it because the tail must always be in a finalized or reusable 816 + * state. The implementation of prb_first_seq() relies on this. 817 + * 818 + * A successful read implies that the next descriptor is less than or 819 + * equal to @head_id so there is no risk of pushing the tail past the 820 + * head. 821 + */ 822 + d_state = desc_read(desc_ring, DESC_ID(tail_id + 1), &desc, 823 + NULL, NULL); /* LMM(desc_push_tail:A) */ 824 + 825 + if (d_state == desc_finalized || d_state == desc_reusable) { 826 + /* 827 + * Guarantee any descriptor states that have transitioned to 828 + * reusable are stored before pushing the tail ID. This allows 829 + * verifying the recycled descriptor state. A full memory 830 + * barrier is needed since other CPUs may have made the 831 + * descriptor states reusable. This pairs with desc_reserve:D. 832 + */ 833 + atomic_long_cmpxchg(&desc_ring->tail_id, tail_id, 834 + DESC_ID(tail_id + 1)); /* LMM(desc_push_tail:B) */ 835 + } else { 836 + /* 837 + * Guarantee the last state load from desc_read() is before 838 + * reloading @tail_id in order to see a new tail ID in the 839 + * case that the descriptor has been recycled. This pairs 840 + * with desc_reserve:D. 841 + * 842 + * Memory barrier involvement: 843 + * 844 + * If desc_push_tail:A reads from desc_reserve:F, then 845 + * desc_push_tail:D reads from desc_push_tail:B. 846 + * 847 + * Relies on: 848 + * 849 + * MB from desc_push_tail:B to desc_reserve:F 850 + * matching 851 + * RMB from desc_push_tail:A to desc_push_tail:D 852 + * 853 + * Note: desc_push_tail:B and desc_reserve:F can be different 854 + * CPUs. 
However, the desc_reserve:F CPU (which performs 855 + * the full memory barrier) must have previously seen 856 + * desc_push_tail:B. 857 + */ 858 + smp_rmb(); /* LMM(desc_push_tail:C) */ 859 + 860 + /* 861 + * Re-check the tail ID. The descriptor following @tail_id is 862 + * not in an allowed tail state. But if the tail has since 863 + * been moved by another CPU, then it does not matter. 864 + */ 865 + if (atomic_long_read(&desc_ring->tail_id) == tail_id) /* LMM(desc_push_tail:D) */ 866 + return false; 867 + } 868 + 869 + return true; 870 + } 871 + 872 + /* Reserve a new descriptor, invalidating the oldest if necessary. */ 873 + static bool desc_reserve(struct printk_ringbuffer *rb, unsigned long *id_out) 874 + { 875 + struct prb_desc_ring *desc_ring = &rb->desc_ring; 876 + unsigned long prev_state_val; 877 + unsigned long id_prev_wrap; 878 + struct prb_desc *desc; 879 + unsigned long head_id; 880 + unsigned long id; 881 + 882 + head_id = atomic_long_read(&desc_ring->head_id); /* LMM(desc_reserve:A) */ 883 + 884 + do { 885 + desc = to_desc(desc_ring, head_id); 886 + 887 + id = DESC_ID(head_id + 1); 888 + id_prev_wrap = DESC_ID_PREV_WRAP(desc_ring, id); 889 + 890 + /* 891 + * Guarantee the head ID is read before reading the tail ID. 892 + * Since the tail ID is updated before the head ID, this 893 + * guarantees that @id_prev_wrap is never ahead of the tail 894 + * ID. This pairs with desc_reserve:D. 895 + * 896 + * Memory barrier involvement: 897 + * 898 + * If desc_reserve:A reads from desc_reserve:D, then 899 + * desc_reserve:C reads from desc_push_tail:B. 900 + * 901 + * Relies on: 902 + * 903 + * MB from desc_push_tail:B to desc_reserve:D 904 + * matching 905 + * RMB from desc_reserve:A to desc_reserve:C 906 + * 907 + * Note: desc_push_tail:B and desc_reserve:D can be different 908 + * CPUs. However, the desc_reserve:D CPU (which performs 909 + * the full memory barrier) must have previously seen 910 + * desc_push_tail:B. 
911 + */ 912 + smp_rmb(); /* LMM(desc_reserve:B) */ 913 + 914 + if (id_prev_wrap == atomic_long_read(&desc_ring->tail_id 915 + )) { /* LMM(desc_reserve:C) */ 916 + /* 917 + * Make space for the new descriptor by 918 + * advancing the tail. 919 + */ 920 + if (!desc_push_tail(rb, id_prev_wrap)) 921 + return false; 922 + } 923 + 924 + /* 925 + * 1. Guarantee the tail ID is read before validating the 926 + * recycled descriptor state. A read memory barrier is 927 + * sufficient for this. This pairs with desc_push_tail:B. 928 + * 929 + * Memory barrier involvement: 930 + * 931 + * If desc_reserve:C reads from desc_push_tail:B, then 932 + * desc_reserve:E reads from desc_make_reusable:A. 933 + * 934 + * Relies on: 935 + * 936 + * MB from desc_make_reusable:A to desc_push_tail:B 937 + * matching 938 + * RMB from desc_reserve:C to desc_reserve:E 939 + * 940 + * Note: desc_make_reusable:A and desc_push_tail:B can be 941 + * different CPUs. However, the desc_push_tail:B CPU 942 + * (which performs the full memory barrier) must have 943 + * previously seen desc_make_reusable:A. 944 + * 945 + * 2. Guarantee the tail ID is stored before storing the head 946 + * ID. This pairs with desc_reserve:B. 947 + * 948 + * 3. Guarantee any data ring tail changes are stored before 949 + * recycling the descriptor. Data ring tail changes can 950 + * happen via desc_push_tail()->data_push_tail(). A full 951 + * memory barrier is needed since another CPU may have 952 + * pushed the data ring tails. This pairs with 953 + * data_push_tail:B. 954 + * 955 + * 4. Guarantee a new tail ID is stored before recycling the 956 + * descriptor. A full memory barrier is needed since 957 + * another CPU may have pushed the tail ID. This pairs 958 + * with desc_push_tail:C and this also pairs with 959 + * prb_first_seq:C. 960 + * 961 + * 5. Guarantee the head ID is stored before trying to 962 + * finalize the previous descriptor. This pairs with 963 + * _prb_commit:B. 
964 + */ 965 + } while (!atomic_long_try_cmpxchg(&desc_ring->head_id, &head_id, 966 + id)); /* LMM(desc_reserve:D) */ 967 + 968 + desc = to_desc(desc_ring, id); 969 + 970 + /* 971 + * If the descriptor has been recycled, verify the old state val. 972 + * See "ABA Issues" about why this verification is performed. 973 + */ 974 + prev_state_val = atomic_long_read(&desc->state_var); /* LMM(desc_reserve:E) */ 975 + if (prev_state_val && 976 + get_desc_state(id_prev_wrap, prev_state_val) != desc_reusable) { 977 + WARN_ON_ONCE(1); 978 + return false; 979 + } 980 + 981 + /* 982 + * Assign the descriptor a new ID and set its state to reserved. 983 + * See "ABA Issues" about why cmpxchg() instead of set() is used. 984 + * 985 + * Guarantee the new descriptor ID and state is stored before making 986 + * any other changes. A write memory barrier is sufficient for this. 987 + * This pairs with desc_read:D. 988 + */ 989 + if (!atomic_long_try_cmpxchg(&desc->state_var, &prev_state_val, 990 + DESC_SV(id, desc_reserved))) { /* LMM(desc_reserve:F) */ 991 + WARN_ON_ONCE(1); 992 + return false; 993 + } 994 + 995 + /* Now data in @desc can be modified: LMM(desc_reserve:G) */ 996 + 997 + *id_out = id; 998 + return true; 999 + } 1000 + 1001 + /* Determine the end of a data block. */ 1002 + static unsigned long get_next_lpos(struct prb_data_ring *data_ring, 1003 + unsigned long lpos, unsigned int size) 1004 + { 1005 + unsigned long begin_lpos; 1006 + unsigned long next_lpos; 1007 + 1008 + begin_lpos = lpos; 1009 + next_lpos = lpos + size; 1010 + 1011 + /* First check if the data block does not wrap. */ 1012 + if (DATA_WRAPS(data_ring, begin_lpos) == DATA_WRAPS(data_ring, next_lpos)) 1013 + return next_lpos; 1014 + 1015 + /* Wrapping data blocks store their data at the beginning. */ 1016 + return (DATA_THIS_WRAP_START_LPOS(data_ring, next_lpos) + size); 1017 + } 1018 + 1019 + /* 1020 + * Allocate a new data block, invalidating the oldest data block(s) 1021 + * if necessary. 
This function also associates the data block with 1022 + * a specified descriptor. 1023 + */ 1024 + static char *data_alloc(struct printk_ringbuffer *rb, 1025 + struct prb_data_ring *data_ring, unsigned int size, 1026 + struct prb_data_blk_lpos *blk_lpos, unsigned long id) 1027 + { 1028 + struct prb_data_block *blk; 1029 + unsigned long begin_lpos; 1030 + unsigned long next_lpos; 1031 + 1032 + if (size == 0) { 1033 + /* Specify a data-less block. */ 1034 + blk_lpos->begin = NO_LPOS; 1035 + blk_lpos->next = NO_LPOS; 1036 + return NULL; 1037 + } 1038 + 1039 + size = to_blk_size(size); 1040 + 1041 + begin_lpos = atomic_long_read(&data_ring->head_lpos); 1042 + 1043 + do { 1044 + next_lpos = get_next_lpos(data_ring, begin_lpos, size); 1045 + 1046 + if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) { 1047 + /* Failed to allocate, specify a data-less block. */ 1048 + blk_lpos->begin = FAILED_LPOS; 1049 + blk_lpos->next = FAILED_LPOS; 1050 + return NULL; 1051 + } 1052 + 1053 + /* 1054 + * 1. Guarantee any descriptor states that have transitioned 1055 + * to reusable are stored before modifying the newly 1056 + * allocated data area. A full memory barrier is needed 1057 + * since other CPUs may have made the descriptor states 1058 + * reusable. See data_push_tail:A about why the reusable 1059 + * states are visible. This pairs with desc_read:D. 1060 + * 1061 + * 2. Guarantee any updated tail lpos is stored before 1062 + * modifying the newly allocated data area. Another CPU may 1063 + * be in data_make_reusable() and is reading a block ID 1064 + * from this area. data_make_reusable() can handle reading 1065 + * a garbage block ID value, but then it must be able to 1066 + * load a new tail lpos. A full memory barrier is needed 1067 + * since other CPUs may have updated the tail lpos. This 1068 + * pairs with data_push_tail:B. 
1069 + */ 1070 + } while (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &begin_lpos, 1071 + next_lpos)); /* LMM(data_alloc:A) */ 1072 + 1073 + blk = to_block(data_ring, begin_lpos); 1074 + blk->id = id; /* LMM(data_alloc:B) */ 1075 + 1076 + if (DATA_WRAPS(data_ring, begin_lpos) != DATA_WRAPS(data_ring, next_lpos)) { 1077 + /* Wrapping data blocks store their data at the beginning. */ 1078 + blk = to_block(data_ring, 0); 1079 + 1080 + /* 1081 + * Store the ID on the wrapped block for consistency. 1082 + * The printk_ringbuffer does not actually use it. 1083 + */ 1084 + blk->id = id; 1085 + } 1086 + 1087 + blk_lpos->begin = begin_lpos; 1088 + blk_lpos->next = next_lpos; 1089 + 1090 + return &blk->data[0]; 1091 + } 1092 + 1093 + /* 1094 + * Try to resize an existing data block associated with the descriptor 1095 + * specified by @id. If the resized data block should become wrapped, it 1096 + * copies the old data to the new data block. If @size yields a data block 1097 + * with the same or less size, the data block is left as is. 1098 + * 1099 + * Fail if this is not the last allocated data block or if there is not 1100 + * enough space or it is not possible make enough space. 1101 + * 1102 + * Return a pointer to the beginning of the entire data buffer or NULL on 1103 + * failure. 1104 + */ 1105 + static char *data_realloc(struct printk_ringbuffer *rb, 1106 + struct prb_data_ring *data_ring, unsigned int size, 1107 + struct prb_data_blk_lpos *blk_lpos, unsigned long id) 1108 + { 1109 + struct prb_data_block *blk; 1110 + unsigned long head_lpos; 1111 + unsigned long next_lpos; 1112 + bool wrapped; 1113 + 1114 + /* Reallocation only works if @blk_lpos is the newest data block. */ 1115 + head_lpos = atomic_long_read(&data_ring->head_lpos); 1116 + if (head_lpos != blk_lpos->next) 1117 + return NULL; 1118 + 1119 + /* Keep track if @blk_lpos was a wrapping data block. 
*/ 1120 + wrapped = (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, blk_lpos->next)); 1121 + 1122 + size = to_blk_size(size); 1123 + 1124 + next_lpos = get_next_lpos(data_ring, blk_lpos->begin, size); 1125 + 1126 + /* If the data block does not increase, there is nothing to do. */ 1127 + if (head_lpos - next_lpos < DATA_SIZE(data_ring)) { 1128 + blk = to_block(data_ring, blk_lpos->begin); 1129 + return &blk->data[0]; 1130 + } 1131 + 1132 + if (!data_push_tail(rb, data_ring, next_lpos - DATA_SIZE(data_ring))) 1133 + return NULL; 1134 + 1135 + /* The memory barrier involvement is the same as data_alloc:A. */ 1136 + if (!atomic_long_try_cmpxchg(&data_ring->head_lpos, &head_lpos, 1137 + next_lpos)) { /* LMM(data_realloc:A) */ 1138 + return NULL; 1139 + } 1140 + 1141 + blk = to_block(data_ring, blk_lpos->begin); 1142 + 1143 + if (DATA_WRAPS(data_ring, blk_lpos->begin) != DATA_WRAPS(data_ring, next_lpos)) { 1144 + struct prb_data_block *old_blk = blk; 1145 + 1146 + /* Wrapping data blocks store their data at the beginning. */ 1147 + blk = to_block(data_ring, 0); 1148 + 1149 + /* 1150 + * Store the ID on the wrapped block for consistency. 1151 + * The printk_ringbuffer does not actually use it. 1152 + */ 1153 + blk->id = id; 1154 + 1155 + if (!wrapped) { 1156 + /* 1157 + * Since the allocated space is now in the newly 1158 + * created wrapping data block, copy the content 1159 + * from the old data block. 1160 + */ 1161 + memcpy(&blk->data[0], &old_blk->data[0], 1162 + (blk_lpos->next - blk_lpos->begin) - sizeof(blk->id)); 1163 + } 1164 + } 1165 + 1166 + blk_lpos->next = next_lpos; 1167 + 1168 + return &blk->data[0]; 1169 + } 1170 + 1171 + /* Return the number of bytes used by a data block. */ 1172 + static unsigned int space_used(struct prb_data_ring *data_ring, 1173 + struct prb_data_blk_lpos *blk_lpos) 1174 + { 1175 + /* Data-less blocks take no space. 
*/ 1176 + if (BLK_DATALESS(blk_lpos)) 1177 + return 0; 1178 + 1179 + if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next)) { 1180 + /* Data block does not wrap. */ 1181 + return (DATA_INDEX(data_ring, blk_lpos->next) - 1182 + DATA_INDEX(data_ring, blk_lpos->begin)); 1183 + } 1184 + 1185 + /* 1186 + * For wrapping data blocks, the trailing (wasted) space is 1187 + * also counted. 1188 + */ 1189 + return (DATA_INDEX(data_ring, blk_lpos->next) + 1190 + DATA_SIZE(data_ring) - DATA_INDEX(data_ring, blk_lpos->begin)); 1191 + } 1192 + 1193 + /* 1194 + * Given @blk_lpos, return a pointer to the writer data from the data block 1195 + * and calculate the size of the data part. A NULL pointer is returned if 1196 + * @blk_lpos specifies values that could never be legal. 1197 + * 1198 + * This function (used by readers) performs strict validation on the lpos 1199 + * values to possibly detect bugs in the writer code. A WARN_ON_ONCE() is 1200 + * triggered if an internal error is detected. 1201 + */ 1202 + static const char *get_data(struct prb_data_ring *data_ring, 1203 + struct prb_data_blk_lpos *blk_lpos, 1204 + unsigned int *data_size) 1205 + { 1206 + struct prb_data_block *db; 1207 + 1208 + /* Data-less data block description. */ 1209 + if (BLK_DATALESS(blk_lpos)) { 1210 + if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { 1211 + *data_size = 0; 1212 + return ""; 1213 + } 1214 + return NULL; 1215 + } 1216 + 1217 + /* Regular data block: @begin less than @next and in same wrap. */ 1218 + if (DATA_WRAPS(data_ring, blk_lpos->begin) == DATA_WRAPS(data_ring, blk_lpos->next) && 1219 + blk_lpos->begin < blk_lpos->next) { 1220 + db = to_block(data_ring, blk_lpos->begin); 1221 + *data_size = blk_lpos->next - blk_lpos->begin; 1222 + 1223 + /* Wrapping data block: @begin is one wrap behind @next. 
*/
	} else if (DATA_WRAPS(data_ring, blk_lpos->begin + DATA_SIZE(data_ring)) ==
		   DATA_WRAPS(data_ring, blk_lpos->next)) {
		/* Wrapping data block: the data begins at the start of the ring. */
		db = to_block(data_ring, 0);
		*data_size = DATA_INDEX(data_ring, blk_lpos->next);

	/* Illegal block description. */
	} else {
		WARN_ON_ONCE(1);
		return NULL;
	}

	/* A valid data block will always be aligned to the ID size. */
	if (WARN_ON_ONCE(blk_lpos->begin != ALIGN(blk_lpos->begin, sizeof(db->id))) ||
	    WARN_ON_ONCE(blk_lpos->next != ALIGN(blk_lpos->next, sizeof(db->id)))) {
		return NULL;
	}

	/* A valid data block will always have at least an ID. */
	if (WARN_ON_ONCE(*data_size < sizeof(db->id)))
		return NULL;

	/* Subtract block ID space from size to reflect data size. */
	*data_size -= sizeof(db->id);

	return &db->data[0];
}

/*
 * Attempt to transition the newest descriptor from committed back to reserved
 * so that the record can be modified by a writer again. This is only possible
 * if the descriptor is not yet finalized and the provided @caller_id matches.
 */
static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
					 u32 caller_id, unsigned long *id_out)
{
	unsigned long prev_state_val;
	enum desc_state d_state;
	struct prb_desc desc;
	struct prb_desc *d;
	unsigned long id;
	u32 cid;

	id = atomic_long_read(&desc_ring->head_id);

	/*
	 * To reduce unnecessarily reopening, first check if the descriptor
	 * state and caller ID are correct. (This is an optimization only;
	 * the cmpxchg below is the authoritative check.)
	 */
	d_state = desc_read(desc_ring, id, &desc, NULL, &cid);
	if (d_state != desc_committed || cid != caller_id)
		return NULL;

	d = to_desc(desc_ring, id);

	prev_state_val = DESC_SV(id, desc_committed);

	/*
	 * Guarantee the reserved state is stored before reading any
	 * record data. A full memory barrier is needed because @state_var
	 * modification is followed by reading. This pairs with _prb_commit:B.
	 *
	 * Memory barrier involvement:
	 *
	 * If desc_reopen_last:A reads from _prb_commit:B, then
	 * prb_reserve_in_last:A reads from _prb_commit:A.
	 *
	 * Relies on:
	 *
	 * WMB from _prb_commit:A to _prb_commit:B
	 *    matching
	 * MB If desc_reopen_last:A to prb_reserve_in_last:A
	 */
	if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
			DESC_SV(id, desc_reserved))) { /* LMM(desc_reopen_last:A) */
		return NULL;
	}

	*id_out = id;
	return d;
}

/**
 * prb_reserve_in_last() - Re-reserve and extend the space in the ringbuffer
 *                         used by the newest record.
 *
 * @e:         The entry structure to setup.
 * @rb:        The ringbuffer to re-reserve and extend data in.
 * @r:         The record structure to allocate buffers for.
 * @caller_id: The caller ID of the caller (reserving writer).
 * @max_size:  Fail if the extended size would be greater than this.
 *
 * This is the public function available to writers to re-reserve and extend
 * data.
 *
 * The writer specifies the text size to extend (not the new total size) by
 * setting the @text_buf_size field of @r. To ensure proper initialization
 * of @r, prb_rec_init_wr() should be used.
 *
 * This function will fail if @caller_id does not match the caller ID of the
 * newest record. In that case the caller must reserve new data using
 * prb_reserve().
 *
 * Context: Any context. Disables local interrupts on success.
 * Return: true if text data could be extended, otherwise false.
 *
 * On success:
 *
 *   - @r->text_buf points to the beginning of the entire text buffer.
 *
 *   - @r->text_buf_size is set to the new total size of the buffer.
 *
 *   - @r->info is not touched so that @r->info->text_len could be used
 *     to append the text.
 *
 *   - prb_record_text_space() can be used on @e to query the new
 *     actually used space.
 *
 * Important: All @r->info fields will already be set with the current values
 *            for the record. I.e. @r->info->text_len will be less than
 *            @text_buf_size. Writers can use @r->info->text_len to know
 *            where concatenation begins and writers should update
 *            @r->info->text_len after concatenating.
 */
bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
			 struct printk_record *r, u32 caller_id, unsigned int max_size)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct printk_info *info;
	unsigned int data_size;
	struct prb_desc *d;
	unsigned long id;

	local_irq_save(e->irqflags);

	/* Transition the newest descriptor back to the reserved state. */
	d = desc_reopen_last(desc_ring, caller_id, &id);
	if (!d) {
		local_irq_restore(e->irqflags);
		goto fail_reopen;
	}

	/* Now the writer has exclusive access: LMM(prb_reserve_in_last:A) */

	info = to_info(desc_ring, id);

	/*
	 * Set the @e fields here so that prb_commit() can be used if
	 * anything fails from now on.
	 */
	e->rb = rb;
	e->id = id;

	/*
	 * desc_reopen_last() checked the caller_id, but there was no
	 * exclusive access at that point. The descriptor may have
	 * changed since then.
	 */
	if (caller_id != info->caller_id)
		goto fail;

	if (BLK_DATALESS(&d->text_blk_lpos)) {
		/* A data-less record must have a zero text length. */
		if (WARN_ON_ONCE(info->text_len != 0)) {
			pr_warn_once("wrong text_len value (%hu, expecting 0)\n",
				     info->text_len);
			info->text_len = 0;
		}

		if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
			goto fail;

		if (r->text_buf_size > max_size)
			goto fail;

		r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size,
					 &d->text_blk_lpos, id);
	} else {
		if (!get_data(&rb->text_data_ring, &d->text_blk_lpos, &data_size))
			goto fail;

		/*
		 * Increase the buffer size to include the original size. If
		 * the meta data (@text_len) is not sane, use the full data
		 * block size.
		 */
		if (WARN_ON_ONCE(info->text_len > data_size)) {
			pr_warn_once("wrong text_len value (%hu, expecting <=%u)\n",
				     info->text_len, data_size);
			info->text_len = data_size;
		}
		r->text_buf_size += info->text_len;

		if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
			goto fail;

		if (r->text_buf_size > max_size)
			goto fail;

		r->text_buf = data_realloc(rb, &rb->text_data_ring, r->text_buf_size,
					   &d->text_blk_lpos, id);
	}
	if (r->text_buf_size && !r->text_buf)
		goto fail;

	r->info = info;

	e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);

	return true;
fail:
	prb_commit(e);
	/* prb_commit() re-enabled interrupts. */
fail_reopen:
	/* Make it clear to the caller that the re-reserve failed. */
	memset(r, 0, sizeof(*r));
	return false;
}

/*
 * Attempt to finalize a specified descriptor. If this fails, the descriptor
 * is either already final or it will finalize itself when the writer commits.
 */
static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id)
{
	unsigned long prev_state_val = DESC_SV(id, desc_committed);
	struct prb_desc *d = to_desc(desc_ring, id);

	/* Failure of this cmpxchg is acceptable; see the comment above. */
	atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val,
			DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */
}

/**
 * prb_reserve() - Reserve space in the ringbuffer.
 *
 * @e:  The entry structure to setup.
 * @rb: The ringbuffer to reserve data in.
 * @r:  The record structure to allocate buffers for.
 *
 * This is the public function available to writers to reserve data.
 *
 * The writer specifies the text size to reserve by setting the
 * @text_buf_size field of @r. To ensure proper initialization of @r,
 * prb_rec_init_wr() should be used.
 *
 * Context: Any context. Disables local interrupts on success.
 * Return: true if at least text data could be allocated, otherwise false.
 *
 * On success, the fields @info and @text_buf of @r will be set by this
 * function and should be filled in by the writer before committing. Also
 * on success, prb_record_text_space() can be used on @e to query the actual
 * space used for the text data block.
 *
 * Important: @info->text_len needs to be set correctly by the writer in
 *            order for data to be readable and/or extended. Its value
 *            is initialized to 0.
*/
bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
		 struct printk_record *r)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct printk_info *info;
	struct prb_desc *d;
	unsigned long id;
	u64 seq;

	if (!data_check_size(&rb->text_data_ring, r->text_buf_size))
		goto fail;

	/*
	 * Descriptors in the reserved state act as blockers to all further
	 * reservations once the desc_ring has fully wrapped. Disable
	 * interrupts during the reserve/commit window in order to minimize
	 * the likelihood of this happening.
	 */
	local_irq_save(e->irqflags);

	if (!desc_reserve(rb, &id)) {
		/* Descriptor reservation failures are tracked. */
		atomic_long_inc(&rb->fail);
		local_irq_restore(e->irqflags);
		goto fail;
	}

	d = to_desc(desc_ring, id);
	info = to_info(desc_ring, id);

	/*
	 * All @info fields (except @seq) are cleared and must be filled in
	 * by the writer. Save @seq before clearing because it is used to
	 * determine the new sequence number.
	 */
	seq = info->seq;
	memset(info, 0, sizeof(*info));

	/*
	 * Set the @e fields here so that prb_commit() can be used if
	 * text data allocation fails.
	 */
	e->rb = rb;
	e->id = id;

	/*
	 * Initialize the sequence number if it has "never been set".
	 * Otherwise just increment it by a full wrap.
	 *
	 * @seq is considered "never been set" if it has a value of 0,
	 * _except_ for @infos[0], which was specially setup by the ringbuffer
	 * initializer and therefore is always considered as set.
	 *
	 * See the "Bootstrap" comment block in printk_ringbuffer.h for
	 * details about how the initializer bootstraps the descriptors.
	 */
	if (seq == 0 && DESC_INDEX(desc_ring, id) != 0)
		info->seq = DESC_INDEX(desc_ring, id);
	else
		info->seq = seq + DESCS_COUNT(desc_ring);

	/*
	 * New data is about to be reserved. Once that happens, previous
	 * descriptors are no longer able to be extended. Finalize the
	 * previous descriptor now so that it can be made available to
	 * readers. (For seq==0 there is no previous descriptor.)
	 */
	if (info->seq > 0)
		desc_make_final(desc_ring, DESC_ID(id - 1));

	r->text_buf = data_alloc(rb, &rb->text_data_ring, r->text_buf_size,
				 &d->text_blk_lpos, id);
	/* If text data allocation fails, a data-less record is committed. */
	if (r->text_buf_size && !r->text_buf) {
		prb_commit(e);
		/* prb_commit() re-enabled interrupts. */
		goto fail;
	}

	r->info = info;

	/* Record full text space used by record. */
	e->text_space = space_used(&rb->text_data_ring, &d->text_blk_lpos);

	return true;
fail:
	/* Make it clear to the caller that the reserve failed. */
	memset(r, 0, sizeof(*r));
	return false;
}

/* Commit the data (possibly finalizing it) and restore interrupts. */
static void _prb_commit(struct prb_reserved_entry *e, unsigned long state_val)
{
	struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
	struct prb_desc *d = to_desc(desc_ring, e->id);
	unsigned long prev_state_val = DESC_SV(e->id, desc_reserved);

	/* Now the writer has finished all writing: LMM(_prb_commit:A) */

	/*
	 * Set the descriptor as committed. See "ABA Issues" about why
	 * cmpxchg() instead of set() is used.
	 *
	 * 1. Guarantee all record data is stored before the descriptor state
	 *    is stored as committed. A write memory barrier is sufficient
	 *    for this. This pairs with desc_read:B and desc_reopen_last:A.
	 *
	 * 2. Guarantee the descriptor state is stored as committed before
	 *    re-checking the head ID in order to possibly finalize this
	 *    descriptor. This pairs with desc_reserve:D.
	 *
	 *    Memory barrier involvement:
	 *
	 *    If prb_commit:A reads from desc_reserve:D, then
	 *    desc_make_final:A reads from _prb_commit:B.
	 *
	 *    Relies on:
	 *
	 *    MB _prb_commit:B to prb_commit:A
	 *       matching
	 *    MB desc_reserve:D to desc_make_final:A
	 */
	if (!atomic_long_try_cmpxchg(&d->state_var, &prev_state_val,
			DESC_SV(e->id, state_val))) { /* LMM(_prb_commit:B) */
		/* Should never fail: this writer holds exclusive access. */
		WARN_ON_ONCE(1);
	}

	/* Restore interrupts, the reserve/commit window is finished. */
	local_irq_restore(e->irqflags);
}

/**
 * prb_commit() - Commit (previously reserved) data to the ringbuffer.
 *
 * @e: The entry containing the reserved data information.
 *
 * This is the public function available to writers to commit data.
 *
 * Note that the data is not yet available to readers until it is finalized.
 * Finalizing happens automatically when space for the next record is
 * reserved.
 *
 * See prb_final_commit() for a version of this function that finalizes
 * immediately.
 *
 * Context: Any context. Enables local interrupts.
 */
void prb_commit(struct prb_reserved_entry *e)
{
	struct prb_desc_ring *desc_ring = &e->rb->desc_ring;
	unsigned long head_id;

	_prb_commit(e, desc_committed);

	/*
	 * If this descriptor is no longer the head (i.e. a new record has
	 * been allocated), extending the data for this record is no longer
	 * allowed and therefore it must be finalized.
	 */
	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */
	if (head_id != e->id)
		desc_make_final(desc_ring, e->id);
}

/**
 * prb_final_commit() - Commit and finalize (previously reserved) data to
 *                      the ringbuffer.
 *
 * @e: The entry containing the reserved data information.
 *
 * This is the public function available to writers to commit+finalize data.
 *
 * By finalizing, the data is made immediately available to readers.
 *
 * This function should only be used if there are no intentions of extending
 * this data using prb_reserve_in_last().
 *
 * Context: Any context. Enables local interrupts.
 */
void prb_final_commit(struct prb_reserved_entry *e)
{
	_prb_commit(e, desc_finalized);
}

/*
 * Count the number of lines in provided text. All text has at least 1 line
 * (even if @text_size is 0). Each '\n' processed is counted as an additional
 * line.
 */
static unsigned int count_lines(const char *text, unsigned int text_size)
{
	unsigned int next_size = text_size;
	unsigned int line_count = 1;
	const char *next = text;

	while (next_size) {
		next = memchr(next, '\n', next_size);
		if (!next)
			break;
		line_count++;
		next++;
		next_size = text_size - (next - text);
	}

	return line_count;
}

/*
 * Given @blk_lpos, copy an expected @len of data into the provided buffer.
 * If @line_count is provided, count the number of lines in the data.
 *
 * This function (used by readers) performs strict validation on the data
 * size to possibly detect bugs in the writer code. A WARN_ON_ONCE() is
 * triggered if an internal error is detected.
*/
static bool copy_data(struct prb_data_ring *data_ring,
		      struct prb_data_blk_lpos *blk_lpos, u16 len, char *buf,
		      unsigned int buf_size, unsigned int *line_count)
{
	unsigned int data_size;
	const char *data;

	/* Caller might not want any data. */
	if ((!buf || !buf_size) && !line_count)
		return true;

	data = get_data(data_ring, blk_lpos, &data_size);
	if (!data)
		return false;

	/*
	 * Actual cannot be less than expected. It can be more than expected
	 * because of the trailing alignment padding.
	 *
	 * Note that invalid @len values can occur because the caller loads
	 * the value during an allowed data race.
	 */
	if (data_size < (unsigned int)len)
		return false;

	/* Caller interested in the line count? */
	if (line_count)
		*line_count = count_lines(data, data_size);

	/* Caller interested in the data content? */
	if (!buf || !buf_size)
		return true;

	/* Copy at most @len bytes, truncated to the caller's buffer. */
	data_size = min_t(u16, buf_size, len);

	memcpy(&buf[0], data, data_size); /* LMM(copy_data:A) */
	return true;
}

/*
 * This is an extended version of desc_read(). It gets a copy of a specified
 * descriptor. However, it also verifies that the record is finalized and has
 * the sequence number @seq. On success, 0 is returned.
 *
 * Error return values:
 * -EINVAL: A finalized record with sequence number @seq does not exist.
 * -ENOENT: A finalized record with sequence number @seq exists, but its data
 *          is not available. This is a valid record, so readers should
 *          continue with the next record.
 */
static int desc_read_finalized_seq(struct prb_desc_ring *desc_ring,
				   unsigned long id, u64 seq,
				   struct prb_desc *desc_out)
{
	struct prb_data_blk_lpos *blk_lpos = &desc_out->text_blk_lpos;
	enum desc_state d_state;
	u64 s;

	d_state = desc_read(desc_ring, id, desc_out, &s, NULL);

	/*
	 * An unexpected @id (desc_miss) or @seq mismatch means the record
	 * does not exist. A descriptor in the reserved or committed state
	 * means the record does not yet exist for the reader.
	 */
	if (d_state == desc_miss ||
	    d_state == desc_reserved ||
	    d_state == desc_committed ||
	    s != seq) {
		return -EINVAL;
	}

	/*
	 * A descriptor in the reusable state may no longer have its data
	 * available; report it as existing but with lost data. Or the record
	 * may actually be a record with lost data.
	 */
	if (d_state == desc_reusable ||
	    (blk_lpos->begin == FAILED_LPOS && blk_lpos->next == FAILED_LPOS)) {
		return -ENOENT;
	}

	return 0;
}

/*
 * Copy the ringbuffer data from the record with @seq to the provided
 * @r buffer. On success, 0 is returned.
 *
 * See desc_read_finalized_seq() for error return values.
 */
static int prb_read(struct printk_ringbuffer *rb, u64 seq,
		    struct printk_record *r, unsigned int *line_count)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	struct printk_info *info = to_info(desc_ring, seq);
	struct prb_desc *rdesc = to_desc(desc_ring, seq);
	atomic_long_t *state_var = &rdesc->state_var;
	struct prb_desc desc;
	unsigned long id;
	int err;

	/* Extract the ID, used to specify the descriptor to read. */
	id = DESC_ID(atomic_long_read(state_var));

	/* Get a local copy of the correct descriptor (if available). */
	err = desc_read_finalized_seq(desc_ring, id, seq, &desc);

	/*
	 * If @r is NULL, the caller is only interested in the availability
	 * of the record.
	 */
	if (err || !r)
		return err;

	/* If requested, copy meta data. */
	if (r->info)
		memcpy(r->info, info, sizeof(*(r->info)));

	/* Copy text data. If it fails, this is a data-less record. */
	if (!copy_data(&rb->text_data_ring, &desc.text_blk_lpos, info->text_len,
		       r->text_buf, r->text_buf_size, line_count)) {
		return -ENOENT;
	}

	/* Ensure the record is still finalized and has the same @seq. */
	return desc_read_finalized_seq(desc_ring, id, seq, &desc);
}

/* Get the sequence number of the tail descriptor. */
static u64 prb_first_seq(struct printk_ringbuffer *rb)
{
	struct prb_desc_ring *desc_ring = &rb->desc_ring;
	enum desc_state d_state;
	struct prb_desc desc;
	unsigned long id;
	u64 seq;

	for (;;) {
		id = atomic_long_read(&rb->desc_ring.tail_id); /* LMM(prb_first_seq:A) */

		d_state = desc_read(desc_ring, id, &desc, &seq, NULL); /* LMM(prb_first_seq:B) */

		/*
		 * This loop will not be infinite because the tail is
		 * _always_ in the finalized or reusable state.
		 */
		if (d_state == desc_finalized || d_state == desc_reusable)
			break;

		/*
		 * Guarantee the last state load from desc_read() is before
		 * reloading @tail_id in order to see a new tail in the case
		 * that the descriptor has been recycled. This pairs with
		 * desc_reserve:D.
		 *
		 * Memory barrier involvement:
		 *
		 * If prb_first_seq:B reads from desc_reserve:F, then
		 * prb_first_seq:A reads from desc_push_tail:B.
		 *
		 * Relies on:
		 *
		 * MB from desc_push_tail:B to desc_reserve:F
		 *    matching
		 * RMB prb_first_seq:B to prb_first_seq:A
		 */
		smp_rmb(); /* LMM(prb_first_seq:C) */
	}

	return seq;
}

/*
 * Non-blocking read of a record. Updates @seq to the last finalized record
 * (which may have no data available).
 *
 * See the description of prb_read_valid() and prb_read_valid_info()
 * for details.
 */
static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,
			    struct printk_record *r, unsigned int *line_count)
{
	u64 tail_seq;
	int err;

	while ((err = prb_read(rb, *seq, r, line_count))) {
		tail_seq = prb_first_seq(rb);

		if (*seq < tail_seq) {
			/*
			 * Behind the tail. Catch up and try again. This
			 * can happen for -ENOENT and -EINVAL cases.
			 */
			*seq = tail_seq;

		} else if (err == -ENOENT) {
			/* Record exists, but no data available. Skip. */
			(*seq)++;

		} else {
			/* Non-existent/non-finalized record. Must stop. */
			return false;
		}
	}

	return true;
}

/**
 * prb_read_valid() - Non-blocking read of a requested record or (if gone)
 *                    the next available record.
 *
 * @rb:  The ringbuffer to read from.
 * @seq: The sequence number of the record to read.
 * @r:   A record data buffer to store the read record to.
 *
 * This is the public function available to readers to read a record.
 *
 * The reader provides the @info and @text_buf buffers of @r to be
 * filled in. Any of the buffer pointers can be set to NULL if the reader
 * is not interested in that data. To ensure proper initialization of @r,
 * prb_rec_init_rd() should be used.
 *
 * Context: Any context.
* Return: true if a record was read, otherwise false.
 *
 * On success, the reader must check r->info.seq to see which record was
 * actually read. This allows the reader to detect dropped records.
 *
 * Failure means @seq refers to a not yet written record.
 */
bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,
		    struct printk_record *r)
{
	return _prb_read_valid(rb, &seq, r, NULL);
}

/**
 * prb_read_valid_info() - Non-blocking read of meta data for a requested
 *                         record or (if gone) the next available record.
 *
 * @rb:         The ringbuffer to read from.
 * @seq:        The sequence number of the record to read.
 * @info:       A buffer to store the read record meta data to.
 * @line_count: A buffer to store the number of lines in the record text.
 *
 * This is the public function available to readers to read only the
 * meta data of a record.
 *
 * The reader provides the @info, @line_count buffers to be filled in.
 * Either of the buffer pointers can be set to NULL if the reader is not
 * interested in that data.
 *
 * Context: Any context.
 * Return: true if a record's meta data was read, otherwise false.
 *
 * On success, the reader must check info->seq to see which record meta data
 * was actually read. This allows the reader to detect dropped records.
 *
 * Failure means @seq refers to a not yet written record.
 */
bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,
			 struct printk_info *info, unsigned int *line_count)
{
	struct printk_record r;

	/* A record wrapper with no text buffer: only meta data is copied. */
	prb_rec_init_rd(&r, info, NULL, 0);

	return _prb_read_valid(rb, &seq, &r, line_count);
}

/**
 * prb_first_valid_seq() - Get the sequence number of the oldest available
 *                         record.
 *
 * @rb: The ringbuffer to get the sequence number from.
 *
 * This is the public function available to readers to see what the
 * first/oldest valid sequence number is.
 *
 * This provides readers a starting point to begin iterating the ringbuffer.
 *
 * Context: Any context.
 * Return: The sequence number of the first/oldest record or, if the
 *         ringbuffer is empty, 0 is returned.
 */
u64 prb_first_valid_seq(struct printk_ringbuffer *rb)
{
	u64 seq = 0;

	if (!_prb_read_valid(rb, &seq, NULL, NULL))
		return 0;

	return seq;
}

/**
 * prb_next_seq() - Get the sequence number after the last available record.
 *
 * @rb: The ringbuffer to get the sequence number from.
 *
 * This is the public function available to readers to see what the next
 * newest sequence number available to readers will be.
 *
 * This provides readers a sequence number to jump to if all currently
 * available records should be skipped.
 *
 * Context: Any context.
 * Return: The sequence number of the next newest (not yet available) record
 *         for readers.
 */
u64 prb_next_seq(struct printk_ringbuffer *rb)
{
	u64 seq = 0;

	/* Search forward from the oldest descriptor. */
	while (_prb_read_valid(rb, &seq, NULL, NULL))
		seq++;

	return seq;
}

/**
 * prb_init() - Initialize a ringbuffer to use provided external buffers.
 *
 * @rb:       The ringbuffer to initialize.
 * @text_buf: The data buffer for text data.
 * @textbits: The size of @text_buf as a power-of-2 value.
 * @descs:    The descriptor buffer for ringbuffer records.
 * @descbits: The count of @descs items as a power-of-2 value.
 * @infos:    The printk_info buffer for ringbuffer records.
 *
 * This is the public function available to writers to setup a ringbuffer
 * during runtime using provided buffers.
 *
 * This must match the initialization of DEFINE_PRINTKRB().
 *
 * Context: Any context.
 */
void prb_init(struct printk_ringbuffer *rb,
	      char *text_buf, unsigned int textbits,
	      struct prb_desc *descs, unsigned int descbits,
	      struct printk_info *infos)
{
	memset(descs, 0, _DESCS_COUNT(descbits) * sizeof(descs[0]));
	memset(infos, 0, _DESCS_COUNT(descbits) * sizeof(infos[0]));

	rb->desc_ring.count_bits = descbits;
	rb->desc_ring.descs = descs;
	rb->desc_ring.infos = infos;
	atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));
	atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits));

	rb->text_data_ring.size_bits = textbits;
	rb->text_data_ring.data = text_buf;
	atomic_long_set(&rb->text_data_ring.head_lpos, BLK0_LPOS(textbits));
	atomic_long_set(&rb->text_data_ring.tail_lpos, BLK0_LPOS(textbits));

	atomic_long_set(&rb->fail, 0);

	/*
	 * The last descriptor is the initial head/tail: a data-less,
	 * reusable record. See the "Descriptor Bootstrap" comment block
	 * in printk_ringbuffer.h.
	 */
	atomic_long_set(&(descs[_DESCS_COUNT(descbits) - 1].state_var), DESC0_SV(descbits));
	descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.begin = FAILED_LPOS;
	descs[_DESCS_COUNT(descbits) - 1].text_blk_lpos.next = FAILED_LPOS;

	/* See "Hack #1" and "Hack #2" in printk_ringbuffer.h. */
	infos[0].seq = -(u64)_DESCS_COUNT(descbits);
	infos[_DESCS_COUNT(descbits) - 1].seq = 0;
}

/**
 * prb_record_text_space() - Query the full actual used ringbuffer space for
 *                           the text data of a reserved entry.
 *
 * @e: The successfully reserved entry to query.
 *
 * This is the public function available to writers to see how much actual
 * space is used in the ringbuffer to store the text data of the specified
 * entry.
 *
 * This function is only valid if @e has been successfully reserved using
 * prb_reserve().
 *
 * Context: Any context.
 * Return: The size in bytes used by the text data of the associated record.
 */
unsigned int prb_record_text_space(struct prb_reserved_entry *e)
{
	return e->text_space;
}
+382
kernel/printk/printk_ringbuffer.h
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef _KERNEL_PRINTK_RINGBUFFER_H
#define _KERNEL_PRINTK_RINGBUFFER_H

#include <linux/atomic.h>
#include <linux/dev_printk.h>

/*
 * Meta information about each stored message.
 *
 * All fields are set by the printk code except for @seq, which is
 * set by the ringbuffer code.
 */
struct printk_info {
	u64	seq;		/* sequence number */
	u64	ts_nsec;	/* timestamp in nanoseconds */
	u16	text_len;	/* length of text message */
	u8	facility;	/* syslog facility */
	u8	flags:5;	/* internal record flags */
	u8	level:3;	/* syslog level */
	u32	caller_id;	/* thread id or processor id */

	struct dev_printk_info	dev_info;
};

/*
 * A structure providing the buffers, used by writers and readers.
 *
 * Writers:
 * Using prb_rec_init_wr(), a writer sets @text_buf_size before calling
 * prb_reserve(). On success, prb_reserve() sets @info and @text_buf to
 * buffers reserved for that writer.
 *
 * Readers:
 * Using prb_rec_init_rd(), a reader sets all fields before calling
 * prb_read_valid(). Note that the reader provides the @info and @text_buf,
 * buffers. On success, the struct pointed to by @info will be filled and
 * the char array pointed to by @text_buf will be filled with text data.
 */
struct printk_record {
	struct printk_info	*info;
	char			*text_buf;
	unsigned int		text_buf_size;
};

/* Specifies the logical position and span of a data block. */
struct prb_data_blk_lpos {
	unsigned long	begin;
	unsigned long	next;
};

/*
 * A descriptor: the complete meta-data for a record.
 *
 * @state_var: A bitwise combination of descriptor ID and descriptor state.
 */
struct prb_desc {
	atomic_long_t			state_var;
	struct prb_data_blk_lpos	text_blk_lpos;
};

/* A ringbuffer of "ID + data" elements. */
struct prb_data_ring {
	unsigned int	size_bits;
	char		*data;
	atomic_long_t	head_lpos;
	atomic_long_t	tail_lpos;
};

/* A ringbuffer of "struct prb_desc" elements. */
struct prb_desc_ring {
	unsigned int		count_bits;
	struct prb_desc		*descs;
	struct printk_info	*infos;
	atomic_long_t		head_id;
	atomic_long_t		tail_id;
};

/*
 * The high level structure representing the printk ringbuffer.
 *
 * @fail: Count of failed prb_reserve() calls where not even a data-less
 *        record was created.
 */
struct printk_ringbuffer {
	struct prb_desc_ring	desc_ring;
	struct prb_data_ring	text_data_ring;
	atomic_long_t		fail;
};

/*
 * Used by writers as a reserve/commit handle.
 *
 * @rb:         Ringbuffer where the entry is reserved.
 * @irqflags:   Saved irq flags to restore on entry commit.
 * @id:         ID of the reserved descriptor.
 * @text_space: Total occupied buffer space in the text data ring, including
 *              ID, alignment padding, and wrapping data blocks.
 *
 * This structure is an opaque handle for writers. Its contents are only
 * to be used by the ringbuffer implementation.
 */
struct prb_reserved_entry {
	struct printk_ringbuffer	*rb;
	unsigned long			irqflags;
	unsigned long			id;
	unsigned int			text_space;
};

/* The possible responses of a descriptor state-query. */
enum desc_state {
	desc_miss	=  -1,	/* ID mismatch (pseudo state) */
	desc_reserved	= 0x0,	/* reserved, in use by writer */
	desc_committed	= 0x1,	/* committed by writer, could get reopened */
	desc_finalized	= 0x2,	/* committed, no further modification allowed */
	desc_reusable	= 0x3,	/* free, not yet used by any writer */
};

#define _DATA_SIZE(sz_bits)	(1UL << (sz_bits))
#define _DESCS_COUNT(ct_bits)	(1U << (ct_bits))
#define DESC_SV_BITS		(sizeof(unsigned long) * 8)
#define DESC_FLAGS_SHIFT	(DESC_SV_BITS - 2)
#define DESC_FLAGS_MASK		(3UL << DESC_FLAGS_SHIFT)
#define DESC_STATE(sv)		(3UL & (sv >> DESC_FLAGS_SHIFT))
#define DESC_SV(id, state)	(((unsigned long)state << DESC_FLAGS_SHIFT) | id)
#define DESC_ID_MASK		(~DESC_FLAGS_MASK)
#define DESC_ID(sv)		((sv) & DESC_ID_MASK)
#define FAILED_LPOS		0x1
#define NO_LPOS			0x3

#define FAILED_BLK_LPOS	\
{				\
	.begin	= FAILED_LPOS,	\
	.next	= FAILED_LPOS,	\
}

/*
 * Descriptor Bootstrap
 *
 * The descriptor array is minimally initialized to allow immediate usage
 * by readers and writers. The requirements that the descriptor array
 * initialization must satisfy:
 *
 *   Req1
 *     The tail must point to an existing (committed or reusable) descriptor.
 *     This is required by the implementation of prb_first_seq().
 *
 *   Req2
 *     Readers must see that the ringbuffer is initially empty.
 *
 *   Req3
 *     The first record reserved by a writer is assigned sequence number 0.
 *
 * To satisfy Req1, the tail initially points to a descriptor that is
 * minimally initialized (having no data block, i.e. data-less with the
 * data block's lpos @begin and @next values set to FAILED_LPOS).
 *
 * To satisfy Req2, the initial tail descriptor is initialized to the
 * reusable state. Readers recognize reusable descriptors as existing
 * records, but skip over them.
 *
 * To satisfy Req3, the last descriptor in the array is used as the initial
 * head (and tail) descriptor. This allows the first record reserved by a
 * writer (head + 1) to be the first descriptor in the array. (Only the first
 * descriptor in the array could have a valid sequence number of 0.)
 *
 * The first time a descriptor is reserved, it is assigned a sequence number
 * with the value of the array index. A "first time reserved" descriptor can
 * be recognized because it has a sequence number of 0 but does not have an
 * index of 0. (Only the first descriptor in the array could have a valid
 * sequence number of 0.) After the first reservation, all future reservations
 * (recycling) simply involve incrementing the sequence number by the array
 * count.
 *
 *   Hack #1
 *     Only the first descriptor in the array is allowed to have the sequence
 *     number 0. In this case it is not possible to recognize if it is being
 *     reserved the first time (set to index value) or has been reserved
 *     previously (increment by the array count). This is handled by _always_
 *     incrementing the sequence number by the array count when reserving the
 *     first descriptor in the array. In order to satisfy Req3, the sequence
 *     number of the first descriptor in the array is initialized to minus
 *     the array count. Then, upon the first reservation, it is incremented
 *     to 0, thus satisfying Req3.
 *
 *   Hack #2
 *     prb_first_seq() can be called at any time by readers to retrieve the
 *     sequence number of the tail descriptor. However, due to Req2 and Req3,
 *     initially there are no records to report the sequence number of
 *     (sequence numbers are u64 and there is nothing less than 0). To handle
 *     this, the sequence number of the initial tail descriptor is initialized
 *     to 0. Technically this is incorrect, because there is no record with
 *     sequence number 0 (yet) and the tail descriptor is not the first
 *     descriptor in the array. But it allows prb_read_valid() to correctly
 *     report the existence of a record for _any_ given sequence number at all
 *     times. Bootstrapping is complete when the tail is pushed the first
 *     time, thus finally pointing to the first descriptor reserved by a
 *     writer, which has the assigned sequence number 0.
 */

/*
 * Initiating Logical Value Overflows
 *
 * Both logical position (lpos) and ID values can be mapped to array indexes
 * but may experience overflows during the lifetime of the system. To ensure
 * that printk_ringbuffer can handle the overflows for these types, initial
 * values are chosen that map to the correct initial array indexes, but will
 * result in overflows soon.
 *
 *   BLK0_LPOS
 *     The initial @head_lpos and @tail_lpos for data rings. It is at index
 *     0 and the lpos value is such that it will overflow on the first wrap.
 *
 *   DESC0_ID
 *     The initial @head_id and @tail_id for the desc ring. It is at the last
 *     index of the descriptor array (see Req3 above) and the ID value is such
 *     that it will overflow on the second wrap.
 */
#define BLK0_LPOS(sz_bits)	(-(_DATA_SIZE(sz_bits)))
#define DESC0_ID(ct_bits)	DESC_ID(-(_DESCS_COUNT(ct_bits) + 1))
#define DESC0_SV(ct_bits)	DESC_SV(DESC0_ID(ct_bits), desc_reusable)

/*
 * Define a ringbuffer with an external text data buffer. The same as
 * DEFINE_PRINTKRB() but requires specifying an external buffer for the
 * text data.
228 + * 229 + * Note: The specified external buffer must be of the size: 230 + * 2 ^ (descbits + avgtextbits) 231 + */ 232 + #define _DEFINE_PRINTKRB(name, descbits, avgtextbits, text_buf) \ 233 + static struct prb_desc _##name##_descs[_DESCS_COUNT(descbits)] = { \ 234 + /* the initial head and tail */ \ 235 + [_DESCS_COUNT(descbits) - 1] = { \ 236 + /* reusable */ \ 237 + .state_var = ATOMIC_INIT(DESC0_SV(descbits)), \ 238 + /* no associated data block */ \ 239 + .text_blk_lpos = FAILED_BLK_LPOS, \ 240 + }, \ 241 + }; \ 242 + static struct printk_info _##name##_infos[_DESCS_COUNT(descbits)] = { \ 243 + /* this will be the first record reserved by a writer */ \ 244 + [0] = { \ 245 + /* will be incremented to 0 on the first reservation */ \ 246 + .seq = -(u64)_DESCS_COUNT(descbits), \ 247 + }, \ 248 + /* the initial head and tail */ \ 249 + [_DESCS_COUNT(descbits) - 1] = { \ 250 + /* reports the first seq value during the bootstrap phase */ \ 251 + .seq = 0, \ 252 + }, \ 253 + }; \ 254 + static struct printk_ringbuffer name = { \ 255 + .desc_ring = { \ 256 + .count_bits = descbits, \ 257 + .descs = &_##name##_descs[0], \ 258 + .infos = &_##name##_infos[0], \ 259 + .head_id = ATOMIC_INIT(DESC0_ID(descbits)), \ 260 + .tail_id = ATOMIC_INIT(DESC0_ID(descbits)), \ 261 + }, \ 262 + .text_data_ring = { \ 263 + .size_bits = (avgtextbits) + (descbits), \ 264 + .data = text_buf, \ 265 + .head_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ 266 + .tail_lpos = ATOMIC_LONG_INIT(BLK0_LPOS((avgtextbits) + (descbits))), \ 267 + }, \ 268 + .fail = ATOMIC_LONG_INIT(0), \ 269 + } 270 + 271 + /** 272 + * DEFINE_PRINTKRB() - Define a ringbuffer. 273 + * 274 + * @name: The name of the ringbuffer variable. 275 + * @descbits: The number of descriptors as a power-of-2 value. 276 + * @avgtextbits: The average text data size per record as a power-of-2 value. 
277 + * 278 + * This is a macro for defining a ringbuffer and all internal structures 279 + * such that it is ready for immediate use. See _DEFINE_PRINTKRB() for a 280 + * variant where the text data buffer can be specified externally. 281 + */ 282 + #define DEFINE_PRINTKRB(name, descbits, avgtextbits) \ 283 + static char _##name##_text[1U << ((avgtextbits) + (descbits))] \ 284 + __aligned(__alignof__(unsigned long)); \ 285 + _DEFINE_PRINTKRB(name, descbits, avgtextbits, &_##name##_text[0]) 286 + 287 + /* Writer Interface */ 288 + 289 + /** 290 + * prb_rec_init_wr() - Initialize a buffer for writing records. 291 + * 292 + * @r: The record to initialize. 293 + * @text_buf_size: The needed text buffer size. 294 + */ 295 + static inline void prb_rec_init_wr(struct printk_record *r, 296 + unsigned int text_buf_size) 297 + { 298 + r->info = NULL; 299 + r->text_buf = NULL; 300 + r->text_buf_size = text_buf_size; 301 + } 302 + 303 + bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, 304 + struct printk_record *r); 305 + bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb, 306 + struct printk_record *r, u32 caller_id, unsigned int max_size); 307 + void prb_commit(struct prb_reserved_entry *e); 308 + void prb_final_commit(struct prb_reserved_entry *e); 309 + 310 + void prb_init(struct printk_ringbuffer *rb, 311 + char *text_buf, unsigned int text_buf_size, 312 + struct prb_desc *descs, unsigned int descs_count_bits, 313 + struct printk_info *infos); 314 + unsigned int prb_record_text_space(struct prb_reserved_entry *e); 315 + 316 + /* Reader Interface */ 317 + 318 + /** 319 + * prb_rec_init_rd() - Initialize a buffer for reading records. 320 + * 321 + * @r: The record to initialize. 322 + * @info: A buffer to store record meta-data. 323 + * @text_buf: A buffer to store text data. 324 + * @text_buf_size: The size of @text_buf. 325 + * 326 + * Initialize all the fields that a reader is interested in. 
All arguments 327 + * (except @r) are optional. Only record data for arguments that are 328 + * non-NULL or non-zero will be read. 329 + */ 330 + static inline void prb_rec_init_rd(struct printk_record *r, 331 + struct printk_info *info, 332 + char *text_buf, unsigned int text_buf_size) 333 + { 334 + r->info = info; 335 + r->text_buf = text_buf; 336 + r->text_buf_size = text_buf_size; 337 + } 338 + 339 + /** 340 + * prb_for_each_record() - Iterate over the records of a ringbuffer. 341 + * 342 + * @from: The sequence number to begin with. 343 + * @rb: The ringbuffer to iterate over. 344 + * @s: A u64 to store the sequence number on each iteration. 345 + * @r: A printk_record to store the record on each iteration. 346 + * 347 + * This is a macro for conveniently iterating over a ringbuffer. 348 + * Note that @s may not be the sequence number of the record on each 349 + * iteration. For the sequence number, @r->info->seq should be checked. 350 + * 351 + * Context: Any context. 352 + */ 353 + #define prb_for_each_record(from, rb, s, r) \ 354 + for ((s) = from; prb_read_valid(rb, s, r); (s) = (r)->info->seq + 1) 355 + 356 + /** 357 + * prb_for_each_info() - Iterate over the meta data of a ringbuffer. 358 + * 359 + * @from: The sequence number to begin with. 360 + * @rb: The ringbuffer to iterate over. 361 + * @s: A u64 to store the sequence number on each iteration. 362 + * @i: A printk_info to store the record meta data on each iteration. 363 + * @lc: An unsigned int to store the text line count of each record. 364 + * 365 + * This is a macro for conveniently iterating over a ringbuffer. 366 + * Note that @s may not be the sequence number of the record on each 367 + * iteration. For the sequence number, @i->seq should be checked. 368 + * 369 + * Context: Any context. 
370 + */ 371 + #define prb_for_each_info(from, rb, s, i, lc) \ 372 + for ((s) = from; prb_read_valid_info(rb, s, i, lc); (s) = (i)->seq + 1) 373 + 374 + bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq, 375 + struct printk_record *r); 376 + bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq, 377 + struct printk_info *info, unsigned int *line_count); 378 + 379 + u64 prb_first_valid_seq(struct printk_ringbuffer *rb); 380 + u64 prb_next_seq(struct printk_ringbuffer *rb); 381 + 382 + #endif /* _KERNEL_PRINTK_RINGBUFFER_H */
+1 -1
kernel/printk/printk_safe.c
··· 375 375 raw_spin_trylock(&logbuf_lock)) { 376 376 int len; 377 377 378 - len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); 378 + len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, fmt, args); 379 379 raw_spin_unlock(&logbuf_lock); 380 380 defer_console_output(); 381 381 return len;
+108 -35
scripts/gdb/linux/dmesg.py
··· 16 16 17 17 from linux import utils 18 18 19 - printk_log_type = utils.CachedType("struct printk_log") 20 - 19 + printk_info_type = utils.CachedType("struct printk_info") 20 + prb_data_blk_lpos_type = utils.CachedType("struct prb_data_blk_lpos") 21 + prb_desc_type = utils.CachedType("struct prb_desc") 22 + prb_desc_ring_type = utils.CachedType("struct prb_desc_ring") 23 + prb_data_ring_type = utils.CachedType("struct prb_data_ring") 24 + printk_ringbuffer_type = utils.CachedType("struct printk_ringbuffer") 25 + atomic_long_type = utils.CachedType("atomic_long_t") 21 26 22 27 class LxDmesg(gdb.Command): 23 28 """Print Linux kernel log buffer.""" ··· 31 26 super(LxDmesg, self).__init__("lx-dmesg", gdb.COMMAND_DATA) 32 27 33 28 def invoke(self, arg, from_tty): 34 - log_buf_addr = int(str(gdb.parse_and_eval( 35 - "(void *)'printk.c'::log_buf")).split()[0], 16) 36 - log_first_idx = int(gdb.parse_and_eval("'printk.c'::log_first_idx")) 37 - log_next_idx = int(gdb.parse_and_eval("'printk.c'::log_next_idx")) 38 - log_buf_len = int(gdb.parse_and_eval("'printk.c'::log_buf_len")) 39 - 40 29 inf = gdb.inferiors()[0] 41 - start = log_buf_addr + log_first_idx 42 - if log_first_idx < log_next_idx: 43 - log_buf_2nd_half = -1 44 - length = log_next_idx - log_first_idx 45 - log_buf = utils.read_memoryview(inf, start, length).tobytes() 46 - else: 47 - log_buf_2nd_half = log_buf_len - log_first_idx 48 - a = utils.read_memoryview(inf, start, log_buf_2nd_half) 49 - b = utils.read_memoryview(inf, log_buf_addr, log_next_idx) 50 - log_buf = a.tobytes() + b.tobytes() 51 30 52 - length_offset = printk_log_type.get_type()['len'].bitpos // 8 53 - text_len_offset = printk_log_type.get_type()['text_len'].bitpos // 8 54 - time_stamp_offset = printk_log_type.get_type()['ts_nsec'].bitpos // 8 55 - text_offset = printk_log_type.get_type().sizeof 31 + # read in prb structure 32 + prb_addr = int(str(gdb.parse_and_eval("(void *)'printk.c'::prb")).split()[0], 16) 33 + sz = 
printk_ringbuffer_type.get_type().sizeof 34 + prb = utils.read_memoryview(inf, prb_addr, sz).tobytes() 56 35 57 - pos = 0 58 - while pos < log_buf.__len__(): 59 - length = utils.read_u16(log_buf, pos + length_offset) 60 - if length == 0: 61 - if log_buf_2nd_half == -1: 62 - gdb.write("Corrupted log buffer!\n") 36 + # read in descriptor ring structure 37 + off = printk_ringbuffer_type.get_type()['desc_ring'].bitpos // 8 38 + addr = prb_addr + off 39 + sz = prb_desc_ring_type.get_type().sizeof 40 + desc_ring = utils.read_memoryview(inf, addr, sz).tobytes() 41 + 42 + # read in descriptor array 43 + off = prb_desc_ring_type.get_type()['count_bits'].bitpos // 8 44 + desc_ring_count = 1 << utils.read_u32(desc_ring, off) 45 + desc_sz = prb_desc_type.get_type().sizeof 46 + off = prb_desc_ring_type.get_type()['descs'].bitpos // 8 47 + addr = utils.read_ulong(desc_ring, off) 48 + descs = utils.read_memoryview(inf, addr, desc_sz * desc_ring_count).tobytes() 49 + 50 + # read in info array 51 + info_sz = printk_info_type.get_type().sizeof 52 + off = prb_desc_ring_type.get_type()['infos'].bitpos // 8 53 + addr = utils.read_ulong(desc_ring, off) 54 + infos = utils.read_memoryview(inf, addr, info_sz * desc_ring_count).tobytes() 55 + 56 + # read in text data ring structure 57 + off = printk_ringbuffer_type.get_type()['text_data_ring'].bitpos // 8 58 + addr = prb_addr + off 59 + sz = prb_data_ring_type.get_type().sizeof 60 + text_data_ring = utils.read_memoryview(inf, addr, sz).tobytes() 61 + 62 + # read in text data 63 + off = prb_data_ring_type.get_type()['size_bits'].bitpos // 8 64 + text_data_sz = 1 << utils.read_u32(text_data_ring, off) 65 + off = prb_data_ring_type.get_type()['data'].bitpos // 8 66 + addr = utils.read_ulong(text_data_ring, off) 67 + text_data = utils.read_memoryview(inf, addr, text_data_sz).tobytes() 68 + 69 + counter_off = atomic_long_type.get_type()['counter'].bitpos // 8 70 + 71 + sv_off = prb_desc_type.get_type()['state_var'].bitpos // 8 72 + 73 + off = 
prb_desc_type.get_type()['text_blk_lpos'].bitpos // 8 74 + begin_off = off + (prb_data_blk_lpos_type.get_type()['begin'].bitpos // 8) 75 + next_off = off + (prb_data_blk_lpos_type.get_type()['next'].bitpos // 8) 76 + 77 + ts_off = printk_info_type.get_type()['ts_nsec'].bitpos // 8 78 + len_off = printk_info_type.get_type()['text_len'].bitpos // 8 79 + 80 + # definitions from kernel/printk/printk_ringbuffer.h 81 + desc_committed = 1 82 + desc_finalized = 2 83 + desc_sv_bits = utils.get_long_type().sizeof * 8 84 + desc_flags_shift = desc_sv_bits - 2 85 + desc_flags_mask = 3 << desc_flags_shift 86 + desc_id_mask = ~desc_flags_mask 87 + 88 + # read in tail and head descriptor ids 89 + off = prb_desc_ring_type.get_type()['tail_id'].bitpos // 8 90 + tail_id = utils.read_u64(desc_ring, off + counter_off) 91 + off = prb_desc_ring_type.get_type()['head_id'].bitpos // 8 92 + head_id = utils.read_u64(desc_ring, off + counter_off) 93 + 94 + did = tail_id 95 + while True: 96 + ind = did % desc_ring_count 97 + desc_off = desc_sz * ind 98 + info_off = info_sz * ind 99 + 100 + # skip non-committed record 101 + state = 3 & (utils.read_u64(descs, desc_off + sv_off + 102 + counter_off) >> desc_flags_shift) 103 + if state != desc_committed and state != desc_finalized: 104 + if did == head_id: 63 105 break 64 - pos = log_buf_2nd_half 106 + did = (did + 1) & desc_id_mask 65 107 continue 66 108 67 - text_len = utils.read_u16(log_buf, pos + text_len_offset) 68 - text_start = pos + text_offset 69 - text = log_buf[text_start:text_start + text_len].decode( 70 - encoding='utf8', errors='replace') 71 - time_stamp = utils.read_u64(log_buf, pos + time_stamp_offset) 109 + begin = utils.read_ulong(descs, desc_off + begin_off) % text_data_sz 110 + end = utils.read_ulong(descs, desc_off + next_off) % text_data_sz 111 + 112 + # handle data-less record 113 + if begin & 1 == 1: 114 + text = "" 115 + else: 116 + # handle wrapping data block 117 + if begin > end: 118 + begin = 0 119 + 120 + # skip over 
descriptor id 121 + text_start = begin + utils.get_long_type().sizeof 122 + 123 + text_len = utils.read_u16(infos, info_off + len_off) 124 + 125 + # handle truncated message 126 + if end - text_start < text_len: 127 + text_len = end - text_start 128 + 129 + text = text_data[text_start:text_start + text_len].decode( 130 + encoding='utf8', errors='replace') 131 + 132 + time_stamp = utils.read_u64(infos, info_off + ts_off) 72 133 73 134 for line in text.splitlines(): 74 135 msg = u"[{time:12.6f}] {line}\n".format( ··· 146 75 msg = msg.encode(encoding='utf8', errors='replace') 147 76 gdb.write(msg) 148 77 149 - pos += length 78 + if did == head_id: 79 + break 80 + did = (did + 1) & desc_id_mask 150 81 151 82 152 83 LxDmesg()
+7
scripts/gdb/linux/utils.py
··· 123 123 return read_u32(buffer, offset + 4) + (read_u32(buffer, offset) << 32) 124 124 125 125 126 + def read_ulong(buffer, offset): 127 + if get_long_type().sizeof == 8: 128 + return read_u64(buffer, offset) 129 + else: 130 + return read_u32(buffer, offset) 131 + 132 + 126 133 target_arch = None 127 134 128 135