Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

kmsg: export printk records to the /dev/kmsg interface

Support for multiple concurrent readers of /dev/kmsg, with read(),
seek(), poll() support. Output of message sequence numbers, to allow
userspace log consumers to reliably reconnect and reconstruct their
state at any given time. After open("/dev/kmsg"), read() always
returns *all* buffered records. If only future messages should be
read, SEEK_END can be used. In case records get overwritten while
/dev/kmsg is held open, or records get overwritten faster than they
are read, the next read() will return -EPIPE and the current reading
position gets updated to the next available record. The passed
sequence numbers allow the log consumer to calculate the number of
lost messages.

[root@mop ~]# cat /dev/kmsg
5,0,0;Linux version 3.4.0-rc1+ (kay@mop) (gcc version 4.7.0 20120315 ...
6,159,423091;ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff])
7,160,424069;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored)
SUBSYSTEM=acpi
DEVICE=+acpi:PNP0A03:00
6,339,5140900;NET: Registered protocol family 10
30,340,5690716;udevd[80]: starting version 181
6,341,6081421;FDC 0 is a S82078B
6,345,6154686;microcode: CPU0 sig=0x623, pf=0x0, revision=0x0
7,346,6156968;sr 1:0:0:0: Attached scsi CD-ROM sr0
SUBSYSTEM=scsi
DEVICE=+scsi:1:0:0:0
6,347,6289375;microcode: CPU1 sig=0x623, pf=0x0, revision=0x0

Cc: Karel Zak <kzak@redhat.com>
Tested-by: William Douglas <william.douglas@intel.com>
Signed-off-by: Kay Sievers <kay@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Kay Sievers and committed by
Greg Kroah-Hartman
e11fea92 7ff9554b

+316 -60
+1 -60
drivers/char/mem.c
··· 807 807 }; 808 808 #endif 809 809 810 - static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv, 811 - unsigned long count, loff_t pos) 812 - { 813 - char *buf, *line; 814 - int i; 815 - int level = default_message_loglevel; 816 - int facility = 1; /* LOG_USER */ 817 - size_t len = iov_length(iv, count); 818 - ssize_t ret = len; 819 - 820 - if (len > 1024) 821 - return -EINVAL; 822 - buf = kmalloc(len+1, GFP_KERNEL); 823 - if (buf == NULL) 824 - return -ENOMEM; 825 - 826 - line = buf; 827 - for (i = 0; i < count; i++) { 828 - if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) 829 - goto out; 830 - line += iv[i].iov_len; 831 - } 832 - 833 - /* 834 - * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace 835 - * the decimal value represents 32bit, the lower 3 bit are the log 836 - * level, the rest are the log facility. 837 - * 838 - * If no prefix or no userspace facility is specified, we 839 - * enforce LOG_USER, to be able to reliably distinguish 840 - * kernel-generated messages from userspace-injected ones. 
841 - */ 842 - line = buf; 843 - if (line[0] == '<') { 844 - char *endp = NULL; 845 - 846 - i = simple_strtoul(line+1, &endp, 10); 847 - if (endp && endp[0] == '>') { 848 - level = i & 7; 849 - if (i >> 3) 850 - facility = i >> 3; 851 - endp++; 852 - len -= endp - line; 853 - line = endp; 854 - } 855 - } 856 - line[len] = '\0'; 857 - 858 - printk_emit(facility, level, NULL, 0, "%s", line); 859 - out: 860 - kfree(buf); 861 - return ret; 862 - } 863 - 864 - static const struct file_operations kmsg_fops = { 865 - .aio_write = kmsg_writev, 866 - .llseek = noop_llseek, 867 - }; 868 - 869 810 static const struct memdev { 870 811 const char *name; 871 812 umode_t mode; ··· 825 884 [7] = { "full", 0666, &full_fops, NULL }, 826 885 [8] = { "random", 0666, &random_fops, NULL }, 827 886 [9] = { "urandom", 0666, &urandom_fops, NULL }, 828 - [11] = { "kmsg", 0, &kmsg_fops, NULL }, 887 + [11] = { "kmsg", 0644, &kmsg_fops, NULL }, 829 888 #ifdef CONFIG_CRASH_DUMP 830 889 [12] = { "oldmem", 0, &oldmem_fops, NULL }, 831 890 #endif
+2
include/linux/printk.h
··· 300 300 no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) 301 301 #endif 302 302 303 + extern const struct file_operations kmsg_fops; 304 + 303 305 enum { 304 306 DUMP_PREFIX_NONE, 305 307 DUMP_PREFIX_ADDRESS,
+313
kernel/printk.c
··· 41 41 #include <linux/cpu.h> 42 42 #include <linux/notifier.h> 43 43 #include <linux/rculist.h> 44 + #include <linux/poll.h> 44 45 45 46 #include <asm/uaccess.h> 46 47 ··· 150 149 * length of the message text is stored in the header, the stored message 151 150 * is not terminated. 152 151 * 152 + * Optionally, a message can carry a dictionary of properties (key/value pairs), 153 + * to provide userspace with a machine-readable message context. 154 + * 155 + * Examples for well-defined, commonly used property names are: 156 + * DEVICE=b12:8 device identifier 157 + * b12:8 block dev_t 158 + * c127:3 char dev_t 159 + * n8 netdev ifindex 160 + * +sound:card0 subsystem:devname 161 + * SUBSYSTEM=pci driver-core subsystem name 162 + * 163 + * Valid characters in property names are [a-zA-Z0-9.-_]. The plain text value 164 + * follows directly after a '=' character. Every property is terminated by 165 + * a '\0' character. The last property is not terminated. 166 + * 167 + * Example of a message structure: 168 + * 0000 ff 8f 00 00 00 00 00 00 monotonic time in nsec 169 + * 0008 34 00 record is 52 bytes long 170 + * 000a 0b 00 text is 11 bytes long 171 + * 000c 1f 00 dictionary is 23 bytes long 172 + * 000e 03 00 LOG_KERN (facility) LOG_ERR (level) 173 + * 0010 69 74 27 73 20 61 20 6c "it's a l" 174 + * 69 6e 65 "ine" 175 + * 001b 44 45 56 49 43 "DEVIC" 176 + * 45 3d 62 38 3a 32 00 44 "E=b8:2\0D" 177 + * 52 49 56 45 52 3d 62 75 "RIVER=bu" 178 + * 67 "g" 179 + * 0032 00 00 00 padding to next message header 180 + * 181 + * The 'struct log' buffer header must never be directly exported to 182 + * userspace, it is a kernel-private implementation detail that might 183 + * need to be changed in the future, when the requirements change. 
184 + * 185 + * /dev/kmsg exports the structured data in the following line format: 186 + * "level,sequnum,timestamp;<message text>\n" 187 + * 188 + * The optional key/value pairs are attached as continuation lines starting 189 + * with a space character and terminated by a newline. All possible 190 + * non-prinatable characters are escaped in the "\xff" notation. 191 + * 192 + * Users of the export format should ignore possible additional values 193 + * separated by ',', and find the message after the ';' character. 153 194 */ 154 195 155 196 struct log { ··· 339 296 log_next_idx += msg->len; 340 297 log_next_seq++; 341 298 } 299 + 300 + /* /dev/kmsg - userspace message inject/listen interface */ 301 + struct devkmsg_user { 302 + u64 seq; 303 + u32 idx; 304 + struct mutex lock; 305 + char buf[8192]; 306 + }; 307 + 308 + static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv, 309 + unsigned long count, loff_t pos) 310 + { 311 + char *buf, *line; 312 + int i; 313 + int level = default_message_loglevel; 314 + int facility = 1; /* LOG_USER */ 315 + size_t len = iov_length(iv, count); 316 + ssize_t ret = len; 317 + 318 + if (len > LOG_LINE_MAX) 319 + return -EINVAL; 320 + buf = kmalloc(len+1, GFP_KERNEL); 321 + if (buf == NULL) 322 + return -ENOMEM; 323 + 324 + line = buf; 325 + for (i = 0; i < count; i++) { 326 + if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) 327 + goto out; 328 + line += iv[i].iov_len; 329 + } 330 + 331 + /* 332 + * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace 333 + * the decimal value represents 32bit, the lower 3 bit are the log 334 + * level, the rest are the log facility. 335 + * 336 + * If no prefix or no userspace facility is specified, we 337 + * enforce LOG_USER, to be able to reliably distinguish 338 + * kernel-generated messages from userspace-injected ones. 
339 + */ 340 + line = buf; 341 + if (line[0] == '<') { 342 + char *endp = NULL; 343 + 344 + i = simple_strtoul(line+1, &endp, 10); 345 + if (endp && endp[0] == '>') { 346 + level = i & 7; 347 + if (i >> 3) 348 + facility = i >> 3; 349 + endp++; 350 + len -= endp - line; 351 + line = endp; 352 + } 353 + } 354 + line[len] = '\0'; 355 + 356 + printk_emit(facility, level, NULL, 0, "%s", line); 357 + out: 358 + kfree(buf); 359 + return ret; 360 + } 361 + 362 + static ssize_t devkmsg_read(struct file *file, char __user *buf, 363 + size_t count, loff_t *ppos) 364 + { 365 + struct devkmsg_user *user = file->private_data; 366 + struct log *msg; 367 + size_t i; 368 + size_t len; 369 + ssize_t ret; 370 + 371 + if (!user) 372 + return -EBADF; 373 + 374 + mutex_lock(&user->lock); 375 + raw_spin_lock(&logbuf_lock); 376 + while (user->seq == log_next_seq) { 377 + if (file->f_flags & O_NONBLOCK) { 378 + ret = -EAGAIN; 379 + raw_spin_unlock(&logbuf_lock); 380 + goto out; 381 + } 382 + 383 + raw_spin_unlock(&logbuf_lock); 384 + ret = wait_event_interruptible(log_wait, 385 + user->seq != log_next_seq); 386 + if (ret) 387 + goto out; 388 + raw_spin_lock(&logbuf_lock); 389 + } 390 + 391 + if (user->seq < log_first_seq) { 392 + /* our last seen message is gone, return error and reset */ 393 + user->idx = log_first_idx; 394 + user->seq = log_first_seq; 395 + ret = -EPIPE; 396 + raw_spin_unlock(&logbuf_lock); 397 + goto out; 398 + } 399 + 400 + msg = log_from_idx(user->idx); 401 + len = sprintf(user->buf, "%u,%llu,%llu;", 402 + msg->level, user->seq, msg->ts_nsec / 1000); 403 + 404 + /* escape non-printable characters */ 405 + for (i = 0; i < msg->text_len; i++) { 406 + char c = log_text(msg)[i]; 407 + 408 + if (c < ' ' || c >= 128) 409 + len += sprintf(user->buf + len, "\\x%02x", c); 410 + else 411 + user->buf[len++] = c; 412 + } 413 + user->buf[len++] = '\n'; 414 + 415 + if (msg->dict_len) { 416 + bool line = true; 417 + 418 + for (i = 0; i < msg->dict_len; i++) { 419 + char c = 
log_dict(msg)[i]; 420 + 421 + if (line) { 422 + user->buf[len++] = ' '; 423 + line = false; 424 + } 425 + 426 + if (c == '\0') { 427 + user->buf[len++] = '\n'; 428 + line = true; 429 + continue; 430 + } 431 + 432 + if (c < ' ' || c >= 128) { 433 + len += sprintf(user->buf + len, "\\x%02x", c); 434 + continue; 435 + } 436 + 437 + user->buf[len++] = c; 438 + } 439 + user->buf[len++] = '\n'; 440 + } 441 + 442 + user->idx = log_next(user->idx); 443 + user->seq++; 444 + raw_spin_unlock(&logbuf_lock); 445 + 446 + if (len > count) { 447 + ret = -EINVAL; 448 + goto out; 449 + } 450 + 451 + if (copy_to_user(buf, user->buf, len)) { 452 + ret = -EFAULT; 453 + goto out; 454 + } 455 + ret = len; 456 + out: 457 + mutex_unlock(&user->lock); 458 + return ret; 459 + } 460 + 461 + static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) 462 + { 463 + struct devkmsg_user *user = file->private_data; 464 + loff_t ret = 0; 465 + 466 + if (!user) 467 + return -EBADF; 468 + if (offset) 469 + return -ESPIPE; 470 + 471 + raw_spin_lock(&logbuf_lock); 472 + switch (whence) { 473 + case SEEK_SET: 474 + /* the first record */ 475 + user->idx = log_first_idx; 476 + user->seq = log_first_seq; 477 + break; 478 + case SEEK_DATA: 479 + /* 480 + * The first record after the last SYSLOG_ACTION_CLEAR, 481 + * like issued by 'dmesg -c'. Reading /dev/kmsg itself 482 + * changes no global state, and does not clear anything. 
483 + */ 484 + user->idx = clear_idx; 485 + user->seq = clear_seq; 486 + break; 487 + case SEEK_END: 488 + /* after the last record */ 489 + user->idx = log_next_idx; 490 + user->seq = log_next_seq; 491 + break; 492 + default: 493 + ret = -EINVAL; 494 + } 495 + raw_spin_unlock(&logbuf_lock); 496 + return ret; 497 + } 498 + 499 + static unsigned int devkmsg_poll(struct file *file, poll_table *wait) 500 + { 501 + struct devkmsg_user *user = file->private_data; 502 + int ret = 0; 503 + 504 + if (!user) 505 + return POLLERR|POLLNVAL; 506 + 507 + poll_wait(file, &log_wait, wait); 508 + 509 + raw_spin_lock(&logbuf_lock); 510 + if (user->seq < log_next_seq) { 511 + /* return error when data has vanished underneath us */ 512 + if (user->seq < log_first_seq) 513 + ret = POLLIN|POLLRDNORM|POLLERR|POLLPRI; 514 + ret = POLLIN|POLLRDNORM; 515 + } 516 + raw_spin_unlock(&logbuf_lock); 517 + 518 + return ret; 519 + } 520 + 521 + static int devkmsg_open(struct inode *inode, struct file *file) 522 + { 523 + struct devkmsg_user *user; 524 + int err; 525 + 526 + /* write-only does not need any file context */ 527 + if ((file->f_flags & O_ACCMODE) == O_WRONLY) 528 + return 0; 529 + 530 + err = security_syslog(SYSLOG_ACTION_READ_ALL); 531 + if (err) 532 + return err; 533 + 534 + user = kmalloc(sizeof(struct devkmsg_user), GFP_KERNEL); 535 + if (!user) 536 + return -ENOMEM; 537 + 538 + mutex_init(&user->lock); 539 + 540 + raw_spin_lock(&logbuf_lock); 541 + user->idx = log_first_idx; 542 + user->seq = log_first_seq; 543 + raw_spin_unlock(&logbuf_lock); 544 + 545 + file->private_data = user; 546 + return 0; 547 + } 548 + 549 + static int devkmsg_release(struct inode *inode, struct file *file) 550 + { 551 + struct devkmsg_user *user = file->private_data; 552 + 553 + if (!user) 554 + return 0; 555 + 556 + mutex_destroy(&user->lock); 557 + kfree(user); 558 + return 0; 559 + } 560 + 561 + const struct file_operations kmsg_fops = { 562 + .open = devkmsg_open, 563 + .read = devkmsg_read, 564 + 
.aio_write = devkmsg_writev, 565 + .llseek = devkmsg_llseek, 566 + .poll = devkmsg_poll, 567 + .release = devkmsg_release, 568 + }; 342 569 343 570 #ifdef CONFIG_KEXEC 344 571 /*