Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

device coredump: add new device coredump class

Many devices run firmware and/or complex hardware, and most of that
can have bugs. When it misbehaves, however, it is often much harder
to debug than software running on the host.

Introduce a "device coredump" mechanism to allow dumping internal
device/firmware state through a generalized mechanism. As devices
are different and information needed can vary accordingly, this
doesn't prescribe a file format - it just provides a mechanism to
get at the data so that it can be captured in a generalized way
(e.g. by distributions).

The dumped data will be readable in sysfs in the virtual device's
data file under /sys/class/devcoredump/devcd*/. Writing to it will
free the data and remove the device, as does a 5-minute timeout.

Note that generalized capturing of such data may result in privacy
issues, so users generally need to be involved. In order to allow
certain users/system integrators/... to disable the feature entirely,
introduce a Kconfig option to override the drivers that would like
to have the feature.

For now, this provides two ways of dumping data:
1) with a vmalloc'ed area, that is then given to the subsystem
and freed after retrieval or timeout
2) with a generalized reader/free function method

We could/should add more options, e.g. a list of pages, since the
vmalloc area is very limited on some architectures.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Johannes Berg and committed by
Greg Kroah-Hartman
833c9545 00e262fd

+329
+7
MAINTAINERS
··· 2859 2859 S: Maintained 2860 2860 F: drivers/usb/dwc3/ 2861 2861 2862 + DEVICE COREDUMP (DEV_COREDUMP) 2863 + M: Johannes Berg <johannes@sipsolutions.net> 2864 + L: linux-kernel@vger.kernel.org 2865 + S: Maintained 2866 + F: drivers/base/devcoredump.c 2867 + F: include/linux/devcoredump.h 2868 + 2862 2869 DEVICE FREQUENCY (DEVFREQ) 2863 2870 M: MyungJoo Ham <myungjoo.ham@samsung.com> 2864 2871 M: Kyungmin Park <kyungmin.park@samsung.com>
+21
drivers/base/Kconfig
··· 165 165 166 166 If you are unsure about this, say N here. 167 167 168 + config WANT_DEV_COREDUMP 169 + bool 170 + help 171 + Drivers should "select" this option if they desire to use the 172 + device coredump mechanism. 173 + 174 + config DISABLE_DEV_COREDUMP 175 + bool "Disable device coredump" if EXPERT 176 + help 177 + Disable the device coredump mechanism despite drivers wanting to 178 + use it; this allows for more sensitive systems or systems that 179 + don't want to ever access the information to not have the code, 180 + nor keep any data. 181 + 182 + If unsure, say N. 183 + 184 + config DEV_COREDUMP 185 + bool 186 + default y if WANT_DEV_COREDUMP 187 + depends on !DISABLE_DEV_COREDUMP 188 + 168 189 config DEBUG_DRIVER 169 190 bool "Driver Core verbose debug messages" 170 191 depends on DEBUG_KERNEL
+1
drivers/base/Makefile
··· 21 21 obj-$(CONFIG_REGMAP) += regmap/ 22 22 obj-$(CONFIG_SOC_BUS) += soc.o 23 23 obj-$(CONFIG_PINCTRL) += pinctrl.o 24 + obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o 24 25 25 26 ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG 26 27
+265
drivers/base/devcoredump.c
··· 1 + /* 2 + * This file is provided under the GPLv2 license. 3 + * 4 + * GPL LICENSE SUMMARY 5 + * 6 + * Copyright(c) 2014 Intel Mobile Communications GmbH 7 + * 8 + * This program is free software; you can redistribute it and/or modify 9 + * it under the terms of version 2 of the GNU General Public License as 10 + * published by the Free Software Foundation. 11 + * 12 + * This program is distributed in the hope that it will be useful, but 13 + * WITHOUT ANY WARRANTY; without even the implied warranty of 14 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 + * General Public License for more details. 16 + * 17 + * The full GNU General Public License is included in this distribution 18 + * in the file called COPYING. 19 + * 20 + * Contact Information: 21 + * Intel Linux Wireless <ilw@linux.intel.com> 22 + * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 23 + * 24 + * Author: Johannes Berg <johannes@sipsolutions.net> 25 + */ 26 + #include <linux/module.h> 27 + #include <linux/device.h> 28 + #include <linux/devcoredump.h> 29 + #include <linux/list.h> 30 + #include <linux/slab.h> 31 + #include <linux/fs.h> 32 + #include <linux/workqueue.h> 33 + 34 + /* if data isn't read by userspace after 5 minutes then delete it */ 35 + #define DEVCD_TIMEOUT (HZ * 60 * 5) 36 + 37 + struct devcd_entry { 38 + struct device devcd_dev; 39 + const void *data; 40 + size_t datalen; 41 + struct module *owner; 42 + ssize_t (*read)(char *buffer, loff_t offset, size_t count, 43 + const void *data, size_t datalen); 44 + void (*free)(const void *data); 45 + struct delayed_work del_wk; 46 + struct device *failing_dev; 47 + }; 48 + 49 + static struct devcd_entry *dev_to_devcd(struct device *dev) 50 + { 51 + return container_of(dev, struct devcd_entry, devcd_dev); 52 + } 53 + 54 + static void devcd_dev_release(struct device *dev) 55 + { 56 + struct devcd_entry *devcd = dev_to_devcd(dev); 57 + 58 + devcd->free(devcd->data); 59 + 
module_put(devcd->owner); 60 + 61 + /* 62 + * this seems racy, but I don't see a notifier or such on 63 + * a struct device to know when it goes away? 64 + */ 65 + if (devcd->failing_dev->kobj.sd) 66 + sysfs_delete_link(&devcd->failing_dev->kobj, &dev->kobj, 67 + "devcoredump"); 68 + 69 + put_device(devcd->failing_dev); 70 + kfree(devcd); 71 + } 72 + 73 + static void devcd_del(struct work_struct *wk) 74 + { 75 + struct devcd_entry *devcd; 76 + 77 + devcd = container_of(wk, struct devcd_entry, del_wk.work); 78 + 79 + device_del(&devcd->devcd_dev); 80 + put_device(&devcd->devcd_dev); 81 + } 82 + 83 + static ssize_t devcd_data_read(struct file *filp, struct kobject *kobj, 84 + struct bin_attribute *bin_attr, 85 + char *buffer, loff_t offset, size_t count) 86 + { 87 + struct device *dev = kobj_to_dev(kobj); 88 + struct devcd_entry *devcd = dev_to_devcd(dev); 89 + 90 + return devcd->read(buffer, offset, count, devcd->data, devcd->datalen); 91 + } 92 + 93 + static ssize_t devcd_data_write(struct file *filp, struct kobject *kobj, 94 + struct bin_attribute *bin_attr, 95 + char *buffer, loff_t offset, size_t count) 96 + { 97 + struct device *dev = kobj_to_dev(kobj); 98 + struct devcd_entry *devcd = dev_to_devcd(dev); 99 + 100 + mod_delayed_work(system_wq, &devcd->del_wk, 0); 101 + 102 + return count; 103 + } 104 + 105 + static struct bin_attribute devcd_attr_data = { 106 + .attr = { .name = "data", .mode = S_IRUSR | S_IWUSR, }, 107 + .size = 0, 108 + .read = devcd_data_read, 109 + .write = devcd_data_write, 110 + }; 111 + 112 + static struct bin_attribute *devcd_dev_bin_attrs[] = { 113 + &devcd_attr_data, NULL, 114 + }; 115 + 116 + static const struct attribute_group devcd_dev_group = { 117 + .bin_attrs = devcd_dev_bin_attrs, 118 + }; 119 + 120 + static const struct attribute_group *devcd_dev_groups[] = { 121 + &devcd_dev_group, NULL, 122 + }; 123 + 124 + static struct class devcd_class = { 125 + .name = "devcoredump", 126 + .owner = THIS_MODULE, 127 + .dev_release = 
devcd_dev_release, 128 + .dev_groups = devcd_dev_groups, 129 + }; 130 + 131 + static ssize_t devcd_readv(char *buffer, loff_t offset, size_t count, 132 + const void *data, size_t datalen) 133 + { 134 + if (offset > datalen) 135 + return -EINVAL; 136 + 137 + if (offset + count > datalen) 138 + count = datalen - offset; 139 + 140 + if (count) 141 + memcpy(buffer, ((u8 *)data) + offset, count); 142 + 143 + return count; 144 + } 145 + 146 + /** 147 + * dev_coredumpv - create device coredump with vmalloc data 148 + * @dev: the struct device for the crashed device 149 + * @data: vmalloc data containing the device coredump 150 + * @datalen: length of the data 151 + * @gfp: allocation flags 152 + * 153 + * This function takes ownership of the vmalloc'ed data and will free 154 + * it when it is no longer used. See dev_coredumpm() for more information. 155 + */ 156 + void dev_coredumpv(struct device *dev, const void *data, size_t datalen, 157 + gfp_t gfp) 158 + { 159 + dev_coredumpm(dev, NULL, data, datalen, gfp, devcd_readv, vfree); 160 + } 161 + EXPORT_SYMBOL_GPL(dev_coredumpv); 162 + 163 + static int devcd_match_failing(struct device *dev, const void *failing) 164 + { 165 + struct devcd_entry *devcd = dev_to_devcd(dev); 166 + 167 + return devcd->failing_dev == failing; 168 + } 169 + 170 + /** 171 + * dev_coredumpm - create device coredump with read/free methods 172 + * @dev: the struct device for the crashed device 173 + * @owner: the module that contains the read/free functions, use %THIS_MODULE 174 + * @data: data cookie for the @read/@free functions 175 + * @datalen: length of the data 176 + * @gfp: allocation flags 177 + * @read: function to read from the given buffer 178 + * @free: function to free the given buffer 179 + * 180 + * Creates a new device coredump for the given device. If a previous one hasn't 181 + * been read yet, the new coredump is discarded. 
The data lifetime is determined 182 + * by the device coredump framework and when it is no longer needed the @free 183 + * function will be called to free the data. 184 + */ 185 + void dev_coredumpm(struct device *dev, struct module *owner, 186 + const void *data, size_t datalen, gfp_t gfp, 187 + ssize_t (*read)(char *buffer, loff_t offset, size_t count, 188 + const void *data, size_t datalen), 189 + void (*free)(const void *data)) 190 + { 191 + static atomic_t devcd_count = ATOMIC_INIT(0); 192 + struct devcd_entry *devcd; 193 + struct device *existing; 194 + 195 + existing = class_find_device(&devcd_class, NULL, dev, 196 + devcd_match_failing); 197 + if (existing) { 198 + put_device(existing); 199 + goto free; 200 + } 201 + 202 + if (!try_module_get(owner)) 203 + goto free; 204 + 205 + devcd = kzalloc(sizeof(*devcd), gfp); 206 + if (!devcd) 207 + goto put_module; 208 + 209 + devcd->owner = owner; 210 + devcd->data = data; 211 + devcd->datalen = datalen; 212 + devcd->read = read; 213 + devcd->free = free; 214 + devcd->failing_dev = get_device(dev); 215 + 216 + device_initialize(&devcd->devcd_dev); 217 + 218 + dev_set_name(&devcd->devcd_dev, "devcd%d", 219 + atomic_inc_return(&devcd_count)); 220 + devcd->devcd_dev.class = &devcd_class; 221 + 222 + if (device_add(&devcd->devcd_dev)) 223 + goto put_device; 224 + 225 + if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj, 226 + "failing_device")) 227 + /* nothing - symlink will be missing */; 228 + 229 + if (sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj, 230 + "devcoredump")) 231 + /* nothing - symlink will be missing */; 232 + 233 + INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); 234 + schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); 235 + 236 + return; 237 + put_device: 238 + put_device(&devcd->devcd_dev); 239 + put_module: 240 + module_put(owner); 241 + free: 242 + free(data); 243 + } 244 + EXPORT_SYMBOL_GPL(dev_coredumpm); 245 + 246 + static int __init devcoredump_init(void) 247 + { 248 + return 
class_register(&devcd_class); 249 + } 250 + __initcall(devcoredump_init); 251 + 252 + static int devcd_free(struct device *dev, void *data) 253 + { 254 + struct devcd_entry *devcd = dev_to_devcd(dev); 255 + 256 + flush_delayed_work(&devcd->del_wk); 257 + return 0; 258 + } 259 + 260 + static void __exit devcoredump_exit(void) 261 + { 262 + class_for_each_device(&devcd_class, NULL, NULL, devcd_free); 263 + class_unregister(&devcd_class); 264 + } 265 + __exitcall(devcoredump_exit);
+35
include/linux/devcoredump.h
··· 1 + #ifndef __DEVCOREDUMP_H 2 + #define __DEVCOREDUMP_H 3 + 4 + #include <linux/device.h> 5 + #include <linux/module.h> 6 + #include <linux/vmalloc.h> 7 + 8 + #ifdef CONFIG_DEV_COREDUMP 9 + void dev_coredumpv(struct device *dev, const void *data, size_t datalen, 10 + gfp_t gfp); 11 + 12 + void dev_coredumpm(struct device *dev, struct module *owner, 13 + const void *data, size_t datalen, gfp_t gfp, 14 + ssize_t (*read)(char *buffer, loff_t offset, size_t count, 15 + const void *data, size_t datalen), 16 + void (*free)(const void *data)); 17 + #else 18 + static inline void dev_coredumpv(struct device *dev, const void *data, 19 + size_t datalen, gfp_t gfp) 20 + { 21 + vfree(data); 22 + } 23 + 24 + static inline void 25 + dev_coredumpm(struct device *dev, struct module *owner, 26 + const void *data, size_t datalen, gfp_t gfp, 27 + ssize_t (*read)(char *buffer, loff_t offset, size_t count, 28 + const void *data, size_t datalen), 29 + void (*free)(const void *data)) 30 + { 31 + free(data); 32 + } 33 + #endif /* CONFIG_DEV_COREDUMP */ 34 + 35 + #endif /* __DEVCOREDUMP_H */