Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

nfit, libnvdimm: allow an ARS scrub to be triggered on demand

Normally, an ARS (Address Range Scrub) only happens at
boot/initialization time. There can however arise situations where a
bus-wide rescan is needed - notably, in the case of discovering a latent
media error, we should do a full rescan to figure out what other sectors
are bad, and thus potentially avoid triggering an MCE (machine check exception) on them in the
future. Also provide a sysfs trigger to start a bus-wide scrub.

Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Authored by Vishal Verma; committed by Dan Williams.
37b137ff 18515942

+165 -8
+154 -7
drivers/acpi/nfit.c
··· 15 15 #include <linux/module.h> 16 16 #include <linux/mutex.h> 17 17 #include <linux/ndctl.h> 18 + #include <linux/sysfs.h> 18 19 #include <linux/delay.h> 19 20 #include <linux/list.h> 20 21 #include <linux/acpi.h> ··· 875 874 } 876 875 static DEVICE_ATTR_RO(revision); 877 876 877 + /* 878 + * This shows the number of full Address Range Scrubs that have been 879 + * completed since driver load time. Userspace can wait on this using 880 + * select/poll etc. A '+' at the end indicates an ARS is in progress 881 + */ 882 + static ssize_t scrub_show(struct device *dev, 883 + struct device_attribute *attr, char *buf) 884 + { 885 + struct nvdimm_bus_descriptor *nd_desc; 886 + ssize_t rc = -ENXIO; 887 + 888 + device_lock(dev); 889 + nd_desc = dev_get_drvdata(dev); 890 + if (nd_desc) { 891 + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 892 + 893 + rc = sprintf(buf, "%d%s", acpi_desc->scrub_count, 894 + (work_busy(&acpi_desc->work)) ? "+\n" : "\n"); 895 + } 896 + device_unlock(dev); 897 + return rc; 898 + } 899 + 900 + static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc); 901 + 902 + static ssize_t scrub_store(struct device *dev, 903 + struct device_attribute *attr, const char *buf, size_t size) 904 + { 905 + struct nvdimm_bus_descriptor *nd_desc; 906 + ssize_t rc; 907 + long val; 908 + 909 + rc = kstrtol(buf, 0, &val); 910 + if (rc) 911 + return rc; 912 + if (val != 1) 913 + return -EINVAL; 914 + 915 + device_lock(dev); 916 + nd_desc = dev_get_drvdata(dev); 917 + if (nd_desc) { 918 + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); 919 + 920 + rc = acpi_nfit_ars_rescan(acpi_desc); 921 + } 922 + device_unlock(dev); 923 + if (rc) 924 + return rc; 925 + return size; 926 + } 927 + static DEVICE_ATTR_RW(scrub); 928 + 929 + static bool ars_supported(struct nvdimm_bus *nvdimm_bus) 930 + { 931 + struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); 932 + const unsigned long mask = 1 << ND_CMD_ARS_CAP | 1 << ND_CMD_ARS_START 933 + | 1 
<< ND_CMD_ARS_STATUS; 934 + 935 + return (nd_desc->cmd_mask & mask) == mask; 936 + } 937 + 938 + static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n) 939 + { 940 + struct device *dev = container_of(kobj, struct device, kobj); 941 + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 942 + 943 + if (a == &dev_attr_scrub.attr && !ars_supported(nvdimm_bus)) 944 + return 0; 945 + return a->mode; 946 + } 947 + 878 948 static struct attribute *acpi_nfit_attributes[] = { 879 949 &dev_attr_revision.attr, 950 + &dev_attr_scrub.attr, 880 951 NULL, 881 952 }; 882 953 883 954 static struct attribute_group acpi_nfit_attribute_group = { 884 955 .name = "nfit", 885 956 .attrs = acpi_nfit_attributes, 957 + .is_visible = nfit_visible, 886 958 }; 887 959 888 960 static const struct attribute_group *acpi_nfit_attribute_groups[] = { ··· 2128 2054 unsigned int tmo = scrub_timeout; 2129 2055 int rc; 2130 2056 2131 - if (nfit_spa->ars_done || !nfit_spa->nd_region) 2057 + if (!nfit_spa->ars_required || !nfit_spa->nd_region) 2132 2058 return; 2133 2059 2134 2060 rc = ars_start(acpi_desc, nfit_spa); ··· 2217 2143 * firmware initiated scrubs to complete and then we go search for the 2218 2144 * affected spa regions to mark them scanned. In the second phase we 2219 2145 * initiate a directed scrub for every range that was not scrubbed in 2220 - * phase 1. 2146 + * phase 1. If we're called for a 'rescan', we harmlessly pass through 2147 + * the first phase, but really only care about running phase 2, where 2148 + * regions can be notified of new poison. 2221 2149 */ 2222 2150 2223 2151 /* process platform firmware initiated scrubs */ ··· 2322 2246 * Flag all the ranges that still need scrubbing, but 2323 2247 * register them now to make data available. 
2324 2248 */ 2325 - if (nfit_spa->nd_region) 2326 - nfit_spa->ars_done = 1; 2327 - else 2249 + if (!nfit_spa->nd_region) { 2250 + nfit_spa->ars_required = 1; 2328 2251 acpi_nfit_register_region(acpi_desc, nfit_spa); 2252 + } 2329 2253 } 2330 2254 2331 2255 list_for_each_entry(nfit_spa, &acpi_desc->spas, list) 2332 2256 acpi_nfit_async_scrub(acpi_desc, nfit_spa); 2257 + acpi_desc->scrub_count++; 2258 + if (acpi_desc->scrub_count_state) 2259 + sysfs_notify_dirent(acpi_desc->scrub_count_state); 2333 2260 mutex_unlock(&acpi_desc->init_mutex); 2334 2261 } 2335 2262 ··· 2370 2291 return 0; 2371 2292 } 2372 2293 2294 + static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc) 2295 + { 2296 + struct device *dev = acpi_desc->dev; 2297 + struct kernfs_node *nfit; 2298 + struct device *bus_dev; 2299 + 2300 + if (!ars_supported(acpi_desc->nvdimm_bus)) 2301 + return 0; 2302 + 2303 + bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2304 + nfit = sysfs_get_dirent(bus_dev->kobj.sd, "nfit"); 2305 + if (!nfit) { 2306 + dev_err(dev, "sysfs_get_dirent 'nfit' failed\n"); 2307 + return -ENODEV; 2308 + } 2309 + acpi_desc->scrub_count_state = sysfs_get_dirent(nfit, "scrub"); 2310 + sysfs_put(nfit); 2311 + if (!acpi_desc->scrub_count_state) { 2312 + dev_err(dev, "sysfs_get_dirent 'scrub' failed\n"); 2313 + return -ENODEV; 2314 + } 2315 + 2316 + return 0; 2317 + } 2318 + 2373 2319 static void acpi_nfit_destruct(void *data) 2374 2320 { 2375 2321 struct acpi_nfit_desc *acpi_desc = data; 2322 + struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus); 2376 2323 2377 2324 acpi_desc->cancel = 1; 2325 + /* 2326 + * Bounce the nvdimm bus lock to make sure any in-flight 2327 + * acpi_nfit_ars_rescan() submissions have had a chance to 2328 + * either submit or see ->cancel set. 
2329 + */ 2330 + device_lock(bus_dev); 2331 + device_unlock(bus_dev); 2332 + 2378 2333 flush_workqueue(nfit_wq); 2334 + if (acpi_desc->scrub_count_state) 2335 + sysfs_put(acpi_desc->scrub_count_state); 2379 2336 nvdimm_bus_unregister(acpi_desc->nvdimm_bus); 2380 2337 acpi_desc->nvdimm_bus = NULL; 2381 2338 } ··· 2424 2309 int rc; 2425 2310 2426 2311 if (!acpi_desc->nvdimm_bus) { 2312 + acpi_nfit_init_dsms(acpi_desc); 2313 + 2427 2314 acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, 2428 2315 &acpi_desc->nd_desc); 2429 2316 if (!acpi_desc->nvdimm_bus) 2430 2317 return -ENOMEM; 2318 + 2431 2319 rc = devm_add_action_or_reset(dev, acpi_nfit_destruct, 2432 2320 acpi_desc); 2321 + if (rc) 2322 + return rc; 2323 + 2324 + rc = acpi_nfit_desc_init_scrub_attr(acpi_desc); 2433 2325 if (rc) 2434 2326 return rc; 2435 2327 } ··· 2481 2359 rc = nfit_mem_init(acpi_desc); 2482 2360 if (rc) 2483 2361 goto out_unlock; 2484 - 2485 - acpi_nfit_init_dsms(acpi_desc); 2486 2362 2487 2363 rc = acpi_nfit_register_dimms(acpi_desc); 2488 2364 if (rc) ··· 2545 2425 */ 2546 2426 if (work_busy(&acpi_desc->work)) 2547 2427 return -EBUSY; 2428 + 2429 + return 0; 2430 + } 2431 + 2432 + static int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc) 2433 + { 2434 + struct device *dev = acpi_desc->dev; 2435 + struct nfit_spa *nfit_spa; 2436 + 2437 + if (work_busy(&acpi_desc->work)) 2438 + return -EBUSY; 2439 + 2440 + if (acpi_desc->cancel) 2441 + return 0; 2442 + 2443 + mutex_lock(&acpi_desc->init_mutex); 2444 + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { 2445 + struct acpi_nfit_system_address *spa = nfit_spa->spa; 2446 + 2447 + if (nfit_spa_type(spa) != NFIT_SPA_PM) 2448 + continue; 2449 + 2450 + nfit_spa->ars_required = 1; 2451 + } 2452 + queue_work(nfit_wq, &acpi_desc->work); 2453 + dev_dbg(dev, "%s: ars_scan triggered\n", __func__); 2454 + mutex_unlock(&acpi_desc->init_mutex); 2548 2455 2549 2456 return 0; 2550 2457 }
+3 -1
drivers/acpi/nfit.h
··· 80 80 struct nfit_spa { 81 81 struct list_head list; 82 82 struct nd_region *nd_region; 83 - unsigned int ars_done:1; 83 + unsigned int ars_required:1; 84 84 u32 clear_err_unit; 85 85 u32 max_ars; 86 86 struct acpi_nfit_system_address spa[0]; ··· 148 148 struct nd_cmd_ars_status *ars_status; 149 149 size_t ars_status_size; 150 150 struct work_struct work; 151 + struct kernfs_node *scrub_count_state; 152 + unsigned int scrub_count; 151 153 unsigned int cancel:1; 152 154 unsigned long dimm_cmd_force_en; 153 155 unsigned long bus_cmd_force_en;
+7
drivers/nvdimm/core.c
··· 201 201 } 202 202 EXPORT_SYMBOL_GPL(to_nd_desc); 203 203 204 + struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus) 205 + { 206 + /* struct nvdimm_bus definition is private to libnvdimm */ 207 + return &nvdimm_bus->dev; 208 + } 209 + EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); 210 + 204 211 static bool is_uuid_sep(char sep) 205 212 { 206 213 if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
+1
include/linux/libnvdimm.h
··· 137 137 struct nd_region *to_nd_region(struct device *dev); 138 138 struct nd_blk_region *to_nd_blk_region(struct device *dev); 139 139 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); 140 + struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus); 140 141 const char *nvdimm_name(struct nvdimm *nvdimm); 141 142 unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); 142 143 void *nvdimm_provider_data(struct nvdimm *nvdimm);