/*
 * Source: Linux kernel mirror (for testing),
 * git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
 * Tags: kernel, os, linux
 * Snapshot at v4.7 (upstream file: 659 lines, 16 kB).
 */
1/* 2 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, but 9 * WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 */ 13#include <linux/libnvdimm.h> 14#include <linux/badblocks.h> 15#include <linux/export.h> 16#include <linux/module.h> 17#include <linux/blkdev.h> 18#include <linux/device.h> 19#include <linux/ctype.h> 20#include <linux/ndctl.h> 21#include <linux/mutex.h> 22#include <linux/slab.h> 23#include "nd-core.h" 24#include "nd.h" 25 26LIST_HEAD(nvdimm_bus_list); 27DEFINE_MUTEX(nvdimm_bus_list_mutex); 28static DEFINE_IDA(nd_ida); 29 30void nvdimm_bus_lock(struct device *dev) 31{ 32 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 33 34 if (!nvdimm_bus) 35 return; 36 mutex_lock(&nvdimm_bus->reconfig_mutex); 37} 38EXPORT_SYMBOL(nvdimm_bus_lock); 39 40void nvdimm_bus_unlock(struct device *dev) 41{ 42 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 43 44 if (!nvdimm_bus) 45 return; 46 mutex_unlock(&nvdimm_bus->reconfig_mutex); 47} 48EXPORT_SYMBOL(nvdimm_bus_unlock); 49 50bool is_nvdimm_bus_locked(struct device *dev) 51{ 52 struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); 53 54 if (!nvdimm_bus) 55 return false; 56 return mutex_is_locked(&nvdimm_bus->reconfig_mutex); 57} 58EXPORT_SYMBOL(is_nvdimm_bus_locked); 59 60u64 nd_fletcher64(void *addr, size_t len, bool le) 61{ 62 u32 *buf = addr; 63 u32 lo32 = 0; 64 u64 hi32 = 0; 65 int i; 66 67 for (i = 0; i < len / sizeof(u32); i++) { 68 lo32 += le ? 
le32_to_cpu((__le32) buf[i]) : buf[i]; 69 hi32 += lo32; 70 } 71 72 return hi32 << 32 | lo32; 73} 74EXPORT_SYMBOL_GPL(nd_fletcher64); 75 76static void nvdimm_bus_release(struct device *dev) 77{ 78 struct nvdimm_bus *nvdimm_bus; 79 80 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 81 ida_simple_remove(&nd_ida, nvdimm_bus->id); 82 kfree(nvdimm_bus); 83} 84 85struct nvdimm_bus *to_nvdimm_bus(struct device *dev) 86{ 87 struct nvdimm_bus *nvdimm_bus; 88 89 nvdimm_bus = container_of(dev, struct nvdimm_bus, dev); 90 WARN_ON(nvdimm_bus->dev.release != nvdimm_bus_release); 91 return nvdimm_bus; 92} 93EXPORT_SYMBOL_GPL(to_nvdimm_bus); 94 95struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus) 96{ 97 /* struct nvdimm_bus definition is private to libnvdimm */ 98 return nvdimm_bus->nd_desc; 99} 100EXPORT_SYMBOL_GPL(to_nd_desc); 101 102struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev) 103{ 104 struct device *dev; 105 106 for (dev = nd_dev; dev; dev = dev->parent) 107 if (dev->release == nvdimm_bus_release) 108 break; 109 dev_WARN_ONCE(nd_dev, !dev, "invalid dev, not on nd bus\n"); 110 if (dev) 111 return to_nvdimm_bus(dev); 112 return NULL; 113} 114 115static bool is_uuid_sep(char sep) 116{ 117 if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0') 118 return true; 119 return false; 120} 121 122static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf, 123 size_t len) 124{ 125 const char *str = buf; 126 u8 uuid[16]; 127 int i; 128 129 for (i = 0; i < 16; i++) { 130 if (!isxdigit(str[0]) || !isxdigit(str[1])) { 131 dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n", 132 __func__, i, str - buf, str[0], 133 str + 1 - buf, str[1]); 134 return -EINVAL; 135 } 136 137 uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]); 138 str += 2; 139 if (is_uuid_sep(*str)) 140 str++; 141 } 142 143 memcpy(uuid_out, uuid, sizeof(uuid)); 144 return 0; 145} 146 147/** 148 * nd_uuid_store: common implementation for writing 'uuid' sysfs 
attributes 149 * @dev: container device for the uuid property 150 * @uuid_out: uuid buffer to replace 151 * @buf: raw sysfs buffer to parse 152 * 153 * Enforce that uuids can only be changed while the device is disabled 154 * (driver detached) 155 * LOCKING: expects device_lock() is held on entry 156 */ 157int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, 158 size_t len) 159{ 160 u8 uuid[16]; 161 int rc; 162 163 if (dev->driver) 164 return -EBUSY; 165 166 rc = nd_uuid_parse(dev, uuid, buf, len); 167 if (rc) 168 return rc; 169 170 kfree(*uuid_out); 171 *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL); 172 if (!(*uuid_out)) 173 return -ENOMEM; 174 175 return 0; 176} 177 178ssize_t nd_sector_size_show(unsigned long current_lbasize, 179 const unsigned long *supported, char *buf) 180{ 181 ssize_t len = 0; 182 int i; 183 184 for (i = 0; supported[i]; i++) 185 if (current_lbasize == supported[i]) 186 len += sprintf(buf + len, "[%ld] ", supported[i]); 187 else 188 len += sprintf(buf + len, "%ld ", supported[i]); 189 len += sprintf(buf + len, "\n"); 190 return len; 191} 192 193ssize_t nd_sector_size_store(struct device *dev, const char *buf, 194 unsigned long *current_lbasize, const unsigned long *supported) 195{ 196 unsigned long lbasize; 197 int rc, i; 198 199 if (dev->driver) 200 return -EBUSY; 201 202 rc = kstrtoul(buf, 0, &lbasize); 203 if (rc) 204 return rc; 205 206 for (i = 0; supported[i]; i++) 207 if (lbasize == supported[i]) 208 break; 209 210 if (supported[i]) { 211 *current_lbasize = lbasize; 212 return 0; 213 } else { 214 return -EINVAL; 215 } 216} 217 218void __nd_iostat_start(struct bio *bio, unsigned long *start) 219{ 220 struct gendisk *disk = bio->bi_bdev->bd_disk; 221 const int rw = bio_data_dir(bio); 222 int cpu = part_stat_lock(); 223 224 *start = jiffies; 225 part_round_stats(cpu, &disk->part0); 226 part_stat_inc(cpu, &disk->part0, ios[rw]); 227 part_stat_add(cpu, &disk->part0, sectors[rw], bio_sectors(bio)); 228 
part_inc_in_flight(&disk->part0, rw); 229 part_stat_unlock(); 230} 231EXPORT_SYMBOL(__nd_iostat_start); 232 233void nd_iostat_end(struct bio *bio, unsigned long start) 234{ 235 struct gendisk *disk = bio->bi_bdev->bd_disk; 236 unsigned long duration = jiffies - start; 237 const int rw = bio_data_dir(bio); 238 int cpu = part_stat_lock(); 239 240 part_stat_add(cpu, &disk->part0, ticks[rw], duration); 241 part_round_stats(cpu, &disk->part0); 242 part_dec_in_flight(&disk->part0, rw); 243 part_stat_unlock(); 244} 245EXPORT_SYMBOL(nd_iostat_end); 246 247static ssize_t commands_show(struct device *dev, 248 struct device_attribute *attr, char *buf) 249{ 250 int cmd, len = 0; 251 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 252 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; 253 254 for_each_set_bit(cmd, &nd_desc->cmd_mask, BITS_PER_LONG) 255 len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); 256 len += sprintf(buf + len, "\n"); 257 return len; 258} 259static DEVICE_ATTR_RO(commands); 260 261static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus) 262{ 263 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; 264 struct device *parent = nvdimm_bus->dev.parent; 265 266 if (nd_desc->provider_name) 267 return nd_desc->provider_name; 268 else if (parent) 269 return dev_name(parent); 270 else 271 return "unknown"; 272} 273 274static ssize_t provider_show(struct device *dev, 275 struct device_attribute *attr, char *buf) 276{ 277 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 278 279 return sprintf(buf, "%s\n", nvdimm_bus_provider(nvdimm_bus)); 280} 281static DEVICE_ATTR_RO(provider); 282 283static int flush_namespaces(struct device *dev, void *data) 284{ 285 device_lock(dev); 286 device_unlock(dev); 287 return 0; 288} 289 290static int flush_regions_dimms(struct device *dev, void *data) 291{ 292 device_lock(dev); 293 device_unlock(dev); 294 device_for_each_child(dev, NULL, flush_namespaces); 295 return 0; 296} 297 298static 
ssize_t wait_probe_show(struct device *dev, 299 struct device_attribute *attr, char *buf) 300{ 301 struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); 302 struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; 303 int rc; 304 305 if (nd_desc->flush_probe) { 306 rc = nd_desc->flush_probe(nd_desc); 307 if (rc) 308 return rc; 309 } 310 nd_synchronize(); 311 device_for_each_child(dev, NULL, flush_regions_dimms); 312 return sprintf(buf, "1\n"); 313} 314static DEVICE_ATTR_RO(wait_probe); 315 316static struct attribute *nvdimm_bus_attributes[] = { 317 &dev_attr_commands.attr, 318 &dev_attr_wait_probe.attr, 319 &dev_attr_provider.attr, 320 NULL, 321}; 322 323struct attribute_group nvdimm_bus_attribute_group = { 324 .attrs = nvdimm_bus_attributes, 325}; 326EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group); 327 328struct nvdimm_bus *__nvdimm_bus_register(struct device *parent, 329 struct nvdimm_bus_descriptor *nd_desc, struct module *module) 330{ 331 struct nvdimm_bus *nvdimm_bus; 332 int rc; 333 334 nvdimm_bus = kzalloc(sizeof(*nvdimm_bus), GFP_KERNEL); 335 if (!nvdimm_bus) 336 return NULL; 337 INIT_LIST_HEAD(&nvdimm_bus->list); 338 INIT_LIST_HEAD(&nvdimm_bus->poison_list); 339 init_waitqueue_head(&nvdimm_bus->probe_wait); 340 nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL); 341 mutex_init(&nvdimm_bus->reconfig_mutex); 342 if (nvdimm_bus->id < 0) { 343 kfree(nvdimm_bus); 344 return NULL; 345 } 346 nvdimm_bus->nd_desc = nd_desc; 347 nvdimm_bus->module = module; 348 nvdimm_bus->dev.parent = parent; 349 nvdimm_bus->dev.release = nvdimm_bus_release; 350 nvdimm_bus->dev.groups = nd_desc->attr_groups; 351 dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); 352 rc = device_register(&nvdimm_bus->dev); 353 if (rc) { 354 dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc); 355 goto err; 356 } 357 358 rc = nvdimm_bus_create_ndctl(nvdimm_bus); 359 if (rc) 360 goto err; 361 362 mutex_lock(&nvdimm_bus_list_mutex); 363 list_add_tail(&nvdimm_bus->list, 
&nvdimm_bus_list); 364 mutex_unlock(&nvdimm_bus_list_mutex); 365 366 return nvdimm_bus; 367 err: 368 put_device(&nvdimm_bus->dev); 369 return NULL; 370} 371EXPORT_SYMBOL_GPL(__nvdimm_bus_register); 372 373static void set_badblock(struct badblocks *bb, sector_t s, int num) 374{ 375 dev_dbg(bb->dev, "Found a poison range (0x%llx, 0x%llx)\n", 376 (u64) s * 512, (u64) num * 512); 377 /* this isn't an error as the hardware will still throw an exception */ 378 if (badblocks_set(bb, s, num, 1)) 379 dev_info_once(bb->dev, "%s: failed for sector %llx\n", 380 __func__, (u64) s); 381} 382 383/** 384 * __add_badblock_range() - Convert a physical address range to bad sectors 385 * @bb: badblocks instance to populate 386 * @ns_offset: namespace offset where the error range begins (in bytes) 387 * @len: number of bytes of poison to be added 388 * 389 * This assumes that the range provided with (ns_offset, len) is within 390 * the bounds of physical addresses for this namespace, i.e. lies in the 391 * interval [ns_start, ns_start + ns_size) 392 */ 393static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) 394{ 395 const unsigned int sector_size = 512; 396 sector_t start_sector; 397 u64 num_sectors; 398 u32 rem; 399 400 start_sector = div_u64(ns_offset, sector_size); 401 num_sectors = div_u64_rem(len, sector_size, &rem); 402 if (rem) 403 num_sectors++; 404 405 if (unlikely(num_sectors > (u64)INT_MAX)) { 406 u64 remaining = num_sectors; 407 sector_t s = start_sector; 408 409 while (remaining) { 410 int done = min_t(u64, remaining, INT_MAX); 411 412 set_badblock(bb, s, done); 413 remaining -= done; 414 s += done; 415 } 416 } else 417 set_badblock(bb, start_sector, num_sectors); 418} 419 420static void badblocks_populate(struct list_head *poison_list, 421 struct badblocks *bb, const struct resource *res) 422{ 423 struct nd_poison *pl; 424 425 if (list_empty(poison_list)) 426 return; 427 428 list_for_each_entry(pl, poison_list, list) { 429 u64 pl_end = pl->start 
+ pl->length - 1; 430 431 /* Discard intervals with no intersection */ 432 if (pl_end < res->start) 433 continue; 434 if (pl->start > res->end) 435 continue; 436 /* Deal with any overlap after start of the namespace */ 437 if (pl->start >= res->start) { 438 u64 start = pl->start; 439 u64 len; 440 441 if (pl_end <= res->end) 442 len = pl->length; 443 else 444 len = res->start + resource_size(res) 445 - pl->start; 446 __add_badblock_range(bb, start - res->start, len); 447 continue; 448 } 449 /* Deal with overlap for poison starting before the namespace */ 450 if (pl->start < res->start) { 451 u64 len; 452 453 if (pl_end < res->end) 454 len = pl->start + pl->length - res->start; 455 else 456 len = resource_size(res); 457 __add_badblock_range(bb, 0, len); 458 } 459 } 460} 461 462/** 463 * nvdimm_badblocks_populate() - Convert a list of poison ranges to badblocks 464 * @region: parent region of the range to interrogate 465 * @bb: badblocks instance to populate 466 * @res: resource range to consider 467 * 468 * The poison list generated during bus initialization may contain 469 * multiple, possibly overlapping physical address ranges. 
Compare each 470 * of these ranges to the resource range currently being initialized, 471 * and add badblocks entries for all matching sub-ranges 472 */ 473void nvdimm_badblocks_populate(struct nd_region *nd_region, 474 struct badblocks *bb, const struct resource *res) 475{ 476 struct nvdimm_bus *nvdimm_bus; 477 struct list_head *poison_list; 478 479 if (!is_nd_pmem(&nd_region->dev)) { 480 dev_WARN_ONCE(&nd_region->dev, 1, 481 "%s only valid for pmem regions\n", __func__); 482 return; 483 } 484 nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); 485 poison_list = &nvdimm_bus->poison_list; 486 487 nvdimm_bus_lock(&nvdimm_bus->dev); 488 badblocks_populate(poison_list, bb, res); 489 nvdimm_bus_unlock(&nvdimm_bus->dev); 490} 491EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); 492 493static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) 494{ 495 struct nd_poison *pl; 496 497 pl = kzalloc(sizeof(*pl), GFP_KERNEL); 498 if (!pl) 499 return -ENOMEM; 500 501 pl->start = addr; 502 pl->length = length; 503 list_add_tail(&pl->list, &nvdimm_bus->poison_list); 504 505 return 0; 506} 507 508static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) 509{ 510 struct nd_poison *pl; 511 512 if (list_empty(&nvdimm_bus->poison_list)) 513 return add_poison(nvdimm_bus, addr, length); 514 515 /* 516 * There is a chance this is a duplicate, check for those first. 
517 * This will be the common case as ARS_STATUS returns all known 518 * errors in the SPA space, and we can't query it per region 519 */ 520 list_for_each_entry(pl, &nvdimm_bus->poison_list, list) 521 if (pl->start == addr) { 522 /* If length has changed, update this list entry */ 523 if (pl->length != length) 524 pl->length = length; 525 return 0; 526 } 527 528 /* 529 * If not a duplicate or a simple length update, add the entry as is, 530 * as any overlapping ranges will get resolved when the list is consumed 531 * and converted to badblocks 532 */ 533 return add_poison(nvdimm_bus, addr, length); 534} 535 536int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) 537{ 538 int rc; 539 540 nvdimm_bus_lock(&nvdimm_bus->dev); 541 rc = bus_add_poison(nvdimm_bus, addr, length); 542 nvdimm_bus_unlock(&nvdimm_bus->dev); 543 544 return rc; 545} 546EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); 547 548static void free_poison_list(struct list_head *poison_list) 549{ 550 struct nd_poison *pl, *next; 551 552 list_for_each_entry_safe(pl, next, poison_list, list) { 553 list_del(&pl->list); 554 kfree(pl); 555 } 556 list_del_init(poison_list); 557} 558 559static int child_unregister(struct device *dev, void *data) 560{ 561 /* 562 * the singular ndctl class device per bus needs to be 563 * "device_destroy"ed, so skip it here 564 * 565 * i.e. 
remove classless children 566 */ 567 if (dev->class) 568 /* pass */; 569 else 570 nd_device_unregister(dev, ND_SYNC); 571 return 0; 572} 573 574void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) 575{ 576 if (!nvdimm_bus) 577 return; 578 579 mutex_lock(&nvdimm_bus_list_mutex); 580 list_del_init(&nvdimm_bus->list); 581 mutex_unlock(&nvdimm_bus_list_mutex); 582 583 nd_synchronize(); 584 device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); 585 586 nvdimm_bus_lock(&nvdimm_bus->dev); 587 free_poison_list(&nvdimm_bus->poison_list); 588 nvdimm_bus_unlock(&nvdimm_bus->dev); 589 590 nvdimm_bus_destroy_ndctl(nvdimm_bus); 591 592 device_unregister(&nvdimm_bus->dev); 593} 594EXPORT_SYMBOL_GPL(nvdimm_bus_unregister); 595 596#ifdef CONFIG_BLK_DEV_INTEGRITY 597int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) 598{ 599 struct blk_integrity bi; 600 601 if (meta_size == 0) 602 return 0; 603 604 bi.profile = NULL; 605 bi.tuple_size = meta_size; 606 bi.tag_size = meta_size; 607 608 blk_integrity_register(disk, &bi); 609 blk_queue_max_integrity_segments(disk->queue, 1); 610 611 return 0; 612} 613EXPORT_SYMBOL(nd_integrity_init); 614 615#else /* CONFIG_BLK_DEV_INTEGRITY */ 616int nd_integrity_init(struct gendisk *disk, unsigned long meta_size) 617{ 618 return 0; 619} 620EXPORT_SYMBOL(nd_integrity_init); 621 622#endif 623 624static __init int libnvdimm_init(void) 625{ 626 int rc; 627 628 rc = nvdimm_bus_init(); 629 if (rc) 630 return rc; 631 rc = nvdimm_init(); 632 if (rc) 633 goto err_dimm; 634 rc = nd_region_init(); 635 if (rc) 636 goto err_region; 637 return 0; 638 err_region: 639 nvdimm_exit(); 640 err_dimm: 641 nvdimm_bus_exit(); 642 return rc; 643} 644 645static __exit void libnvdimm_exit(void) 646{ 647 WARN_ON(!list_empty(&nvdimm_bus_list)); 648 nd_region_exit(); 649 nvdimm_exit(); 650 nvdimm_bus_exit(); 651 nd_region_devs_exit(); 652 nvdimm_devs_exit(); 653 ida_destroy(&nd_ida); 654} 655 656MODULE_LICENSE("GPL v2"); 657MODULE_AUTHOR("Intel 
Corporation"); 658subsys_initcall(libnvdimm_init); 659module_exit(libnvdimm_exit);