Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v4.20-rc5 675 lines 18 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Code extracted from drivers/block/genhd.c 4 * Copyright (C) 1991-1998 Linus Torvalds 5 * Re-organised Feb 1998 Russell King 6 * 7 * We now have independent partition support from the 8 * block drivers, which allows all the partition code to 9 * be grouped in one location, and it to be mostly self 10 * contained. 11 */ 12 13#include <linux/init.h> 14#include <linux/module.h> 15#include <linux/fs.h> 16#include <linux/slab.h> 17#include <linux/kmod.h> 18#include <linux/ctype.h> 19#include <linux/genhd.h> 20#include <linux/blktrace_api.h> 21 22#include "partitions/check.h" 23 24#ifdef CONFIG_BLK_DEV_MD 25extern void md_autodetect_dev(dev_t dev); 26#endif 27 28/* 29 * disk_name() is used by partition check code and the genhd driver. 30 * It formats the devicename of the indicated disk into 31 * the supplied buffer (of size at least 32), and returns 32 * a pointer to that same buffer (for convenience). 33 */ 34 35char *disk_name(struct gendisk *hd, int partno, char *buf) 36{ 37 if (!partno) 38 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 39 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 40 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); 41 else 42 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); 43 44 return buf; 45} 46 47const char *bdevname(struct block_device *bdev, char *buf) 48{ 49 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); 50} 51 52EXPORT_SYMBOL(bdevname); 53 54const char *bio_devname(struct bio *bio, char *buf) 55{ 56 return disk_name(bio->bi_disk, bio->bi_partno, buf); 57} 58EXPORT_SYMBOL(bio_devname); 59 60/* 61 * There's very little reason to use this, you should really 62 * have a struct block_device just about everywhere and use 63 * bdevname() instead. 64 */ 65const char *__bdevname(dev_t dev, char *buffer) 66{ 67 scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)", 68 MAJOR(dev), MINOR(dev)); 69 return buffer; 70} 71 72EXPORT_SYMBOL(__bdevname); 73 74static ssize_t part_partition_show(struct device *dev, 75 struct device_attribute *attr, char *buf) 76{ 77 struct hd_struct *p = dev_to_part(dev); 78 79 return sprintf(buf, "%d\n", p->partno); 80} 81 82static ssize_t part_start_show(struct device *dev, 83 struct device_attribute *attr, char *buf) 84{ 85 struct hd_struct *p = dev_to_part(dev); 86 87 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 88} 89 90ssize_t part_size_show(struct device *dev, 91 struct device_attribute *attr, char *buf) 92{ 93 struct hd_struct *p = dev_to_part(dev); 94 return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p)); 95} 96 97static ssize_t part_ro_show(struct device *dev, 98 struct device_attribute *attr, char *buf) 99{ 100 struct hd_struct *p = dev_to_part(dev); 101 return sprintf(buf, "%d\n", p->policy ? 1 : 0); 102} 103 104static ssize_t part_alignment_offset_show(struct device *dev, 105 struct device_attribute *attr, char *buf) 106{ 107 struct hd_struct *p = dev_to_part(dev); 108 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 109} 110 111static ssize_t part_discard_alignment_show(struct device *dev, 112 struct device_attribute *attr, char *buf) 113{ 114 struct hd_struct *p = dev_to_part(dev); 115 return sprintf(buf, "%u\n", p->discard_alignment); 116} 117 118ssize_t part_stat_show(struct device *dev, 119 struct device_attribute *attr, char *buf) 120{ 121 struct hd_struct *p = dev_to_part(dev); 122 struct request_queue *q = part_to_disk(p)->queue; 123 unsigned int inflight[2]; 124 int cpu; 125 126 cpu = part_stat_lock(); 127 part_round_stats(q, cpu, p); 128 part_stat_unlock(); 129 part_in_flight(q, p, inflight); 130 return sprintf(buf, 131 "%8lu %8lu %8llu %8u " 132 "%8lu %8lu %8llu %8u " 133 "%8u %8u %8u " 134 "%8lu %8lu %8llu %8u" 135 "\n", 136 part_stat_read(p, ios[STAT_READ]), 137 part_stat_read(p, merges[STAT_READ]), 138 (unsigned long long)part_stat_read(p, sectors[STAT_READ]), 139 (unsigned int)part_stat_read_msecs(p, STAT_READ), 140 part_stat_read(p, ios[STAT_WRITE]), 141 part_stat_read(p, merges[STAT_WRITE]), 142 (unsigned long long)part_stat_read(p, sectors[STAT_WRITE]), 143 (unsigned int)part_stat_read_msecs(p, STAT_WRITE), 144 inflight[0], 145 jiffies_to_msecs(part_stat_read(p, io_ticks)), 146 jiffies_to_msecs(part_stat_read(p, time_in_queue)), 147 part_stat_read(p, ios[STAT_DISCARD]), 148 part_stat_read(p, merges[STAT_DISCARD]), 149 (unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]), 150 (unsigned int)part_stat_read_msecs(p, STAT_DISCARD)); 151} 152 153ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, 154 char *buf) 155{ 156 struct hd_struct *p = dev_to_part(dev); 157 struct request_queue *q = part_to_disk(p)->queue; 158 unsigned int inflight[2]; 159 160 part_in_flight_rw(q, p, inflight); 161 return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); 162} 163 164#ifdef CONFIG_FAIL_MAKE_REQUEST 165ssize_t part_fail_show(struct device *dev, 166 struct device_attribute *attr, char *buf) 167{ 168 struct hd_struct *p = dev_to_part(dev); 169 170 return sprintf(buf, "%d\n", p->make_it_fail); 171} 172 173ssize_t part_fail_store(struct device *dev, 174 struct device_attribute *attr, 175 const char *buf, size_t count) 176{ 177 struct hd_struct *p = dev_to_part(dev); 178 int i; 179 180 if (count > 0 && sscanf(buf, "%d", &i) > 0) 181 p->make_it_fail = (i == 0) ? 0 : 1; 182 183 return count; 184} 185#endif 186 187static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); 188static DEVICE_ATTR(start, 0444, part_start_show, NULL); 189static DEVICE_ATTR(size, 0444, part_size_show, NULL); 190static DEVICE_ATTR(ro, 0444, part_ro_show, NULL); 191static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL); 192static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL); 193static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); 194static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); 195#ifdef CONFIG_FAIL_MAKE_REQUEST 196static struct device_attribute dev_attr_fail = 197 __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); 198#endif 199 200static struct attribute *part_attrs[] = { 201 &dev_attr_partition.attr, 202 &dev_attr_start.attr, 203 &dev_attr_size.attr, 204 &dev_attr_ro.attr, 205 &dev_attr_alignment_offset.attr, 206 &dev_attr_discard_alignment.attr, 207 &dev_attr_stat.attr, 208 &dev_attr_inflight.attr, 209#ifdef CONFIG_FAIL_MAKE_REQUEST 210 &dev_attr_fail.attr, 211#endif 212 NULL 213}; 214 215static struct attribute_group part_attr_group = { 216 .attrs = part_attrs, 217}; 218 219static const struct attribute_group *part_attr_groups[] = { 220 &part_attr_group, 221#ifdef CONFIG_BLK_DEV_IO_TRACE 222 &blk_trace_attr_group, 223#endif 224 NULL 225}; 226 227static void part_release(struct device *dev) 228{ 229 struct hd_struct *p = dev_to_part(dev); 230 blk_free_devt(dev->devt); 231 hd_free_part(p); 232 kfree(p); 233} 234 235static int part_uevent(struct device *dev, struct kobj_uevent_env *env) 236{ 237 struct hd_struct *part = dev_to_part(dev); 238 239 add_uevent_var(env, "PARTN=%u", part->partno); 240 if (part->info && part->info->volname[0]) 241 add_uevent_var(env, "PARTNAME=%s", part->info->volname); 242 return 0; 243} 244 245struct device_type part_type = { 246 .name = "partition", 247 .groups = part_attr_groups, 248 .release = part_release, 249 .uevent = part_uevent, 250}; 251 252static void delete_partition_rcu_cb(struct rcu_head *head) 253{ 254 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); 255 256 part->start_sect = 0; 257 part->nr_sects = 0; 258 part_stat_set_all(part, 0); 259 put_device(part_to_dev(part)); 260} 261 262void __delete_partition(struct percpu_ref *ref) 263{ 264 struct hd_struct *part = container_of(ref, struct hd_struct, ref); 265 call_rcu(&part->rcu_head, delete_partition_rcu_cb); 266} 267 268/* 269 * Must be called either with bd_mutex held, before a disk can be opened or 270 * after all disk users are gone. 271 */ 272void delete_partition(struct gendisk *disk, int partno) 273{ 274 struct disk_part_tbl *ptbl = 275 rcu_dereference_protected(disk->part_tbl, 1); 276 struct hd_struct *part; 277 278 if (partno >= ptbl->len) 279 return; 280 281 part = rcu_dereference_protected(ptbl->part[partno], 1); 282 if (!part) 283 return; 284 285 rcu_assign_pointer(ptbl->part[partno], NULL); 286 rcu_assign_pointer(ptbl->last_lookup, NULL); 287 kobject_put(part->holder_dir); 288 device_del(part_to_dev(part)); 289 290 hd_struct_kill(part); 291} 292 293static ssize_t whole_disk_show(struct device *dev, 294 struct device_attribute *attr, char *buf) 295{ 296 return 0; 297} 298static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); 299 300/* 301 * Must be called either with bd_mutex held, before a disk can be opened or 302 * after all disk users are gone. 303 */ 304struct hd_struct *add_partition(struct gendisk *disk, int partno, 305 sector_t start, sector_t len, int flags, 306 struct partition_meta_info *info) 307{ 308 struct hd_struct *p; 309 dev_t devt = MKDEV(0, 0); 310 struct device *ddev = disk_to_dev(disk); 311 struct device *pdev; 312 struct disk_part_tbl *ptbl; 313 const char *dname; 314 int err; 315 316 err = disk_expand_part_tbl(disk, partno); 317 if (err) 318 return ERR_PTR(err); 319 ptbl = rcu_dereference_protected(disk->part_tbl, 1); 320 321 if (ptbl->part[partno]) 322 return ERR_PTR(-EBUSY); 323 324 p = kzalloc(sizeof(*p), GFP_KERNEL); 325 if (!p) 326 return ERR_PTR(-EBUSY); 327 328 if (!init_part_stats(p)) { 329 err = -ENOMEM; 330 goto out_free; 331 } 332 333 seqcount_init(&p->nr_sects_seq); 334 pdev = part_to_dev(p); 335 336 p->start_sect = start; 337 p->alignment_offset = 338 queue_limit_alignment_offset(&disk->queue->limits, start); 339 p->discard_alignment = 340 queue_limit_discard_alignment(&disk->queue->limits, start); 341 p->nr_sects = len; 342 p->partno = partno; 343 p->policy = get_disk_ro(disk); 344 345 if (info) { 346 struct partition_meta_info *pinfo = alloc_part_info(disk); 347 if (!pinfo) { 348 err = -ENOMEM; 349 goto out_free_stats; 350 } 351 memcpy(pinfo, info, sizeof(*info)); 352 p->info = pinfo; 353 } 354 355 dname = dev_name(ddev); 356 if (isdigit(dname[strlen(dname) - 1])) 357 dev_set_name(pdev, "%sp%d", dname, partno); 358 else 359 dev_set_name(pdev, "%s%d", dname, partno); 360 361 device_initialize(pdev); 362 pdev->class = &block_class; 363 pdev->type = &part_type; 364 pdev->parent = ddev; 365 366 err = blk_alloc_devt(p, &devt); 367 if (err) 368 goto out_free_info; 369 pdev->devt = devt; 370 371 /* delay uevent until 'holders' subdir is created */ 372 dev_set_uevent_suppress(pdev, 1); 373 err = device_add(pdev); 374 if (err) 375 goto out_put; 376 377 err = -ENOMEM; 378 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); 379 if (!p->holder_dir) 380 goto out_del; 381 382 dev_set_uevent_suppress(pdev, 0); 383 if (flags & ADDPART_FLAG_WHOLEDISK) { 384 err = device_create_file(pdev, &dev_attr_whole_disk); 385 if (err) 386 goto out_del; 387 } 388 389 err = hd_ref_init(p); 390 if (err) { 391 if (flags & ADDPART_FLAG_WHOLEDISK) 392 goto out_remove_file; 393 goto out_del; 394 } 395 396 /* everything is up and running, commence */ 397 rcu_assign_pointer(ptbl->part[partno], p); 398 399 /* suppress uevent if the disk suppresses it */ 400 if (!dev_get_uevent_suppress(ddev)) 401 kobject_uevent(&pdev->kobj, KOBJ_ADD); 402 return p; 403 404out_free_info: 405 free_part_info(p); 406out_free_stats: 407 free_part_stats(p); 408out_free: 409 kfree(p); 410 return ERR_PTR(err); 411out_remove_file: 412 device_remove_file(pdev, &dev_attr_whole_disk); 413out_del: 414 kobject_put(p->holder_dir); 415 device_del(pdev); 416out_put: 417 put_device(pdev); 418 return ERR_PTR(err); 419} 420 421static bool disk_unlock_native_capacity(struct gendisk *disk) 422{ 423 const struct block_device_operations *bdops = disk->fops; 424 425 if (bdops->unlock_native_capacity && 426 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { 427 printk(KERN_CONT "enabling native capacity\n"); 428 bdops->unlock_native_capacity(disk); 429 disk->flags |= GENHD_FL_NATIVE_CAPACITY; 430 return true; 431 } else { 432 printk(KERN_CONT "truncated\n"); 433 return false; 434 } 435} 436 437static int drop_partitions(struct gendisk *disk, struct block_device *bdev) 438{ 439 struct disk_part_iter piter; 440 struct hd_struct *part; 441 int res; 442 443 if (bdev->bd_part_count || bdev->bd_super) 444 return -EBUSY; 445 res = invalidate_partition(disk, 0); 446 if (res) 447 return res; 448 449 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 450 while ((part = disk_part_iter_next(&piter))) 451 delete_partition(disk, part->partno); 452 disk_part_iter_exit(&piter); 453 454 return 0; 455} 456 457static bool part_zone_aligned(struct gendisk *disk, 458 struct block_device *bdev, 459 sector_t from, sector_t size) 460{ 461 unsigned int zone_sectors = bdev_zone_sectors(bdev); 462 463 /* 464 * If this function is called, then the disk is a zoned block device 465 * (host-aware or host-managed). This can be detected even if the 466 * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not 467 * set). In this case, however, only host-aware devices will be seen 468 * as a block device is not created for host-managed devices. Without 469 * zoned block device support, host-aware drives can still be used as 470 * regular block devices (no zone operation) and their zone size will 471 * be reported as 0. Allow this case. 472 */ 473 if (!zone_sectors) 474 return true; 475 476 /* 477 * Check partition start and size alignement. If the drive has a 478 * smaller last runt zone, ignore it and allow the partition to 479 * use it. Check the zone size too: it should be a power of 2 number 480 * of sectors. 481 */ 482 if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) { 483 u32 rem; 484 485 div_u64_rem(from, zone_sectors, &rem); 486 if (rem) 487 return false; 488 if ((from + size) < get_capacity(disk)) { 489 div_u64_rem(size, zone_sectors, &rem); 490 if (rem) 491 return false; 492 } 493 494 } else { 495 496 if (from & (zone_sectors - 1)) 497 return false; 498 if ((from + size) < get_capacity(disk) && 499 (size & (zone_sectors - 1))) 500 return false; 501 502 } 503 504 return true; 505} 506 507int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 508{ 509 struct parsed_partitions *state = NULL; 510 struct hd_struct *part; 511 int p, highest, res; 512rescan: 513 if (state && !IS_ERR(state)) { 514 free_partitions(state); 515 state = NULL; 516 } 517 518 res = drop_partitions(disk, bdev); 519 if (res) 520 return res; 521 522 if (disk->fops->revalidate_disk) 523 disk->fops->revalidate_disk(disk); 524 check_disk_size_change(disk, bdev, true); 525 bdev->bd_invalidated = 0; 526 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 527 return 0; 528 if (IS_ERR(state)) { 529 /* 530 * I/O error reading the partition table. If any 531 * partition code tried to read beyond EOD, retry 532 * after unlocking native capacity. 533 */ 534 if (PTR_ERR(state) == -ENOSPC) { 535 printk(KERN_WARNING "%s: partition table beyond EOD, ", 536 disk->disk_name); 537 if (disk_unlock_native_capacity(disk)) 538 goto rescan; 539 } 540 return -EIO; 541 } 542 /* 543 * If any partition code tried to read beyond EOD, try 544 * unlocking native capacity even if partition table is 545 * successfully read as we could be missing some partitions. 546 */ 547 if (state->access_beyond_eod) { 548 printk(KERN_WARNING 549 "%s: partition table partially beyond EOD, ", 550 disk->disk_name); 551 if (disk_unlock_native_capacity(disk)) 552 goto rescan; 553 } 554 555 /* tell userspace that the media / partition table may have changed */ 556 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 557 558 /* Detect the highest partition number and preallocate 559 * disk->part_tbl. This is an optimization and not strictly 560 * necessary. 561 */ 562 for (p = 1, highest = 0; p < state->limit; p++) 563 if (state->parts[p].size) 564 highest = p; 565 566 disk_expand_part_tbl(disk, highest); 567 568 /* add partitions */ 569 for (p = 1; p < state->limit; p++) { 570 sector_t size, from; 571 572 size = state->parts[p].size; 573 if (!size) 574 continue; 575 576 from = state->parts[p].from; 577 if (from >= get_capacity(disk)) { 578 printk(KERN_WARNING 579 "%s: p%d start %llu is beyond EOD, ", 580 disk->disk_name, p, (unsigned long long) from); 581 if (disk_unlock_native_capacity(disk)) 582 goto rescan; 583 continue; 584 } 585 586 if (from + size > get_capacity(disk)) { 587 printk(KERN_WARNING 588 "%s: p%d size %llu extends beyond EOD, ", 589 disk->disk_name, p, (unsigned long long) size); 590 591 if (disk_unlock_native_capacity(disk)) { 592 /* free state and restart */ 593 goto rescan; 594 } else { 595 /* 596 * we can not ignore partitions of broken tables 597 * created by for example camera firmware, but 598 * we limit them to the end of the disk to avoid 599 * creating invalid block devices 600 */ 601 size = get_capacity(disk) - from; 602 } 603 } 604 605 /* 606 * On a zoned block device, partitions should be aligned on the 607 * device zone size (i.e. zone boundary crossing not allowed). 608 * Otherwise, resetting the write pointer of the last zone of 609 * one partition may impact the following partition. 610 */ 611 if (bdev_is_zoned(bdev) && 612 !part_zone_aligned(disk, bdev, from, size)) { 613 printk(KERN_WARNING 614 "%s: p%d start %llu+%llu is not zone aligned\n", 615 disk->disk_name, p, (unsigned long long) from, 616 (unsigned long long) size); 617 continue; 618 } 619 620 part = add_partition(disk, p, from, size, 621 state->parts[p].flags, 622 &state->parts[p].info); 623 if (IS_ERR(part)) { 624 printk(KERN_ERR " %s: p%d could not be added: %ld\n", 625 disk->disk_name, p, -PTR_ERR(part)); 626 continue; 627 } 628#ifdef CONFIG_BLK_DEV_MD 629 if (state->parts[p].flags & ADDPART_FLAG_RAID) 630 md_autodetect_dev(part_to_dev(part)->devt); 631#endif 632 } 633 free_partitions(state); 634 return 0; 635} 636 637int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) 638{ 639 int res; 640 641 if (!bdev->bd_invalidated) 642 return 0; 643 644 res = drop_partitions(disk, bdev); 645 if (res) 646 return res; 647 648 set_capacity(disk, 0); 649 check_disk_size_change(disk, bdev, false); 650 bdev->bd_invalidated = 0; 651 /* tell userspace that the media / partition table may have changed */ 652 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 653 654 return 0; 655} 656 657unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) 658{ 659 struct address_space *mapping = bdev->bd_inode->i_mapping; 660 struct page *page; 661 662 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL); 663 if (!IS_ERR(page)) { 664 if (PageError(page)) 665 goto fail; 666 p->v = page; 667 return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9); 668fail: 669 put_page(page); 670 } 671 p->v = NULL; 672 return NULL; 673} 674 675EXPORT_SYMBOL(read_dev_sector);