Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v4.4 572 lines 14 kB view raw
1/* 2 * Code extracted from drivers/block/genhd.c 3 * Copyright (C) 1991-1998 Linus Torvalds 4 * Re-organised Feb 1998 Russell King 5 * 6 * We now have independent partition support from the 7 * block drivers, which allows all the partition code to 8 * be grouped in one location, and it to be mostly self 9 * contained. 10 */ 11 12#include <linux/init.h> 13#include <linux/module.h> 14#include <linux/fs.h> 15#include <linux/slab.h> 16#include <linux/kmod.h> 17#include <linux/ctype.h> 18#include <linux/genhd.h> 19#include <linux/blktrace_api.h> 20 21#include "partitions/check.h" 22 23#ifdef CONFIG_BLK_DEV_MD 24extern void md_autodetect_dev(dev_t dev); 25#endif 26 27/* 28 * disk_name() is used by partition check code and the genhd driver. 29 * It formats the devicename of the indicated disk into 30 * the supplied buffer (of size at least 32), and returns 31 * a pointer to that same buffer (for convenience). 32 */ 33 34char *disk_name(struct gendisk *hd, int partno, char *buf) 35{ 36 if (!partno) 37 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 38 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 39 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); 40 else 41 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); 42 43 return buf; 44} 45 46const char *bdevname(struct block_device *bdev, char *buf) 47{ 48 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); 49} 50 51EXPORT_SYMBOL(bdevname); 52 53/* 54 * There's very little reason to use this, you should really 55 * have a struct block_device just about everywhere and use 56 * bdevname() instead. 57 */ 58const char *__bdevname(dev_t dev, char *buffer) 59{ 60 scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)", 61 MAJOR(dev), MINOR(dev)); 62 return buffer; 63} 64 65EXPORT_SYMBOL(__bdevname); 66 67static ssize_t part_partition_show(struct device *dev, 68 struct device_attribute *attr, char *buf) 69{ 70 struct hd_struct *p = dev_to_part(dev); 71 72 return sprintf(buf, "%d\n", p->partno); 73} 74 75static ssize_t part_start_show(struct device *dev, 76 struct device_attribute *attr, char *buf) 77{ 78 struct hd_struct *p = dev_to_part(dev); 79 80 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 81} 82 83ssize_t part_size_show(struct device *dev, 84 struct device_attribute *attr, char *buf) 85{ 86 struct hd_struct *p = dev_to_part(dev); 87 return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p)); 88} 89 90static ssize_t part_ro_show(struct device *dev, 91 struct device_attribute *attr, char *buf) 92{ 93 struct hd_struct *p = dev_to_part(dev); 94 return sprintf(buf, "%d\n", p->policy ? 1 : 0); 95} 96 97static ssize_t part_alignment_offset_show(struct device *dev, 98 struct device_attribute *attr, char *buf) 99{ 100 struct hd_struct *p = dev_to_part(dev); 101 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 102} 103 104static ssize_t part_discard_alignment_show(struct device *dev, 105 struct device_attribute *attr, char *buf) 106{ 107 struct hd_struct *p = dev_to_part(dev); 108 return sprintf(buf, "%u\n", p->discard_alignment); 109} 110 111ssize_t part_stat_show(struct device *dev, 112 struct device_attribute *attr, char *buf) 113{ 114 struct hd_struct *p = dev_to_part(dev); 115 int cpu; 116 117 cpu = part_stat_lock(); 118 part_round_stats(cpu, p); 119 part_stat_unlock(); 120 return sprintf(buf, 121 "%8lu %8lu %8llu %8u " 122 "%8lu %8lu %8llu %8u " 123 "%8u %8u %8u" 124 "\n", 125 part_stat_read(p, ios[READ]), 126 part_stat_read(p, merges[READ]), 127 (unsigned long long)part_stat_read(p, sectors[READ]), 128 jiffies_to_msecs(part_stat_read(p, ticks[READ])), 129 part_stat_read(p, ios[WRITE]), 130 part_stat_read(p, merges[WRITE]), 131 (unsigned long long)part_stat_read(p, sectors[WRITE]), 132 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 133 part_in_flight(p), 134 jiffies_to_msecs(part_stat_read(p, io_ticks)), 135 jiffies_to_msecs(part_stat_read(p, time_in_queue))); 136} 137 138ssize_t part_inflight_show(struct device *dev, 139 struct device_attribute *attr, char *buf) 140{ 141 struct hd_struct *p = dev_to_part(dev); 142 143 return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]), 144 atomic_read(&p->in_flight[1])); 145} 146 147#ifdef CONFIG_FAIL_MAKE_REQUEST 148ssize_t part_fail_show(struct device *dev, 149 struct device_attribute *attr, char *buf) 150{ 151 struct hd_struct *p = dev_to_part(dev); 152 153 return sprintf(buf, "%d\n", p->make_it_fail); 154} 155 156ssize_t part_fail_store(struct device *dev, 157 struct device_attribute *attr, 158 const char *buf, size_t count) 159{ 160 struct hd_struct *p = dev_to_part(dev); 161 int i; 162 163 if (count > 0 && sscanf(buf, "%d", &i) > 0) 164 p->make_it_fail = (i == 0) ? 0 : 1; 165 166 return count; 167} 168#endif 169 170static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); 171static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 172static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 173static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); 174static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 175static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, 176 NULL); 177static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 178static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 179#ifdef CONFIG_FAIL_MAKE_REQUEST 180static struct device_attribute dev_attr_fail = 181 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 182#endif 183 184static struct attribute *part_attrs[] = { 185 &dev_attr_partition.attr, 186 &dev_attr_start.attr, 187 &dev_attr_size.attr, 188 &dev_attr_ro.attr, 189 &dev_attr_alignment_offset.attr, 190 &dev_attr_discard_alignment.attr, 191 &dev_attr_stat.attr, 192 &dev_attr_inflight.attr, 193#ifdef CONFIG_FAIL_MAKE_REQUEST 194 &dev_attr_fail.attr, 195#endif 196 NULL 197}; 198 199static struct attribute_group part_attr_group = { 200 .attrs = part_attrs, 201}; 202 203static const struct attribute_group *part_attr_groups[] = { 204 &part_attr_group, 205#ifdef CONFIG_BLK_DEV_IO_TRACE 206 &blk_trace_attr_group, 207#endif 208 NULL 209}; 210 211static void part_release(struct device *dev) 212{ 213 struct hd_struct *p = dev_to_part(dev); 214 blk_free_devt(dev->devt); 215 hd_free_part(p); 216 kfree(p); 217} 218 219struct device_type part_type = { 220 .name = "partition", 221 .groups = part_attr_groups, 222 .release = part_release, 223}; 224 225static void delete_partition_rcu_cb(struct rcu_head *head) 226{ 227 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); 228 229 part->start_sect = 0; 230 part->nr_sects = 0; 231 part_stat_set_all(part, 0); 232 put_device(part_to_dev(part)); 233} 234 235void __delete_partition(struct percpu_ref *ref) 236{ 237 struct hd_struct *part = container_of(ref, struct hd_struct, ref); 238 call_rcu(&part->rcu_head, delete_partition_rcu_cb); 239} 240 241void delete_partition(struct gendisk *disk, int partno) 242{ 243 struct disk_part_tbl *ptbl = disk->part_tbl; 244 struct hd_struct *part; 245 246 if (partno >= ptbl->len) 247 return; 248 249 part = ptbl->part[partno]; 250 if (!part) 251 return; 252 253 rcu_assign_pointer(ptbl->part[partno], NULL); 254 rcu_assign_pointer(ptbl->last_lookup, NULL); 255 kobject_put(part->holder_dir); 256 device_del(part_to_dev(part)); 257 258 hd_struct_kill(part); 259} 260 261static ssize_t whole_disk_show(struct device *dev, 262 struct device_attribute *attr, char *buf) 263{ 264 return 0; 265} 266static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 267 whole_disk_show, NULL); 268 269struct hd_struct *add_partition(struct gendisk *disk, int partno, 270 sector_t start, sector_t len, int flags, 271 struct partition_meta_info *info) 272{ 273 struct hd_struct *p; 274 dev_t devt = MKDEV(0, 0); 275 struct device *ddev = disk_to_dev(disk); 276 struct device *pdev; 277 struct disk_part_tbl *ptbl; 278 const char *dname; 279 int err; 280 281 err = disk_expand_part_tbl(disk, partno); 282 if (err) 283 return ERR_PTR(err); 284 ptbl = disk->part_tbl; 285 286 if (ptbl->part[partno]) 287 return ERR_PTR(-EBUSY); 288 289 p = kzalloc(sizeof(*p), GFP_KERNEL); 290 if (!p) 291 return ERR_PTR(-EBUSY); 292 293 if (!init_part_stats(p)) { 294 err = -ENOMEM; 295 goto out_free; 296 } 297 298 seqcount_init(&p->nr_sects_seq); 299 pdev = part_to_dev(p); 300 301 p->start_sect = start; 302 p->alignment_offset = 303 queue_limit_alignment_offset(&disk->queue->limits, start); 304 p->discard_alignment = 305 queue_limit_discard_alignment(&disk->queue->limits, start); 306 p->nr_sects = len; 307 p->partno = partno; 308 p->policy = get_disk_ro(disk); 309 310 if (info) { 311 struct partition_meta_info *pinfo = alloc_part_info(disk); 312 if (!pinfo) 313 goto out_free_stats; 314 memcpy(pinfo, info, sizeof(*info)); 315 p->info = pinfo; 316 } 317 318 dname = dev_name(ddev); 319 if (isdigit(dname[strlen(dname) - 1])) 320 dev_set_name(pdev, "%sp%d", dname, partno); 321 else 322 dev_set_name(pdev, "%s%d", dname, partno); 323 324 device_initialize(pdev); 325 pdev->class = &block_class; 326 pdev->type = &part_type; 327 pdev->parent = ddev; 328 329 err = blk_alloc_devt(p, &devt); 330 if (err) 331 goto out_free_info; 332 pdev->devt = devt; 333 334 /* delay uevent until 'holders' subdir is created */ 335 dev_set_uevent_suppress(pdev, 1); 336 err = device_add(pdev); 337 if (err) 338 goto out_put; 339 340 err = -ENOMEM; 341 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); 342 if (!p->holder_dir) 343 goto out_del; 344 345 dev_set_uevent_suppress(pdev, 0); 346 if (flags & ADDPART_FLAG_WHOLEDISK) { 347 err = device_create_file(pdev, &dev_attr_whole_disk); 348 if (err) 349 goto out_del; 350 } 351 352 /* everything is up and running, commence */ 353 rcu_assign_pointer(ptbl->part[partno], p); 354 355 /* suppress uevent if the disk suppresses it */ 356 if (!dev_get_uevent_suppress(ddev)) 357 kobject_uevent(&pdev->kobj, KOBJ_ADD); 358 359 if (!hd_ref_init(p)) 360 return p; 361 362out_free_info: 363 free_part_info(p); 364out_free_stats: 365 free_part_stats(p); 366out_free: 367 kfree(p); 368 return ERR_PTR(err); 369out_del: 370 kobject_put(p->holder_dir); 371 device_del(pdev); 372out_put: 373 put_device(pdev); 374 blk_free_devt(devt); 375 return ERR_PTR(err); 376} 377 378static bool disk_unlock_native_capacity(struct gendisk *disk) 379{ 380 const struct block_device_operations *bdops = disk->fops; 381 382 if (bdops->unlock_native_capacity && 383 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { 384 printk(KERN_CONT "enabling native capacity\n"); 385 bdops->unlock_native_capacity(disk); 386 disk->flags |= GENHD_FL_NATIVE_CAPACITY; 387 return true; 388 } else { 389 printk(KERN_CONT "truncated\n"); 390 return false; 391 } 392} 393 394static int drop_partitions(struct gendisk *disk, struct block_device *bdev) 395{ 396 struct disk_part_iter piter; 397 struct hd_struct *part; 398 int res; 399 400 if (bdev->bd_part_count || bdev->bd_super) 401 return -EBUSY; 402 res = invalidate_partition(disk, 0); 403 if (res) 404 return res; 405 406 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 407 while ((part = disk_part_iter_next(&piter))) 408 delete_partition(disk, part->partno); 409 disk_part_iter_exit(&piter); 410 411 return 0; 412} 413 414int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 415{ 416 struct parsed_partitions *state = NULL; 417 struct hd_struct *part; 418 int p, highest, res; 419rescan: 420 if (state && !IS_ERR(state)) { 421 free_partitions(state); 422 state = NULL; 423 } 424 425 res = drop_partitions(disk, bdev); 426 if (res) 427 return res; 428 429 if (disk->fops->revalidate_disk) 430 disk->fops->revalidate_disk(disk); 431 blk_integrity_revalidate(disk); 432 check_disk_size_change(disk, bdev); 433 bdev->bd_invalidated = 0; 434 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 435 return 0; 436 if (IS_ERR(state)) { 437 /* 438 * I/O error reading the partition table. If any 439 * partition code tried to read beyond EOD, retry 440 * after unlocking native capacity. 441 */ 442 if (PTR_ERR(state) == -ENOSPC) { 443 printk(KERN_WARNING "%s: partition table beyond EOD, ", 444 disk->disk_name); 445 if (disk_unlock_native_capacity(disk)) 446 goto rescan; 447 } 448 return -EIO; 449 } 450 /* 451 * If any partition code tried to read beyond EOD, try 452 * unlocking native capacity even if partition table is 453 * successfully read as we could be missing some partitions. 454 */ 455 if (state->access_beyond_eod) { 456 printk(KERN_WARNING 457 "%s: partition table partially beyond EOD, ", 458 disk->disk_name); 459 if (disk_unlock_native_capacity(disk)) 460 goto rescan; 461 } 462 463 /* tell userspace that the media / partition table may have changed */ 464 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 465 466 /* Detect the highest partition number and preallocate 467 * disk->part_tbl. This is an optimization and not strictly 468 * necessary. 469 */ 470 for (p = 1, highest = 0; p < state->limit; p++) 471 if (state->parts[p].size) 472 highest = p; 473 474 disk_expand_part_tbl(disk, highest); 475 476 /* add partitions */ 477 for (p = 1; p < state->limit; p++) { 478 sector_t size, from; 479 struct partition_meta_info *info = NULL; 480 481 size = state->parts[p].size; 482 if (!size) 483 continue; 484 485 from = state->parts[p].from; 486 if (from >= get_capacity(disk)) { 487 printk(KERN_WARNING 488 "%s: p%d start %llu is beyond EOD, ", 489 disk->disk_name, p, (unsigned long long) from); 490 if (disk_unlock_native_capacity(disk)) 491 goto rescan; 492 continue; 493 } 494 495 if (from + size > get_capacity(disk)) { 496 printk(KERN_WARNING 497 "%s: p%d size %llu extends beyond EOD, ", 498 disk->disk_name, p, (unsigned long long) size); 499 500 if (disk_unlock_native_capacity(disk)) { 501 /* free state and restart */ 502 goto rescan; 503 } else { 504 /* 505 * we can not ignore partitions of broken tables 506 * created by for example camera firmware, but 507 * we limit them to the end of the disk to avoid 508 * creating invalid block devices 509 */ 510 size = get_capacity(disk) - from; 511 } 512 } 513 514 if (state->parts[p].has_info) 515 info = &state->parts[p].info; 516 part = add_partition(disk, p, from, size, 517 state->parts[p].flags, 518 &state->parts[p].info); 519 if (IS_ERR(part)) { 520 printk(KERN_ERR " %s: p%d could not be added: %ld\n", 521 disk->disk_name, p, -PTR_ERR(part)); 522 continue; 523 } 524#ifdef CONFIG_BLK_DEV_MD 525 if (state->parts[p].flags & ADDPART_FLAG_RAID) 526 md_autodetect_dev(part_to_dev(part)->devt); 527#endif 528 } 529 free_partitions(state); 530 return 0; 531} 532 533int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) 534{ 535 int res; 536 537 if (!bdev->bd_invalidated) 538 return 0; 539 540 res = drop_partitions(disk, bdev); 541 if (res) 542 return res; 543 544 set_capacity(disk, 0); 545 check_disk_size_change(disk, bdev); 546 bdev->bd_invalidated = 0; 547 /* tell userspace that the media / partition table may have changed */ 548 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 549 550 return 0; 551} 552 553unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) 554{ 555 struct address_space *mapping = bdev->bd_inode->i_mapping; 556 struct page *page; 557 558 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), 559 NULL); 560 if (!IS_ERR(page)) { 561 if (PageError(page)) 562 goto fail; 563 p->v = page; 564 return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9); 565fail: 566 page_cache_release(page); 567 } 568 p->v = NULL; 569 return NULL; 570} 571 572EXPORT_SYMBOL(read_dev_sector);