Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v3.2-rc3 687 lines 17 kB view raw
1/* 2 * fs/partitions/check.c 3 * 4 * Code extracted from drivers/block/genhd.c 5 * Copyright (C) 1991-1998 Linus Torvalds 6 * Re-organised Feb 1998 Russell King 7 * 8 * We now have independent partition support from the 9 * block drivers, which allows all the partition code to 10 * be grouped in one location, and it to be mostly self 11 * contained. 12 * 13 * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl} 14 */ 15 16#include <linux/init.h> 17#include <linux/module.h> 18#include <linux/fs.h> 19#include <linux/slab.h> 20#include <linux/kmod.h> 21#include <linux/ctype.h> 22#include <linux/genhd.h> 23#include <linux/blktrace_api.h> 24 25#include "check.h" 26 27#include "acorn.h" 28#include "amiga.h" 29#include "atari.h" 30#include "ldm.h" 31#include "mac.h" 32#include "msdos.h" 33#include "osf.h" 34#include "sgi.h" 35#include "sun.h" 36#include "ibm.h" 37#include "ultrix.h" 38#include "efi.h" 39#include "karma.h" 40#include "sysv68.h" 41 42#ifdef CONFIG_BLK_DEV_MD 43extern void md_autodetect_dev(dev_t dev); 44#endif 45 46int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ 47 48static int (*check_part[])(struct parsed_partitions *) = { 49 /* 50 * Probe partition formats with tables at disk address 0 51 * that also have an ADFS boot block at 0xdc0. 52 */ 53#ifdef CONFIG_ACORN_PARTITION_ICS 54 adfspart_check_ICS, 55#endif 56#ifdef CONFIG_ACORN_PARTITION_POWERTEC 57 adfspart_check_POWERTEC, 58#endif 59#ifdef CONFIG_ACORN_PARTITION_EESOX 60 adfspart_check_EESOX, 61#endif 62 63 /* 64 * Now move on to formats that only have partition info at 65 * disk address 0xdc0. Since these may also have stale 66 * PC/BIOS partition tables, they need to come before 67 * the msdos entry. 68 */ 69#ifdef CONFIG_ACORN_PARTITION_CUMANA 70 adfspart_check_CUMANA, 71#endif 72#ifdef CONFIG_ACORN_PARTITION_ADFS 73 adfspart_check_ADFS, 74#endif 75 76#ifdef CONFIG_EFI_PARTITION 77 efi_partition, /* this must come before msdos */ 78#endif 79#ifdef CONFIG_SGI_PARTITION 80 sgi_partition, 81#endif 82#ifdef CONFIG_LDM_PARTITION 83 ldm_partition, /* this must come before msdos */ 84#endif 85#ifdef CONFIG_MSDOS_PARTITION 86 msdos_partition, 87#endif 88#ifdef CONFIG_OSF_PARTITION 89 osf_partition, 90#endif 91#ifdef CONFIG_SUN_PARTITION 92 sun_partition, 93#endif 94#ifdef CONFIG_AMIGA_PARTITION 95 amiga_partition, 96#endif 97#ifdef CONFIG_ATARI_PARTITION 98 atari_partition, 99#endif 100#ifdef CONFIG_MAC_PARTITION 101 mac_partition, 102#endif 103#ifdef CONFIG_ULTRIX_PARTITION 104 ultrix_partition, 105#endif 106#ifdef CONFIG_IBM_PARTITION 107 ibm_partition, 108#endif 109#ifdef CONFIG_KARMA_PARTITION 110 karma_partition, 111#endif 112#ifdef CONFIG_SYSV68_PARTITION 113 sysv68_partition, 114#endif 115 NULL 116}; 117 118/* 119 * disk_name() is used by partition check code and the genhd driver. 120 * It formats the devicename of the indicated disk into 121 * the supplied buffer (of size at least 32), and returns 122 * a pointer to that same buffer (for convenience). 123 */ 124 125char *disk_name(struct gendisk *hd, int partno, char *buf) 126{ 127 if (!partno) 128 snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); 129 else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) 130 snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); 131 else 132 snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); 133 134 return buf; 135} 136 137const char *bdevname(struct block_device *bdev, char *buf) 138{ 139 return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); 140} 141 142EXPORT_SYMBOL(bdevname); 143 144/* 145 * There's very little reason to use this, you should really 146 * have a struct block_device just about everywhere and use 147 * bdevname() instead. 148 */ 149const char *__bdevname(dev_t dev, char *buffer) 150{ 151 scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)", 152 MAJOR(dev), MINOR(dev)); 153 return buffer; 154} 155 156EXPORT_SYMBOL(__bdevname); 157 158static struct parsed_partitions * 159check_partition(struct gendisk *hd, struct block_device *bdev) 160{ 161 struct parsed_partitions *state; 162 int i, res, err; 163 164 state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); 165 if (!state) 166 return NULL; 167 state->pp_buf = (char *)__get_free_page(GFP_KERNEL); 168 if (!state->pp_buf) { 169 kfree(state); 170 return NULL; 171 } 172 state->pp_buf[0] = '\0'; 173 174 state->bdev = bdev; 175 disk_name(hd, 0, state->name); 176 snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); 177 if (isdigit(state->name[strlen(state->name)-1])) 178 sprintf(state->name, "p"); 179 180 state->limit = disk_max_parts(hd); 181 i = res = err = 0; 182 while (!res && check_part[i]) { 183 memset(&state->parts, 0, sizeof(state->parts)); 184 res = check_part[i++](state); 185 if (res < 0) { 186 /* We have hit an I/O error which we don't report now. 187 * But record it, and let the others do their job. 188 */ 189 err = res; 190 res = 0; 191 } 192 193 } 194 if (res > 0) { 195 printk(KERN_INFO "%s", state->pp_buf); 196 197 free_page((unsigned long)state->pp_buf); 198 return state; 199 } 200 if (state->access_beyond_eod) 201 err = -ENOSPC; 202 if (err) 203 /* The partition is unrecognized. So report I/O errors if there were any */ 204 res = err; 205 if (!res) 206 strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); 207 else if (warn_no_part) 208 strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); 209 210 printk(KERN_INFO "%s", state->pp_buf); 211 212 free_page((unsigned long)state->pp_buf); 213 kfree(state); 214 return ERR_PTR(res); 215} 216 217static ssize_t part_partition_show(struct device *dev, 218 struct device_attribute *attr, char *buf) 219{ 220 struct hd_struct *p = dev_to_part(dev); 221 222 return sprintf(buf, "%d\n", p->partno); 223} 224 225static ssize_t part_start_show(struct device *dev, 226 struct device_attribute *attr, char *buf) 227{ 228 struct hd_struct *p = dev_to_part(dev); 229 230 return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 231} 232 233ssize_t part_size_show(struct device *dev, 234 struct device_attribute *attr, char *buf) 235{ 236 struct hd_struct *p = dev_to_part(dev); 237 return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); 238} 239 240static ssize_t part_ro_show(struct device *dev, 241 struct device_attribute *attr, char *buf) 242{ 243 struct hd_struct *p = dev_to_part(dev); 244 return sprintf(buf, "%d\n", p->policy ? 1 : 0); 245} 246 247static ssize_t part_alignment_offset_show(struct device *dev, 248 struct device_attribute *attr, char *buf) 249{ 250 struct hd_struct *p = dev_to_part(dev); 251 return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); 252} 253 254static ssize_t part_discard_alignment_show(struct device *dev, 255 struct device_attribute *attr, char *buf) 256{ 257 struct hd_struct *p = dev_to_part(dev); 258 return sprintf(buf, "%u\n", p->discard_alignment); 259} 260 261ssize_t part_stat_show(struct device *dev, 262 struct device_attribute *attr, char *buf) 263{ 264 struct hd_struct *p = dev_to_part(dev); 265 int cpu; 266 267 cpu = part_stat_lock(); 268 part_round_stats(cpu, p); 269 part_stat_unlock(); 270 return sprintf(buf, 271 "%8lu %8lu %8llu %8u " 272 "%8lu %8lu %8llu %8u " 273 "%8u %8u %8u" 274 "\n", 275 part_stat_read(p, ios[READ]), 276 part_stat_read(p, merges[READ]), 277 (unsigned long long)part_stat_read(p, sectors[READ]), 278 jiffies_to_msecs(part_stat_read(p, ticks[READ])), 279 part_stat_read(p, ios[WRITE]), 280 part_stat_read(p, merges[WRITE]), 281 (unsigned long long)part_stat_read(p, sectors[WRITE]), 282 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 283 part_in_flight(p), 284 jiffies_to_msecs(part_stat_read(p, io_ticks)), 285 jiffies_to_msecs(part_stat_read(p, time_in_queue))); 286} 287 288ssize_t part_inflight_show(struct device *dev, 289 struct device_attribute *attr, char *buf) 290{ 291 struct hd_struct *p = dev_to_part(dev); 292 293 return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]), 294 atomic_read(&p->in_flight[1])); 295} 296 297#ifdef CONFIG_FAIL_MAKE_REQUEST 298ssize_t part_fail_show(struct device *dev, 299 struct device_attribute *attr, char *buf) 300{ 301 struct hd_struct *p = dev_to_part(dev); 302 303 return sprintf(buf, "%d\n", p->make_it_fail); 304} 305 306ssize_t part_fail_store(struct device *dev, 307 struct device_attribute *attr, 308 const char *buf, size_t count) 309{ 310 struct hd_struct *p = dev_to_part(dev); 311 int i; 312 313 if (count > 0 && sscanf(buf, "%d", &i) > 0) 314 p->make_it_fail = (i == 0) ? 0 : 1; 315 316 return count; 317} 318#endif 319 320static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); 321static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); 322static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 323static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); 324static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 325static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, 326 NULL); 327static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 328static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); 329#ifdef CONFIG_FAIL_MAKE_REQUEST 330static struct device_attribute dev_attr_fail = 331 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 332#endif 333 334static struct attribute *part_attrs[] = { 335 &dev_attr_partition.attr, 336 &dev_attr_start.attr, 337 &dev_attr_size.attr, 338 &dev_attr_ro.attr, 339 &dev_attr_alignment_offset.attr, 340 &dev_attr_discard_alignment.attr, 341 &dev_attr_stat.attr, 342 &dev_attr_inflight.attr, 343#ifdef CONFIG_FAIL_MAKE_REQUEST 344 &dev_attr_fail.attr, 345#endif 346 NULL 347}; 348 349static struct attribute_group part_attr_group = { 350 .attrs = part_attrs, 351}; 352 353static const struct attribute_group *part_attr_groups[] = { 354 &part_attr_group, 355#ifdef CONFIG_BLK_DEV_IO_TRACE 356 &blk_trace_attr_group, 357#endif 358 NULL 359}; 360 361static void part_release(struct device *dev) 362{ 363 struct hd_struct *p = dev_to_part(dev); 364 free_part_stats(p); 365 free_part_info(p); 366 kfree(p); 367} 368 369struct device_type part_type = { 370 .name = "partition", 371 .groups = part_attr_groups, 372 .release = part_release, 373}; 374 375static void delete_partition_rcu_cb(struct rcu_head *head) 376{ 377 struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); 378 379 part->start_sect = 0; 380 part->nr_sects = 0; 381 part_stat_set_all(part, 0); 382 put_device(part_to_dev(part)); 383} 384 385void __delete_partition(struct hd_struct *part) 386{ 387 call_rcu(&part->rcu_head, delete_partition_rcu_cb); 388} 389 390void delete_partition(struct gendisk *disk, int partno) 391{ 392 struct disk_part_tbl *ptbl = disk->part_tbl; 393 struct hd_struct *part; 394 395 if (partno >= ptbl->len) 396 return; 397 398 part = ptbl->part[partno]; 399 if (!part) 400 return; 401 402 blk_free_devt(part_devt(part)); 403 rcu_assign_pointer(ptbl->part[partno], NULL); 404 rcu_assign_pointer(ptbl->last_lookup, NULL); 405 kobject_put(part->holder_dir); 406 device_del(part_to_dev(part)); 407 408 hd_struct_put(part); 409} 410 411static ssize_t whole_disk_show(struct device *dev, 412 struct device_attribute *attr, char *buf) 413{ 414 return 0; 415} 416static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 417 whole_disk_show, NULL); 418 419struct hd_struct *add_partition(struct gendisk *disk, int partno, 420 sector_t start, sector_t len, int flags, 421 struct partition_meta_info *info) 422{ 423 struct hd_struct *p; 424 dev_t devt = MKDEV(0, 0); 425 struct device *ddev = disk_to_dev(disk); 426 struct device *pdev; 427 struct disk_part_tbl *ptbl; 428 const char *dname; 429 int err; 430 431 err = disk_expand_part_tbl(disk, partno); 432 if (err) 433 return ERR_PTR(err); 434 ptbl = disk->part_tbl; 435 436 if (ptbl->part[partno]) 437 return ERR_PTR(-EBUSY); 438 439 p = kzalloc(sizeof(*p), GFP_KERNEL); 440 if (!p) 441 return ERR_PTR(-EBUSY); 442 443 if (!init_part_stats(p)) { 444 err = -ENOMEM; 445 goto out_free; 446 } 447 pdev = part_to_dev(p); 448 449 p->start_sect = start; 450 p->alignment_offset = 451 queue_limit_alignment_offset(&disk->queue->limits, start); 452 p->discard_alignment = 453 queue_limit_discard_alignment(&disk->queue->limits, start); 454 p->nr_sects = len; 455 p->partno = partno; 456 p->policy = get_disk_ro(disk); 457 458 if (info) { 459 struct partition_meta_info *pinfo = alloc_part_info(disk); 460 if (!pinfo) 461 goto out_free_stats; 462 memcpy(pinfo, info, sizeof(*info)); 463 p->info = pinfo; 464 } 465 466 dname = dev_name(ddev); 467 if (isdigit(dname[strlen(dname) - 1])) 468 dev_set_name(pdev, "%sp%d", dname, partno); 469 else 470 dev_set_name(pdev, "%s%d", dname, partno); 471 472 device_initialize(pdev); 473 pdev->class = &block_class; 474 pdev->type = &part_type; 475 pdev->parent = ddev; 476 477 err = blk_alloc_devt(p, &devt); 478 if (err) 479 goto out_free_info; 480 pdev->devt = devt; 481 482 /* delay uevent until 'holders' subdir is created */ 483 dev_set_uevent_suppress(pdev, 1); 484 err = device_add(pdev); 485 if (err) 486 goto out_put; 487 488 err = -ENOMEM; 489 p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); 490 if (!p->holder_dir) 491 goto out_del; 492 493 dev_set_uevent_suppress(pdev, 0); 494 if (flags & ADDPART_FLAG_WHOLEDISK) { 495 err = device_create_file(pdev, &dev_attr_whole_disk); 496 if (err) 497 goto out_del; 498 } 499 500 /* everything is up and running, commence */ 501 rcu_assign_pointer(ptbl->part[partno], p); 502 503 /* suppress uevent if the disk suppresses it */ 504 if (!dev_get_uevent_suppress(ddev)) 505 kobject_uevent(&pdev->kobj, KOBJ_ADD); 506 507 hd_ref_init(p); 508 return p; 509 510out_free_info: 511 free_part_info(p); 512out_free_stats: 513 free_part_stats(p); 514out_free: 515 kfree(p); 516 return ERR_PTR(err); 517out_del: 518 kobject_put(p->holder_dir); 519 device_del(pdev); 520out_put: 521 put_device(pdev); 522 blk_free_devt(devt); 523 return ERR_PTR(err); 524} 525 526static bool disk_unlock_native_capacity(struct gendisk *disk) 527{ 528 const struct block_device_operations *bdops = disk->fops; 529 530 if (bdops->unlock_native_capacity && 531 !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { 532 printk(KERN_CONT "enabling native capacity\n"); 533 bdops->unlock_native_capacity(disk); 534 disk->flags |= GENHD_FL_NATIVE_CAPACITY; 535 return true; 536 } else { 537 printk(KERN_CONT "truncated\n"); 538 return false; 539 } 540} 541 542int rescan_partitions(struct gendisk *disk, struct block_device *bdev) 543{ 544 struct parsed_partitions *state = NULL; 545 struct disk_part_iter piter; 546 struct hd_struct *part; 547 int p, highest, res; 548rescan: 549 if (state && !IS_ERR(state)) { 550 kfree(state); 551 state = NULL; 552 } 553 554 if (bdev->bd_part_count) 555 return -EBUSY; 556 res = invalidate_partition(disk, 0); 557 if (res) 558 return res; 559 560 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 561 while ((part = disk_part_iter_next(&piter))) 562 delete_partition(disk, part->partno); 563 disk_part_iter_exit(&piter); 564 565 if (disk->fops->revalidate_disk) 566 disk->fops->revalidate_disk(disk); 567 check_disk_size_change(disk, bdev); 568 bdev->bd_invalidated = 0; 569 if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) 570 return 0; 571 if (IS_ERR(state)) { 572 /* 573 * I/O error reading the partition table. If any 574 * partition code tried to read beyond EOD, retry 575 * after unlocking native capacity. 576 */ 577 if (PTR_ERR(state) == -ENOSPC) { 578 printk(KERN_WARNING "%s: partition table beyond EOD, ", 579 disk->disk_name); 580 if (disk_unlock_native_capacity(disk)) 581 goto rescan; 582 } 583 return -EIO; 584 } 585 /* 586 * If any partition code tried to read beyond EOD, try 587 * unlocking native capacity even if partition table is 588 * successfully read as we could be missing some partitions. 589 */ 590 if (state->access_beyond_eod) { 591 printk(KERN_WARNING 592 "%s: partition table partially beyond EOD, ", 593 disk->disk_name); 594 if (disk_unlock_native_capacity(disk)) 595 goto rescan; 596 } 597 598 /* tell userspace that the media / partition table may have changed */ 599 kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); 600 601 /* Detect the highest partition number and preallocate 602 * disk->part_tbl. This is an optimization and not strictly 603 * necessary. 604 */ 605 for (p = 1, highest = 0; p < state->limit; p++) 606 if (state->parts[p].size) 607 highest = p; 608 609 disk_expand_part_tbl(disk, highest); 610 611 /* add partitions */ 612 for (p = 1; p < state->limit; p++) { 613 sector_t size, from; 614 struct partition_meta_info *info = NULL; 615 616 size = state->parts[p].size; 617 if (!size) 618 continue; 619 620 from = state->parts[p].from; 621 if (from >= get_capacity(disk)) { 622 printk(KERN_WARNING 623 "%s: p%d start %llu is beyond EOD, ", 624 disk->disk_name, p, (unsigned long long) from); 625 if (disk_unlock_native_capacity(disk)) 626 goto rescan; 627 continue; 628 } 629 630 if (from + size > get_capacity(disk)) { 631 printk(KERN_WARNING 632 "%s: p%d size %llu extends beyond EOD, ", 633 disk->disk_name, p, (unsigned long long) size); 634 635 if (disk_unlock_native_capacity(disk)) { 636 /* free state and restart */ 637 goto rescan; 638 } else { 639 /* 640 * we can not ignore partitions of broken tables 641 * created by for example camera firmware, but 642 * we limit them to the end of the disk to avoid 643 * creating invalid block devices 644 */ 645 size = get_capacity(disk) - from; 646 } 647 } 648 649 if (state->parts[p].has_info) 650 info = &state->parts[p].info; 651 part = add_partition(disk, p, from, size, 652 state->parts[p].flags, 653 &state->parts[p].info); 654 if (IS_ERR(part)) { 655 printk(KERN_ERR " %s: p%d could not be added: %ld\n", 656 disk->disk_name, p, -PTR_ERR(part)); 657 continue; 658 } 659#ifdef CONFIG_BLK_DEV_MD 660 if (state->parts[p].flags & ADDPART_FLAG_RAID) 661 md_autodetect_dev(part_to_dev(part)->devt); 662#endif 663 } 664 kfree(state); 665 return 0; 666} 667 668unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) 669{ 670 struct address_space *mapping = bdev->bd_inode->i_mapping; 671 struct page *page; 672 673 page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), 674 NULL); 675 if (!IS_ERR(page)) { 676 if (PageError(page)) 677 goto fail; 678 p->v = page; 679 return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9); 680fail: 681 page_cache_release(page); 682 } 683 p->v = NULL; 684 return NULL; 685} 686 687EXPORT_SYMBOL(read_dev_sector);