/*
 *  linux/fs/block_dev.c
 *
 * Copyright (C) 1991, 1992  Linus Torvalds
 * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 */

#include <linux/config.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <asm/uaccess.h>

struct bdev_inode {
        struct block_device bdev;
        struct inode vfs_inode;
};

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
        return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
        return &BDEV_I(inode)->bdev;
}

EXPORT_SYMBOL(I_BDEV);

static sector_t max_block(struct block_device *bdev)
{
        sector_t retval = ~((sector_t)0);
        loff_t sz = i_size_read(bdev->bd_inode);

        if (sz) {
                unsigned int size = block_size(bdev);
                unsigned int sizebits = blksize_bits(size);
                retval = (sz >> sizebits);
        }
        return retval;
}

/* Kill _all_ buffers, dirty or not.. */
static void kill_bdev(struct block_device *bdev)
{
        invalidate_bdev(bdev, 1);
        truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
}

int set_blocksize(struct block_device *bdev, int size)
{
        /* Size must be a power of two, and between 512 and PAGE_SIZE */
        if (size > PAGE_SIZE || size < 512 || (size & (size-1)))
                return -EINVAL;

        /* Size cannot be smaller than the size supported by the device */
        if (size < bdev_hardsect_size(bdev))
                return -EINVAL;

        /* Don't change the size if it is same as current */
        if (bdev->bd_block_size != size) {
                sync_blockdev(bdev);
                bdev->bd_block_size = size;
                bdev->bd_inode->i_blkbits = blksize_bits(size);
                kill_bdev(bdev);
        }
        return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
        if (set_blocksize(sb->s_bdev, size))
                return 0;
        /* If we get here, we know size is a power of two
         * and its value is between 512 and PAGE_SIZE */
        sb->s_blocksize = size;
        sb->s_blocksize_bits = blksize_bits(size);
        return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
        int minsize = bdev_hardsect_size(sb->s_bdev);
        if (size < minsize)
                size = minsize;
        return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);
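
/*
 * Illustrative use of the blocksize helpers above (a sketch; the
 * function and the sizes are hypothetical): a filesystem's fill_super
 * typically starts from sb_min_blocksize() and later locks in the
 * size recorded in its on-disk superblock via sb_set_blocksize().
 * Both return 0 if the device rejects the size.
 */
#if 0   /* example only */
static int example_fill_super(struct super_block *sb, void *data, int silent)
{
        /* at least 1024 bytes, but never below the hardware sector size */
        int blocksize = sb_min_blocksize(sb, 1024);

        if (!blocksize)
                return -EINVAL;         /* device rejected the size */
        /* ... read the on-disk superblock, find the real block size ... */
        if (!sb_set_blocksize(sb, 4096))        /* e.g. 4K found on disk */
                return -EINVAL;
        return 0;
}
#endif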

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
                struct buffer_head *bh, int create)
{
        if (iblock >= max_block(I_BDEV(inode))) {
                if (create)
                        return -EIO;

                /*
                 * For reads we're just trying to fill a partial page.
                 * Return a hole - the caller will have to call get_block
                 * again before it can fill the page, and will get -EIO
                 * at that time.
                 */
                return 0;
        }
        bh->b_bdev = I_BDEV(inode);
        bh->b_blocknr = iblock;
        set_buffer_mapped(bh);
        return 0;
}

static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
                struct buffer_head *bh, int create)
{
        sector_t end_block = max_block(I_BDEV(inode));
        unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

        if ((iblock + max_blocks) > end_block) {
                max_blocks = end_block - iblock;
                if ((long)max_blocks <= 0) {
                        if (create)
                                return -EIO;    /* write fully beyond EOF */
                        /*
                         * It is a read which is fully beyond EOF.  We return
                         * a !buffer_mapped buffer
                         */
                        max_blocks = 0;
                }
        }

        bh->b_bdev = I_BDEV(inode);
        bh->b_blocknr = iblock;
        bh->b_size = max_blocks << inode->i_blkbits;
        if (max_blocks)
                set_buffer_mapped(bh);
        return 0;
}
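
/*
 * Worked example for the clamping above (hypothetical numbers): on a
 * device of 1000 blocks, a direct-IO request at iblock == 998 whose
 * bh->b_size covers 4 blocks has end_block == 1000, so max_blocks is
 * clamped from 4 down to 2 and only the in-range blocks are mapped.
 * At iblock == 1002 the subtraction goes negative: a read gets back
 * an unmapped buffer, while a write fails with -EIO.
 */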

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
                        loff_t offset, unsigned long nr_segs)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;

        return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
                                iov, offset, nr_segs, blkdev_get_blocks, NULL);
}

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
        return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
        return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
        return block_prepare_write(page, from, to, blkdev_get_block);
}

static int blkdev_commit_write(struct file *file, struct page *page, unsigned from, unsigned to)
{
        return block_commit_write(page, from, to);
}

/*
 * private llseek:
 * for a block special file file->f_dentry->d_inode->i_size is zero,
 * so we compute the size by hand
 */
static loff_t block_llseek(struct file *file, loff_t offset, int origin)
{
        struct inode *bd_inode = file->f_mapping->host;
        loff_t size;
        loff_t retval;

        mutex_lock(&bd_inode->i_mutex);
        size = i_size_read(bd_inode);

        switch (origin) {
                case 2:
                        offset += size;
                        break;
                case 1:
                        offset += file->f_pos;
        }
        retval = -EINVAL;
        if (offset >= 0 && offset <= size) {
                if (offset != file->f_pos) {
                        file->f_pos = offset;
                }
                retval = offset;
        }
        mutex_unlock(&bd_inode->i_mutex);
        return retval;
}

/*
 * Filp is never NULL; the only case when ->fsync() is called with a
 * NULL first argument is nfsd_sync_dir(), and a block device isn't
 * a directory.
 */

static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
        return sync_blockdev(I_BDEV(filp->f_mapping->host));
}

/*
 * pseudo-fs
 */

static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static kmem_cache_t * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
        struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, SLAB_KERNEL);
        if (!ei)
                return NULL;
        return &ei->vfs_inode;
}

static void bdev_destroy_inode(struct inode *inode)
{
        struct bdev_inode *bdi = BDEV_I(inode);

        bdi->bdev.bd_inode_backing_dev_info = NULL;
        kmem_cache_free(bdev_cachep, bdi);
}

static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
{
        struct bdev_inode *ei = (struct bdev_inode *) foo;
        struct block_device *bdev = &ei->bdev;

        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
            SLAB_CTOR_CONSTRUCTOR)
        {
                memset(bdev, 0, sizeof(*bdev));
                mutex_init(&bdev->bd_mutex);
                mutex_init(&bdev->bd_mount_mutex);
                INIT_LIST_HEAD(&bdev->bd_inodes);
                INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
                INIT_LIST_HEAD(&bdev->bd_holder_list);
#endif
                inode_init_once(&ei->vfs_inode);
        }
}

static inline void __bd_forget(struct inode *inode)
{
        list_del_init(&inode->i_devices);
        inode->i_bdev = NULL;
        inode->i_mapping = &inode->i_data;
}

static void bdev_clear_inode(struct inode *inode)
{
        struct block_device *bdev = &BDEV_I(inode)->bdev;
        struct list_head *p;
        spin_lock(&bdev_lock);
        while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
                __bd_forget(list_entry(p, struct inode, i_devices));
        }
        list_del_init(&bdev->bd_list);
        spin_unlock(&bdev_lock);
}

static struct super_operations bdev_sops = {
        .statfs = simple_statfs,
        .alloc_inode = bdev_alloc_inode,
        .destroy_inode = bdev_destroy_inode,
        .drop_inode = generic_delete_inode,
        .clear_inode = bdev_clear_inode,
};

static struct super_block *bd_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
{
        return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
}

static struct file_system_type bd_type = {
        .name           = "bdev",
        .get_sb         = bd_get_sb,
        .kill_sb        = kill_anon_super,
};

static struct vfsmount *bd_mnt __read_mostly;
struct super_block *blockdev_superblock;

void __init bdev_cache_init(void)
{
        int err;
        bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
                        0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
                                SLAB_MEM_SPREAD|SLAB_PANIC),
                        init_once, NULL);
        err = register_filesystem(&bd_type);
        if (err)
                panic("Cannot register bdev pseudo-fs");
        bd_mnt = kern_mount(&bd_type);
        err = PTR_ERR(bd_mnt);
        if (IS_ERR(bd_mnt))
                panic("Cannot create bdev pseudo-fs");
        blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
}

/*
 * Most likely a _very_ bad hash - but then it's hardly critical for a
 * small /dev, and can be fixed when somebody needs a really large one.
 * Keep in mind that it will be fed through the icache hash function too.
 */
static inline unsigned long hash(dev_t dev)
{
        return MAJOR(dev)+MINOR(dev);
}
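
/*
 * For illustration: hash() simply adds the two halves of the dev_t,
 * so (8, 0) (/dev/sda) hashes to 8 and (8, 16) (/dev/sdb) to 24 -
 * while (1, 8) and (8, 1) collide on 9.  Collisions only cost lookup
 * time, since bdev_test() below compares bd_dev exactly.
 */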

static int bdev_test(struct inode *inode, void *data)
{
        return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
        BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
        return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
        struct block_device *bdev;
        struct inode *inode;

        inode = iget5_locked(bd_mnt->mnt_sb, hash(dev),
                        bdev_test, bdev_set, &dev);

        if (!inode)
                return NULL;

        bdev = &BDEV_I(inode)->bdev;

        if (inode->i_state & I_NEW) {
                bdev->bd_contains = NULL;
                bdev->bd_inode = inode;
                bdev->bd_block_size = (1 << inode->i_blkbits);
                bdev->bd_part_count = 0;
                bdev->bd_invalidated = 0;
                inode->i_mode = S_IFBLK;
                inode->i_rdev = dev;
                inode->i_bdev = bdev;
                inode->i_data.a_ops = &def_blk_aops;
                mapping_set_gfp_mask(&inode->i_data, GFP_USER);
                inode->i_data.backing_dev_info = &default_backing_dev_info;
                spin_lock(&bdev_lock);
                list_add(&bdev->bd_list, &all_bdevs);
                spin_unlock(&bdev_lock);
                unlock_new_inode(inode);
        }
        return bdev;
}

EXPORT_SYMBOL(bdget);

long nr_blockdev_pages(void)
{
        struct list_head *p;
        long ret = 0;
        spin_lock(&bdev_lock);
        list_for_each(p, &all_bdevs) {
                struct block_device *bdev;
                bdev = list_entry(p, struct block_device, bd_list);
                ret += bdev->bd_inode->i_mapping->nrpages;
        }
        spin_unlock(&bdev_lock);
        return ret;
}

void bdput(struct block_device *bdev)
{
        iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
        struct block_device *bdev;
        spin_lock(&bdev_lock);
        bdev = inode->i_bdev;
        if (bdev && igrab(bdev->bd_inode)) {
                spin_unlock(&bdev_lock);
                return bdev;
        }
        spin_unlock(&bdev_lock);
        bdev = bdget(inode->i_rdev);
        if (bdev) {
                spin_lock(&bdev_lock);
                if (inode->i_bdev)
                        __bd_forget(inode);
                inode->i_bdev = bdev;
                inode->i_mapping = bdev->bd_inode->i_mapping;
                list_add(&inode->i_devices, &bdev->bd_inodes);
                spin_unlock(&bdev_lock);
        }
        return bdev;
}

/* Call when you free inode */

void bd_forget(struct inode *inode)
{
        spin_lock(&bdev_lock);
        if (inode->i_bdev)
                __bd_forget(inode);
        spin_unlock(&bdev_lock);
}

int bd_claim(struct block_device *bdev, void *holder)
{
        int res;
        spin_lock(&bdev_lock);

        /* first decide result */
        if (bdev->bd_holder == holder)
                res = 0;         /* already a holder */
        else if (bdev->bd_holder != NULL)
                res = -EBUSY;    /* held by someone else */
        else if (bdev->bd_contains == bdev)
                res = 0;         /* is a whole device which isn't held */

        else if (bdev->bd_contains->bd_holder == bd_claim)
                res = 0;         /* is a partition of a device that is being partitioned */
        else if (bdev->bd_contains->bd_holder != NULL)
                res = -EBUSY;    /* is a partition of a held device */
        else
                res = 0;         /* is a partition of an un-held device */

        /* now impose change */
        if (res == 0) {
                /* note that for a whole device bd_holders
                 * will be incremented twice, and bd_holder will
                 * be set to bd_claim before being set to holder
                 */
                bdev->bd_contains->bd_holders++;
                bdev->bd_contains->bd_holder = bd_claim;
                bdev->bd_holders++;
                bdev->bd_holder = holder;
        }
        spin_unlock(&bdev_lock);
        return res;
}

EXPORT_SYMBOL(bd_claim);
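
/*
 * Sketch of the claim protocol (hypothetical caller; "my_holder" is
 * just a unique cookie, typically a driver-private pointer): a
 * successful bd_claim() must be balanced by exactly one bd_release().
 */
#if 0   /* example only */
static int example_exclusive_use(struct block_device *bdev, void *my_holder)
{
        int err = bd_claim(bdev, my_holder);

        if (err)                /* -EBUSY: held by someone else */
                return err;
        /* ... the device (and its containing disk) is ours ... */
        bd_release(bdev);
        return 0;
}
#endif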

void bd_release(struct block_device *bdev)
{
        spin_lock(&bdev_lock);
        if (!--bdev->bd_contains->bd_holders)
                bdev->bd_contains->bd_holder = NULL;
        if (!--bdev->bd_holders)
                bdev->bd_holder = NULL;
        spin_unlock(&bdev_lock);
}

EXPORT_SYMBOL(bd_release);

#ifdef CONFIG_SYSFS
/*
 * Functions for bd_claim_by_kobject / bd_release_from_kobject
 *
 * If a kobject is passed to bd_claim_by_kobject() and the kobject has
 * a parent directory, the following symlinks are created:
 *   o from the kobject to the claimed bdev
 *   o from the "holders" directory of the bdev to the parent of the kobject
 * bd_release_from_kobject() removes these symlinks.
 *
 * Example:
 * If /dev/dm-0 maps to /dev/sda and the kobject corresponding to
 * /sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
 *   /sys/block/dm-0/slaves/sda --> /sys/block/sda
 *   /sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 */

static struct kobject *bdev_get_kobj(struct block_device *bdev)
{
        if (bdev->bd_contains != bdev)
                return kobject_get(&bdev->bd_part->kobj);
        else
                return kobject_get(&bdev->bd_disk->kobj);
}

static struct kobject *bdev_get_holder(struct block_device *bdev)
{
        if (bdev->bd_contains != bdev)
                return kobject_get(bdev->bd_part->holder_dir);
        else
                return kobject_get(bdev->bd_disk->holder_dir);
}

static void add_symlink(struct kobject *from, struct kobject *to)
{
        if (!from || !to)
                return;
        sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
        if (!from || !to)
                return;
        sysfs_remove_link(from, kobject_name(to));
}

/*
 * 'struct bd_holder' contains pointers to kobjects symlinked by
 * bd_claim_by_kobject.
 * It's connected to bd_holder_list which is protected by bdev->bd_mutex.
 */
struct bd_holder {
        struct list_head list;  /* chain of holders of the bdev */
        int count;              /* references from the holder */
        struct kobject *sdir;   /* holder object, e.g. "/block/dm-0/slaves" */
        struct kobject *hdev;   /* e.g. "/block/dm-0" */
        struct kobject *hdir;   /* e.g. "/block/sda/holders" */
        struct kobject *sdev;   /* e.g. "/block/sda" */
};

/*
 * Get references of related kobjects at once.
 * Returns 1 on success. 0 on failure.
 *
 * Should call bd_holder_release_dirs() after successful use.
 */
static int bd_holder_grab_dirs(struct block_device *bdev,
                        struct bd_holder *bo)
{
        if (!bdev || !bo)
                return 0;

        bo->sdir = kobject_get(bo->sdir);
        if (!bo->sdir)
                return 0;

        bo->hdev = kobject_get(bo->sdir->parent);
        if (!bo->hdev)
                goto fail_put_sdir;

        bo->sdev = bdev_get_kobj(bdev);
        if (!bo->sdev)
                goto fail_put_hdev;

        bo->hdir = bdev_get_holder(bdev);
        if (!bo->hdir)
                goto fail_put_sdev;

        return 1;

fail_put_sdev:
        kobject_put(bo->sdev);
fail_put_hdev:
        kobject_put(bo->hdev);
fail_put_sdir:
        kobject_put(bo->sdir);

        return 0;
}

/* Put references of related kobjects at once. */
static void bd_holder_release_dirs(struct bd_holder *bo)
{
        kobject_put(bo->hdir);
        kobject_put(bo->sdev);
        kobject_put(bo->hdev);
        kobject_put(bo->sdir);
}

static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
{
        struct bd_holder *bo;

        bo = kzalloc(sizeof(*bo), GFP_KERNEL);
        if (!bo)
                return NULL;

        bo->count = 1;
        bo->sdir = kobj;

        return bo;
}

static void free_bd_holder(struct bd_holder *bo)
{
        kfree(bo);
}
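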

/**
 * add_bd_holder - create sysfs symlinks for bd_claim() relationship
 *
 * @bdev:	block device to be bd_claimed
 * @bo:		preallocated and initialized by alloc_bd_holder()
 *
 * If there is no matching entry with @bo in @bdev->bd_holder_list,
 * add @bo to the list, create symlinks.
 *
 * Returns 1 if @bo was added to the list.
 * Returns 0 if @bo wasn't used for any reason and should be freed.
 */
static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
{
        struct bd_holder *tmp;

        if (!bo)
                return 0;

        list_for_each_entry(tmp, &bdev->bd_holder_list, list) {
                if (tmp->sdir == bo->sdir) {
                        tmp->count++;
                        return 0;
                }
        }

        if (!bd_holder_grab_dirs(bdev, bo))
                return 0;

        add_symlink(bo->sdir, bo->sdev);
        add_symlink(bo->hdir, bo->hdev);
        list_add_tail(&bo->list, &bdev->bd_holder_list);
        return 1;
}

/**
 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
 *
 * @bdev:	block device to be bd_claimed
 * @kobj:	holder's kobject
 *
 * If there is a matching entry with @kobj in @bdev->bd_holder_list
 * and no other bd_claim() from the same kobject,
 * remove the struct bd_holder from the list, delete symlinks for it.
 *
 * Returns a pointer to the struct bd_holder when it's removed from the list
 * and ready to be freed.
 * Returns NULL if a matching claim isn't found or there is another
 * bd_claim() by the same kobject.
 */
static struct bd_holder *del_bd_holder(struct block_device *bdev,
                                        struct kobject *kobj)
{
        struct bd_holder *bo;

        list_for_each_entry(bo, &bdev->bd_holder_list, list) {
                if (bo->sdir == kobj) {
                        bo->count--;
                        BUG_ON(bo->count < 0);
                        if (!bo->count) {
                                list_del(&bo->list);
                                del_symlink(bo->sdir, bo->sdev);
                                del_symlink(bo->hdir, bo->hdev);
                                bd_holder_release_dirs(bo);
                                return bo;
                        }
                        break;
                }
        }

        return NULL;
}

/**
 * bd_claim_by_kobject - bd_claim() with additional kobject signature
 *
 * @bdev:	block device to be claimed
 * @holder:	holder's signature
 * @kobj:	holder's kobject
 *
 * Do bd_claim() and if it succeeds, create sysfs symlinks between
 * the bdev and the holder's kobject.
 * Use bd_release_from_kobject() when releasing the claimed bdev.
 *
 * Returns 0 on success. (same as bd_claim())
 * Returns errno on failure.
 */
static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
                                struct kobject *kobj)
{
        int res;
        struct bd_holder *bo;

        if (!kobj)
                return -EINVAL;

        bo = alloc_bd_holder(kobj);
        if (!bo)
                return -ENOMEM;

        mutex_lock(&bdev->bd_mutex);
        res = bd_claim(bdev, holder);
        if (res || !add_bd_holder(bdev, bo))
                free_bd_holder(bo);
        mutex_unlock(&bdev->bd_mutex);

        return res;
}

/**
 * bd_release_from_kobject - bd_release() with additional kobject signature
 *
 * @bdev:	block device to be released
 * @kobj:	holder's kobject
 *
 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
 */
static void bd_release_from_kobject(struct block_device *bdev,
                                        struct kobject *kobj)
{
        struct bd_holder *bo;

        if (!kobj)
                return;

        mutex_lock(&bdev->bd_mutex);
        bd_release(bdev);
        if ((bo = del_bd_holder(bdev, kobj)))
                free_bd_holder(bo);
        mutex_unlock(&bdev->bd_mutex);
}

/**
 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
 *
 * @bdev:	block device to be claimed
 * @holder:	holder's signature
 * @disk:	holder's gendisk
 *
 * Call bd_claim_by_kobject() with a reference to @disk->slave_dir.
 */
int bd_claim_by_disk(struct block_device *bdev, void *holder,
                        struct gendisk *disk)
{
        return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
}
EXPORT_SYMBOL_GPL(bd_claim_by_disk);

/**
 * bd_release_from_disk - wrapper function for bd_release_from_kobject()
 *
 * @bdev:	block device to be released
 * @disk:	holder's gendisk
 *
 * Call bd_release_from_kobject() and put @disk->slave_dir.
 */
void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
{
        bd_release_from_kobject(bdev, disk->slave_dir);
        kobject_put(disk->slave_dir);
}
EXPORT_SYMBOL_GPL(bd_release_from_disk);
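
/*
 * Sketch of how a stacking driver such as dm or md would use the two
 * wrappers above (example only; "struct md_dev" and its fields are
 * hypothetical stand-ins for the driver's own holder structure):
 */
#if 0   /* example only */
static int example_add_slave(struct md_dev *md, struct block_device *bdev)
{
        /* claims bdev and links /sys/block/<md>/slaves/<bdev> etc. */
        return bd_claim_by_disk(bdev, md, md->gendisk);
}

static void example_del_slave(struct md_dev *md, struct block_device *bdev)
{
        bd_release_from_disk(bdev, md->gendisk);  /* drops claim + links */
}
#endif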

#endif

/*
 * Tries to open a block device by device number.  Use it ONLY if you
 * really do not have anything better - i.e. when you are behind a
 * truly sucky interface and all you are given is a device number.  _Never_
 * to be used for internal purposes.  If you ever need it - reconsider
 * your API.
 */
struct block_device *open_by_devnum(dev_t dev, unsigned mode)
{
        struct block_device *bdev = bdget(dev);
        int err = -ENOMEM;
        int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
        if (bdev)
                err = blkdev_get(bdev, mode, flags);
        return err ? ERR_PTR(err) : bdev;
}

EXPORT_SYMBOL(open_by_devnum);

/*
 * This routine checks whether removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 */
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(struct block_device *bdev)
{
        struct gendisk *disk = bdev->bd_disk;
        struct block_device_operations * bdops = disk->fops;

        if (!bdops->media_changed)
                return 0;
        if (!bdops->media_changed(bdev->bd_disk))
                return 0;

        if (__invalidate_device(bdev))
                printk("VFS: busy inodes on changed media.\n");

        if (bdops->revalidate_disk)
                bdops->revalidate_disk(bdev->bd_disk);
        if (bdev->bd_disk->minors > 1)
                bdev->bd_invalidated = 1;
        return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
        unsigned bsize = bdev_hardsect_size(bdev);

        bdev->bd_inode->i_size = size;
        while (bsize < PAGE_CACHE_SIZE) {
                if (size & bsize)
                        break;
                bsize <<= 1;
        }
        bdev->bd_block_size = bsize;
        bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);
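
/*
 * Worked example for the loop above (hypothetical sizes, 4K pages):
 * it picks the largest power of two, up to PAGE_CACHE_SIZE, that
 * divides the device size.  A 1 MiB device with 512-byte sectors ends
 * up with bsize 4096 (512, 1024 and 2048 all divide it, so bsize
 * doubles until it reaches the page size); a 9728-byte (19-sector)
 * device stops immediately at 512, because bit 9 is set in its size.
 */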

static int do_open(struct block_device *bdev, struct file *file)
{
        struct module *owner = NULL;
        struct gendisk *disk;
        int ret = -ENXIO;
        int part;

        file->f_mapping = bdev->bd_inode->i_mapping;
        lock_kernel();
        disk = get_gendisk(bdev->bd_dev, &part);
        if (!disk) {
                unlock_kernel();
                bdput(bdev);
                return ret;
        }
        owner = disk->fops->owner;

        mutex_lock(&bdev->bd_mutex);
        if (!bdev->bd_openers) {
                bdev->bd_disk = disk;
                bdev->bd_contains = bdev;
                if (!part) {
                        struct backing_dev_info *bdi;
                        if (disk->fops->open) {
                                ret = disk->fops->open(bdev->bd_inode, file);
                                if (ret)
                                        goto out_first;
                        }
                        if (!bdev->bd_openers) {
                                bd_set_size(bdev, (loff_t)get_capacity(disk) << 9);
                                bdi = blk_get_backing_dev_info(bdev);
                                if (bdi == NULL)
                                        bdi = &default_backing_dev_info;
                                bdev->bd_inode->i_data.backing_dev_info = bdi;
                        }
                        if (bdev->bd_invalidated)
                                rescan_partitions(disk, bdev);
                } else {
                        struct hd_struct *p;
                        struct block_device *whole;
                        whole = bdget_disk(disk, 0);
                        ret = -ENOMEM;
                        if (!whole)
                                goto out_first;
                        ret = blkdev_get(whole, file->f_mode, file->f_flags);
                        if (ret)
                                goto out_first;
                        bdev->bd_contains = whole;
                        mutex_lock(&whole->bd_mutex);
                        whole->bd_part_count++;
                        p = disk->part[part - 1];
                        bdev->bd_inode->i_data.backing_dev_info =
                           whole->bd_inode->i_data.backing_dev_info;
                        if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) {
                                whole->bd_part_count--;
                                mutex_unlock(&whole->bd_mutex);
                                ret = -ENXIO;
                                goto out_first;
                        }
                        kobject_get(&p->kobj);
                        bdev->bd_part = p;
                        bd_set_size(bdev, (loff_t) p->nr_sects << 9);
                        mutex_unlock(&whole->bd_mutex);
                }
        } else {
                put_disk(disk);
                module_put(owner);
                if (bdev->bd_contains == bdev) {
                        if (bdev->bd_disk->fops->open) {
                                ret = bdev->bd_disk->fops->open(bdev->bd_inode, file);
                                if (ret)
                                        goto out;
                        }
                        if (bdev->bd_invalidated)
                                rescan_partitions(bdev->bd_disk, bdev);
                } else {
                        mutex_lock(&bdev->bd_contains->bd_mutex);
                        bdev->bd_contains->bd_part_count++;
                        mutex_unlock(&bdev->bd_contains->bd_mutex);
                }
        }
        bdev->bd_openers++;
        mutex_unlock(&bdev->bd_mutex);
        unlock_kernel();
        return 0;

out_first:
        bdev->bd_disk = NULL;
        bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
        if (bdev != bdev->bd_contains)
                blkdev_put(bdev->bd_contains);
        bdev->bd_contains = NULL;
        put_disk(disk);
        module_put(owner);
out:
        mutex_unlock(&bdev->bd_mutex);
        unlock_kernel();
        if (ret)
                bdput(bdev);
        return ret;
}

int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
{
        /*
         * This crockload is due to bad choice of ->open() type.
         * It will go away.
         * For now, block device ->open() routine must _not_
         * examine anything in 'inode' argument except ->i_rdev.
         */
        struct file fake_file = {};
        struct dentry fake_dentry = {};
        fake_file.f_mode = mode;
        fake_file.f_flags = flags;
        fake_file.f_dentry = &fake_dentry;
        fake_dentry.d_inode = bdev->bd_inode;

        return do_open(bdev, &fake_file);
}

EXPORT_SYMBOL(blkdev_get);

static int blkdev_open(struct inode * inode, struct file * filp)
{
        struct block_device *bdev;
        int res;

        /*
         * Preserve backwards compatibility and allow large file access
         * even if userspace doesn't ask for it explicitly. Some mkfs
         * binaries need it. We might want to drop this workaround
         * during an unstable branch.
         */
        filp->f_flags |= O_LARGEFILE;

        bdev = bd_acquire(inode);

        res = do_open(bdev, filp);
        if (res)
                return res;

        if (!(filp->f_flags & O_EXCL))
                return 0;

        if (!(res = bd_claim(bdev, filp)))
                return 0;

        blkdev_put(bdev);
        return res;
}

int blkdev_put(struct block_device *bdev)
{
        int ret = 0;
        struct inode *bd_inode = bdev->bd_inode;
        struct gendisk *disk = bdev->bd_disk;

        mutex_lock(&bdev->bd_mutex);
        lock_kernel();
        if (!--bdev->bd_openers) {
                sync_blockdev(bdev);
                kill_bdev(bdev);
        }
        if (bdev->bd_contains == bdev) {
                if (disk->fops->release)
                        ret = disk->fops->release(bd_inode, NULL);
        } else {
                mutex_lock(&bdev->bd_contains->bd_mutex);
                bdev->bd_contains->bd_part_count--;
                mutex_unlock(&bdev->bd_contains->bd_mutex);
        }
        if (!bdev->bd_openers) {
                struct module *owner = disk->fops->owner;

                put_disk(disk);
                module_put(owner);

                if (bdev->bd_contains != bdev) {
                        kobject_put(&bdev->bd_part->kobj);
                        bdev->bd_part = NULL;
                }
                bdev->bd_disk = NULL;
                bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
                if (bdev != bdev->bd_contains) {
                        blkdev_put(bdev->bd_contains);
                }
                bdev->bd_contains = NULL;
        }
        unlock_kernel();
        mutex_unlock(&bdev->bd_mutex);
        bdput(bdev);
        return ret;
}

EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
        struct block_device *bdev = I_BDEV(filp->f_mapping->host);
        if (bdev->bd_holder == filp)
                bd_release(bdev);
        return blkdev_put(bdev);
}
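
/*
 * Note on the O_EXCL handling in blkdev_open() above: a userspace
 * open(2) with O_EXCL claims the device with the struct file as
 * holder, so it fails with EBUSY while somebody else (e.g. a mounted
 * filesystem) holds the device.  Illustrative userspace snippet:
 *
 *	int fd = open("/dev/sda1", O_RDWR | O_EXCL);
 *	if (fd < 0 && errno == EBUSY)
 *		fprintf(stderr, "device is claimed, perhaps mounted\n");
 */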

static ssize_t blkdev_file_write(struct file *file, const char __user *buf,
                                   size_t count, loff_t *ppos)
{
        struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };

        return generic_file_write_nolock(file, &local_iov, 1, ppos);
}

static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf,
                                   size_t count, loff_t pos)
{
        struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count };

        return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
        return blkdev_ioctl(file->f_mapping->host, file, cmd, arg);
}

struct address_space_operations def_blk_aops = {
        .readpage       = blkdev_readpage,
        .writepage      = blkdev_writepage,
        .sync_page      = block_sync_page,
        .prepare_write  = blkdev_prepare_write,
        .commit_write   = blkdev_commit_write,
        .writepages     = generic_writepages,
        .direct_IO      = blkdev_direct_IO,
};

const struct file_operations def_blk_fops = {
        .open           = blkdev_open,
        .release        = blkdev_close,
        .llseek         = block_llseek,
        .read           = generic_file_read,
        .write          = blkdev_file_write,
        .aio_read       = generic_file_aio_read,
        .aio_write      = blkdev_file_aio_write,
        .mmap           = generic_file_mmap,
        .fsync          = block_fsync,
        .unlocked_ioctl = block_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = compat_blkdev_ioctl,
#endif
        .readv          = generic_file_readv,
        .writev         = generic_file_write_nolock,
        .sendfile       = generic_file_sendfile,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
        int res;
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg);
        set_fs(old_fs);
        return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);
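
/*
 * Sketch of ioctl_by_bdev() use (hypothetical caller): because the
 * argument is interpreted under KERNEL_DS, in-kernel callers may pass
 * kernel pointers; argument-less commands work as-is, e.g. asking the
 * driver to reread the partition table:
 */
#if 0   /* example only */
static void example_rescan(struct block_device *bdev)
{
        ioctl_by_bdev(bdev, BLKRRPART, 0);      /* reread partition table */
}
#endif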

/**
 * lookup_bdev  - lookup a struct block_device by name
 *
 * @path:	special file representing the block device
 *
 * Get a reference to the blockdevice at @path in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */
struct block_device *lookup_bdev(const char *path)
{
        struct block_device *bdev;
        struct inode *inode;
        struct nameidata nd;
        int error;

        if (!path || !*path)
                return ERR_PTR(-EINVAL);

        error = path_lookup(path, LOOKUP_FOLLOW, &nd);
        if (error)
                return ERR_PTR(error);

        inode = nd.dentry->d_inode;
        error = -ENOTBLK;
        if (!S_ISBLK(inode->i_mode))
                goto fail;
        error = -EACCES;
        if (nd.mnt->mnt_flags & MNT_NODEV)
                goto fail;
        error = -ENOMEM;
        bdev = bd_acquire(inode);
        if (!bdev)
                goto fail;
out:
        path_release(&nd);
        return bdev;
fail:
        bdev = ERR_PTR(error);
        goto out;
}

/**
 * open_bdev_excl  -  open a block device by name and set it up for use
 *
 * @path:	special file representing the block device
 * @flags:	%MS_RDONLY for opening read-only
 * @holder:	owner for exclusion
 *
 * Open the blockdevice described by the special file at @path, claim it
 * for the @holder.
 */
struct block_device *open_bdev_excl(const char *path, int flags, void *holder)
{
        struct block_device *bdev;
        mode_t mode = FMODE_READ;
        int error = 0;

        bdev = lookup_bdev(path);
        if (IS_ERR(bdev))
                return bdev;

        if (!(flags & MS_RDONLY))
                mode |= FMODE_WRITE;
        error = blkdev_get(bdev, mode, 0);
        if (error)
                return ERR_PTR(error);
        error = -EACCES;
        if (!(flags & MS_RDONLY) && bdev_read_only(bdev))
                goto blkdev_put;
        error = bd_claim(bdev, holder);
        if (error)
                goto blkdev_put;

        return bdev;

blkdev_put:
        blkdev_put(bdev);
        return ERR_PTR(error);
}

EXPORT_SYMBOL(open_bdev_excl);

/**
 * close_bdev_excl  -  release a blockdevice opened by open_bdev_excl()
 *
 * @bdev:	blockdevice to close
 *
 * This is the counterpart to open_bdev_excl().
 */
void close_bdev_excl(struct block_device *bdev)
{
        bd_release(bdev);
        blkdev_put(bdev);
}

EXPORT_SYMBOL(close_bdev_excl);
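
/*
 * Sketch of the usual caller (hypothetical function; this is
 * essentially what get_sb_bdev() does when a filesystem is mounted
 * on a block device):
 */
#if 0   /* example only */
static struct block_device *example_mount_dev(const char *dev_name,
                                                int flags, void *fs_type)
{
        struct block_device *bdev = open_bdev_excl(dev_name, flags, fs_type);

        if (IS_ERR(bdev))
                return bdev;            /* -EBUSY, -EACCES, -ENOTBLK, ... */
        /* ... set the blocksize, read the superblock ... */
        /* later, at umount time: close_bdev_excl(bdev); */
        return bdev;
}
#endif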