/*
 *  linux/fs/block_dev.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
 */

#include <linux/init.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/slab.h>
#include <linux/kmod.h>
#include <linux/major.h>
#include <linux/smp_lock.h>
#include <linux/device_cgroup.h>
#include <linux/highmem.h>
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
#include <linux/buffer_head.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/mount.h>
#include <linux/uio.h>
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/kmemleak.h>
#include <asm/uaccess.h>
#include "internal.h"

struct bdev_inode {
	struct block_device bdev;
	struct inode vfs_inode;
};

static const struct address_space_operations def_blk_aops;

static inline struct bdev_inode *BDEV_I(struct inode *inode)
{
	return container_of(inode, struct bdev_inode, vfs_inode);
}

inline struct block_device *I_BDEV(struct inode *inode)
{
	return &BDEV_I(inode)->bdev;
}

EXPORT_SYMBOL(I_BDEV);

static sector_t max_block(struct block_device *bdev)
{
	sector_t retval = ~((sector_t)0);
	loff_t sz = i_size_read(bdev->bd_inode);

	if (sz) {
		unsigned int size = block_size(bdev);
		unsigned int sizebits = blksize_bits(size);
		retval = (sz >> sizebits);
	}
	return retval;
}

/* Kill _all_ buffers and pagecache, dirty or not.. */
static void kill_bdev(struct block_device *bdev)
{
	if (bdev->bd_inode->i_mapping->nrpages == 0)
		return;
	invalidate_bh_lrus();
	truncate_inode_pages(bdev->bd_inode->i_mapping, 0);
}

int set_blocksize(struct block_device *bdev, int size)
{
	/* Size must be a power of two, and between 512 and PAGE_SIZE */
	if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
		return -EINVAL;

	/* Size cannot be smaller than the size supported by the device */
	if (size < bdev_logical_block_size(bdev))
		return -EINVAL;

	/* Don't change the size if it is same as current */
	if (bdev->bd_block_size != size) {
		sync_blockdev(bdev);
		bdev->bd_block_size = size;
		bdev->bd_inode->i_blkbits = blksize_bits(size);
		kill_bdev(bdev);
	}
	return 0;
}

EXPORT_SYMBOL(set_blocksize);

int sb_set_blocksize(struct super_block *sb, int size)
{
	if (set_blocksize(sb->s_bdev, size))
		return 0;
	/* If we get here, we know size is power of two
	 * and its value is between 512 and PAGE_SIZE */
	sb->s_blocksize = size;
	sb->s_blocksize_bits = blksize_bits(size);
	return sb->s_blocksize;
}

EXPORT_SYMBOL(sb_set_blocksize);

int sb_min_blocksize(struct super_block *sb, int size)
{
	int minsize = bdev_logical_block_size(sb->s_bdev);
	if (size < minsize)
		size = minsize;
	return sb_set_blocksize(sb, size);
}

EXPORT_SYMBOL(sb_min_blocksize);

static int
blkdev_get_block(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	if (iblock >= max_block(I_BDEV(inode))) {
		if (create)
			return -EIO;

		/*
		 * for reads, we're just trying to fill a partial page.
		 * return a hole, they will have to call get_block again
		 * before they can fill it, and they will get -EIO at that
		 * time
		 */
		return 0;
	}
	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	set_buffer_mapped(bh);
	return 0;
}

static int
blkdev_get_blocks(struct inode *inode, sector_t iblock,
		struct buffer_head *bh, int create)
{
	sector_t end_block = max_block(I_BDEV(inode));
	unsigned long max_blocks = bh->b_size >> inode->i_blkbits;

	if ((iblock + max_blocks) > end_block) {
		max_blocks = end_block - iblock;
		if ((long)max_blocks <= 0) {
			if (create)
				return -EIO;	/* write fully beyond EOF */
			/*
			 * It is a read which is fully beyond EOF. We return
			 * a !buffer_mapped buffer
			 */
			max_blocks = 0;
		}
	}

	bh->b_bdev = I_BDEV(inode);
	bh->b_blocknr = iblock;
	bh->b_size = max_blocks << inode->i_blkbits;
	if (max_blocks)
		set_buffer_mapped(bh);
	return 0;
}

static ssize_t
blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
			loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	return blockdev_direct_IO_no_locking(rw, iocb, inode, I_BDEV(inode),
				iov, offset, nr_segs, blkdev_get_blocks, NULL);
}

int __sync_blockdev(struct block_device *bdev, int wait)
{
	if (!bdev)
		return 0;
	if (!wait)
		return filemap_flush(bdev->bd_inode->i_mapping);
	return filemap_write_and_wait(bdev->bd_inode->i_mapping);
}

/*
 * Write out and wait upon all the dirty data associated with a block
 * device via its mapping.  Does not take the superblock lock.
 */
int sync_blockdev(struct block_device *bdev)
{
	return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);

/*
 * Write out and wait upon all dirty data associated with this
 * device.  Filesystem data as well as the underlying block
 * device.  Takes the superblock lock.
 */
int fsync_bdev(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	if (sb) {
		int res = sync_filesystem(sb);
		drop_super(sb);
		return res;
	}
	return sync_blockdev(bdev);
}
EXPORT_SYMBOL(fsync_bdev);

/**
 * freeze_bdev  --  lock a filesystem and force it into a consistent state
 * @bdev:	blockdevice to lock
 *
 * If a superblock is found on this device, we take the s_umount semaphore
 * on it to make sure nobody unmounts until the snapshot creation is done.
 * The reference counter (bd_fsfreeze_count) guarantees that only the last
 * unfreeze process can actually unfreeze the frozen filesystem when multiple
 * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
 * counts down in thaw_bdev(). When it becomes 0, thaw_bdev() actually
 * unfreezes.
 */
struct super_block *freeze_bdev(struct block_device *bdev)
{
	struct super_block *sb;
	int error = 0;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (++bdev->bd_fsfreeze_count > 1) {
		/*
		 * We don't even need to grab a reference - the first call
		 * to freeze_bdev grabs an active reference and only the last
		 * thaw_bdev drops it.
		 */
		sb = get_super(bdev);
		drop_super(sb);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb = get_active_super(bdev);
	if (!sb)
		goto out;
	if (sb->s_flags & MS_RDONLY) {
		sb->s_frozen = SB_FREEZE_TRANS;
		up_write(&sb->s_umount);
		mutex_unlock(&bdev->bd_fsfreeze_mutex);
		return sb;
	}

	sb->s_frozen = SB_FREEZE_WRITE;
	smp_wmb();

	sync_filesystem(sb);

	sb->s_frozen = SB_FREEZE_TRANS;
	smp_wmb();

	sync_blockdev(sb->s_bdev);

	if (sb->s_op->freeze_fs) {
		error = sb->s_op->freeze_fs(sb);
		if (error) {
			printk(KERN_ERR
				"VFS:Filesystem freeze failed\n");
			sb->s_frozen = SB_UNFROZEN;
			deactivate_locked_super(sb);
			bdev->bd_fsfreeze_count--;
			mutex_unlock(&bdev->bd_fsfreeze_mutex);
			return ERR_PTR(error);
		}
	}
	up_write(&sb->s_umount);

 out:
	sync_blockdev(bdev);
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return sb;	/* thaw_bdev releases s->s_umount */
}
EXPORT_SYMBOL(freeze_bdev);

/**
 * thaw_bdev  -- unlock filesystem
 * @bdev:	blockdevice to unlock
 * @sb:		associated superblock
 *
 * Unlocks the filesystem and marks it writeable again after freeze_bdev().
 */
int thaw_bdev(struct block_device *bdev, struct super_block *sb)
{
	int error = -EINVAL;

	mutex_lock(&bdev->bd_fsfreeze_mutex);
	if (!bdev->bd_fsfreeze_count)
		goto out_unlock;

	error = 0;
	if (--bdev->bd_fsfreeze_count > 0)
		goto out_unlock;

	if (!sb)
		goto out_unlock;

	BUG_ON(sb->s_bdev != bdev);
	down_write(&sb->s_umount);
	if (sb->s_flags & MS_RDONLY)
		goto out_unfrozen;

	if (sb->s_op->unfreeze_fs) {
		error = sb->s_op->unfreeze_fs(sb);
		if (error) {
			printk(KERN_ERR
				"VFS:Filesystem thaw failed\n");
			sb->s_frozen = SB_FREEZE_TRANS;
			bdev->bd_fsfreeze_count++;
			mutex_unlock(&bdev->bd_fsfreeze_mutex);
			return error;
		}
	}

out_unfrozen:
	sb->s_frozen = SB_UNFROZEN;
	smp_wmb();
	wake_up(&sb->s_wait_unfrozen);

	if (sb)
		deactivate_locked_super(sb);
out_unlock:
	mutex_unlock(&bdev->bd_fsfreeze_mutex);
	return 0;
}
EXPORT_SYMBOL(thaw_bdev);

static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
{
	return block_write_full_page(page, blkdev_get_block, wbc);
}

static int blkdev_readpage(struct file * file, struct page * page)
{
	return block_read_full_page(page, blkdev_get_block);
}

static int blkdev_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	*pagep = NULL;
	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
				blkdev_get_block);
}

static int blkdev_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int ret;
	ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);

	unlock_page(page);
	page_cache_release(page);

	return ret;
}

/*
 * private llseek:
 * for a block special file file->f_path.dentry->d_inode->i_size is zero
 * so we compute the size by hand (just as in block_read/write above)
 */
static loff_t block_llseek(struct file *file, loff_t offset, int origin)
{
	struct inode *bd_inode = file->f_mapping->host;
	loff_t size;
	loff_t retval;

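	/*
	 * The i_mutex of the bdev inode keeps the i_size read and the
	 * f_pos update below atomic with respect to concurrent seekers
	 * on the same block device.
	 */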
	mutex_lock(&bd_inode->i_mutex);
	size = i_size_read(bd_inode);

	switch (origin) {
		case 2:
			offset += size;
			break;
		case 1:
			offset += file->f_pos;
	}
	retval = -EINVAL;
	if (offset >= 0 && offset <= size) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
		}
		retval = offset;
	}
	mutex_unlock(&bd_inode->i_mutex);
	return retval;
}

/*
 * Filp is never NULL; the only case when ->fsync() is called with
 * NULL first argument is nfsd_sync_dir() and that's not a directory.
 */

static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	int error;

	error = sync_blockdev(bdev);
	if (error)
		return error;

	error = blkdev_issue_flush(bdev, NULL);
	if (error == -EOPNOTSUPP)
		error = 0;
	return error;
}

/*
 * pseudo-fs
 */

static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
static struct kmem_cache * bdev_cachep __read_mostly;

static struct inode *bdev_alloc_inode(struct super_block *sb)
{
	struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
	if (!ei)
		return NULL;
	return &ei->vfs_inode;
}

static void bdev_destroy_inode(struct inode *inode)
{
	struct bdev_inode *bdi = BDEV_I(inode);

	kmem_cache_free(bdev_cachep, bdi);
}

static void init_once(void *foo)
{
	struct bdev_inode *ei = (struct bdev_inode *) foo;
	struct block_device *bdev = &ei->bdev;

	memset(bdev, 0, sizeof(*bdev));
	mutex_init(&bdev->bd_mutex);
	INIT_LIST_HEAD(&bdev->bd_inodes);
	INIT_LIST_HEAD(&bdev->bd_list);
#ifdef CONFIG_SYSFS
	INIT_LIST_HEAD(&bdev->bd_holder_list);
#endif
	inode_init_once(&ei->vfs_inode);
	/* Initialize mutex for freeze. */
	mutex_init(&bdev->bd_fsfreeze_mutex);
}

static inline void __bd_forget(struct inode *inode)
{
	list_del_init(&inode->i_devices);
	inode->i_bdev = NULL;
	inode->i_mapping = &inode->i_data;
}

static void bdev_clear_inode(struct inode *inode)
{
	struct block_device *bdev = &BDEV_I(inode)->bdev;
	struct list_head *p;
	spin_lock(&bdev_lock);
	while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
		__bd_forget(list_entry(p, struct inode, i_devices));
	}
	list_del_init(&bdev->bd_list);
	spin_unlock(&bdev_lock);
}

static const struct super_operations bdev_sops = {
	.statfs = simple_statfs,
	.alloc_inode = bdev_alloc_inode,
	.destroy_inode = bdev_destroy_inode,
	.drop_inode = generic_delete_inode,
	.clear_inode = bdev_clear_inode,
};

static int bd_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
}

static struct file_system_type bd_type = {
	.name		= "bdev",
	.get_sb		= bd_get_sb,
	.kill_sb	= kill_anon_super,
};

struct super_block *blockdev_superblock __read_mostly;

void __init bdev_cache_init(void)
{
	int err;
	struct vfsmount *bd_mnt;

	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
				SLAB_MEM_SPREAD|SLAB_PANIC),
			init_once);
	err = register_filesystem(&bd_type);
	if (err)
		panic("Cannot register bdev pseudo-fs");
	bd_mnt = kern_mount(&bd_type);
	if (IS_ERR(bd_mnt))
		panic("Cannot create bdev pseudo-fs");
	/*
	 * This vfsmount structure is only used to obtain the
	 * blockdev_superblock, so tell kmemleak not to report it.
	 */
	kmemleak_not_leak(bd_mnt);
	blockdev_superblock = bd_mnt->mnt_sb;	/* For writeback */
}

/*
 * Most likely _very_ bad one - but then it's hardly critical for small
 * /dev and can be fixed when somebody needs a really large one.
 * Keep in mind that it will be fed through icache hash function too.
 */
static inline unsigned long hash(dev_t dev)
{
	return MAJOR(dev)+MINOR(dev);
}

static int bdev_test(struct inode *inode, void *data)
{
	return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data;
}

static int bdev_set(struct inode *inode, void *data)
{
	BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data;
	return 0;
}

static LIST_HEAD(all_bdevs);

struct block_device *bdget(dev_t dev)
{
	struct block_device *bdev;
	struct inode *inode;

	inode = iget5_locked(blockdev_superblock, hash(dev),
			bdev_test, bdev_set, &dev);

	if (!inode)
		return NULL;

	bdev = &BDEV_I(inode)->bdev;

	if (inode->i_state & I_NEW) {
		bdev->bd_contains = NULL;
		bdev->bd_inode = inode;
		bdev->bd_block_size = (1 << inode->i_blkbits);
		bdev->bd_part_count = 0;
		bdev->bd_invalidated = 0;
		inode->i_mode = S_IFBLK;
		inode->i_rdev = dev;
		inode->i_bdev = bdev;
		inode->i_data.a_ops = &def_blk_aops;
		mapping_set_gfp_mask(&inode->i_data, GFP_USER);
		inode->i_data.backing_dev_info = &default_backing_dev_info;
		spin_lock(&bdev_lock);
		list_add(&bdev->bd_list, &all_bdevs);
		spin_unlock(&bdev_lock);
		unlock_new_inode(inode);
	}
	return bdev;
}

EXPORT_SYMBOL(bdget);

/**
 * bdgrab -- Grab a reference to an already referenced block device
 * @bdev:	Block device to grab a reference to.
 */
struct block_device *bdgrab(struct block_device *bdev)
{
	atomic_inc(&bdev->bd_inode->i_count);
	return bdev;
}

long nr_blockdev_pages(void)
{
	struct block_device *bdev;
	long ret = 0;
	spin_lock(&bdev_lock);
	list_for_each_entry(bdev, &all_bdevs, bd_list) {
		ret += bdev->bd_inode->i_mapping->nrpages;
	}
	spin_unlock(&bdev_lock);
	return ret;
}

void bdput(struct block_device *bdev)
{
	iput(bdev->bd_inode);
}

EXPORT_SYMBOL(bdput);

static struct block_device *bd_acquire(struct inode *inode)
{
	struct block_device *bdev;

	spin_lock(&bdev_lock);
	bdev = inode->i_bdev;
	if (bdev) {
		atomic_inc(&bdev->bd_inode->i_count);
		spin_unlock(&bdev_lock);
		return bdev;
	}
	spin_unlock(&bdev_lock);

	bdev = bdget(inode->i_rdev);
	if (bdev) {
		spin_lock(&bdev_lock);
		if (!inode->i_bdev) {
			/*
			 * We take an additional bd_inode->i_count for inode,
			 * and it's released in clear_inode() of inode.
			 * So, we can access it via ->i_mapping always
			 * without igrab().
			 */
			atomic_inc(&bdev->bd_inode->i_count);
			inode->i_bdev = bdev;
			inode->i_mapping = bdev->bd_inode->i_mapping;
			list_add(&inode->i_devices, &bdev->bd_inodes);
		}
		spin_unlock(&bdev_lock);
	}
	return bdev;
}

/* Call when you free inode */

void bd_forget(struct inode *inode)
{
	struct block_device *bdev = NULL;

	spin_lock(&bdev_lock);
	if (inode->i_bdev) {
		if (!sb_is_blkdev_sb(inode->i_sb))
			bdev = inode->i_bdev;
		__bd_forget(inode);
	}
	spin_unlock(&bdev_lock);

	if (bdev)
		iput(bdev->bd_inode);
}

int bd_claim(struct block_device *bdev, void *holder)
{
	int res;
	spin_lock(&bdev_lock);

	/* first decide result */
	if (bdev->bd_holder == holder)
		res = 0;	/* already a holder */
	else if (bdev->bd_holder != NULL)
		res = -EBUSY;	/* held by someone else */
	else if (bdev->bd_contains == bdev)
		res = 0;	/* is a whole device which isn't held */

	else if (bdev->bd_contains->bd_holder == bd_claim)
		res = 0;	/* is a partition of a device that is being partitioned */
	else if (bdev->bd_contains->bd_holder != NULL)
		res = -EBUSY;	/* is a partition of a held device */
	else
		res = 0;	/* is a partition of an un-held device */

	/* now impose change */
	if (res==0) {
		/* note that for a whole device bd_holders
		 * will be incremented twice, and bd_holder will
		 * be set to bd_claim before being set to holder
		 */
		bdev->bd_contains->bd_holders ++;
		bdev->bd_contains->bd_holder = bd_claim;
		bdev->bd_holders++;
		bdev->bd_holder = holder;
	}
	spin_unlock(&bdev_lock);
	return res;
}

EXPORT_SYMBOL(bd_claim);

void bd_release(struct block_device *bdev)
{
	spin_lock(&bdev_lock);
	if (!--bdev->bd_contains->bd_holders)
		bdev->bd_contains->bd_holder = NULL;
	if (!--bdev->bd_holders)
		bdev->bd_holder = NULL;
	spin_unlock(&bdev_lock);
}

EXPORT_SYMBOL(bd_release);

#ifdef CONFIG_SYSFS
/*
 * Functions for bd_claim_by_kobject / bd_release_from_kobject
 *
 * If a kobject is passed to bd_claim_by_kobject()
 * and the kobject has a parent directory,
 * following symlinks are created:
 *	o from the kobject to the claimed bdev
 *	o from "holders" directory of the bdev to the parent of the kobject
 * bd_release_from_kobject() removes these symlinks.
 *
 * Example:
 *	If /dev/dm-0 maps to /dev/sda, kobject corresponding to
 *	/sys/block/dm-0/slaves is passed to bd_claim_by_kobject(), then:
 *		/sys/block/dm-0/slaves/sda --> /sys/block/sda
 *		/sys/block/sda/holders/dm-0 --> /sys/block/dm-0
 */

static int add_symlink(struct kobject *from, struct kobject *to)
{
	if (!from || !to)
		return 0;
	return sysfs_create_link(from, to, kobject_name(to));
}

static void del_symlink(struct kobject *from, struct kobject *to)
{
	if (!from || !to)
		return;
	sysfs_remove_link(from, kobject_name(to));
}

/*
 * 'struct bd_holder' contains pointers to kobjects symlinked by
 * bd_claim_by_kobject.
 * It's connected to bd_holder_list which is protected by bdev->bd_sem.
 */
struct bd_holder {
	struct list_head list;	/* chain of holders of the bdev */
	int count;		/* references from the holder */
	struct kobject *sdir;	/* holder object, e.g. "/block/dm-0/slaves" */
	struct kobject *hdev;	/* e.g. "/block/dm-0" */
	struct kobject *hdir;	/* e.g. "/block/sda/holders" */
	struct kobject *sdev;	/* e.g. "/block/sda" */
};

/*
 * Get references of related kobjects at once.
 * Returns 1 on success. 0 on failure.
 *
 * Should call bd_holder_release_dirs() after successful use.
 */
static int bd_holder_grab_dirs(struct block_device *bdev,
			struct bd_holder *bo)
{
	if (!bdev || !bo)
		return 0;

	bo->sdir = kobject_get(bo->sdir);
	if (!bo->sdir)
		return 0;

	bo->hdev = kobject_get(bo->sdir->parent);
	if (!bo->hdev)
		goto fail_put_sdir;

	bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj);
	if (!bo->sdev)
		goto fail_put_hdev;

	bo->hdir = kobject_get(bdev->bd_part->holder_dir);
	if (!bo->hdir)
		goto fail_put_sdev;

	return 1;

fail_put_sdev:
	kobject_put(bo->sdev);
fail_put_hdev:
	kobject_put(bo->hdev);
fail_put_sdir:
	kobject_put(bo->sdir);

	return 0;
}

/* Put references of related kobjects at once. */
static void bd_holder_release_dirs(struct bd_holder *bo)
{
	kobject_put(bo->hdir);
	kobject_put(bo->sdev);
	kobject_put(bo->hdev);
	kobject_put(bo->sdir);
}

static struct bd_holder *alloc_bd_holder(struct kobject *kobj)
{
	struct bd_holder *bo;

	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
	if (!bo)
		return NULL;

	bo->count = 1;
	bo->sdir = kobj;

	return bo;
}

static void free_bd_holder(struct bd_holder *bo)
{
	kfree(bo);
}

/**
 * find_bd_holder - find matching struct bd_holder from the block device
 *
 * @bdev:	struct block device to be searched
 * @bo:		target struct bd_holder
 *
 * Returns matching entry with @bo in @bdev->bd_holder_list.
 * If found, increment the reference count and return the pointer.
 * If not found, returns NULL.
 */
static struct bd_holder *find_bd_holder(struct block_device *bdev,
					struct bd_holder *bo)
{
	struct bd_holder *tmp;

	list_for_each_entry(tmp, &bdev->bd_holder_list, list)
		if (tmp->sdir == bo->sdir) {
			tmp->count++;
			return tmp;
		}

	return NULL;
}

/**
 * add_bd_holder - create sysfs symlinks for bd_claim() relationship
 *
 * @bdev:	block device to be bd_claimed
 * @bo:		preallocated and initialized by alloc_bd_holder()
 *
 * Add @bo to @bdev->bd_holder_list, create symlinks.
 *
 * Returns 0 if symlinks are created.
 * Returns -ve if something fails.
 */
static int add_bd_holder(struct block_device *bdev, struct bd_holder *bo)
{
	int err;

	if (!bo)
		return -EINVAL;

	if (!bd_holder_grab_dirs(bdev, bo))
		return -EBUSY;

	err = add_symlink(bo->sdir, bo->sdev);
	if (err)
		return err;

	err = add_symlink(bo->hdir, bo->hdev);
	if (err) {
		del_symlink(bo->sdir, bo->sdev);
		return err;
	}

	list_add_tail(&bo->list, &bdev->bd_holder_list);
	return 0;
}

/**
 * del_bd_holder - delete sysfs symlinks for bd_claim() relationship
 *
 * @bdev:	block device to be bd_claimed
 * @kobj:	holder's kobject
 *
 * If there is matching entry with @kobj in @bdev->bd_holder_list
 * and no other bd_claim() from the same kobject,
 * remove the struct bd_holder from the list, delete symlinks for it.
 *
 * Returns a pointer to the struct bd_holder when it's removed from the list
 * and ready to be freed.
 * Returns NULL if matching claim isn't found or there is other bd_claim()
 * by the same kobject.
 */
static struct bd_holder *del_bd_holder(struct block_device *bdev,
					struct kobject *kobj)
{
	struct bd_holder *bo;

	list_for_each_entry(bo, &bdev->bd_holder_list, list) {
		if (bo->sdir == kobj) {
			bo->count--;
			BUG_ON(bo->count < 0);
			if (!bo->count) {
				list_del(&bo->list);
				del_symlink(bo->sdir, bo->sdev);
				del_symlink(bo->hdir, bo->hdev);
				bd_holder_release_dirs(bo);
				return bo;
			}
			break;
		}
	}

	return NULL;
}

/**
 * bd_claim_by_kobject - bd_claim() with additional kobject signature
 *
 * @bdev:	block device to be claimed
 * @holder:	holder's signature
 * @kobj:	holder's kobject
 *
 * Do bd_claim() and if it succeeds, create sysfs symlinks between
 * the bdev and the holder's kobject.
 * Use bd_release_from_kobject() when releasing the claimed bdev.
 *
 * Returns 0 on success. (same as bd_claim())
 * Returns errno on failure.
 */
static int bd_claim_by_kobject(struct block_device *bdev, void *holder,
				struct kobject *kobj)
{
	int err;
	struct bd_holder *bo, *found;

	if (!kobj)
		return -EINVAL;

	bo = alloc_bd_holder(kobj);
	if (!bo)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);

	err = bd_claim(bdev, holder);
	if (err)
		goto fail;

	found = find_bd_holder(bdev, bo);
	if (found)
		goto fail;

	err = add_bd_holder(bdev, bo);
	if (err)
		bd_release(bdev);
	else
		bo = NULL;
fail:
	mutex_unlock(&bdev->bd_mutex);
	free_bd_holder(bo);
	return err;
}

/**
 * bd_release_from_kobject - bd_release() with additional kobject signature
 *
 * @bdev:	block device to be released
 * @kobj:	holder's kobject
 *
 * Do bd_release() and remove sysfs symlinks created by bd_claim_by_kobject().
 */
static void bd_release_from_kobject(struct block_device *bdev,
					struct kobject *kobj)
{
	if (!kobj)
		return;

	mutex_lock(&bdev->bd_mutex);
	bd_release(bdev);
	free_bd_holder(del_bd_holder(bdev, kobj));
	mutex_unlock(&bdev->bd_mutex);
}

/**
 * bd_claim_by_disk - wrapper function for bd_claim_by_kobject()
 *
 * @bdev:	block device to be claimed
 * @holder:	holder's signature
 * @disk:	holder's gendisk
 *
 * Call bd_claim_by_kobject(), taking a reference on @disk->slave_dir.
 */
int bd_claim_by_disk(struct block_device *bdev, void *holder,
			struct gendisk *disk)
{
	return bd_claim_by_kobject(bdev, holder, kobject_get(disk->slave_dir));
}
EXPORT_SYMBOL_GPL(bd_claim_by_disk);

/**
 * bd_release_from_disk - wrapper function for bd_release_from_kobject()
 *
 * @bdev:	block device to be claimed
 * @disk:	holder's gendisk
 *
 * Call bd_release_from_kobject() and put @disk->slave_dir.
 */
void bd_release_from_disk(struct block_device *bdev, struct gendisk *disk)
{
	bd_release_from_kobject(bdev, disk->slave_dir);
	kobject_put(disk->slave_dir);
}
EXPORT_SYMBOL_GPL(bd_release_from_disk);
#endif

/*
 * Tries to open block device by device number.  Use it ONLY if you
 * really do not have anything better - i.e. when you are behind a
 * truly sucky interface and all you are given is a device number.  _Never_
 * to be used for internal purposes.  If you ever need it - reconsider
 * your API.
 */
struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
{
	struct block_device *bdev = bdget(dev);
	int err = -ENOMEM;
	if (bdev)
		err = blkdev_get(bdev, mode);
	return err ? ERR_PTR(err) : bdev;
}

EXPORT_SYMBOL(open_by_devnum);

/**
 * flush_disk - invalidates all buffer-cache entries on a disk
 *
 * @bdev:	struct block device to be flushed
 *
 * Invalidates all buffer-cache entries on a disk. It should be called
 * when a disk has been changed -- either by a media change or online
 * resize.
 */
static void flush_disk(struct block_device *bdev)
{
	if (__invalidate_device(bdev)) {
		char name[BDEVNAME_SIZE] = "";

		if (bdev->bd_disk)
			disk_name(bdev->bd_disk, 0, name);
		printk(KERN_WARNING "VFS: busy inodes on changed media or "
		       "resized disk %s\n", name);
	}

	if (!bdev->bd_disk)
		return;
	if (disk_partitionable(bdev->bd_disk))
		bdev->bd_invalidated = 1;
}

/**
 * check_disk_size_change - checks for disk size change and adjusts bdev size.
 * @disk: struct gendisk to check
 * @bdev: struct bdev to adjust.
 *
 * This routine checks to see if the bdev size does not match the disk size
 * and adjusts it if it differs.
 */
void check_disk_size_change(struct gendisk *disk, struct block_device *bdev)
{
	loff_t disk_size, bdev_size;

	disk_size = (loff_t)get_capacity(disk) << 9;
	bdev_size = i_size_read(bdev->bd_inode);
	if (disk_size != bdev_size) {
		char name[BDEVNAME_SIZE];

		disk_name(disk, 0, name);
		printk(KERN_INFO
		       "%s: detected capacity change from %lld to %lld\n",
		       name, bdev_size, disk_size);
		i_size_write(bdev->bd_inode, disk_size);
		flush_disk(bdev);
	}
}
EXPORT_SYMBOL(check_disk_size_change);

/**
 * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back
 * @disk: struct gendisk to be revalidated
 *
 * This routine is a wrapper for lower-level driver's revalidate_disk
 * call-backs.  It is used to do common pre and post operations needed
 * for all revalidate_disk operations.
 */
int revalidate_disk(struct gendisk *disk)
{
	struct block_device *bdev;
	int ret = 0;

	if (disk->fops->revalidate_disk)
		ret = disk->fops->revalidate_disk(disk);

	bdev = bdget_disk(disk, 0);
	if (!bdev)
		return ret;

	mutex_lock(&bdev->bd_mutex);
	check_disk_size_change(disk, bdev);
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	return ret;
}
EXPORT_SYMBOL(revalidate_disk);

/*
 * This routine checks whether a removable media has been changed,
 * and invalidates all buffer-cache-entries in that case. This
 * is a relatively slow routine, so we have to try to minimize using
 * it. Thus it is called only upon a 'mount' or 'open'. This
 * is the best way of combining speed and utility, I think.
 * People changing diskettes in the middle of an operation deserve
 * to lose :-)
 */
int check_disk_change(struct block_device *bdev)
{
	struct gendisk *disk = bdev->bd_disk;
	const struct block_device_operations *bdops = disk->fops;

	if (!bdops->media_changed)
		return 0;
	if (!bdops->media_changed(bdev->bd_disk))
		return 0;

	flush_disk(bdev);
	if (bdops->revalidate_disk)
		bdops->revalidate_disk(bdev->bd_disk);
	return 1;
}

EXPORT_SYMBOL(check_disk_change);

void bd_set_size(struct block_device *bdev, loff_t size)
{
	unsigned bsize = bdev_logical_block_size(bdev);

	bdev->bd_inode->i_size = size;
	while (bsize < PAGE_CACHE_SIZE) {
		if (size & bsize)
			break;
		bsize <<= 1;
	}
	bdev->bd_block_size = bsize;
	bdev->bd_inode->i_blkbits = blksize_bits(bsize);
}
EXPORT_SYMBOL(bd_set_size);

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);

/*
 * bd_mutex locking:
 *
 *  mutex_lock(part->bd_mutex)
 *    mutex_lock_nested(whole->bd_mutex, 1)
 */

static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
{
	struct gendisk *disk;
	int ret;
	int partno;
	int perm = 0;

	if (mode & FMODE_READ)
		perm |= MAY_READ;
	if (mode & FMODE_WRITE)
		perm |= MAY_WRITE;
	/*
	 * hooks: /n/, see "layering violations".
	 */
	ret = devcgroup_inode_permission(bdev->bd_inode, perm);
	if (ret != 0) {
		bdput(bdev);
		return ret;
	}

	lock_kernel();
 restart:

	ret = -ENXIO;
	disk = get_gendisk(bdev->bd_dev, &partno);
	if (!disk)
		goto out_unlock_kernel;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	if (!bdev->bd_openers) {
		bdev->bd_disk = disk;
		bdev->bd_contains = bdev;
		if (!partno) {
			struct backing_dev_info *bdi;

			ret = -ENXIO;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!bdev->bd_part)
				goto out_clear;

			if (disk->fops->open) {
				ret = disk->fops->open(bdev, mode);
				if (ret == -ERESTARTSYS) {
					/* Lost a race with 'disk' being
					 * deleted, try again.
					 * See md.c
					 */
					disk_put_part(bdev->bd_part);
					bdev->bd_part = NULL;
					module_put(disk->fops->owner);
					put_disk(disk);
					bdev->bd_disk = NULL;
					mutex_unlock(&bdev->bd_mutex);
					goto restart;
				}
				if (ret)
					goto out_clear;
			}
			if (!bdev->bd_openers) {
				bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
				bdi = blk_get_backing_dev_info(bdev);
				if (bdi == NULL)
					bdi = &default_backing_dev_info;
				bdev->bd_inode->i_data.backing_dev_info = bdi;
			}
			if (bdev->bd_invalidated)
				rescan_partitions(disk, bdev);
		} else {
			struct block_device *whole;
			whole = bdget_disk(disk, 0);
			ret = -ENOMEM;
			if (!whole)
				goto out_clear;
			BUG_ON(for_part);
			ret = __blkdev_get(whole, mode, 1);
			if (ret)
				goto out_clear;
			bdev->bd_contains = whole;
			bdev->bd_inode->i_data.backing_dev_info =
				whole->bd_inode->i_data.backing_dev_info;
			bdev->bd_part = disk_get_part(disk, partno);
			if (!(disk->flags & GENHD_FL_UP) ||
			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
				ret = -ENXIO;
				goto out_clear;
			}
			bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
		}
	} else {
		module_put(disk->fops->owner);
		put_disk(disk);
		disk = NULL;
		if (bdev->bd_contains == bdev) {
			if (bdev->bd_disk->fops->open) {
				ret = bdev->bd_disk->fops->open(bdev, mode);
				if (ret)
					goto out_unlock_bdev;
			}
			if (bdev->bd_invalidated)
				rescan_partitions(bdev->bd_disk, bdev);
		}
	}
	bdev->bd_openers++;
	if (for_part)
		bdev->bd_part_count++;
	mutex_unlock(&bdev->bd_mutex);
	unlock_kernel();
	return 0;

 out_clear:
	disk_put_part(bdev->bd_part);
	bdev->bd_disk = NULL;
	bdev->bd_part = NULL;
	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
	if (bdev != bdev->bd_contains)
		__blkdev_put(bdev->bd_contains, mode, 1);
	bdev->bd_contains = NULL;
 out_unlock_bdev:
	mutex_unlock(&bdev->bd_mutex);
 out_unlock_kernel:
	unlock_kernel();

	if (disk)
		module_put(disk->fops->owner);
	put_disk(disk);
	bdput(bdev);

	return ret;
}

int blkdev_get(struct block_device *bdev, fmode_t mode)
{
	return __blkdev_get(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_get);

static int blkdev_open(struct inode * inode, struct file * filp)
{
	struct block_device *bdev;
	int res;

	/*
	 * Preserve backwards compatibility and allow large file access
	 * even if userspace doesn't ask for it explicitly. Some mkfs
	 * binary needs it. We might want to drop this workaround
	 * during an unstable branch.
	 */
	filp->f_flags |= O_LARGEFILE;

	if (filp->f_flags & O_NDELAY)
		filp->f_mode |= FMODE_NDELAY;
	if (filp->f_flags & O_EXCL)
		filp->f_mode |= FMODE_EXCL;
	if ((filp->f_flags & O_ACCMODE) == 3)
		filp->f_mode |= FMODE_WRITE_IOCTL;

	bdev = bd_acquire(inode);
	if (bdev == NULL)
		return -ENOMEM;

	filp->f_mapping = bdev->bd_inode->i_mapping;

	res = blkdev_get(bdev, filp->f_mode);
	if (res)
		return res;

	if (filp->f_mode & FMODE_EXCL) {
		res = bd_claim(bdev, filp);
		if (res)
			goto out_blkdev_put;
	}

	return 0;

 out_blkdev_put:
	blkdev_put(bdev, filp->f_mode);
	return res;
}

static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
	int ret = 0;
	struct gendisk *disk = bdev->bd_disk;
	struct block_device *victim = NULL;

	mutex_lock_nested(&bdev->bd_mutex, for_part);
	lock_kernel();
	if (for_part)
		bdev->bd_part_count--;

	if (!--bdev->bd_openers) {
		sync_blockdev(bdev);
		kill_bdev(bdev);
	}
	if (bdev->bd_contains == bdev) {
		if (disk->fops->release)
			ret = disk->fops->release(disk, mode);
	}
	if (!bdev->bd_openers) {
		struct module *owner = disk->fops->owner;

		put_disk(disk);
		module_put(owner);
		disk_put_part(bdev->bd_part);
		bdev->bd_part = NULL;
		bdev->bd_disk = NULL;
		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
		if (bdev != bdev->bd_contains)
			victim = bdev->bd_contains;
		bdev->bd_contains = NULL;
	}
	unlock_kernel();
	mutex_unlock(&bdev->bd_mutex);
	bdput(bdev);
	if (victim)
		__blkdev_put(victim, mode, 1);
	return ret;
}

int blkdev_put(struct block_device *bdev, fmode_t mode)
{
	return __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);

static int blkdev_close(struct inode * inode, struct file * filp)
{
	struct block_device *bdev = I_BDEV(filp->f_mapping->host);
	if (bdev->bd_holder == filp)
		bd_release(bdev);
	return blkdev_put(bdev, filp->f_mode);
}

static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
	struct block_device *bdev = I_BDEV(file->f_mapping->host);
	fmode_t mode = file->f_mode;

	/*
	 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
	 * to update it before every ioctl.
	 */
	if (file->f_flags & O_NDELAY)
		mode |= FMODE_NDELAY;
	else
		mode &= ~FMODE_NDELAY;

	return blkdev_ioctl(bdev, mode, cmd, arg);
}

/*
 * Write data to the block device.  Only intended for the block device itself
 * and the raw driver which basically is a fake block device.
 *
 * Does not take i_mutex for the write and thus is not for general purpose
 * use.
 */
ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	ssize_t ret;

	BUG_ON(iocb->ki_pos != pos);

	ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
	if (ret > 0 || ret == -EIOCBQUEUED) {
		ssize_t err;

		err = generic_write_sync(file, pos, ret);
		if (err < 0 && ret > 0)
			ret = err;
	}
	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_aio_write);

/*
 * Try to release a page associated with block device when the system
 * is under memory pressure.
 */
static int blkdev_releasepage(struct page *page, gfp_t wait)
{
	struct super_block *super = BDEV_I(page->mapping->host)->bdev.bd_super;

	if (super && super->s_op->bdev_try_to_free_page)
		return super->s_op->bdev_try_to_free_page(super, page, wait);

	return try_to_free_buffers(page);
}

static const struct address_space_operations def_blk_aops = {
	.readpage	= blkdev_readpage,
	.writepage	= blkdev_writepage,
	.sync_page	= block_sync_page,
	.write_begin	= blkdev_write_begin,
	.write_end	= blkdev_write_end,
	.writepages	= generic_writepages,
	.releasepage	= blkdev_releasepage,
	.direct_IO	= blkdev_direct_IO,
};

const struct file_operations def_blk_fops = {
	.open		= blkdev_open,
	.release	= blkdev_close,
	.llseek		= block_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= generic_file_aio_read,
	.aio_write	= blkdev_aio_write,
	.mmap		= generic_file_mmap,
	.fsync		= block_fsync,
	.unlocked_ioctl	= block_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= compat_blkdev_ioctl,
#endif
	.splice_read	= generic_file_splice_read,
	.splice_write	= generic_file_splice_write,
};

int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
{
	int res;
	mm_segment_t old_fs = get_fs();
	set_fs(KERNEL_DS);
	res = blkdev_ioctl(bdev, 0, cmd, arg);
	set_fs(old_fs);
	return res;
}

EXPORT_SYMBOL(ioctl_by_bdev);

/**
 * lookup_bdev  - lookup a struct block_device by name
 * @pathname:	special file representing the block device
 *
 * Get a reference to the blockdevice at @pathname in the current
 * namespace if possible and return it.  Return ERR_PTR(error)
 * otherwise.
 */
struct block_device *lookup_bdev(const char *pathname)
{
	struct block_device *bdev;
	struct inode *inode;
	struct path path;
	int error;

	if (!pathname || !*pathname)
		return ERR_PTR(-EINVAL);

	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
	if (error)
		return ERR_PTR(error);

	inode = path.dentry->d_inode;
	error = -ENOTBLK;
	if (!S_ISBLK(inode->i_mode))
		goto fail;
	error = -EACCES;
	if (path.mnt->mnt_flags & MNT_NODEV)
		goto fail;
	error = -ENOMEM;
	bdev = bd_acquire(inode);
	if (!bdev)
		goto fail;
out:
	path_put(&path);
	return bdev;
fail:
	bdev = ERR_PTR(error);
	goto out;
}
EXPORT_SYMBOL(lookup_bdev);

/**
 * open_bdev_exclusive  -  open a block device by name and set it up for use
 *
 * @path:	special file representing the block device
 * @mode:	FMODE_... combination to be used
 * @holder:	owner for exclusion
 *
 * Open the blockdevice described by the special file at @path, claim it
 * for the @holder.
 */
struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
{
	struct block_device *bdev;
	int error = 0;

	bdev = lookup_bdev(path);
	if (IS_ERR(bdev))
		return bdev;

	error = blkdev_get(bdev, mode);
	if (error)
		return ERR_PTR(error);
	error = -EACCES;
	if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
		goto blkdev_put;
	error = bd_claim(bdev, holder);
	if (error)
		goto blkdev_put;

	return bdev;

blkdev_put:
	blkdev_put(bdev, mode);
	return ERR_PTR(error);
}

EXPORT_SYMBOL(open_bdev_exclusive);

/**
 * close_bdev_exclusive  -  close a blockdevice opened by open_bdev_exclusive()
 *
 * @bdev:	blockdevice to close
 * @mode:	mode, must match that used to open.
 *
 * This is the counterpart to open_bdev_exclusive().
 */
void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
{
	bd_release(bdev);
	blkdev_put(bdev, mode);
}

EXPORT_SYMBOL(close_bdev_exclusive);

int __invalidate_device(struct block_device *bdev)
{
	struct super_block *sb = get_super(bdev);
	int res = 0;

	if (sb) {
		/*
		 * no need to lock the super, get_super holds the
		 * read mutex so the filesystem cannot go away
		 * under us (->put_super runs with the write lock
		 * held).
		 */
		shrink_dcache_sb(sb);
		res = invalidate_inodes(sb);
		drop_super(sb);
	}
	invalidate_bdev(bdev);
	return res;
}
EXPORT_SYMBOL(__invalidate_device);
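
/*
 * Illustrative usage sketch: how a typical in-kernel caller (for example a
 * filesystem mount path) might use the helpers above.  The device path and
 * the "sb" holder cookie below are examples only, not taken from this file:
 *
 *	struct block_device *bdev;
 *
 *	bdev = open_bdev_exclusive("/dev/sdb1", FMODE_READ | FMODE_WRITE, sb);
 *	if (IS_ERR(bdev))
 *		return PTR_ERR(bdev);
 *	...
 *	close_bdev_exclusive(bdev, FMODE_READ | FMODE_WRITE);
 *
 * The holder cookie is only compared by pointer in bd_claim()/bd_release(),
 * and the mode passed to close_bdev_exclusive() must match the mode used to
 * open the device.
 */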