Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'pull-set_blocksize' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs blocksize updates from Al Viro:
"This gets rid of bogus set_blocksize() uses, switches it over
to be based on a 'struct file *' and verifies that the caller
has the device opened exclusively"

* tag 'pull-set_blocksize' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
make set_blocksize() fail unless block device is opened exclusive
set_blocksize(): switch to passing struct file *
btrfs_get_bdev_and_sb(): call set_blocksize() only for exclusive opens
swsusp: don't bother with setting block size
zram: don't bother with reopening - just use O_EXCL for open
swapon(2): open swap with O_EXCL
swapon(2)/swapoff(2): don't bother with block size
pktcdvd: sort set_blocksize() calls out
bcache_register(): don't bother with set_blocksize()

+55 -93
+7
Documentation/filesystems/porting.rst
··· 1134 1134 device freezing now works for any block device owned by a given superblock, not 1135 1135 just the main block device. The get_active_super() helper and bd_fsfreeze_sb 1136 1136 pointer are gone. 1137 + 1138 + --- 1139 + 1140 + **mandatory** 1141 + 1142 + set_blocksize() takes opened struct file instead of struct block_device now 1143 + and it *must* be opened exclusive.
+10 -4
block/bdev.c
··· 144 144 bdev->bd_inode->i_blkbits = blksize_bits(bsize); 145 145 } 146 146 147 - int set_blocksize(struct block_device *bdev, int size) 147 + int set_blocksize(struct file *file, int size) 148 148 { 149 + struct inode *inode = file->f_mapping->host; 150 + struct block_device *bdev = I_BDEV(inode); 151 + 149 152 /* Size must be a power of two, and between 512 and PAGE_SIZE */ 150 153 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) 151 154 return -EINVAL; ··· 157 154 if (size < bdev_logical_block_size(bdev)) 158 155 return -EINVAL; 159 156 157 + if (!file->private_data) 158 + return -EINVAL; 159 + 160 160 /* Don't change the size if it is same as current */ 161 - if (bdev->bd_inode->i_blkbits != blksize_bits(size)) { 161 + if (inode->i_blkbits != blksize_bits(size)) { 162 162 sync_blockdev(bdev); 163 - bdev->bd_inode->i_blkbits = blksize_bits(size); 163 + inode->i_blkbits = blksize_bits(size); 164 164 kill_bdev(bdev); 165 165 } 166 166 return 0; ··· 173 167 174 168 int sb_set_blocksize(struct super_block *sb, int size) 175 169 { 176 - if (set_blocksize(sb->s_bdev, size)) 170 + if (set_blocksize(sb->s_bdev_file, size)) 177 171 return 0; 178 172 /* If we get here, we know size is power of two 179 173 * and it's value is between 512 and PAGE_SIZE */
+12 -9
block/ioctl.c
··· 503 503 #endif 504 504 505 505 /* set the logical block size */ 506 - static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode, 506 + static int blkdev_bszset(struct file *file, blk_mode_t mode, 507 507 int __user *argp) 508 508 { 509 + // this one might be file_inode(file)->i_rdev - a rare valid 510 + // use of file_inode() for those. 511 + dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; 512 + struct file *excl_file; 509 513 int ret, n; 510 - struct file *file; 511 514 512 515 if (!capable(CAP_SYS_ADMIN)) 513 516 return -EACCES; ··· 520 517 return -EFAULT; 521 518 522 519 if (mode & BLK_OPEN_EXCL) 523 - return set_blocksize(bdev, n); 520 + return set_blocksize(file, n); 524 521 525 - file = bdev_file_open_by_dev(bdev->bd_dev, mode, &bdev, NULL); 526 - if (IS_ERR(file)) 522 + excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); 523 + if (IS_ERR(excl_file)) 527 524 return -EBUSY; 528 - ret = set_blocksize(bdev, n); 529 - fput(file); 525 + ret = set_blocksize(excl_file, n); 526 + fput(excl_file); 530 527 return ret; 531 528 } 532 529 ··· 655 652 case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ 656 653 return put_int(argp, block_size(bdev)); 657 654 case BLKBSZSET: 658 - return blkdev_bszset(bdev, mode, argp); 655 + return blkdev_bszset(file, mode, argp); 659 656 case BLKGETSIZE64: 660 657 return put_u64(argp, bdev_nr_bytes(bdev)); 661 658 ··· 715 712 case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ 716 713 return put_int(argp, bdev_logical_block_size(bdev)); 717 714 case BLKBSZSET_32: 718 - return blkdev_bszset(bdev, mode, argp); 715 + return blkdev_bszset(file, mode, argp); 719 716 case BLKGETSIZE64_32: 720 717 return put_u64(argp, bdev_nr_bytes(bdev)); 721 718
+1 -6
drivers/block/pktcdvd.c
··· 2215 2215 } 2216 2216 dev_info(ddev, "%lukB available on disc\n", lba << 1); 2217 2217 } 2218 + set_blocksize(bdev_file, CD_FRAMESIZE); 2218 2219 2219 2220 return 0; 2220 2221 ··· 2279 2278 ret = pkt_open_dev(pd, mode & BLK_OPEN_WRITE); 2280 2279 if (ret) 2281 2280 goto out_dec; 2282 - /* 2283 - * needed here as well, since ext2 (among others) may change 2284 - * the blocksize at mount time 2285 - */ 2286 - set_blocksize(disk->part0, CD_FRAMESIZE); 2287 2281 } 2288 2282 mutex_unlock(&ctl_mutex); 2289 2283 mutex_unlock(&pktcdvd_mutex); ··· 2522 2526 __module_get(THIS_MODULE); 2523 2527 2524 2528 pd->bdev_file = bdev_file; 2525 - set_blocksize(file_bdev(bdev_file), CD_FRAMESIZE); 2526 2529 2527 2530 atomic_set(&pd->cdrw.pending_bios, 0); 2528 2531 pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->disk->disk_name);
+7 -22
drivers/block/zram/zram_drv.c
··· 426 426 if (!zram->backing_dev) 427 427 return; 428 428 429 - fput(zram->bdev_file); 430 429 /* hope filp_close flush all of IO */ 431 430 filp_close(zram->backing_dev, NULL); 432 431 zram->backing_dev = NULL; 433 - zram->bdev_file = NULL; 432 + zram->bdev = NULL; 434 433 zram->disk->fops = &zram_devops; 435 434 kvfree(zram->bitmap); 436 435 zram->bitmap = NULL; ··· 472 473 size_t sz; 473 474 struct file *backing_dev = NULL; 474 475 struct inode *inode; 475 - struct address_space *mapping; 476 476 unsigned int bitmap_sz; 477 477 unsigned long nr_pages, *bitmap = NULL; 478 - struct file *bdev_file = NULL; 479 478 int err; 480 479 struct zram *zram = dev_to_zram(dev); 481 480 ··· 494 497 if (sz > 0 && file_name[sz - 1] == '\n') 495 498 file_name[sz - 1] = 0x00; 496 499 497 - backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); 500 + backing_dev = filp_open(file_name, O_RDWR | O_LARGEFILE | O_EXCL, 0); 498 501 if (IS_ERR(backing_dev)) { 499 502 err = PTR_ERR(backing_dev); 500 503 backing_dev = NULL; 501 504 goto out; 502 505 } 503 506 504 - mapping = backing_dev->f_mapping; 505 - inode = mapping->host; 507 + inode = backing_dev->f_mapping->host; 506 508 507 509 /* Support only block device in this moment */ 508 510 if (!S_ISBLK(inode->i_mode)) { 509 511 err = -ENOTBLK; 510 - goto out; 511 - } 512 - 513 - bdev_file = bdev_file_open_by_dev(inode->i_rdev, 514 - BLK_OPEN_READ | BLK_OPEN_WRITE, zram, NULL); 515 - if (IS_ERR(bdev_file)) { 516 - err = PTR_ERR(bdev_file); 517 - bdev_file = NULL; 518 512 goto out; 519 513 } 520 514 ··· 519 531 520 532 reset_bdev(zram); 521 533 522 - zram->bdev_file = bdev_file; 534 + zram->bdev = I_BDEV(inode); 523 535 zram->backing_dev = backing_dev; 524 536 zram->bitmap = bitmap; 525 537 zram->nr_pages = nr_pages; ··· 531 543 return len; 532 544 out: 533 545 kvfree(bitmap); 534 - 535 - if (bdev_file) 536 - fput(bdev_file); 537 546 538 547 if (backing_dev) 539 548 filp_close(backing_dev, NULL); ··· 572 587 { 573 588 struct bio *bio; 574 589 575 - bio = bio_alloc(file_bdev(zram->bdev_file), 1, parent->bi_opf, GFP_NOIO); 590 + bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 576 591 bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 577 592 __bio_add_page(bio, page, PAGE_SIZE, 0); 578 593 bio_chain(bio, parent); ··· 688 703 continue; 689 704 } 690 705 691 - bio_init(&bio, file_bdev(zram->bdev_file), &bio_vec, 1, 706 + bio_init(&bio, zram->bdev, &bio_vec, 1, 692 707 REQ_OP_WRITE | REQ_SYNC); 693 708 bio.bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 694 709 __bio_add_page(&bio, page, PAGE_SIZE, 0); ··· 770 785 struct bio_vec bv; 771 786 struct bio bio; 772 787 773 - bio_init(&bio, file_bdev(zw->zram->bdev_file), &bv, 1, REQ_OP_READ); 788 + bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); 774 789 bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); 775 790 __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); 776 791 zw->error = submit_bio_wait(&bio);
+1 -1
drivers/block/zram/zram_drv.h
··· 132 132 spinlock_t wb_limit_lock; 133 133 bool wb_limit_enable; 134 134 u64 bd_wb_limit; 135 - struct file *bdev_file; 135 + struct block_device *bdev; 136 136 unsigned long *bitmap; 137 137 unsigned long nr_pages; 138 138 #endif
-4
drivers/md/bcache/super.c
··· 2555 2555 if (IS_ERR(bdev_file)) 2556 2556 goto out_free_sb; 2557 2557 2558 - err = "failed to set blocksize"; 2559 - if (set_blocksize(file_bdev(bdev_file), 4096)) 2560 - goto out_blkdev_put; 2561 - 2562 2558 err = read_super(sb, file_bdev(bdev_file), &sb_disk); 2563 2559 if (err) 2564 2560 goto out_blkdev_put;
+1 -1
fs/btrfs/dev-replace.c
··· 316 316 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); 317 317 set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); 318 318 device->dev_stats_valid = 1; 319 - set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); 319 + set_blocksize(bdev_file, BTRFS_BDEV_BLOCKSIZE); 320 320 device->fs_devices = fs_devices; 321 321 322 322 ret = btrfs_get_dev_zone_info(device, false);
+8 -5
fs/btrfs/volumes.c
··· 482 482 483 483 if (flush) 484 484 sync_blockdev(bdev); 485 - ret = set_blocksize(bdev, BTRFS_BDEV_BLOCKSIZE); 486 - if (ret) { 487 - fput(*bdev_file); 488 - goto error; 485 + if (holder) { 486 + ret = set_blocksize(*bdev_file, BTRFS_BDEV_BLOCKSIZE); 487 + if (ret) { 488 + fput(*bdev_file); 489 + goto error; 490 + } 489 491 } 490 492 invalidate_bdev(bdev); 491 493 *disk_super = btrfs_read_dev_super(bdev); ··· 500 498 return 0; 501 499 502 500 error: 501 + *disk_super = NULL; 503 502 *bdev_file = NULL; 504 503 return ret; 505 504 } ··· 2717 2714 set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); 2718 2715 clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); 2719 2716 device->dev_stats_valid = 1; 2720 - set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); 2717 + set_blocksize(device->bdev_file, BTRFS_BDEV_BLOCKSIZE); 2721 2718 2722 2719 if (seeding_dev) { 2723 2720 btrfs_clear_sb_rdonly(sb);
+1 -1
fs/ext4/super.c
··· 5866 5866 5867 5867 sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; 5868 5868 offset = EXT4_MIN_BLOCK_SIZE % blocksize; 5869 - set_blocksize(bdev, blocksize); 5869 + set_blocksize(bdev_file, blocksize); 5870 5870 bh = __bread(bdev, sb_block, blocksize); 5871 5871 if (!bh) { 5872 5872 ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
+2 -3
fs/reiserfs/journal.c
··· 2626 2626 MAJOR(jdev), MINOR(jdev), result); 2627 2627 return result; 2628 2628 } else if (jdev != super->s_dev) 2629 - set_blocksize(file_bdev(journal->j_bdev_file), 2630 - super->s_blocksize); 2629 + set_blocksize(journal->j_bdev_file, super->s_blocksize); 2631 2630 2632 2631 return 0; 2633 2632 } ··· 2642 2643 return result; 2643 2644 } 2644 2645 2645 - set_blocksize(file_bdev(journal->j_bdev_file), super->s_blocksize); 2646 + set_blocksize(journal->j_bdev_file, super->s_blocksize); 2646 2647 reiserfs_info(super, 2647 2648 "journal_init_dev: journal device: %pg\n", 2648 2649 file_bdev(journal->j_bdev_file));
+1 -1
fs/xfs/xfs_buf.c
··· 2046 2046 btp->bt_meta_sectorsize = sectorsize; 2047 2047 btp->bt_meta_sectormask = sectorsize - 1; 2048 2048 2049 - if (set_blocksize(btp->bt_bdev, sectorsize)) { 2049 + if (set_blocksize(btp->bt_bdev_file, sectorsize)) { 2050 2050 xfs_warn(btp->bt_mount, 2051 2051 "Cannot set_blocksize to %u on device %pg", 2052 2052 sectorsize, btp->bt_bdev);
+1 -1
include/linux/blkdev.h
··· 1481 1481 } 1482 1482 1483 1483 int bdev_read_only(struct block_device *bdev); 1484 - int set_blocksize(struct block_device *bdev, int size); 1484 + int set_blocksize(struct file *file, int size); 1485 1485 1486 1486 int lookup_bdev(const char *pathname, dev_t *dev); 1487 1487
-2
include/linux/swap.h
··· 310 310 unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */ 311 311 struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */ 312 312 struct rb_root swap_extent_root;/* root of the swap extent rbtree */ 313 - struct file *bdev_file; /* open handle of the bdev */ 314 313 struct block_device *bdev; /* swap device or bdev of swap file */ 315 314 struct file *swap_file; /* seldom referenced */ 316 - unsigned int old_block_size; /* seldom referenced */ 317 315 struct completion comp; /* seldom referenced */ 318 316 spinlock_t lock; /* 319 317 * protect map scan related fields like
+1 -6
kernel/power/swap.c
··· 368 368 if (IS_ERR(hib_resume_bdev_file)) 369 369 return PTR_ERR(hib_resume_bdev_file); 370 370 371 - res = set_blocksize(file_bdev(hib_resume_bdev_file), PAGE_SIZE); 372 - if (res < 0) 373 - fput(hib_resume_bdev_file); 374 - 375 - return res; 371 + return 0; 376 372 } 377 373 378 374 /** ··· 1570 1574 hib_resume_bdev_file = bdev_file_open_by_dev(swsusp_resume_device, 1571 1575 BLK_OPEN_READ, holder, NULL); 1572 1576 if (!IS_ERR(hib_resume_bdev_file)) { 1573 - set_blocksize(file_bdev(hib_resume_bdev_file), PAGE_SIZE); 1574 1577 clear_page(swsusp_header); 1575 1578 error = hib_submit_io(REQ_OP_READ, swsusp_resume_block, 1576 1579 swsusp_header, NULL);
+2 -27
mm/swapfile.c
··· 2469 2469 struct inode *inode; 2470 2470 struct filename *pathname; 2471 2471 int err, found = 0; 2472 - unsigned int old_block_size; 2473 2472 2474 2473 if (!capable(CAP_SYS_ADMIN)) 2475 2474 return -EPERM; ··· 2581 2582 } 2582 2583 2583 2584 swap_file = p->swap_file; 2584 - old_block_size = p->old_block_size; 2585 2585 p->swap_file = NULL; 2586 2586 p->max = 0; 2587 2587 swap_map = p->swap_map; ··· 2603 2605 exit_swap_address_space(p->type); 2604 2606 2605 2607 inode = mapping->host; 2606 - if (p->bdev_file) { 2607 - set_blocksize(p->bdev, old_block_size); 2608 - fput(p->bdev_file); 2609 - p->bdev_file = NULL; 2610 - } 2611 2608 2612 2609 inode_lock(inode); 2613 2610 inode->i_flags &= ~S_SWAPFILE; ··· 2828 2835 2829 2836 static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) 2830 2837 { 2831 - int error; 2832 - 2833 2838 if (S_ISBLK(inode->i_mode)) { 2834 - p->bdev_file = bdev_file_open_by_dev(inode->i_rdev, 2835 - BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL); 2836 - if (IS_ERR(p->bdev_file)) { 2837 - error = PTR_ERR(p->bdev_file); 2838 - p->bdev_file = NULL; 2839 - return error; 2840 - } 2841 - p->bdev = file_bdev(p->bdev_file); 2842 - p->old_block_size = block_size(p->bdev); 2843 - error = set_blocksize(p->bdev, PAGE_SIZE); 2844 - if (error < 0) 2845 - return error; 2839 + p->bdev = I_BDEV(inode); 2846 2840 /* 2847 2841 * Zoned block devices contain zones that have a sequential 2848 2842 * write only restriction. Hence zoned block devices are not ··· 3070 3090 name = NULL; 3071 3091 goto bad_swap; 3072 3092 } 3073 - swap_file = file_open_name(name, O_RDWR|O_LARGEFILE, 0); 3093 + swap_file = file_open_name(name, O_RDWR | O_LARGEFILE | O_EXCL, 0); 3074 3094 if (IS_ERR(swap_file)) { 3075 3095 error = PTR_ERR(swap_file); 3076 3096 swap_file = NULL; ··· 3269 3289 p->percpu_cluster = NULL; 3270 3290 free_percpu(p->cluster_next_cpu); 3271 3291 p->cluster_next_cpu = NULL; 3272 - if (p->bdev_file) { 3273 - set_blocksize(p->bdev, p->old_block_size); 3274 - fput(p->bdev_file); 3275 - p->bdev_file = NULL; 3276 - } 3277 3292 inode = NULL; 3278 3293 destroy_swap_extents(p); 3279 3294 swap_cgroup_swapoff(p->type);