Merge tag 'for-5.11/block-2020-12-14' of git://git.kernel.dk/linux-block

Pull block updates from Jens Axboe:
"Another series of killing more code than what is being added, again
thanks to Christoph's relentless cleanups and tech debt tackling.

This contains:

- blk-iocost improvements (Baolin Wang)

- part0 iostat fix (Jeffle Xu)

- Disable iopoll for split bios (Jeffle Xu)

- block tracepoint cleanups (Christoph Hellwig)

- Merging of struct block_device and hd_struct (Christoph Hellwig)

- Rework/cleanup of how block device sizes are updated (Christoph
Hellwig)

- Simplification of gendisk lookup and removal of block device
aliasing (Christoph Hellwig)

- Block device ioctl cleanups (Christoph Hellwig)

- Removal of bdget()/blkdev_get() as exported API (Christoph Hellwig)

- Disk change rework, avoid ->revalidate_disk() (Christoph Hellwig)

- sbitmap improvements (Pavel Begunkov)

- Hybrid polling fix (Pavel Begunkov)

- bvec iteration improvements (Pavel Begunkov)

- Zone revalidation fixes (Damien Le Moal)

- blk-throttle limit fix (Yu Kuai)

- Various little fixes"

* tag 'for-5.11/block-2020-12-14' of git://git.kernel.dk/linux-block: (126 commits)
blk-mq: fix msec comment from micro to milli seconds
blk-mq: update arg in comment of blk_mq_map_queue
blk-mq: add helper allocating tagset->tags
Revert "block: Fix a lockdep complaint triggered by request queue flushing"
nvme-loop: use blk_mq_hctx_set_fq_lock_class to set loop's lock class
blk-mq: add new API of blk_mq_hctx_set_fq_lock_class
block: disable iopoll for split bio
block: Improve blk_revalidate_disk_zones() checks
sbitmap: simplify wrap check
sbitmap: replace CAS with atomic and
sbitmap: remove swap_lock
sbitmap: optimise sbitmap_deferred_clear()
blk-mq: skip hybrid polling if iopoll doesn't spin
blk-iocost: Factor out the base vrate change into a separate function
blk-iocost: Factor out the active iocgs' state check into a separate function
blk-iocost: Move the usage ratio calculation to the correct place
blk-iocost: Remove unnecessary advance declaration
blk-iocost: Fix some typos in comments
blktrace: fix up a kerneldoc comment
block: remove the request_queue to argument request based tracepoints
...

+2147 -3370
+5 -5
block/bio.c
··· 608 void guard_bio_eod(struct bio *bio) 609 { 610 sector_t maxsector; 611 - struct hd_struct *part; 612 613 rcu_read_lock(); 614 part = __disk_get_part(bio->bi_disk, bio->bi_partno); 615 if (part) 616 - maxsector = part_nr_sects_read(part); 617 - else 618 maxsector = get_capacity(bio->bi_disk); 619 rcu_read_unlock(); 620 ··· 1212 1213 flush_dcache_page(dst_bv.bv_page); 1214 1215 - bio_advance_iter(src, src_iter, bytes); 1216 - bio_advance_iter(dst, dst_iter, bytes); 1217 } 1218 } 1219 EXPORT_SYMBOL(bio_copy_data_iter);
··· 608 void guard_bio_eod(struct bio *bio) 609 { 610 sector_t maxsector; 611 + struct block_device *part; 612 613 rcu_read_lock(); 614 part = __disk_get_part(bio->bi_disk, bio->bi_partno); 615 if (part) 616 + maxsector = bdev_nr_sectors(part); 617 + else 618 maxsector = get_capacity(bio->bi_disk); 619 rcu_read_unlock(); 620 ··· 1212 1213 flush_dcache_page(dst_bv.bv_page); 1214 1215 + bio_advance_iter_single(src, src_iter, bytes); 1216 + bio_advance_iter_single(dst, dst_iter, bytes); 1217 } 1218 } 1219 EXPORT_SYMBOL(bio_copy_data_iter);
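The bdev_nr_sectors() helper used above replaces the seqcount-protected part_nr_sects_read(); with the hd_struct/block_device merge the partition size lives in the bdev inode, so the helper is roughly the following (a sketch of the 5.11-era definition, not a verbatim copy):

    static inline sector_t bdev_nr_sectors(struct block_device *bdev)
    {
            /* bd_inode->i_size is kept in bytes; convert to 512-byte sectors */
            return i_size_read(bdev->bd_inode) >> 9;
    }

Callers that previously depended on the seqcount dance (see the part_nr_sects_read()/part_nr_sects_write() removal in block/blk.h further down) now just read the inode size.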
+25 -26
block/blk-cgroup.c
··· 556 } 557 558 /** 559 - * blkg_conf_prep - parse and prepare for per-blkg config update 560 * @inputp: input string pointer 561 * 562 * Parse the device node prefix part, MAJ:MIN, of per-blkg config update 563 - * from @input and get and return the matching gendisk. *@inputp is 564 * updated to point past the device node prefix. Returns an ERR_PTR() 565 * value on error. 566 * 567 * Use this function iff blkg_conf_prep() can't be used for some reason. 568 */ 569 - struct gendisk *blkcg_conf_get_disk(char **inputp) 570 { 571 char *input = *inputp; 572 unsigned int major, minor; 573 - struct gendisk *disk; 574 - int key_len, part; 575 576 if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) 577 return ERR_PTR(-EINVAL); ··· 581 return ERR_PTR(-EINVAL); 582 input = skip_spaces(input); 583 584 - disk = get_gendisk(MKDEV(major, minor), &part); 585 - if (!disk) 586 return ERR_PTR(-ENODEV); 587 - if (part) { 588 - put_disk_and_module(disk); 589 return ERR_PTR(-ENODEV); 590 } 591 592 *inputp = input; 593 - return disk; 594 } 595 596 /** ··· 607 */ 608 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, 609 char *input, struct blkg_conf_ctx *ctx) 610 - __acquires(rcu) __acquires(&disk->queue->queue_lock) 611 { 612 - struct gendisk *disk; 613 struct request_queue *q; 614 struct blkcg_gq *blkg; 615 int ret; 616 617 - disk = blkcg_conf_get_disk(&input); 618 - if (IS_ERR(disk)) 619 - return PTR_ERR(disk); 620 621 - q = disk->queue; 622 623 rcu_read_lock(); 624 spin_lock_irq(&q->queue_lock); ··· 689 goto success; 690 } 691 success: 692 - ctx->disk = disk; 693 ctx->blkg = blkg; 694 ctx->body = input; 695 return 0; ··· 700 spin_unlock_irq(&q->queue_lock); 701 rcu_read_unlock(); 702 fail: 703 - put_disk_and_module(disk); 704 /* 705 * If queue was bypassing, we should retry. Do so after a 706 * short msleep(). It isn't strictly necessary but queue ··· 723 * with blkg_conf_prep(). 724 */ 725 void blkg_conf_finish(struct blkg_conf_ctx *ctx) 726 - __releases(&ctx->disk->queue->queue_lock) __releases(rcu) 727 { 728 - spin_unlock_irq(&ctx->disk->queue->queue_lock); 729 rcu_read_unlock(); 730 - put_disk_and_module(ctx->disk); 731 } 732 EXPORT_SYMBOL_GPL(blkg_conf_finish); 733 ··· 820 821 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 822 while ((dev = class_dev_iter_next(&iter))) { 823 - struct gendisk *disk = dev_to_disk(dev); 824 - struct hd_struct *part = disk_get_part(disk, 0); 825 - struct blkcg_gq *blkg = blk_queue_root_blkg(disk->queue); 826 struct blkg_iostat tmp; 827 int cpu; 828 ··· 830 for_each_possible_cpu(cpu) { 831 struct disk_stats *cpu_dkstats; 832 833 - cpu_dkstats = per_cpu_ptr(part->dkstats, cpu); 834 tmp.ios[BLKG_IOSTAT_READ] += 835 cpu_dkstats->ios[STAT_READ]; 836 tmp.ios[BLKG_IOSTAT_WRITE] += ··· 849 blkg_iostat_set(&blkg->iostat.cur, &tmp); 850 u64_stats_update_end(&blkg->iostat.sync); 851 } 852 - disk_put_part(part); 853 } 854 } 855
··· 556 } 557 558 /** 559 + * blkcg_conf_open_bdev - parse and open bdev for per-blkg config update 560 * @inputp: input string pointer 561 * 562 * Parse the device node prefix part, MAJ:MIN, of per-blkg config update 563 + * from @input and get and return the matching bdev. *@inputp is 564 * updated to point past the device node prefix. Returns an ERR_PTR() 565 * value on error. 566 * 567 * Use this function iff blkg_conf_prep() can't be used for some reason. 568 */ 569 + struct block_device *blkcg_conf_open_bdev(char **inputp) 570 { 571 char *input = *inputp; 572 unsigned int major, minor; 573 + struct block_device *bdev; 574 + int key_len; 575 576 if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) 577 return ERR_PTR(-EINVAL); ··· 581 return ERR_PTR(-EINVAL); 582 input = skip_spaces(input); 583 584 + bdev = blkdev_get_no_open(MKDEV(major, minor)); 585 + if (!bdev) 586 return ERR_PTR(-ENODEV); 587 + if (bdev_is_partition(bdev)) { 588 + blkdev_put_no_open(bdev); 589 return ERR_PTR(-ENODEV); 590 } 591 592 *inputp = input; 593 + return bdev; 594 } 595 596 /** ··· 607 */ 608 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, 609 char *input, struct blkg_conf_ctx *ctx) 610 + __acquires(rcu) __acquires(&bdev->bd_disk->queue->queue_lock) 611 { 612 + struct block_device *bdev; 613 struct request_queue *q; 614 struct blkcg_gq *blkg; 615 int ret; 616 617 + bdev = blkcg_conf_open_bdev(&input); 618 + if (IS_ERR(bdev)) 619 + return PTR_ERR(bdev); 620 621 + q = bdev->bd_disk->queue; 622 623 rcu_read_lock(); 624 spin_lock_irq(&q->queue_lock); ··· 689 goto success; 690 } 691 success: 692 + ctx->bdev = bdev; 693 ctx->blkg = blkg; 694 ctx->body = input; 695 return 0; ··· 700 spin_unlock_irq(&q->queue_lock); 701 rcu_read_unlock(); 702 fail: 703 + blkdev_put_no_open(bdev); 704 /* 705 * If queue was bypassing, we should retry. Do so after a 706 * short msleep(). It isn't strictly necessary but queue ··· 723 * with blkg_conf_prep(). 724 */ 725 void blkg_conf_finish(struct blkg_conf_ctx *ctx) 726 + __releases(&ctx->bdev->bd_disk->queue->queue_lock) __releases(rcu) 727 { 728 + spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock); 729 rcu_read_unlock(); 730 + blkdev_put_no_open(ctx->bdev); 731 } 732 EXPORT_SYMBOL_GPL(blkg_conf_finish); 733 ··· 820 821 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 822 while ((dev = class_dev_iter_next(&iter))) { 823 + struct block_device *bdev = dev_to_bdev(dev); 824 + struct blkcg_gq *blkg = 825 + blk_queue_root_blkg(bdev->bd_disk->queue); 826 struct blkg_iostat tmp; 827 int cpu; 828 ··· 830 for_each_possible_cpu(cpu) { 831 struct disk_stats *cpu_dkstats; 832 833 + cpu_dkstats = per_cpu_ptr(bdev->bd_stats, cpu); 834 tmp.ios[BLKG_IOSTAT_READ] += 835 cpu_dkstats->ios[STAT_READ]; 836 tmp.ios[BLKG_IOSTAT_WRITE] += ··· 849 blkg_iostat_set(&blkg->iostat.cur, &tmp); 850 u64_stats_update_end(&blkg->iostat.sync); 851 } 852 } 853 } 854
+32 -38
block/blk-core.c
··· 666 } 667 __setup("fail_make_request=", setup_fail_make_request); 668 669 - static bool should_fail_request(struct hd_struct *part, unsigned int bytes) 670 { 671 - return part->make_it_fail && should_fail(&fail_make_request, bytes); 672 } 673 674 static int __init fail_make_request_debugfs(void) ··· 683 684 #else /* CONFIG_FAIL_MAKE_REQUEST */ 685 686 - static inline bool should_fail_request(struct hd_struct *part, 687 unsigned int bytes) 688 { 689 return false; ··· 691 692 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 693 694 - static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) 695 { 696 const int op = bio_op(bio); 697 698 - if (part->policy && op_is_write(op)) { 699 char b[BDEVNAME_SIZE]; 700 701 if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) ··· 703 704 WARN_ONCE(1, 705 "Trying to write to read-only block-device %s (partno %d)\n", 706 - bio_devname(bio, b), part->partno); 707 /* Older lvm-tools actually trigger this */ 708 return false; 709 } ··· 713 714 static noinline int should_fail_bio(struct bio *bio) 715 { 716 - if (should_fail_request(&bio->bi_disk->part0, bio->bi_iter.bi_size)) 717 return -EIO; 718 return 0; 719 } ··· 742 */ 743 static inline int blk_partition_remap(struct bio *bio) 744 { 745 - struct hd_struct *p; 746 int ret = -EIO; 747 748 rcu_read_lock(); ··· 755 goto out; 756 757 if (bio_sectors(bio)) { 758 - if (bio_check_eod(bio, part_nr_sects_read(p))) 759 goto out; 760 - bio->bi_iter.bi_sector += p->start_sect; 761 - trace_block_bio_remap(bio->bi_disk->queue, bio, part_devt(p), 762 - bio->bi_iter.bi_sector - p->start_sect); 763 } 764 bio->bi_partno = 0; 765 ret = 0; ··· 830 if (unlikely(blk_partition_remap(bio))) 831 goto end_io; 832 } else { 833 - if (unlikely(bio_check_ro(bio, &bio->bi_disk->part0))) 834 goto end_io; 835 if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) 836 goto end_io; ··· 907 blkcg_bio_issue_init(bio); 908 909 if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { 910 - trace_block_bio_queue(q, bio); 911 /* Now that enqueuing has been traced, we need to trace 912 * completion as well. 913 */ ··· 1202 return ret; 1203 1204 if (rq->rq_disk && 1205 - should_fail_request(&rq->rq_disk->part0, blk_rq_bytes(rq))) 1206 return BLK_STS_IOERR; 1207 1208 if (blk_crypto_insert_cloned_request(rq)) ··· 1261 } 1262 EXPORT_SYMBOL_GPL(blk_rq_err_bytes); 1263 1264 - static void update_io_ticks(struct hd_struct *part, unsigned long now, bool end) 1265 { 1266 unsigned long stamp; 1267 again: 1268 - stamp = READ_ONCE(part->stamp); 1269 if (unlikely(stamp != now)) { 1270 - if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) 1271 __part_stat_add(part, io_ticks, end ? 
now - stamp : 1); 1272 } 1273 - if (part->partno) { 1274 - part = &part_to_disk(part)->part0; 1275 goto again; 1276 } 1277 } ··· 1281 { 1282 if (req->part && blk_do_io_stat(req)) { 1283 const int sgrp = op_stat_group(req_op(req)); 1284 - struct hd_struct *part; 1285 1286 part_stat_lock(); 1287 - part = req->part; 1288 - part_stat_add(part, sectors[sgrp], bytes >> 9); 1289 part_stat_unlock(); 1290 } 1291 } ··· 1298 if (req->part && blk_do_io_stat(req) && 1299 !(req->rq_flags & RQF_FLUSH_SEQ)) { 1300 const int sgrp = op_stat_group(req_op(req)); 1301 - struct hd_struct *part; 1302 1303 part_stat_lock(); 1304 - part = req->part; 1305 - 1306 - update_io_ticks(part, jiffies, true); 1307 - part_stat_inc(part, ios[sgrp]); 1308 - part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); 1309 part_stat_unlock(); 1310 - 1311 - hd_struct_put(part); 1312 } 1313 } 1314 ··· 1319 part_stat_unlock(); 1320 } 1321 1322 - static unsigned long __part_start_io_acct(struct hd_struct *part, 1323 unsigned int sectors, unsigned int op) 1324 { 1325 const int sgrp = op_stat_group(op); ··· 1335 return now; 1336 } 1337 1338 - unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, 1339 struct bio *bio) 1340 { 1341 *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); ··· 1347 unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, 1348 unsigned int op) 1349 { 1350 - return __part_start_io_acct(&disk->part0, sectors, op); 1351 } 1352 EXPORT_SYMBOL(disk_start_io_acct); 1353 1354 - static void __part_end_io_acct(struct hd_struct *part, unsigned int op, 1355 unsigned long start_time) 1356 { 1357 const int sgrp = op_stat_group(op); ··· 1365 part_stat_unlock(); 1366 } 1367 1368 - void part_end_io_acct(struct hd_struct *part, struct bio *bio, 1369 unsigned long start_time) 1370 { 1371 __part_end_io_acct(part, bio_op(bio), start_time); 1372 - hd_struct_put(part); 1373 } 1374 EXPORT_SYMBOL_GPL(part_end_io_acct); 1375 1376 void disk_end_io_acct(struct gendisk *disk, unsigned int op, 1377 unsigned long start_time) 1378 { 1379 - __part_end_io_acct(&disk->part0, op, start_time); 1380 } 1381 EXPORT_SYMBOL(disk_end_io_acct); 1382
··· 666 } 667 __setup("fail_make_request=", setup_fail_make_request); 668 669 + static bool should_fail_request(struct block_device *part, unsigned int bytes) 670 { 671 + return part->bd_make_it_fail && should_fail(&fail_make_request, bytes); 672 } 673 674 static int __init fail_make_request_debugfs(void) ··· 683 684 #else /* CONFIG_FAIL_MAKE_REQUEST */ 685 686 + static inline bool should_fail_request(struct block_device *part, 687 unsigned int bytes) 688 { 689 return false; ··· 691 692 #endif /* CONFIG_FAIL_MAKE_REQUEST */ 693 694 + static inline bool bio_check_ro(struct bio *bio, struct block_device *part) 695 { 696 const int op = bio_op(bio); 697 698 + if (part->bd_read_only && op_is_write(op)) { 699 char b[BDEVNAME_SIZE]; 700 701 if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) ··· 703 704 WARN_ONCE(1, 705 "Trying to write to read-only block-device %s (partno %d)\n", 706 + bio_devname(bio, b), part->bd_partno); 707 /* Older lvm-tools actually trigger this */ 708 return false; 709 } ··· 713 714 static noinline int should_fail_bio(struct bio *bio) 715 { 716 + if (should_fail_request(bio->bi_disk->part0, bio->bi_iter.bi_size)) 717 return -EIO; 718 return 0; 719 } ··· 742 */ 743 static inline int blk_partition_remap(struct bio *bio) 744 { 745 + struct block_device *p; 746 int ret = -EIO; 747 748 rcu_read_lock(); ··· 755 goto out; 756 757 if (bio_sectors(bio)) { 758 + if (bio_check_eod(bio, bdev_nr_sectors(p))) 759 goto out; 760 + bio->bi_iter.bi_sector += p->bd_start_sect; 761 + trace_block_bio_remap(bio, p->bd_dev, 762 + bio->bi_iter.bi_sector - 763 + p->bd_start_sect); 764 } 765 bio->bi_partno = 0; 766 ret = 0; ··· 829 if (unlikely(blk_partition_remap(bio))) 830 goto end_io; 831 } else { 832 + if (unlikely(bio_check_ro(bio, bio->bi_disk->part0))) 833 goto end_io; 834 if (unlikely(bio_check_eod(bio, get_capacity(bio->bi_disk)))) 835 goto end_io; ··· 906 blkcg_bio_issue_init(bio); 907 908 if (!bio_flagged(bio, BIO_TRACE_COMPLETION)) { 909 + trace_block_bio_queue(bio); 910 /* Now that enqueuing has been traced, we need to trace 911 * completion as well. 912 */ ··· 1201 return ret; 1202 1203 if (rq->rq_disk && 1204 + should_fail_request(rq->rq_disk->part0, blk_rq_bytes(rq))) 1205 return BLK_STS_IOERR; 1206 1207 if (blk_crypto_insert_cloned_request(rq)) ··· 1260 } 1261 EXPORT_SYMBOL_GPL(blk_rq_err_bytes); 1262 1263 + static void update_io_ticks(struct block_device *part, unsigned long now, 1264 + bool end) 1265 { 1266 unsigned long stamp; 1267 again: 1268 + stamp = READ_ONCE(part->bd_stamp); 1269 if (unlikely(stamp != now)) { 1270 + if (likely(cmpxchg(&part->bd_stamp, stamp, now) == stamp)) 1271 __part_stat_add(part, io_ticks, end ? 
now - stamp : 1); 1272 } 1273 + if (part->bd_partno) { 1274 + part = bdev_whole(part); 1275 goto again; 1276 } 1277 } ··· 1279 { 1280 if (req->part && blk_do_io_stat(req)) { 1281 const int sgrp = op_stat_group(req_op(req)); 1282 1283 part_stat_lock(); 1284 + part_stat_add(req->part, sectors[sgrp], bytes >> 9); 1285 part_stat_unlock(); 1286 } 1287 } ··· 1298 if (req->part && blk_do_io_stat(req) && 1299 !(req->rq_flags & RQF_FLUSH_SEQ)) { 1300 const int sgrp = op_stat_group(req_op(req)); 1301 1302 part_stat_lock(); 1303 + update_io_ticks(req->part, jiffies, true); 1304 + part_stat_inc(req->part, ios[sgrp]); 1305 + part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns); 1306 part_stat_unlock(); 1307 } 1308 } 1309 ··· 1324 part_stat_unlock(); 1325 } 1326 1327 + static unsigned long __part_start_io_acct(struct block_device *part, 1328 unsigned int sectors, unsigned int op) 1329 { 1330 const int sgrp = op_stat_group(op); ··· 1340 return now; 1341 } 1342 1343 + unsigned long part_start_io_acct(struct gendisk *disk, struct block_device **part, 1344 struct bio *bio) 1345 { 1346 *part = disk_map_sector_rcu(disk, bio->bi_iter.bi_sector); ··· 1352 unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, 1353 unsigned int op) 1354 { 1355 + return __part_start_io_acct(disk->part0, sectors, op); 1356 } 1357 EXPORT_SYMBOL(disk_start_io_acct); 1358 1359 + static void __part_end_io_acct(struct block_device *part, unsigned int op, 1360 unsigned long start_time) 1361 { 1362 const int sgrp = op_stat_group(op); ··· 1370 part_stat_unlock(); 1371 } 1372 1373 + void part_end_io_acct(struct block_device *part, struct bio *bio, 1374 unsigned long start_time) 1375 { 1376 __part_end_io_acct(part, bio_op(bio), start_time); 1377 } 1378 EXPORT_SYMBOL_GPL(part_end_io_acct); 1379 1380 void disk_end_io_acct(struct gendisk *disk, unsigned int op, 1381 unsigned long start_time) 1382 { 1383 + __part_end_io_acct(disk->part0, op, start_time); 1384 } 1385 EXPORT_SYMBOL(disk_end_io_acct); 1386
+26 -6
block/blk-flush.c
··· 69 #include <linux/blkdev.h> 70 #include <linux/gfp.h> 71 #include <linux/blk-mq.h> 72 - #include <linux/lockdep.h> 73 74 #include "blk.h" 75 #include "blk-mq.h" ··· 138 139 static void blk_account_io_flush(struct request *rq) 140 { 141 - struct hd_struct *part = &rq->rq_disk->part0; 142 143 part_stat_lock(); 144 part_stat_inc(part, ios[STAT_FLUSH]); ··· 473 INIT_LIST_HEAD(&fq->flush_queue[1]); 474 INIT_LIST_HEAD(&fq->flush_data_in_flight); 475 476 - lockdep_register_key(&fq->key); 477 - lockdep_set_class(&fq->mq_flush_lock, &fq->key); 478 - 479 return fq; 480 481 fail_rq: ··· 487 if (!fq) 488 return; 489 490 - lockdep_unregister_key(&fq->key); 491 kfree(fq->flush_rq); 492 kfree(fq); 493 }
··· 69 #include <linux/blkdev.h> 70 #include <linux/gfp.h> 71 #include <linux/blk-mq.h> 72 73 #include "blk.h" 74 #include "blk-mq.h" ··· 139 140 static void blk_account_io_flush(struct request *rq) 141 { 142 + struct block_device *part = rq->rq_disk->part0; 143 144 part_stat_lock(); 145 part_stat_inc(part, ios[STAT_FLUSH]); ··· 474 INIT_LIST_HEAD(&fq->flush_queue[1]); 475 INIT_LIST_HEAD(&fq->flush_data_in_flight); 476 477 return fq; 478 479 fail_rq: ··· 491 if (!fq) 492 return; 493 494 kfree(fq->flush_rq); 495 kfree(fq); 496 } 497 + 498 + /* 499 + * Allow driver to set its own lock class to fq->mq_flush_lock for 500 + * avoiding lockdep complaint. 501 + * 502 + * flush_end_io() may be called recursively from some driver, such as 503 + * nvme-loop, so lockdep may complain 'possible recursive locking' because 504 + * all 'struct blk_flush_queue' instance share same mq_flush_lock lock class 505 + * key. We need to assign different lock class for these driver's 506 + * fq->mq_flush_lock for avoiding the lockdep warning. 507 + * 508 + * Use dynamically allocated lock class key for each 'blk_flush_queue' 509 + * instance is over-kill, and more worse it introduces horrible boot delay 510 + * issue because synchronize_rcu() is implied in lockdep_unregister_key which 511 + * is called for each hctx release. SCSI probing may synchronously create and 512 + * destroy lots of MQ request_queues for non-existent devices, and some robot 513 + * test kernel always enable lockdep option. It is observed that more than half 514 + * an hour is taken during SCSI MQ probe with per-fq lock class. 515 + */ 516 + void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, 517 + struct lock_class_key *key) 518 + { 519 + lockdep_set_class(&hctx->fq->mq_flush_lock, key); 520 + } 521 + EXPORT_SYMBOL_GPL(blk_mq_hctx_set_fq_lock_class);
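For drivers that need the new hook, the intended usage (modeled on the nvme-loop caller in this series; the names below are illustrative) is to register a driver-private lock class from the ->init_hctx() callback, along the lines of:

    /* one static key per driver so all of its flush queues share a lock
     * class distinct from the default blk-mq one */
    static struct lock_class_key drv_fq_lock_key;

    static int drv_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
                             unsigned int hctx_idx)
    {
            blk_mq_hctx_set_fq_lock_class(hctx, &drv_fq_lock_key);
            return 0;
    }

This keeps lock class keys static and per-driver, avoiding the per-fq dynamic keys (and the synchronize_rcu() cost on teardown) that the reverted approach relied on.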
+178 -155
block/blk-iocost.c
··· 39 * On top of that, a size cost proportional to the length of the IO is 40 * added. While simple, this model captures the operational 41 * characteristics of a wide varienty of devices well enough. Default 42 - * paramters for several different classes of devices are provided and the 43 * parameters can be configured from userspace via 44 * /sys/fs/cgroup/io.cost.model. 45 * ··· 77 * 78 * This constitutes the basis of IO capacity distribution. Each cgroup's 79 * vtime is running at a rate determined by its hweight. A cgroup tracks 80 - * the vtime consumed by past IOs and can issue a new IO iff doing so 81 * wouldn't outrun the current device vtime. Otherwise, the IO is 82 * suspended until the vtime has progressed enough to cover it. 83 * ··· 155 * Instead of debugfs or other clumsy monitoring mechanisms, this 156 * controller uses a drgn based monitoring script - 157 * tools/cgroup/iocost_monitor.py. For details on drgn, please see 158 - * https://github.com/osandov/drgn. The ouput looks like the following. 159 * 160 * sdb RUN per=300ms cur_per=234.218:v203.695 busy= +1 vrate= 62.12% 161 * active weight hweight% inflt% dbt delay usages% ··· 370 AUTOP_SSD_FAST, 371 }; 372 373 - struct ioc_gq; 374 - 375 struct ioc_params { 376 u32 qos[NR_QOS_PARAMS]; 377 u64 i_lcoefs[NR_I_LCOEFS]; ··· 490 /* 491 * `vtime` is this iocg's vtime cursor which progresses as IOs are 492 * issued. If lagging behind device vtime, the delta represents 493 - * the currently available IO budget. If runnning ahead, the 494 * overage. 495 * 496 * `vtime_done` is the same but progressed on completion rather ··· 971 ioc->vtime_err = clamp(ioc->vtime_err, -vperiod, vperiod); 972 } 973 974 /* take a snapshot of the current [v]time and vrate */ 975 static void ioc_now(struct ioc *ioc, struct ioc_now *now) 976 { ··· 1096 1097 /* 1098 * The delta between inuse and active sums indicates that 1099 - * that much of weight is being given away. Parent's inuse 1100 * and active should reflect the ratio. 1101 */ 1102 if (parent->child_active_sum) { ··· 2121 } 2122 } 2123 2124 static void ioc_timer_fn(struct timer_list *timer) 2125 { 2126 struct ioc *ioc = container_of(timer, struct ioc, timer); 2127 struct ioc_gq *iocg, *tiocg; 2128 struct ioc_now now; 2129 LIST_HEAD(surpluses); 2130 - int nr_debtors = 0, nr_shortages = 0, nr_lagging = 0; 2131 u64 usage_us_sum = 0; 2132 u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; 2133 u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; ··· 2224 return; 2225 } 2226 2227 - /* 2228 - * Waiters determine the sleep durations based on the vrate they 2229 - * saw at the time of sleep. If vrate has increased, some waiters 2230 - * could be sleeping for too long. Wake up tardy waiters which 2231 - * should have woken up in the last period and expire idle iocgs. 
2232 - */ 2233 - list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { 2234 - if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && 2235 - !iocg->delay && !iocg_is_idle(iocg)) 2236 - continue; 2237 - 2238 - spin_lock(&iocg->waitq.lock); 2239 - 2240 - /* flush wait and indebt stat deltas */ 2241 - if (iocg->wait_since) { 2242 - iocg->local_stat.wait_us += now.now - iocg->wait_since; 2243 - iocg->wait_since = now.now; 2244 - } 2245 - if (iocg->indebt_since) { 2246 - iocg->local_stat.indebt_us += 2247 - now.now - iocg->indebt_since; 2248 - iocg->indebt_since = now.now; 2249 - } 2250 - if (iocg->indelay_since) { 2251 - iocg->local_stat.indelay_us += 2252 - now.now - iocg->indelay_since; 2253 - iocg->indelay_since = now.now; 2254 - } 2255 - 2256 - if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || 2257 - iocg->delay) { 2258 - /* might be oversleeping vtime / hweight changes, kick */ 2259 - iocg_kick_waitq(iocg, true, &now); 2260 - if (iocg->abs_vdebt || iocg->delay) 2261 - nr_debtors++; 2262 - } else if (iocg_is_idle(iocg)) { 2263 - /* no waiter and idle, deactivate */ 2264 - u64 vtime = atomic64_read(&iocg->vtime); 2265 - s64 excess; 2266 - 2267 - /* 2268 - * @iocg has been inactive for a full duration and will 2269 - * have a high budget. Account anything above target as 2270 - * error and throw away. On reactivation, it'll start 2271 - * with the target budget. 2272 - */ 2273 - excess = now.vnow - vtime - ioc->margins.target; 2274 - if (excess > 0) { 2275 - u32 old_hwi; 2276 - 2277 - current_hweight(iocg, NULL, &old_hwi); 2278 - ioc->vtime_err -= div64_u64(excess * old_hwi, 2279 - WEIGHT_ONE); 2280 - } 2281 - 2282 - __propagate_weights(iocg, 0, 0, false, &now); 2283 - list_del_init(&iocg->active_list); 2284 - } 2285 - 2286 - spin_unlock(&iocg->waitq.lock); 2287 - } 2288 - commit_weights(ioc); 2289 2290 /* 2291 * Wait and indebt stat are flushed above and the donation calculation ··· 2234 2235 /* calc usage and see whether some weights need to be moved around */ 2236 list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { 2237 - u64 vdone, vtime, usage_us, usage_dur; 2238 - u32 usage, hw_active, hw_inuse; 2239 2240 /* 2241 * Collect unused and wind vtime closer to vnow to prevent ··· 2266 usage_us = iocg->usage_delta_us; 2267 usage_us_sum += usage_us; 2268 2269 - if (vdone != vtime) { 2270 - u64 inflight_us = DIV64_U64_ROUND_UP( 2271 - cost_to_abs_cost(vtime - vdone, hw_inuse), 2272 - ioc->vtime_base_rate); 2273 - usage_us = max(usage_us, inflight_us); 2274 - } 2275 - 2276 - /* convert to hweight based usage ratio */ 2277 - if (time_after64(iocg->activated_at, ioc->period_at)) 2278 - usage_dur = max_t(u64, now.now - iocg->activated_at, 1); 2279 - else 2280 - usage_dur = max_t(u64, now.now - ioc->period_at, 1); 2281 - 2282 - usage = clamp_t(u32, 2283 - DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, 2284 - usage_dur), 2285 - 1, WEIGHT_ONE); 2286 - 2287 /* see whether there's surplus vtime */ 2288 WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); 2289 if (hw_inuse < hw_active || 2290 (!waitqueue_active(&iocg->waitq) && 2291 time_before64(vtime, now.vnow - ioc->margins.low))) { 2292 - u32 hwa, old_hwi, hwm, new_hwi; 2293 2294 /* 2295 * Already donating or accumulated enough to start. 
··· 2375 2376 ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); 2377 2378 - if (ioc->busy_level > 0 || (ioc->busy_level < 0 && !nr_lagging)) { 2379 - u64 vrate = ioc->vtime_base_rate; 2380 - u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; 2381 - 2382 - /* rq_wait signal is always reliable, ignore user vrate_min */ 2383 - if (rq_wait_pct > RQ_WAIT_BUSY_PCT) 2384 - vrate_min = VRATE_MIN; 2385 - 2386 - /* 2387 - * If vrate is out of bounds, apply clamp gradually as the 2388 - * bounds can change abruptly. Otherwise, apply busy_level 2389 - * based adjustment. 2390 - */ 2391 - if (vrate < vrate_min) { 2392 - vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), 2393 - 100); 2394 - vrate = min(vrate, vrate_min); 2395 - } else if (vrate > vrate_max) { 2396 - vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), 2397 - 100); 2398 - vrate = max(vrate, vrate_max); 2399 - } else { 2400 - int idx = min_t(int, abs(ioc->busy_level), 2401 - ARRAY_SIZE(vrate_adj_pct) - 1); 2402 - u32 adj_pct = vrate_adj_pct[idx]; 2403 - 2404 - if (ioc->busy_level > 0) 2405 - adj_pct = 100 - adj_pct; 2406 - else 2407 - adj_pct = 100 + adj_pct; 2408 - 2409 - vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), 2410 - vrate_min, vrate_max); 2411 - } 2412 - 2413 - trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, 2414 - nr_lagging, nr_shortages); 2415 - 2416 - ioc->vtime_base_rate = vrate; 2417 - ioc_refresh_margins(ioc); 2418 - } else if (ioc->busy_level != prev_busy_level || nr_lagging) { 2419 - trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), 2420 - missed_ppm, rq_wait_pct, nr_lagging, 2421 - nr_shortages); 2422 - } 2423 2424 ioc_refresh_params(ioc, false); 2425 ··· 2423 return cost; 2424 2425 /* 2426 - * We only increase inuse during period and do so iff the margin has 2427 * deteriorated since the previous adjustment. 2428 */ 2429 if (margin >= iocg->saved_margin || margin >= margins->low || ··· 3143 static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, 3144 size_t nbytes, loff_t off) 3145 { 3146 - struct gendisk *disk; 3147 struct ioc *ioc; 3148 u32 qos[NR_QOS_PARAMS]; 3149 bool enable, user; 3150 char *p; 3151 int ret; 3152 3153 - disk = blkcg_conf_get_disk(&input); 3154 - if (IS_ERR(disk)) 3155 - return PTR_ERR(disk); 3156 3157 - ioc = q_to_ioc(disk->queue); 3158 if (!ioc) { 3159 - ret = blk_iocost_init(disk->queue); 3160 if (ret) 3161 goto err; 3162 - ioc = q_to_ioc(disk->queue); 3163 } 3164 3165 spin_lock_irq(&ioc->lock); ··· 3254 ioc_refresh_params(ioc, true); 3255 spin_unlock_irq(&ioc->lock); 3256 3257 - put_disk_and_module(disk); 3258 return nbytes; 3259 einval: 3260 ret = -EINVAL; 3261 err: 3262 - put_disk_and_module(disk); 3263 return ret; 3264 } 3265 ··· 3310 static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, 3311 size_t nbytes, loff_t off) 3312 { 3313 - struct gendisk *disk; 3314 struct ioc *ioc; 3315 u64 u[NR_I_LCOEFS]; 3316 bool user; 3317 char *p; 3318 int ret; 3319 3320 - disk = blkcg_conf_get_disk(&input); 3321 - if (IS_ERR(disk)) 3322 - return PTR_ERR(disk); 3323 3324 - ioc = q_to_ioc(disk->queue); 3325 if (!ioc) { 3326 - ret = blk_iocost_init(disk->queue); 3327 if (ret) 3328 goto err; 3329 - ioc = q_to_ioc(disk->queue); 3330 } 3331 3332 spin_lock_irq(&ioc->lock); ··· 3379 ioc_refresh_params(ioc, true); 3380 spin_unlock_irq(&ioc->lock); 3381 3382 - put_disk_and_module(disk); 3383 return nbytes; 3384 3385 einval: 3386 ret = -EINVAL; 3387 err: 3388 - put_disk_and_module(disk); 3389 return ret; 3390 } 3391
··· 39 * On top of that, a size cost proportional to the length of the IO is 40 * added. While simple, this model captures the operational 41 * characteristics of a wide varienty of devices well enough. Default 42 + * parameters for several different classes of devices are provided and the 43 * parameters can be configured from userspace via 44 * /sys/fs/cgroup/io.cost.model. 45 * ··· 77 * 78 * This constitutes the basis of IO capacity distribution. Each cgroup's 79 * vtime is running at a rate determined by its hweight. A cgroup tracks 80 + * the vtime consumed by past IOs and can issue a new IO if doing so 81 * wouldn't outrun the current device vtime. Otherwise, the IO is 82 * suspended until the vtime has progressed enough to cover it. 83 * ··· 155 * Instead of debugfs or other clumsy monitoring mechanisms, this 156 * controller uses a drgn based monitoring script - 157 * tools/cgroup/iocost_monitor.py. For details on drgn, please see 158 + * https://github.com/osandov/drgn. The output looks like the following. 159 * 160 * sdb RUN per=300ms cur_per=234.218:v203.695 busy= +1 vrate= 62.12% 161 * active weight hweight% inflt% dbt delay usages% ··· 370 AUTOP_SSD_FAST, 371 }; 372 373 struct ioc_params { 374 u32 qos[NR_QOS_PARAMS]; 375 u64 i_lcoefs[NR_I_LCOEFS]; ··· 492 /* 493 * `vtime` is this iocg's vtime cursor which progresses as IOs are 494 * issued. If lagging behind device vtime, the delta represents 495 + * the currently available IO budget. If running ahead, the 496 * overage. 497 * 498 * `vtime_done` is the same but progressed on completion rather ··· 973 ioc->vtime_err = clamp(ioc->vtime_err, -vperiod, vperiod); 974 } 975 976 + static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct, 977 + int nr_lagging, int nr_shortages, 978 + int prev_busy_level, u32 *missed_ppm) 979 + { 980 + u64 vrate = ioc->vtime_base_rate; 981 + u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max; 982 + 983 + if (!ioc->busy_level || (ioc->busy_level < 0 && nr_lagging)) { 984 + if (ioc->busy_level != prev_busy_level || nr_lagging) 985 + trace_iocost_ioc_vrate_adj(ioc, atomic64_read(&ioc->vtime_rate), 986 + missed_ppm, rq_wait_pct, 987 + nr_lagging, nr_shortages); 988 + 989 + return; 990 + } 991 + 992 + /* rq_wait signal is always reliable, ignore user vrate_min */ 993 + if (rq_wait_pct > RQ_WAIT_BUSY_PCT) 994 + vrate_min = VRATE_MIN; 995 + 996 + /* 997 + * If vrate is out of bounds, apply clamp gradually as the 998 + * bounds can change abruptly. Otherwise, apply busy_level 999 + * based adjustment. 
1000 + */ 1001 + if (vrate < vrate_min) { 1002 + vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), 100); 1003 + vrate = min(vrate, vrate_min); 1004 + } else if (vrate > vrate_max) { 1005 + vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), 100); 1006 + vrate = max(vrate, vrate_max); 1007 + } else { 1008 + int idx = min_t(int, abs(ioc->busy_level), 1009 + ARRAY_SIZE(vrate_adj_pct) - 1); 1010 + u32 adj_pct = vrate_adj_pct[idx]; 1011 + 1012 + if (ioc->busy_level > 0) 1013 + adj_pct = 100 - adj_pct; 1014 + else 1015 + adj_pct = 100 + adj_pct; 1016 + 1017 + vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100), 1018 + vrate_min, vrate_max); 1019 + } 1020 + 1021 + trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct, 1022 + nr_lagging, nr_shortages); 1023 + 1024 + ioc->vtime_base_rate = vrate; 1025 + ioc_refresh_margins(ioc); 1026 + } 1027 + 1028 /* take a snapshot of the current [v]time and vrate */ 1029 static void ioc_now(struct ioc *ioc, struct ioc_now *now) 1030 { ··· 1046 1047 /* 1048 * The delta between inuse and active sums indicates that 1049 + * much of weight is being given away. Parent's inuse 1050 * and active should reflect the ratio. 1051 */ 1052 if (parent->child_active_sum) { ··· 2071 } 2072 } 2073 2074 + /* 2075 + * Check the active iocgs' state to avoid oversleeping and deactive 2076 + * idle iocgs. 2077 + * 2078 + * Since waiters determine the sleep durations based on the vrate 2079 + * they saw at the time of sleep, if vrate has increased, some 2080 + * waiters could be sleeping for too long. Wake up tardy waiters 2081 + * which should have woken up in the last period and expire idle 2082 + * iocgs. 2083 + */ 2084 + static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now) 2085 + { 2086 + int nr_debtors = 0; 2087 + struct ioc_gq *iocg, *tiocg; 2088 + 2089 + list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) { 2090 + if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt && 2091 + !iocg->delay && !iocg_is_idle(iocg)) 2092 + continue; 2093 + 2094 + spin_lock(&iocg->waitq.lock); 2095 + 2096 + /* flush wait and indebt stat deltas */ 2097 + if (iocg->wait_since) { 2098 + iocg->local_stat.wait_us += now->now - iocg->wait_since; 2099 + iocg->wait_since = now->now; 2100 + } 2101 + if (iocg->indebt_since) { 2102 + iocg->local_stat.indebt_us += 2103 + now->now - iocg->indebt_since; 2104 + iocg->indebt_since = now->now; 2105 + } 2106 + if (iocg->indelay_since) { 2107 + iocg->local_stat.indelay_us += 2108 + now->now - iocg->indelay_since; 2109 + iocg->indelay_since = now->now; 2110 + } 2111 + 2112 + if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt || 2113 + iocg->delay) { 2114 + /* might be oversleeping vtime / hweight changes, kick */ 2115 + iocg_kick_waitq(iocg, true, now); 2116 + if (iocg->abs_vdebt || iocg->delay) 2117 + nr_debtors++; 2118 + } else if (iocg_is_idle(iocg)) { 2119 + /* no waiter and idle, deactivate */ 2120 + u64 vtime = atomic64_read(&iocg->vtime); 2121 + s64 excess; 2122 + 2123 + /* 2124 + * @iocg has been inactive for a full duration and will 2125 + * have a high budget. Account anything above target as 2126 + * error and throw away. On reactivation, it'll start 2127 + * with the target budget. 
2128 + */ 2129 + excess = now->vnow - vtime - ioc->margins.target; 2130 + if (excess > 0) { 2131 + u32 old_hwi; 2132 + 2133 + current_hweight(iocg, NULL, &old_hwi); 2134 + ioc->vtime_err -= div64_u64(excess * old_hwi, 2135 + WEIGHT_ONE); 2136 + } 2137 + 2138 + __propagate_weights(iocg, 0, 0, false, now); 2139 + list_del_init(&iocg->active_list); 2140 + } 2141 + 2142 + spin_unlock(&iocg->waitq.lock); 2143 + } 2144 + 2145 + commit_weights(ioc); 2146 + return nr_debtors; 2147 + } 2148 + 2149 static void ioc_timer_fn(struct timer_list *timer) 2150 { 2151 struct ioc *ioc = container_of(timer, struct ioc, timer); 2152 struct ioc_gq *iocg, *tiocg; 2153 struct ioc_now now; 2154 LIST_HEAD(surpluses); 2155 + int nr_debtors, nr_shortages = 0, nr_lagging = 0; 2156 u64 usage_us_sum = 0; 2157 u32 ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM]; 2158 u32 ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM]; ··· 2099 return; 2100 } 2101 2102 + nr_debtors = ioc_check_iocgs(ioc, &now); 2103 2104 /* 2105 * Wait and indebt stat are flushed above and the donation calculation ··· 2170 2171 /* calc usage and see whether some weights need to be moved around */ 2172 list_for_each_entry(iocg, &ioc->active_iocgs, active_list) { 2173 + u64 vdone, vtime, usage_us; 2174 + u32 hw_active, hw_inuse; 2175 2176 /* 2177 * Collect unused and wind vtime closer to vnow to prevent ··· 2202 usage_us = iocg->usage_delta_us; 2203 usage_us_sum += usage_us; 2204 2205 /* see whether there's surplus vtime */ 2206 WARN_ON_ONCE(!list_empty(&iocg->surplus_list)); 2207 if (hw_inuse < hw_active || 2208 (!waitqueue_active(&iocg->waitq) && 2209 time_before64(vtime, now.vnow - ioc->margins.low))) { 2210 + u32 hwa, old_hwi, hwm, new_hwi, usage; 2211 + u64 usage_dur; 2212 + 2213 + if (vdone != vtime) { 2214 + u64 inflight_us = DIV64_U64_ROUND_UP( 2215 + cost_to_abs_cost(vtime - vdone, hw_inuse), 2216 + ioc->vtime_base_rate); 2217 + 2218 + usage_us = max(usage_us, inflight_us); 2219 + } 2220 + 2221 + /* convert to hweight based usage ratio */ 2222 + if (time_after64(iocg->activated_at, ioc->period_at)) 2223 + usage_dur = max_t(u64, now.now - iocg->activated_at, 1); 2224 + else 2225 + usage_dur = max_t(u64, now.now - ioc->period_at, 1); 2226 + 2227 + usage = clamp_t(u32, 2228 + DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE, 2229 + usage_dur), 2230 + 1, WEIGHT_ONE); 2231 2232 /* 2233 * Already donating or accumulated enough to start. ··· 2309 2310 ioc->busy_level = clamp(ioc->busy_level, -1000, 1000); 2311 2312 + ioc_adjust_base_vrate(ioc, rq_wait_pct, nr_lagging, nr_shortages, 2313 + prev_busy_level, missed_ppm); 2314 2315 ioc_refresh_params(ioc, false); 2316 ··· 2400 return cost; 2401 2402 /* 2403 + * We only increase inuse during period and do so if the margin has 2404 * deteriorated since the previous adjustment. 
2405 */ 2406 if (margin >= iocg->saved_margin || margin >= margins->low || ··· 3120 static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, 3121 size_t nbytes, loff_t off) 3122 { 3123 + struct block_device *bdev; 3124 struct ioc *ioc; 3125 u32 qos[NR_QOS_PARAMS]; 3126 bool enable, user; 3127 char *p; 3128 int ret; 3129 3130 + bdev = blkcg_conf_open_bdev(&input); 3131 + if (IS_ERR(bdev)) 3132 + return PTR_ERR(bdev); 3133 3134 + ioc = q_to_ioc(bdev->bd_disk->queue); 3135 if (!ioc) { 3136 + ret = blk_iocost_init(bdev->bd_disk->queue); 3137 if (ret) 3138 goto err; 3139 + ioc = q_to_ioc(bdev->bd_disk->queue); 3140 } 3141 3142 spin_lock_irq(&ioc->lock); ··· 3231 ioc_refresh_params(ioc, true); 3232 spin_unlock_irq(&ioc->lock); 3233 3234 + blkdev_put_no_open(bdev); 3235 return nbytes; 3236 einval: 3237 ret = -EINVAL; 3238 err: 3239 + blkdev_put_no_open(bdev); 3240 return ret; 3241 } 3242 ··· 3287 static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, 3288 size_t nbytes, loff_t off) 3289 { 3290 + struct block_device *bdev; 3291 struct ioc *ioc; 3292 u64 u[NR_I_LCOEFS]; 3293 bool user; 3294 char *p; 3295 int ret; 3296 3297 + bdev = blkcg_conf_open_bdev(&input); 3298 + if (IS_ERR(bdev)) 3299 + return PTR_ERR(bdev); 3300 3301 + ioc = q_to_ioc(bdev->bd_disk->queue); 3302 if (!ioc) { 3303 + ret = blk_iocost_init(bdev->bd_disk->queue); 3304 if (ret) 3305 goto err; 3306 + ioc = q_to_ioc(bdev->bd_disk->queue); 3307 } 3308 3309 spin_lock_irq(&ioc->lock); ··· 3356 ioc_refresh_params(ioc, true); 3357 spin_unlock_irq(&ioc->lock); 3358 3359 + blkdev_put_no_open(bdev); 3360 return nbytes; 3361 3362 einval: 3363 ret = -EINVAL; 3364 err: 3365 + blkdev_put_no_open(bdev); 3366 return ret; 3367 } 3368
+1 -1
block/blk-lib.c
··· 65 66 /* In case the discard request is in a partition */ 67 if (bdev_is_partition(bdev)) 68 - part_offset = bdev->bd_part->start_sect; 69 70 while (nr_sects) { 71 sector_t granularity_aligned_lba, req_sects;
··· 65 66 /* In case the discard request is in a partition */ 67 if (bdev_is_partition(bdev)) 68 + part_offset = bdev->bd_start_sect; 69 70 while (nr_sects) { 71 sector_t granularity_aligned_lba, req_sects;
+12 -6
block/blk-merge.c
··· 279 return NULL; 280 split: 281 *segs = nsegs; 282 return bio_split(bio, sectors, GFP_NOIO, bs); 283 } 284 ··· 346 split->bi_opf |= REQ_NOMERGE; 347 348 bio_chain(split, *bio); 349 - trace_block_split(q, split, (*bio)->bi_iter.bi_sector); 350 submit_bio_noacct(*bio); 351 *bio = split; 352 } ··· 691 part_stat_lock(); 692 part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); 693 part_stat_unlock(); 694 - 695 - hd_struct_put(req->part); 696 } 697 } 698 ··· 807 */ 808 blk_account_io_merge_request(next); 809 810 - trace_block_rq_merge(q, next); 811 812 /* 813 * ownership of bio passed from next to req, return 'next' for ··· 930 if (!ll_back_merge_fn(req, bio, nr_segs)) 931 return BIO_MERGE_FAILED; 932 933 - trace_block_bio_backmerge(req->q, req, bio); 934 rq_qos_merge(req->q, req, bio); 935 936 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) ··· 954 if (!ll_front_merge_fn(req, bio, nr_segs)) 955 return BIO_MERGE_FAILED; 956 957 - trace_block_bio_frontmerge(req->q, req, bio); 958 rq_qos_merge(req->q, req, bio); 959 960 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
··· 279 return NULL; 280 split: 281 *segs = nsegs; 282 + 283 + /* 284 + * Bio splitting may cause subtle trouble such as hang when doing sync 285 + * iopoll in direct IO routine. Given performance gain of iopoll for 286 + * big IO can be trival, disable iopoll when split needed. 287 + */ 288 + bio->bi_opf &= ~REQ_HIPRI; 289 + 290 return bio_split(bio, sectors, GFP_NOIO, bs); 291 } 292 ··· 338 split->bi_opf |= REQ_NOMERGE; 339 340 bio_chain(split, *bio); 341 + trace_block_split(split, (*bio)->bi_iter.bi_sector); 342 submit_bio_noacct(*bio); 343 *bio = split; 344 } ··· 683 part_stat_lock(); 684 part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); 685 part_stat_unlock(); 686 } 687 } 688 ··· 801 */ 802 blk_account_io_merge_request(next); 803 804 + trace_block_rq_merge(next); 805 806 /* 807 * ownership of bio passed from next to req, return 'next' for ··· 924 if (!ll_back_merge_fn(req, bio, nr_segs)) 925 return BIO_MERGE_FAILED; 926 927 + trace_block_bio_backmerge(bio); 928 rq_qos_merge(req->q, req, bio); 929 930 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) ··· 948 if (!ll_front_merge_fn(req, bio, nr_segs)) 949 return BIO_MERGE_FAILED; 950 951 + trace_block_bio_frontmerge(bio); 952 rq_qos_merge(req->q, req, bio); 953 954 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+1 -1
block/blk-mq-sched.c
··· 386 387 void blk_mq_sched_request_inserted(struct request *rq) 388 { 389 - trace_block_rq_insert(rq->q, rq); 390 } 391 EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); 392
··· 386 387 void blk_mq_sched_request_inserted(struct request *rq) 388 { 389 + trace_block_rq_insert(rq); 390 } 391 EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted); 392
+30 -16
block/blk-mq.c
··· 95 } 96 97 struct mq_inflight { 98 - struct hd_struct *part; 99 unsigned int inflight[2]; 100 }; 101 ··· 105 { 106 struct mq_inflight *mi = priv; 107 108 - if (rq->part == mi->part && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) 109 mi->inflight[rq_data_dir(rq)]++; 110 111 return true; 112 } 113 114 - unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) 115 { 116 struct mq_inflight mi = { .part = part }; 117 ··· 122 return mi.inflight[0] + mi.inflight[1]; 123 } 124 125 - void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, 126 - unsigned int inflight[2]) 127 { 128 struct mq_inflight mi = { .part = part }; 129 ··· 731 { 732 struct request_queue *q = rq->q; 733 734 - trace_block_rq_issue(q, rq); 735 736 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { 737 rq->io_start_time_ns = ktime_get_ns(); ··· 758 759 blk_mq_put_driver_tag(rq); 760 761 - trace_block_rq_requeue(q, rq); 762 rq_qos_requeue(q, rq); 763 764 if (blk_mq_request_started(rq)) { ··· 1592 * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue. 1593 * @hctx: Pointer to the hardware queue to run. 1594 * @async: If we want to run the queue asynchronously. 1595 - * @msecs: Microseconds of delay to wait before running the queue. 1596 * 1597 * If !@async, try to run the queue now. Else, run the queue asynchronously and 1598 * with a delay of @msecs. ··· 1621 /** 1622 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. 1623 * @hctx: Pointer to the hardware queue to run. 1624 - * @msecs: Microseconds of delay to wait before running the queue. 1625 * 1626 * Run a hardware queue asynchronously with a delay of @msecs. 1627 */ ··· 1685 /** 1686 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously. 1687 * @q: Pointer to the request queue to run. 1688 - * @msecs: Microseconds of delay to wait before running the queues. 1689 */ 1690 void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) 1691 { ··· 1819 1820 lockdep_assert_held(&ctx->lock); 1821 1822 - trace_block_rq_insert(hctx->queue, rq); 1823 1824 if (at_head) 1825 list_add(&rq->queuelist, &ctx->rq_lists[type]); ··· 1876 */ 1877 list_for_each_entry(rq, list, queuelist) { 1878 BUG_ON(rq->mq_ctx != ctx); 1879 - trace_block_rq_insert(hctx->queue, rq); 1880 } 1881 1882 spin_lock(&ctx->lock); ··· 2157 unsigned int nr_segs; 2158 blk_qc_t cookie; 2159 blk_status_t ret; 2160 2161 blk_queue_bounce(q, &bio); 2162 __blk_queue_split(&bio, &nr_segs); ··· 2174 2175 rq_qos_throttle(q, bio); 2176 2177 data.cmd_flags = bio->bi_opf; 2178 rq = __blk_mq_alloc_request(&data); 2179 if (unlikely(!rq)) { ··· 2185 goto queue_exit; 2186 } 2187 2188 - trace_block_getrq(q, bio, bio->bi_opf); 2189 2190 rq_qos_track(q, rq, bio); 2191 ··· 2268 blk_mq_sched_insert_request(rq, false, true, true); 2269 } 2270 2271 return cookie; 2272 queue_exit: 2273 blk_queue_exit(q); ··· 3380 return 0; 3381 } 3382 3383 /* 3384 * Alloc a tag set to be associated with one or more request queues. 3385 * May fail with EINVAL for various error conditions. May adjust the ··· 3439 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) 3440 set->nr_hw_queues = nr_cpu_ids; 3441 3442 - if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0) 3443 return -ENOMEM; 3444 3445 ret = -ENOMEM; ··· 3874 * the state. Like for the other success return cases, the 3875 * caller is responsible for checking if the IO completed. If 3876 * the IO isn't complete, we'll get called again and will go 3877 - * straight to the busy poll loop. 
3878 */ 3879 - if (blk_mq_poll_hybrid(q, hctx, cookie)) 3880 return 1; 3881 3882 hctx->poll_considered++;
··· 95 } 96 97 struct mq_inflight { 98 + struct block_device *part; 99 unsigned int inflight[2]; 100 }; 101 ··· 105 { 106 struct mq_inflight *mi = priv; 107 108 + if ((!mi->part->bd_partno || rq->part == mi->part) && 109 + blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) 110 mi->inflight[rq_data_dir(rq)]++; 111 112 return true; 113 } 114 115 + unsigned int blk_mq_in_flight(struct request_queue *q, 116 + struct block_device *part) 117 { 118 struct mq_inflight mi = { .part = part }; 119 ··· 120 return mi.inflight[0] + mi.inflight[1]; 121 } 122 123 + void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, 124 + unsigned int inflight[2]) 125 { 126 struct mq_inflight mi = { .part = part }; 127 ··· 729 { 730 struct request_queue *q = rq->q; 731 732 + trace_block_rq_issue(rq); 733 734 if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) { 735 rq->io_start_time_ns = ktime_get_ns(); ··· 756 757 blk_mq_put_driver_tag(rq); 758 759 + trace_block_rq_requeue(rq); 760 rq_qos_requeue(q, rq); 761 762 if (blk_mq_request_started(rq)) { ··· 1590 * __blk_mq_delay_run_hw_queue - Run (or schedule to run) a hardware queue. 1591 * @hctx: Pointer to the hardware queue to run. 1592 * @async: If we want to run the queue asynchronously. 1593 + * @msecs: Milliseconds of delay to wait before running the queue. 1594 * 1595 * If !@async, try to run the queue now. Else, run the queue asynchronously and 1596 * with a delay of @msecs. ··· 1619 /** 1620 * blk_mq_delay_run_hw_queue - Run a hardware queue asynchronously. 1621 * @hctx: Pointer to the hardware queue to run. 1622 + * @msecs: Milliseconds of delay to wait before running the queue. 1623 * 1624 * Run a hardware queue asynchronously with a delay of @msecs. 1625 */ ··· 1683 /** 1684 * blk_mq_delay_run_hw_queues - Run all hardware queues asynchronously. 1685 * @q: Pointer to the request queue to run. 1686 + * @msecs: Milliseconds of delay to wait before running the queues. 1687 */ 1688 void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) 1689 { ··· 1817 1818 lockdep_assert_held(&ctx->lock); 1819 1820 + trace_block_rq_insert(rq); 1821 1822 if (at_head) 1823 list_add(&rq->queuelist, &ctx->rq_lists[type]); ··· 1874 */ 1875 list_for_each_entry(rq, list, queuelist) { 1876 BUG_ON(rq->mq_ctx != ctx); 1877 + trace_block_rq_insert(rq); 1878 } 1879 1880 spin_lock(&ctx->lock); ··· 2155 unsigned int nr_segs; 2156 blk_qc_t cookie; 2157 blk_status_t ret; 2158 + bool hipri; 2159 2160 blk_queue_bounce(q, &bio); 2161 __blk_queue_split(&bio, &nr_segs); ··· 2171 2172 rq_qos_throttle(q, bio); 2173 2174 + hipri = bio->bi_opf & REQ_HIPRI; 2175 + 2176 data.cmd_flags = bio->bi_opf; 2177 rq = __blk_mq_alloc_request(&data); 2178 if (unlikely(!rq)) { ··· 2180 goto queue_exit; 2181 } 2182 2183 + trace_block_getrq(bio); 2184 2185 rq_qos_track(q, rq, bio); 2186 ··· 2263 blk_mq_sched_insert_request(rq, false, true, true); 2264 } 2265 2266 + if (!hipri) 2267 + return BLK_QC_T_NONE; 2268 return cookie; 2269 queue_exit: 2270 blk_queue_exit(q); ··· 3373 return 0; 3374 } 3375 3376 + static int blk_mq_alloc_tag_set_tags(struct blk_mq_tag_set *set, 3377 + int new_nr_hw_queues) 3378 + { 3379 + return blk_mq_realloc_tag_set_tags(set, 0, new_nr_hw_queues); 3380 + } 3381 + 3382 /* 3383 * Alloc a tag set to be associated with one or more request queues. 3384 * May fail with EINVAL for various error conditions. 
May adjust the ··· 3426 if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) 3427 set->nr_hw_queues = nr_cpu_ids; 3428 3429 + if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0) 3430 return -ENOMEM; 3431 3432 ret = -ENOMEM; ··· 3861 * the state. Like for the other success return cases, the 3862 * caller is responsible for checking if the IO completed. If 3863 * the IO isn't complete, we'll get called again and will go 3864 + * straight to the busy poll loop. If specified not to spin, 3865 + * we also should not sleep. 3866 */ 3867 + if (spin && blk_mq_poll_hybrid(q, hctx, cookie)) 3868 return 1; 3869 3870 hctx->poll_considered++;
+5 -4
block/blk-mq.h
··· 99 * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue 100 * @q: request queue 101 * @flags: request command flags 102 - * @cpu: cpu ctx 103 */ 104 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, 105 unsigned int flags, ··· 182 return hctx->nr_ctx && hctx->tags; 183 } 184 185 - unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part); 186 - void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, 187 - unsigned int inflight[2]); 188 189 static inline void blk_mq_put_dispatch_budget(struct request_queue *q) 190 {
··· 99 * blk_mq_map_queue() - map (cmd_flags,type) to hardware queue 100 * @q: request queue 101 * @flags: request command flags 102 + * @ctx: software queue cpu ctx 103 */ 104 static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, 105 unsigned int flags, ··· 182 return hctx->nr_ctx && hctx->tags; 183 } 184 185 + unsigned int blk_mq_in_flight(struct request_queue *q, 186 + struct block_device *part); 187 + void blk_mq_in_flight_rw(struct request_queue *q, struct block_device *part, 188 + unsigned int inflight[2]); 189 190 static inline void blk_mq_put_dispatch_budget(struct request_queue *q) 191 {
+6
block/blk-throttle.c
··· 587 tg_update_has_rules(tg); 588 } 589 590 static void blk_throtl_update_limit_valid(struct throtl_data *td) 591 { 592 struct cgroup_subsys_state *pos_css; ··· 608 609 td->limit_valid[LIMIT_LOW] = low_valid; 610 } 611 612 static void throtl_upgrade_state(struct throtl_data *td); 613 static void throtl_pd_offline(struct blkg_policy_data *pd)
··· 587 tg_update_has_rules(tg); 588 } 589 590 + #ifdef CONFIG_BLK_DEV_THROTTLING_LOW 591 static void blk_throtl_update_limit_valid(struct throtl_data *td) 592 { 593 struct cgroup_subsys_state *pos_css; ··· 607 608 td->limit_valid[LIMIT_LOW] = low_valid; 609 } 610 + #else 611 + static inline void blk_throtl_update_limit_valid(struct throtl_data *td) 612 + { 613 + } 614 + #endif 615 616 static void throtl_upgrade_state(struct throtl_data *td); 617 static void throtl_pd_offline(struct blkg_policy_data *pd)
-1
block/blk-wbt.c
··· 835 rwb->enable_state = WBT_STATE_ON_DEFAULT; 836 rwb->wc = 1; 837 rwb->rq_depth.default_depth = RWB_DEF_DEPTH; 838 - wbt_update_limits(rwb); 839 840 /* 841 * Assign rwb and add the stats callback.
··· 835 rwb->enable_state = WBT_STATE_ON_DEFAULT; 836 rwb->wc = 1; 837 rwb->rq_depth.default_depth = RWB_DEF_DEPTH; 838 839 /* 840 * Assign rwb and add the stats callback.
+15 -1
block/blk-zoned.c
··· 508 noio_flag = memalloc_noio_save(); 509 ret = disk->fops->report_zones(disk, 0, UINT_MAX, 510 blk_revalidate_zone_cb, &args); 511 memalloc_noio_restore(noio_flag); 512 513 /* 514 * Install the new bitmaps and update nr_zones only once the queue is ··· 530 * referencing the bitmaps). 531 */ 532 blk_mq_freeze_queue(q); 533 - if (ret >= 0) { 534 blk_queue_chunk_sectors(q, args.zone_sectors); 535 q->nr_zones = args.nr_zones; 536 swap(q->seq_zones_wlock, args.seq_zones_wlock);
··· 508 noio_flag = memalloc_noio_save(); 509 ret = disk->fops->report_zones(disk, 0, UINT_MAX, 510 blk_revalidate_zone_cb, &args); 511 + if (!ret) { 512 + pr_warn("%s: No zones reported\n", disk->disk_name); 513 + ret = -ENODEV; 514 + } 515 memalloc_noio_restore(noio_flag); 516 + 517 + /* 518 + * If zones where reported, make sure that the entire disk capacity 519 + * has been checked. 520 + */ 521 + if (ret > 0 && args.sector != get_capacity(disk)) { 522 + pr_warn("%s: Missing zones from sector %llu\n", 523 + disk->disk_name, args.sector); 524 + ret = -ENODEV; 525 + } 526 527 /* 528 * Install the new bitmaps and update nr_zones only once the queue is ··· 516 * referencing the bitmaps). 517 */ 518 blk_mq_freeze_queue(q); 519 + if (ret > 0) { 520 blk_queue_chunk_sectors(q, args.zone_sectors); 521 q->nr_zones = args.nr_zones; 522 swap(q->seq_zones_wlock, args.seq_zones_wlock);
+4 -81
block/blk.h
··· 25 struct list_head flush_data_in_flight; 26 struct request *flush_rq; 27 28 - struct lock_class_key key; 29 spinlock_t mq_flush_lock; 30 }; 31 ··· 214 __elevator_exit(q, e); 215 } 216 217 - struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); 218 219 ssize_t part_size_show(struct device *dev, struct device_attribute *attr, 220 char *buf); ··· 347 static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} 348 #endif 349 350 - struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); 351 352 - int blk_alloc_devt(struct hd_struct *part, dev_t *devt); 353 void blk_free_devt(dev_t devt); 354 - void blk_invalidate_devt(dev_t devt); 355 char *disk_name(struct gendisk *hd, int partno, char *buf); 356 #define ADDPART_FLAG_NONE 0 357 #define ADDPART_FLAG_RAID 1 358 #define ADDPART_FLAG_WHOLEDISK 2 359 - void delete_partition(struct hd_struct *part); 360 int bdev_add_partition(struct block_device *bdev, int partno, 361 sector_t start, sector_t length); 362 int bdev_del_partition(struct block_device *bdev, int partno); 363 int bdev_resize_partition(struct block_device *bdev, int partno, 364 sector_t start, sector_t length); 365 int disk_expand_part_tbl(struct gendisk *disk, int target); 366 - int hd_ref_init(struct hd_struct *part); 367 - 368 - /* no need to get/put refcount of part0 */ 369 - static inline int hd_struct_try_get(struct hd_struct *part) 370 - { 371 - if (part->partno) 372 - return percpu_ref_tryget_live(&part->ref); 373 - return 1; 374 - } 375 - 376 - static inline void hd_struct_put(struct hd_struct *part) 377 - { 378 - if (part->partno) 379 - percpu_ref_put(&part->ref); 380 - } 381 - 382 - static inline void hd_free_part(struct hd_struct *part) 383 - { 384 - free_percpu(part->dkstats); 385 - kfree(part->info); 386 - percpu_ref_exit(&part->ref); 387 - } 388 - 389 - /* 390 - * Any access of part->nr_sects which is not protected by partition 391 - * bd_mutex or gendisk bdev bd_mutex, should be done using this 392 - * accessor function. 393 - * 394 - * Code written along the lines of i_size_read() and i_size_write(). 395 - * CONFIG_PREEMPTION case optimizes the case of UP kernel with preemption 396 - * on. 397 - */ 398 - static inline sector_t part_nr_sects_read(struct hd_struct *part) 399 - { 400 - #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 401 - sector_t nr_sects; 402 - unsigned seq; 403 - do { 404 - seq = read_seqcount_begin(&part->nr_sects_seq); 405 - nr_sects = part->nr_sects; 406 - } while (read_seqcount_retry(&part->nr_sects_seq, seq)); 407 - return nr_sects; 408 - #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) 409 - sector_t nr_sects; 410 - 411 - preempt_disable(); 412 - nr_sects = part->nr_sects; 413 - preempt_enable(); 414 - return nr_sects; 415 - #else 416 - return part->nr_sects; 417 - #endif 418 - } 419 - 420 - /* 421 - * Should be called with mutex lock held (typically bd_mutex) of partition 422 - * to provide mutual exlusion among writers otherwise seqcount might be 423 - * left in wrong state leaving the readers spinning infinitely. 
424 - */ 425 - static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) 426 - { 427 - #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 428 - preempt_disable(); 429 - write_seqcount_begin(&part->nr_sects_seq); 430 - part->nr_sects = size; 431 - write_seqcount_end(&part->nr_sects_seq); 432 - preempt_enable(); 433 - #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION) 434 - preempt_disable(); 435 - part->nr_sects = size; 436 - preempt_enable(); 437 - #else 438 - part->nr_sects = size; 439 - #endif 440 - } 441 442 int bio_add_hw_page(struct request_queue *q, struct bio *bio, 443 struct page *page, unsigned int len, unsigned int offset,
··· 25 struct list_head flush_data_in_flight; 26 struct request *flush_rq; 27 28 spinlock_t mq_flush_lock; 29 }; 30 ··· 215 __elevator_exit(q, e); 216 } 217 218 + struct block_device *__disk_get_part(struct gendisk *disk, int partno); 219 220 ssize_t part_size_show(struct device *dev, struct device_attribute *attr, 221 char *buf); ··· 348 static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {} 349 #endif 350 351 + struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector); 352 353 + int blk_alloc_devt(struct block_device *part, dev_t *devt); 354 void blk_free_devt(dev_t devt); 355 char *disk_name(struct gendisk *hd, int partno, char *buf); 356 #define ADDPART_FLAG_NONE 0 357 #define ADDPART_FLAG_RAID 1 358 #define ADDPART_FLAG_WHOLEDISK 2 359 + void delete_partition(struct block_device *part); 360 int bdev_add_partition(struct block_device *bdev, int partno, 361 sector_t start, sector_t length); 362 int bdev_del_partition(struct block_device *bdev, int partno); 363 int bdev_resize_partition(struct block_device *bdev, int partno, 364 sector_t start, sector_t length); 365 int disk_expand_part_tbl(struct gendisk *disk, int target); 366 367 int bio_add_hw_page(struct request_queue *q, struct bio *bio, 368 struct page *page, unsigned int len, unsigned int offset,
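With struct hd_struct folded into struct block_device, the helpers removed from blk.h have direct bdev equivalents; the partition size now lives in the bdev inode, so the 32-bit seqcount/preemption dance in part_nr_sects_read()/part_nr_sects_write() is gone. A rough translation table for converted callers, with the field names taken from the hunks in this series:

#include <linux/genhd.h>

/*
 *   old hd_struct usage            new block_device usage
 *   ----------------------------   -----------------------------
 *   part->partno                   part->bd_partno
 *   part->start_sect               part->bd_start_sect
 *   part_nr_sects_read(part)       bdev_nr_sectors(part)
 *   part->policy                   part->bd_read_only
 *   part->dkstats                  part->bd_stats
 *   part->info                     part->bd_meta_info
 *   part->holder_dir               part->bd_holder_dir
 *   part_devt(part)                part->bd_dev
 */
static inline sector_t sketch_part_end_sector(struct block_device *part)
{
	/*
	 * bdev_nr_sectors() reads the bdev inode size via i_size_read(),
	 * which already handles torn reads on 32-bit, replacing the old
	 * per-partition seqcount.
	 */
	return part->bd_start_sect + bdev_nr_sectors(part);
}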
+1 -1
block/bounce.c
··· 340 } 341 } 342 343 - trace_block_bio_bounce(q, *bio_orig); 344 345 bio->bi_flags |= (1 << BIO_BOUNCED); 346
··· 340 } 341 } 342 343 + trace_block_bio_bounce(*bio_orig); 344 345 bio->bi_flags |= (1 << BIO_BOUNCED); 346
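The request_queue argument dropped here (and, per the merge log, from the other request-based tracepoints) carried no extra information: with the 5.11-era bio layout visible elsewhere in this diff, the queue is recoverable from the bio itself. Illustrative only:

	/* redundant: the bio already identifies the device and queue */
	struct request_queue *q = bio->bi_disk->queue;

	trace_block_bio_bounce(bio);	/* new single-argument form */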
+172 -393
block/genhd.c
··· 17 #include <linux/seq_file.h> 18 #include <linux/slab.h> 19 #include <linux/kmod.h> 20 - #include <linux/kobj_map.h> 21 #include <linux/mutex.h> 22 #include <linux/idr.h> 23 #include <linux/log2.h> ··· 25 26 #include "blk.h" 27 28 - static DEFINE_MUTEX(block_class_lock); 29 static struct kobject *block_depr; 30 31 /* for extended dynamic devt allocation, currently only one major is used */ 32 #define NR_EXT_DEVT (1 << MINORBITS) 33 - 34 - /* For extended devt allocation. ext_devt_lock prevents look up 35 - * results from going away underneath its user. 36 - */ 37 - static DEFINE_SPINLOCK(ext_devt_lock); 38 - static DEFINE_IDR(ext_devt_idr); 39 40 static void disk_check_events(struct disk_events *ev, 41 unsigned int *clearing_ptr); ··· 40 static void disk_del_events(struct gendisk *disk); 41 static void disk_release_events(struct gendisk *disk); 42 43 /* 44 - * Set disk capacity and notify if the size is not currently 45 - * zero and will not be set to zero 46 */ 47 - bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, 48 - bool update_bdev) 49 { 50 sector_t capacity = get_capacity(disk); 51 52 set_capacity(disk, size); 53 - if (update_bdev) 54 - revalidate_disk_size(disk, true); 55 56 - if (capacity != size && capacity != 0 && size != 0) { 57 - char *envp[] = { "RESIZE=1", NULL }; 58 59 - kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); 60 - return true; 61 - } 62 63 - return false; 64 } 65 - 66 - EXPORT_SYMBOL_GPL(set_capacity_revalidate_and_notify); 67 68 /* 69 * Format the device name of the indicated disk into the supplied buffer and ··· 106 } 107 EXPORT_SYMBOL(bdevname); 108 109 - static void part_stat_read_all(struct hd_struct *part, struct disk_stats *stat) 110 { 111 int cpu; 112 113 memset(stat, 0, sizeof(struct disk_stats)); 114 for_each_possible_cpu(cpu) { 115 - struct disk_stats *ptr = per_cpu_ptr(part->dkstats, cpu); 116 int group; 117 118 for (group = 0; group < NR_STAT_GROUPS; group++) { ··· 127 } 128 } 129 130 - static unsigned int part_in_flight(struct hd_struct *part) 131 { 132 unsigned int inflight = 0; 133 int cpu; ··· 142 return inflight; 143 } 144 145 - static void part_in_flight_rw(struct hd_struct *part, unsigned int inflight[2]) 146 { 147 int cpu; 148 ··· 159 inflight[1] = 0; 160 } 161 162 - struct hd_struct *__disk_get_part(struct gendisk *disk, int partno) 163 { 164 struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); 165 166 if (unlikely(partno < 0 || partno >= ptbl->len)) 167 return NULL; 168 return rcu_dereference(ptbl->part[partno]); 169 - } 170 - 171 - /** 172 - * disk_get_part - get partition 173 - * @disk: disk to look partition from 174 - * @partno: partition number 175 - * 176 - * Look for partition @partno from @disk. If found, increment 177 - * reference count and return it. 178 - * 179 - * CONTEXT: 180 - * Don't care. 181 - * 182 - * RETURNS: 183 - * Pointer to the found partition on success, NULL if not found. 184 - */ 185 - struct hd_struct *disk_get_part(struct gendisk *disk, int partno) 186 - { 187 - struct hd_struct *part; 188 - 189 - rcu_read_lock(); 190 - part = __disk_get_part(disk, partno); 191 - if (part) 192 - get_device(part_to_dev(part)); 193 - rcu_read_unlock(); 194 - 195 - return part; 196 } 197 198 /** ··· 212 * CONTEXT: 213 * Don't care. 
214 */ 215 - struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) 216 { 217 struct disk_part_tbl *ptbl; 218 int inc, end; 219 220 /* put the last partition */ 221 - disk_put_part(piter->part); 222 - piter->part = NULL; 223 224 /* get part_tbl */ 225 rcu_read_lock(); ··· 239 240 /* iterate to the next partition */ 241 for (; piter->idx != end; piter->idx += inc) { 242 - struct hd_struct *part; 243 244 part = rcu_dereference(ptbl->part[piter->idx]); 245 if (!part) 246 continue; 247 - if (!part_nr_sects_read(part) && 248 !(piter->flags & DISK_PITER_INCL_EMPTY) && 249 !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && 250 piter->idx == 0)) 251 continue; 252 253 - get_device(part_to_dev(part)); 254 - piter->part = part; 255 piter->idx += inc; 256 break; 257 } ··· 274 */ 275 void disk_part_iter_exit(struct disk_part_iter *piter) 276 { 277 - disk_put_part(piter->part); 278 piter->part = NULL; 279 } 280 EXPORT_SYMBOL_GPL(disk_part_iter_exit); 281 282 - static inline int sector_in_part(struct hd_struct *part, sector_t sector) 283 { 284 - return part->start_sect <= sector && 285 - sector < part->start_sect + part_nr_sects_read(part); 286 } 287 288 /** ··· 295 * primarily used for stats accounting. 296 * 297 * CONTEXT: 298 - * RCU read locked. The returned partition pointer is always valid 299 - * because its refcount is grabbed except for part0, which lifetime 300 - * is same with the disk. 301 * 302 * RETURNS: 303 * Found partition on success, part0 is returned if no partition matches 304 * or the matched partition is being deleted. 305 */ 306 - struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 307 { 308 struct disk_part_tbl *ptbl; 309 - struct hd_struct *part; 310 int i; 311 312 rcu_read_lock(); 313 ptbl = rcu_dereference(disk->part_tbl); 314 315 part = rcu_dereference(ptbl->last_lookup); 316 - if (part && sector_in_part(part, sector) && hd_struct_try_get(part)) 317 goto out_unlock; 318 319 for (i = 1; i < ptbl->len; i++) { 320 part = rcu_dereference(ptbl->part[i]); 321 - 322 if (part && sector_in_part(part, sector)) { 323 - /* 324 - * only live partition can be cached for lookup, 325 - * so use-after-free on cached & deleting partition 326 - * can be avoided 327 - */ 328 - if (!hd_struct_try_get(part)) 329 - break; 330 rcu_assign_pointer(ptbl->last_lookup, part); 331 goto out_unlock; 332 } 333 } 334 335 - part = &disk->part0; 336 out_unlock: 337 rcu_read_unlock(); 338 return part; ··· 373 struct blk_major_name *next; 374 int major; 375 char name[16]; 376 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 377 378 /* index in the above - for now: assume no multimajor ranges */ 379 static inline int major_to_index(unsigned major) ··· 388 { 389 struct blk_major_name *dp; 390 391 - mutex_lock(&block_class_lock); 392 for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) 393 if (dp->major == offset) 394 seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 395 - mutex_unlock(&block_class_lock); 396 } 397 #endif /* CONFIG_PROC_FS */ 398 399 /** 400 - * register_blkdev - register a new block device 401 * 402 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If 403 * @major = 0, try to allocate any unused major number. 404 * @name: the name of the new block device as a zero terminated string 405 * 406 * The @name must be unique within the system. 407 * ··· 416 * 417 * See Documentation/admin-guide/devices.txt for the list of allocated 418 * major numbers. 
419 */ 420 - int register_blkdev(unsigned int major, const char *name) 421 { 422 struct blk_major_name **n, *p; 423 int index, ret = 0; 424 425 - mutex_lock(&block_class_lock); 426 427 /* temporary */ 428 if (major == 0) { ··· 459 } 460 461 p->major = major; 462 strlcpy(p->name, name, sizeof(p->name)); 463 p->next = NULL; 464 index = major_to_index(major); ··· 479 kfree(p); 480 } 481 out: 482 - mutex_unlock(&block_class_lock); 483 return ret; 484 } 485 - 486 - EXPORT_SYMBOL(register_blkdev); 487 488 void unregister_blkdev(unsigned int major, const char *name) 489 { ··· 490 struct blk_major_name *p = NULL; 491 int index = major_to_index(major); 492 493 - mutex_lock(&block_class_lock); 494 for (n = &major_names[index]; *n; n = &(*n)->next) 495 if ((*n)->major == major) 496 break; ··· 500 p = *n; 501 *n = p->next; 502 } 503 - mutex_unlock(&block_class_lock); 504 kfree(p); 505 } 506 507 EXPORT_SYMBOL(unregister_blkdev); 508 - 509 - static struct kobj_map *bdev_map; 510 511 /** 512 * blk_mangle_minor - scatter minor numbers apart ··· 539 } 540 541 /** 542 - * blk_alloc_devt - allocate a dev_t for a partition 543 - * @part: partition to allocate dev_t for 544 * @devt: out parameter for resulting dev_t 545 * 546 * Allocate a dev_t for block device. ··· 552 * CONTEXT: 553 * Might sleep. 554 */ 555 - int blk_alloc_devt(struct hd_struct *part, dev_t *devt) 556 { 557 - struct gendisk *disk = part_to_disk(part); 558 int idx; 559 560 /* in consecutive minor range? */ 561 - if (part->partno < disk->minors) { 562 - *devt = MKDEV(disk->major, disk->first_minor + part->partno); 563 return 0; 564 } 565 566 - /* allocate ext devt */ 567 - idr_preload(GFP_KERNEL); 568 - 569 - spin_lock_bh(&ext_devt_lock); 570 - idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); 571 - spin_unlock_bh(&ext_devt_lock); 572 - 573 - idr_preload_end(); 574 if (idx < 0) 575 return idx == -ENOSPC ? -EBUSY : idx; 576 ··· 582 */ 583 void blk_free_devt(dev_t devt) 584 { 585 - if (devt == MKDEV(0, 0)) 586 - return; 587 - 588 - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 589 - spin_lock_bh(&ext_devt_lock); 590 - idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 591 - spin_unlock_bh(&ext_devt_lock); 592 - } 593 - } 594 - 595 - /* 596 - * We invalidate devt by assigning NULL pointer for devt in idr. 597 - */ 598 - void blk_invalidate_devt(dev_t devt) 599 - { 600 - if (MAJOR(devt) == BLOCK_EXT_MAJOR) { 601 - spin_lock_bh(&ext_devt_lock); 602 - idr_replace(&ext_devt_idr, NULL, blk_mangle_minor(MINOR(devt))); 603 - spin_unlock_bh(&ext_devt_lock); 604 - } 605 } 606 607 static char *bdevt_str(dev_t devt, char *buf) ··· 596 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 597 598 return buf; 599 - } 600 - 601 - /* 602 - * Register device numbers dev..(dev+range-1) 603 - * range must be nonzero 604 - * The hash chain is sorted on range, so that subranges can override. 
605 - */ 606 - void blk_register_region(dev_t devt, unsigned long range, struct module *module, 607 - struct kobject *(*probe)(dev_t, int *, void *), 608 - int (*lock)(dev_t, void *), void *data) 609 - { 610 - kobj_map(bdev_map, devt, range, module, probe, lock, data); 611 - } 612 - 613 - EXPORT_SYMBOL(blk_register_region); 614 - 615 - void blk_unregister_region(dev_t devt, unsigned long range) 616 - { 617 - kobj_unmap(bdev_map, devt, range); 618 - } 619 - 620 - EXPORT_SYMBOL(blk_unregister_region); 621 - 622 - static struct kobject *exact_match(dev_t devt, int *partno, void *data) 623 - { 624 - struct gendisk *p = data; 625 - 626 - return &disk_to_dev(p)->kobj; 627 - } 628 - 629 - static int exact_lock(dev_t devt, void *data) 630 - { 631 - struct gendisk *p = data; 632 - 633 - if (!get_disk_and_module(p)) 634 - return -1; 635 - return 0; 636 } 637 638 static void disk_scan_partitions(struct gendisk *disk) ··· 616 { 617 struct device *ddev = disk_to_dev(disk); 618 struct disk_part_iter piter; 619 - struct hd_struct *part; 620 int err; 621 622 ddev->parent = parent; ··· 648 */ 649 pm_runtime_set_memalloc_noio(ddev, true); 650 651 - disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); 652 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); 653 654 if (disk->flags & GENHD_FL_HIDDEN) { ··· 666 /* announce possible partitions */ 667 disk_part_iter_init(&piter, disk, 0); 668 while ((part = disk_part_iter_next(&piter))) 669 - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); 670 disk_part_iter_exit(&piter); 671 672 if (disk->queue->backing_dev_info->dev) { ··· 715 716 disk->flags |= GENHD_FL_UP; 717 718 - retval = blk_alloc_devt(&disk->part0, &devt); 719 if (retval) { 720 WARN_ON(1); 721 return; ··· 742 ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt)); 743 WARN_ON(ret); 744 bdi_set_owner(bdi, dev); 745 - blk_register_region(disk_devt(disk), disk->minors, NULL, 746 - exact_match, exact_lock, disk); 747 } 748 register_disk(parent, disk, groups); 749 if (register_queue) ··· 772 } 773 EXPORT_SYMBOL(device_add_disk_no_queue_reg); 774 775 - static void invalidate_partition(struct gendisk *disk, int partno) 776 { 777 - struct block_device *bdev; 778 - 779 - bdev = bdget_disk(disk, partno); 780 - if (!bdev) 781 - return; 782 - 783 fsync_bdev(bdev); 784 __invalidate_device(bdev, true); 785 786 /* 787 - * Unhash the bdev inode for this device so that it gets evicted as soon 788 - * as last inode reference is dropped. 789 */ 790 remove_inode_hash(bdev->bd_inode); 791 - bdput(bdev); 792 } 793 794 /** ··· 806 void del_gendisk(struct gendisk *disk) 807 { 808 struct disk_part_iter piter; 809 - struct hd_struct *part; 810 811 might_sleep(); 812 813 blk_integrity_del(disk); 814 disk_del_events(disk); ··· 820 * Block lookups of the disk until all bdevs are unhashed and the 821 * disk is marked as dead (GENHD_FL_UP cleared). 
822 */ 823 - down_write(&disk->lookup_sem); 824 /* invalidate stuff */ 825 disk_part_iter_init(&piter, disk, 826 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); 827 while ((part = disk_part_iter_next(&piter))) { 828 - invalidate_partition(disk, part->partno); 829 delete_partition(part); 830 } 831 disk_part_iter_exit(&piter); 832 833 - invalidate_partition(disk, 0); 834 set_capacity(disk, 0); 835 disk->flags &= ~GENHD_FL_UP; 836 - up_write(&disk->lookup_sem); 837 838 - if (!(disk->flags & GENHD_FL_HIDDEN)) 839 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 840 - if (disk->queue) { 841 /* 842 * Unregister bdi before releasing device numbers (as they can 843 * get reused and we'd get clashes in sysfs). 844 */ 845 - if (!(disk->flags & GENHD_FL_HIDDEN)) 846 - bdi_unregister(disk->queue->backing_dev_info); 847 - blk_unregister_queue(disk); 848 - } else { 849 - WARN_ON(1); 850 } 851 852 - if (!(disk->flags & GENHD_FL_HIDDEN)) 853 - blk_unregister_region(disk_devt(disk), disk->minors); 854 - /* 855 - * Remove gendisk pointer from idr so that it cannot be looked up 856 - * while RCU period before freeing gendisk is running to prevent 857 - * use-after-free issues. Note that the device number stays 858 - * "in-use" until we really free the gendisk. 859 - */ 860 - blk_invalidate_devt(disk_devt(disk)); 861 862 - kobject_put(disk->part0.holder_dir); 863 kobject_put(disk->slave_dir); 864 865 - part_stat_set_all(&disk->part0, 0); 866 - disk->part0.stamp = 0; 867 if (!sysfs_deprecated) 868 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); 869 pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); ··· 885 return badblocks_store(disk->bb, page, len, 0); 886 } 887 888 - /** 889 - * get_gendisk - get partitioning information for a given device 890 - * @devt: device to get partitioning information for 891 - * @partno: returned partition index 892 - * 893 - * This function gets the structure containing partitioning 894 - * information for the given device @devt. 895 - * 896 - * Context: can sleep 897 - */ 898 - struct gendisk *get_gendisk(dev_t devt, int *partno) 899 { 900 - struct gendisk *disk = NULL; 901 902 - might_sleep(); 903 - 904 - if (MAJOR(devt) != BLOCK_EXT_MAJOR) { 905 - struct kobject *kobj; 906 - 907 - kobj = kobj_lookup(bdev_map, devt, partno); 908 - if (kobj) 909 - disk = dev_to_disk(kobj_to_dev(kobj)); 910 - } else { 911 - struct hd_struct *part; 912 - 913 - spin_lock_bh(&ext_devt_lock); 914 - part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); 915 - if (part && get_disk_and_module(part_to_disk(part))) { 916 - *partno = part->partno; 917 - disk = part_to_disk(part); 918 } 919 - spin_unlock_bh(&ext_devt_lock); 920 } 921 922 - if (!disk) 923 - return NULL; 924 - 925 - /* 926 - * Synchronize with del_gendisk() to not return disk that is being 927 - * destroyed. 
928 - */ 929 - down_read(&disk->lookup_sem); 930 - if (unlikely((disk->flags & GENHD_FL_HIDDEN) || 931 - !(disk->flags & GENHD_FL_UP))) { 932 - up_read(&disk->lookup_sem); 933 - put_disk_and_module(disk); 934 - disk = NULL; 935 - } else { 936 - up_read(&disk->lookup_sem); 937 - } 938 - return disk; 939 } 940 941 /** ··· 920 */ 921 struct block_device *bdget_disk(struct gendisk *disk, int partno) 922 { 923 - struct hd_struct *part; 924 struct block_device *bdev = NULL; 925 926 - part = disk_get_part(disk, partno); 927 - if (part) 928 - bdev = bdget_part(part); 929 - disk_put_part(part); 930 931 return bdev; 932 } 933 - EXPORT_SYMBOL(bdget_disk); 934 935 /* 936 * print a full list of all partitions - intended for places where the root ··· 945 while ((dev = class_dev_iter_next(&iter))) { 946 struct gendisk *disk = dev_to_disk(dev); 947 struct disk_part_iter piter; 948 - struct hd_struct *part; 949 char name_buf[BDEVNAME_SIZE]; 950 char devt_buf[BDEVT_SIZE]; 951 ··· 964 */ 965 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 966 while ((part = disk_part_iter_next(&piter))) { 967 - bool is_part0 = part == &disk->part0; 968 969 printk("%s%s %10llu %s %s", is_part0 ? "" : " ", 970 - bdevt_str(part_devt(part), devt_buf), 971 - (unsigned long long)part_nr_sects_read(part) >> 1 972 - , disk_name(disk, part->partno, name_buf), 973 - part->info ? part->info->uuid : ""); 974 if (is_part0) { 975 if (dev->parent && dev->parent->driver) 976 printk(" driver: %s\n", ··· 1047 { 1048 struct gendisk *sgp = v; 1049 struct disk_part_iter piter; 1050 - struct hd_struct *part; 1051 char buf[BDEVNAME_SIZE]; 1052 1053 /* Don't show non-partitionable removeable devices or empty devices */ ··· 1061 disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 1062 while ((part = disk_part_iter_next(&piter))) 1063 seq_printf(seqf, "%4d %7d %10llu %s\n", 1064 - MAJOR(part_devt(part)), MINOR(part_devt(part)), 1065 - (unsigned long long)part_nr_sects_read(part) >> 1, 1066 - disk_name(sgp, part->partno, buf)); 1067 disk_part_iter_exit(&piter); 1068 1069 return 0; ··· 1077 }; 1078 #endif 1079 1080 - 1081 - static struct kobject *base_probe(dev_t devt, int *partno, void *data) 1082 - { 1083 - if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 1084 - /* Make old-style 2.4 aliases work */ 1085 - request_module("block-major-%d", MAJOR(devt)); 1086 - return NULL; 1087 - } 1088 - 1089 static int __init genhd_device_init(void) 1090 { 1091 int error; ··· 1085 error = class_register(&block_class); 1086 if (unlikely(error)) 1087 return error; 1088 - bdev_map = kobj_map_init(base_probe, &block_class_lock); 1089 blk_dev_init(); 1090 1091 register_blkdev(BLOCK_EXT_MAJOR, "blkext"); ··· 1142 ssize_t part_size_show(struct device *dev, 1143 struct device_attribute *attr, char *buf) 1144 { 1145 - struct hd_struct *p = dev_to_part(dev); 1146 - 1147 - return sprintf(buf, "%llu\n", 1148 - (unsigned long long)part_nr_sects_read(p)); 1149 } 1150 1151 ssize_t part_stat_show(struct device *dev, 1152 struct device_attribute *attr, char *buf) 1153 { 1154 - struct hd_struct *p = dev_to_part(dev); 1155 - struct request_queue *q = part_to_disk(p)->queue; 1156 struct disk_stats stat; 1157 unsigned int inflight; 1158 1159 - part_stat_read_all(p, &stat); 1160 if (queue_is_mq(q)) 1161 - inflight = blk_mq_in_flight(q, p); 1162 else 1163 - inflight = part_in_flight(p); 1164 1165 return sprintf(buf, 1166 "%8lu %8lu %8llu %8u " ··· 1192 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, 1193 char *buf) 1194 { 
1195 - struct hd_struct *p = dev_to_part(dev); 1196 - struct request_queue *q = part_to_disk(p)->queue; 1197 unsigned int inflight[2]; 1198 1199 if (queue_is_mq(q)) 1200 - blk_mq_in_flight_rw(q, p, inflight); 1201 else 1202 - part_in_flight_rw(p, inflight); 1203 1204 return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); 1205 } ··· 1247 ssize_t part_fail_show(struct device *dev, 1248 struct device_attribute *attr, char *buf) 1249 { 1250 - struct hd_struct *p = dev_to_part(dev); 1251 - 1252 - return sprintf(buf, "%d\n", p->make_it_fail); 1253 } 1254 1255 ssize_t part_fail_store(struct device *dev, 1256 struct device_attribute *attr, 1257 const char *buf, size_t count) 1258 { 1259 - struct hd_struct *p = dev_to_part(dev); 1260 int i; 1261 1262 if (count > 0 && sscanf(buf, "%d", &i) > 0) 1263 - p->make_it_fail = (i == 0) ? 0 : 1; 1264 1265 return count; 1266 } ··· 1396 * 1397 * This function releases all allocated resources of the gendisk. 1398 * 1399 - * The struct gendisk refcount is incremented with get_gendisk() or 1400 - * get_disk_and_module(), and its refcount is decremented with 1401 - * put_disk_and_module() or put_disk(). Once the refcount reaches 0 this 1402 - * function is called. 1403 - * 1404 * Drivers which used __device_add_disk() have a gendisk with a request_queue 1405 * assigned. Since the request_queue sits on top of the gendisk for these 1406 * drivers we also call blk_put_queue() for them, and we expect the ··· 1414 disk_release_events(disk); 1415 kfree(disk->random); 1416 disk_replace_part_tbl(disk, NULL); 1417 - hd_free_part(&disk->part0); 1418 if (disk->queue) 1419 blk_put_queue(disk->queue); 1420 kfree(disk); ··· 1452 { 1453 struct gendisk *gp = v; 1454 struct disk_part_iter piter; 1455 - struct hd_struct *hd; 1456 char buf[BDEVNAME_SIZE]; 1457 unsigned int inflight; 1458 struct disk_stats stat; ··· 1480 "%lu %lu %lu %u " 1481 "%lu %u" 1482 "\n", 1483 - MAJOR(part_devt(hd)), MINOR(part_devt(hd)), 1484 - disk_name(gp, hd->partno, buf), 1485 stat.ios[STAT_READ], 1486 stat.merges[STAT_READ], 1487 stat.sectors[STAT_READ], ··· 1539 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1540 while ((dev = class_dev_iter_next(&iter))) { 1541 struct gendisk *disk = dev_to_disk(dev); 1542 - struct hd_struct *part; 1543 1544 if (strcmp(dev_name(dev), name)) 1545 continue; ··· 1552 MINOR(dev->devt) + partno); 1553 break; 1554 } 1555 - part = disk_get_part(disk, partno); 1556 if (part) { 1557 - devt = part_devt(part); 1558 - disk_put_part(part); 1559 break; 1560 } 1561 - disk_put_part(part); 1562 } 1563 class_dev_iter_exit(&iter); 1564 return devt; ··· 1579 if (!disk) 1580 return NULL; 1581 1582 - disk->part0.dkstats = alloc_percpu(struct disk_stats); 1583 - if (!disk->part0.dkstats) 1584 goto out_free_disk; 1585 1586 - init_rwsem(&disk->lookup_sem); 1587 disk->node_id = node_id; 1588 - if (disk_expand_part_tbl(disk, 0)) { 1589 - free_percpu(disk->part0.dkstats); 1590 - goto out_free_disk; 1591 - } 1592 1593 ptbl = rcu_dereference_protected(disk->part_tbl, 1); 1594 - rcu_assign_pointer(ptbl->part[0], &disk->part0); 1595 - 1596 - /* 1597 - * set_capacity() and get_capacity() currently don't use 1598 - * seqcounter to read/update the part0->nr_sects. Still init 1599 - * the counter as we can read the sectors in IO submission 1600 - * patch using seqence counters. 1601 - * 1602 - * TODO: Ideally set_capacity() and get_capacity() should be 1603 - * converted to make use of bd_mutex and sequence counters. 
1604 - */ 1605 - hd_sects_seq_init(&disk->part0); 1606 - if (hd_ref_init(&disk->part0)) 1607 - goto out_free_part0; 1608 1609 disk->minors = minors; 1610 rand_initialize_disk(disk); ··· 1597 device_initialize(disk_to_dev(disk)); 1598 return disk; 1599 1600 - out_free_part0: 1601 - hd_free_part(&disk->part0); 1602 out_free_disk: 1603 kfree(disk); 1604 return NULL; 1605 } 1606 EXPORT_SYMBOL(__alloc_disk_node); 1607 - 1608 - /** 1609 - * get_disk_and_module - increments the gendisk and gendisk fops module refcount 1610 - * @disk: the struct gendisk to increment the refcount for 1611 - * 1612 - * This increments the refcount for the struct gendisk, and the gendisk's 1613 - * fops module owner. 1614 - * 1615 - * Context: Any context. 1616 - */ 1617 - struct kobject *get_disk_and_module(struct gendisk *disk) 1618 - { 1619 - struct module *owner; 1620 - struct kobject *kobj; 1621 - 1622 - if (!disk->fops) 1623 - return NULL; 1624 - owner = disk->fops->owner; 1625 - if (owner && !try_module_get(owner)) 1626 - return NULL; 1627 - kobj = kobject_get_unless_zero(&disk_to_dev(disk)->kobj); 1628 - if (kobj == NULL) { 1629 - module_put(owner); 1630 - return NULL; 1631 - } 1632 - return kobj; 1633 - 1634 - } 1635 - EXPORT_SYMBOL(get_disk_and_module); 1636 1637 /** 1638 * put_disk - decrements the gendisk refcount ··· 1618 void put_disk(struct gendisk *disk) 1619 { 1620 if (disk) 1621 - kobject_put(&disk_to_dev(disk)->kobj); 1622 } 1623 EXPORT_SYMBOL(put_disk); 1624 - 1625 - /** 1626 - * put_disk_and_module - decrements the module and gendisk refcount 1627 - * @disk: the struct gendisk to decrement the refcount for 1628 - * 1629 - * This is a counterpart of get_disk_and_module() and thus also of 1630 - * get_gendisk(). 1631 - * 1632 - * Context: Any context, but the last reference must not be dropped from 1633 - * atomic context. 1634 - */ 1635 - void put_disk_and_module(struct gendisk *disk) 1636 - { 1637 - if (disk) { 1638 - struct module *owner = disk->fops->owner; 1639 - 1640 - put_disk(disk); 1641 - module_put(owner); 1642 - } 1643 - } 1644 - EXPORT_SYMBOL(put_disk_and_module); 1645 1646 static void set_disk_ro_uevent(struct gendisk *gd, int ro) 1647 { ··· 1632 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1633 } 1634 1635 - void set_device_ro(struct block_device *bdev, int flag) 1636 - { 1637 - bdev->bd_part->policy = flag; 1638 - } 1639 - 1640 - EXPORT_SYMBOL(set_device_ro); 1641 - 1642 void set_disk_ro(struct gendisk *disk, int flag) 1643 { 1644 struct disk_part_iter piter; 1645 - struct hd_struct *part; 1646 1647 - if (disk->part0.policy != flag) { 1648 set_disk_ro_uevent(disk, flag); 1649 - disk->part0.policy = flag; 1650 } 1651 1652 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 1653 while ((part = disk_part_iter_next(&piter))) 1654 - part->policy = flag; 1655 disk_part_iter_exit(&piter); 1656 } 1657 ··· 1654 { 1655 if (!bdev) 1656 return 0; 1657 - return bdev->bd_part->policy; 1658 } 1659 1660 EXPORT_SYMBOL(bdev_read_only);
··· 17 #include <linux/seq_file.h> 18 #include <linux/slab.h> 19 #include <linux/kmod.h> 20 #include <linux/mutex.h> 21 #include <linux/idr.h> 22 #include <linux/log2.h> ··· 26 27 #include "blk.h" 28 29 static struct kobject *block_depr; 30 + 31 + DECLARE_RWSEM(bdev_lookup_sem); 32 33 /* for extended dynamic devt allocation, currently only one major is used */ 34 #define NR_EXT_DEVT (1 << MINORBITS) 35 + static DEFINE_IDA(ext_devt_ida); 36 37 static void disk_check_events(struct disk_events *ev, 38 unsigned int *clearing_ptr); ··· 45 static void disk_del_events(struct gendisk *disk); 46 static void disk_release_events(struct gendisk *disk); 47 48 + void set_capacity(struct gendisk *disk, sector_t sectors) 49 + { 50 + struct block_device *bdev = disk->part0; 51 + 52 + spin_lock(&bdev->bd_size_lock); 53 + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); 54 + spin_unlock(&bdev->bd_size_lock); 55 + } 56 + EXPORT_SYMBOL(set_capacity); 57 + 58 /* 59 + * Set disk capacity and notify if the size is not currently zero and will not 60 + * be set to zero. Returns true if a uevent was sent, otherwise false. 61 */ 62 + bool set_capacity_and_notify(struct gendisk *disk, sector_t size) 63 { 64 sector_t capacity = get_capacity(disk); 65 + char *envp[] = { "RESIZE=1", NULL }; 66 67 set_capacity(disk, size); 68 69 + /* 70 + * Only print a message and send a uevent if the gendisk is user visible 71 + * and alive. This avoids spamming the log and udev when setting the 72 + * initial capacity during probing. 73 + */ 74 + if (size == capacity || 75 + (disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) 76 + return false; 77 78 + pr_info("%s: detected capacity change from %lld to %lld\n", 79 + disk->disk_name, size, capacity); 80 81 + /* 82 + * Historically we did not send a uevent for changes to/from an empty 83 + * device. 84 + */ 85 + if (!capacity || !size) 86 + return false; 87 + kobject_uevent_env(&disk_to_dev(disk)->kobj, KOBJ_CHANGE, envp); 88 + return true; 89 } 90 + EXPORT_SYMBOL_GPL(set_capacity_and_notify); 91 92 /* 93 * Format the device name of the indicated disk into the supplied buffer and ··· 92 } 93 EXPORT_SYMBOL(bdevname); 94 95 + static void part_stat_read_all(struct block_device *part, 96 + struct disk_stats *stat) 97 { 98 int cpu; 99 100 memset(stat, 0, sizeof(struct disk_stats)); 101 for_each_possible_cpu(cpu) { 102 + struct disk_stats *ptr = per_cpu_ptr(part->bd_stats, cpu); 103 int group; 104 105 for (group = 0; group < NR_STAT_GROUPS; group++) { ··· 112 } 113 } 114 115 + static unsigned int part_in_flight(struct block_device *part) 116 { 117 unsigned int inflight = 0; 118 int cpu; ··· 127 return inflight; 128 } 129 130 + static void part_in_flight_rw(struct block_device *part, 131 + unsigned int inflight[2]) 132 { 133 int cpu; 134 ··· 143 inflight[1] = 0; 144 } 145 146 + struct block_device *__disk_get_part(struct gendisk *disk, int partno) 147 { 148 struct disk_part_tbl *ptbl = rcu_dereference(disk->part_tbl); 149 150 if (unlikely(partno < 0 || partno >= ptbl->len)) 151 return NULL; 152 return rcu_dereference(ptbl->part[partno]); 153 } 154 155 /** ··· 223 * CONTEXT: 224 * Don't care. 
225 */ 226 + struct block_device *disk_part_iter_next(struct disk_part_iter *piter) 227 { 228 struct disk_part_tbl *ptbl; 229 int inc, end; 230 231 /* put the last partition */ 232 + disk_part_iter_exit(piter); 233 234 /* get part_tbl */ 235 rcu_read_lock(); ··· 251 252 /* iterate to the next partition */ 253 for (; piter->idx != end; piter->idx += inc) { 254 + struct block_device *part; 255 256 part = rcu_dereference(ptbl->part[piter->idx]); 257 if (!part) 258 continue; 259 + if (!bdev_nr_sectors(part) && 260 !(piter->flags & DISK_PITER_INCL_EMPTY) && 261 !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && 262 piter->idx == 0)) 263 continue; 264 265 + piter->part = bdgrab(part); 266 + if (!piter->part) 267 + continue; 268 piter->idx += inc; 269 break; 270 } ··· 285 */ 286 void disk_part_iter_exit(struct disk_part_iter *piter) 287 { 288 + if (piter->part) 289 + bdput(piter->part); 290 piter->part = NULL; 291 } 292 EXPORT_SYMBOL_GPL(disk_part_iter_exit); 293 294 + static inline int sector_in_part(struct block_device *part, sector_t sector) 295 { 296 + return part->bd_start_sect <= sector && 297 + sector < part->bd_start_sect + bdev_nr_sectors(part); 298 } 299 300 /** ··· 305 * primarily used for stats accounting. 306 * 307 * CONTEXT: 308 + * RCU read locked. 309 * 310 * RETURNS: 311 * Found partition on success, part0 is returned if no partition matches 312 * or the matched partition is being deleted. 313 */ 314 + struct block_device *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) 315 { 316 struct disk_part_tbl *ptbl; 317 + struct block_device *part; 318 int i; 319 320 rcu_read_lock(); 321 ptbl = rcu_dereference(disk->part_tbl); 322 323 part = rcu_dereference(ptbl->last_lookup); 324 + if (part && sector_in_part(part, sector)) 325 goto out_unlock; 326 327 for (i = 1; i < ptbl->len; i++) { 328 part = rcu_dereference(ptbl->part[i]); 329 if (part && sector_in_part(part, sector)) { 330 rcu_assign_pointer(ptbl->last_lookup, part); 331 goto out_unlock; 332 } 333 } 334 335 + part = disk->part0; 336 out_unlock: 337 rcu_read_unlock(); 338 return part; ··· 393 struct blk_major_name *next; 394 int major; 395 char name[16]; 396 + void (*probe)(dev_t devt); 397 } *major_names[BLKDEV_MAJOR_HASH_SIZE]; 398 + static DEFINE_MUTEX(major_names_lock); 399 400 /* index in the above - for now: assume no multimajor ranges */ 401 static inline int major_to_index(unsigned major) ··· 406 { 407 struct blk_major_name *dp; 408 409 + mutex_lock(&major_names_lock); 410 for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next) 411 if (dp->major == offset) 412 seq_printf(seqf, "%3d %s\n", dp->major, dp->name); 413 + mutex_unlock(&major_names_lock); 414 } 415 #endif /* CONFIG_PROC_FS */ 416 417 /** 418 + * __register_blkdev - register a new block device 419 * 420 * @major: the requested major device number [1..BLKDEV_MAJOR_MAX-1]. If 421 * @major = 0, try to allocate any unused major number. 422 * @name: the name of the new block device as a zero terminated string 423 + * @probe: allback that is called on access to any minor number of @major 424 * 425 * The @name must be unique within the system. 426 * ··· 433 * 434 * See Documentation/admin-guide/devices.txt for the list of allocated 435 * major numbers. 436 + * 437 + * Use register_blkdev instead for any new code. 
438 */ 439 + int __register_blkdev(unsigned int major, const char *name, 440 + void (*probe)(dev_t devt)) 441 { 442 struct blk_major_name **n, *p; 443 int index, ret = 0; 444 445 + mutex_lock(&major_names_lock); 446 447 /* temporary */ 448 if (major == 0) { ··· 473 } 474 475 p->major = major; 476 + p->probe = probe; 477 strlcpy(p->name, name, sizeof(p->name)); 478 p->next = NULL; 479 index = major_to_index(major); ··· 492 kfree(p); 493 } 494 out: 495 + mutex_unlock(&major_names_lock); 496 return ret; 497 } 498 + EXPORT_SYMBOL(__register_blkdev); 499 500 void unregister_blkdev(unsigned int major, const char *name) 501 { ··· 504 struct blk_major_name *p = NULL; 505 int index = major_to_index(major); 506 507 + mutex_lock(&major_names_lock); 508 for (n = &major_names[index]; *n; n = &(*n)->next) 509 if ((*n)->major == major) 510 break; ··· 514 p = *n; 515 *n = p->next; 516 } 517 + mutex_unlock(&major_names_lock); 518 kfree(p); 519 } 520 521 EXPORT_SYMBOL(unregister_blkdev); 522 523 /** 524 * blk_mangle_minor - scatter minor numbers apart ··· 555 } 556 557 /** 558 + * blk_alloc_devt - allocate a dev_t for a block device 559 + * @bdev: block device to allocate dev_t for 560 * @devt: out parameter for resulting dev_t 561 * 562 * Allocate a dev_t for block device. ··· 568 * CONTEXT: 569 * Might sleep. 570 */ 571 + int blk_alloc_devt(struct block_device *bdev, dev_t *devt) 572 { 573 + struct gendisk *disk = bdev->bd_disk; 574 int idx; 575 576 /* in consecutive minor range? */ 577 + if (bdev->bd_partno < disk->minors) { 578 + *devt = MKDEV(disk->major, disk->first_minor + bdev->bd_partno); 579 return 0; 580 } 581 582 + idx = ida_alloc_range(&ext_devt_ida, 0, NR_EXT_DEVT, GFP_KERNEL); 583 if (idx < 0) 584 return idx == -ENOSPC ? -EBUSY : idx; 585 ··· 605 */ 606 void blk_free_devt(dev_t devt) 607 { 608 + if (MAJOR(devt) == BLOCK_EXT_MAJOR) 609 + ida_free(&ext_devt_ida, blk_mangle_minor(MINOR(devt))); 610 } 611 612 static char *bdevt_str(dev_t devt, char *buf) ··· 637 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); 638 639 return buf; 640 } 641 642 static void disk_scan_partitions(struct gendisk *disk) ··· 694 { 695 struct device *ddev = disk_to_dev(disk); 696 struct disk_part_iter piter; 697 + struct block_device *part; 698 int err; 699 700 ddev->parent = parent; ··· 726 */ 727 pm_runtime_set_memalloc_noio(ddev, true); 728 729 + disk->part0->bd_holder_dir = 730 + kobject_create_and_add("holders", &ddev->kobj); 731 disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); 732 733 if (disk->flags & GENHD_FL_HIDDEN) { ··· 743 /* announce possible partitions */ 744 disk_part_iter_init(&piter, disk, 0); 745 while ((part = disk_part_iter_next(&piter))) 746 + kobject_uevent(bdev_kobj(part), KOBJ_ADD); 747 disk_part_iter_exit(&piter); 748 749 if (disk->queue->backing_dev_info->dev) { ··· 792 793 disk->flags |= GENHD_FL_UP; 794 795 + retval = blk_alloc_devt(disk->part0, &devt); 796 if (retval) { 797 WARN_ON(1); 798 return; ··· 819 ret = bdi_register(bdi, "%u:%u", MAJOR(devt), MINOR(devt)); 820 WARN_ON(ret); 821 bdi_set_owner(bdi, dev); 822 + bdev_add(disk->part0, devt); 823 } 824 register_disk(parent, disk, groups); 825 if (register_queue) ··· 850 } 851 EXPORT_SYMBOL(device_add_disk_no_queue_reg); 852 853 + static void invalidate_partition(struct block_device *bdev) 854 { 855 fsync_bdev(bdev); 856 __invalidate_device(bdev, true); 857 858 /* 859 + * Unhash the bdev inode for this device so that it can't be looked 860 + * up any more even if openers still hold references to it. 
861 */ 862 remove_inode_hash(bdev->bd_inode); 863 } 864 865 /** ··· 891 void del_gendisk(struct gendisk *disk) 892 { 893 struct disk_part_iter piter; 894 + struct block_device *part; 895 896 might_sleep(); 897 + 898 + if (WARN_ON_ONCE(!disk->queue)) 899 + return; 900 901 blk_integrity_del(disk); 902 disk_del_events(disk); ··· 902 * Block lookups of the disk until all bdevs are unhashed and the 903 * disk is marked as dead (GENHD_FL_UP cleared). 904 */ 905 + down_write(&bdev_lookup_sem); 906 + 907 /* invalidate stuff */ 908 disk_part_iter_init(&piter, disk, 909 DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); 910 while ((part = disk_part_iter_next(&piter))) { 911 + invalidate_partition(part); 912 delete_partition(part); 913 } 914 disk_part_iter_exit(&piter); 915 916 + invalidate_partition(disk->part0); 917 set_capacity(disk, 0); 918 disk->flags &= ~GENHD_FL_UP; 919 + up_write(&bdev_lookup_sem); 920 921 + if (!(disk->flags & GENHD_FL_HIDDEN)) { 922 sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); 923 + 924 /* 925 * Unregister bdi before releasing device numbers (as they can 926 * get reused and we'd get clashes in sysfs). 927 */ 928 + bdi_unregister(disk->queue->backing_dev_info); 929 } 930 931 + blk_unregister_queue(disk); 932 933 + kobject_put(disk->part0->bd_holder_dir); 934 kobject_put(disk->slave_dir); 935 936 + part_stat_set_all(disk->part0, 0); 937 + disk->part0->bd_stamp = 0; 938 if (!sysfs_deprecated) 939 sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); 940 pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); ··· 978 return badblocks_store(disk->bb, page, len, 0); 979 } 980 981 + void blk_request_module(dev_t devt) 982 { 983 + unsigned int major = MAJOR(devt); 984 + struct blk_major_name **n; 985 986 + mutex_lock(&major_names_lock); 987 + for (n = &major_names[major_to_index(major)]; *n; n = &(*n)->next) { 988 + if ((*n)->major == major && (*n)->probe) { 989 + (*n)->probe(devt); 990 + mutex_unlock(&major_names_lock); 991 + return; 992 } 993 } 994 + mutex_unlock(&major_names_lock); 995 996 + if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) 997 + /* Make old-style 2.4 aliases work */ 998 + request_module("block-major-%d", MAJOR(devt)); 999 } 1000 1001 /** ··· 1046 */ 1047 struct block_device *bdget_disk(struct gendisk *disk, int partno) 1048 { 1049 struct block_device *bdev = NULL; 1050 1051 + rcu_read_lock(); 1052 + bdev = __disk_get_part(disk, partno); 1053 + if (bdev && !bdgrab(bdev)) 1054 + bdev = NULL; 1055 + rcu_read_unlock(); 1056 1057 return bdev; 1058 } 1059 1060 /* 1061 * print a full list of all partitions - intended for places where the root ··· 1072 while ((dev = class_dev_iter_next(&iter))) { 1073 struct gendisk *disk = dev_to_disk(dev); 1074 struct disk_part_iter piter; 1075 + struct block_device *part; 1076 char name_buf[BDEVNAME_SIZE]; 1077 char devt_buf[BDEVT_SIZE]; 1078 ··· 1091 */ 1092 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 1093 while ((part = disk_part_iter_next(&piter))) { 1094 + bool is_part0 = part == disk->part0; 1095 1096 printk("%s%s %10llu %s %s", is_part0 ? "" : " ", 1097 + bdevt_str(part->bd_dev, devt_buf), 1098 + bdev_nr_sectors(part) >> 1, 1099 + disk_name(disk, part->bd_partno, name_buf), 1100 + part->bd_meta_info ? 
1101 + part->bd_meta_info->uuid : ""); 1102 if (is_part0) { 1103 if (dev->parent && dev->parent->driver) 1104 printk(" driver: %s\n", ··· 1173 { 1174 struct gendisk *sgp = v; 1175 struct disk_part_iter piter; 1176 + struct block_device *part; 1177 char buf[BDEVNAME_SIZE]; 1178 1179 /* Don't show non-partitionable removeable devices or empty devices */ ··· 1187 disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); 1188 while ((part = disk_part_iter_next(&piter))) 1189 seq_printf(seqf, "%4d %7d %10llu %s\n", 1190 + MAJOR(part->bd_dev), MINOR(part->bd_dev), 1191 + bdev_nr_sectors(part) >> 1, 1192 + disk_name(sgp, part->bd_partno, buf)); 1193 disk_part_iter_exit(&piter); 1194 1195 return 0; ··· 1203 }; 1204 #endif 1205 1206 static int __init genhd_device_init(void) 1207 { 1208 int error; ··· 1220 error = class_register(&block_class); 1221 if (unlikely(error)) 1222 return error; 1223 blk_dev_init(); 1224 1225 register_blkdev(BLOCK_EXT_MAJOR, "blkext"); ··· 1278 ssize_t part_size_show(struct device *dev, 1279 struct device_attribute *attr, char *buf) 1280 { 1281 + return sprintf(buf, "%llu\n", bdev_nr_sectors(dev_to_bdev(dev))); 1282 } 1283 1284 ssize_t part_stat_show(struct device *dev, 1285 struct device_attribute *attr, char *buf) 1286 { 1287 + struct block_device *bdev = dev_to_bdev(dev); 1288 + struct request_queue *q = bdev->bd_disk->queue; 1289 struct disk_stats stat; 1290 unsigned int inflight; 1291 1292 + part_stat_read_all(bdev, &stat); 1293 if (queue_is_mq(q)) 1294 + inflight = blk_mq_in_flight(q, bdev); 1295 else 1296 + inflight = part_in_flight(bdev); 1297 1298 return sprintf(buf, 1299 "%8lu %8lu %8llu %8u " ··· 1331 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, 1332 char *buf) 1333 { 1334 + struct block_device *bdev = dev_to_bdev(dev); 1335 + struct request_queue *q = bdev->bd_disk->queue; 1336 unsigned int inflight[2]; 1337 1338 if (queue_is_mq(q)) 1339 + blk_mq_in_flight_rw(q, bdev, inflight); 1340 else 1341 + part_in_flight_rw(bdev, inflight); 1342 1343 return sprintf(buf, "%8u %8u\n", inflight[0], inflight[1]); 1344 } ··· 1386 ssize_t part_fail_show(struct device *dev, 1387 struct device_attribute *attr, char *buf) 1388 { 1389 + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_make_it_fail); 1390 } 1391 1392 ssize_t part_fail_store(struct device *dev, 1393 struct device_attribute *attr, 1394 const char *buf, size_t count) 1395 { 1396 int i; 1397 1398 if (count > 0 && sscanf(buf, "%d", &i) > 0) 1399 + dev_to_bdev(dev)->bd_make_it_fail = i; 1400 1401 return count; 1402 } ··· 1538 * 1539 * This function releases all allocated resources of the gendisk. 1540 * 1541 * Drivers which used __device_add_disk() have a gendisk with a request_queue 1542 * assigned. 
Since the request_queue sits on top of the gendisk for these 1543 * drivers we also call blk_put_queue() for them, and we expect the ··· 1561 disk_release_events(disk); 1562 kfree(disk->random); 1563 disk_replace_part_tbl(disk, NULL); 1564 + bdput(disk->part0); 1565 if (disk->queue) 1566 blk_put_queue(disk->queue); 1567 kfree(disk); ··· 1599 { 1600 struct gendisk *gp = v; 1601 struct disk_part_iter piter; 1602 + struct block_device *hd; 1603 char buf[BDEVNAME_SIZE]; 1604 unsigned int inflight; 1605 struct disk_stats stat; ··· 1627 "%lu %lu %lu %u " 1628 "%lu %u" 1629 "\n", 1630 + MAJOR(hd->bd_dev), MINOR(hd->bd_dev), 1631 + disk_name(gp, hd->bd_partno, buf), 1632 stat.ios[STAT_READ], 1633 stat.merges[STAT_READ], 1634 stat.sectors[STAT_READ], ··· 1686 class_dev_iter_init(&iter, &block_class, NULL, &disk_type); 1687 while ((dev = class_dev_iter_next(&iter))) { 1688 struct gendisk *disk = dev_to_disk(dev); 1689 + struct block_device *part; 1690 1691 if (strcmp(dev_name(dev), name)) 1692 continue; ··· 1699 MINOR(dev->devt) + partno); 1700 break; 1701 } 1702 + part = bdget_disk(disk, partno); 1703 if (part) { 1704 + devt = part->bd_dev; 1705 + bdput(part); 1706 break; 1707 } 1708 } 1709 class_dev_iter_exit(&iter); 1710 return devt; ··· 1727 if (!disk) 1728 return NULL; 1729 1730 + disk->part0 = bdev_alloc(disk, 0); 1731 + if (!disk->part0) 1732 goto out_free_disk; 1733 1734 disk->node_id = node_id; 1735 + if (disk_expand_part_tbl(disk, 0)) 1736 + goto out_bdput; 1737 1738 ptbl = rcu_dereference_protected(disk->part_tbl, 1); 1739 + rcu_assign_pointer(ptbl->part[0], disk->part0); 1740 1741 disk->minors = minors; 1742 rand_initialize_disk(disk); ··· 1761 device_initialize(disk_to_dev(disk)); 1762 return disk; 1763 1764 + out_bdput: 1765 + bdput(disk->part0); 1766 out_free_disk: 1767 kfree(disk); 1768 return NULL; 1769 } 1770 EXPORT_SYMBOL(__alloc_disk_node); 1771 1772 /** 1773 * put_disk - decrements the gendisk refcount ··· 1811 void put_disk(struct gendisk *disk) 1812 { 1813 if (disk) 1814 + put_device(disk_to_dev(disk)); 1815 } 1816 EXPORT_SYMBOL(put_disk); 1817 1818 static void set_disk_ro_uevent(struct gendisk *gd, int ro) 1819 { ··· 1846 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); 1847 } 1848 1849 void set_disk_ro(struct gendisk *disk, int flag) 1850 { 1851 struct disk_part_iter piter; 1852 + struct block_device *part; 1853 1854 + if (disk->part0->bd_read_only != flag) { 1855 set_disk_ro_uevent(disk, flag); 1856 + disk->part0->bd_read_only = flag; 1857 } 1858 1859 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 1860 while ((part = disk_part_iter_next(&piter))) 1861 + part->bd_read_only = flag; 1862 disk_part_iter_exit(&piter); 1863 } 1864 ··· 1875 { 1876 if (!bdev) 1877 return 0; 1878 + return bdev->bd_read_only; 1879 } 1880 1881 EXPORT_SYMBOL(bdev_read_only);
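With kobj_map, blk_register_region() and get_gendisk() gone from genhd.c, on-demand device creation moves into the probe callback passed to __register_blkdev(); blk_request_module() walks major_names and only falls back to the old block-major-%d-%d module aliases when no probe is registered. A minimal sketch of the driver side, with every sketchblk_* name hypothetical:

#include <linux/genhd.h>
#include <linux/module.h>

static int sketchblk_major;

static void sketchblk_create_disk(unsigned int minor)
{
	/* allocate a gendisk, set_capacity(), device_add_disk(), ... */
}

static void sketchblk_probe(dev_t devt)
{
	/*
	 * Called from blk_request_module() when an unclaimed minor of
	 * our major is looked up; create the disk for MINOR(devt).
	 */
	sketchblk_create_disk(MINOR(devt));
}

static int __init sketchblk_init(void)
{
	/*
	 * Replaces the old register_blkdev() + blk_register_region()
	 * pair; passing 0 asks for a dynamically allocated major.
	 */
	sketchblk_major = __register_blkdev(0, "sketchblk", sketchblk_probe);
	if (sketchblk_major < 0)
		return sketchblk_major;
	return 0;
}

static void __exit sketchblk_exit(void)
{
	unregister_blkdev(sketchblk_major, "sketchblk");
}

module_init(sketchblk_init);
module_exit(sketchblk_exit);
MODULE_LICENSE("GPL");

Separately, set_capacity_and_notify() above folds the old set_capacity_revalidate_and_notify()/revalidate_disk_size() pattern into a single call that updates the part0 bdev size and sends the RESIZE uevent only for live, user-visible disks.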
+12 -62
block/ioctl.c
··· 35 start = p.start >> SECTOR_SHIFT; 36 length = p.length >> SECTOR_SHIFT; 37 38 - /* check for fit in a hd_struct */ 39 - if (sizeof(sector_t) < sizeof(long long)) { 40 - long pstart = start, plength = length; 41 - 42 - if (pstart != start || plength != length || pstart < 0 || 43 - plength < 0 || p.pno > 65535) 44 - return -EINVAL; 45 - } 46 - 47 switch (op) { 48 case BLKPG_ADD_PARTITION: 49 /* check if partition is aligned to blocksize */ ··· 210 } 211 #endif 212 213 - int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, 214 - unsigned cmd, unsigned long arg) 215 - { 216 - struct gendisk *disk = bdev->bd_disk; 217 - 218 - if (disk->fops->ioctl) 219 - return disk->fops->ioctl(bdev, mode, cmd, arg); 220 - 221 - return -ENOTTY; 222 - } 223 - /* 224 - * For the record: _GPL here is only because somebody decided to slap it 225 - * on the previous export. Sheer idiocy, since it wasn't copyrightable 226 - * at all and could be open-coded without any exports by anybody who cares. 227 - */ 228 - EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); 229 - 230 #ifdef CONFIG_COMPAT 231 /* 232 * This is the equivalent of compat_ptr_ioctl(), to be used by block ··· 320 return ops->pr_clear(bdev, c.key); 321 } 322 323 - /* 324 - * Is it an unrecognized ioctl? The correct returns are either 325 - * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a 326 - * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl 327 - * code before returning. 328 - * 329 - * Confused drivers sometimes return EINVAL, which is wrong. It 330 - * means "I understood the ioctl command, but the parameters to 331 - * it were wrong". 332 - * 333 - * We should aim to just fix the broken drivers, the EINVAL case 334 - * should go away. 335 - */ 336 - static inline int is_unrecognized_ioctl(int ret) 337 - { 338 - return ret == -EINVAL || 339 - ret == -ENOTTY || 340 - ret == -ENOIOCTLCMD; 341 - } 342 - 343 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, 344 unsigned cmd, unsigned long arg) 345 { 346 - int ret; 347 - 348 if (!capable(CAP_SYS_ADMIN)) 349 return -EACCES; 350 - 351 - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); 352 - if (!is_unrecognized_ioctl(ret)) 353 - return ret; 354 - 355 fsync_bdev(bdev); 356 invalidate_bdev(bdev); 357 return 0; ··· 338 if (!capable(CAP_SYS_ADMIN)) 339 return -EACCES; 340 341 - ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); 342 - if (!is_unrecognized_ioctl(ret)) 343 - return ret; 344 if (get_user(n, (int __user *)arg)) 345 return -EFAULT; 346 - set_device_ro(bdev, n); 347 return 0; 348 } 349 ··· 568 } 569 570 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 571 - if (ret == -ENOIOCTLCMD) 572 - return __blkdev_driver_ioctl(bdev, mode, cmd, arg); 573 574 - return ret; 575 } 576 EXPORT_SYMBOL_GPL(blkdev_ioctl); /* for /dev/raw */ 577 ··· 590 { 591 int ret; 592 void __user *argp = compat_ptr(arg); 593 - struct inode *inode = file->f_mapping->host; 594 - struct block_device *bdev = inode->i_bdev; 595 struct gendisk *disk = bdev->bd_disk; 596 fmode_t mode = file->f_mode; 597 loff_t size;
··· 35 start = p.start >> SECTOR_SHIFT; 36 length = p.length >> SECTOR_SHIFT; 37 38 switch (op) { 39 case BLKPG_ADD_PARTITION: 40 /* check if partition is aligned to blocksize */ ··· 219 } 220 #endif 221 222 #ifdef CONFIG_COMPAT 223 /* 224 * This is the equivalent of compat_ptr_ioctl(), to be used by block ··· 346 return ops->pr_clear(bdev, c.key); 347 } 348 349 static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, 350 unsigned cmd, unsigned long arg) 351 { 352 if (!capable(CAP_SYS_ADMIN)) 353 return -EACCES; 354 fsync_bdev(bdev); 355 invalidate_bdev(bdev); 356 return 0; ··· 391 if (!capable(CAP_SYS_ADMIN)) 392 return -EACCES; 393 394 if (get_user(n, (int __user *)arg)) 395 return -EFAULT; 396 + if (bdev->bd_disk->fops->set_read_only) { 397 + ret = bdev->bd_disk->fops->set_read_only(bdev, n); 398 + if (ret) 399 + return ret; 400 + } 401 + bdev->bd_read_only = n; 402 return 0; 403 } 404 ··· 619 } 620 621 ret = blkdev_common_ioctl(bdev, mode, cmd, arg, argp); 622 + if (ret != -ENOIOCTLCMD) 623 + return ret; 624 625 + if (!bdev->bd_disk->fops->ioctl) 626 + return -ENOTTY; 627 + return bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 628 } 629 EXPORT_SYMBOL_GPL(blkdev_ioctl); /* for /dev/raw */ 630 ··· 639 { 640 int ret; 641 void __user *argp = compat_ptr(arg); 642 + struct block_device *bdev = I_BDEV(file->f_mapping->host); 643 struct gendisk *disk = bdev->bd_disk; 644 fmode_t mode = file->f_mode; 645 loff_t size;
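BLKROSET no longer round-trips through __blkdev_driver_ioctl(): the core updates bd_read_only itself, and a driver that needs to veto or mirror the change implements the ->set_read_only() method instead of intercepting the ioctl. A hedged sketch of such a hook, with the sketchblk_* names and the write-protect condition hypothetical:

#include <linux/blkdev.h>
#include <linux/module.h>

static bool sketchblk_hw_write_protected;	/* hypothetical device state */

static int sketchblk_set_read_only(struct block_device *bdev, bool ro)
{
	/*
	 * Called from the BLKROSET path before the core writes
	 * bdev->bd_read_only; a non-zero return vetoes the change.
	 */
	if (!ro && sketchblk_hw_write_protected)
		return -EACCES;
	return 0;
}

static const struct block_device_operations sketchblk_fops = {
	.owner		= THIS_MODULE,
	.set_read_only	= sketchblk_set_read_only,
};

For the remaining commands, the fallback to ->ioctl() is now open-coded at the end of blkdev_ioctl() and only taken when blkdev_common_ioctl() returns -ENOIOCTLCMD.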
+80 -168
block/partitions/core.c
··· 85 NULL 86 }; 87 88 static struct parsed_partitions *allocate_partitions(struct gendisk *hd) 89 { 90 struct parsed_partitions *state; ··· 182 static ssize_t part_partition_show(struct device *dev, 183 struct device_attribute *attr, char *buf) 184 { 185 - struct hd_struct *p = dev_to_part(dev); 186 - 187 - return sprintf(buf, "%d\n", p->partno); 188 } 189 190 static ssize_t part_start_show(struct device *dev, 191 struct device_attribute *attr, char *buf) 192 { 193 - struct hd_struct *p = dev_to_part(dev); 194 - 195 - return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); 196 } 197 198 static ssize_t part_ro_show(struct device *dev, 199 struct device_attribute *attr, char *buf) 200 { 201 - struct hd_struct *p = dev_to_part(dev); 202 - return sprintf(buf, "%d\n", p->policy ? 1 : 0); 203 } 204 205 static ssize_t part_alignment_offset_show(struct device *dev, 206 struct device_attribute *attr, char *buf) 207 { 208 - struct hd_struct *p = dev_to_part(dev); 209 210 return sprintf(buf, "%u\n", 211 - queue_limit_alignment_offset(&part_to_disk(p)->queue->limits, 212 - p->start_sect)); 213 } 214 215 static ssize_t part_discard_alignment_show(struct device *dev, 216 struct device_attribute *attr, char *buf) 217 { 218 - struct hd_struct *p = dev_to_part(dev); 219 220 return sprintf(buf, "%u\n", 221 - queue_limit_discard_alignment(&part_to_disk(p)->queue->limits, 222 - p->start_sect)); 223 } 224 225 static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); ··· 259 260 static void part_release(struct device *dev) 261 { 262 - struct hd_struct *p = dev_to_part(dev); 263 blk_free_devt(dev->devt); 264 - hd_free_part(p); 265 - kfree(p); 266 } 267 268 static int part_uevent(struct device *dev, struct kobj_uevent_env *env) 269 { 270 - struct hd_struct *part = dev_to_part(dev); 271 272 - add_uevent_var(env, "PARTN=%u", part->partno); 273 - if (part->info && part->info->volname[0]) 274 - add_uevent_var(env, "PARTNAME=%s", part->info->volname); 275 return 0; 276 } 277 ··· 280 .uevent = part_uevent, 281 }; 282 283 - static void hd_struct_free_work(struct work_struct *work) 284 - { 285 - struct hd_struct *part = 286 - container_of(to_rcu_work(work), struct hd_struct, rcu_work); 287 - struct gendisk *disk = part_to_disk(part); 288 - 289 - /* 290 - * Release the disk reference acquired in delete_partition here. 291 - * We can't release it in hd_struct_free because the final put_device 292 - * needs process context and thus can't be run directly from a 293 - * percpu_ref ->release handler. 294 - */ 295 - put_device(disk_to_dev(disk)); 296 - 297 - part->start_sect = 0; 298 - part->nr_sects = 0; 299 - part_stat_set_all(part, 0); 300 - put_device(part_to_dev(part)); 301 - } 302 - 303 - static void hd_struct_free(struct percpu_ref *ref) 304 - { 305 - struct hd_struct *part = container_of(ref, struct hd_struct, ref); 306 - struct gendisk *disk = part_to_disk(part); 307 - struct disk_part_tbl *ptbl = 308 - rcu_dereference_protected(disk->part_tbl, 1); 309 - 310 - rcu_assign_pointer(ptbl->last_lookup, NULL); 311 - 312 - INIT_RCU_WORK(&part->rcu_work, hd_struct_free_work); 313 - queue_rcu_work(system_wq, &part->rcu_work); 314 - } 315 - 316 - int hd_ref_init(struct hd_struct *part) 317 - { 318 - if (percpu_ref_init(&part->ref, hd_struct_free, 0, GFP_KERNEL)) 319 - return -ENOMEM; 320 - return 0; 321 - } 322 - 323 /* 324 * Must be called either with bd_mutex held, before a disk can be opened or 325 * after all disk users are gone. 
326 */ 327 - void delete_partition(struct hd_struct *part) 328 { 329 - struct gendisk *disk = part_to_disk(part); 330 struct disk_part_tbl *ptbl = 331 rcu_dereference_protected(disk->part_tbl, 1); 332 333 - /* 334 - * ->part_tbl is referenced in this part's release handler, so 335 - * we have to hold the disk device 336 - */ 337 - get_device(disk_to_dev(disk)); 338 - rcu_assign_pointer(ptbl->part[part->partno], NULL); 339 - kobject_put(part->holder_dir); 340 - device_del(part_to_dev(part)); 341 342 /* 343 - * Remove gendisk pointer from idr so that it cannot be looked up 344 - * while RCU period before freeing gendisk is running to prevent 345 - * use-after-free issues. Note that the device number stays 346 - * "in-use" until we really free the gendisk. 347 */ 348 - blk_invalidate_devt(part_devt(part)); 349 - percpu_ref_kill(&part->ref); 350 } 351 352 static ssize_t whole_disk_show(struct device *dev, ··· 316 * Must be called either with bd_mutex held, before a disk can be opened or 317 * after all disk users are gone. 318 */ 319 - static struct hd_struct *add_partition(struct gendisk *disk, int partno, 320 sector_t start, sector_t len, int flags, 321 struct partition_meta_info *info) 322 { 323 - struct hd_struct *p; 324 dev_t devt = MKDEV(0, 0); 325 struct device *ddev = disk_to_dev(disk); 326 struct device *pdev; 327 struct disk_part_tbl *ptbl; 328 const char *dname; 329 int err; ··· 354 if (ptbl->part[partno]) 355 return ERR_PTR(-EBUSY); 356 357 - p = kzalloc(sizeof(*p), GFP_KERNEL); 358 - if (!p) 359 - return ERR_PTR(-EBUSY); 360 361 - p->dkstats = alloc_percpu(struct disk_stats); 362 - if (!p->dkstats) { 363 - err = -ENOMEM; 364 - goto out_free; 365 - } 366 - 367 - hd_sects_seq_init(p); 368 - pdev = part_to_dev(p); 369 - 370 - p->start_sect = start; 371 - p->nr_sects = len; 372 - p->partno = partno; 373 - p->policy = get_disk_ro(disk); 374 375 if (info) { 376 - struct partition_meta_info *pinfo; 377 - 378 - pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id); 379 - if (!pinfo) { 380 - err = -ENOMEM; 381 - goto out_free_stats; 382 - } 383 - memcpy(pinfo, info, sizeof(*info)); 384 - p->info = pinfo; 385 } 386 387 dname = dev_name(ddev); 388 if (isdigit(dname[strlen(dname) - 1])) 389 dev_set_name(pdev, "%sp%d", dname, partno); ··· 381 pdev->type = &part_type; 382 pdev->parent = ddev; 383 384 - err = blk_alloc_devt(p, &devt); 385 if (err) 386 - goto out_free_info; 387 pdev->devt = devt; 388 389 /* delay uevent until 'holders' subdir is created */ ··· 393 goto out_put; 394 395 err = -ENOMEM; 396 - p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); 397 - if (!p->holder_dir) 398 goto out_del; 399 400 dev_set_uevent_suppress(pdev, 0); ··· 404 goto out_del; 405 } 406 407 - err = hd_ref_init(p); 408 - if (err) { 409 - if (flags & ADDPART_FLAG_WHOLEDISK) 410 - goto out_remove_file; 411 - goto out_del; 412 - } 413 - 414 /* everything is up and running, commence */ 415 - rcu_assign_pointer(ptbl->part[partno], p); 416 417 /* suppress uevent if the disk suppresses it */ 418 if (!dev_get_uevent_suppress(ddev)) 419 kobject_uevent(&pdev->kobj, KOBJ_ADD); 420 - return p; 421 422 - out_free_info: 423 - kfree(p->info); 424 - out_free_stats: 425 - free_percpu(p->dkstats); 426 - out_free: 427 - kfree(p); 428 return ERR_PTR(err); 429 - out_remove_file: 430 - device_remove_file(pdev, &dev_attr_whole_disk); 431 out_del: 432 - kobject_put(p->holder_dir); 433 device_del(pdev); 434 out_put: 435 put_device(pdev); ··· 428 sector_t length, int skip_partno) 429 { 430 struct disk_part_iter 
piter; 431 - struct hd_struct *part; 432 bool overlap = false; 433 434 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 435 while ((part = disk_part_iter_next(&piter))) { 436 - if (part->partno == skip_partno || 437 - start >= part->start_sect + part->nr_sects || 438 - start + length <= part->start_sect) 439 continue; 440 overlap = true; 441 break; ··· 448 int bdev_add_partition(struct block_device *bdev, int partno, 449 sector_t start, sector_t length) 450 { 451 - struct hd_struct *part; 452 453 mutex_lock(&bdev->bd_mutex); 454 if (partition_overlaps(bdev->bd_disk, start, length, -1)) { ··· 464 465 int bdev_del_partition(struct block_device *bdev, int partno) 466 { 467 - struct block_device *bdevp; 468 - struct hd_struct *part = NULL; 469 int ret; 470 471 - bdevp = bdget_disk(bdev->bd_disk, partno); 472 - if (!bdevp) 473 return -ENXIO; 474 475 - mutex_lock(&bdevp->bd_mutex); 476 mutex_lock_nested(&bdev->bd_mutex, 1); 477 478 - ret = -ENXIO; 479 - part = disk_get_part(bdev->bd_disk, partno); 480 - if (!part) 481 - goto out_unlock; 482 - 483 ret = -EBUSY; 484 - if (bdevp->bd_openers) 485 goto out_unlock; 486 487 - sync_blockdev(bdevp); 488 - invalidate_bdev(bdevp); 489 490 delete_partition(part); 491 ret = 0; 492 out_unlock: 493 mutex_unlock(&bdev->bd_mutex); 494 - mutex_unlock(&bdevp->bd_mutex); 495 - bdput(bdevp); 496 - if (part) 497 - disk_put_part(part); 498 return ret; 499 } 500 501 int bdev_resize_partition(struct block_device *bdev, int partno, 502 sector_t start, sector_t length) 503 { 504 - struct block_device *bdevp; 505 - struct hd_struct *part; 506 int ret = 0; 507 508 - part = disk_get_part(bdev->bd_disk, partno); 509 if (!part) 510 return -ENXIO; 511 512 - ret = -ENOMEM; 513 - bdevp = bdget_part(part); 514 - if (!bdevp) 515 - goto out_put_part; 516 - 517 - mutex_lock(&bdevp->bd_mutex); 518 mutex_lock_nested(&bdev->bd_mutex, 1); 519 - 520 ret = -EINVAL; 521 - if (start != part->start_sect) 522 goto out_unlock; 523 524 ret = -EBUSY; 525 if (partition_overlaps(bdev->bd_disk, start, length, partno)) 526 goto out_unlock; 527 528 - part_nr_sects_write(part, length); 529 - bd_set_nr_sectors(bdevp, length); 530 531 ret = 0; 532 out_unlock: 533 - mutex_unlock(&bdevp->bd_mutex); 534 mutex_unlock(&bdev->bd_mutex); 535 - bdput(bdevp); 536 - out_put_part: 537 - disk_put_part(part); 538 return ret; 539 } 540 ··· 539 int blk_drop_partitions(struct block_device *bdev) 540 { 541 struct disk_part_iter piter; 542 - struct hd_struct *part; 543 544 if (bdev->bd_part_count) 545 return -EBUSY; ··· 564 { 565 sector_t size = state->parts[p].size; 566 sector_t from = state->parts[p].from; 567 - struct hd_struct *part; 568 569 if (!size) 570 return true; ··· 604 605 if (IS_BUILTIN(CONFIG_BLK_DEV_MD) && 606 (state->parts[p].flags & ADDPART_FLAG_RAID)) 607 - md_autodetect_dev(part_to_dev(part)->devt); 608 609 return true; 610 }
··· 85 NULL 86 }; 87 88 + static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) 89 + { 90 + spin_lock(&bdev->bd_size_lock); 91 + i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); 92 + spin_unlock(&bdev->bd_size_lock); 93 + } 94 + 95 static struct parsed_partitions *allocate_partitions(struct gendisk *hd) 96 { 97 struct parsed_partitions *state; ··· 175 static ssize_t part_partition_show(struct device *dev, 176 struct device_attribute *attr, char *buf) 177 { 178 + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_partno); 179 } 180 181 static ssize_t part_start_show(struct device *dev, 182 struct device_attribute *attr, char *buf) 183 { 184 + return sprintf(buf, "%llu\n", dev_to_bdev(dev)->bd_start_sect); 185 } 186 187 static ssize_t part_ro_show(struct device *dev, 188 struct device_attribute *attr, char *buf) 189 { 190 + return sprintf(buf, "%d\n", dev_to_bdev(dev)->bd_read_only); 191 } 192 193 static ssize_t part_alignment_offset_show(struct device *dev, 194 struct device_attribute *attr, char *buf) 195 { 196 + struct block_device *bdev = dev_to_bdev(dev); 197 198 return sprintf(buf, "%u\n", 199 + queue_limit_alignment_offset(&bdev->bd_disk->queue->limits, 200 + bdev->bd_start_sect)); 201 } 202 203 static ssize_t part_discard_alignment_show(struct device *dev, 204 struct device_attribute *attr, char *buf) 205 { 206 + struct block_device *bdev = dev_to_bdev(dev); 207 208 return sprintf(buf, "%u\n", 209 + queue_limit_discard_alignment(&bdev->bd_disk->queue->limits, 210 + bdev->bd_start_sect)); 211 } 212 213 static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); ··· 257 258 static void part_release(struct device *dev) 259 { 260 blk_free_devt(dev->devt); 261 + bdput(dev_to_bdev(dev)); 262 } 263 264 static int part_uevent(struct device *dev, struct kobj_uevent_env *env) 265 { 266 + struct block_device *part = dev_to_bdev(dev); 267 268 + add_uevent_var(env, "PARTN=%u", part->bd_partno); 269 + if (part->bd_meta_info && part->bd_meta_info->volname[0]) 270 + add_uevent_var(env, "PARTNAME=%s", part->bd_meta_info->volname); 271 return 0; 272 } 273 ··· 280 .uevent = part_uevent, 281 }; 282 283 /* 284 * Must be called either with bd_mutex held, before a disk can be opened or 285 * after all disk users are gone. 286 */ 287 + void delete_partition(struct block_device *part) 288 { 289 + struct gendisk *disk = part->bd_disk; 290 struct disk_part_tbl *ptbl = 291 rcu_dereference_protected(disk->part_tbl, 1); 292 293 + rcu_assign_pointer(ptbl->part[part->bd_partno], NULL); 294 + rcu_assign_pointer(ptbl->last_lookup, NULL); 295 + 296 + kobject_put(part->bd_holder_dir); 297 + device_del(&part->bd_device); 298 299 /* 300 + * Remove the block device from the inode hash, so that it cannot be 301 + * looked up any more even when openers still hold references. 302 */ 303 + remove_inode_hash(part->bd_inode); 304 + 305 + put_device(&part->bd_device); 306 } 307 308 static ssize_t whole_disk_show(struct device *dev, ··· 360 * Must be called either with bd_mutex held, before a disk can be opened or 361 * after all disk users are gone. 
362 */ 363 + static struct block_device *add_partition(struct gendisk *disk, int partno, 364 sector_t start, sector_t len, int flags, 365 struct partition_meta_info *info) 366 { 367 dev_t devt = MKDEV(0, 0); 368 struct device *ddev = disk_to_dev(disk); 369 struct device *pdev; 370 + struct block_device *bdev; 371 struct disk_part_tbl *ptbl; 372 const char *dname; 373 int err; ··· 398 if (ptbl->part[partno]) 399 return ERR_PTR(-EBUSY); 400 401 + bdev = bdev_alloc(disk, partno); 402 + if (!bdev) 403 + return ERR_PTR(-ENOMEM); 404 405 + bdev->bd_start_sect = start; 406 + bdev_set_nr_sectors(bdev, len); 407 + bdev->bd_read_only = get_disk_ro(disk); 408 409 if (info) { 410 + err = -ENOMEM; 411 + bdev->bd_meta_info = kmemdup(info, sizeof(*info), GFP_KERNEL); 412 + if (!bdev->bd_meta_info) 413 + goto out_bdput; 414 } 415 416 + pdev = &bdev->bd_device; 417 dname = dev_name(ddev); 418 if (isdigit(dname[strlen(dname) - 1])) 419 dev_set_name(pdev, "%sp%d", dname, partno); ··· 439 pdev->type = &part_type; 440 pdev->parent = ddev; 441 442 + err = blk_alloc_devt(bdev, &devt); 443 if (err) 444 + goto out_bdput; 445 pdev->devt = devt; 446 447 /* delay uevent until 'holders' subdir is created */ ··· 451 goto out_put; 452 453 err = -ENOMEM; 454 + bdev->bd_holder_dir = kobject_create_and_add("holders", &pdev->kobj); 455 + if (!bdev->bd_holder_dir) 456 goto out_del; 457 458 dev_set_uevent_suppress(pdev, 0); ··· 462 goto out_del; 463 } 464 465 /* everything is up and running, commence */ 466 + bdev_add(bdev, devt); 467 + rcu_assign_pointer(ptbl->part[partno], bdev); 468 469 /* suppress uevent if the disk suppresses it */ 470 if (!dev_get_uevent_suppress(ddev)) 471 kobject_uevent(&pdev->kobj, KOBJ_ADD); 472 + return bdev; 473 474 + out_bdput: 475 + bdput(bdev); 476 return ERR_PTR(err); 477 out_del: 478 + kobject_put(bdev->bd_holder_dir); 479 device_del(pdev); 480 out_put: 481 put_device(pdev); ··· 498 sector_t length, int skip_partno) 499 { 500 struct disk_part_iter piter; 501 + struct block_device *part; 502 bool overlap = false; 503 504 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); 505 while ((part = disk_part_iter_next(&piter))) { 506 + if (part->bd_partno == skip_partno || 507 + start >= part->bd_start_sect + bdev_nr_sectors(part) || 508 + start + length <= part->bd_start_sect) 509 continue; 510 overlap = true; 511 break; ··· 518 int bdev_add_partition(struct block_device *bdev, int partno, 519 sector_t start, sector_t length) 520 { 521 + struct block_device *part; 522 523 mutex_lock(&bdev->bd_mutex); 524 if (partition_overlaps(bdev->bd_disk, start, length, -1)) { ··· 534 535 int bdev_del_partition(struct block_device *bdev, int partno) 536 { 537 + struct block_device *part; 538 int ret; 539 540 + part = bdget_disk(bdev->bd_disk, partno); 541 + if (!part) 542 return -ENXIO; 543 544 + mutex_lock(&part->bd_mutex); 545 mutex_lock_nested(&bdev->bd_mutex, 1); 546 547 ret = -EBUSY; 548 + if (part->bd_openers) 549 goto out_unlock; 550 551 + sync_blockdev(part); 552 + invalidate_bdev(part); 553 554 delete_partition(part); 555 ret = 0; 556 out_unlock: 557 mutex_unlock(&bdev->bd_mutex); 558 + mutex_unlock(&part->bd_mutex); 559 + bdput(part); 560 return ret; 561 } 562 563 int bdev_resize_partition(struct block_device *bdev, int partno, 564 sector_t start, sector_t length) 565 { 566 + struct block_device *part; 567 int ret = 0; 568 569 + part = bdget_disk(bdev->bd_disk, partno); 570 if (!part) 571 return -ENXIO; 572 573 + mutex_lock(&part->bd_mutex); 574 mutex_lock_nested(&bdev->bd_mutex, 1); 575 ret = 
-EINVAL; 576 + if (start != part->bd_start_sect) 577 goto out_unlock; 578 579 ret = -EBUSY; 580 if (partition_overlaps(bdev->bd_disk, start, length, partno)) 581 goto out_unlock; 582 583 + bdev_set_nr_sectors(part, length); 584 585 ret = 0; 586 out_unlock: 587 + mutex_unlock(&part->bd_mutex); 588 mutex_unlock(&bdev->bd_mutex); 589 + bdput(part); 590 return ret; 591 } 592 ··· 627 int blk_drop_partitions(struct block_device *bdev) 628 { 629 struct disk_part_iter piter; 630 + struct block_device *part; 631 632 if (bdev->bd_part_count) 633 return -EBUSY; ··· 652 { 653 sector_t size = state->parts[p].size; 654 sector_t from = state->parts[p].from; 655 + struct block_device *part; 656 657 if (!size) 658 return true; ··· 692 693 if (IS_BUILTIN(CONFIG_BLK_DEV_MD) && 694 (state->parts[p].flags & ADDPART_FLAG_RAID)) 695 + md_autodetect_dev(part->bd_dev); 696 697 return true; 698 }
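The sysfs and partition helpers above all follow the same accessor pattern: partition geometry now lives directly in struct block_device, reached through dev_to_bdev() and read back with bdev_nr_sectors(). A minimal, purely hypothetical attribute (not part of the patch) that reports a partition's last sector would look like this:

/* Hypothetical example, not from the patch: report the last sector of
 * a partition using the struct block_device fields that replace the
 * old hd_struct ones. */
static ssize_t part_end_show(struct device *dev,
			     struct device_attribute *attr, char *buf)
{
	struct block_device *bdev = dev_to_bdev(dev);

	return sprintf(buf, "%llu\n",
		       (unsigned long long)(bdev->bd_start_sect +
					    bdev_nr_sectors(bdev) - 1));
}
static DEVICE_ATTR(end, 0444, part_end_show, NULL);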
+56 -44
drivers/block/amiflop.c
··· 201 int busy; /* true when drive is active */ 202 int dirty; /* true when trackbuf is not on disk */ 203 int status; /* current error code for unit */ 204 - struct gendisk *gendisk; 205 struct blk_mq_tag_set tag_set; 206 }; 207 ··· 1669 return -EBUSY; 1670 } 1671 1672 if (mode & (FMODE_READ|FMODE_WRITE)) { 1673 bdev_check_media_change(bdev); 1674 if (mode & FMODE_WRITE) { ··· 1700 unit[drive].dtype=&data_types[system]; 1701 unit[drive].blocks=unit[drive].type->heads*unit[drive].type->tracks* 1702 data_types[system].sects*unit[drive].type->sect_mult; 1703 - set_capacity(unit[drive].gendisk, unit[drive].blocks); 1704 1705 printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive, 1706 unit[drive].type->name, data_types[system].name); ··· 1777 .queue_rq = amiflop_queue_rq, 1778 }; 1779 1780 - static struct gendisk *fd_alloc_disk(int drive) 1781 { 1782 struct gendisk *disk; 1783 1784 disk = alloc_disk(1); 1785 if (!disk) 1786 goto out; 1787 - 1788 - disk->queue = blk_mq_init_sq_queue(&unit[drive].tag_set, &amiflop_mq_ops, 1789 - 2, BLK_MQ_F_SHOULD_MERGE); 1790 - if (IS_ERR(disk->queue)) { 1791 - disk->queue = NULL; 1792 goto out_put_disk; 1793 - } 1794 1795 - unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); 1796 - if (!unit[drive].trackbuf) 1797 - goto out_cleanup_queue; 1798 1799 - return disk; 1800 1801 - out_cleanup_queue: 1802 - blk_cleanup_queue(disk->queue); 1803 - disk->queue = NULL; 1804 - blk_mq_free_tag_set(&unit[drive].tag_set); 1805 out_put_disk: 1806 put_disk(disk); 1807 out: 1808 unit[drive].type->code = FD_NODRIVE; 1809 - return NULL; 1810 } 1811 1812 static int __init fd_probe_drives(void) ··· 1849 drives=0; 1850 nomem=0; 1851 for(drive=0;drive<FD_MAX_UNITS;drive++) { 1852 - struct gendisk *disk; 1853 fd_probe(drive); 1854 if (unit[drive].type->code == FD_NODRIVE) 1855 continue; 1856 1857 - disk = fd_alloc_disk(drive); 1858 - if (!disk) { 1859 pr_cont(" no mem for fd%d", drive); 1860 nomem = 1; 1861 continue; 1862 } 1863 - unit[drive].gendisk = disk; 1864 drives++; 1865 - 1866 - pr_cont(" fd%d",drive); 1867 - disk->major = FLOPPY_MAJOR; 1868 - disk->first_minor = drive; 1869 - disk->fops = &floppy_fops; 1870 - disk->events = DISK_EVENT_MEDIA_CHANGE; 1871 - sprintf(disk->disk_name, "fd%d", drive); 1872 - disk->private_data = &unit[drive]; 1873 - set_capacity(disk, 880*2); 1874 - add_disk(disk); 1875 } 1876 if ((drives > 0) || (nomem == 0)) { 1877 if (drives == 0) ··· 1870 return -ENOMEM; 1871 } 1872 1873 - static struct kobject *floppy_find(dev_t dev, int *part, void *data) 1874 - { 1875 - int drive = *part & 3; 1876 - if (unit[drive].type->code == FD_NODRIVE) 1877 - return NULL; 1878 - *part = 0; 1879 - return get_disk_and_module(unit[drive].gendisk); 1880 - } 1881 - 1882 static int __init amiga_floppy_probe(struct platform_device *pdev) 1883 { 1884 int i, ret; ··· 1898 ret = -ENODEV; 1899 if (fd_probe_drives() < 1) /* No usable drives */ 1900 goto out_probe; 1901 - 1902 - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, 1903 - floppy_find, NULL, NULL); 1904 1905 /* initialize variables */ 1906 timer_setup(&motor_on_timer, motor_on_callback, 0);
··· 201 int busy; /* true when drive is active */ 202 int dirty; /* true when trackbuf is not on disk */ 203 int status; /* current error code for unit */ 204 + struct gendisk *gendisk[2]; 205 struct blk_mq_tag_set tag_set; 206 }; 207 ··· 1669 return -EBUSY; 1670 } 1671 1672 + if (unit[drive].type->code == FD_NODRIVE) { 1673 + mutex_unlock(&amiflop_mutex); 1674 + return -ENXIO; 1675 + } 1676 + 1677 if (mode & (FMODE_READ|FMODE_WRITE)) { 1678 bdev_check_media_change(bdev); 1679 if (mode & FMODE_WRITE) { ··· 1695 unit[drive].dtype=&data_types[system]; 1696 unit[drive].blocks=unit[drive].type->heads*unit[drive].type->tracks* 1697 data_types[system].sects*unit[drive].type->sect_mult; 1698 + set_capacity(unit[drive].gendisk[system], unit[drive].blocks); 1699 1700 printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive, 1701 unit[drive].type->name, data_types[system].name); ··· 1772 .queue_rq = amiflop_queue_rq, 1773 }; 1774 1775 + static int fd_alloc_disk(int drive, int system) 1776 { 1777 struct gendisk *disk; 1778 1779 disk = alloc_disk(1); 1780 if (!disk) 1781 goto out; 1782 + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); 1783 + if (IS_ERR(disk->queue)) 1784 goto out_put_disk; 1785 1786 + disk->major = FLOPPY_MAJOR; 1787 + disk->first_minor = drive + system; 1788 + disk->fops = &floppy_fops; 1789 + disk->events = DISK_EVENT_MEDIA_CHANGE; 1790 + if (system) 1791 + sprintf(disk->disk_name, "fd%d_msdos", drive); 1792 + else 1793 + sprintf(disk->disk_name, "fd%d", drive); 1794 + disk->private_data = &unit[drive]; 1795 + set_capacity(disk, 880 * 2); 1796 1797 + unit[drive].gendisk[system] = disk; 1798 + add_disk(disk); 1799 + return 0; 1800 1801 out_put_disk: 1802 + disk->queue = NULL; 1803 put_disk(disk); 1804 out: 1805 + return -ENOMEM; 1806 + } 1807 + 1808 + static int fd_alloc_drive(int drive) 1809 + { 1810 + unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); 1811 + if (!unit[drive].trackbuf) 1812 + goto out; 1813 + 1814 + memset(&unit[drive].tag_set, 0, sizeof(unit[drive].tag_set)); 1815 + unit[drive].tag_set.ops = &amiflop_mq_ops; 1816 + unit[drive].tag_set.nr_hw_queues = 1; 1817 + unit[drive].tag_set.nr_maps = 1; 1818 + unit[drive].tag_set.queue_depth = 2; 1819 + unit[drive].tag_set.numa_node = NUMA_NO_NODE; 1820 + unit[drive].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 1821 + if (blk_mq_alloc_tag_set(&unit[drive].tag_set)) 1822 + goto out_cleanup_trackbuf; 1823 + 1824 + pr_cont(" fd%d", drive); 1825 + 1826 + if (fd_alloc_disk(drive, 0) || fd_alloc_disk(drive, 1)) 1827 + goto out_cleanup_tagset; 1828 + return 0; 1829 + 1830 + out_cleanup_tagset: 1831 + blk_mq_free_tag_set(&unit[drive].tag_set); 1832 + out_cleanup_trackbuf: 1833 + kfree(unit[drive].trackbuf); 1834 + out: 1835 unit[drive].type->code = FD_NODRIVE; 1836 + return -ENOMEM; 1837 } 1838 1839 static int __init fd_probe_drives(void) ··· 1812 drives=0; 1813 nomem=0; 1814 for(drive=0;drive<FD_MAX_UNITS;drive++) { 1815 fd_probe(drive); 1816 if (unit[drive].type->code == FD_NODRIVE) 1817 continue; 1818 1819 + if (fd_alloc_drive(drive) < 0) { 1820 pr_cont(" no mem for fd%d", drive); 1821 nomem = 1; 1822 continue; 1823 } 1824 drives++; 1825 } 1826 if ((drives > 0) || (nomem == 0)) { 1827 if (drives == 0) ··· 1846 return -ENOMEM; 1847 } 1848 1849 static int __init amiga_floppy_probe(struct platform_device *pdev) 1850 { 1851 int i, ret; ··· 1883 ret = -ENODEV; 1884 if (fd_probe_drives() < 1) /* No usable drives */ 1885 goto out_probe; 1886 1887 /* initialize variables */ 1888 timer_setup(&motor_on_timer, 
motor_on_callback, 0);
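The queue setup above is the shape every converted floppy driver in this series uses: fill in the blk_mq_tag_set by hand, allocate it once per drive, and create each request queue from it with blk_mq_init_queue() instead of the single-queue blk_mq_init_sq_queue() wrapper. A condensed sketch of that sequence, with invented names and the same depth-2/SHOULD_MERGE settings:

/* Sketch only; the "example_*" names are invented.  One tag set per
 * drive can now back several gendisks (fd%d and fd%d_msdos above). */
static int example_init_queue(struct blk_mq_tag_set *set,
			      const struct blk_mq_ops *ops,
			      struct gendisk *disk)
{
	memset(set, 0, sizeof(*set));
	set->ops = ops;
	set->nr_hw_queues = 1;
	set->nr_maps = 1;
	set->queue_depth = 2;
	set->numa_node = NUMA_NO_NODE;
	set->flags = BLK_MQ_F_SHOULD_MERGE;
	if (blk_mq_alloc_tag_set(set))
		return -ENOMEM;

	disk->queue = blk_mq_init_queue(set);
	if (IS_ERR(disk->queue)) {
		int err = PTR_ERR(disk->queue);

		disk->queue = NULL;
		blk_mq_free_tag_set(set);
		return err;
	}
	return 0;
}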
+4 -11
drivers/block/aoe/aoecmd.c
··· 890 aoecmd_sleepwork(struct work_struct *work) 891 { 892 struct aoedev *d = container_of(work, struct aoedev, work); 893 - struct block_device *bd; 894 - u64 ssize; 895 896 if (d->flags & DEVFL_GDALLOC) 897 aoeblk_gdalloc(d); 898 899 if (d->flags & DEVFL_NEWSIZE) { 900 - ssize = get_capacity(d->gd); 901 - bd = bdget_disk(d->gd, 0); 902 - if (bd) { 903 - bd_set_nr_sectors(bd, ssize); 904 - bdput(bd); 905 - } 906 spin_lock_irq(&d->lock); 907 d->flags |= DEVFL_UP; 908 d->flags &= ~DEVFL_NEWSIZE; ··· 965 d->geo.start = 0; 966 if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) 967 return; 968 - if (d->gd != NULL) { 969 - set_capacity(d->gd, ssize); 970 d->flags |= DEVFL_NEWSIZE; 971 - } else 972 d->flags |= DEVFL_GDALLOC; 973 schedule_work(&d->work); 974 }
··· 890 aoecmd_sleepwork(struct work_struct *work) 891 { 892 struct aoedev *d = container_of(work, struct aoedev, work); 893 894 if (d->flags & DEVFL_GDALLOC) 895 aoeblk_gdalloc(d); 896 897 if (d->flags & DEVFL_NEWSIZE) { 898 + set_capacity_and_notify(d->gd, d->ssize); 899 + 900 spin_lock_irq(&d->lock); 901 d->flags |= DEVFL_UP; 902 d->flags &= ~DEVFL_NEWSIZE; ··· 971 d->geo.start = 0; 972 if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) 973 return; 974 + if (d->gd != NULL) 975 d->flags |= DEVFL_NEWSIZE; 976 + else 977 d->flags |= DEVFL_GDALLOC; 978 schedule_work(&d->work); 979 }
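The aoe change is the simplest instance of the capacity helper used across this series: set_capacity_and_notify() replaces the bdget_disk()/bd_set_nr_sectors()/bdput() sequence plus the manual uevent. Judging from the callers in this diff, it returns whether a change uevent was already emitted, so a driver that wants one unconditionally can fall back to kobject_uevent() itself; a hedged sketch:

/* Sketch only: propagate a new size to the gendisk.  The fallback
 * uevent mirrors what loop and nbd do further down; whether a driver
 * needs it depends on its userspace contract. */
static void example_resize(struct gendisk *disk, sector_t new_sectors)
{
	if (!set_capacity_and_notify(disk, new_sectors))
		kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
}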
+86 -49
drivers/block/ataflop.c
··· 297 unsigned int wpstat; /* current state of WP signal (for 298 disk change detection) */ 299 int flags; /* flags */ 300 - struct gendisk *disk; 301 int ref; 302 int type; 303 struct blk_mq_tag_set tag_set; ··· 723 724 static int do_format(int drive, int type, struct atari_format_descr *desc) 725 { 726 - struct request_queue *q = unit[drive].disk->queue; 727 unsigned char *p; 728 int sect, nsect; 729 unsigned long flags; 730 int ret; 731 732 blk_mq_freeze_queue(q); 733 blk_mq_quiesce_queue(q); 734 ··· 742 local_irq_restore(flags); 743 744 if (type) { 745 - if (--type >= NUM_DISK_MINORS || 746 minor2disktype[type].drive_types > DriveType) { 747 ret = -EINVAL; 748 goto out; ··· 1158 if (SUDT[-1].blocks > ReqBlock) { 1159 /* try another disk type */ 1160 SUDT--; 1161 - set_capacity(unit[SelectedDrive].disk, 1162 SUDT->blocks); 1163 } else 1164 Probing = 0; ··· 1173 /* record not found, but not probing. Maybe stretch wrong ? Restart probing */ 1174 if (SUD.autoprobe) { 1175 SUDT = atari_disk_type + StartDiskType[DriveType]; 1176 - set_capacity(unit[SelectedDrive].disk, 1177 SUDT->blocks); 1178 Probing = 1; 1179 } ··· 1519 if (!UDT) { 1520 Probing = 1; 1521 UDT = atari_disk_type + StartDiskType[DriveType]; 1522 - set_capacity(floppy->disk, UDT->blocks); 1523 UD.autoprobe = 1; 1524 } 1525 } ··· 1537 } 1538 type = minor2disktype[type].index; 1539 UDT = &atari_disk_type[type]; 1540 - set_capacity(floppy->disk, UDT->blocks); 1541 UD.autoprobe = 0; 1542 } 1543 ··· 1662 printk (KERN_INFO "floppy%d: setting %s %p!\n", 1663 drive, dtp->name, dtp); 1664 UDT = dtp; 1665 - set_capacity(floppy->disk, UDT->blocks); 1666 1667 if (cmd == FDDEFPRM) { 1668 /* save settings as permanent default type */ ··· 1706 return -EINVAL; 1707 1708 UDT = dtp; 1709 - set_capacity(floppy->disk, UDT->blocks); 1710 1711 return 0; 1712 case FDMSGON: ··· 1729 UDT = NULL; 1730 /* MSch: invalidate default_params */ 1731 default_params[drive].blocks = 0; 1732 - set_capacity(floppy->disk, MAX_DISK_SIZE * 2); 1733 fallthrough; 1734 case FDFMTEND: 1735 case FDFLUSH: ··· 1966 .commit_rqs = ataflop_commit_rqs, 1967 }; 1968 1969 - static struct kobject *floppy_find(dev_t dev, int *part, void *data) 1970 { 1971 - int drive = *part & 3; 1972 - int type = *part >> 2; 1973 if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS) 1974 - return NULL; 1975 - *part = 0; 1976 - return get_disk_and_module(unit[drive].disk); 1977 } 1978 1979 static int __init atari_floppy_init (void) ··· 2021 /* Amiga, Mac, ... 
don't have Atari-compatible floppy :-) */ 2022 return -ENODEV; 2023 2024 - if (register_blkdev(FLOPPY_MAJOR,"fd")) 2025 - return -EBUSY; 2026 2027 for (i = 0; i < FD_MAX_UNITS; i++) { 2028 - unit[i].disk = alloc_disk(1); 2029 - if (!unit[i].disk) { 2030 - ret = -ENOMEM; 2031 goto err; 2032 - } 2033 2034 - unit[i].disk->queue = blk_mq_init_sq_queue(&unit[i].tag_set, 2035 - &ataflop_mq_ops, 2, 2036 - BLK_MQ_F_SHOULD_MERGE); 2037 - if (IS_ERR(unit[i].disk->queue)) { 2038 - put_disk(unit[i].disk); 2039 - ret = PTR_ERR(unit[i].disk->queue); 2040 - unit[i].disk->queue = NULL; 2041 goto err; 2042 } 2043 } ··· 2070 for (i = 0; i < FD_MAX_UNITS; i++) { 2071 unit[i].track = -1; 2072 unit[i].flags = 0; 2073 - unit[i].disk->major = FLOPPY_MAJOR; 2074 - unit[i].disk->first_minor = i; 2075 - sprintf(unit[i].disk->disk_name, "fd%d", i); 2076 - unit[i].disk->fops = &floppy_fops; 2077 - unit[i].disk->events = DISK_EVENT_MEDIA_CHANGE; 2078 - unit[i].disk->private_data = &unit[i]; 2079 - set_capacity(unit[i].disk, MAX_DISK_SIZE * 2); 2080 - add_disk(unit[i].disk); 2081 } 2082 - 2083 - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, 2084 - floppy_find, NULL, NULL); 2085 2086 printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n", 2087 DriveType == 0 ? 'D' : DriveType == 1 ? 'H' : 'E', ··· 2082 2083 err: 2084 while (--i >= 0) { 2085 - struct gendisk *disk = unit[i].disk; 2086 - 2087 - blk_cleanup_queue(disk->queue); 2088 blk_mq_free_tag_set(&unit[i].tag_set); 2089 - put_disk(unit[i].disk); 2090 } 2091 2092 unregister_blkdev(FLOPPY_MAJOR, "fd"); 2093 return ret; 2094 } 2095 ··· 2134 2135 static void __exit atari_floppy_exit(void) 2136 { 2137 - int i; 2138 - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 2139 for (i = 0; i < FD_MAX_UNITS; i++) { 2140 - del_gendisk(unit[i].disk); 2141 - blk_cleanup_queue(unit[i].disk->queue); 2142 blk_mq_free_tag_set(&unit[i].tag_set); 2143 - put_disk(unit[i].disk); 2144 } 2145 unregister_blkdev(FLOPPY_MAJOR, "fd"); 2146
··· 297 unsigned int wpstat; /* current state of WP signal (for 298 disk change detection) */ 299 int flags; /* flags */ 300 + struct gendisk *disk[NUM_DISK_MINORS]; 301 int ref; 302 int type; 303 struct blk_mq_tag_set tag_set; ··· 723 724 static int do_format(int drive, int type, struct atari_format_descr *desc) 725 { 726 + struct request_queue *q; 727 unsigned char *p; 728 int sect, nsect; 729 unsigned long flags; 730 int ret; 731 732 + if (type) 733 + type--; 734 + 735 + q = unit[drive].disk[type]->queue; 736 blk_mq_freeze_queue(q); 737 blk_mq_quiesce_queue(q); 738 ··· 738 local_irq_restore(flags); 739 740 if (type) { 741 + if (type >= NUM_DISK_MINORS || 742 minor2disktype[type].drive_types > DriveType) { 743 ret = -EINVAL; 744 goto out; ··· 1154 if (SUDT[-1].blocks > ReqBlock) { 1155 /* try another disk type */ 1156 SUDT--; 1157 + set_capacity(unit[SelectedDrive].disk[0], 1158 SUDT->blocks); 1159 } else 1160 Probing = 0; ··· 1169 /* record not found, but not probing. Maybe stretch wrong ? Restart probing */ 1170 if (SUD.autoprobe) { 1171 SUDT = atari_disk_type + StartDiskType[DriveType]; 1172 + set_capacity(unit[SelectedDrive].disk[0], 1173 SUDT->blocks); 1174 Probing = 1; 1175 } ··· 1515 if (!UDT) { 1516 Probing = 1; 1517 UDT = atari_disk_type + StartDiskType[DriveType]; 1518 + set_capacity(bd->rq->rq_disk, UDT->blocks); 1519 UD.autoprobe = 1; 1520 } 1521 } ··· 1533 } 1534 type = minor2disktype[type].index; 1535 UDT = &atari_disk_type[type]; 1536 + set_capacity(bd->rq->rq_disk, UDT->blocks); 1537 UD.autoprobe = 0; 1538 } 1539 ··· 1658 printk (KERN_INFO "floppy%d: setting %s %p!\n", 1659 drive, dtp->name, dtp); 1660 UDT = dtp; 1661 + set_capacity(disk, UDT->blocks); 1662 1663 if (cmd == FDDEFPRM) { 1664 /* save settings as permanent default type */ ··· 1702 return -EINVAL; 1703 1704 UDT = dtp; 1705 + set_capacity(disk, UDT->blocks); 1706 1707 return 0; 1708 case FDMSGON: ··· 1725 UDT = NULL; 1726 /* MSch: invalidate default_params */ 1727 default_params[drive].blocks = 0; 1728 + set_capacity(disk, MAX_DISK_SIZE * 2); 1729 fallthrough; 1730 case FDFMTEND: 1731 case FDFLUSH: ··· 1962 .commit_rqs = ataflop_commit_rqs, 1963 }; 1964 1965 + static int ataflop_alloc_disk(unsigned int drive, unsigned int type) 1966 { 1967 + struct gendisk *disk; 1968 + int ret; 1969 + 1970 + disk = alloc_disk(1); 1971 + if (!disk) 1972 + return -ENOMEM; 1973 + 1974 + disk->queue = blk_mq_init_queue(&unit[drive].tag_set); 1975 + if (IS_ERR(disk->queue)) { 1976 + ret = PTR_ERR(disk->queue); 1977 + disk->queue = NULL; 1978 + put_disk(disk); 1979 + return ret; 1980 + } 1981 + 1982 + disk->major = FLOPPY_MAJOR; 1983 + disk->first_minor = drive + (type << 2); 1984 + sprintf(disk->disk_name, "fd%d", drive); 1985 + disk->fops = &floppy_fops; 1986 + disk->events = DISK_EVENT_MEDIA_CHANGE; 1987 + disk->private_data = &unit[drive]; 1988 + set_capacity(disk, MAX_DISK_SIZE * 2); 1989 + 1990 + unit[drive].disk[type] = disk; 1991 + return 0; 1992 + } 1993 + 1994 + static DEFINE_MUTEX(ataflop_probe_lock); 1995 + 1996 + static void ataflop_probe(dev_t dev) 1997 + { 1998 + int drive = MINOR(dev) & 3; 1999 + int type = MINOR(dev) >> 2; 2000 + 2001 if (drive >= FD_MAX_UNITS || type > NUM_DISK_MINORS) 2002 + return; 2003 + mutex_lock(&ataflop_probe_lock); 2004 + if (!unit[drive].disk[type]) { 2005 + if (ataflop_alloc_disk(drive, type) == 0) 2006 + add_disk(unit[drive].disk[type]); 2007 + } 2008 + mutex_unlock(&ataflop_probe_lock); 2009 } 2010 2011 static int __init atari_floppy_init (void) ··· 1981 /* Amiga, Mac, ... 
don't have Atari-compatible floppy :-) */ 1982 return -ENODEV; 1983 1984 + mutex_lock(&ataflop_probe_lock); 1985 + ret = __register_blkdev(FLOPPY_MAJOR, "fd", ataflop_probe); 1986 + if (ret) 1987 + goto out_unlock; 1988 1989 for (i = 0; i < FD_MAX_UNITS; i++) { 1990 + memset(&unit[i].tag_set, 0, sizeof(unit[i].tag_set)); 1991 + unit[i].tag_set.ops = &ataflop_mq_ops; 1992 + unit[i].tag_set.nr_hw_queues = 1; 1993 + unit[i].tag_set.nr_maps = 1; 1994 + unit[i].tag_set.queue_depth = 2; 1995 + unit[i].tag_set.numa_node = NUMA_NO_NODE; 1996 + unit[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 1997 + ret = blk_mq_alloc_tag_set(&unit[i].tag_set); 1998 + if (ret) 1999 goto err; 2000 2001 + ret = ataflop_alloc_disk(i, 0); 2002 + if (ret) { 2003 + blk_mq_free_tag_set(&unit[i].tag_set); 2004 goto err; 2005 } 2006 } ··· 2027 for (i = 0; i < FD_MAX_UNITS; i++) { 2028 unit[i].track = -1; 2029 unit[i].flags = 0; 2030 + add_disk(unit[i].disk[0]); 2031 } 2032 2033 printk(KERN_INFO "Atari floppy driver: max. %cD, %strack buffering\n", 2034 DriveType == 0 ? 'D' : DriveType == 1 ? 'H' : 'E', ··· 2049 2050 err: 2051 while (--i >= 0) { 2052 + blk_cleanup_queue(unit[i].disk[0]->queue); 2053 + put_disk(unit[i].disk[0]); 2054 blk_mq_free_tag_set(&unit[i].tag_set); 2055 } 2056 2057 unregister_blkdev(FLOPPY_MAJOR, "fd"); 2058 + out_unlock: 2059 + mutex_unlock(&ataflop_probe_lock); 2060 return ret; 2061 } 2062 ··· 2101 2102 static void __exit atari_floppy_exit(void) 2103 { 2104 + int i, type; 2105 + 2106 for (i = 0; i < FD_MAX_UNITS; i++) { 2107 + for (type = 0; type < NUM_DISK_MINORS; type++) { 2108 + if (!unit[i].disk[type]) 2109 + continue; 2110 + del_gendisk(unit[i].disk[type]); 2111 + blk_cleanup_queue(unit[i].disk[type]->queue); 2112 + put_disk(unit[i].disk[type]); 2113 + } 2114 blk_mq_free_tag_set(&unit[i].tag_set); 2115 } 2116 unregister_blkdev(FLOPPY_MAJOR, "fd"); 2117
+11 -28
drivers/block/brd.c
··· 426 kfree(brd); 427 } 428 429 - static struct brd_device *brd_init_one(int i, bool *new) 430 { 431 struct brd_device *brd; 432 433 - *new = false; 434 list_for_each_entry(brd, &brd_devices, brd_list) { 435 if (brd->brd_number == i) 436 - goto out; 437 } 438 439 brd = brd_alloc(i); ··· 443 add_disk(brd->brd_disk); 444 list_add_tail(&brd->brd_list, &brd_devices); 445 } 446 - *new = true; 447 - out: 448 - return brd; 449 } 450 451 static void brd_del_one(struct brd_device *brd) ··· 453 list_del(&brd->brd_list); 454 del_gendisk(brd->brd_disk); 455 brd_free(brd); 456 - } 457 - 458 - static struct kobject *brd_probe(dev_t dev, int *part, void *data) 459 - { 460 - struct brd_device *brd; 461 - struct kobject *kobj; 462 - bool new; 463 - 464 - mutex_lock(&brd_devices_mutex); 465 - brd = brd_init_one(MINOR(dev) / max_part, &new); 466 - kobj = brd ? get_disk_and_module(brd->brd_disk) : NULL; 467 - mutex_unlock(&brd_devices_mutex); 468 - 469 - if (new) 470 - *part = 0; 471 - 472 - return kobj; 473 } 474 475 static inline void brd_check_and_reset_par(void) ··· 494 * dynamically. 495 */ 496 497 - if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) 498 return -EIO; 499 500 brd_check_and_reset_par(); 501 502 for (i = 0; i < rd_nr; i++) { 503 brd = brd_alloc(i); 504 if (!brd) ··· 517 brd->brd_disk->queue = brd->brd_queue; 518 add_disk(brd->brd_disk); 519 } 520 - 521 - blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, 522 - THIS_MODULE, brd_probe, NULL, NULL); 523 524 pr_info("brd: module loaded\n"); 525 return 0; ··· 527 list_del(&brd->brd_list); 528 brd_free(brd); 529 } 530 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 531 532 pr_info("brd: module NOT loaded !!!\n"); ··· 541 list_for_each_entry_safe(brd, next, &brd_devices, brd_list) 542 brd_del_one(brd); 543 544 - blk_unregister_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS); 545 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 546 547 pr_info("brd: module unloaded\n");
··· 426 kfree(brd); 427 } 428 429 + static void brd_probe(dev_t dev) 430 { 431 struct brd_device *brd; 432 + int i = MINOR(dev) / max_part; 433 434 + mutex_lock(&brd_devices_mutex); 435 list_for_each_entry(brd, &brd_devices, brd_list) { 436 if (brd->brd_number == i) 437 + goto out_unlock; 438 } 439 440 brd = brd_alloc(i); ··· 442 add_disk(brd->brd_disk); 443 list_add_tail(&brd->brd_list, &brd_devices); 444 } 445 + 446 + out_unlock: 447 + mutex_unlock(&brd_devices_mutex); 448 } 449 450 static void brd_del_one(struct brd_device *brd) ··· 452 list_del(&brd->brd_list); 453 del_gendisk(brd->brd_disk); 454 brd_free(brd); 455 } 456 457 static inline void brd_check_and_reset_par(void) ··· 510 * dynamically. 511 */ 512 513 + if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) 514 return -EIO; 515 516 brd_check_and_reset_par(); 517 518 + mutex_lock(&brd_devices_mutex); 519 for (i = 0; i < rd_nr; i++) { 520 brd = brd_alloc(i); 521 if (!brd) ··· 532 brd->brd_disk->queue = brd->brd_queue; 533 add_disk(brd->brd_disk); 534 } 535 + mutex_unlock(&brd_devices_mutex); 536 537 pr_info("brd: module loaded\n"); 538 return 0; ··· 544 list_del(&brd->brd_list); 545 brd_free(brd); 546 } 547 + mutex_unlock(&brd_devices_mutex); 548 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 549 550 pr_info("brd: module NOT loaded !!!\n"); ··· 557 list_for_each_entry_safe(brd, next, &brd_devices, brd_list) 558 brd_del_one(brd); 559 560 unregister_blkdev(RAMDISK_MAJOR, "ramdisk"); 561 562 pr_info("brd: module unloaded\n");
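brd, loop, ataflop and floppy all move to the same on-demand model: __register_blkdev() takes a probe callback that is invoked with the dev_t on first lookup, and the callback simply creates and add_disk()s the device if it does not exist yet; there is no kobject to return and no *part out-parameter. A sketch of the pattern with invented helper names:

/* Sketch only: example_lookup()/example_create() are hypothetical
 * stand-ins for a driver's own device table management. */
static DEFINE_MUTEX(example_probe_lock);

static void example_probe(dev_t dev)
{
	int idx = MINOR(dev);

	mutex_lock(&example_probe_lock);
	if (!example_lookup(idx))
		example_create(idx);	/* allocates the disk and calls add_disk() */
	mutex_unlock(&example_probe_lock);
}

static int __init example_init(void)
{
	/* returns 0 on success; the old blk_register_region() call goes away */
	return __register_blkdev(EXAMPLE_MAJOR, "example", example_probe);
}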
+2 -4
drivers/block/drbd/drbd_main.c
··· 2036 { 2037 char ppb[10]; 2038 2039 - set_capacity(device->vdisk, size); 2040 - revalidate_disk_size(device->vdisk, false); 2041 2042 drbd_info(device, "size = %s (%llu KB)\n", 2043 ppsize(ppb, size>>1), (unsigned long long)size>>1); ··· 2067 } 2068 D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); 2069 2070 - set_capacity(device->vdisk, 0); 2071 - revalidate_disk_size(device->vdisk, false); 2072 if (device->bitmap) { 2073 /* maybe never allocated. */ 2074 drbd_bm_resize(device, 0, 1);
··· 2036 { 2037 char ppb[10]; 2038 2039 + set_capacity_and_notify(device->vdisk, size); 2040 2041 drbd_info(device, "size = %s (%llu KB)\n", 2042 ppsize(ppb, size>>1), (unsigned long long)size>>1); ··· 2068 } 2069 D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL); 2070 2071 + set_capacity_and_notify(device->vdisk, 0); 2072 if (device->bitmap) { 2073 /* maybe never allocated. */ 2074 drbd_bm_resize(device, 0, 1);
+1 -1
drivers/block/drbd/drbd_receiver.c
··· 2802 if (c_min_rate == 0) 2803 return false; 2804 2805 - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - 2806 atomic_read(&device->rs_sect_ev); 2807 2808 if (atomic_read(&device->ap_actlog_cnt)
··· 2802 if (c_min_rate == 0) 2803 return false; 2804 2805 + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - 2806 atomic_read(&device->rs_sect_ev); 2807 2808 if (atomic_read(&device->ap_actlog_cnt)
+2 -1
drivers/block/drbd/drbd_worker.c
··· 1678 atomic_set(&device->rs_sect_in, 0); 1679 atomic_set(&device->rs_sect_ev, 0); 1680 device->rs_in_flight = 0; 1681 - device->rs_last_events = (int)part_stat_read_accum(&disk->part0, sectors); 1682 1683 /* Updating the RCU protected object in place is necessary since 1684 this function gets called from atomic context.
··· 1678 atomic_set(&device->rs_sect_in, 0); 1679 atomic_set(&device->rs_sect_ev, 0); 1680 device->rs_in_flight = 0; 1681 + device->rs_last_events = 1682 + (int)part_stat_read_accum(disk->part0, sectors); 1683 1684 /* Updating the RCU protected object in place is necessary since 1685 this function gets called from atomic context.
+97 -57
drivers/block/floppy.c
··· 402 static struct floppy_drive_struct drive_state[N_DRIVE]; 403 static struct floppy_write_errors write_errors[N_DRIVE]; 404 static struct timer_list motor_off_timer[N_DRIVE]; 405 - static struct gendisk *disks[N_DRIVE]; 406 static struct blk_mq_tag_set tag_sets[N_DRIVE]; 407 static struct block_device *opened_bdev[N_DRIVE]; 408 static DEFINE_MUTEX(open_lock); ··· 475 { 1600,10,2,80,0,0x25,0x02,0xDF,0x2E,"D800" }, /* 30 800KB 3.5" */ 476 { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */ 477 }; 478 479 #define SECTSIZE (_FD_SECTSIZE(*floppy)) 480 ··· 4112 4113 new_dev = MINOR(bdev->bd_dev); 4114 drive_state[drive].fd_device = new_dev; 4115 - set_capacity(disks[drive], floppy_sizes[new_dev]); 4116 if (old_dev != -1 && old_dev != new_dev) { 4117 if (buffer_drive == drive) 4118 buffer_track = -1; ··· 4580 return true; 4581 } 4582 4583 - static struct kobject *floppy_find(dev_t dev, int *part, void *data) 4584 { 4585 - int drive = (*part & 3) | ((*part & 0x80) >> 5); 4586 - if (drive >= N_DRIVE || !floppy_available(drive)) 4587 - return NULL; 4588 - if (((*part >> 2) & 0x1f) >= ARRAY_SIZE(floppy_type)) 4589 - return NULL; 4590 - *part = 0; 4591 - return get_disk_and_module(disks[drive]); 4592 } 4593 4594 static int __init do_floppy_init(void) ··· 4653 return -ENOMEM; 4654 4655 for (drive = 0; drive < N_DRIVE; drive++) { 4656 - disks[drive] = alloc_disk(1); 4657 - if (!disks[drive]) { 4658 - err = -ENOMEM; 4659 goto out_put_disk; 4660 - } 4661 4662 - disks[drive]->queue = blk_mq_init_sq_queue(&tag_sets[drive], 4663 - &floppy_mq_ops, 2, 4664 - BLK_MQ_F_SHOULD_MERGE); 4665 - if (IS_ERR(disks[drive]->queue)) { 4666 - err = PTR_ERR(disks[drive]->queue); 4667 - disks[drive]->queue = NULL; 4668 goto out_put_disk; 4669 - } 4670 - 4671 - blk_queue_bounce_limit(disks[drive]->queue, BLK_BOUNCE_HIGH); 4672 - blk_queue_max_hw_sectors(disks[drive]->queue, 64); 4673 - disks[drive]->major = FLOPPY_MAJOR; 4674 - disks[drive]->first_minor = TOMINOR(drive); 4675 - disks[drive]->fops = &floppy_fops; 4676 - disks[drive]->events = DISK_EVENT_MEDIA_CHANGE; 4677 - sprintf(disks[drive]->disk_name, "fd%d", drive); 4678 4679 timer_setup(&motor_off_timer[drive], motor_off_callback, 0); 4680 } 4681 4682 - err = register_blkdev(FLOPPY_MAJOR, "fd"); 4683 if (err) 4684 goto out_put_disk; 4685 4686 err = platform_driver_register(&floppy_driver); 4687 if (err) 4688 goto out_unreg_blkdev; 4689 - 4690 - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, 4691 - floppy_find, NULL, NULL); 4692 4693 for (i = 0; i < 256; i++) 4694 if (ITYPE(i)) ··· 4706 if (fdc_state[0].address == -1) { 4707 cancel_delayed_work(&fd_timeout); 4708 err = -ENODEV; 4709 - goto out_unreg_region; 4710 } 4711 #if N_FDC > 1 4712 fdc_state[1].address = FDC2; ··· 4717 if (err) { 4718 cancel_delayed_work(&fd_timeout); 4719 err = -EBUSY; 4720 - goto out_unreg_region; 4721 } 4722 4723 /* initialise drive state */ ··· 4794 if (err) 4795 goto out_remove_drives; 4796 4797 - /* to be cleaned up... 
*/ 4798 - disks[drive]->private_data = (void *)(long)drive; 4799 - disks[drive]->flags |= GENHD_FL_REMOVABLE; 4800 - device_add_disk(&floppy_device[drive].dev, disks[drive], NULL); 4801 } 4802 4803 return 0; ··· 4803 out_remove_drives: 4804 while (drive--) { 4805 if (floppy_available(drive)) { 4806 - del_gendisk(disks[drive]); 4807 platform_device_unregister(&floppy_device[drive]); 4808 } 4809 } 4810 out_release_dma: 4811 if (atomic_read(&usage_count)) 4812 floppy_release_irq_and_dma(); 4813 - out_unreg_region: 4814 - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 4815 platform_driver_unregister(&floppy_driver); 4816 out_unreg_blkdev: 4817 unregister_blkdev(FLOPPY_MAJOR, "fd"); 4818 out_put_disk: 4819 destroy_workqueue(floppy_wq); 4820 for (drive = 0; drive < N_DRIVE; drive++) { 4821 - if (!disks[drive]) 4822 break; 4823 - if (disks[drive]->queue) { 4824 - del_timer_sync(&motor_off_timer[drive]); 4825 - blk_cleanup_queue(disks[drive]->queue); 4826 - disks[drive]->queue = NULL; 4827 - blk_mq_free_tag_set(&tag_sets[drive]); 4828 - } 4829 - put_disk(disks[drive]); 4830 } 4831 return err; 4832 } ··· 5034 5035 static void __exit floppy_module_exit(void) 5036 { 5037 - int drive; 5038 5039 - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 5040 unregister_blkdev(FLOPPY_MAJOR, "fd"); 5041 platform_driver_unregister(&floppy_driver); 5042 ··· 5045 del_timer_sync(&motor_off_timer[drive]); 5046 5047 if (floppy_available(drive)) { 5048 - del_gendisk(disks[drive]); 5049 platform_device_unregister(&floppy_device[drive]); 5050 } 5051 - blk_cleanup_queue(disks[drive]->queue); 5052 blk_mq_free_tag_set(&tag_sets[drive]); 5053 5054 /* ··· 5062 * queue reference in put_disk(). 5063 */ 5064 if (!(allowed_drive_mask & (1 << drive)) || 5065 - fdc_state[FDC(drive)].version == FDC_NONE) 5066 - disks[drive]->queue = NULL; 5067 5068 - put_disk(disks[drive]); 5069 } 5070 5071 cancel_delayed_work_sync(&fd_timeout);
··· 402 static struct floppy_drive_struct drive_state[N_DRIVE]; 403 static struct floppy_write_errors write_errors[N_DRIVE]; 404 static struct timer_list motor_off_timer[N_DRIVE]; 405 static struct blk_mq_tag_set tag_sets[N_DRIVE]; 406 static struct block_device *opened_bdev[N_DRIVE]; 407 static DEFINE_MUTEX(open_lock); ··· 476 { 1600,10,2,80,0,0x25,0x02,0xDF,0x2E,"D800" }, /* 30 800KB 3.5" */ 477 { 3200,20,2,80,0,0x1C,0x00,0xCF,0x2C,"H1600" }, /* 31 1.6MB 3.5" */ 478 }; 479 + 480 + static struct gendisk *disks[N_DRIVE][ARRAY_SIZE(floppy_type)]; 481 482 #define SECTSIZE (_FD_SECTSIZE(*floppy)) 483 ··· 4111 4112 new_dev = MINOR(bdev->bd_dev); 4113 drive_state[drive].fd_device = new_dev; 4114 + set_capacity(disks[drive][ITYPE(new_dev)], floppy_sizes[new_dev]); 4115 if (old_dev != -1 && old_dev != new_dev) { 4116 if (buffer_drive == drive) 4117 buffer_track = -1; ··· 4579 return true; 4580 } 4581 4582 + static int floppy_alloc_disk(unsigned int drive, unsigned int type) 4583 { 4584 + struct gendisk *disk; 4585 + int err; 4586 + 4587 + disk = alloc_disk(1); 4588 + if (!disk) 4589 + return -ENOMEM; 4590 + 4591 + disk->queue = blk_mq_init_queue(&tag_sets[drive]); 4592 + if (IS_ERR(disk->queue)) { 4593 + err = PTR_ERR(disk->queue); 4594 + disk->queue = NULL; 4595 + put_disk(disk); 4596 + return err; 4597 + } 4598 + 4599 + blk_queue_bounce_limit(disk->queue, BLK_BOUNCE_HIGH); 4600 + blk_queue_max_hw_sectors(disk->queue, 64); 4601 + disk->major = FLOPPY_MAJOR; 4602 + disk->first_minor = TOMINOR(drive) | (type << 2); 4603 + disk->fops = &floppy_fops; 4604 + disk->events = DISK_EVENT_MEDIA_CHANGE; 4605 + if (type) 4606 + sprintf(disk->disk_name, "fd%d_type%d", drive, type); 4607 + else 4608 + sprintf(disk->disk_name, "fd%d", drive); 4609 + /* to be cleaned up... 
*/ 4610 + disk->private_data = (void *)(long)drive; 4611 + disk->flags |= GENHD_FL_REMOVABLE; 4612 + 4613 + disks[drive][type] = disk; 4614 + return 0; 4615 + } 4616 + 4617 + static DEFINE_MUTEX(floppy_probe_lock); 4618 + 4619 + static void floppy_probe(dev_t dev) 4620 + { 4621 + unsigned int drive = (MINOR(dev) & 3) | ((MINOR(dev) & 0x80) >> 5); 4622 + unsigned int type = (MINOR(dev) >> 2) & 0x1f; 4623 + 4624 + if (drive >= N_DRIVE || !floppy_available(drive) || 4625 + type >= ARRAY_SIZE(floppy_type)) 4626 + return; 4627 + 4628 + mutex_lock(&floppy_probe_lock); 4629 + if (!disks[drive][type]) { 4630 + if (floppy_alloc_disk(drive, type) == 0) 4631 + add_disk(disks[drive][type]); 4632 + } 4633 + mutex_unlock(&floppy_probe_lock); 4634 } 4635 4636 static int __init do_floppy_init(void) ··· 4609 return -ENOMEM; 4610 4611 for (drive = 0; drive < N_DRIVE; drive++) { 4612 + memset(&tag_sets[drive], 0, sizeof(tag_sets[drive])); 4613 + tag_sets[drive].ops = &floppy_mq_ops; 4614 + tag_sets[drive].nr_hw_queues = 1; 4615 + tag_sets[drive].nr_maps = 1; 4616 + tag_sets[drive].queue_depth = 2; 4617 + tag_sets[drive].numa_node = NUMA_NO_NODE; 4618 + tag_sets[drive].flags = BLK_MQ_F_SHOULD_MERGE; 4619 + err = blk_mq_alloc_tag_set(&tag_sets[drive]); 4620 + if (err) 4621 goto out_put_disk; 4622 4623 + err = floppy_alloc_disk(drive, 0); 4624 + if (err) 4625 goto out_put_disk; 4626 4627 timer_setup(&motor_off_timer[drive], motor_off_callback, 0); 4628 } 4629 4630 + err = __register_blkdev(FLOPPY_MAJOR, "fd", floppy_probe); 4631 if (err) 4632 goto out_put_disk; 4633 4634 err = platform_driver_register(&floppy_driver); 4635 if (err) 4636 goto out_unreg_blkdev; 4637 4638 for (i = 0; i < 256; i++) 4639 if (ITYPE(i)) ··· 4673 if (fdc_state[0].address == -1) { 4674 cancel_delayed_work(&fd_timeout); 4675 err = -ENODEV; 4676 + goto out_unreg_driver; 4677 } 4678 #if N_FDC > 1 4679 fdc_state[1].address = FDC2; ··· 4684 if (err) { 4685 cancel_delayed_work(&fd_timeout); 4686 err = -EBUSY; 4687 + goto out_unreg_driver; 4688 } 4689 4690 /* initialise drive state */ ··· 4761 if (err) 4762 goto out_remove_drives; 4763 4764 + device_add_disk(&floppy_device[drive].dev, disks[drive][0], 4765 + NULL); 4766 } 4767 4768 return 0; ··· 4772 out_remove_drives: 4773 while (drive--) { 4774 if (floppy_available(drive)) { 4775 + del_gendisk(disks[drive][0]); 4776 platform_device_unregister(&floppy_device[drive]); 4777 } 4778 } 4779 out_release_dma: 4780 if (atomic_read(&usage_count)) 4781 floppy_release_irq_and_dma(); 4782 + out_unreg_driver: 4783 platform_driver_unregister(&floppy_driver); 4784 out_unreg_blkdev: 4785 unregister_blkdev(FLOPPY_MAJOR, "fd"); 4786 out_put_disk: 4787 destroy_workqueue(floppy_wq); 4788 for (drive = 0; drive < N_DRIVE; drive++) { 4789 + if (!disks[drive][0]) 4790 break; 4791 + del_timer_sync(&motor_off_timer[drive]); 4792 + blk_cleanup_queue(disks[drive][0]->queue); 4793 + disks[drive][0]->queue = NULL; 4794 + blk_mq_free_tag_set(&tag_sets[drive]); 4795 + put_disk(disks[drive][0]); 4796 } 4797 return err; 4798 } ··· 5006 5007 static void __exit floppy_module_exit(void) 5008 { 5009 + int drive, i; 5010 5011 unregister_blkdev(FLOPPY_MAJOR, "fd"); 5012 platform_driver_unregister(&floppy_driver); 5013 ··· 5018 del_timer_sync(&motor_off_timer[drive]); 5019 5020 if (floppy_available(drive)) { 5021 + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { 5022 + if (disks[drive][i]) 5023 + del_gendisk(disks[drive][i]); 5024 + } 5025 platform_device_unregister(&floppy_device[drive]); 5026 } 5027 + for (i = 0; i < 
ARRAY_SIZE(floppy_type); i++) { 5028 + if (disks[drive][i]) 5029 + blk_cleanup_queue(disks[drive][i]->queue); 5030 + } 5031 blk_mq_free_tag_set(&tag_sets[drive]); 5032 5033 /* ··· 5029 * queue reference in put_disk(). 5030 */ 5031 if (!(allowed_drive_mask & (1 << drive)) || 5032 + fdc_state[FDC(drive)].version == FDC_NONE) { 5033 + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { 5034 + if (disks[drive][i]) 5035 + disks[drive][i]->queue = NULL; 5036 + } 5037 + } 5038 5039 + for (i = 0; i < ARRAY_SIZE(floppy_type); i++) { 5040 + if (disks[drive][i]) 5041 + put_disk(disks[drive][i]); 5042 + } 5043 } 5044 5045 cancel_delayed_work_sync(&fd_timeout);
+19 -45
drivers/block/loop.c
··· 251 */ 252 static void loop_set_size(struct loop_device *lo, loff_t size) 253 { 254 - struct block_device *bdev = lo->lo_device; 255 - 256 - bd_set_nr_sectors(bdev, size); 257 - 258 - if (!set_capacity_revalidate_and_notify(lo->lo_disk, size, false)) 259 - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 260 } 261 262 static inline int ··· 675 while (is_loop_device(f)) { 676 struct loop_device *l; 677 678 - if (f->f_mapping->host->i_bdev == bdev) 679 return -EBADF; 680 681 - l = f->f_mapping->host->i_bdev->bd_disk->private_data; 682 if (l->lo_state != Lo_bound) { 683 return -EINVAL; 684 } ··· 885 * file-backed loop devices: discarded regions read back as zero. 886 */ 887 if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { 888 - struct request_queue *backingq; 889 - 890 - backingq = bdev_get_queue(inode->i_bdev); 891 892 max_discard_sectors = backingq->limits.max_write_zeroes_sectors; 893 granularity = backingq->limits.discard_granularity ?: ··· 1069 struct file *file; 1070 struct inode *inode; 1071 struct address_space *mapping; 1072 - struct block_device *claimed_bdev = NULL; 1073 int error; 1074 loff_t size; 1075 bool partscan; ··· 1087 * here to avoid changing device under exclusive owner. 1088 */ 1089 if (!(mode & FMODE_EXCL)) { 1090 - claimed_bdev = bdev->bd_contains; 1091 - error = bd_prepare_to_claim(bdev, claimed_bdev, loop_configure); 1092 if (error) 1093 goto out_putf; 1094 } ··· 1130 if (error) 1131 goto out_unlock; 1132 1133 - set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); 1134 1135 lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; 1136 lo->lo_device = bdev; ··· 1160 size = get_loop_size(lo, file); 1161 loop_set_size(lo, size); 1162 1163 - set_blocksize(bdev, S_ISBLK(inode->i_mode) ? 1164 - block_size(inode->i_bdev) : PAGE_SIZE); 1165 - 1166 lo->lo_state = Lo_bound; 1167 if (part_shift) 1168 lo->lo_flags |= LO_FLAGS_PARTSCAN; ··· 1174 mutex_unlock(&loop_ctl_mutex); 1175 if (partscan) 1176 loop_reread_partitions(lo, bdev); 1177 - if (claimed_bdev) 1178 - bd_abort_claiming(bdev, claimed_bdev, loop_configure); 1179 return 0; 1180 1181 out_unlock: 1182 mutex_unlock(&loop_ctl_mutex); 1183 out_bdev: 1184 - if (claimed_bdev) 1185 - bd_abort_claiming(bdev, claimed_bdev, loop_configure); 1186 out_putf: 1187 fput(file); 1188 out: ··· 1241 set_capacity(lo->lo_disk, 0); 1242 loop_sysfs_exit(lo); 1243 if (bdev) { 1244 - bd_set_nr_sectors(bdev, 0); 1245 /* let user-space know about this change */ 1246 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1247 } ··· 2223 return ret; 2224 } 2225 2226 - static struct kobject *loop_probe(dev_t dev, int *part, void *data) 2227 { 2228 struct loop_device *lo; 2229 - struct kobject *kobj; 2230 - int err; 2231 2232 mutex_lock(&loop_ctl_mutex); 2233 - err = loop_lookup(&lo, MINOR(dev) >> part_shift); 2234 - if (err < 0) 2235 - err = loop_add(&lo, MINOR(dev) >> part_shift); 2236 - if (err < 0) 2237 - kobj = NULL; 2238 - else 2239 - kobj = get_disk_and_module(lo->lo_disk); 2240 mutex_unlock(&loop_ctl_mutex); 2241 - 2242 - *part = 0; 2243 - return kobj; 2244 } 2245 2246 static long loop_control_ioctl(struct file *file, unsigned int cmd, ··· 2354 goto err_out; 2355 2356 2357 - if (register_blkdev(LOOP_MAJOR, "loop")) { 2358 err = -EIO; 2359 goto misc_out; 2360 } 2361 - 2362 - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, 2363 - THIS_MODULE, loop_probe, NULL, NULL); 2364 2365 /* pre-create number of devices given by config or max_loop */ 2366 mutex_lock(&loop_ctl_mutex); ··· 2384 2385 static void __exit 
loop_exit(void) 2386 { 2387 - unsigned long range; 2388 - 2389 - range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; 2390 - 2391 mutex_lock(&loop_ctl_mutex); 2392 2393 idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); 2394 idr_destroy(&loop_index_idr); 2395 2396 - blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); 2397 unregister_blkdev(LOOP_MAJOR, "loop"); 2398 2399 misc_deregister(&loop_misc);
··· 251 */ 252 static void loop_set_size(struct loop_device *lo, loff_t size) 253 { 254 + if (!set_capacity_and_notify(lo->lo_disk, size)) 255 + kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); 256 } 257 258 static inline int ··· 679 while (is_loop_device(f)) { 680 struct loop_device *l; 681 682 + if (f->f_mapping->host->i_rdev == bdev->bd_dev) 683 return -EBADF; 684 685 + l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; 686 if (l->lo_state != Lo_bound) { 687 return -EINVAL; 688 } ··· 889 * file-backed loop devices: discarded regions read back as zero. 890 */ 891 if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) { 892 + struct request_queue *backingq = bdev_get_queue(I_BDEV(inode)); 893 894 max_discard_sectors = backingq->limits.max_write_zeroes_sectors; 895 granularity = backingq->limits.discard_granularity ?: ··· 1075 struct file *file; 1076 struct inode *inode; 1077 struct address_space *mapping; 1078 int error; 1079 loff_t size; 1080 bool partscan; ··· 1094 * here to avoid changing device under exclusive owner. 1095 */ 1096 if (!(mode & FMODE_EXCL)) { 1097 + error = bd_prepare_to_claim(bdev, loop_configure); 1098 if (error) 1099 goto out_putf; 1100 } ··· 1138 if (error) 1139 goto out_unlock; 1140 1141 + set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); 1142 1143 lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; 1144 lo->lo_device = bdev; ··· 1168 size = get_loop_size(lo, file); 1169 loop_set_size(lo, size); 1170 1171 lo->lo_state = Lo_bound; 1172 if (part_shift) 1173 lo->lo_flags |= LO_FLAGS_PARTSCAN; ··· 1185 mutex_unlock(&loop_ctl_mutex); 1186 if (partscan) 1187 loop_reread_partitions(lo, bdev); 1188 + if (!(mode & FMODE_EXCL)) 1189 + bd_abort_claiming(bdev, loop_configure); 1190 return 0; 1191 1192 out_unlock: 1193 mutex_unlock(&loop_ctl_mutex); 1194 out_bdev: 1195 + if (!(mode & FMODE_EXCL)) 1196 + bd_abort_claiming(bdev, loop_configure); 1197 out_putf: 1198 fput(file); 1199 out: ··· 1252 set_capacity(lo->lo_disk, 0); 1253 loop_sysfs_exit(lo); 1254 if (bdev) { 1255 /* let user-space know about this change */ 1256 kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1257 } ··· 2235 return ret; 2236 } 2237 2238 + static void loop_probe(dev_t dev) 2239 { 2240 + int idx = MINOR(dev) >> part_shift; 2241 struct loop_device *lo; 2242 + 2243 + if (max_loop && idx >= max_loop) 2244 + return; 2245 2246 mutex_lock(&loop_ctl_mutex); 2247 + if (loop_lookup(&lo, idx) < 0) 2248 + loop_add(&lo, idx); 2249 mutex_unlock(&loop_ctl_mutex); 2250 } 2251 2252 static long loop_control_ioctl(struct file *file, unsigned int cmd, ··· 2372 goto err_out; 2373 2374 2375 + if (__register_blkdev(LOOP_MAJOR, "loop", loop_probe)) { 2376 err = -EIO; 2377 goto misc_out; 2378 } 2379 2380 /* pre-create number of devices given by config or max_loop */ 2381 mutex_lock(&loop_ctl_mutex); ··· 2405 2406 static void __exit loop_exit(void) 2407 { 2408 mutex_lock(&loop_ctl_mutex); 2409 2410 idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); 2411 idr_destroy(&loop_index_idr); 2412 2413 unregister_blkdev(LOOP_MAJOR, "loop"); 2414 2415 misc_deregister(&loop_misc);
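loop's configure path also shows the slimmed-down claiming interface: bd_prepare_to_claim() and bd_abort_claiming() now take just the block device and a holder cookie, so the caller no longer tracks a separate bd_contains pointer for the whole disk. Roughly:

/* Sketch only: place a temporary exclusive claim while validating a
 * device, then drop it.  The holder cookie just has to match between
 * the two calls (loop uses its configure function as the cookie). */
static int example_probe_exclusive(struct block_device *bdev, void *holder)
{
	int err = bd_prepare_to_claim(bdev, holder);

	if (err)
		return err;
	/* ... inspect or configure the device here ... */
	bd_abort_claiming(bdev, holder);
	return 0;
}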
-15
drivers/block/mtip32xx/mtip32xx.c
··· 3687 /* Enable the block device and add it to /dev */ 3688 device_add_disk(&dd->pdev->dev, dd->disk, NULL); 3689 3690 - dd->bdev = bdget_disk(dd->disk, 0); 3691 /* 3692 * Now that the disk is active, initialize any sysfs attributes 3693 * managed by the protocol layer. ··· 3720 return rv; 3721 3722 kthread_run_error: 3723 - bdput(dd->bdev); 3724 - dd->bdev = NULL; 3725 - 3726 /* Delete our gendisk. This also removes the device from /dev */ 3727 del_gendisk(dd->disk); 3728 ··· 3800 blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); 3801 blk_mq_unquiesce_queue(dd->queue); 3802 3803 - /* 3804 - * Delete our gendisk structure. This also removes the device 3805 - * from /dev 3806 - */ 3807 - if (dd->bdev) { 3808 - bdput(dd->bdev); 3809 - dd->bdev = NULL; 3810 - } 3811 if (dd->disk) { 3812 if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) 3813 del_gendisk(dd->disk); ··· 4193 msleep(20); 4194 } while (atomic_read(&dd->irq_workers_active) != 0 && 4195 time_before(jiffies, to)); 4196 - 4197 - if (!dd->sr) 4198 - fsync_bdev(dd->bdev); 4199 4200 if (atomic_read(&dd->irq_workers_active) != 0) { 4201 dev_warn(&dd->pdev->dev,
··· 3687 /* Enable the block device and add it to /dev */ 3688 device_add_disk(&dd->pdev->dev, dd->disk, NULL); 3689 3690 /* 3691 * Now that the disk is active, initialize any sysfs attributes 3692 * managed by the protocol layer. ··· 3721 return rv; 3722 3723 kthread_run_error: 3724 /* Delete our gendisk. This also removes the device from /dev */ 3725 del_gendisk(dd->disk); 3726 ··· 3804 blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); 3805 blk_mq_unquiesce_queue(dd->queue); 3806 3807 if (dd->disk) { 3808 if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) 3809 del_gendisk(dd->disk); ··· 4205 msleep(20); 4206 } while (atomic_read(&dd->irq_workers_active) != 0 && 4207 time_before(jiffies, to)); 4208 4209 if (atomic_read(&dd->irq_workers_active) != 0) { 4210 dev_warn(&dd->pdev->dev,
-2
drivers/block/mtip32xx/mtip32xx.h
··· 463 464 int isr_binding; 465 466 - struct block_device *bdev; 467 - 468 struct list_head online_list; /* linkage for online list */ 469 470 struct list_head remove_list; /* linkage for removing list */
··· 463 464 int isr_binding; 465 466 struct list_head online_list; /* linkage for online list */ 467 468 struct list_head remove_list; /* linkage for removing list */
+30 -64
drivers/block/nbd.c
··· 296 } 297 } 298 299 - static void nbd_size_update(struct nbd_device *nbd, bool start) 300 { 301 - struct nbd_config *config = nbd->config; 302 - struct block_device *bdev = bdget_disk(nbd->disk, 0); 303 - sector_t nr_sectors = config->bytesize >> 9; 304 305 - if (config->flags & NBD_FLAG_SEND_TRIM) { 306 - nbd->disk->queue->limits.discard_granularity = config->blksize; 307 - nbd->disk->queue->limits.discard_alignment = config->blksize; 308 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 309 } 310 - blk_queue_logical_block_size(nbd->disk->queue, config->blksize); 311 - blk_queue_physical_block_size(nbd->disk->queue, config->blksize); 312 - set_capacity(nbd->disk, nr_sectors); 313 - if (bdev) { 314 - if (bdev->bd_disk) { 315 - bd_set_nr_sectors(bdev, nr_sectors); 316 - if (start) 317 - set_blocksize(bdev, config->blksize); 318 - } else 319 - set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); 320 - bdput(bdev); 321 - } 322 - kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 323 - } 324 325 - static void nbd_size_set(struct nbd_device *nbd, loff_t blocksize, 326 - loff_t nr_blocks) 327 - { 328 - struct nbd_config *config = nbd->config; 329 - config->blksize = blocksize; 330 - config->bytesize = blocksize * nr_blocks; 331 - if (nbd->task_recv != NULL) 332 - nbd_size_update(nbd, false); 333 } 334 335 static void nbd_complete_rq(struct request *req) ··· 1132 { 1133 if (bdev->bd_openers > 1) 1134 return; 1135 - bd_set_nr_sectors(bdev, 0); 1136 } 1137 1138 static void nbd_parse_flags(struct nbd_device *nbd) ··· 1301 args->index = i; 1302 queue_work(nbd->recv_workq, &args->work); 1303 } 1304 - nbd_size_update(nbd, true); 1305 - return error; 1306 } 1307 1308 static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) ··· 1343 nbd_config_put(nbd); 1344 } 1345 1346 - static bool nbd_is_valid_blksize(unsigned long blksize) 1347 - { 1348 - if (!blksize || !is_power_of_2(blksize) || blksize < 512 || 1349 - blksize > PAGE_SIZE) 1350 - return false; 1351 - return true; 1352 - } 1353 - 1354 static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout) 1355 { 1356 nbd->tag_set.timeout = timeout * HZ; ··· 1367 case NBD_SET_SOCK: 1368 return nbd_add_socket(nbd, arg, false); 1369 case NBD_SET_BLKSIZE: 1370 - if (!arg) 1371 - arg = NBD_DEF_BLKSIZE; 1372 - if (!nbd_is_valid_blksize(arg)) 1373 - return -EINVAL; 1374 - nbd_size_set(nbd, arg, 1375 - div_s64(config->bytesize, arg)); 1376 - return 0; 1377 case NBD_SET_SIZE: 1378 - nbd_size_set(nbd, config->blksize, 1379 - div_s64(arg, config->blksize)); 1380 - return 0; 1381 case NBD_SET_SIZE_BLOCKS: 1382 - nbd_size_set(nbd, config->blksize, arg); 1383 - return 0; 1384 case NBD_SET_TIMEOUT: 1385 nbd_set_cmd_timeout(nbd, arg); 1386 return 0; ··· 1488 static void nbd_release(struct gendisk *disk, fmode_t mode) 1489 { 1490 struct nbd_device *nbd = disk->private_data; 1491 - struct block_device *bdev = bdget_disk(disk, 0); 1492 1493 if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && 1494 - bdev->bd_openers == 0) 1495 nbd_disconnect_and_put(nbd); 1496 - bdput(bdev); 1497 1498 nbd_config_put(nbd); 1499 nbd_put(nbd); ··· 1788 if (info->attrs[NBD_ATTR_SIZE_BYTES]) 1789 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); 1790 1791 - if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) { 1792 bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); 1793 - if (!bsize) 1794 - bsize = NBD_DEF_BLKSIZE; 1795 - if (!nbd_is_valid_blksize(bsize)) { 1796 - printk(KERN_ERR "Invalid block size %llu\n", bsize); 1797 - return 
-EINVAL; 1798 - } 1799 - } 1800 1801 if (bytes != config->bytesize || bsize != config->blksize) 1802 - nbd_size_set(nbd, bsize, div64_u64(bytes, bsize)); 1803 return 0; 1804 } 1805
··· 296 } 297 } 298 299 + static int nbd_set_size(struct nbd_device *nbd, loff_t bytesize, 300 + loff_t blksize) 301 { 302 + if (!blksize) 303 + blksize = NBD_DEF_BLKSIZE; 304 + if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize)) 305 + return -EINVAL; 306 307 + nbd->config->bytesize = bytesize; 308 + nbd->config->blksize = blksize; 309 + 310 + if (!nbd->task_recv) 311 + return 0; 312 + 313 + if (nbd->config->flags & NBD_FLAG_SEND_TRIM) { 314 + nbd->disk->queue->limits.discard_granularity = blksize; 315 + nbd->disk->queue->limits.discard_alignment = blksize; 316 blk_queue_max_discard_sectors(nbd->disk->queue, UINT_MAX); 317 } 318 + blk_queue_logical_block_size(nbd->disk->queue, blksize); 319 + blk_queue_physical_block_size(nbd->disk->queue, blksize); 320 321 + set_bit(GD_NEED_PART_SCAN, &nbd->disk->state); 322 + if (!set_capacity_and_notify(nbd->disk, bytesize >> 9)) 323 + kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE); 324 + return 0; 325 } 326 327 static void nbd_complete_rq(struct request *req) ··· 1140 { 1141 if (bdev->bd_openers > 1) 1142 return; 1143 + set_capacity(bdev->bd_disk, 0); 1144 } 1145 1146 static void nbd_parse_flags(struct nbd_device *nbd) ··· 1309 args->index = i; 1310 queue_work(nbd->recv_workq, &args->work); 1311 } 1312 + return nbd_set_size(nbd, config->bytesize, config->blksize); 1313 } 1314 1315 static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *bdev) ··· 1352 nbd_config_put(nbd); 1353 } 1354 1355 static void nbd_set_cmd_timeout(struct nbd_device *nbd, u64 timeout) 1356 { 1357 nbd->tag_set.timeout = timeout * HZ; ··· 1384 case NBD_SET_SOCK: 1385 return nbd_add_socket(nbd, arg, false); 1386 case NBD_SET_BLKSIZE: 1387 + return nbd_set_size(nbd, config->bytesize, arg); 1388 case NBD_SET_SIZE: 1389 + return nbd_set_size(nbd, arg, config->blksize); 1390 case NBD_SET_SIZE_BLOCKS: 1391 + return nbd_set_size(nbd, arg * config->blksize, 1392 + config->blksize); 1393 case NBD_SET_TIMEOUT: 1394 nbd_set_cmd_timeout(nbd, arg); 1395 return 0; ··· 1513 static void nbd_release(struct gendisk *disk, fmode_t mode) 1514 { 1515 struct nbd_device *nbd = disk->private_data; 1516 1517 if (test_bit(NBD_RT_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) && 1518 + disk->part0->bd_openers == 0) 1519 nbd_disconnect_and_put(nbd); 1520 1521 nbd_config_put(nbd); 1522 nbd_put(nbd); ··· 1815 if (info->attrs[NBD_ATTR_SIZE_BYTES]) 1816 bytes = nla_get_u64(info->attrs[NBD_ATTR_SIZE_BYTES]); 1817 1818 + if (info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]) 1819 bsize = nla_get_u64(info->attrs[NBD_ATTR_BLOCK_SIZE_BYTES]); 1820 1821 if (bytes != config->bytesize || bsize != config->blksize) 1822 + return nbd_set_size(nbd, bytes, bsize); 1823 return 0; 1824 } 1825
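The nbd hunk above folds nbd_size_update(), nbd_size_set() and nbd_is_valid_blksize() into a single nbd_set_size() that validates the block size, applies the queue limits, and lets set_capacity_and_notify() decide whether a resize uevent is needed. A minimal sketch of that pattern for a hypothetical driver follows; the mydrv_* name and the pr_debug() are illustrative, not part of the patch.

#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/log2.h>

/* Hypothetical resize helper mirroring nbd_set_size() above. */
static int mydrv_set_size(struct gendisk *disk, loff_t bytesize, loff_t blksize)
{
	if (blksize < 512 || blksize > PAGE_SIZE || !is_power_of_2(blksize))
		return -EINVAL;

	blk_queue_logical_block_size(disk->queue, blksize);
	blk_queue_physical_block_size(disk->queue, blksize);

	/*
	 * set_capacity_and_notify() returns true when the capacity changed
	 * and a resize uevent was emitted; nbd sends a plain KOBJ_CHANGE
	 * uevent itself in the other case.
	 */
	if (!set_capacity_and_notify(disk, bytesize >> SECTOR_SHIFT))
		pr_debug("%s: capacity unchanged\n", disk->disk_name);
	return 0;
}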
+5 -4
drivers/block/pktcdvd.c
··· 2130 } 2131 2132 set_capacity(pd->disk, lba << 2); 2133 - set_capacity(pd->bdev->bd_disk, lba << 2); 2134 - bd_set_nr_sectors(pd->bdev, lba << 2); 2135 2136 q = bdev_get_queue(pd->bdev); 2137 if (write) { ··· 2583 case CDROM_LAST_WRITTEN: 2584 case CDROM_SEND_PACKET: 2585 case SCSI_IOCTL_SEND_COMMAND: 2586 - ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg); 2587 break; 2588 - 2589 default: 2590 pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); 2591 ret = -ENOTTY;
··· 2130 } 2131 2132 set_capacity(pd->disk, lba << 2); 2133 + set_capacity_and_notify(pd->bdev->bd_disk, lba << 2); 2134 2135 q = bdev_get_queue(pd->bdev); 2136 if (write) { ··· 2584 case CDROM_LAST_WRITTEN: 2585 case CDROM_SEND_PACKET: 2586 case SCSI_IOCTL_SEND_COMMAND: 2587 + if (!bdev->bd_disk->fops->ioctl) 2588 + ret = -ENOTTY; 2589 + else 2590 + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 2591 break; 2592 default: 2593 pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); 2594 ret = -ENOTTY;
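With __blkdev_driver_ioctl() no longer available, pkt_ioctl() above forwards CD-ROM ioctls by calling the underlying gendisk's ->ioctl method directly; bcache and dm switch to the same pattern further down. A hedged sketch of that pass-through, with mydrv_forward_ioctl() a made-up name:

#include <linux/blkdev.h>

/* Forward an ioctl to the block device a stacking driver sits on top of. */
static int mydrv_forward_ioctl(struct block_device *lower_bdev, fmode_t mode,
			       unsigned int cmd, unsigned long arg)
{
	const struct block_device_operations *fops = lower_bdev->bd_disk->fops;

	if (!fops->ioctl)
		return -ENOTTY;
	return fops->ioctl(lower_bdev, mode, cmd, arg);
}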
+5 -38
drivers/block/rbd.c
··· 692 put_device(&rbd_dev->dev); 693 } 694 695 - static int rbd_ioctl_set_ro(struct rbd_device *rbd_dev, unsigned long arg) 696 { 697 - int ro; 698 - 699 - if (get_user(ro, (int __user *)arg)) 700 - return -EFAULT; 701 702 /* 703 * Both images mapped read-only and snapshots can't be marked ··· 707 rbd_assert(!rbd_is_snap(rbd_dev)); 708 } 709 710 - /* Let blkdev_roset() handle it */ 711 - return -ENOTTY; 712 } 713 - 714 - static int rbd_ioctl(struct block_device *bdev, fmode_t mode, 715 - unsigned int cmd, unsigned long arg) 716 - { 717 - struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 718 - int ret; 719 - 720 - switch (cmd) { 721 - case BLKROSET: 722 - ret = rbd_ioctl_set_ro(rbd_dev, arg); 723 - break; 724 - default: 725 - ret = -ENOTTY; 726 - } 727 - 728 - return ret; 729 - } 730 - 731 - #ifdef CONFIG_COMPAT 732 - static int rbd_compat_ioctl(struct block_device *bdev, fmode_t mode, 733 - unsigned int cmd, unsigned long arg) 734 - { 735 - return rbd_ioctl(bdev, mode, cmd, arg); 736 - } 737 - #endif /* CONFIG_COMPAT */ 738 739 static const struct block_device_operations rbd_bd_ops = { 740 .owner = THIS_MODULE, 741 .open = rbd_open, 742 .release = rbd_release, 743 - .ioctl = rbd_ioctl, 744 - #ifdef CONFIG_COMPAT 745 - .compat_ioctl = rbd_compat_ioctl, 746 - #endif 747 }; 748 749 /* ··· 4888 !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { 4889 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; 4890 dout("setting size to %llu sectors", (unsigned long long)size); 4891 - set_capacity(rbd_dev->disk, size); 4892 - revalidate_disk_size(rbd_dev->disk, true); 4893 } 4894 } 4895
··· 692 put_device(&rbd_dev->dev); 693 } 694 695 + static int rbd_set_read_only(struct block_device *bdev, bool ro) 696 { 697 + struct rbd_device *rbd_dev = bdev->bd_disk->private_data; 698 699 /* 700 * Both images mapped read-only and snapshots can't be marked ··· 710 rbd_assert(!rbd_is_snap(rbd_dev)); 711 } 712 713 + return 0; 714 } 715 716 static const struct block_device_operations rbd_bd_ops = { 717 .owner = THIS_MODULE, 718 .open = rbd_open, 719 .release = rbd_release, 720 + .set_read_only = rbd_set_read_only, 721 }; 722 723 /* ··· 4920 !test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) { 4921 size = (sector_t)rbd_dev->mapping.size / SECTOR_SIZE; 4922 dout("setting size to %llu sectors", (unsigned long long)size); 4923 + set_capacity_and_notify(rbd_dev->disk, size); 4924 } 4925 } 4926
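rbd stops decoding BLKROSET by hand: the block core now owns the ioctl and only consults the driver through the new ->set_read_only method, so the driver's job is reduced to accepting or vetoing the transition. A minimal sketch under that assumption; struct mydrv_device and its field are invented for illustration.

#include <linux/module.h>
#include <linux/blkdev.h>

struct mydrv_device {
	bool write_protected;
};

/* Return 0 to accept the new read-only state, or a negative errno to veto. */
static int mydrv_set_read_only(struct block_device *bdev, bool ro)
{
	struct mydrv_device *dev = bdev->bd_disk->private_data;

	if (!ro && dev->write_protected)
		return -EROFS;
	return 0;
}

static const struct block_device_operations mydrv_fops = {
	.owner		= THIS_MODULE,
	.set_read_only	= mydrv_set_read_only,
};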
+1 -2
drivers/block/rnbd/rnbd-clt.c
··· 100 rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", 101 dev->nsectors, new_nsectors); 102 dev->nsectors = new_nsectors; 103 - set_capacity(dev->gd, dev->nsectors); 104 - revalidate_disk_size(dev->gd, true); 105 return 0; 106 } 107
··· 100 rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n", 101 dev->nsectors, new_nsectors); 102 dev->nsectors = new_nsectors; 103 + set_capacity_and_notify(dev->gd, dev->nsectors); 104 return 0; 105 } 106
-17
drivers/block/swim.c
··· 745 .check_events = floppy_check_events, 746 }; 747 748 - static struct kobject *floppy_find(dev_t dev, int *part, void *data) 749 - { 750 - struct swim_priv *swd = data; 751 - int drive = (*part & 3); 752 - 753 - if (drive >= swd->floppy_count) 754 - return NULL; 755 - 756 - *part = 0; 757 - return get_disk_and_module(swd->unit[drive].disk); 758 - } 759 - 760 static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) 761 { 762 struct floppy_state *fs = &swd->unit[swd->floppy_count]; ··· 834 add_disk(swd->unit[drive].disk); 835 } 836 837 - blk_register_region(MKDEV(FLOPPY_MAJOR, 0), 256, THIS_MODULE, 838 - floppy_find, NULL, swd); 839 - 840 return 0; 841 842 exit_put_disks: ··· 916 struct swim_priv *swd = platform_get_drvdata(dev); 917 int drive; 918 struct resource *res; 919 - 920 - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); 921 922 for (drive = 0; drive < swd->floppy_count; drive++) { 923 del_gendisk(swd->unit[drive].disk);
··· 745 .check_events = floppy_check_events, 746 }; 747 748 static int swim_add_floppy(struct swim_priv *swd, enum drive_location location) 749 { 750 struct floppy_state *fs = &swd->unit[swd->floppy_count]; ··· 846 add_disk(swd->unit[drive].disk); 847 } 848 849 return 0; 850 851 exit_put_disks: ··· 931 struct swim_priv *swd = platform_get_drvdata(dev); 932 int drive; 933 struct resource *res; 934 935 for (drive = 0; drive < swd->floppy_count; drive++) { 936 del_gendisk(swd->unit[drive].disk);
+1 -2
drivers/block/virtio_blk.c
··· 470 cap_str_10, 471 cap_str_2); 472 473 - set_capacity_revalidate_and_notify(vblk->disk, capacity, true); 474 } 475 476 static void virtblk_config_changed_work(struct work_struct *work) ··· 598 struct virtio_blk *vblk = vdev->priv; 599 600 blk_queue_write_cache(vblk->disk->queue, writeback, false); 601 - revalidate_disk_size(vblk->disk, true); 602 } 603 604 static const char *const virtblk_cache_types[] = {
··· 470 cap_str_10, 471 cap_str_2); 472 473 + set_capacity_and_notify(vblk->disk, capacity); 474 } 475 476 static void virtblk_config_changed_work(struct work_struct *work) ··· 598 struct virtio_blk *vblk = vdev->priv; 599 600 blk_queue_write_cache(vblk->disk->queue, writeback, false); 601 } 602 603 static const char *const virtblk_cache_types[] = {
+1 -3
drivers/block/xen-blkback/common.h
··· 356 }; 357 358 359 - #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ 360 - (_v)->bdev->bd_part->nr_sects : \ 361 - get_capacity((_v)->bdev->bd_disk)) 362 363 #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) 364 #define xen_blkif_put(_b) \
··· 356 }; 357 358 359 + #define vbd_sz(_v) bdev_nr_sectors((_v)->bdev) 360 361 #define xen_blkif_get(_b) (atomic_inc(&(_b)->refcnt)) 362 #define xen_blkif_put(_b) \
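vbd_sz() collapses to bdev_nr_sectors(), which reads the size off the block_device itself and therefore works the same for partitions and whole disks now that hd_struct is folded into struct block_device. A small hedged usage sketch (the helper name is illustrative):

#include <linux/blkdev.h>

/* Size of any block device, partition or whole disk, in bytes. */
static inline loff_t mydrv_bdev_bytes(struct block_device *bdev)
{
	return (loff_t)bdev_nr_sectors(bdev) << SECTOR_SHIFT;
}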
+6 -16
drivers/block/xen-blkfront.c
··· 2153 } 2154 2155 if (info->gd) 2156 - bdev = bdget_disk(info->gd, 0); 2157 2158 mutex_unlock(&info->mutex); 2159 ··· 2370 return; 2371 printk(KERN_INFO "Setting capacity to %Lu\n", 2372 sectors); 2373 - set_capacity_revalidate_and_notify(info->gd, sectors, true); 2374 2375 return; 2376 case BLKIF_STATE_SUSPENDED: ··· 2518 2519 disk = info->gd; 2520 if (disk) 2521 - bdev = bdget_disk(disk, 0); 2522 2523 info->xbdev = NULL; 2524 mutex_unlock(&info->mutex); ··· 2595 static void blkif_release(struct gendisk *disk, fmode_t mode) 2596 { 2597 struct blkfront_info *info = disk->private_data; 2598 - struct block_device *bdev; 2599 struct xenbus_device *xbdev; 2600 2601 mutex_lock(&blkfront_mutex); 2602 - 2603 - bdev = bdget_disk(disk, 0); 2604 - 2605 - if (!bdev) { 2606 - WARN(1, "Block device %s yanked out from us!\n", disk->disk_name); 2607 goto out_mutex; 2608 - } 2609 - if (bdev->bd_openers) 2610 - goto out; 2611 2612 /* 2613 * Check if we have been instructed to close. We will have ··· 2611 2612 if (xbdev && xbdev->state == XenbusStateClosing) { 2613 /* pending switch to state closed */ 2614 - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 2615 xlvbd_release_gendisk(info); 2616 xenbus_frontend_closed(info->xbdev); 2617 } ··· 2620 2621 if (!xbdev) { 2622 /* sudden device removal */ 2623 - dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n"); 2624 xlvbd_release_gendisk(info); 2625 disk->private_data = NULL; 2626 free_info(info); 2627 } 2628 2629 - out: 2630 - bdput(bdev); 2631 out_mutex: 2632 mutex_unlock(&blkfront_mutex); 2633 }
··· 2153 } 2154 2155 if (info->gd) 2156 + bdev = bdgrab(info->gd->part0); 2157 2158 mutex_unlock(&info->mutex); 2159 ··· 2370 return; 2371 printk(KERN_INFO "Setting capacity to %Lu\n", 2372 sectors); 2373 + set_capacity_and_notify(info->gd, sectors); 2374 2375 return; 2376 case BLKIF_STATE_SUSPENDED: ··· 2518 2519 disk = info->gd; 2520 if (disk) 2521 + bdev = bdgrab(disk->part0); 2522 2523 info->xbdev = NULL; 2524 mutex_unlock(&info->mutex); ··· 2595 static void blkif_release(struct gendisk *disk, fmode_t mode) 2596 { 2597 struct blkfront_info *info = disk->private_data; 2598 struct xenbus_device *xbdev; 2599 2600 mutex_lock(&blkfront_mutex); 2601 + if (disk->part0->bd_openers) 2602 goto out_mutex; 2603 2604 /* 2605 * Check if we have been instructed to close. We will have ··· 2619 2620 if (xbdev && xbdev->state == XenbusStateClosing) { 2621 /* pending switch to state closed */ 2622 + dev_info(disk_to_dev(disk), "releasing disk\n"); 2623 xlvbd_release_gendisk(info); 2624 xenbus_frontend_closed(info->xbdev); 2625 } ··· 2628 2629 if (!xbdev) { 2630 /* sudden device removal */ 2631 + dev_info(disk_to_dev(disk), "releasing disk\n"); 2632 xlvbd_release_gendisk(info); 2633 disk->private_data = NULL; 2634 free_info(info); 2635 } 2636 2637 out_mutex: 2638 mutex_unlock(&blkfront_mutex); 2639 }
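Because every gendisk now embeds its whole-device block_device as disk->part0, xen-blkfront pins it with bdgrab() instead of the bdget_disk()/bdput() lookup, and blkif_release() checks bd_openers on part0 directly. A hedged sketch of the pin/unpin pattern; the helper is made up and the header choice is an assumption:

#include <linux/fs.h>
#include <linux/blkdev.h>

/* Pin the whole-device bdev while inspecting it, then drop the reference. */
static bool mydrv_disk_in_use(struct gendisk *disk)
{
	struct block_device *bdev = bdgrab(disk->part0);
	bool in_use = bdev->bd_openers > 0;

	bdput(bdev);
	return in_use;
}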
+260 -265
drivers/block/z2ram.c
··· 42 43 #include <linux/zorro.h> 44 45 - 46 #define Z2MINOR_COMBINED (0) 47 #define Z2MINOR_Z2ONLY (1) 48 #define Z2MINOR_CHIPONLY (2) ··· 49 #define Z2MINOR_MEMLIST2 (5) 50 #define Z2MINOR_MEMLIST3 (6) 51 #define Z2MINOR_MEMLIST4 (7) 52 - #define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ 53 54 #define Z2RAM_CHUNK1024 ( Z2RAM_CHUNKSIZE >> 10 ) 55 56 static DEFINE_MUTEX(z2ram_mutex); 57 - static u_long *z2ram_map = NULL; 58 - static u_long z2ram_size = 0; 59 - static int z2_count = 0; 60 - static int chip_count = 0; 61 - static int list_count = 0; 62 - static int current_device = -1; 63 64 static DEFINE_SPINLOCK(z2ram_lock); 65 66 - static struct gendisk *z2ram_gendisk; 67 68 static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, 69 const struct blk_mq_queue_data *bd) 70 { 71 struct request *req = bd->rq; 72 unsigned long start = blk_rq_pos(req) << 9; 73 - unsigned long len = blk_rq_cur_bytes(req); 74 75 blk_mq_start_request(req); 76 ··· 91 92 if (len < size) 93 size = len; 94 - addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; 95 if (rq_data_dir(req) == READ) 96 memcpy(buffer, (char *)addr, size); 97 else ··· 105 return BLK_STS_OK; 106 } 107 108 - static void 109 - get_z2ram( void ) 110 { 111 - int i; 112 113 - for ( i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++ ) 114 - { 115 - if ( test_bit( i, zorro_unused_z2ram ) ) 116 - { 117 - z2_count++; 118 - z2ram_map[z2ram_size++] = (unsigned long)ZTWO_VADDR(Z2RAM_START) + 119 - (i << Z2RAM_CHUNKSHIFT); 120 - clear_bit( i, zorro_unused_z2ram ); 121 } 122 - } 123 124 - return; 125 } 126 127 - static void 128 - get_chipram( void ) 129 { 130 131 - while ( amiga_chip_avail() > ( Z2RAM_CHUNKSIZE * 4 ) ) 132 - { 133 - chip_count++; 134 - z2ram_map[ z2ram_size ] = 135 - (u_long)amiga_chip_alloc( Z2RAM_CHUNKSIZE, "z2ram" ); 136 137 - if ( z2ram_map[ z2ram_size ] == 0 ) 138 - { 139 - break; 140 } 141 142 - z2ram_size++; 143 - } 144 - 145 - return; 146 } 147 148 static int z2_open(struct block_device *bdev, fmode_t mode) 149 { 150 - int device; 151 - int max_z2_map = ( Z2RAM_SIZE / Z2RAM_CHUNKSIZE ) * 152 - sizeof( z2ram_map[0] ); 153 - int max_chip_map = ( amiga_chip_size / Z2RAM_CHUNKSIZE ) * 154 - sizeof( z2ram_map[0] ); 155 - int rc = -ENOMEM; 156 157 - device = MINOR(bdev->bd_dev); 158 159 - mutex_lock(&z2ram_mutex); 160 - if ( current_device != -1 && current_device != device ) 161 - { 162 - rc = -EBUSY; 163 - goto err_out; 164 - } 165 166 - if ( current_device == -1 ) 167 - { 168 - z2_count = 0; 169 - chip_count = 0; 170 - list_count = 0; 171 - z2ram_size = 0; 172 173 - /* Use a specific list entry. */ 174 - if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { 175 - int index = device - Z2MINOR_MEMLIST1 + 1; 176 - unsigned long size, paddr, vaddr; 177 178 - if (index >= m68k_realnum_memory) { 179 - printk( KERN_ERR DEVICE_NAME 180 - ": no such entry in z2ram_map\n" ); 181 - goto err_out; 182 - } 183 184 - paddr = m68k_memory[index].addr; 185 - size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE-1); 186 187 #ifdef __powerpc__ 188 - /* FIXME: ioremap doesn't build correct memory tables. 
*/ 189 - { 190 - vfree(vmalloc (size)); 191 - } 192 193 - vaddr = (unsigned long)ioremap_wt(paddr, size); 194 195 #else 196 - vaddr = (unsigned long)z_remap_nocache_nonser(paddr, size); 197 #endif 198 - z2ram_map = 199 - kmalloc_array(size / Z2RAM_CHUNKSIZE, 200 - sizeof(z2ram_map[0]), 201 - GFP_KERNEL); 202 - if ( z2ram_map == NULL ) 203 - { 204 - printk( KERN_ERR DEVICE_NAME 205 - ": cannot get mem for z2ram_map\n" ); 206 - goto err_out; 207 } 208 209 - while (size) { 210 - z2ram_map[ z2ram_size++ ] = vaddr; 211 - size -= Z2RAM_CHUNKSIZE; 212 - vaddr += Z2RAM_CHUNKSIZE; 213 - list_count++; 214 - } 215 - 216 - if ( z2ram_size != 0 ) 217 - printk( KERN_INFO DEVICE_NAME 218 - ": using %iK List Entry %d Memory\n", 219 - list_count * Z2RAM_CHUNK1024, index ); 220 - } else 221 - 222 - switch ( device ) 223 - { 224 - case Z2MINOR_COMBINED: 225 - 226 - z2ram_map = kmalloc( max_z2_map + max_chip_map, GFP_KERNEL ); 227 - if ( z2ram_map == NULL ) 228 - { 229 - printk( KERN_ERR DEVICE_NAME 230 - ": cannot get mem for z2ram_map\n" ); 231 - goto err_out; 232 - } 233 - 234 - get_z2ram(); 235 - get_chipram(); 236 - 237 - if ( z2ram_size != 0 ) 238 - printk( KERN_INFO DEVICE_NAME 239 - ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", 240 - z2_count * Z2RAM_CHUNK1024, 241 - chip_count * Z2RAM_CHUNK1024, 242 - ( z2_count + chip_count ) * Z2RAM_CHUNK1024 ); 243 - 244 - break; 245 - 246 - case Z2MINOR_Z2ONLY: 247 - z2ram_map = kmalloc( max_z2_map, GFP_KERNEL ); 248 - if ( z2ram_map == NULL ) 249 - { 250 - printk( KERN_ERR DEVICE_NAME 251 - ": cannot get mem for z2ram_map\n" ); 252 - goto err_out; 253 - } 254 - 255 - get_z2ram(); 256 - 257 - if ( z2ram_size != 0 ) 258 - printk( KERN_INFO DEVICE_NAME 259 - ": using %iK of Zorro II RAM\n", 260 - z2_count * Z2RAM_CHUNK1024 ); 261 - 262 - break; 263 - 264 - case Z2MINOR_CHIPONLY: 265 - z2ram_map = kmalloc( max_chip_map, GFP_KERNEL ); 266 - if ( z2ram_map == NULL ) 267 - { 268 - printk( KERN_ERR DEVICE_NAME 269 - ": cannot get mem for z2ram_map\n" ); 270 - goto err_out; 271 - } 272 - 273 - get_chipram(); 274 - 275 - if ( z2ram_size != 0 ) 276 - printk( KERN_INFO DEVICE_NAME 277 - ": using %iK Chip RAM\n", 278 - chip_count * Z2RAM_CHUNK1024 ); 279 - 280 - break; 281 - 282 - default: 283 - rc = -ENODEV; 284 - goto err_out; 285 - 286 - break; 287 } 288 289 - if ( z2ram_size == 0 ) 290 - { 291 - printk( KERN_NOTICE DEVICE_NAME 292 - ": no unused ZII/Chip RAM found\n" ); 293 - goto err_out_kfree; 294 - } 295 - 296 - current_device = device; 297 - z2ram_size <<= Z2RAM_CHUNKSHIFT; 298 - set_capacity(z2ram_gendisk, z2ram_size >> 9); 299 - } 300 - 301 - mutex_unlock(&z2ram_mutex); 302 - return 0; 303 304 err_out_kfree: 305 - kfree(z2ram_map); 306 err_out: 307 - mutex_unlock(&z2ram_mutex); 308 - return rc; 309 } 310 311 - static void 312 - z2_release(struct gendisk *disk, fmode_t mode) 313 { 314 - mutex_lock(&z2ram_mutex); 315 - if ( current_device == -1 ) { 316 - mutex_unlock(&z2ram_mutex); 317 - return; 318 - } 319 - mutex_unlock(&z2ram_mutex); 320 - /* 321 - * FIXME: unmap memory 322 - */ 323 } 324 325 - static const struct block_device_operations z2_fops = 326 - { 327 - .owner = THIS_MODULE, 328 - .open = z2_open, 329 - .release = z2_release, 330 }; 331 332 - static struct kobject *z2_find(dev_t dev, int *part, void *data) 333 - { 334 - *part = 0; 335 - return get_disk_and_module(z2ram_gendisk); 336 - } 337 - 338 - static struct request_queue *z2_queue; 339 static struct blk_mq_tag_set tag_set; 340 341 static const struct blk_mq_ops z2_mq_ops = { 342 - 
.queue_rq = z2_queue_rq, 343 }; 344 345 - static int __init 346 - z2_init(void) 347 { 348 - int ret; 349 350 - if (!MACH_IS_AMIGA) 351 - return -ENODEV; 352 353 - ret = -EBUSY; 354 - if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) 355 - goto err; 356 357 - ret = -ENOMEM; 358 - z2ram_gendisk = alloc_disk(1); 359 - if (!z2ram_gendisk) 360 - goto out_disk; 361 362 - z2_queue = blk_mq_init_sq_queue(&tag_set, &z2_mq_ops, 16, 363 - BLK_MQ_F_SHOULD_MERGE); 364 - if (IS_ERR(z2_queue)) { 365 - ret = PTR_ERR(z2_queue); 366 - z2_queue = NULL; 367 - goto out_queue; 368 - } 369 370 - z2ram_gendisk->major = Z2RAM_MAJOR; 371 - z2ram_gendisk->first_minor = 0; 372 - z2ram_gendisk->fops = &z2_fops; 373 - sprintf(z2ram_gendisk->disk_name, "z2ram"); 374 375 - z2ram_gendisk->queue = z2_queue; 376 - add_disk(z2ram_gendisk); 377 - blk_register_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT, THIS_MODULE, 378 - z2_find, NULL, NULL); 379 380 - return 0; 381 382 - out_queue: 383 - put_disk(z2ram_gendisk); 384 - out_disk: 385 - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); 386 - err: 387 - return ret; 388 } 389 390 static void __exit z2_exit(void) 391 { 392 - int i, j; 393 - blk_unregister_region(MKDEV(Z2RAM_MAJOR, 0), Z2MINOR_COUNT); 394 - unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); 395 - del_gendisk(z2ram_gendisk); 396 - put_disk(z2ram_gendisk); 397 - blk_cleanup_queue(z2_queue); 398 - blk_mq_free_tag_set(&tag_set); 399 400 - if ( current_device != -1 ) 401 - { 402 - i = 0; 403 404 - for ( j = 0 ; j < z2_count; j++ ) 405 - { 406 - set_bit( i++, zorro_unused_z2ram ); 407 } 408 409 - for ( j = 0 ; j < chip_count; j++ ) 410 - { 411 - if ( z2ram_map[ i ] ) 412 - { 413 - amiga_chip_free( (void *) z2ram_map[ i++ ] ); 414 - } 415 - } 416 - 417 - if ( z2ram_map != NULL ) 418 - { 419 - kfree( z2ram_map ); 420 - } 421 - } 422 - 423 - return; 424 - } 425 426 module_init(z2_init); 427 module_exit(z2_exit);
··· 42 43 #include <linux/zorro.h> 44 45 #define Z2MINOR_COMBINED (0) 46 #define Z2MINOR_Z2ONLY (1) 47 #define Z2MINOR_CHIPONLY (2) ··· 50 #define Z2MINOR_MEMLIST2 (5) 51 #define Z2MINOR_MEMLIST3 (6) 52 #define Z2MINOR_MEMLIST4 (7) 53 + #define Z2MINOR_COUNT (8) /* Move this down when adding a new minor */ 54 55 #define Z2RAM_CHUNK1024 ( Z2RAM_CHUNKSIZE >> 10 ) 56 57 static DEFINE_MUTEX(z2ram_mutex); 58 + static u_long *z2ram_map = NULL; 59 + static u_long z2ram_size = 0; 60 + static int z2_count = 0; 61 + static int chip_count = 0; 62 + static int list_count = 0; 63 + static int current_device = -1; 64 65 static DEFINE_SPINLOCK(z2ram_lock); 66 67 + static struct gendisk *z2ram_gendisk[Z2MINOR_COUNT]; 68 69 static blk_status_t z2_queue_rq(struct blk_mq_hw_ctx *hctx, 70 const struct blk_mq_queue_data *bd) 71 { 72 struct request *req = bd->rq; 73 unsigned long start = blk_rq_pos(req) << 9; 74 + unsigned long len = blk_rq_cur_bytes(req); 75 76 blk_mq_start_request(req); 77 ··· 92 93 if (len < size) 94 size = len; 95 + addr += z2ram_map[start >> Z2RAM_CHUNKSHIFT]; 96 if (rq_data_dir(req) == READ) 97 memcpy(buffer, (char *)addr, size); 98 else ··· 106 return BLK_STS_OK; 107 } 108 109 + static void get_z2ram(void) 110 { 111 + int i; 112 113 + for (i = 0; i < Z2RAM_SIZE / Z2RAM_CHUNKSIZE; i++) { 114 + if (test_bit(i, zorro_unused_z2ram)) { 115 + z2_count++; 116 + z2ram_map[z2ram_size++] = 117 + (unsigned long)ZTWO_VADDR(Z2RAM_START) + 118 + (i << Z2RAM_CHUNKSHIFT); 119 + clear_bit(i, zorro_unused_z2ram); 120 + } 121 } 122 123 + return; 124 } 125 126 + static void get_chipram(void) 127 { 128 129 + while (amiga_chip_avail() > (Z2RAM_CHUNKSIZE * 4)) { 130 + chip_count++; 131 + z2ram_map[z2ram_size] = 132 + (u_long) amiga_chip_alloc(Z2RAM_CHUNKSIZE, "z2ram"); 133 134 + if (z2ram_map[z2ram_size] == 0) { 135 + break; 136 + } 137 + 138 + z2ram_size++; 139 } 140 141 + return; 142 } 143 144 static int z2_open(struct block_device *bdev, fmode_t mode) 145 { 146 + int device; 147 + int max_z2_map = (Z2RAM_SIZE / Z2RAM_CHUNKSIZE) * sizeof(z2ram_map[0]); 148 + int max_chip_map = (amiga_chip_size / Z2RAM_CHUNKSIZE) * 149 + sizeof(z2ram_map[0]); 150 + int rc = -ENOMEM; 151 152 + device = MINOR(bdev->bd_dev); 153 154 + mutex_lock(&z2ram_mutex); 155 + if (current_device != -1 && current_device != device) { 156 + rc = -EBUSY; 157 + goto err_out; 158 + } 159 160 + if (current_device == -1) { 161 + z2_count = 0; 162 + chip_count = 0; 163 + list_count = 0; 164 + z2ram_size = 0; 165 166 + /* Use a specific list entry. */ 167 + if (device >= Z2MINOR_MEMLIST1 && device <= Z2MINOR_MEMLIST4) { 168 + int index = device - Z2MINOR_MEMLIST1 + 1; 169 + unsigned long size, paddr, vaddr; 170 171 + if (index >= m68k_realnum_memory) { 172 + printk(KERN_ERR DEVICE_NAME 173 + ": no such entry in z2ram_map\n"); 174 + goto err_out; 175 + } 176 177 + paddr = m68k_memory[index].addr; 178 + size = m68k_memory[index].size & ~(Z2RAM_CHUNKSIZE - 1); 179 180 #ifdef __powerpc__ 181 + /* FIXME: ioremap doesn't build correct memory tables. 
*/ 182 + { 183 + vfree(vmalloc(size)); 184 + } 185 186 + vaddr = (unsigned long)ioremap_wt(paddr, size); 187 188 #else 189 + vaddr = 190 + (unsigned long)z_remap_nocache_nonser(paddr, size); 191 #endif 192 + z2ram_map = 193 + kmalloc_array(size / Z2RAM_CHUNKSIZE, 194 + sizeof(z2ram_map[0]), GFP_KERNEL); 195 + if (z2ram_map == NULL) { 196 + printk(KERN_ERR DEVICE_NAME 197 + ": cannot get mem for z2ram_map\n"); 198 + goto err_out; 199 + } 200 + 201 + while (size) { 202 + z2ram_map[z2ram_size++] = vaddr; 203 + size -= Z2RAM_CHUNKSIZE; 204 + vaddr += Z2RAM_CHUNKSIZE; 205 + list_count++; 206 + } 207 + 208 + if (z2ram_size != 0) 209 + printk(KERN_INFO DEVICE_NAME 210 + ": using %iK List Entry %d Memory\n", 211 + list_count * Z2RAM_CHUNK1024, index); 212 + } else 213 + switch (device) { 214 + case Z2MINOR_COMBINED: 215 + 216 + z2ram_map = 217 + kmalloc(max_z2_map + max_chip_map, 218 + GFP_KERNEL); 219 + if (z2ram_map == NULL) { 220 + printk(KERN_ERR DEVICE_NAME 221 + ": cannot get mem for z2ram_map\n"); 222 + goto err_out; 223 + } 224 + 225 + get_z2ram(); 226 + get_chipram(); 227 + 228 + if (z2ram_size != 0) 229 + printk(KERN_INFO DEVICE_NAME 230 + ": using %iK Zorro II RAM and %iK Chip RAM (Total %dK)\n", 231 + z2_count * Z2RAM_CHUNK1024, 232 + chip_count * Z2RAM_CHUNK1024, 233 + (z2_count + 234 + chip_count) * Z2RAM_CHUNK1024); 235 + 236 + break; 237 + 238 + case Z2MINOR_Z2ONLY: 239 + z2ram_map = kmalloc(max_z2_map, GFP_KERNEL); 240 + if (z2ram_map == NULL) { 241 + printk(KERN_ERR DEVICE_NAME 242 + ": cannot get mem for z2ram_map\n"); 243 + goto err_out; 244 + } 245 + 246 + get_z2ram(); 247 + 248 + if (z2ram_size != 0) 249 + printk(KERN_INFO DEVICE_NAME 250 + ": using %iK of Zorro II RAM\n", 251 + z2_count * Z2RAM_CHUNK1024); 252 + 253 + break; 254 + 255 + case Z2MINOR_CHIPONLY: 256 + z2ram_map = kmalloc(max_chip_map, GFP_KERNEL); 257 + if (z2ram_map == NULL) { 258 + printk(KERN_ERR DEVICE_NAME 259 + ": cannot get mem for z2ram_map\n"); 260 + goto err_out; 261 + } 262 + 263 + get_chipram(); 264 + 265 + if (z2ram_size != 0) 266 + printk(KERN_INFO DEVICE_NAME 267 + ": using %iK Chip RAM\n", 268 + chip_count * Z2RAM_CHUNK1024); 269 + 270 + break; 271 + 272 + default: 273 + rc = -ENODEV; 274 + goto err_out; 275 + 276 + break; 277 + } 278 + 279 + if (z2ram_size == 0) { 280 + printk(KERN_NOTICE DEVICE_NAME 281 + ": no unused ZII/Chip RAM found\n"); 282 + goto err_out_kfree; 283 } 284 285 + current_device = device; 286 + z2ram_size <<= Z2RAM_CHUNKSHIFT; 287 + set_capacity(z2ram_gendisk[device], z2ram_size >> 9); 288 } 289 290 + mutex_unlock(&z2ram_mutex); 291 + return 0; 292 293 err_out_kfree: 294 + kfree(z2ram_map); 295 err_out: 296 + mutex_unlock(&z2ram_mutex); 297 + return rc; 298 } 299 300 + static void z2_release(struct gendisk *disk, fmode_t mode) 301 { 302 + mutex_lock(&z2ram_mutex); 303 + if (current_device == -1) { 304 + mutex_unlock(&z2ram_mutex); 305 + return; 306 + } 307 + mutex_unlock(&z2ram_mutex); 308 + /* 309 + * FIXME: unmap memory 310 + */ 311 } 312 313 + static const struct block_device_operations z2_fops = { 314 + .owner = THIS_MODULE, 315 + .open = z2_open, 316 + .release = z2_release, 317 }; 318 319 static struct blk_mq_tag_set tag_set; 320 321 static const struct blk_mq_ops z2_mq_ops = { 322 + .queue_rq = z2_queue_rq, 323 }; 324 325 + static int z2ram_register_disk(int minor) 326 { 327 + struct request_queue *q; 328 + struct gendisk *disk; 329 330 + disk = alloc_disk(1); 331 + if (!disk) 332 + return -ENOMEM; 333 334 + q = blk_mq_init_queue(&tag_set); 335 + if (IS_ERR(q)) { 336 
+ put_disk(disk); 337 + return PTR_ERR(q); 338 + } 339 340 + disk->major = Z2RAM_MAJOR; 341 + disk->first_minor = minor; 342 + disk->fops = &z2_fops; 343 + if (minor) 344 + sprintf(disk->disk_name, "z2ram%d", minor); 345 + else 346 + sprintf(disk->disk_name, "z2ram"); 347 + disk->queue = q; 348 349 + z2ram_gendisk[minor] = disk; 350 + add_disk(disk); 351 + return 0; 352 + } 353 354 + static int __init z2_init(void) 355 + { 356 + int ret, i; 357 358 + if (!MACH_IS_AMIGA) 359 + return -ENODEV; 360 361 + if (register_blkdev(Z2RAM_MAJOR, DEVICE_NAME)) 362 + return -EBUSY; 363 364 + tag_set.ops = &z2_mq_ops; 365 + tag_set.nr_hw_queues = 1; 366 + tag_set.nr_maps = 1; 367 + tag_set.queue_depth = 16; 368 + tag_set.numa_node = NUMA_NO_NODE; 369 + tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 370 + ret = blk_mq_alloc_tag_set(&tag_set); 371 + if (ret) 372 + goto out_unregister_blkdev; 373 + 374 + for (i = 0; i < Z2MINOR_COUNT; i++) { 375 + ret = z2ram_register_disk(i); 376 + if (ret && i == 0) 377 + goto out_free_tagset; 378 + } 379 + 380 + return 0; 381 + 382 + out_free_tagset: 383 + blk_mq_free_tag_set(&tag_set); 384 + out_unregister_blkdev: 385 + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); 386 + return ret; 387 } 388 389 static void __exit z2_exit(void) 390 { 391 + int i, j; 392 393 + unregister_blkdev(Z2RAM_MAJOR, DEVICE_NAME); 394 395 + for (i = 0; i < Z2MINOR_COUNT; i++) { 396 + del_gendisk(z2ram_gendisk[i]); 397 + blk_cleanup_queue(z2ram_gendisk[i]->queue); 398 + put_disk(z2ram_gendisk[i]); 399 + } 400 + blk_mq_free_tag_set(&tag_set); 401 + 402 + if (current_device != -1) { 403 + i = 0; 404 + 405 + for (j = 0; j < z2_count; j++) { 406 + set_bit(i++, zorro_unused_z2ram); 407 + } 408 + 409 + for (j = 0; j < chip_count; j++) { 410 + if (z2ram_map[i]) { 411 + amiga_chip_free((void *)z2ram_map[i++]); 412 + } 413 + } 414 + 415 + if (z2ram_map != NULL) { 416 + kfree(z2ram_map); 417 + } 418 } 419 420 + return; 421 + } 422 423 module_init(z2_init); 424 module_exit(z2_exit);
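The z2ram rewrite above replaces the blk_register_region() minor-claiming scheme with one gendisk per minor created at init time. Since several request queues now share a single tag set, the blk_mq_init_sq_queue() convenience helper (which allocates a tag set and a queue as a pair) no longer fits, and the tag set is allocated once and handed to blk_mq_init_queue() per disk. A hedged sketch of just the shared tag-set setup; mydrv_* is illustrative and the queue depth and flags simply follow the hunk:

#include <linux/blk-mq.h>

static struct blk_mq_tag_set mydrv_tag_set;

/* One tag set shared by all of the driver's disks/queues. */
static int mydrv_init_tag_set(const struct blk_mq_ops *ops)
{
	mydrv_tag_set.ops = ops;
	mydrv_tag_set.nr_hw_queues = 1;
	mydrv_tag_set.nr_maps = 1;
	mydrv_tag_set.queue_depth = 16;
	mydrv_tag_set.numa_node = NUMA_NO_NODE;
	mydrv_tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	return blk_mq_alloc_tag_set(&mydrv_tag_set);
}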
+6 -28
drivers/block/zram/zram_drv.c
··· 403 return; 404 405 bdev = zram->bdev; 406 - if (zram->old_block_size) 407 - set_blocksize(bdev, zram->old_block_size); 408 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 409 /* hope filp_close flush all of IO */ 410 filp_close(zram->backing_dev, NULL); 411 zram->backing_dev = NULL; 412 - zram->old_block_size = 0; 413 zram->bdev = NULL; 414 zram->disk->fops = &zram_devops; 415 kvfree(zram->bitmap); ··· 451 struct file *backing_dev = NULL; 452 struct inode *inode; 453 struct address_space *mapping; 454 - unsigned int bitmap_sz, old_block_size = 0; 455 unsigned long nr_pages, *bitmap = NULL; 456 struct block_device *bdev = NULL; 457 int err; ··· 506 goto out; 507 } 508 509 - old_block_size = block_size(bdev); 510 - err = set_blocksize(bdev, PAGE_SIZE); 511 - if (err) 512 - goto out; 513 - 514 reset_bdev(zram); 515 516 - zram->old_block_size = old_block_size; 517 zram->bdev = bdev; 518 zram->backing_dev = backing_dev; 519 zram->bitmap = bitmap; ··· 1701 disksize = zram->disksize; 1702 zram->disksize = 0; 1703 1704 - set_capacity(zram->disk, 0); 1705 - part_stat_set_all(&zram->disk->part0, 0); 1706 1707 up_write(&zram->init_lock); 1708 /* I/O operation under all of CPU are done so let's free */ ··· 1747 1748 zram->comp = comp; 1749 zram->disksize = disksize; 1750 - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); 1751 - 1752 - revalidate_disk_size(zram->disk, true); 1753 up_write(&zram->init_lock); 1754 1755 return len; ··· 1775 return -EINVAL; 1776 1777 zram = dev_to_zram(dev); 1778 - bdev = bdget_disk(zram->disk, 0); 1779 - if (!bdev) 1780 - return -ENOMEM; 1781 1782 mutex_lock(&bdev->bd_mutex); 1783 /* Do not reset an active device or claimed device */ 1784 if (bdev->bd_openers || zram->claim) { 1785 mutex_unlock(&bdev->bd_mutex); 1786 - bdput(bdev); 1787 return -EBUSY; 1788 } 1789 ··· 1791 /* Make sure all the pending I/O are finished */ 1792 fsync_bdev(bdev); 1793 zram_reset_device(zram); 1794 - revalidate_disk_size(zram->disk, true); 1795 - bdput(bdev); 1796 1797 mutex_lock(&bdev->bd_mutex); 1798 zram->claim = false; ··· 1976 1977 static int zram_remove(struct zram *zram) 1978 { 1979 - struct block_device *bdev; 1980 - 1981 - bdev = bdget_disk(zram->disk, 0); 1982 - if (!bdev) 1983 - return -ENOMEM; 1984 1985 mutex_lock(&bdev->bd_mutex); 1986 if (bdev->bd_openers || zram->claim) { 1987 mutex_unlock(&bdev->bd_mutex); 1988 - bdput(bdev); 1989 return -EBUSY; 1990 } 1991 ··· 1992 /* Make sure all the pending I/O are finished */ 1993 fsync_bdev(bdev); 1994 zram_reset_device(zram); 1995 - bdput(bdev); 1996 1997 pr_info("Removed device: %s\n", zram->disk->disk_name); 1998
··· 403 return; 404 405 bdev = zram->bdev; 406 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 407 /* hope filp_close flush all of IO */ 408 filp_close(zram->backing_dev, NULL); 409 zram->backing_dev = NULL; 410 zram->bdev = NULL; 411 zram->disk->fops = &zram_devops; 412 kvfree(zram->bitmap); ··· 454 struct file *backing_dev = NULL; 455 struct inode *inode; 456 struct address_space *mapping; 457 + unsigned int bitmap_sz; 458 unsigned long nr_pages, *bitmap = NULL; 459 struct block_device *bdev = NULL; 460 int err; ··· 509 goto out; 510 } 511 512 reset_bdev(zram); 513 514 zram->bdev = bdev; 515 zram->backing_dev = backing_dev; 516 zram->bitmap = bitmap; ··· 1710 disksize = zram->disksize; 1711 zram->disksize = 0; 1712 1713 + set_capacity_and_notify(zram->disk, 0); 1714 + part_stat_set_all(zram->disk->part0, 0); 1715 1716 up_write(&zram->init_lock); 1717 /* I/O operation under all of CPU are done so let's free */ ··· 1756 1757 zram->comp = comp; 1758 zram->disksize = disksize; 1759 + set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT); 1760 up_write(&zram->init_lock); 1761 1762 return len; ··· 1786 return -EINVAL; 1787 1788 zram = dev_to_zram(dev); 1789 + bdev = zram->disk->part0; 1790 1791 mutex_lock(&bdev->bd_mutex); 1792 /* Do not reset an active device or claimed device */ 1793 if (bdev->bd_openers || zram->claim) { 1794 mutex_unlock(&bdev->bd_mutex); 1795 return -EBUSY; 1796 } 1797 ··· 1805 /* Make sure all the pending I/O are finished */ 1806 fsync_bdev(bdev); 1807 zram_reset_device(zram); 1808 1809 mutex_lock(&bdev->bd_mutex); 1810 zram->claim = false; ··· 1992 1993 static int zram_remove(struct zram *zram) 1994 { 1995 + struct block_device *bdev = zram->disk->part0; 1996 1997 mutex_lock(&bdev->bd_mutex); 1998 if (bdev->bd_openers || zram->claim) { 1999 mutex_unlock(&bdev->bd_mutex); 2000 return -EBUSY; 2001 } 2002 ··· 2013 /* Make sure all the pending I/O are finished */ 2014 fsync_bdev(bdev); 2015 zram_reset_device(zram); 2016 2017 pr_info("Removed device: %s\n", zram->disk->disk_name); 2018
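The zram reset and hot-remove paths no longer need bdget_disk()/bdput() at all: zram->disk->part0 is the whole-device block_device and shares the gendisk's lifetime, so there is no lookup to fail and no reference to drop. A hedged sketch of the resulting busy-check, modelled on the hunk above (the helper name is invented):

#include <linux/blkdev.h>

/* Refuse to reset while the whole device is open, as zram does above. */
static int mydrv_check_not_open(struct gendisk *disk)
{
	struct block_device *bdev = disk->part0;	/* no bdget/bdput */
	int ret = 0;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers)
		ret = -EBUSY;
	mutex_unlock(&bdev->bd_mutex);
	return ret;
}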
-1
drivers/block/zram/zram_drv.h
··· 119 bool wb_limit_enable; 120 u64 bd_wb_limit; 121 struct block_device *bdev; 122 - unsigned int old_block_size; 123 unsigned long *bitmap; 124 unsigned long nr_pages; 125 #endif
··· 119 bool wb_limit_enable; 120 u64 bd_wb_limit; 121 struct block_device *bdev; 122 unsigned long *bitmap; 123 unsigned long nr_pages; 124 #endif
+6 -60
drivers/ide/ide-probe.c
··· 902 return 1; 903 } 904 905 - static int ata_lock(dev_t dev, void *data) 906 { 907 - /* FIXME: we want to pin hwif down */ 908 - return 0; 909 } 910 - 911 - static struct kobject *ata_probe(dev_t dev, int *part, void *data) 912 - { 913 - ide_hwif_t *hwif = data; 914 - int unit = *part >> PARTN_BITS; 915 - ide_drive_t *drive = hwif->devices[unit]; 916 - 917 - if ((drive->dev_flags & IDE_DFLAG_PRESENT) == 0) 918 - return NULL; 919 - 920 - if (drive->media == ide_disk) 921 - request_module("ide-disk"); 922 - if (drive->media == ide_cdrom || drive->media == ide_optical) 923 - request_module("ide-cd"); 924 - if (drive->media == ide_tape) 925 - request_module("ide-tape"); 926 - if (drive->media == ide_floppy) 927 - request_module("ide-floppy"); 928 - 929 - return NULL; 930 - } 931 - 932 - static struct kobject *exact_match(dev_t dev, int *part, void *data) 933 - { 934 - struct gendisk *p = data; 935 - *part &= (1 << PARTN_BITS) - 1; 936 - return &disk_to_dev(p)->kobj; 937 - } 938 - 939 - static int exact_lock(dev_t dev, void *data) 940 - { 941 - struct gendisk *p = data; 942 - 943 - if (!get_disk_and_module(p)) 944 - return -1; 945 - return 0; 946 - } 947 - 948 - void ide_register_region(struct gendisk *disk) 949 - { 950 - blk_register_region(MKDEV(disk->major, disk->first_minor), 951 - disk->minors, NULL, exact_match, exact_lock, disk); 952 - } 953 - 954 - EXPORT_SYMBOL_GPL(ide_register_region); 955 - 956 - void ide_unregister_region(struct gendisk *disk) 957 - { 958 - blk_unregister_region(MKDEV(disk->major, disk->first_minor), 959 - disk->minors); 960 - } 961 - 962 - EXPORT_SYMBOL_GPL(ide_unregister_region); 963 964 void ide_init_disk(struct gendisk *disk, ide_drive_t *drive) 965 { ··· 948 return 0; 949 } 950 951 - if (register_blkdev(hwif->major, hwif->name)) 952 return 0; 953 954 if (!hwif->sg_max_nents) ··· 970 goto out; 971 } 972 973 - blk_register_region(MKDEV(hwif->major, 0), MAX_DRIVES << PARTN_BITS, 974 - THIS_MODULE, ata_probe, ata_lock, hwif); 975 return 1; 976 977 out: ··· 1558 /* 1559 * Remove us from the kernel's knowledge 1560 */ 1561 - blk_unregister_region(MKDEV(hwif->major, 0), MAX_DRIVES<<PARTN_BITS); 1562 kfree(hwif->sg_table); 1563 unregister_blkdev(hwif->major, hwif->name); 1564
··· 902 return 1; 903 } 904 905 + static void ata_probe(dev_t dev) 906 { 907 + request_module("ide-disk"); 908 + request_module("ide-cd"); 909 + request_module("ide-tape"); 910 + request_module("ide-floppy"); 911 } 912 913 void ide_init_disk(struct gendisk *disk, ide_drive_t *drive) 914 { ··· 999 return 0; 1000 } 1001 1002 + if (__register_blkdev(hwif->major, hwif->name, ata_probe)) 1003 return 0; 1004 1005 if (!hwif->sg_max_nents) ··· 1021 goto out; 1022 } 1023 1024 return 1; 1025 1026 out: ··· 1611 /* 1612 * Remove us from the kernel's knowledge 1613 */ 1614 kfree(hwif->sg_table); 1615 unregister_blkdev(hwif->major, hwif->name); 1616
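ide's on-demand module loading moves from a blk_register_region() probe hook to the probe callback taken by __register_blkdev(); the block core calls it when a device node with that major is opened and no matching gendisk is registered, which is all ata_probe() needs. A hedged sketch of registering such a callback; MYDRV_MAJOR, the module name and the function names are invented:

#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/genhd.h>

#define MYDRV_MAJOR	240	/* example value from the local/experimental range */

/* Called on first open of an unclaimed MYDRV_MAJOR node. */
static void mydrv_probe(dev_t devt)
{
	request_module("mydrv-disk");
}

static int __init mydrv_init(void)
{
	return __register_blkdev(MYDRV_MAJOR, "mydrv", mydrv_probe);
}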
-2
drivers/ide/ide-tape.c
··· 1822 1823 ide_proc_unregister_driver(drive, tape->driver); 1824 device_del(&tape->dev); 1825 - ide_unregister_region(tape->disk); 1826 1827 mutex_lock(&idetape_ref_mutex); 1828 put_device(&tape->dev); ··· 2025 "n%s", tape->name); 2026 2027 g->fops = &idetape_block_ops; 2028 - ide_register_region(g); 2029 2030 return 0; 2031
··· 1822 1823 ide_proc_unregister_driver(drive, tape->driver); 1824 device_del(&tape->dev); 1825 1826 mutex_lock(&idetape_ref_mutex); 1827 put_device(&tape->dev); ··· 2026 "n%s", tape->name); 2027 2028 g->fops = &idetape_block_ops; 2029 2030 return 0; 2031
+5 -4
drivers/md/bcache/request.c
··· 475 unsigned int read_dirty_data:1; 476 unsigned int cache_missed:1; 477 478 - struct hd_struct *part; 479 unsigned long start_time; 480 481 struct btree_op op; ··· 1073 unsigned long start_time; 1074 bio_end_io_t *bi_end_io; 1075 void *bi_private; 1076 - struct hd_struct *part; 1077 }; 1078 1079 static void detached_dev_end_io(struct bio *bio) ··· 1230 1231 if (dc->io_disable) 1232 return -EIO; 1233 - 1234 - return __blkdev_driver_ioctl(dc->bdev, mode, cmd, arg); 1235 } 1236 1237 void bch_cached_dev_request_init(struct cached_dev *dc)
··· 475 unsigned int read_dirty_data:1; 476 unsigned int cache_missed:1; 477 478 + struct block_device *part; 479 unsigned long start_time; 480 481 struct btree_op op; ··· 1073 unsigned long start_time; 1074 bio_end_io_t *bi_end_io; 1075 void *bi_private; 1076 + struct block_device *part; 1077 }; 1078 1079 static void detached_dev_end_io(struct bio *bio) ··· 1230 1231 if (dc->io_disable) 1232 return -EIO; 1233 + if (!dc->bdev->bd_disk->fops->ioctl) 1234 + return -ENOTTY; 1235 + return dc->bdev->bd_disk->fops->ioctl(dc->bdev, mode, cmd, arg); 1236 } 1237 1238 void bch_cached_dev_request_init(struct cached_dev *dc)
+14 -15
drivers/md/bcache/super.c
··· 1408 q->limits.raid_partial_stripes_expensive; 1409 1410 ret = bcache_device_init(&dc->disk, block_size, 1411 - dc->bdev->bd_part->nr_sects - dc->sb.data_offset, 1412 dc->bdev, &bcache_cached_ops); 1413 if (ret) 1414 return ret; ··· 1447 goto err; 1448 1449 err = "error creating kobject"; 1450 - if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj, 1451 - "bcache")) 1452 goto err; 1453 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) 1454 goto err; ··· 2341 goto err; 2342 } 2343 2344 - if (kobject_add(&ca->kobj, 2345 - &part_to_dev(bdev->bd_part)->kobj, 2346 - "bcache")) { 2347 err = "error calling kobject_add"; 2348 ret = -ENOMEM; 2349 goto out; ··· 2380 kobj_attribute_write(register_quiet, register_bcache); 2381 kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); 2382 2383 - static bool bch_is_open_backing(struct block_device *bdev) 2384 { 2385 struct cache_set *c, *tc; 2386 struct cached_dev *dc, *t; 2387 2388 list_for_each_entry_safe(c, tc, &bch_cache_sets, list) 2389 list_for_each_entry_safe(dc, t, &c->cached_devs, list) 2390 - if (dc->bdev == bdev) 2391 return true; 2392 list_for_each_entry_safe(dc, t, &uncached_devices, list) 2393 - if (dc->bdev == bdev) 2394 return true; 2395 return false; 2396 } 2397 2398 - static bool bch_is_open_cache(struct block_device *bdev) 2399 { 2400 struct cache_set *c, *tc; 2401 2402 list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { 2403 struct cache *ca = c->cache; 2404 2405 - if (ca->bdev == bdev) 2406 return true; 2407 } 2408 2409 return false; 2410 } 2411 2412 - static bool bch_is_open(struct block_device *bdev) 2413 { 2414 - return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); 2415 } 2416 2417 struct async_reg_args { ··· 2535 sb); 2536 if (IS_ERR(bdev)) { 2537 if (bdev == ERR_PTR(-EBUSY)) { 2538 - bdev = lookup_bdev(strim(path)); 2539 mutex_lock(&bch_register_lock); 2540 - if (!IS_ERR(bdev) && bch_is_open(bdev)) 2541 err = "device already registered"; 2542 else 2543 err = "device busy";
··· 1408 q->limits.raid_partial_stripes_expensive; 1409 1410 ret = bcache_device_init(&dc->disk, block_size, 1411 + bdev_nr_sectors(dc->bdev) - dc->sb.data_offset, 1412 dc->bdev, &bcache_cached_ops); 1413 if (ret) 1414 return ret; ··· 1447 goto err; 1448 1449 err = "error creating kobject"; 1450 + if (kobject_add(&dc->disk.kobj, bdev_kobj(bdev), "bcache")) 1451 goto err; 1452 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) 1453 goto err; ··· 2342 goto err; 2343 } 2344 2345 + if (kobject_add(&ca->kobj, bdev_kobj(bdev), "bcache")) { 2346 err = "error calling kobject_add"; 2347 ret = -ENOMEM; 2348 goto out; ··· 2383 kobj_attribute_write(register_quiet, register_bcache); 2384 kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); 2385 2386 + static bool bch_is_open_backing(dev_t dev) 2387 { 2388 struct cache_set *c, *tc; 2389 struct cached_dev *dc, *t; 2390 2391 list_for_each_entry_safe(c, tc, &bch_cache_sets, list) 2392 list_for_each_entry_safe(dc, t, &c->cached_devs, list) 2393 + if (dc->bdev->bd_dev == dev) 2394 return true; 2395 list_for_each_entry_safe(dc, t, &uncached_devices, list) 2396 + if (dc->bdev->bd_dev == dev) 2397 return true; 2398 return false; 2399 } 2400 2401 + static bool bch_is_open_cache(dev_t dev) 2402 { 2403 struct cache_set *c, *tc; 2404 2405 list_for_each_entry_safe(c, tc, &bch_cache_sets, list) { 2406 struct cache *ca = c->cache; 2407 2408 + if (ca->bdev->bd_dev == dev) 2409 return true; 2410 } 2411 2412 return false; 2413 } 2414 2415 + static bool bch_is_open(dev_t dev) 2416 { 2417 + return bch_is_open_cache(dev) || bch_is_open_backing(dev); 2418 } 2419 2420 struct async_reg_args { ··· 2538 sb); 2539 if (IS_ERR(bdev)) { 2540 if (bdev == ERR_PTR(-EBUSY)) { 2541 + dev_t dev; 2542 + 2543 mutex_lock(&bch_register_lock); 2544 + if (lookup_bdev(strim(path), &dev) == 0 && 2545 + bch_is_open(dev)) 2546 err = "device already registered"; 2547 else 2548 err = "device busy";
-7
drivers/md/dm-core.h
··· 96 */ 97 struct workqueue_struct *wq; 98 99 - /* 100 - * freeze/thaw support require holding onto a super block 101 - */ 102 - struct super_block *frozen_sb; 103 - 104 /* forced geometry settings */ 105 struct hd_geometry geometry; 106 107 /* kobject and completion */ 108 struct dm_kobject_holder kobj_holder; 109 - 110 - struct block_device *bdev; 111 112 struct dm_stats stats; 113
··· 96 */ 97 struct workqueue_struct *wq; 98 99 /* forced geometry settings */ 100 struct hd_geometry geometry; 101 102 /* kobject and completion */ 103 struct dm_kobject_holder kobj_holder; 104 105 struct dm_stats stats; 106
+1 -2
drivers/md/dm-raid.c
··· 700 { 701 struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); 702 703 - set_capacity(gendisk, rs->md.array_sectors); 704 - revalidate_disk_size(gendisk, true); 705 } 706 707 /*
··· 700 { 701 struct gendisk *gendisk = dm_disk(dm_table_get_md(rs->ti->table)); 702 703 + set_capacity_and_notify(gendisk, rs->md.array_sectors); 704 } 705 706 /*
+1 -1
drivers/md/dm-rq.c
··· 397 } 398 399 /* The target has remapped the I/O so dispatch it */ 400 - trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), 401 blk_rq_pos(rq)); 402 ret = dm_dispatch_clone_request(clone, rq); 403 if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
··· 397 } 398 399 /* The target has remapped the I/O so dispatch it */ 400 + trace_block_rq_remap(clone, disk_devt(dm_disk(md)), 401 blk_rq_pos(rq)); 402 ret = dm_dispatch_clone_request(clone, rq); 403 if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
+1 -8
drivers/md/dm-table.c
··· 347 dev_t dm_get_dev_t(const char *path) 348 { 349 dev_t dev; 350 - struct block_device *bdev; 351 352 - bdev = lookup_bdev(path); 353 - if (IS_ERR(bdev)) 354 dev = name_to_dev_t(path); 355 - else { 356 - dev = bdev->bd_dev; 357 - bdput(bdev); 358 - } 359 - 360 return dev; 361 } 362 EXPORT_SYMBOL_GPL(dm_get_dev_t);
··· 347 dev_t dm_get_dev_t(const char *path) 348 { 349 dev_t dev; 350 351 + if (lookup_bdev(path, &dev)) 352 dev = name_to_dev_t(path); 353 return dev; 354 } 355 EXPORT_SYMBOL_GPL(dm_get_dev_t);
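dm_get_dev_t() shrinks because lookup_bdev() now resolves a path straight to a dev_t and returns an errno, instead of handing back a pinned struct block_device that had to be bdput(). A hedged usage sketch (helper name and message are illustrative):

#include <linux/fs.h>
#include <linux/blkdev.h>

/* Resolve a user-supplied path such as "/dev/sda1" to a device number. */
static int mydrv_path_to_devt(const char *path, dev_t *devt)
{
	int ret = lookup_bdev(path, devt);

	if (ret)
		pr_warn("mydrv: %s is not a block device (%d)\n", path, ret);
	return ret;
}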
+18 -40
drivers/md/dm.c
··· 570 } 571 } 572 573 - r = __blkdev_driver_ioctl(bdev, mode, cmd, arg); 574 out: 575 dm_unprepare_ioctl(md, srcu_idx); 576 return r; ··· 1277 break; 1278 case DM_MAPIO_REMAPPED: 1279 /* the bio has been remapped so dispatch it */ 1280 - trace_block_bio_remap(clone->bi_disk->queue, clone, 1281 - bio_dev(io->orig_bio), sector); 1282 ret = submit_bio_noacct(clone); 1283 break; 1284 case DM_MAPIO_KILL: ··· 1422 */ 1423 bio_init(&flush_bio, NULL, 0); 1424 flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; 1425 ci->bio = &flush_bio; 1426 ci->sector_count = 0; 1427 - 1428 - /* 1429 - * Empty flush uses a statically initialized bio, as the base for 1430 - * cloning. However, blkg association requires that a bdev is 1431 - * associated with a gendisk, which doesn't happen until the bdev is 1432 - * opened. So, blkg association is done at issue time of the flush 1433 - * rather than when the device is created in alloc_dev(). 1434 - */ 1435 - bio_set_dev(ci->bio, ci->io->md->bdev); 1436 1437 BUG_ON(bio_has_data(ci->bio)); 1438 while ((ti = dm_table_get_target(ci->map, target_nr++))) ··· 1607 * (by eliminating DM's splitting and just using bio_split) 1608 */ 1609 part_stat_lock(); 1610 - __dm_part_stat_sub(&dm_disk(md)->part0, 1611 sectors[op_stat_group(bio_op(bio))], ci.sector_count); 1612 part_stat_unlock(); 1613 1614 bio_chain(b, bio); 1615 - trace_block_split(md->queue, b, bio->bi_iter.bi_sector); 1616 ret = submit_bio_noacct(bio); 1617 break; 1618 } ··· 1744 1745 cleanup_srcu_struct(&md->io_barrier); 1746 1747 - if (md->bdev) { 1748 - bdput(md->bdev); 1749 - md->bdev = NULL; 1750 - } 1751 - 1752 mutex_destroy(&md->suspend_lock); 1753 mutex_destroy(&md->type_lock); 1754 mutex_destroy(&md->table_devices_lock); ··· 1833 1834 md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0); 1835 if (!md->wq) 1836 - goto bad; 1837 - 1838 - md->bdev = bdget_disk(md->disk, 0); 1839 - if (!md->bdev) 1840 goto bad; 1841 1842 dm_stats_init(&md->stats); ··· 1959 if (size != dm_get_size(md)) 1960 memset(&md->geometry, 0, sizeof(md->geometry)); 1961 1962 - set_capacity(md->disk, size); 1963 - bd_set_nr_sectors(md->bdev, size); 1964 1965 dm_table_event_callback(t, event_callback, md); 1966 ··· 2242 static bool md_in_flight_bios(struct mapped_device *md) 2243 { 2244 int cpu; 2245 - struct hd_struct *part = &dm_disk(md)->part0; 2246 long sum = 0; 2247 2248 for_each_possible_cpu(cpu) { ··· 2377 { 2378 int r; 2379 2380 - WARN_ON(md->frozen_sb); 2381 2382 - md->frozen_sb = freeze_bdev(md->bdev); 2383 - if (IS_ERR(md->frozen_sb)) { 2384 - r = PTR_ERR(md->frozen_sb); 2385 - md->frozen_sb = NULL; 2386 - return r; 2387 - } 2388 - 2389 - set_bit(DMF_FROZEN, &md->flags); 2390 - 2391 - return 0; 2392 } 2393 2394 static void unlock_fs(struct mapped_device *md) 2395 { 2396 if (!test_bit(DMF_FROZEN, &md->flags)) 2397 return; 2398 - 2399 - thaw_bdev(md->bdev, md->frozen_sb); 2400 - md->frozen_sb = NULL; 2401 clear_bit(DMF_FROZEN, &md->flags); 2402 } 2403
··· 570 } 571 } 572 573 + if (!bdev->bd_disk->fops->ioctl) 574 + r = -ENOTTY; 575 + else 576 + r = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); 577 out: 578 dm_unprepare_ioctl(md, srcu_idx); 579 return r; ··· 1274 break; 1275 case DM_MAPIO_REMAPPED: 1276 /* the bio has been remapped so dispatch it */ 1277 + trace_block_bio_remap(clone, bio_dev(io->orig_bio), sector); 1278 ret = submit_bio_noacct(clone); 1279 break; 1280 case DM_MAPIO_KILL: ··· 1420 */ 1421 bio_init(&flush_bio, NULL, 0); 1422 flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC; 1423 + flush_bio.bi_disk = ci->io->md->disk; 1424 + bio_associate_blkg(&flush_bio); 1425 + 1426 ci->bio = &flush_bio; 1427 ci->sector_count = 0; 1428 1429 BUG_ON(bio_has_data(ci->bio)); 1430 while ((ti = dm_table_get_target(ci->map, target_nr++))) ··· 1611 * (by eliminating DM's splitting and just using bio_split) 1612 */ 1613 part_stat_lock(); 1614 + __dm_part_stat_sub(dm_disk(md)->part0, 1615 sectors[op_stat_group(bio_op(bio))], ci.sector_count); 1616 part_stat_unlock(); 1617 1618 bio_chain(b, bio); 1619 + trace_block_split(b, bio->bi_iter.bi_sector); 1620 ret = submit_bio_noacct(bio); 1621 break; 1622 } ··· 1748 1749 cleanup_srcu_struct(&md->io_barrier); 1750 1751 mutex_destroy(&md->suspend_lock); 1752 mutex_destroy(&md->type_lock); 1753 mutex_destroy(&md->table_devices_lock); ··· 1842 1843 md->wq = alloc_workqueue("kdmflush", WQ_MEM_RECLAIM, 0); 1844 if (!md->wq) 1845 goto bad; 1846 1847 dm_stats_init(&md->stats); ··· 1972 if (size != dm_get_size(md)) 1973 memset(&md->geometry, 0, sizeof(md->geometry)); 1974 1975 + set_capacity_and_notify(md->disk, size); 1976 1977 dm_table_event_callback(t, event_callback, md); 1978 ··· 2256 static bool md_in_flight_bios(struct mapped_device *md) 2257 { 2258 int cpu; 2259 + struct block_device *part = dm_disk(md)->part0; 2260 long sum = 0; 2261 2262 for_each_possible_cpu(cpu) { ··· 2391 { 2392 int r; 2393 2394 + WARN_ON(test_bit(DMF_FROZEN, &md->flags)); 2395 2396 + r = freeze_bdev(md->disk->part0); 2397 + if (!r) 2398 + set_bit(DMF_FROZEN, &md->flags); 2399 + return r; 2400 } 2401 2402 static void unlock_fs(struct mapped_device *md) 2403 { 2404 if (!test_bit(DMF_FROZEN, &md->flags)) 2405 return; 2406 + thaw_bdev(md->disk->part0); 2407 clear_bit(DMF_FROZEN, &md->flags); 2408 } 2409
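lock_fs()/unlock_fs() lose the stashed super_block because freeze_bdev() now returns an errno and thaw_bdev() takes only the block device; the frozen state dm still tracks is just its own DMF_FROZEN bit, and the bdev used is the gendisk's part0. A hedged sketch of the new pairing (the wrapper and its callback are invented):

#include <linux/fs.h>
#include <linux/blkdev.h>

/* Run op() with the filesystem on this disk frozen, then thaw it again. */
static int mydrv_with_frozen_fs(struct gendisk *disk,
				void (*op)(void *data), void *data)
{
	struct block_device *bdev = disk->part0;
	int r = freeze_bdev(bdev);

	if (r)
		return r;
	op(data);
	thaw_bdev(bdev);
	return 0;
}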
+2 -6
drivers/md/md-cluster.c
··· 581 process_metadata_update(mddev, msg); 582 break; 583 case CHANGE_CAPACITY: 584 - set_capacity(mddev->gendisk, mddev->array_sectors); 585 - revalidate_disk_size(mddev->gendisk, true); 586 break; 587 case RESYNCING: 588 set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); ··· 1295 if (ret) 1296 pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", 1297 __func__, __LINE__); 1298 - set_capacity(mddev->gendisk, mddev->array_sectors); 1299 - revalidate_disk_size(mddev->gendisk, true); 1300 } else { 1301 /* revert to previous sectors */ 1302 ret = mddev->pers->resize(mddev, old_dev_sectors); 1303 - if (!ret) 1304 - revalidate_disk_size(mddev->gendisk, true); 1305 ret = __sendmsg(cinfo, &cmsg); 1306 if (ret) 1307 pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",
··· 581 process_metadata_update(mddev, msg); 582 break; 583 case CHANGE_CAPACITY: 584 + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); 585 break; 586 case RESYNCING: 587 set_bit(MD_RESYNCING_REMOTE, &mddev->recovery); ··· 1296 if (ret) 1297 pr_err("%s:%d: failed to send CHANGE_CAPACITY msg\n", 1298 __func__, __LINE__); 1299 + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); 1300 } else { 1301 /* revert to previous sectors */ 1302 ret = mddev->pers->resize(mddev, old_dev_sectors); 1303 ret = __sendmsg(cinfo, &cmsg); 1304 if (ret) 1305 pr_err("%s:%d: failed to send METADATA_UPDATED msg\n",
+2 -4
drivers/md/md-linear.c
··· 200 "copied raid_disks doesn't match mddev->raid_disks"); 201 rcu_assign_pointer(mddev->private, newconf); 202 md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); 203 - set_capacity(mddev->gendisk, mddev->array_sectors); 204 mddev_resume(mddev); 205 - revalidate_disk_size(mddev->gendisk, true); 206 kfree_rcu(oldconf, rcu); 207 return 0; 208 } ··· 257 bio_endio(bio); 258 } else { 259 if (mddev->gendisk) 260 - trace_block_bio_remap(bio->bi_disk->queue, 261 - bio, disk_devt(mddev->gendisk), 262 bio_sector); 263 mddev_check_writesame(mddev, bio); 264 mddev_check_write_zeroes(mddev, bio);
··· 200 "copied raid_disks doesn't match mddev->raid_disks"); 201 rcu_assign_pointer(mddev->private, newconf); 202 md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); 203 + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); 204 mddev_resume(mddev); 205 kfree_rcu(oldconf, rcu); 206 return 0; 207 } ··· 258 bio_endio(bio); 259 } else { 260 if (mddev->gendisk) 261 + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), 262 bio_sector); 263 mddev_check_writesame(mddev, bio); 264 mddev_check_write_zeroes(mddev, bio);
+52 -63
drivers/md/md.c
··· 464 bio_end_io_t *orig_bi_end_io; 465 void *orig_bi_private; 466 unsigned long start_time; 467 - struct hd_struct *part; 468 }; 469 470 static void md_end_io(struct bio *bio) ··· 2414 static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) 2415 { 2416 char b[BDEVNAME_SIZE]; 2417 - struct kobject *ko; 2418 int err; 2419 2420 /* prevent duplicates */ ··· 2476 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) 2477 goto fail; 2478 2479 - ko = &part_to_dev(rdev->bdev->bd_part)->kobj; 2480 /* failure here is OK */ 2481 - err = sysfs_create_link(&rdev->kobj, ko, "block"); 2482 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); 2483 rdev->sysfs_unack_badblocks = 2484 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks"); ··· 5353 5354 if (!err) { 5355 mddev->array_sectors = sectors; 5356 - if (mddev->pers) { 5357 - set_capacity(mddev->gendisk, mddev->array_sectors); 5358 - revalidate_disk_size(mddev->gendisk, true); 5359 - } 5360 } 5361 mddev_unlock(mddev); 5362 return err ?: len; ··· 5762 return error; 5763 } 5764 5765 - static struct kobject *md_probe(dev_t dev, int *part, void *data) 5766 { 5767 if (create_on_open) 5768 md_alloc(dev, NULL); 5769 - return NULL; 5770 } 5771 5772 static int add_named_array(const char *val, const struct kernel_param *kp) ··· 6105 md_wakeup_thread(mddev->thread); 6106 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 6107 6108 - set_capacity(mddev->gendisk, mddev->array_sectors); 6109 - revalidate_disk_size(mddev->gendisk, true); 6110 clear_bit(MD_NOT_READY, &mddev->flags); 6111 mddev->changed = 1; 6112 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); ··· 6420 if (rdev->raid_disk >= 0) 6421 sysfs_unlink_rdev(mddev, rdev); 6422 6423 - set_capacity(disk, 0); 6424 mutex_unlock(&mddev->open_mutex); 6425 mddev->changed = 1; 6426 - revalidate_disk_size(disk, true); 6427 6428 if (mddev->ro) 6429 mddev->ro = 0; ··· 6531 break; 6532 } 6533 6534 - md_probe(dev, NULL, NULL); 6535 mddev = mddev_find(dev); 6536 if (!mddev || !mddev->gendisk) { 6537 if (mddev) ··· 7253 if (mddev_is_clustered(mddev)) 7254 md_cluster_ops->update_size(mddev, old_dev_sectors); 7255 else if (mddev->queue) { 7256 - set_capacity(mddev->gendisk, mddev->array_sectors); 7257 - revalidate_disk_size(mddev->gendisk, true); 7258 } 7259 } 7260 return rv; ··· 7476 { 7477 switch (cmd) { 7478 case ADD_NEW_DISK: 7479 - case BLKROSET: 7480 case GET_ARRAY_INFO: 7481 case GET_BITMAP_FILE: 7482 case GET_DISK_INFO: ··· 7502 int err = 0; 7503 void __user *argp = (void __user *)arg; 7504 struct mddev *mddev = NULL; 7505 - int ro; 7506 bool did_set_md_closing = false; 7507 7508 if (!md_ioctl_valid(cmd)) ··· 7681 goto unlock; 7682 } 7683 break; 7684 - 7685 - case BLKROSET: 7686 - if (get_user(ro, (int __user *)(arg))) { 7687 - err = -EFAULT; 7688 - goto unlock; 7689 - } 7690 - err = -EINVAL; 7691 - 7692 - /* if the bdev is going readonly the value of mddev->ro 7693 - * does not matter, no writes are coming 7694 - */ 7695 - if (ro) 7696 - goto unlock; 7697 - 7698 - /* are we are already prepared for writes? 
*/ 7699 - if (mddev->ro != 1) 7700 - goto unlock; 7701 - 7702 - /* transitioning to readauto need only happen for 7703 - * arrays that call md_write_start 7704 - */ 7705 - if (mddev->pers) { 7706 - err = restart_array(mddev); 7707 - if (err == 0) { 7708 - mddev->ro = 2; 7709 - set_disk_ro(mddev->gendisk, 0); 7710 - } 7711 - } 7712 - goto unlock; 7713 } 7714 7715 /* ··· 7774 } 7775 #endif /* CONFIG_COMPAT */ 7776 7777 static int md_open(struct block_device *bdev, fmode_t mode) 7778 { 7779 /* ··· 7881 #endif 7882 .getgeo = md_getgeo, 7883 .check_events = md_check_events, 7884 }; 7885 7886 static int md_thread(void *arg) ··· 8441 rcu_read_lock(); 8442 rdev_for_each_rcu(rdev, mddev) { 8443 struct gendisk *disk = rdev->bdev->bd_disk; 8444 - curr_events = (int)part_stat_read_accum(&disk->part0, sectors) - 8445 atomic_read(&disk->sync_io); 8446 /* sync IO will cause sync_io to increase before the disk_stats 8447 * as sync_io is counted when a request starts, and ··· 9011 mddev_lock_nointr(mddev); 9012 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0)); 9013 mddev_unlock(mddev); 9014 - if (!mddev_is_clustered(mddev)) { 9015 - set_capacity(mddev->gendisk, mddev->array_sectors); 9016 - revalidate_disk_size(mddev->gendisk, true); 9017 - } 9018 } 9019 9020 spin_lock(&mddev->lock); ··· 9542 if (!md_rdev_misc_wq) 9543 goto err_rdev_misc_wq; 9544 9545 - if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) 9546 goto err_md; 9547 9548 - if ((ret = register_blkdev(0, "mdp")) < 0) 9549 goto err_mdp; 9550 mdp_major = ret; 9551 - 9552 - blk_register_region(MKDEV(MD_MAJOR, 0), 512, THIS_MODULE, 9553 - md_probe, NULL, NULL); 9554 - blk_register_region(MKDEV(mdp_major, 0), 1UL<<MINORBITS, THIS_MODULE, 9555 - md_probe, NULL, NULL); 9556 9557 register_reboot_notifier(&md_notifier); 9558 raid_table_header = register_sysctl_table(raid_root_table); ··· 9816 struct mddev *mddev; 9817 struct list_head *tmp; 9818 int delay = 1; 9819 - 9820 - blk_unregister_region(MKDEV(MD_MAJOR,0), 512); 9821 - blk_unregister_region(MKDEV(mdp_major,0), 1U << MINORBITS); 9822 9823 unregister_blkdev(MD_MAJOR,"md"); 9824 unregister_blkdev(mdp_major, "mdp");
··· 464 bio_end_io_t *orig_bi_end_io; 465 void *orig_bi_private; 466 unsigned long start_time; 467 + struct block_device *part; 468 }; 469 470 static void md_end_io(struct bio *bio) ··· 2414 static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) 2415 { 2416 char b[BDEVNAME_SIZE]; 2417 int err; 2418 2419 /* prevent duplicates */ ··· 2477 if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) 2478 goto fail; 2479 2480 /* failure here is OK */ 2481 + err = sysfs_create_link(&rdev->kobj, bdev_kobj(rdev->bdev), "block"); 2482 rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state"); 2483 rdev->sysfs_unack_badblocks = 2484 sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks"); ··· 5355 5356 if (!err) { 5357 mddev->array_sectors = sectors; 5358 + if (mddev->pers) 5359 + set_capacity_and_notify(mddev->gendisk, 5360 + mddev->array_sectors); 5361 } 5362 mddev_unlock(mddev); 5363 return err ?: len; ··· 5765 return error; 5766 } 5767 5768 + static void md_probe(dev_t dev) 5769 { 5770 + if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512) 5771 + return; 5772 if (create_on_open) 5773 md_alloc(dev, NULL); 5774 } 5775 5776 static int add_named_array(const char *val, const struct kernel_param *kp) ··· 6107 md_wakeup_thread(mddev->thread); 6108 md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ 6109 6110 + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); 6111 clear_bit(MD_NOT_READY, &mddev->flags); 6112 mddev->changed = 1; 6113 kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); ··· 6423 if (rdev->raid_disk >= 0) 6424 sysfs_unlink_rdev(mddev, rdev); 6425 6426 + set_capacity_and_notify(disk, 0); 6427 mutex_unlock(&mddev->open_mutex); 6428 mddev->changed = 1; 6429 6430 if (mddev->ro) 6431 mddev->ro = 0; ··· 6535 break; 6536 } 6537 6538 + md_probe(dev); 6539 mddev = mddev_find(dev); 6540 if (!mddev || !mddev->gendisk) { 6541 if (mddev) ··· 7257 if (mddev_is_clustered(mddev)) 7258 md_cluster_ops->update_size(mddev, old_dev_sectors); 7259 else if (mddev->queue) { 7260 + set_capacity_and_notify(mddev->gendisk, 7261 + mddev->array_sectors); 7262 } 7263 } 7264 return rv; ··· 7480 { 7481 switch (cmd) { 7482 case ADD_NEW_DISK: 7483 case GET_ARRAY_INFO: 7484 case GET_BITMAP_FILE: 7485 case GET_DISK_INFO: ··· 7507 int err = 0; 7508 void __user *argp = (void __user *)arg; 7509 struct mddev *mddev = NULL; 7510 bool did_set_md_closing = false; 7511 7512 if (!md_ioctl_valid(cmd)) ··· 7687 goto unlock; 7688 } 7689 break; 7690 } 7691 7692 /* ··· 7809 } 7810 #endif /* CONFIG_COMPAT */ 7811 7812 + static int md_set_read_only(struct block_device *bdev, bool ro) 7813 + { 7814 + struct mddev *mddev = bdev->bd_disk->private_data; 7815 + int err; 7816 + 7817 + err = mddev_lock(mddev); 7818 + if (err) 7819 + return err; 7820 + 7821 + if (!mddev->raid_disks && !mddev->external) { 7822 + err = -ENODEV; 7823 + goto out_unlock; 7824 + } 7825 + 7826 + /* 7827 + * Transitioning to read-auto need only happen for arrays that call 7828 + * md_write_start and which are not ready for writes yet. 
7829 + */ 7830 + if (!ro && mddev->ro == 1 && mddev->pers) { 7831 + err = restart_array(mddev); 7832 + if (err) 7833 + goto out_unlock; 7834 + mddev->ro = 2; 7835 + } 7836 + 7837 + out_unlock: 7838 + mddev_unlock(mddev); 7839 + return err; 7840 + } 7841 + 7842 static int md_open(struct block_device *bdev, fmode_t mode) 7843 { 7844 /* ··· 7886 #endif 7887 .getgeo = md_getgeo, 7888 .check_events = md_check_events, 7889 + .set_read_only = md_set_read_only, 7890 }; 7891 7892 static int md_thread(void *arg) ··· 8445 rcu_read_lock(); 8446 rdev_for_each_rcu(rdev, mddev) { 8447 struct gendisk *disk = rdev->bdev->bd_disk; 8448 + curr_events = (int)part_stat_read_accum(disk->part0, sectors) - 8449 atomic_read(&disk->sync_io); 8450 /* sync IO will cause sync_io to increase before the disk_stats 8451 * as sync_io is counted when a request starts, and ··· 9015 mddev_lock_nointr(mddev); 9016 md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0)); 9017 mddev_unlock(mddev); 9018 + if (!mddev_is_clustered(mddev)) 9019 + set_capacity_and_notify(mddev->gendisk, 9020 + mddev->array_sectors); 9021 } 9022 9023 spin_lock(&mddev->lock); ··· 9547 if (!md_rdev_misc_wq) 9548 goto err_rdev_misc_wq; 9549 9550 + ret = __register_blkdev(MD_MAJOR, "md", md_probe); 9551 + if (ret < 0) 9552 goto err_md; 9553 9554 + ret = __register_blkdev(0, "mdp", md_probe); 9555 + if (ret < 0) 9556 goto err_mdp; 9557 mdp_major = ret; 9558 9559 register_reboot_notifier(&md_notifier); 9560 raid_table_header = register_sysctl_table(raid_root_table); ··· 9824 struct mddev *mddev; 9825 struct list_head *tmp; 9826 int delay = 1; 9827 9828 unregister_blkdev(MD_MAJOR,"md"); 9829 unregister_blkdev(mdp_major, "mdp");
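The BLKROSET handling removed from md_ioctl() above is replaced by the new ->set_read_only() method in struct block_device_operations; the ioctl core applies the read-only policy itself once the driver hook returns 0. A minimal sketch of how a driver can wire this up (the "mydrv" names are hypothetical and not part of this series):

    #include <linux/blkdev.h>

    /* Hypothetical BLKROSET hook: adjust driver/hardware state here and
     * return an errno to refuse the transition.  On success the ioctl
     * core updates the block device's read-only policy itself. */
    static int mydrv_set_read_only(struct block_device *bdev, bool ro)
    {
            return 0;
    }

    static const struct block_device_operations mydrv_fops = {
            .owner          = THIS_MODULE,
            .set_read_only  = mydrv_set_read_only,
    };

Centralizing this in one method is what lets per-driver BLKROSET cases like the one above simply disappear.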
+4 -4
drivers/md/raid0.c
··· 508 bio_chain(discard_bio, bio); 509 bio_clone_blkg_association(discard_bio, bio); 510 if (mddev->gendisk) 511 - trace_block_bio_remap(bdev_get_queue(rdev->bdev), 512 - discard_bio, disk_devt(mddev->gendisk), 513 bio->bi_iter.bi_sector); 514 submit_bio_noacct(discard_bio); 515 } ··· 581 tmp_dev->data_offset; 582 583 if (mddev->gendisk) 584 - trace_block_bio_remap(bio->bi_disk->queue, bio, 585 - disk_devt(mddev->gendisk), bio_sector); 586 mddev_check_writesame(mddev, bio); 587 mddev_check_write_zeroes(mddev, bio); 588 submit_bio_noacct(bio);
··· 508 bio_chain(discard_bio, bio); 509 bio_clone_blkg_association(discard_bio, bio); 510 if (mddev->gendisk) 511 + trace_block_bio_remap(discard_bio, 512 + disk_devt(mddev->gendisk), 513 bio->bi_iter.bi_sector); 514 submit_bio_noacct(discard_bio); 515 } ··· 581 tmp_dev->data_offset; 582 583 if (mddev->gendisk) 584 + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), 585 + bio_sector); 586 mddev_check_writesame(mddev, bio); 587 mddev_check_write_zeroes(mddev, bio); 588 submit_bio_noacct(bio);
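With the request_queue argument dropped from the remap tracepoint, stacking drivers pass only the bio, the device it originally targeted, and the original sector, as in the raid0 hunks above. A minimal sketch of the pattern (the mydrv helper is hypothetical; the tracepoint comes from <trace/events/block.h>):

    #include <linux/blkdev.h>
    #include <trace/events/block.h>

    /* Hypothetical helper: trace the remap, then send the bio down the stack. */
    static void mydrv_remap_and_submit(struct bio *bio, struct gendisk *orig_disk,
                                       sector_t orig_sector)
    {
            trace_block_bio_remap(bio, disk_devt(orig_disk), orig_sector);
            submit_bio_noacct(bio);
    }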
+3 -4
drivers/md/raid1.c
··· 1305 read_bio->bi_private = r1_bio; 1306 1307 if (mddev->gendisk) 1308 - trace_block_bio_remap(read_bio->bi_disk->queue, read_bio, 1309 - disk_devt(mddev->gendisk), r1_bio->sector); 1310 1311 submit_bio_noacct(read_bio); 1312 } ··· 1517 atomic_inc(&r1_bio->remaining); 1518 1519 if (mddev->gendisk) 1520 - trace_block_bio_remap(mbio->bi_disk->queue, 1521 - mbio, disk_devt(mddev->gendisk), 1522 r1_bio->sector); 1523 /* flush_pending_writes() needs access to the rdev so...*/ 1524 mbio->bi_disk = (void *)conf->mirrors[i].rdev;
··· 1305 read_bio->bi_private = r1_bio; 1306 1307 if (mddev->gendisk) 1308 + trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), 1309 + r1_bio->sector); 1310 1311 submit_bio_noacct(read_bio); 1312 } ··· 1517 atomic_inc(&r1_bio->remaining); 1518 1519 if (mddev->gendisk) 1520 + trace_block_bio_remap(mbio, disk_devt(mddev->gendisk), 1521 r1_bio->sector); 1522 /* flush_pending_writes() needs access to the rdev so...*/ 1523 mbio->bi_disk = (void *)conf->mirrors[i].rdev;
+2 -4
drivers/md/raid10.c
··· 1201 read_bio->bi_private = r10_bio; 1202 1203 if (mddev->gendisk) 1204 - trace_block_bio_remap(read_bio->bi_disk->queue, 1205 - read_bio, disk_devt(mddev->gendisk), 1206 r10_bio->sector); 1207 submit_bio_noacct(read_bio); 1208 return; ··· 1250 mbio->bi_private = r10_bio; 1251 1252 if (conf->mddev->gendisk) 1253 - trace_block_bio_remap(mbio->bi_disk->queue, 1254 - mbio, disk_devt(conf->mddev->gendisk), 1255 r10_bio->sector); 1256 /* flush_pending_writes() needs access to the rdev so...*/ 1257 mbio->bi_disk = (void *)rdev;
··· 1201 read_bio->bi_private = r10_bio; 1202 1203 if (mddev->gendisk) 1204 + trace_block_bio_remap(read_bio, disk_devt(mddev->gendisk), 1205 r10_bio->sector); 1206 submit_bio_noacct(read_bio); 1207 return; ··· 1251 mbio->bi_private = r10_bio; 1252 1253 if (conf->mddev->gendisk) 1254 + trace_block_bio_remap(mbio, disk_devt(conf->mddev->gendisk), 1255 r10_bio->sector); 1256 /* flush_pending_writes() needs access to the rdev so...*/ 1257 mbio->bi_disk = (void *)rdev;
+7 -8
drivers/md/raid5.c
··· 1222 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); 1223 1224 if (conf->mddev->gendisk) 1225 - trace_block_bio_remap(bi->bi_disk->queue, 1226 - bi, disk_devt(conf->mddev->gendisk), 1227 - sh->dev[i].sector); 1228 if (should_defer && op_is_write(op)) 1229 bio_list_add(&pending_bios, bi); 1230 else ··· 1272 if (op == REQ_OP_DISCARD) 1273 rbi->bi_vcnt = 0; 1274 if (conf->mddev->gendisk) 1275 - trace_block_bio_remap(rbi->bi_disk->queue, 1276 - rbi, disk_devt(conf->mddev->gendisk), 1277 - sh->dev[i].sector); 1278 if (should_defer && op_is_write(op)) 1279 bio_list_add(&pending_bios, rbi); 1280 else ··· 5468 spin_unlock_irq(&conf->device_lock); 5469 5470 if (mddev->gendisk) 5471 - trace_block_bio_remap(align_bi->bi_disk->queue, 5472 - align_bi, disk_devt(mddev->gendisk), 5473 raid_bio->bi_iter.bi_sector); 5474 submit_bio_noacct(align_bi); 5475 return 1;
··· 1222 set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags); 1223 1224 if (conf->mddev->gendisk) 1225 + trace_block_bio_remap(bi, 1226 + disk_devt(conf->mddev->gendisk), 1227 + sh->dev[i].sector); 1228 if (should_defer && op_is_write(op)) 1229 bio_list_add(&pending_bios, bi); 1230 else ··· 1272 if (op == REQ_OP_DISCARD) 1273 rbi->bi_vcnt = 0; 1274 if (conf->mddev->gendisk) 1275 + trace_block_bio_remap(rbi, 1276 + disk_devt(conf->mddev->gendisk), 1277 + sh->dev[i].sector); 1278 if (should_defer && op_is_write(op)) 1279 bio_list_add(&pending_bios, rbi); 1280 else ··· 5468 spin_unlock_irq(&conf->device_lock); 5469 5470 if (mddev->gendisk) 5471 + trace_block_bio_remap(align_bi, disk_devt(mddev->gendisk), 5472 raid_bio->bi_iter.bi_sector); 5473 submit_bio_noacct(align_bi); 5474 return 1;
-28
drivers/mtd/mtd_blkdevs.c
··· 298 return ret; 299 } 300 301 - static int blktrans_ioctl(struct block_device *bdev, fmode_t mode, 302 - unsigned int cmd, unsigned long arg) 303 - { 304 - struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk); 305 - int ret = -ENXIO; 306 - 307 - if (!dev) 308 - return ret; 309 - 310 - mutex_lock(&dev->lock); 311 - 312 - if (!dev->mtd) 313 - goto unlock; 314 - 315 - switch (cmd) { 316 - case BLKFLSBUF: 317 - ret = dev->tr->flush ? dev->tr->flush(dev) : 0; 318 - break; 319 - default: 320 - ret = -ENOTTY; 321 - } 322 - unlock: 323 - mutex_unlock(&dev->lock); 324 - blktrans_dev_put(dev); 325 - return ret; 326 - } 327 - 328 static const struct block_device_operations mtd_block_ops = { 329 .owner = THIS_MODULE, 330 .open = blktrans_open, 331 .release = blktrans_release, 332 - .ioctl = blktrans_ioctl, 333 .getgeo = blktrans_getgeo, 334 }; 335
··· 298 return ret; 299 } 300 301 static const struct block_device_operations mtd_block_ops = { 302 .owner = THIS_MODULE, 303 .open = blktrans_open, 304 .release = blktrans_release, 305 .getgeo = blktrans_getgeo, 306 }; 307
+6 -11
drivers/mtd/mtdsuper.c
··· 120 struct fs_context *fc)) 121 { 122 #ifdef CONFIG_BLOCK 123 - struct block_device *bdev; 124 - int ret, major; 125 #endif 126 int mtdnr; 127 ··· 169 /* try the old way - the hack where we allowed users to mount 170 * /dev/mtdblock$(n) but didn't actually _use_ the blockdev 171 */ 172 - bdev = lookup_bdev(fc->source); 173 - if (IS_ERR(bdev)) { 174 - ret = PTR_ERR(bdev); 175 errorf(fc, "MTD: Couldn't look up '%s': %d", fc->source, ret); 176 return ret; 177 } 178 pr_debug("MTDSB: lookup_bdev() returned 0\n"); 179 180 - major = MAJOR(bdev->bd_dev); 181 - mtdnr = MINOR(bdev->bd_dev); 182 - bdput(bdev); 183 - 184 - if (major == MTD_BLOCK_MAJOR) 185 - return mtd_get_sb_by_nr(fc, mtdnr, fill_super); 186 187 #endif /* CONFIG_BLOCK */ 188
··· 120 struct fs_context *fc)) 121 { 122 #ifdef CONFIG_BLOCK 123 + dev_t dev; 124 + int ret; 125 #endif 126 int mtdnr; 127 ··· 169 /* try the old way - the hack where we allowed users to mount 170 * /dev/mtdblock$(n) but didn't actually _use_ the blockdev 171 */ 172 + ret = lookup_bdev(fc->source, &dev); 173 + if (ret) { 174 errorf(fc, "MTD: Couldn't look up '%s': %d", fc->source, ret); 175 return ret; 176 } 177 pr_debug("MTDSB: lookup_bdev() returned 0\n"); 178 179 + if (MAJOR(dev) == MTD_BLOCK_MAJOR) 180 + return mtd_get_sb_by_nr(fc, MINOR(dev), fill_super); 181 182 #endif /* CONFIG_BLOCK */ 183
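lookup_bdev() now returns an errno and reports the device number through an output argument instead of handing back a block_device reference, which is all callers like mount_mtd ever needed. A minimal sketch of the new calling convention (the helper name is hypothetical):

    #include <linux/fs.h>
    #include <linux/blkdev.h>

    /* Hypothetical helper: resolve a path such as "/dev/mtdblock0" to a dev_t
     * without taking any reference on the underlying block device. */
    static int mydrv_devt_from_path(const char *path, dev_t *devt)
    {
            int ret;

            ret = lookup_bdev(path, devt);
            if (ret)
                    return ret;
            pr_debug("%s is device %u:%u\n", path, MAJOR(*devt), MINOR(*devt));
            return 0;
    }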
+2 -16
drivers/nvme/host/core.c
··· 93 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, 94 unsigned nsid); 95 96 - static void nvme_update_bdev_size(struct gendisk *disk) 97 - { 98 - struct block_device *bdev = bdget_disk(disk, 0); 99 - 100 - if (bdev) { 101 - bd_set_nr_sectors(bdev, get_capacity(disk)); 102 - bdput(bdev); 103 - } 104 - } 105 - 106 /* 107 * Prepare a queue for teardown. 108 * ··· 109 blk_set_queue_dying(ns->queue); 110 blk_mq_unquiesce_queue(ns->queue); 111 112 - set_capacity(ns->disk, 0); 113 - nvme_update_bdev_size(ns->disk); 114 } 115 116 static void nvme_queue_scan(struct nvme_ctrl *ctrl) ··· 2042 capacity = 0; 2043 } 2044 2045 - set_capacity_revalidate_and_notify(disk, capacity, false); 2046 2047 nvme_config_discard(disk, ns); 2048 nvme_config_write_zeroes(disk, ns); ··· 2123 blk_stack_limits(&ns->head->disk->queue->limits, 2124 &ns->queue->limits, 0); 2125 blk_queue_update_readahead(ns->head->disk->queue); 2126 - nvme_update_bdev_size(ns->head->disk); 2127 blk_mq_unfreeze_queue(ns->head->disk->queue); 2128 } 2129 #endif ··· 3950 */ 3951 if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) 3952 nvme_ns_remove(ns); 3953 - else 3954 - revalidate_disk_size(ns->disk, true); 3955 } 3956 3957 static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
··· 93 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, 94 unsigned nsid); 95 96 /* 97 * Prepare a queue for teardown. 98 * ··· 119 blk_set_queue_dying(ns->queue); 120 blk_mq_unquiesce_queue(ns->queue); 121 122 + set_capacity_and_notify(ns->disk, 0); 123 } 124 125 static void nvme_queue_scan(struct nvme_ctrl *ctrl) ··· 2053 capacity = 0; 2054 } 2055 2056 + set_capacity_and_notify(disk, capacity); 2057 2058 nvme_config_discard(disk, ns); 2059 nvme_config_write_zeroes(disk, ns); ··· 2134 blk_stack_limits(&ns->head->disk->queue->limits, 2135 &ns->queue->limits, 0); 2136 blk_queue_update_readahead(ns->head->disk->queue); 2137 blk_mq_unfreeze_queue(ns->head->disk->queue); 2138 } 2139 #endif ··· 3962 */ 3963 if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) 3964 nvme_ns_remove(ns); 3965 } 3966 3967 static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+1 -2
drivers/nvme/host/multipath.c
··· 312 if (likely(ns)) { 313 bio->bi_disk = ns->disk; 314 bio->bi_opf |= REQ_NVME_MPATH; 315 - trace_block_bio_remap(bio->bi_disk->queue, bio, 316 - disk_devt(ns->head->disk), 317 bio->bi_iter.bi_sector); 318 ret = submit_bio_noacct(bio); 319 } else if (nvme_available_path(head)) {
··· 312 if (likely(ns)) { 313 bio->bi_disk = ns->disk; 314 bio->bi_opf |= REQ_NVME_MPATH; 315 + trace_block_bio_remap(bio, disk_devt(ns->head->disk), 316 bio->bi_iter.bi_sector); 317 ret = submit_bio_noacct(bio); 318 } else if (nvme_available_path(head)) {
+10 -10
drivers/nvme/target/admin-cmd.c
··· 89 if (!ns->bdev) 90 goto out; 91 92 - host_reads = part_stat_read(ns->bdev->bd_part, ios[READ]); 93 - data_units_read = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, 94 - sectors[READ]), 1000); 95 - host_writes = part_stat_read(ns->bdev->bd_part, ios[WRITE]); 96 - data_units_written = DIV_ROUND_UP(part_stat_read(ns->bdev->bd_part, 97 - sectors[WRITE]), 1000); 98 99 put_unaligned_le64(host_reads, &slog->host_reads[0]); 100 put_unaligned_le64(data_units_read, &slog->data_units_read[0]); ··· 120 /* we don't have the right data for file backed ns */ 121 if (!ns->bdev) 122 continue; 123 - host_reads += part_stat_read(ns->bdev->bd_part, ios[READ]); 124 data_units_read += DIV_ROUND_UP( 125 - part_stat_read(ns->bdev->bd_part, sectors[READ]), 1000); 126 - host_writes += part_stat_read(ns->bdev->bd_part, ios[WRITE]); 127 data_units_written += DIV_ROUND_UP( 128 - part_stat_read(ns->bdev->bd_part, sectors[WRITE]), 1000); 129 } 130 131 put_unaligned_le64(host_reads, &slog->host_reads[0]);
··· 89 if (!ns->bdev) 90 goto out; 91 92 + host_reads = part_stat_read(ns->bdev, ios[READ]); 93 + data_units_read = 94 + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[READ]), 1000); 95 + host_writes = part_stat_read(ns->bdev, ios[WRITE]); 96 + data_units_written = 97 + DIV_ROUND_UP(part_stat_read(ns->bdev, sectors[WRITE]), 1000); 98 99 put_unaligned_le64(host_reads, &slog->host_reads[0]); 100 put_unaligned_le64(data_units_read, &slog->data_units_read[0]); ··· 120 /* we don't have the right data for file backed ns */ 121 if (!ns->bdev) 122 continue; 123 + host_reads += part_stat_read(ns->bdev, ios[READ]); 124 data_units_read += DIV_ROUND_UP( 125 + part_stat_read(ns->bdev, sectors[READ]), 1000); 126 + host_writes += part_stat_read(ns->bdev, ios[WRITE]); 127 data_units_written += DIV_ROUND_UP( 128 + part_stat_read(ns->bdev, sectors[WRITE]), 1000); 129 } 130 131 put_unaligned_le64(host_reads, &slog->host_reads[0]);
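With hd_struct folded into block_device, per-device I/O accounting is read straight off the bdev as the nvmet hunks above show. A minimal sketch of the same pattern in isolation (the helper name is hypothetical; part_stat_read() lives in <linux/part_stat.h>):

    #include <linux/blkdev.h>
    #include <linux/part_stat.h>

    /* Hypothetical helper: total sectors read and written for one block device. */
    static void mydrv_io_totals(struct block_device *bdev, u64 *rd, u64 *wr)
    {
            *rd = part_stat_read(bdev, sectors[READ]);
            *wr = part_stat_read(bdev, sectors[WRITE]);
    }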
+10
drivers/nvme/target/loop.c
··· 211 (set == &ctrl->tag_set) ? hctx_idx + 1 : 0); 212 } 213 214 static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 215 unsigned int hctx_idx) 216 { ··· 220 struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1]; 221 222 BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); 223 224 hctx->driver_data = queue; 225 return 0;
··· 211 (set == &ctrl->tag_set) ? hctx_idx + 1 : 0); 212 } 213 214 + static struct lock_class_key loop_hctx_fq_lock_key; 215 + 216 static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 217 unsigned int hctx_idx) 218 { ··· 218 struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1]; 219 220 BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); 221 + 222 + /* 223 + * flush_end_io() can be called recursively for us, so use our own 224 + * lock class key for avoiding lockdep possible recursive locking, 225 + * then we can remove the dynamically allocated lock class for each 226 + * flush queue, that way may cause horrible boot delay. 227 + */ 228 + blk_mq_hctx_set_fq_lock_class(hctx, &loop_hctx_fq_lock_key); 229 230 hctx->driver_data = queue; 231 return 0;
+5 -4
drivers/s390/block/dasd.c
··· 430 { 431 struct gendisk *disk; 432 struct disk_part_iter piter; 433 - struct hd_struct *part; 434 435 device->state = DASD_STATE_ONLINE; 436 if (device->block) { ··· 443 disk = device->block->bdev->bd_disk; 444 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 445 while ((part = disk_part_iter_next(&piter))) 446 - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); 447 disk_part_iter_exit(&piter); 448 } 449 return 0; ··· 457 int rc; 458 struct gendisk *disk; 459 struct disk_part_iter piter; 460 - struct hd_struct *part; 461 462 if (device->discipline->online_to_ready) { 463 rc = device->discipline->online_to_ready(device); ··· 470 disk = device->block->bdev->bd_disk; 471 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 472 while ((part = disk_part_iter_next(&piter))) 473 - kobject_uevent(&part_to_dev(part)->kobj, KOBJ_CHANGE); 474 disk_part_iter_exit(&piter); 475 } 476 return 0; ··· 3376 .ioctl = dasd_ioctl, 3377 .compat_ioctl = dasd_ioctl, 3378 .getgeo = dasd_getgeo, 3379 }; 3380 3381 /*******************************************************************************
··· 430 { 431 struct gendisk *disk; 432 struct disk_part_iter piter; 433 + struct block_device *part; 434 435 device->state = DASD_STATE_ONLINE; 436 if (device->block) { ··· 443 disk = device->block->bdev->bd_disk; 444 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 445 while ((part = disk_part_iter_next(&piter))) 446 + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); 447 disk_part_iter_exit(&piter); 448 } 449 return 0; ··· 457 int rc; 458 struct gendisk *disk; 459 struct disk_part_iter piter; 460 + struct block_device *part; 461 462 if (device->discipline->online_to_ready) { 463 rc = device->discipline->online_to_ready(device); ··· 470 disk = device->block->bdev->bd_disk; 471 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); 472 while ((part = disk_part_iter_next(&piter))) 473 + kobject_uevent(bdev_kobj(part), KOBJ_CHANGE); 474 disk_part_iter_exit(&piter); 475 } 476 return 0; ··· 3376 .ioctl = dasd_ioctl, 3377 .compat_ioctl = dasd_ioctl, 3378 .getgeo = dasd_getgeo, 3379 + .set_read_only = dasd_set_read_only, 3380 }; 3381 3382 /*******************************************************************************
+2 -1
drivers/s390/block/dasd_int.h
··· 834 void dasd_destroy_partitions(struct dasd_block *); 835 836 /* externals in dasd_ioctl.c */ 837 - int dasd_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); 838 839 /* externals in dasd_proc.c */ 840 int dasd_proc_init(void);
··· 834 void dasd_destroy_partitions(struct dasd_block *); 835 836 /* externals in dasd_ioctl.c */ 837 + int dasd_ioctl(struct block_device *, fmode_t, unsigned int, unsigned long); 838 + int dasd_set_read_only(struct block_device *bdev, bool ro); 839 840 /* externals in dasd_proc.c */ 841 int dasd_proc_init(void);
+12 -24
drivers/s390/block/dasd_ioctl.c
··· 54 return -ENODEV; 55 56 dasd_enable_device(base); 57 - /* Formatting the dasd device can change the capacity. */ 58 - bd_set_nr_sectors(bdev, get_capacity(base->block->gdp)); 59 dasd_put_device(base); 60 return 0; 61 } ··· 86 * Set i_size to zero, since read, write, etc. check against this 87 * value. 88 */ 89 - bd_set_nr_sectors(bdev, 0); 90 dasd_put_device(base); 91 return 0; 92 } ··· 220 * enabling the device later. 221 */ 222 if (fdata->start_unit == 0) { 223 - struct block_device *bdev = bdget_disk(block->gdp, 0); 224 - bdev->bd_inode->i_blkbits = blksize_bits(fdata->blksize); 225 - bdput(bdev); 226 } 227 228 rc = base->discipline->format_device(base, fdata, 1); ··· 529 /* 530 * Set read only 531 */ 532 - static int 533 - dasd_ioctl_set_ro(struct block_device *bdev, void __user *argp) 534 { 535 struct dasd_device *base; 536 - int intval, rc; 537 538 - if (!capable(CAP_SYS_ADMIN)) 539 - return -EACCES; 540 if (bdev_is_partition(bdev)) 541 - // ro setting is not allowed for partitions 542 - return -EINVAL; 543 - if (get_user(intval, (int __user *)argp)) 544 - return -EFAULT; 545 base = dasd_device_from_gendisk(bdev->bd_disk); 546 if (!base) 547 return -ENODEV; 548 - if (!intval && test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) { 549 - dasd_put_device(base); 550 - return -EROFS; 551 - } 552 - set_disk_ro(bdev->bd_disk, intval); 553 - rc = dasd_set_feature(base->cdev, DASD_FEATURE_READONLY, intval); 554 dasd_put_device(base); 555 return rc; 556 } ··· 623 break; 624 case BIODASDPRRST: 625 rc = dasd_ioctl_reset_profile(block); 626 - break; 627 - case BLKROSET: 628 - rc = dasd_ioctl_set_ro(bdev, argp); 629 break; 630 case DASDAPIVER: 631 rc = dasd_ioctl_api_version(argp);
··· 54 return -ENODEV; 55 56 dasd_enable_device(base); 57 dasd_put_device(base); 58 return 0; 59 } ··· 88 * Set i_size to zero, since read, write, etc. check against this 89 * value. 90 */ 91 + set_capacity(bdev->bd_disk, 0); 92 dasd_put_device(base); 93 return 0; 94 } ··· 222 * enabling the device later. 223 */ 224 if (fdata->start_unit == 0) { 225 + block->gdp->part0->bd_inode->i_blkbits = 226 + blksize_bits(fdata->blksize); 227 } 228 229 rc = base->discipline->format_device(base, fdata, 1); ··· 532 /* 533 * Set read only 534 */ 535 + int dasd_set_read_only(struct block_device *bdev, bool ro) 536 { 537 struct dasd_device *base; 538 + int rc; 539 540 + /* do not manipulate hardware state for partitions */ 541 if (bdev_is_partition(bdev)) 542 + return 0; 543 + 544 base = dasd_device_from_gendisk(bdev->bd_disk); 545 if (!base) 546 return -ENODEV; 547 + if (!ro && test_bit(DASD_FLAG_DEVICE_RO, &base->flags)) 548 + rc = -EROFS; 549 + else 550 + rc = dasd_set_feature(base->cdev, DASD_FEATURE_READONLY, ro); 551 dasd_put_device(base); 552 return rc; 553 } ··· 632 break; 633 case BIODASDPRRST: 634 rc = dasd_ioctl_reset_profile(block); 635 break; 636 case DASDAPIVER: 637 rc = dasd_ioctl_api_version(argp);
+1 -2
drivers/s390/scsi/zfcp_fsf.c
··· 2359 } 2360 } 2361 2362 - blk_add_driver_data(scsi->request->q, scsi->request, &blktrc, 2363 - sizeof(blktrc)); 2364 } 2365 2366 /**
··· 2359 } 2360 } 2361 2362 + blk_add_driver_data(scsi->request, &blktrc, sizeof(blktrc)); 2363 } 2364 2365 /**
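blk_add_driver_data() gets the same tracepoint cleanup: the request_queue argument is gone and only the request plus the driver payload remain. A minimal sketch (struct mydrv_trace and the helper are hypothetical):

    #include <linux/blkdev.h>
    #include <linux/blktrace_api.h>

    /* Hypothetical per-request trace payload. */
    struct mydrv_trace {
            u32 flags;
            u32 latency_us;
    };

    /* Attach driver-private data to the blktrace stream for this request. */
    static void mydrv_trace_completion(struct request *rq, struct mydrv_trace *trc)
    {
            blk_add_driver_data(rq, trc, sizeof(*trc));
    }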
+1 -1
drivers/scsi/scsicam.c
··· 32 */ 33 unsigned char *scsi_bios_ptable(struct block_device *dev) 34 { 35 - struct address_space *mapping = dev->bd_contains->bd_inode->i_mapping; 36 unsigned char *res = NULL; 37 struct page *page; 38
··· 32 */ 33 unsigned char *scsi_bios_ptable(struct block_device *dev) 34 { 35 + struct address_space *mapping = bdev_whole(dev)->bd_inode->i_mapping; 36 unsigned char *res = NULL; 37 struct page *page; 38
+8 -20
drivers/scsi/sd.c
··· 630 }; 631 632 /* 633 - * Dummy kobj_map->probe function. 634 - * The default ->probe function will call modprobe, which is 635 - * pointless as this module is already loaded. 636 */ 637 - static struct kobject *sd_default_probe(dev_t devt, int *partno, void *data) 638 { 639 - return NULL; 640 } 641 642 /* ··· 1748 static void sd_rescan(struct device *dev) 1749 { 1750 struct scsi_disk *sdkp = dev_get_drvdata(dev); 1751 - int ret; 1752 1753 - ret = sd_revalidate_disk(sdkp->disk); 1754 - revalidate_disk_size(sdkp->disk, ret == 0); 1755 } 1756 1757 static int sd_ioctl(struct block_device *bdev, fmode_t mode, ··· 3261 3262 sdkp->first_scan = 0; 3263 3264 - set_capacity_revalidate_and_notify(disk, 3265 - logical_to_sectors(sdp, sdkp->capacity), false); 3266 sd_config_write_same(sdkp); 3267 kfree(buffer); 3268 ··· 3271 * capacity to 0. 3272 */ 3273 if (sd_zbc_revalidate_zones(sdkp)) 3274 - set_capacity_revalidate_and_notify(disk, 0, false); 3275 3276 out: 3277 return 0; ··· 3523 3524 free_opal_dev(sdkp->opal_dev); 3525 3526 - blk_register_region(devt, SD_MINORS, NULL, 3527 - sd_default_probe, NULL, NULL); 3528 - 3529 mutex_lock(&sd_ref_mutex); 3530 dev_set_drvdata(dev, NULL); 3531 put_device(&sdkp->dev); ··· 3712 SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); 3713 3714 for (i = 0; i < SD_MAJORS; i++) { 3715 - if (register_blkdev(sd_major(i), "sd") != 0) 3716 continue; 3717 majors++; 3718 - blk_register_region(sd_major(i), SD_MINORS, NULL, 3719 - sd_default_probe, NULL, NULL); 3720 } 3721 3722 if (!majors) ··· 3787 3788 class_unregister(&sd_disk_class); 3789 3790 - for (i = 0; i < SD_MAJORS; i++) { 3791 - blk_unregister_region(sd_major(i), SD_MINORS); 3792 unregister_blkdev(sd_major(i), "sd"); 3793 - } 3794 } 3795 3796 module_init(init_sd);
··· 630 }; 631 632 /* 633 + * Don't request a new module, as that could deadlock in multipath 634 + * environment. 635 */ 636 + static void sd_default_probe(dev_t devt) 637 { 638 } 639 640 /* ··· 1750 static void sd_rescan(struct device *dev) 1751 { 1752 struct scsi_disk *sdkp = dev_get_drvdata(dev); 1753 1754 + sd_revalidate_disk(sdkp->disk); 1755 } 1756 1757 static int sd_ioctl(struct block_device *bdev, fmode_t mode, ··· 3265 3266 sdkp->first_scan = 0; 3267 3268 + set_capacity_and_notify(disk, logical_to_sectors(sdp, sdkp->capacity)); 3269 sd_config_write_same(sdkp); 3270 kfree(buffer); 3271 ··· 3276 * capacity to 0. 3277 */ 3278 if (sd_zbc_revalidate_zones(sdkp)) 3279 + set_capacity_and_notify(disk, 0); 3280 3281 out: 3282 return 0; ··· 3528 3529 free_opal_dev(sdkp->opal_dev); 3530 3531 mutex_lock(&sd_ref_mutex); 3532 dev_set_drvdata(dev, NULL); 3533 put_device(&sdkp->dev); ··· 3720 SCSI_LOG_HLQUEUE(3, printk("init_sd: sd driver entry point\n")); 3721 3722 for (i = 0; i < SD_MAJORS; i++) { 3723 + if (__register_blkdev(sd_major(i), "sd", sd_default_probe)) 3724 continue; 3725 majors++; 3726 } 3727 3728 if (!majors) ··· 3797 3798 class_unregister(&sd_disk_class); 3799 3800 + for (i = 0; i < SD_MAJORS; i++) 3801 unregister_blkdev(sd_major(i), "sd"); 3802 } 3803 3804 module_init(init_sd);
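register_blkdev() plus blk_register_region() collapse into a single __register_blkdev() call, and the probe callback now returns void and receives only the dev_t, as the sd and md hunks show. A minimal sketch of registering a dynamic major with a probe hook (all "mydrv" names are hypothetical):

    #include <linux/fs.h>
    #include <linux/genhd.h>
    #include <linux/module.h>

    static int mydrv_major;

    /* Called when the block layer looks up a device with this major that has
     * no gendisk yet; a real driver would create one for MINOR(devt) here. */
    static void mydrv_probe(dev_t devt)
    {
    }

    static int __init mydrv_init(void)
    {
            mydrv_major = __register_blkdev(0, "mydrv", mydrv_probe);
            if (mydrv_major < 0)
                    return mydrv_major;
            return 0;
    }

    static void __exit mydrv_exit(void)
    {
            unregister_blkdev(mydrv_major, "mydrv");
    }

    module_init(mydrv_init);
    module_exit(mydrv_exit);
    MODULE_LICENSE("GPL");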
+3 -3
drivers/target/target_core_file.c
··· 133 */ 134 inode = file->f_mapping->host; 135 if (S_ISBLK(inode->i_mode)) { 136 - struct request_queue *q = bdev_get_queue(inode->i_bdev); 137 unsigned long long dev_size; 138 139 - fd_dev->fd_block_size = bdev_logical_block_size(inode->i_bdev); 140 /* 141 * Determine the number of bytes from i_size_read() minus 142 * one (1) logical sector from underlying struct block_device ··· 559 560 if (S_ISBLK(inode->i_mode)) { 561 /* The backend is block device, use discard */ 562 - struct block_device *bdev = inode->i_bdev; 563 struct se_device *dev = cmd->se_dev; 564 565 ret = blkdev_issue_discard(bdev,
··· 133 */ 134 inode = file->f_mapping->host; 135 if (S_ISBLK(inode->i_mode)) { 136 + struct request_queue *q = bdev_get_queue(I_BDEV(inode)); 137 unsigned long long dev_size; 138 139 + fd_dev->fd_block_size = bdev_logical_block_size(I_BDEV(inode)); 140 /* 141 * Determine the number of bytes from i_size_read() minus 142 * one (1) logical sector from underlying struct block_device ··· 559 560 if (S_ISBLK(inode->i_mode)) { 561 /* The backend is block device, use discard */ 562 + struct block_device *bdev = I_BDEV(inode); 563 struct se_device *dev = cmd->se_dev; 564 565 ret = blkdev_issue_discard(bdev,
+2 -3
drivers/target/target_core_pscsi.c
··· 1029 { 1030 struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); 1031 1032 - if (pdv->pdv_bd && pdv->pdv_bd->bd_part) 1033 - return pdv->pdv_bd->bd_part->nr_sects; 1034 - 1035 return 0; 1036 } 1037
··· 1029 { 1030 struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); 1031 1032 + if (pdv->pdv_bd) 1033 + return bdev_nr_sectors(pdv->pdv_bd); 1034 return 0; 1035 } 1036
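pscsi and the file backend now size their backing devices with bdev_nr_sectors() and I_BDEV() rather than poking at bd_part or i_bdev. A minimal sketch of the size helper on its own (the reporting function is hypothetical):

    #include <linux/blkdev.h>
    #include <linux/genhd.h>

    /* Hypothetical helper: log a block device's size in sectors and bytes. */
    static void mydrv_report_size(struct block_device *bdev)
    {
            char name[BDEVNAME_SIZE];
            sector_t sectors = bdev_nr_sectors(bdev);

            pr_info("%s: %llu sectors (%llu bytes)\n", bdevname(bdev, name),
                    (unsigned long long)sectors,
                    (unsigned long long)sectors << SECTOR_SHIFT);
    }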
+4 -4
drivers/usb/gadget/function/storage_common.c
··· 204 if (!(filp->f_mode & FMODE_WRITE)) 205 ro = 1; 206 207 - inode = file_inode(filp); 208 if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { 209 LINFO(curlun, "invalid file type: %s\n", filename); 210 goto out; ··· 221 if (!(filp->f_mode & FMODE_CAN_WRITE)) 222 ro = 1; 223 224 - size = i_size_read(inode->i_mapping->host); 225 if (size < 0) { 226 LINFO(curlun, "unable to find file size: %s\n", filename); 227 rc = (int) size; ··· 231 if (curlun->cdrom) { 232 blksize = 2048; 233 blkbits = 11; 234 - } else if (inode->i_bdev) { 235 - blksize = bdev_logical_block_size(inode->i_bdev); 236 blkbits = blksize_bits(blksize); 237 } else { 238 blksize = 512;
··· 204 if (!(filp->f_mode & FMODE_WRITE)) 205 ro = 1; 206 207 + inode = filp->f_mapping->host; 208 if ((!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))) { 209 LINFO(curlun, "invalid file type: %s\n", filename); 210 goto out; ··· 221 if (!(filp->f_mode & FMODE_CAN_WRITE)) 222 ro = 1; 223 224 + size = i_size_read(inode); 225 if (size < 0) { 226 LINFO(curlun, "unable to find file size: %s\n", filename); 227 rc = (int) size; ··· 231 if (curlun->cdrom) { 232 blksize = 2048; 233 blkbits = 11; 234 + } else if (S_ISBLK(inode->i_mode)) { 235 + blksize = bdev_logical_block_size(I_BDEV(inode)); 236 blkbits = blksize_bits(blksize); 237 } else { 238 blksize = 512;
+287 -512
fs/block_dev.c
··· 32 #include <linux/cleancache.h> 33 #include <linux/task_io_accounting_ops.h> 34 #include <linux/falloc.h> 35 #include <linux/uaccess.h> 36 #include <linux/suspend.h> 37 #include "internal.h" ··· 111 int truncate_bdev_range(struct block_device *bdev, fmode_t mode, 112 loff_t lstart, loff_t lend) 113 { 114 - struct block_device *claimed_bdev = NULL; 115 - int err; 116 - 117 /* 118 * If we don't hold exclusive handle for the device, upgrade to it 119 * while we discard the buffer cache to avoid discarding buffers 120 * under live filesystem. 121 */ 122 if (!(mode & FMODE_EXCL)) { 123 - claimed_bdev = bdev->bd_contains; 124 - err = bd_prepare_to_claim(bdev, claimed_bdev, 125 - truncate_bdev_range); 126 if (err) 127 return err; 128 } 129 truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); 130 - if (claimed_bdev) 131 - bd_abort_claiming(bdev, claimed_bdev, truncate_bdev_range); 132 return 0; 133 } 134 EXPORT_SYMBOL(truncate_bdev_range); ··· 545 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze 546 * actually. 547 */ 548 - struct super_block *freeze_bdev(struct block_device *bdev) 549 { 550 struct super_block *sb; 551 int error = 0; 552 553 mutex_lock(&bdev->bd_fsfreeze_mutex); 554 - if (++bdev->bd_fsfreeze_count > 1) { 555 - /* 556 - * We don't even need to grab a reference - the first call 557 - * to freeze_bdev grab an active reference and only the last 558 - * thaw_bdev drops it. 559 - */ 560 - sb = get_super(bdev); 561 - if (sb) 562 - drop_super(sb); 563 - mutex_unlock(&bdev->bd_fsfreeze_mutex); 564 - return sb; 565 - } 566 567 sb = get_active_super(bdev); 568 if (!sb) 569 - goto out; 570 if (sb->s_op->freeze_super) 571 error = sb->s_op->freeze_super(sb); 572 else 573 error = freeze_super(sb); 574 - if (error) { 575 - deactivate_super(sb); 576 - bdev->bd_fsfreeze_count--; 577 - mutex_unlock(&bdev->bd_fsfreeze_mutex); 578 - return ERR_PTR(error); 579 - } 580 deactivate_super(sb); 581 - out: 582 sync_blockdev(bdev); 583 mutex_unlock(&bdev->bd_fsfreeze_mutex); 584 - return sb; /* thaw_bdev releases s->s_umount */ 585 } 586 EXPORT_SYMBOL(freeze_bdev); 587 588 /** 589 * thaw_bdev -- unlock filesystem 590 * @bdev: blockdevice to unlock 591 - * @sb: associated superblock 592 * 593 * Unlocks the filesystem and marks it writeable again after freeze_bdev(). 594 */ 595 - int thaw_bdev(struct block_device *bdev, struct super_block *sb) 596 { 597 int error = -EINVAL; 598 599 mutex_lock(&bdev->bd_fsfreeze_mutex); ··· 596 if (--bdev->bd_fsfreeze_count > 0) 597 goto out; 598 599 if (!sb) 600 goto out; 601 ··· 782 783 static void bdev_free_inode(struct inode *inode) 784 { 785 kmem_cache_free(bdev_cachep, BDEV_I(inode)); 786 } 787 788 - static void init_once(void *foo) 789 { 790 - struct bdev_inode *ei = (struct bdev_inode *) foo; 791 - struct block_device *bdev = &ei->bdev; 792 793 - memset(bdev, 0, sizeof(*bdev)); 794 - mutex_init(&bdev->bd_mutex); 795 - #ifdef CONFIG_SYSFS 796 - INIT_LIST_HEAD(&bdev->bd_holder_disks); 797 - #endif 798 - bdev->bd_bdi = &noop_backing_dev_info; 799 inode_init_once(&ei->vfs_inode); 800 - /* Initialize mutex for freeze. */ 801 - mutex_init(&bdev->bd_fsfreeze_mutex); 802 } 803 804 static void bdev_evict_inode(struct inode *inode) ··· 856 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ 857 } 858 859 - /* 860 - * Most likely _very_ bad one - but then it's hardly critical for small 861 - * /dev and can be fixed when somebody will need really large one. 862 - * Keep in mind that it will be fed through icache hash function too. 
863 - */ 864 - static inline unsigned long hash(dev_t dev) 865 - { 866 - return MAJOR(dev)+MINOR(dev); 867 - } 868 - 869 - static int bdev_test(struct inode *inode, void *data) 870 - { 871 - return BDEV_I(inode)->bdev.bd_dev == *(dev_t *)data; 872 - } 873 - 874 - static int bdev_set(struct inode *inode, void *data) 875 - { 876 - BDEV_I(inode)->bdev.bd_dev = *(dev_t *)data; 877 - return 0; 878 - } 879 - 880 - static struct block_device *bdget(dev_t dev) 881 { 882 struct block_device *bdev; 883 struct inode *inode; 884 885 - inode = iget5_locked(blockdev_superblock, hash(dev), 886 - bdev_test, bdev_set, &dev); 887 - 888 if (!inode) 889 return NULL; 890 891 - bdev = &BDEV_I(inode)->bdev; 892 - 893 - if (inode->i_state & I_NEW) { 894 - spin_lock_init(&bdev->bd_size_lock); 895 - bdev->bd_contains = NULL; 896 - bdev->bd_super = NULL; 897 - bdev->bd_inode = inode; 898 - bdev->bd_part_count = 0; 899 - inode->i_mode = S_IFBLK; 900 - inode->i_rdev = dev; 901 - inode->i_bdev = bdev; 902 - inode->i_data.a_ops = &def_blk_aops; 903 - mapping_set_gfp_mask(&inode->i_data, GFP_USER); 904 - unlock_new_inode(inode); 905 } 906 return bdev; 907 } 908 909 /** 910 * bdgrab -- Grab a reference to an already referenced block device 911 * @bdev: Block device to grab a reference to. 912 */ 913 struct block_device *bdgrab(struct block_device *bdev) 914 { 915 - ihold(bdev->bd_inode); 916 return bdev; 917 } 918 EXPORT_SYMBOL(bdgrab); 919 - 920 - struct block_device *bdget_part(struct hd_struct *part) 921 - { 922 - return bdget(part_devt(part)); 923 - } 924 925 long nr_blockdev_pages(void) 926 { ··· 939 { 940 iput(bdev->bd_inode); 941 } 942 - 943 EXPORT_SYMBOL(bdput); 944 945 - static struct block_device *bd_acquire(struct inode *inode) 946 - { 947 - struct block_device *bdev; 948 - 949 - spin_lock(&bdev_lock); 950 - bdev = inode->i_bdev; 951 - if (bdev && !inode_unhashed(bdev->bd_inode)) { 952 - bdgrab(bdev); 953 - spin_unlock(&bdev_lock); 954 - return bdev; 955 - } 956 - spin_unlock(&bdev_lock); 957 - 958 - /* 959 - * i_bdev references block device inode that was already shut down 960 - * (corresponding device got removed). Remove the reference and look 961 - * up block device inode again just in case new device got 962 - * reestablished under the same device number. 963 - */ 964 - if (bdev) 965 - bd_forget(inode); 966 - 967 - bdev = bdget(inode->i_rdev); 968 - if (bdev) { 969 - spin_lock(&bdev_lock); 970 - if (!inode->i_bdev) { 971 - /* 972 - * We take an additional reference to bd_inode, 973 - * and it's released in clear_inode() of inode. 974 - * So, we can access it via ->i_mapping always 975 - * without igrab(). 
976 - */ 977 - bdgrab(bdev); 978 - inode->i_bdev = bdev; 979 - inode->i_mapping = bdev->bd_inode->i_mapping; 980 - } 981 - spin_unlock(&bdev_lock); 982 - } 983 - return bdev; 984 - } 985 - 986 - /* Call when you free inode */ 987 - 988 - void bd_forget(struct inode *inode) 989 - { 990 - struct block_device *bdev = NULL; 991 - 992 - spin_lock(&bdev_lock); 993 - if (!sb_is_blkdev_sb(inode->i_sb)) 994 - bdev = inode->i_bdev; 995 - inode->i_bdev = NULL; 996 - inode->i_mapping = &inode->i_data; 997 - spin_unlock(&bdev_lock); 998 - 999 - if (bdev) 1000 - bdput(bdev); 1001 - } 1002 - 1003 /** 1004 * bd_may_claim - test whether a block device can be claimed 1005 * @bdev: block device of interest ··· 976 /** 977 * bd_prepare_to_claim - claim a block device 978 * @bdev: block device of interest 979 - * @whole: the whole device containing @bdev, may equal @bdev 980 * @holder: holder trying to claim @bdev 981 * 982 * Claim @bdev. This function fails if @bdev is already claimed by another ··· 985 * RETURNS: 986 * 0 if @bdev can be claimed, -EBUSY otherwise. 987 */ 988 - int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, 989 - void *holder) 990 { 991 retry: 992 spin_lock(&bdev_lock); 993 /* if someone else claimed, fail */ ··· 1018 } 1019 EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */ 1020 1021 - static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno) 1022 - { 1023 - struct gendisk *disk = get_gendisk(bdev->bd_dev, partno); 1024 - 1025 - if (!disk) 1026 - return NULL; 1027 - /* 1028 - * Now that we hold gendisk reference we make sure bdev we looked up is 1029 - * not stale. If it is, it means device got removed and created before 1030 - * we looked up gendisk and we fail open in such case. Associating 1031 - * unhashed bdev with newly created gendisk could lead to two bdevs 1032 - * (and thus two independent caches) being associated with one device 1033 - * which is bad. 1034 - */ 1035 - if (inode_unhashed(bdev->bd_inode)) { 1036 - put_disk_and_module(disk); 1037 - return NULL; 1038 - } 1039 - return disk; 1040 - } 1041 - 1042 static void bd_clear_claiming(struct block_device *whole, void *holder) 1043 { 1044 lockdep_assert_held(&bdev_lock); ··· 1030 /** 1031 * bd_finish_claiming - finish claiming of a block device 1032 * @bdev: block device of interest 1033 - * @whole: whole block device 1034 * @holder: holder that has claimed @bdev 1035 * 1036 * Finish exclusive open of a block device. Mark the device as exlusively 1037 * open by the holder and wake up all waiters for exclusive open to finish. 1038 */ 1039 - static void bd_finish_claiming(struct block_device *bdev, 1040 - struct block_device *whole, void *holder) 1041 { 1042 spin_lock(&bdev_lock); 1043 BUG_ON(!bd_may_claim(bdev, whole, holder)); 1044 /* ··· 1063 * also used when exclusive open is not actually desired and we just needed 1064 * to block other exclusive openers for a while. 
1065 */ 1066 - void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, 1067 - void *holder) 1068 { 1069 spin_lock(&bdev_lock); 1070 - bd_clear_claiming(whole, holder); 1071 spin_unlock(&bdev_lock); 1072 } 1073 EXPORT_SYMBOL(bd_abort_claiming); ··· 1137 WARN_ON_ONCE(!bdev->bd_holder); 1138 1139 /* FIXME: remove the following once add_disk() handles errors */ 1140 - if (WARN_ON(!disk->slave_dir || !bdev->bd_part->holder_dir)) 1141 goto out_unlock; 1142 1143 holder = bd_find_holder_disk(bdev, disk); ··· 1156 holder->disk = disk; 1157 holder->refcnt = 1; 1158 1159 - ret = add_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 1160 if (ret) 1161 goto out_free; 1162 1163 - ret = add_symlink(bdev->bd_part->holder_dir, &disk_to_dev(disk)->kobj); 1164 if (ret) 1165 goto out_del; 1166 /* 1167 * bdev could be deleted beneath us which would implicitly destroy 1168 * the holder directory. Hold on to it. 1169 */ 1170 - kobject_get(bdev->bd_part->holder_dir); 1171 1172 list_add(&holder->list, &bdev->bd_holder_disks); 1173 goto out_unlock; 1174 1175 out_del: 1176 - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 1177 out_free: 1178 kfree(holder); 1179 out_unlock: ··· 1201 holder = bd_find_holder_disk(bdev, disk); 1202 1203 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { 1204 - del_symlink(disk->slave_dir, &part_to_dev(bdev->bd_part)->kobj); 1205 - del_symlink(bdev->bd_part->holder_dir, 1206 - &disk_to_dev(disk)->kobj); 1207 - kobject_put(bdev->bd_part->holder_dir); 1208 list_del_init(&holder->list); 1209 kfree(holder); 1210 } ··· 1212 } 1213 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); 1214 #endif 1215 - 1216 - /** 1217 - * check_disk_size_change - checks for disk size change and adjusts bdev size. 1218 - * @disk: struct gendisk to check 1219 - * @bdev: struct bdev to adjust. 1220 - * @verbose: if %true log a message about a size change if there is any 1221 - * 1222 - * This routine checks to see if the bdev size does not match the disk size 1223 - * and adjusts it if it differs. When shrinking the bdev size, its all caches 1224 - * are freed. 1225 - */ 1226 - static void check_disk_size_change(struct gendisk *disk, 1227 - struct block_device *bdev, bool verbose) 1228 - { 1229 - loff_t disk_size, bdev_size; 1230 - 1231 - spin_lock(&bdev->bd_size_lock); 1232 - disk_size = (loff_t)get_capacity(disk) << 9; 1233 - bdev_size = i_size_read(bdev->bd_inode); 1234 - if (disk_size != bdev_size) { 1235 - if (verbose) { 1236 - printk(KERN_INFO 1237 - "%s: detected capacity change from %lld to %lld\n", 1238 - disk->disk_name, bdev_size, disk_size); 1239 - } 1240 - i_size_write(bdev->bd_inode, disk_size); 1241 - } 1242 - spin_unlock(&bdev->bd_size_lock); 1243 - 1244 - if (bdev_size > disk_size) { 1245 - if (__invalidate_device(bdev, false)) 1246 - pr_warn("VFS: busy inodes on resized disk %s\n", 1247 - disk->disk_name); 1248 - } 1249 - } 1250 - 1251 - /** 1252 - * revalidate_disk_size - checks for disk size change and adjusts bdev size. 1253 - * @disk: struct gendisk to check 1254 - * @verbose: if %true log a message about a size change if there is any 1255 - * 1256 - * This routine checks to see if the bdev size does not match the disk size 1257 - * and adjusts it if it differs. When shrinking the bdev size, its all caches 1258 - * are freed. 
1259 - */ 1260 - void revalidate_disk_size(struct gendisk *disk, bool verbose) 1261 - { 1262 - struct block_device *bdev; 1263 - 1264 - /* 1265 - * Hidden disks don't have associated bdev so there's no point in 1266 - * revalidating them. 1267 - */ 1268 - if (disk->flags & GENHD_FL_HIDDEN) 1269 - return; 1270 - 1271 - bdev = bdget_disk(disk, 0); 1272 - if (bdev) { 1273 - check_disk_size_change(disk, bdev, verbose); 1274 - bdput(bdev); 1275 - } 1276 - } 1277 - EXPORT_SYMBOL(revalidate_disk_size); 1278 - 1279 - void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors) 1280 - { 1281 - spin_lock(&bdev->bd_size_lock); 1282 - i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); 1283 - spin_unlock(&bdev->bd_size_lock); 1284 - } 1285 - EXPORT_SYMBOL(bd_set_nr_sectors); 1286 1287 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); 1288 ··· 1246 disk->fops->revalidate_disk(disk); 1247 } 1248 1249 - check_disk_size_change(disk, bdev, !invalidate); 1250 - 1251 if (get_capacity(disk)) { 1252 ret = blk_add_partitions(disk, bdev); 1253 if (ret == -EAGAIN) ··· 1272 * mutex_lock(part->bd_mutex) 1273 * mutex_lock_nested(whole->bd_mutex, 1) 1274 */ 1275 - 1276 - static int __blkdev_get(struct block_device *bdev, fmode_t mode, void *holder, 1277 - int for_part) 1278 { 1279 - struct block_device *whole = NULL, *claiming = NULL; 1280 - struct gendisk *disk; 1281 - int ret; 1282 - int partno; 1283 - bool first_open = false, unblock_events = true, need_restart; 1284 1285 - restart: 1286 - need_restart = false; 1287 - ret = -ENXIO; 1288 - disk = bdev_get_gendisk(bdev, &partno); 1289 - if (!disk) 1290 - goto out; 1291 - 1292 - if (partno) { 1293 - whole = bdget_disk(disk, 0); 1294 - if (!whole) { 1295 - ret = -ENOMEM; 1296 - goto out_put_disk; 1297 - } 1298 - } 1299 - 1300 - if (!for_part && (mode & FMODE_EXCL)) { 1301 - WARN_ON_ONCE(!holder); 1302 - if (whole) 1303 - claiming = whole; 1304 - else 1305 - claiming = bdev; 1306 - ret = bd_prepare_to_claim(bdev, claiming, holder); 1307 - if (ret) 1308 - goto out_put_whole; 1309 - } 1310 - 1311 - disk_block_events(disk); 1312 - mutex_lock_nested(&bdev->bd_mutex, for_part); 1313 if (!bdev->bd_openers) { 1314 - first_open = true; 1315 - bdev->bd_disk = disk; 1316 - bdev->bd_contains = bdev; 1317 - bdev->bd_partno = partno; 1318 - 1319 - if (!partno) { 1320 - ret = -ENXIO; 1321 - bdev->bd_part = disk_get_part(disk, partno); 1322 - if (!bdev->bd_part) 1323 - goto out_clear; 1324 - 1325 ret = 0; 1326 - if (disk->fops->open) { 1327 ret = disk->fops->open(bdev, mode); 1328 - /* 1329 - * If we lost a race with 'disk' being deleted, 1330 - * try again. 
See md.c 1331 - */ 1332 - if (ret == -ERESTARTSYS) 1333 - need_restart = true; 1334 - } 1335 1336 - if (!ret) { 1337 - bd_set_nr_sectors(bdev, get_capacity(disk)); 1338 set_init_blocksize(bdev); 1339 - } 1340 1341 /* 1342 * If the device is invalidated, rescan partition ··· 1297 bdev_disk_changed(bdev, ret == -ENOMEDIUM); 1298 1299 if (ret) 1300 - goto out_clear; 1301 } else { 1302 - BUG_ON(for_part); 1303 - ret = __blkdev_get(whole, mode, NULL, 1); 1304 - if (ret) 1305 - goto out_clear; 1306 - bdev->bd_contains = bdgrab(whole); 1307 - bdev->bd_part = disk_get_part(disk, partno); 1308 - if (!(disk->flags & GENHD_FL_UP) || 1309 - !bdev->bd_part || !bdev->bd_part->nr_sects) { 1310 - ret = -ENXIO; 1311 - goto out_clear; 1312 } 1313 - bd_set_nr_sectors(bdev, bdev->bd_part->nr_sects); 1314 set_init_blocksize(bdev); 1315 } 1316 1317 if (bdev->bd_bdi == &noop_backing_dev_info) 1318 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); 1319 } else { 1320 - if (bdev->bd_contains == bdev) { 1321 - ret = 0; 1322 if (bdev->bd_disk->fops->open) 1323 ret = bdev->bd_disk->fops->open(bdev, mode); 1324 /* the same as first opener case, read comment there */ ··· 1331 (!ret || ret == -ENOMEDIUM)) 1332 bdev_disk_changed(bdev, ret == -ENOMEDIUM); 1333 if (ret) 1334 - goto out_unlock_bdev; 1335 } 1336 } 1337 bdev->bd_openers++; 1338 - if (for_part) 1339 - bdev->bd_part_count++; 1340 - if (claiming) 1341 - bd_finish_claiming(bdev, claiming, holder); 1342 - 1343 - /* 1344 - * Block event polling for write claims if requested. Any write holder 1345 - * makes the write_holder state stick until all are released. This is 1346 - * good enough and tracking individual writeable reference is too 1347 - * fragile given the way @mode is used in blkdev_get/put(). 1348 - */ 1349 - if (claiming && (mode & FMODE_WRITE) && !bdev->bd_write_holder && 1350 - (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { 1351 - bdev->bd_write_holder = true; 1352 - unblock_events = false; 1353 - } 1354 - mutex_unlock(&bdev->bd_mutex); 1355 - 1356 - if (unblock_events) 1357 - disk_unblock_events(disk); 1358 - 1359 - /* only one opener holds refs to the module and disk */ 1360 - if (!first_open) 1361 - put_disk_and_module(disk); 1362 - if (whole) 1363 - bdput(whole); 1364 return 0; 1365 - 1366 - out_clear: 1367 - disk_put_part(bdev->bd_part); 1368 - bdev->bd_disk = NULL; 1369 - bdev->bd_part = NULL; 1370 - if (bdev != bdev->bd_contains) 1371 - __blkdev_put(bdev->bd_contains, mode, 1); 1372 - bdev->bd_contains = NULL; 1373 - out_unlock_bdev: 1374 - if (claiming) 1375 - bd_abort_claiming(bdev, claiming, holder); 1376 - mutex_unlock(&bdev->bd_mutex); 1377 - disk_unblock_events(disk); 1378 - out_put_whole: 1379 - if (whole) 1380 - bdput(whole); 1381 - out_put_disk: 1382 - put_disk_and_module(disk); 1383 - if (need_restart) 1384 - goto restart; 1385 - out: 1386 - return ret; 1387 } 1388 1389 - /** 1390 - * blkdev_get - open a block device 1391 - * @bdev: block_device to open 1392 - * @mode: FMODE_* mask 1393 - * @holder: exclusive holder identifier 1394 - * 1395 - * Open @bdev with @mode. If @mode includes %FMODE_EXCL, @bdev is 1396 - * open with exclusive access. Specifying %FMODE_EXCL with %NULL 1397 - * @holder is invalid. Exclusive opens may nest for the same @holder. 1398 - * 1399 - * On success, the reference count of @bdev is unchanged. On failure, 1400 - * @bdev is put. 1401 - * 1402 - * CONTEXT: 1403 - * Might sleep. 1404 - * 1405 - * RETURNS: 1406 - * 0 on success, -errno on failure. 
1407 - */ 1408 - static int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) 1409 - { 1410 - int ret, perm = 0; 1411 - 1412 - if (mode & FMODE_READ) 1413 - perm |= MAY_READ; 1414 - if (mode & FMODE_WRITE) 1415 - perm |= MAY_WRITE; 1416 - ret = devcgroup_inode_permission(bdev->bd_inode, perm); 1417 - if (ret) 1418 - goto bdput; 1419 - 1420 - ret =__blkdev_get(bdev, mode, holder, 0); 1421 - if (ret) 1422 - goto bdput; 1423 - return 0; 1424 - 1425 - bdput: 1426 - bdput(bdev); 1427 - return ret; 1428 - } 1429 - 1430 - /** 1431 - * blkdev_get_by_path - open a block device by name 1432 - * @path: path to the block device to open 1433 - * @mode: FMODE_* mask 1434 - * @holder: exclusive holder identifier 1435 - * 1436 - * Open the blockdevice described by the device file at @path. @mode 1437 - * and @holder are identical to blkdev_get(). 1438 - * 1439 - * On success, the returned block_device has reference count of one. 1440 - * 1441 - * CONTEXT: 1442 - * Might sleep. 1443 - * 1444 - * RETURNS: 1445 - * Pointer to block_device on success, ERR_PTR(-errno) on failure. 1446 - */ 1447 - struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, 1448 - void *holder) 1449 { 1450 struct block_device *bdev; 1451 - int err; 1452 1453 - bdev = lookup_bdev(path); 1454 - if (IS_ERR(bdev)) 1455 - return bdev; 1456 1457 - err = blkdev_get(bdev, mode, holder); 1458 - if (err) 1459 - return ERR_PTR(err); 1460 - 1461 - if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) { 1462 - blkdev_put(bdev, mode); 1463 - return ERR_PTR(-EACCES); 1464 } 1465 1466 return bdev; 1467 } 1468 - EXPORT_SYMBOL(blkdev_get_by_path); 1469 1470 /** 1471 * blkdev_get_by_dev - open a block device by device number ··· 1386 * @mode: FMODE_* mask 1387 * @holder: exclusive holder identifier 1388 * 1389 - * Open the blockdevice described by device number @dev. @mode and 1390 - * @holder are identical to blkdev_get(). 1391 * 1392 - * Use it ONLY if you really do not have anything better - i.e. when 1393 - * you are behind a truly sucky interface and all you are given is a 1394 - * device number. _Never_ to be used for internal purposes. If you 1395 - * ever need it - reconsider your API. 1396 - * 1397 - * On success, the returned block_device has reference count of one. 1398 * 1399 * CONTEXT: 1400 * Might sleep. 1401 * 1402 * RETURNS: 1403 - * Pointer to block_device on success, ERR_PTR(-errno) on failure. 
1404 */ 1405 struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) 1406 { 1407 struct block_device *bdev; 1408 - int err; 1409 1410 - bdev = bdget(dev); 1411 if (!bdev) 1412 - return ERR_PTR(-ENOMEM); 1413 1414 - err = blkdev_get(bdev, mode, holder); 1415 - if (err) 1416 - return ERR_PTR(err); 1417 1418 return bdev; 1419 } 1420 - EXPORT_SYMBOL(blkdev_get_by_dev); 1421 1422 static int blkdev_open(struct inode * inode, struct file * filp) 1423 { ··· 1530 if ((filp->f_flags & O_ACCMODE) == 3) 1531 filp->f_mode |= FMODE_WRITE_IOCTL; 1532 1533 - bdev = bd_acquire(inode); 1534 - if (bdev == NULL) 1535 - return -ENOMEM; 1536 - 1537 filp->f_mapping = bdev->bd_inode->i_mapping; 1538 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); 1539 - 1540 - return blkdev_get(bdev, filp->f_mode, filp); 1541 } 1542 1543 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) ··· 1561 WARN_ON_ONCE(bdev->bd_holders); 1562 sync_blockdev(bdev); 1563 kill_bdev(bdev); 1564 - 1565 bdev_write_inode(bdev); 1566 } 1567 - if (bdev->bd_contains == bdev) { 1568 - if (disk->fops->release) 1569 - disk->fops->release(disk, mode); 1570 - } 1571 - if (!bdev->bd_openers) { 1572 - disk_put_part(bdev->bd_part); 1573 - bdev->bd_part = NULL; 1574 - bdev->bd_disk = NULL; 1575 - if (bdev != bdev->bd_contains) 1576 - victim = bdev->bd_contains; 1577 - bdev->bd_contains = NULL; 1578 1579 - put_disk_and_module(disk); 1580 - } 1581 mutex_unlock(&bdev->bd_mutex); 1582 - bdput(bdev); 1583 - if (victim) 1584 __blkdev_put(victim, mode, 1); 1585 } 1586 1587 void blkdev_put(struct block_device *bdev, fmode_t mode) 1588 { 1589 mutex_lock(&bdev->bd_mutex); 1590 1591 if (mode & FMODE_EXCL) { 1592 bool bdev_free; 1593 1594 /* ··· 1593 spin_lock(&bdev_lock); 1594 1595 WARN_ON_ONCE(--bdev->bd_holders < 0); 1596 - WARN_ON_ONCE(--bdev->bd_contains->bd_holders < 0); 1597 1598 - /* bd_contains might point to self, check in a separate step */ 1599 if ((bdev_free = !bdev->bd_holders)) 1600 bdev->bd_holder = NULL; 1601 - if (!bdev->bd_contains->bd_holders) 1602 - bdev->bd_contains->bd_holder = NULL; 1603 1604 spin_unlock(&bdev_lock); 1605 ··· 1607 * unblock evpoll if it was a write holder. 1608 */ 1609 if (bdev_free && bdev->bd_write_holder) { 1610 - disk_unblock_events(bdev->bd_disk); 1611 bdev->bd_write_holder = false; 1612 } 1613 } ··· 1617 * event. This is to ensure detection of media removal commanded 1618 * from userland - e.g. eject(1). 1619 */ 1620 - disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); 1621 - 1622 mutex_unlock(&bdev->bd_mutex); 1623 1624 __blkdev_put(bdev, mode, 0); 1625 } 1626 EXPORT_SYMBOL(blkdev_put); 1627 ··· 1834 * namespace if possible and return it. Return ERR_PTR(error) 1835 * otherwise. 
1836 */ 1837 - struct block_device *lookup_bdev(const char *pathname) 1838 { 1839 - struct block_device *bdev; 1840 struct inode *inode; 1841 struct path path; 1842 int error; 1843 1844 if (!pathname || !*pathname) 1845 - return ERR_PTR(-EINVAL); 1846 1847 error = kern_path(pathname, LOOKUP_FOLLOW, &path); 1848 if (error) 1849 - return ERR_PTR(error); 1850 1851 inode = d_backing_inode(path.dentry); 1852 error = -ENOTBLK; 1853 if (!S_ISBLK(inode->i_mode)) 1854 - goto fail; 1855 error = -EACCES; 1856 if (!may_open_dev(&path)) 1857 - goto fail; 1858 - error = -ENOMEM; 1859 - bdev = bd_acquire(inode); 1860 - if (!bdev) 1861 - goto fail; 1862 - out: 1863 path_put(&path); 1864 - return bdev; 1865 - fail: 1866 - bdev = ERR_PTR(error); 1867 - goto out; 1868 } 1869 EXPORT_SYMBOL(lookup_bdev); 1870
··· 32 #include <linux/cleancache.h> 33 #include <linux/task_io_accounting_ops.h> 34 #include <linux/falloc.h> 35 + #include <linux/part_stat.h> 36 #include <linux/uaccess.h> 37 #include <linux/suspend.h> 38 #include "internal.h" ··· 110 int truncate_bdev_range(struct block_device *bdev, fmode_t mode, 111 loff_t lstart, loff_t lend) 112 { 113 /* 114 * If we don't hold exclusive handle for the device, upgrade to it 115 * while we discard the buffer cache to avoid discarding buffers 116 * under live filesystem. 117 */ 118 if (!(mode & FMODE_EXCL)) { 119 + int err = bd_prepare_to_claim(bdev, truncate_bdev_range); 120 if (err) 121 return err; 122 } 123 + 124 truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); 125 + if (!(mode & FMODE_EXCL)) 126 + bd_abort_claiming(bdev, truncate_bdev_range); 127 return 0; 128 } 129 EXPORT_SYMBOL(truncate_bdev_range); ··· 548 * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze 549 * actually. 550 */ 551 + int freeze_bdev(struct block_device *bdev) 552 { 553 struct super_block *sb; 554 int error = 0; 555 556 mutex_lock(&bdev->bd_fsfreeze_mutex); 557 + if (++bdev->bd_fsfreeze_count > 1) 558 + goto done; 559 560 sb = get_active_super(bdev); 561 if (!sb) 562 + goto sync; 563 if (sb->s_op->freeze_super) 564 error = sb->s_op->freeze_super(sb); 565 else 566 error = freeze_super(sb); 567 deactivate_super(sb); 568 + 569 + if (error) { 570 + bdev->bd_fsfreeze_count--; 571 + goto done; 572 + } 573 + bdev->bd_fsfreeze_sb = sb; 574 + 575 + sync: 576 sync_blockdev(bdev); 577 + done: 578 mutex_unlock(&bdev->bd_fsfreeze_mutex); 579 + return error; 580 } 581 EXPORT_SYMBOL(freeze_bdev); 582 583 /** 584 * thaw_bdev -- unlock filesystem 585 * @bdev: blockdevice to unlock 586 * 587 * Unlocks the filesystem and marks it writeable again after freeze_bdev(). 
588 */ 589 + int thaw_bdev(struct block_device *bdev) 590 { 591 + struct super_block *sb; 592 int error = -EINVAL; 593 594 mutex_lock(&bdev->bd_fsfreeze_mutex); ··· 607 if (--bdev->bd_fsfreeze_count > 0) 608 goto out; 609 610 + sb = bdev->bd_fsfreeze_sb; 611 if (!sb) 612 goto out; 613 ··· 792 793 static void bdev_free_inode(struct inode *inode) 794 { 795 + struct block_device *bdev = I_BDEV(inode); 796 + 797 + free_percpu(bdev->bd_stats); 798 + kfree(bdev->bd_meta_info); 799 + 800 kmem_cache_free(bdev_cachep, BDEV_I(inode)); 801 } 802 803 + static void init_once(void *data) 804 { 805 + struct bdev_inode *ei = data; 806 807 inode_init_once(&ei->vfs_inode); 808 } 809 810 static void bdev_evict_inode(struct inode *inode) ··· 870 blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ 871 } 872 873 + struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) 874 { 875 struct block_device *bdev; 876 struct inode *inode; 877 878 + inode = new_inode(blockdev_superblock); 879 if (!inode) 880 return NULL; 881 + inode->i_mode = S_IFBLK; 882 + inode->i_rdev = 0; 883 + inode->i_data.a_ops = &def_blk_aops; 884 + mapping_set_gfp_mask(&inode->i_data, GFP_USER); 885 886 + bdev = I_BDEV(inode); 887 + memset(bdev, 0, sizeof(*bdev)); 888 + mutex_init(&bdev->bd_mutex); 889 + mutex_init(&bdev->bd_fsfreeze_mutex); 890 + spin_lock_init(&bdev->bd_size_lock); 891 + bdev->bd_disk = disk; 892 + bdev->bd_partno = partno; 893 + bdev->bd_inode = inode; 894 + bdev->bd_bdi = &noop_backing_dev_info; 895 + #ifdef CONFIG_SYSFS 896 + INIT_LIST_HEAD(&bdev->bd_holder_disks); 897 + #endif 898 + bdev->bd_stats = alloc_percpu(struct disk_stats); 899 + if (!bdev->bd_stats) { 900 + iput(inode); 901 + return NULL; 902 } 903 return bdev; 904 + } 905 + 906 + void bdev_add(struct block_device *bdev, dev_t dev) 907 + { 908 + bdev->bd_dev = dev; 909 + bdev->bd_inode->i_rdev = dev; 910 + bdev->bd_inode->i_ino = dev; 911 + insert_inode_hash(bdev->bd_inode); 912 + } 913 + 914 + static struct block_device *bdget(dev_t dev) 915 + { 916 + struct inode *inode; 917 + 918 + inode = ilookup(blockdev_superblock, dev); 919 + if (!inode) 920 + return NULL; 921 + return &BDEV_I(inode)->bdev; 922 } 923 924 /** 925 * bdgrab -- Grab a reference to an already referenced block device 926 * @bdev: Block device to grab a reference to. 927 + * 928 + * Returns the block_device with an additional reference when successful, 929 + * or NULL if the inode is already beeing freed. 930 */ 931 struct block_device *bdgrab(struct block_device *bdev) 932 { 933 + if (!igrab(bdev->bd_inode)) 934 + return NULL; 935 return bdev; 936 } 937 EXPORT_SYMBOL(bdgrab); 938 939 long nr_blockdev_pages(void) 940 { ··· 953 { 954 iput(bdev->bd_inode); 955 } 956 EXPORT_SYMBOL(bdput); 957 958 /** 959 * bd_may_claim - test whether a block device can be claimed 960 * @bdev: block device of interest ··· 1049 /** 1050 * bd_prepare_to_claim - claim a block device 1051 * @bdev: block device of interest 1052 * @holder: holder trying to claim @bdev 1053 * 1054 * Claim @bdev. This function fails if @bdev is already claimed by another ··· 1059 * RETURNS: 1060 * 0 if @bdev can be claimed, -EBUSY otherwise. 
1061 */ 1062 + int bd_prepare_to_claim(struct block_device *bdev, void *holder) 1063 { 1064 + struct block_device *whole = bdev_whole(bdev); 1065 + 1066 + if (WARN_ON_ONCE(!holder)) 1067 + return -EINVAL; 1068 retry: 1069 spin_lock(&bdev_lock); 1070 /* if someone else claimed, fail */ ··· 1089 } 1090 EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */ 1091 1092 static void bd_clear_claiming(struct block_device *whole, void *holder) 1093 { 1094 lockdep_assert_held(&bdev_lock); ··· 1122 /** 1123 * bd_finish_claiming - finish claiming of a block device 1124 * @bdev: block device of interest 1125 * @holder: holder that has claimed @bdev 1126 * 1127 * Finish exclusive open of a block device. Mark the device as exlusively 1128 * open by the holder and wake up all waiters for exclusive open to finish. 1129 */ 1130 + static void bd_finish_claiming(struct block_device *bdev, void *holder) 1131 { 1132 + struct block_device *whole = bdev_whole(bdev); 1133 + 1134 spin_lock(&bdev_lock); 1135 BUG_ON(!bd_may_claim(bdev, whole, holder)); 1136 /* ··· 1155 * also used when exclusive open is not actually desired and we just needed 1156 * to block other exclusive openers for a while. 1157 */ 1158 + void bd_abort_claiming(struct block_device *bdev, void *holder) 1159 { 1160 spin_lock(&bdev_lock); 1161 + bd_clear_claiming(bdev_whole(bdev), holder); 1162 spin_unlock(&bdev_lock); 1163 } 1164 EXPORT_SYMBOL(bd_abort_claiming); ··· 1230 WARN_ON_ONCE(!bdev->bd_holder); 1231 1232 /* FIXME: remove the following once add_disk() handles errors */ 1233 + if (WARN_ON(!disk->slave_dir || !bdev->bd_holder_dir)) 1234 goto out_unlock; 1235 1236 holder = bd_find_holder_disk(bdev, disk); ··· 1249 holder->disk = disk; 1250 holder->refcnt = 1; 1251 1252 + ret = add_symlink(disk->slave_dir, bdev_kobj(bdev)); 1253 if (ret) 1254 goto out_free; 1255 1256 + ret = add_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); 1257 if (ret) 1258 goto out_del; 1259 /* 1260 * bdev could be deleted beneath us which would implicitly destroy 1261 * the holder directory. Hold on to it. 
1262 */ 1263 + kobject_get(bdev->bd_holder_dir); 1264 1265 list_add(&holder->list, &bdev->bd_holder_disks); 1266 goto out_unlock; 1267 1268 out_del: 1269 + del_symlink(disk->slave_dir, bdev_kobj(bdev)); 1270 out_free: 1271 kfree(holder); 1272 out_unlock: ··· 1294 holder = bd_find_holder_disk(bdev, disk); 1295 1296 if (!WARN_ON_ONCE(holder == NULL) && !--holder->refcnt) { 1297 + del_symlink(disk->slave_dir, bdev_kobj(bdev)); 1298 + del_symlink(bdev->bd_holder_dir, &disk_to_dev(disk)->kobj); 1299 + kobject_put(bdev->bd_holder_dir); 1300 list_del_init(&holder->list); 1301 kfree(holder); 1302 } ··· 1306 } 1307 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder); 1308 #endif 1309 1310 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); 1311 ··· 1411 disk->fops->revalidate_disk(disk); 1412 } 1413 1414 if (get_capacity(disk)) { 1415 ret = blk_add_partitions(disk, bdev); 1416 if (ret == -EAGAIN) ··· 1439 * mutex_lock(part->bd_mutex) 1440 * mutex_lock_nested(whole->bd_mutex, 1) 1441 */ 1442 + static int __blkdev_get(struct block_device *bdev, fmode_t mode) 1443 { 1444 + struct gendisk *disk = bdev->bd_disk; 1445 + int ret = 0; 1446 1447 if (!bdev->bd_openers) { 1448 + if (!bdev_is_partition(bdev)) { 1449 ret = 0; 1450 + if (disk->fops->open) 1451 ret = disk->fops->open(bdev, mode); 1452 1453 + if (!ret) 1454 set_init_blocksize(bdev); 1455 1456 /* 1457 * If the device is invalidated, rescan partition ··· 1516 bdev_disk_changed(bdev, ret == -ENOMEDIUM); 1517 1518 if (ret) 1519 + return ret; 1520 } else { 1521 + struct block_device *whole = bdgrab(disk->part0); 1522 + 1523 + mutex_lock_nested(&whole->bd_mutex, 1); 1524 + ret = __blkdev_get(whole, mode); 1525 + if (ret) { 1526 + mutex_unlock(&whole->bd_mutex); 1527 + bdput(whole); 1528 + return ret; 1529 } 1530 + whole->bd_part_count++; 1531 + mutex_unlock(&whole->bd_mutex); 1532 + 1533 + if (!(disk->flags & GENHD_FL_UP) || 1534 + !bdev_nr_sectors(bdev)) { 1535 + __blkdev_put(whole, mode, 1); 1536 + bdput(whole); 1537 + return -ENXIO; 1538 + } 1539 set_init_blocksize(bdev); 1540 } 1541 1542 if (bdev->bd_bdi == &noop_backing_dev_info) 1543 bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info); 1544 } else { 1545 + if (!bdev_is_partition(bdev)) { 1546 if (bdev->bd_disk->fops->open) 1547 ret = bdev->bd_disk->fops->open(bdev, mode); 1548 /* the same as first opener case, read comment there */ ··· 1545 (!ret || ret == -ENOMEDIUM)) 1546 bdev_disk_changed(bdev, ret == -ENOMEDIUM); 1547 if (ret) 1548 + return ret; 1549 } 1550 } 1551 bdev->bd_openers++; 1552 return 0; 1553 } 1554 1555 + struct block_device *blkdev_get_no_open(dev_t dev) 1556 { 1557 struct block_device *bdev; 1558 + struct gendisk *disk; 1559 1560 + down_read(&bdev_lookup_sem); 1561 + bdev = bdget(dev); 1562 + if (!bdev) { 1563 + up_read(&bdev_lookup_sem); 1564 + blk_request_module(dev); 1565 + down_read(&bdev_lookup_sem); 1566 1567 + bdev = bdget(dev); 1568 + if (!bdev) 1569 + goto unlock; 1570 } 1571 1572 + disk = bdev->bd_disk; 1573 + if (!kobject_get_unless_zero(&disk_to_dev(disk)->kobj)) 1574 + goto bdput; 1575 + if ((disk->flags & (GENHD_FL_UP | GENHD_FL_HIDDEN)) != GENHD_FL_UP) 1576 + goto put_disk; 1577 + if (!try_module_get(bdev->bd_disk->fops->owner)) 1578 + goto put_disk; 1579 + up_read(&bdev_lookup_sem); 1580 return bdev; 1581 + put_disk: 1582 + put_disk(disk); 1583 + bdput: 1584 + bdput(bdev); 1585 + unlock: 1586 + up_read(&bdev_lookup_sem); 1587 + return NULL; 1588 } 1589 + 1590 + void blkdev_put_no_open(struct block_device *bdev) 1591 + { 1592 + 
module_put(bdev->bd_disk->fops->owner); 1593 + put_disk(bdev->bd_disk); 1594 + bdput(bdev); 1595 + } 1596 1597 /** 1598 * blkdev_get_by_dev - open a block device by device number ··· 1687 * @mode: FMODE_* mask 1688 * @holder: exclusive holder identifier 1689 * 1690 + * Open the block device described by device number @dev. If @mode includes 1691 + * %FMODE_EXCL, the block device is opened with exclusive access. Specifying 1692 + * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for 1693 + * the same @holder. 1694 * 1695 + * Use this interface ONLY if you really do not have anything better - i.e. when 1696 + * you are behind a truly sucky interface and all you are given is a device 1697 + * number. Everything else should use blkdev_get_by_path(). 1698 * 1699 * CONTEXT: 1700 * Might sleep. 1701 * 1702 * RETURNS: 1703 + * Reference to the block_device on success, ERR_PTR(-errno) on failure. 1704 */ 1705 struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder) 1706 { 1707 + bool unblock_events = true; 1708 struct block_device *bdev; 1709 + struct gendisk *disk; 1710 + int ret; 1711 1712 + ret = devcgroup_check_permission(DEVCG_DEV_BLOCK, 1713 + MAJOR(dev), MINOR(dev), 1714 + ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) | 1715 + ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0)); 1716 + if (ret) 1717 + return ERR_PTR(ret); 1718 + 1719 + /* 1720 + * If we lost a race with 'disk' being deleted, try again. See md.c. 1721 + */ 1722 + retry: 1723 + bdev = blkdev_get_no_open(dev); 1724 if (!bdev) 1725 + return ERR_PTR(-ENXIO); 1726 + disk = bdev->bd_disk; 1727 1728 + if (mode & FMODE_EXCL) { 1729 + ret = bd_prepare_to_claim(bdev, holder); 1730 + if (ret) 1731 + goto put_blkdev; 1732 + } 1733 + 1734 + disk_block_events(disk); 1735 + 1736 + mutex_lock(&bdev->bd_mutex); 1737 + ret =__blkdev_get(bdev, mode); 1738 + if (ret) 1739 + goto abort_claiming; 1740 + if (mode & FMODE_EXCL) { 1741 + bd_finish_claiming(bdev, holder); 1742 + 1743 + /* 1744 + * Block event polling for write claims if requested. Any write 1745 + * holder makes the write_holder state stick until all are 1746 + * released. This is good enough and tracking individual 1747 + * writeable reference is too fragile given the way @mode is 1748 + * used in blkdev_get/put(). 1749 + */ 1750 + if ((mode & FMODE_WRITE) && !bdev->bd_write_holder && 1751 + (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) { 1752 + bdev->bd_write_holder = true; 1753 + unblock_events = false; 1754 + } 1755 + } 1756 + mutex_unlock(&bdev->bd_mutex); 1757 + 1758 + if (unblock_events) 1759 + disk_unblock_events(disk); 1760 + return bdev; 1761 + 1762 + abort_claiming: 1763 + if (mode & FMODE_EXCL) 1764 + bd_abort_claiming(bdev, holder); 1765 + mutex_unlock(&bdev->bd_mutex); 1766 + disk_unblock_events(disk); 1767 + put_blkdev: 1768 + blkdev_put_no_open(bdev); 1769 + if (ret == -ERESTARTSYS) 1770 + goto retry; 1771 + return ERR_PTR(ret); 1772 + } 1773 + EXPORT_SYMBOL(blkdev_get_by_dev); 1774 + 1775 + /** 1776 + * blkdev_get_by_path - open a block device by name 1777 + * @path: path to the block device to open 1778 + * @mode: FMODE_* mask 1779 + * @holder: exclusive holder identifier 1780 + * 1781 + * Open the block device described by the device file at @path. If @mode 1782 + * includes %FMODE_EXCL, the block device is opened with exclusive access. 1783 + * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may 1784 + * nest for the same @holder. 1785 + * 1786 + * CONTEXT: 1787 + * Might sleep. 
1788 + * 1789 + * RETURNS: 1790 + * Reference to the block_device on success, ERR_PTR(-errno) on failure. 1791 + */ 1792 + struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, 1793 + void *holder) 1794 + { 1795 + struct block_device *bdev; 1796 + dev_t dev; 1797 + int error; 1798 + 1799 + error = lookup_bdev(path, &dev); 1800 + if (error) 1801 + return ERR_PTR(error); 1802 + 1803 + bdev = blkdev_get_by_dev(dev, mode, holder); 1804 + if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) { 1805 + blkdev_put(bdev, mode); 1806 + return ERR_PTR(-EACCES); 1807 + } 1808 1809 return bdev; 1810 } 1811 + EXPORT_SYMBOL(blkdev_get_by_path); 1812 1813 static int blkdev_open(struct inode * inode, struct file * filp) 1814 { ··· 1741 if ((filp->f_flags & O_ACCMODE) == 3) 1742 filp->f_mode |= FMODE_WRITE_IOCTL; 1743 1744 + bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp); 1745 + if (IS_ERR(bdev)) 1746 + return PTR_ERR(bdev); 1747 filp->f_mapping = bdev->bd_inode->i_mapping; 1748 filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping); 1749 + return 0; 1750 } 1751 1752 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) ··· 1774 WARN_ON_ONCE(bdev->bd_holders); 1775 sync_blockdev(bdev); 1776 kill_bdev(bdev); 1777 bdev_write_inode(bdev); 1778 + if (bdev_is_partition(bdev)) 1779 + victim = bdev_whole(bdev); 1780 } 1781 1782 + if (!bdev_is_partition(bdev) && disk->fops->release) 1783 + disk->fops->release(disk, mode); 1784 mutex_unlock(&bdev->bd_mutex); 1785 + if (victim) { 1786 __blkdev_put(victim, mode, 1); 1787 + bdput(victim); 1788 + } 1789 } 1790 1791 void blkdev_put(struct block_device *bdev, fmode_t mode) 1792 { 1793 + struct gendisk *disk = bdev->bd_disk; 1794 + 1795 mutex_lock(&bdev->bd_mutex); 1796 1797 if (mode & FMODE_EXCL) { 1798 + struct block_device *whole = bdev_whole(bdev); 1799 bool bdev_free; 1800 1801 /* ··· 1812 spin_lock(&bdev_lock); 1813 1814 WARN_ON_ONCE(--bdev->bd_holders < 0); 1815 + WARN_ON_ONCE(--whole->bd_holders < 0); 1816 1817 if ((bdev_free = !bdev->bd_holders)) 1818 bdev->bd_holder = NULL; 1819 + if (!whole->bd_holders) 1820 + whole->bd_holder = NULL; 1821 1822 spin_unlock(&bdev_lock); 1823 ··· 1827 * unblock evpoll if it was a write holder. 1828 */ 1829 if (bdev_free && bdev->bd_write_holder) { 1830 + disk_unblock_events(disk); 1831 bdev->bd_write_holder = false; 1832 } 1833 } ··· 1837 * event. This is to ensure detection of media removal commanded 1838 * from userland - e.g. eject(1). 1839 */ 1840 + disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE); 1841 mutex_unlock(&bdev->bd_mutex); 1842 1843 __blkdev_put(bdev, mode, 0); 1844 + blkdev_put_no_open(bdev); 1845 } 1846 EXPORT_SYMBOL(blkdev_put); 1847 ··· 2054 * namespace if possible and return it. Return ERR_PTR(error) 2055 * otherwise. 2056 */ 2057 + int lookup_bdev(const char *pathname, dev_t *dev) 2058 { 2059 struct inode *inode; 2060 struct path path; 2061 int error; 2062 2063 if (!pathname || !*pathname) 2064 + return -EINVAL; 2065 2066 error = kern_path(pathname, LOOKUP_FOLLOW, &path); 2067 if (error) 2068 + return error; 2069 2070 inode = d_backing_inode(path.dentry); 2071 error = -ENOTBLK; 2072 if (!S_ISBLK(inode->i_mode)) 2073 + goto out_path_put; 2074 error = -EACCES; 2075 if (!may_open_dev(&path)) 2076 + goto out_path_put; 2077 + 2078 + *dev = inode->i_rdev; 2079 + error = 0; 2080 + out_path_put: 2081 path_put(&path); 2082 + return error; 2083 } 2084 EXPORT_SYMBOL(lookup_bdev); 2085
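Taken together, the reworked open path gives callers a much simpler contract: blkdev_get_by_dev()/blkdev_get_by_path() return a fully set up, reference-counted block_device (doing the devcgroup check, exclusive claiming and disk-event handling internally), and blkdev_put() is the single teardown call. A hedged caller-side sketch; the function names and the holder token are illustrative, not part of this series:

    static int my_holder;                           /* used only as a unique non-NULL cookie */

    static struct block_device *my_open_backing_dev(const char *path)
    {
        /* FMODE_EXCL now requires a non-NULL holder */
        return blkdev_get_by_path(path, FMODE_READ | FMODE_WRITE | FMODE_EXCL,
                                  &my_holder);      /* ERR_PTR(-errno) on failure */
    }

    static void my_close_backing_dev(struct block_device *bdev)
    {
        blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
    }

Callers that only need the device number, like the btrfs and quota hunks further down, use the new lookup_bdev(path, &dev) and never instantiate a block_device at all.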
+3 -12
fs/btrfs/sysfs.c
··· 1343 1344 void btrfs_sysfs_remove_device(struct btrfs_device *device) 1345 { 1346 - struct hd_struct *disk; 1347 - struct kobject *disk_kobj; 1348 struct kobject *devices_kobj; 1349 1350 /* ··· 1352 devices_kobj = device->fs_info->fs_devices->devices_kobj; 1353 ASSERT(devices_kobj); 1354 1355 - if (device->bdev) { 1356 - disk = device->bdev->bd_part; 1357 - disk_kobj = &part_to_dev(disk)->kobj; 1358 - sysfs_remove_link(devices_kobj, disk_kobj->name); 1359 - } 1360 1361 if (device->devid_kobj.state_initialized) { 1362 kobject_del(&device->devid_kobj); ··· 1459 nofs_flag = memalloc_nofs_save(); 1460 1461 if (device->bdev) { 1462 - struct hd_struct *disk; 1463 - struct kobject *disk_kobj; 1464 - 1465 - disk = device->bdev->bd_part; 1466 - disk_kobj = &part_to_dev(disk)->kobj; 1467 1468 ret = sysfs_create_link(devices_kobj, disk_kobj, disk_kobj->name); 1469 if (ret) {
··· 1343 1344 void btrfs_sysfs_remove_device(struct btrfs_device *device) 1345 { 1346 struct kobject *devices_kobj; 1347 1348 /* ··· 1354 devices_kobj = device->fs_info->fs_devices->devices_kobj; 1355 ASSERT(devices_kobj); 1356 1357 + if (device->bdev) 1358 + sysfs_remove_link(devices_kobj, bdev_kobj(device->bdev)->name); 1359 1360 if (device->devid_kobj.state_initialized) { 1361 kobject_del(&device->devid_kobj); ··· 1464 nofs_flag = memalloc_nofs_save(); 1465 1466 if (device->bdev) { 1467 + struct kobject *disk_kobj = bdev_kobj(device->bdev); 1468 1469 ret = sysfs_create_link(devices_kobj, disk_kobj, disk_kobj->name); 1470 if (ret) {
+6 -7
fs/btrfs/volumes.c
··· 935 * make sure it's the same device if the device is mounted 936 */ 937 if (device->bdev) { 938 - struct block_device *path_bdev; 939 940 - path_bdev = lookup_bdev(path); 941 - if (IS_ERR(path_bdev)) { 942 mutex_unlock(&fs_devices->device_list_mutex); 943 - return ERR_CAST(path_bdev); 944 } 945 946 - if (device->bdev != path_bdev) { 947 - bdput(path_bdev); 948 mutex_unlock(&fs_devices->device_list_mutex); 949 /* 950 * device->fs_info may not be reliable here, so ··· 959 task_pid_nr(current)); 960 return ERR_PTR(-EEXIST); 961 } 962 - bdput(path_bdev); 963 btrfs_info_in_rcu(device->fs_info, 964 "devid %llu device path %s changed to %s scanned by %s (%d)", 965 devid, rcu_str_deref(device->name),
··· 935 * make sure it's the same device if the device is mounted 936 */ 937 if (device->bdev) { 938 + int error; 939 + dev_t path_dev; 940 941 + error = lookup_bdev(path, &path_dev); 942 + if (error) { 943 mutex_unlock(&fs_devices->device_list_mutex); 944 + return ERR_PTR(error); 945 } 946 947 + if (device->bdev->bd_dev != path_dev) { 948 mutex_unlock(&fs_devices->device_list_mutex); 949 /* 950 * device->fs_info may not be reliable here, so ··· 959 task_pid_nr(current)); 960 return ERR_PTR(-EEXIST); 961 } 962 btrfs_info_in_rcu(device->fs_info, 963 "devid %llu device path %s changed to %s scanned by %s (%d)", 964 devid, rcu_str_deref(device->name),
+3 -3
fs/btrfs/zoned.c
··· 165 if (!zone_info) 166 return -ENOMEM; 167 168 - nr_sectors = bdev->bd_part->nr_sects; 169 zone_sectors = bdev_zone_sectors(bdev); 170 /* Check if it's power of 2 (see is_power_of_2) */ 171 ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); ··· 505 return -EINVAL; 506 zone_size = zone_sectors << SECTOR_SHIFT; 507 zone_sectors_shift = ilog2(zone_sectors); 508 - nr_sectors = bdev->bd_part->nr_sects; 509 nr_zones = nr_sectors >> zone_sectors_shift; 510 511 sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); ··· 603 604 zone_sectors = bdev_zone_sectors(bdev); 605 zone_sectors_shift = ilog2(zone_sectors); 606 - nr_sectors = bdev->bd_part->nr_sects; 607 nr_zones = nr_sectors >> zone_sectors_shift; 608 609 sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
··· 165 if (!zone_info) 166 return -ENOMEM; 167 168 + nr_sectors = bdev_nr_sectors(bdev); 169 zone_sectors = bdev_zone_sectors(bdev); 170 /* Check if it's power of 2 (see is_power_of_2) */ 171 ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); ··· 505 return -EINVAL; 506 zone_size = zone_sectors << SECTOR_SHIFT; 507 zone_sectors_shift = ilog2(zone_sectors); 508 + nr_sectors = bdev_nr_sectors(bdev); 509 nr_zones = nr_sectors >> zone_sectors_shift; 510 511 sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); ··· 603 604 zone_sectors = bdev_zone_sectors(bdev); 605 zone_sectors_shift = ilog2(zone_sectors); 606 + nr_sectors = bdev_nr_sectors(bdev); 607 nr_zones = nr_sectors >> zone_sectors_shift; 608 609 sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
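With struct hd_struct gone, the partition size lives in the block_device itself and is read through bdev_nr_sectors(). A minimal sketch of the replacement pattern used above; the helper name and the byte conversion are illustrative:

    static loff_t my_bdev_size(struct block_device *bdev)      /* hypothetical */
    {
        sector_t nr_sectors = bdev_nr_sectors(bdev);    /* was bdev->bd_part->nr_sects */

        return (loff_t)nr_sectors << SECTOR_SHIFT;      /* 512-byte sectors to bytes */
    }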
+1 -1
fs/buffer.c
··· 523 524 void emergency_thaw_bdev(struct super_block *sb) 525 { 526 - while (sb->s_bdev && !thaw_bdev(sb->s_bdev, sb)) 527 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); 528 } 529
··· 523 524 void emergency_thaw_bdev(struct super_block *sb) 525 { 526 + while (sb->s_bdev && !thaw_bdev(sb->s_bdev)) 527 printk(KERN_WARNING "Emergency Thaw on %pg\n", sb->s_bdev); 528 } 529
+1 -1
fs/ext4/ioctl.c
··· 624 case EXT4_GOING_FLAGS_DEFAULT: 625 freeze_bdev(sb->s_bdev); 626 set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); 627 - thaw_bdev(sb->s_bdev, sb); 628 break; 629 case EXT4_GOING_FLAGS_LOGFLUSH: 630 set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
··· 624 case EXT4_GOING_FLAGS_DEFAULT: 625 freeze_bdev(sb->s_bdev); 626 set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags); 627 + thaw_bdev(sb->s_bdev); 628 break; 629 case EXT4_GOING_FLAGS_LOGFLUSH: 630 set_bit(EXT4_FLAGS_SHUTDOWN, &sbi->s_ext4_flags);
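freeze_bdev() no longer hands back a superblock; both freeze_bdev() and thaw_bdev() now return 0 or a negative errno and track the frozen superblock internally in bd_fsfreeze_sb. A minimal sketch of the new calling convention (the ext4 shutdown path above deliberately ignores the return values; the helper name here is illustrative):

    static int my_quiesce(struct super_block *sb)       /* hypothetical */
    {
        int err = freeze_bdev(sb->s_bdev);

        if (err)
            return err;
        /* the filesystem on the bdev, if any, is frozen at this point */
        return thaw_bdev(sb->s_bdev);
    }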
+6 -12
fs/ext4/super.c
··· 4044 sbi->s_sb = sb; 4045 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 4046 sbi->s_sb_block = sb_block; 4047 - if (sb->s_bdev->bd_part) 4048 - sbi->s_sectors_written_start = 4049 - part_stat_read(sb->s_bdev->bd_part, sectors[STAT_WRITE]); 4050 4051 /* Cleanup superblock name */ 4052 strreplace(sb->s_id, '/', '!'); ··· 5504 */ 5505 if (!(sb->s_flags & SB_RDONLY)) 5506 ext4_update_tstamp(es, s_wtime); 5507 - if (sb->s_bdev->bd_part) 5508 - es->s_kbytes_written = 5509 - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 5510 - ((part_stat_read(sb->s_bdev->bd_part, 5511 - sectors[STAT_WRITE]) - 5512 - EXT4_SB(sb)->s_sectors_written_start) >> 1)); 5513 - else 5514 - es->s_kbytes_written = 5515 - cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); 5516 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) 5517 ext4_free_blocks_count_set(es, 5518 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
··· 4044 sbi->s_sb = sb; 4045 sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; 4046 sbi->s_sb_block = sb_block; 4047 + sbi->s_sectors_written_start = 4048 + part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); 4049 4050 /* Cleanup superblock name */ 4051 strreplace(sb->s_id, '/', '!'); ··· 5505 */ 5506 if (!(sb->s_flags & SB_RDONLY)) 5507 ext4_update_tstamp(es, s_wtime); 5508 + es->s_kbytes_written = 5509 + cpu_to_le64(EXT4_SB(sb)->s_kbytes_written + 5510 + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - 5511 + EXT4_SB(sb)->s_sectors_written_start) >> 1)); 5512 if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeclusters_counter)) 5513 ext4_free_blocks_count_set(es, 5514 EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive(
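Because the per-partition statistics moved into struct block_device (bd_stats), part_stat_read() now takes the bdev directly and the old "->bd_part may be NULL" guards disappear. A minimal sketch of reading the lifetime write counter, mirroring what the ext4 and f2fs hunks do; the wrapper name is illustrative:

    static u64 my_lifetime_write_kbytes(struct super_block *sb)    /* hypothetical */
    {
        u64 written = part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);

        return written >> 1;                /* 512-byte sectors -> KiB */
    }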
+2 -8
fs/ext4/sysfs.c
··· 62 { 63 struct super_block *sb = sbi->s_buddy_cache->i_sb; 64 65 - if (!sb->s_bdev->bd_part) 66 - return snprintf(buf, PAGE_SIZE, "0\n"); 67 return snprintf(buf, PAGE_SIZE, "%lu\n", 68 - (part_stat_read(sb->s_bdev->bd_part, 69 - sectors[STAT_WRITE]) - 70 sbi->s_sectors_written_start) >> 1); 71 } 72 ··· 71 { 72 struct super_block *sb = sbi->s_buddy_cache->i_sb; 73 74 - if (!sb->s_bdev->bd_part) 75 - return snprintf(buf, PAGE_SIZE, "0\n"); 76 return snprintf(buf, PAGE_SIZE, "%llu\n", 77 (unsigned long long)(sbi->s_kbytes_written + 78 - ((part_stat_read(sb->s_bdev->bd_part, 79 - sectors[STAT_WRITE]) - 80 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 81 } 82
··· 62 { 63 struct super_block *sb = sbi->s_buddy_cache->i_sb; 64 65 return snprintf(buf, PAGE_SIZE, "%lu\n", 66 + (part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - 67 sbi->s_sectors_written_start) >> 1); 68 } 69 ··· 74 { 75 struct super_block *sb = sbi->s_buddy_cache->i_sb; 76 77 return snprintf(buf, PAGE_SIZE, "%llu\n", 78 (unsigned long long)(sbi->s_kbytes_written + 79 + ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) - 80 EXT4_SB(sb)->s_sectors_written_start) >> 1))); 81 } 82
+1 -4
fs/f2fs/checkpoint.c
··· 1395 __u32 crc32 = 0; 1396 int i; 1397 int cp_payload_blks = __cp_payload(sbi); 1398 - struct super_block *sb = sbi->sb; 1399 struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); 1400 u64 kbytes_written; 1401 int err; ··· 1488 start_blk += data_sum_blocks; 1489 1490 /* Record write statistics in the hot node summary */ 1491 - kbytes_written = sbi->kbytes_written; 1492 - if (sb->s_bdev->bd_part) 1493 - kbytes_written += BD_PART_WRITTEN(sbi); 1494 1495 seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); 1496
··· 1395 __u32 crc32 = 0; 1396 int i; 1397 int cp_payload_blks = __cp_payload(sbi); 1398 struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); 1399 u64 kbytes_written; 1400 int err; ··· 1489 start_blk += data_sum_blocks; 1490 1491 /* Record write statistics in the hot node summary */ 1492 + kbytes_written = sbi->kbytes_written + BD_PART_WRITTEN(sbi); 1493 1494 seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written); 1495
+1 -1
fs/f2fs/f2fs.h
··· 1675 * and the return value is in kbytes. s is of struct f2fs_sb_info. 1676 */ 1677 #define BD_PART_WRITTEN(s) \ 1678 - (((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[STAT_WRITE]) - \ 1679 (s)->sectors_written_start) >> 1) 1680 1681 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
··· 1675 * and the return value is in kbytes. s is of struct f2fs_sb_info. 1676 */ 1677 #define BD_PART_WRITTEN(s) \ 1678 + (((u64)part_stat_read((s)->sb->s_bdev, sectors[STAT_WRITE]) - \ 1679 (s)->sectors_written_start) >> 1) 1680 1681 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
+5 -9
fs/f2fs/file.c
··· 2230 2231 switch (in) { 2232 case F2FS_GOING_DOWN_FULLSYNC: 2233 - sb = freeze_bdev(sb->s_bdev); 2234 - if (IS_ERR(sb)) { 2235 - ret = PTR_ERR(sb); 2236 goto out; 2237 - } 2238 - if (sb) { 2239 - f2fs_stop_checkpoint(sbi, false); 2240 - set_sbi_flag(sbi, SBI_IS_SHUTDOWN); 2241 - thaw_bdev(sb->s_bdev, sb); 2242 - } 2243 break; 2244 case F2FS_GOING_DOWN_METASYNC: 2245 /* do checkpoint only */
··· 2230 2231 switch (in) { 2232 case F2FS_GOING_DOWN_FULLSYNC: 2233 + ret = freeze_bdev(sb->s_bdev); 2234 + if (ret) 2235 goto out; 2236 + f2fs_stop_checkpoint(sbi, false); 2237 + set_sbi_flag(sbi, SBI_IS_SHUTDOWN); 2238 + thaw_bdev(sb->s_bdev); 2239 break; 2240 case F2FS_GOING_DOWN_METASYNC: 2241 /* do checkpoint only */
+3 -5
fs/f2fs/super.c
··· 3151 static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) 3152 { 3153 struct block_device *bdev = FDEV(devi).bdev; 3154 - sector_t nr_sectors = bdev->bd_part->nr_sects; 3155 struct f2fs_report_zones_args rep_zone_arg; 3156 int ret; 3157 ··· 3700 } 3701 3702 /* For write statistics */ 3703 - if (sb->s_bdev->bd_part) 3704 - sbi->sectors_written_start = 3705 - (u64)part_stat_read(sb->s_bdev->bd_part, 3706 - sectors[STAT_WRITE]); 3707 3708 /* Read accumulated write IO statistics if exists */ 3709 seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
··· 3151 static int init_blkz_info(struct f2fs_sb_info *sbi, int devi) 3152 { 3153 struct block_device *bdev = FDEV(devi).bdev; 3154 + sector_t nr_sectors = bdev_nr_sectors(bdev); 3155 struct f2fs_report_zones_args rep_zone_arg; 3156 int ret; 3157 ··· 3700 } 3701 3702 /* For write statistics */ 3703 + sbi->sectors_written_start = 3704 + (u64)part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); 3705 3706 /* Read accumulated write IO statistics if exists */ 3707 seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
-9
fs/f2fs/sysfs.c
··· 90 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, 91 struct f2fs_sb_info *sbi, char *buf) 92 { 93 - struct super_block *sb = sbi->sb; 94 - 95 - if (!sb->s_bdev->bd_part) 96 - return sprintf(buf, "0\n"); 97 - 98 return sprintf(buf, "%llu\n", 99 (unsigned long long)(sbi->kbytes_written + 100 BD_PART_WRITTEN(sbi))); ··· 98 static ssize_t features_show(struct f2fs_attr *a, 99 struct f2fs_sb_info *sbi, char *buf) 100 { 101 - struct super_block *sb = sbi->sb; 102 int len = 0; 103 - 104 - if (!sb->s_bdev->bd_part) 105 - return sprintf(buf, "0\n"); 106 107 if (f2fs_sb_has_encrypt(sbi)) 108 len += scnprintf(buf, PAGE_SIZE - len, "%s",
··· 90 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, 91 struct f2fs_sb_info *sbi, char *buf) 92 { 93 return sprintf(buf, "%llu\n", 94 (unsigned long long)(sbi->kbytes_written + 95 BD_PART_WRITTEN(sbi))); ··· 103 static ssize_t features_show(struct f2fs_attr *a, 104 struct f2fs_sb_info *sbi, char *buf) 105 { 106 int len = 0; 107 108 if (f2fs_sb_has_encrypt(sbi)) 109 len += scnprintf(buf, PAGE_SIZE - len, "%s",
-3
fs/inode.c
··· 155 inode->i_bytes = 0; 156 inode->i_generation = 0; 157 inode->i_pipe = NULL; 158 - inode->i_bdev = NULL; 159 inode->i_cdev = NULL; 160 inode->i_link = NULL; 161 inode->i_dir_seq = 0; ··· 579 truncate_inode_pages_final(&inode->i_data); 580 clear_inode(inode); 581 } 582 - if (S_ISBLK(inode->i_mode) && inode->i_bdev) 583 - bd_forget(inode); 584 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 585 cd_forget(inode); 586
··· 155 inode->i_bytes = 0; 156 inode->i_generation = 0; 157 inode->i_pipe = NULL; 158 inode->i_cdev = NULL; 159 inode->i_link = NULL; 160 inode->i_dir_seq = 0; ··· 580 truncate_inode_pages_final(&inode->i_data); 581 clear_inode(inode); 582 } 583 if (S_ISCHR(inode->i_mode) && inode->i_cdev) 584 cd_forget(inode); 585
+2 -5
fs/internal.h
··· 25 extern int __sync_blockdev(struct block_device *bdev, int wait); 26 void iterate_bdevs(void (*)(struct block_device *, void *), void *); 27 void emergency_thaw_bdev(struct super_block *sb); 28 - void bd_forget(struct inode *inode); 29 #else 30 static inline void bdev_cache_init(void) 31 { ··· 41 static inline int emergency_thaw_bdev(struct super_block *sb) 42 { 43 return 0; 44 - } 45 - static inline void bd_forget(struct inode *inode) 46 - { 47 } 48 #endif /* CONFIG_BLOCK */ 49 ··· 112 */ 113 extern int reconfigure_super(struct fs_context *); 114 extern bool trylock_super(struct super_block *sb); 115 - extern struct super_block *user_get_super(dev_t); 116 extern bool mount_capable(struct fs_context *); 117 118 /*
··· 25 extern int __sync_blockdev(struct block_device *bdev, int wait); 26 void iterate_bdevs(void (*)(struct block_device *, void *), void *); 27 void emergency_thaw_bdev(struct super_block *sb); 28 #else 29 static inline void bdev_cache_init(void) 30 { ··· 42 static inline int emergency_thaw_bdev(struct super_block *sb) 43 { 44 return 0; 45 } 46 #endif /* CONFIG_BLOCK */ 47 ··· 116 */ 117 extern int reconfigure_super(struct fs_context *); 118 extern bool trylock_super(struct super_block *sb); 119 + struct super_block *user_get_super(dev_t, bool excl); 120 + void put_super(struct super_block *sb); 121 extern bool mount_capable(struct fs_context *); 122 123 /*
+4 -6
fs/io_uring.c
··· 2802 2803 static bool io_bdev_nowait(struct block_device *bdev) 2804 { 2805 - #ifdef CONFIG_BLOCK 2806 return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); 2807 - #else 2808 - return true; 2809 - #endif 2810 } 2811 2812 /* ··· 2815 umode_t mode = file_inode(file)->i_mode; 2816 2817 if (S_ISBLK(mode)) { 2818 - if (io_bdev_nowait(file->f_inode->i_bdev)) 2819 return true; 2820 return false; 2821 } 2822 if (S_ISCHR(mode) || S_ISSOCK(mode)) 2823 return true; 2824 if (S_ISREG(mode)) { 2825 - if (io_bdev_nowait(file->f_inode->i_sb->s_bdev) && 2826 file->f_op != &io_uring_fops) 2827 return true; 2828 return false;
··· 2802 2803 static bool io_bdev_nowait(struct block_device *bdev) 2804 { 2805 return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); 2806 } 2807 2808 /* ··· 2819 umode_t mode = file_inode(file)->i_mode; 2820 2821 if (S_ISBLK(mode)) { 2822 + if (IS_ENABLED(CONFIG_BLOCK) && 2823 + io_bdev_nowait(I_BDEV(file->f_mapping->host))) 2824 return true; 2825 return false; 2826 } 2827 if (S_ISCHR(mode) || S_ISSOCK(mode)) 2828 return true; 2829 if (S_ISREG(mode)) { 2830 + if (IS_ENABLED(CONFIG_BLOCK) && 2831 + io_bdev_nowait(file->f_inode->i_sb->s_bdev) && 2832 file->f_op != &io_uring_fops) 2833 return true; 2834 return false;
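With inode->i_bdev removed (see the fs/inode.c hunk above), an open block special file reaches its block_device through the bdev filesystem inode that backs its mapping, which is exactly what the io_uring change does. A minimal sketch; the wrapper name is illustrative:

    static struct block_device *my_file_bdev(struct file *file)    /* hypothetical */
    {
        /* for files opened via blkdev_open(), f_mapping is bdev->bd_inode->i_mapping */
        return I_BDEV(file->f_mapping->host);
    }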
+2 -3
fs/pipe.c
··· 1342 } 1343 1344 /* 1345 - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same 1346 - * location, so checking ->i_pipe is not enough to verify that this is a 1347 - * pipe. 1348 */ 1349 struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) 1350 {
··· 1342 } 1343 1344 /* 1345 + * Note that i_pipe and i_cdev share the same location, so checking ->i_pipe is 1346 + * not enough to verify that this is a pipe. 1347 */ 1348 struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) 1349 {
+1 -1
fs/pstore/blk.c
··· 244 return bdev; 245 } 246 247 - nr_sects = part_nr_sects_read(bdev->bd_part); 248 if (!nr_sects) { 249 pr_err("not enough space for '%s'\n", blkdev); 250 blkdev_put(bdev, mode);
··· 244 return bdev; 245 } 246 247 + nr_sects = bdev_nr_sectors(bdev); 248 if (!nr_sects) { 249 pr_err("not enough space for '%s'\n", blkdev); 250 blkdev_put(bdev, mode);
+28 -12
fs/quota/quota.c
··· 20 #include <linux/writeback.h> 21 #include <linux/nospec.h> 22 #include "compat.h" 23 24 static int check_quotactl_permission(struct super_block *sb, int type, int cmd, 25 qid_t id) ··· 866 static struct super_block *quotactl_block(const char __user *special, int cmd) 867 { 868 #ifdef CONFIG_BLOCK 869 - struct block_device *bdev; 870 struct super_block *sb; 871 struct filename *tmp = getname(special); 872 873 if (IS_ERR(tmp)) 874 return ERR_CAST(tmp); 875 - bdev = lookup_bdev(tmp->name); 876 putname(tmp); 877 - if (IS_ERR(bdev)) 878 - return ERR_CAST(bdev); 879 - if (quotactl_cmd_onoff(cmd)) 880 - sb = get_super_exclusive_thawed(bdev); 881 - else if (quotactl_cmd_write(cmd)) 882 - sb = get_super_thawed(bdev); 883 - else 884 - sb = get_super(bdev); 885 - bdput(bdev); 886 if (!sb) 887 return ERR_PTR(-ENODEV); 888 - 889 return sb; 890 #else 891 return ERR_PTR(-ENODEV); 892 #endif
··· 20 #include <linux/writeback.h> 21 #include <linux/nospec.h> 22 #include "compat.h" 23 + #include "../internal.h" 24 25 static int check_quotactl_permission(struct super_block *sb, int type, int cmd, 26 qid_t id) ··· 865 static struct super_block *quotactl_block(const char __user *special, int cmd) 866 { 867 #ifdef CONFIG_BLOCK 868 struct super_block *sb; 869 struct filename *tmp = getname(special); 870 + bool excl = false, thawed = false; 871 + int error; 872 + dev_t dev; 873 874 if (IS_ERR(tmp)) 875 return ERR_CAST(tmp); 876 + error = lookup_bdev(tmp->name, &dev); 877 putname(tmp); 878 + if (error) 879 + return ERR_PTR(error); 880 + 881 + if (quotactl_cmd_onoff(cmd)) { 882 + excl = true; 883 + thawed = true; 884 + } else if (quotactl_cmd_write(cmd)) { 885 + thawed = true; 886 + } 887 + 888 + retry: 889 + sb = user_get_super(dev, excl); 890 if (!sb) 891 return ERR_PTR(-ENODEV); 892 + if (thawed && sb->s_writers.frozen != SB_UNFROZEN) { 893 + if (excl) 894 + up_write(&sb->s_umount); 895 + else 896 + up_read(&sb->s_umount); 897 + wait_event(sb->s_writers.wait_unfrozen, 898 + sb->s_writers.frozen == SB_UNFROZEN); 899 + put_super(sb); 900 + goto retry; 901 + } 902 return sb; 903 + 904 #else 905 return ERR_PTR(-ENODEV); 906 #endif
+1 -1
fs/statfs.c
··· 235 236 static int vfs_ustat(dev_t dev, struct kstatfs *sbuf) 237 { 238 - struct super_block *s = user_get_super(dev); 239 int err; 240 if (!s) 241 return -EINVAL;
··· 235 236 static int vfs_ustat(dev_t dev, struct kstatfs *sbuf) 237 { 238 + struct super_block *s = user_get_super(dev, false); 239 int err; 240 if (!s) 241 return -EINVAL;
+20 -73
fs/super.c
··· 307 * Drops a temporary reference, frees superblock if there's no 308 * references left. 309 */ 310 - static void put_super(struct super_block *sb) 311 { 312 spin_lock(&sb_lock); 313 __put_super(sb); ··· 740 741 EXPORT_SYMBOL(iterate_supers_type); 742 743 - static struct super_block *__get_super(struct block_device *bdev, bool excl) 744 { 745 struct super_block *sb; 746 ··· 762 if (sb->s_bdev == bdev) { 763 sb->s_count++; 764 spin_unlock(&sb_lock); 765 - if (!excl) 766 - down_read(&sb->s_umount); 767 - else 768 - down_write(&sb->s_umount); 769 /* still alive? */ 770 if (sb->s_root && (sb->s_flags & SB_BORN)) 771 return sb; 772 - if (!excl) 773 - up_read(&sb->s_umount); 774 - else 775 - up_write(&sb->s_umount); 776 /* nope, got unmounted */ 777 spin_lock(&sb_lock); 778 __put_super(sb); ··· 776 spin_unlock(&sb_lock); 777 return NULL; 778 } 779 - 780 - /** 781 - * get_super - get the superblock of a device 782 - * @bdev: device to get the superblock for 783 - * 784 - * Scans the superblock list and finds the superblock of the file system 785 - * mounted on the device given. %NULL is returned if no match is found. 786 - */ 787 - struct super_block *get_super(struct block_device *bdev) 788 - { 789 - return __get_super(bdev, false); 790 - } 791 - EXPORT_SYMBOL(get_super); 792 - 793 - static struct super_block *__get_super_thawed(struct block_device *bdev, 794 - bool excl) 795 - { 796 - while (1) { 797 - struct super_block *s = __get_super(bdev, excl); 798 - if (!s || s->s_writers.frozen == SB_UNFROZEN) 799 - return s; 800 - if (!excl) 801 - up_read(&s->s_umount); 802 - else 803 - up_write(&s->s_umount); 804 - wait_event(s->s_writers.wait_unfrozen, 805 - s->s_writers.frozen == SB_UNFROZEN); 806 - put_super(s); 807 - } 808 - } 809 - 810 - /** 811 - * get_super_thawed - get thawed superblock of a device 812 - * @bdev: device to get the superblock for 813 - * 814 - * Scans the superblock list and finds the superblock of the file system 815 - * mounted on the device. The superblock is returned once it is thawed 816 - * (or immediately if it was not frozen). %NULL is returned if no match 817 - * is found. 818 - */ 819 - struct super_block *get_super_thawed(struct block_device *bdev) 820 - { 821 - return __get_super_thawed(bdev, false); 822 - } 823 - EXPORT_SYMBOL(get_super_thawed); 824 - 825 - /** 826 - * get_super_exclusive_thawed - get thawed superblock of a device 827 - * @bdev: device to get the superblock for 828 - * 829 - * Scans the superblock list and finds the superblock of the file system 830 - * mounted on the device. The superblock is returned once it is thawed 831 - * (or immediately if it was not frozen) and s_umount semaphore is held 832 - * in exclusive mode. %NULL is returned if no match is found. 833 - */ 834 - struct super_block *get_super_exclusive_thawed(struct block_device *bdev) 835 - { 836 - return __get_super_thawed(bdev, true); 837 - } 838 - EXPORT_SYMBOL(get_super_exclusive_thawed); 839 840 /** 841 * get_active_super - get an active reference to the superblock of a device ··· 808 return NULL; 809 } 810 811 - struct super_block *user_get_super(dev_t dev) 812 { 813 struct super_block *sb; 814 ··· 820 if (sb->s_dev == dev) { 821 sb->s_count++; 822 spin_unlock(&sb_lock); 823 - down_read(&sb->s_umount); 824 /* still alive? */ 825 if (sb->s_root && (sb->s_flags & SB_BORN)) 826 return sb; 827 - up_read(&sb->s_umount); 828 /* nope, got unmounted */ 829 spin_lock(&sb_lock); 830 __put_super(sb);
··· 307 * Drops a temporary reference, frees superblock if there's no 308 * references left. 309 */ 310 + void put_super(struct super_block *sb) 311 { 312 spin_lock(&sb_lock); 313 __put_super(sb); ··· 740 741 EXPORT_SYMBOL(iterate_supers_type); 742 743 + /** 744 + * get_super - get the superblock of a device 745 + * @bdev: device to get the superblock for 746 + * 747 + * Scans the superblock list and finds the superblock of the file system 748 + * mounted on the device given. %NULL is returned if no match is found. 749 + */ 750 + struct super_block *get_super(struct block_device *bdev) 751 { 752 struct super_block *sb; 753 ··· 755 if (sb->s_bdev == bdev) { 756 sb->s_count++; 757 spin_unlock(&sb_lock); 758 + down_read(&sb->s_umount); 759 /* still alive? */ 760 if (sb->s_root && (sb->s_flags & SB_BORN)) 761 return sb; 762 + up_read(&sb->s_umount); 763 /* nope, got unmounted */ 764 spin_lock(&sb_lock); 765 __put_super(sb); ··· 775 spin_unlock(&sb_lock); 776 return NULL; 777 } 778 779 /** 780 * get_active_super - get an active reference to the superblock of a device ··· 867 return NULL; 868 } 869 870 + struct super_block *user_get_super(dev_t dev, bool excl) 871 { 872 struct super_block *sb; 873 ··· 879 if (sb->s_dev == dev) { 880 sb->s_count++; 881 spin_unlock(&sb_lock); 882 + if (excl) 883 + down_write(&sb->s_umount); 884 + else 885 + down_read(&sb->s_umount); 886 /* still alive? */ 887 if (sb->s_root && (sb->s_flags & SB_BORN)) 888 return sb; 889 + if (excl) 890 + up_write(&sb->s_umount); 891 + else 892 + up_read(&sb->s_umount); 893 /* nope, got unmounted */ 894 spin_lock(&sb_lock); 895 __put_super(sb);
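user_get_super() now takes an "excl" flag instead of relying on the removed *_thawed helpers; the caller decides how s_umount is taken and, as the fs/quota hunk above shows, does its own waiting for SB_UNFROZEN when it needs a thawed filesystem. A minimal non-exclusive sketch; the function name is illustrative:

    static void my_inspect_sb(dev_t dev)                /* hypothetical */
    {
        struct super_block *sb = user_get_super(dev, false);

        if (!sb)
            return;
        /* s_umount is held for read here and the sb is known to be born */
        drop_super(sb);                                 /* up_read + put_super */
    }

The exclusive variant (excl == true) pairs with drop_super_exclusive() instead.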
+2 -5
fs/xfs/xfs_fsops.c
··· 433 { 434 switch (inflags) { 435 case XFS_FSOP_GOING_FLAGS_DEFAULT: { 436 - struct super_block *sb = freeze_bdev(mp->m_super->s_bdev); 437 - 438 - if (sb && !IS_ERR(sb)) { 439 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 440 - thaw_bdev(sb->s_bdev, sb); 441 } 442 - 443 break; 444 } 445 case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
··· 433 { 434 switch (inflags) { 435 case XFS_FSOP_GOING_FLAGS_DEFAULT: { 436 + if (!freeze_bdev(mp->m_super->s_bdev)) { 437 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 438 + thaw_bdev(mp->m_super->s_bdev); 439 } 440 break; 441 } 442 case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
+15 -8
include/linux/bio.h
··· 148 /* TODO: It is reasonable to complete bio with error here. */ 149 } 150 151 #define __bio_for_each_segment(bvl, bio, iter, start) \ 152 for (iter = (start); \ 153 (iter).bi_size && \ 154 ((bvl = bio_iter_iovec((bio), (iter))), 1); \ 155 - bio_advance_iter((bio), &(iter), (bvl).bv_len)) 156 157 #define bio_for_each_segment(bvl, bio, iter) \ 158 __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) ··· 174 for (iter = (start); \ 175 (iter).bi_size && \ 176 ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ 177 - bio_advance_iter((bio), &(iter), (bvl).bv_len)) 178 179 /* iterate over multi-page bvec */ 180 #define bio_for_each_bvec(bvl, bio, iter) \ ··· 723 { 724 return bs->bio_slab != NULL; 725 } 726 - 727 - /* 728 - * a small number of entries is fine, not going to be performance critical. 729 - * basically we just need to survive 730 - */ 731 - #define BIO_SPLIT_ENTRIES 2 732 733 #if defined(CONFIG_BLK_DEV_INTEGRITY) 734
··· 148 /* TODO: It is reasonable to complete bio with error here. */ 149 } 150 151 + /* @bytes should be less or equal to bvec[i->bi_idx].bv_len */ 152 + static inline void bio_advance_iter_single(const struct bio *bio, 153 + struct bvec_iter *iter, 154 + unsigned int bytes) 155 + { 156 + iter->bi_sector += bytes >> 9; 157 + 158 + if (bio_no_advance_iter(bio)) 159 + iter->bi_size -= bytes; 160 + else 161 + bvec_iter_advance_single(bio->bi_io_vec, iter, bytes); 162 + } 163 + 164 #define __bio_for_each_segment(bvl, bio, iter, start) \ 165 for (iter = (start); \ 166 (iter).bi_size && \ 167 ((bvl = bio_iter_iovec((bio), (iter))), 1); \ 168 + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) 169 170 #define bio_for_each_segment(bvl, bio, iter) \ 171 __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) ··· 161 for (iter = (start); \ 162 (iter).bi_size && \ 163 ((bvl = mp_bvec_iter_bvec((bio)->bi_io_vec, (iter))), 1); \ 164 + bio_advance_iter_single((bio), &(iter), (bvl).bv_len)) 165 166 /* iterate over multi-page bvec */ 167 #define bio_for_each_bvec(bvl, bio, iter) \ ··· 710 { 711 return bs->bio_slab != NULL; 712 } 713 714 #if defined(CONFIG_BLK_DEV_INTEGRITY) 715
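bio_advance_iter_single() is the cheaper per-segment step now used by the iteration macros; it must only be fed a byte count that stays within the current vector (which the macros guarantee by passing bv_len), while the existing bio_advance_iter() remains the general helper for arbitrary counts. A short sketch contrasting the two; the function name and byte count are illustrative:

    static void my_advance(struct bio *bio, unsigned int nbytes)   /* hypothetical */
    {
        struct bvec_iter iter = bio->bi_iter;       /* private copy, bio untouched */
        struct bio_vec bv = bio_iter_iovec(bio, iter);

        /* consumed exactly the current segment: the single-step advance suffices */
        bio_advance_iter_single(bio, &iter, bv.bv_len);

        /* consumed an arbitrary count that may span segments: keep using the
         * general helper */
        bio_advance_iter(bio, &iter, nbytes);
    }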
+2 -2
include/linux/blk-cgroup.h
··· 197 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); 198 199 struct blkg_conf_ctx { 200 - struct gendisk *disk; 201 struct blkcg_gq *blkg; 202 char *body; 203 }; 204 205 - struct gendisk *blkcg_conf_get_disk(char **inputp); 206 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, 207 char *input, struct blkg_conf_ctx *ctx); 208 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
··· 197 u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); 198 199 struct blkg_conf_ctx { 200 + struct block_device *bdev; 201 struct blkcg_gq *blkg; 202 char *body; 203 }; 204 205 + struct block_device *blkcg_conf_open_bdev(char **inputp); 206 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, 207 char *input, struct blkg_conf_ctx *ctx); 208 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
+3
include/linux/blk-mq.h
··· 5 #include <linux/blkdev.h> 6 #include <linux/sbitmap.h> 7 #include <linux/srcu.h> 8 9 struct blk_mq_tags; 10 struct blk_flush_queue; ··· 595 } 596 597 blk_qc_t blk_mq_submit_bio(struct bio *bio); 598 599 #endif
··· 5 #include <linux/blkdev.h> 6 #include <linux/sbitmap.h> 7 #include <linux/srcu.h> 8 + #include <linux/lockdep.h> 9 10 struct blk_mq_tags; 11 struct blk_flush_queue; ··· 594 } 595 596 blk_qc_t blk_mq_submit_bio(struct bio *bio); 597 + void blk_mq_hctx_set_fq_lock_class(struct blk_mq_hw_ctx *hctx, 598 + struct lock_class_key *key); 599 600 #endif
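The new blk_mq_hctx_set_fq_lock_class() export lets a driver move the flush queues of its hardware contexts into a driver-private lockdep class, which matters for stacked setups where two levels of flush-queue locks would otherwise share a class and trip lockdep. A hedged sketch of wiring it up from a driver's ->init_hctx() callback; all names here are illustrative:

    static struct lock_class_key my_fq_key;         /* hypothetical, per driver */

    static int my_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,
                            unsigned int hctx_idx)
    {
        blk_mq_hctx_set_fq_lock_class(hctx, &my_fq_key);
        return 0;
    }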
+22 -2
include/linux/blk_types.h
··· 8 9 #include <linux/types.h> 10 #include <linux/bvec.h> 11 #include <linux/ktime.h> 12 13 struct bio_set; ··· 21 struct bio_crypt_ctx; 22 23 struct block_device { 24 dev_t bd_dev; 25 int bd_openers; 26 struct inode * bd_inode; /* will die */ 27 struct super_block * bd_super; 28 struct mutex bd_mutex; /* open/close mutex */ 29 void * bd_claiming; 30 void * bd_holder; 31 int bd_holders; 32 bool bd_write_holder; 33 #ifdef CONFIG_SYSFS 34 struct list_head bd_holder_disks; 35 #endif 36 - struct block_device * bd_contains; 37 u8 bd_partno; 38 - struct hd_struct * bd_part; 39 /* number of times partitions within this device have been opened. */ 40 unsigned bd_part_count; 41 ··· 51 int bd_fsfreeze_count; 52 /* Mutex for freeze */ 53 struct mutex bd_fsfreeze_mutex; 54 } __randomize_layout; 55 56 /* 57 * Block error status values. See block/blk-core:blk_errors for the details.
··· 8 9 #include <linux/types.h> 10 #include <linux/bvec.h> 11 + #include <linux/device.h> 12 #include <linux/ktime.h> 13 14 struct bio_set; ··· 20 struct bio_crypt_ctx; 21 22 struct block_device { 23 + sector_t bd_start_sect; 24 + struct disk_stats __percpu *bd_stats; 25 + unsigned long bd_stamp; 26 + bool bd_read_only; /* read-only policy */ 27 dev_t bd_dev; 28 int bd_openers; 29 struct inode * bd_inode; /* will die */ 30 struct super_block * bd_super; 31 struct mutex bd_mutex; /* open/close mutex */ 32 void * bd_claiming; 33 + struct device bd_device; 34 void * bd_holder; 35 int bd_holders; 36 bool bd_write_holder; 37 #ifdef CONFIG_SYSFS 38 struct list_head bd_holder_disks; 39 #endif 40 + struct kobject *bd_holder_dir; 41 u8 bd_partno; 42 /* number of times partitions within this device have been opened. */ 43 unsigned bd_part_count; 44 ··· 46 int bd_fsfreeze_count; 47 /* Mutex for freeze */ 48 struct mutex bd_fsfreeze_mutex; 49 + struct super_block *bd_fsfreeze_sb; 50 + 51 + struct partition_meta_info *bd_meta_info; 52 + #ifdef CONFIG_FAIL_MAKE_REQUEST 53 + bool bd_make_it_fail; 54 + #endif 55 } __randomize_layout; 56 + 57 + #define bdev_whole(_bdev) \ 58 + ((_bdev)->bd_disk->part0) 59 + 60 + #define dev_to_bdev(device) \ 61 + container_of((device), struct block_device, bd_device) 62 + 63 + #define bdev_kobj(_bdev) \ 64 + (&((_bdev)->bd_device.kobj)) 65 66 /* 67 * Block error status values. See block/blk-core:blk_errors for the details.
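The three new helpers at the bottom replace open-coded pointer chasing now that a partition is a plain block_device with an embedded struct device: bdev_whole() yields the whole-disk block_device (disk->part0), taking over from the old bd_contains pointer, and dev_to_bdev() is the container_of() for code running inside device-model callbacks. A minimal sketch of the sysfs case; the helper name is illustrative:

    static int my_bdev_sysfs_link(struct kobject *dir, struct block_device *bdev)  /* hypothetical */
    {
        struct kobject *kobj = bdev_kobj(bdev);     /* &bdev->bd_device.kobj */

        return sysfs_create_link(dir, kobj, kobj->name);  /* same pattern as the btrfs hunks */
    }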
+18 -16
include/linux/blkdev.h
··· 191 }; 192 193 struct gendisk *rq_disk; 194 - struct hd_struct *part; 195 #ifdef CONFIG_BLK_RQ_ALLOC_TIME 196 /* Time that the first bio started allocating this request. */ 197 u64 alloc_time_ns; ··· 1491 return -1; 1492 if (bdev_is_partition(bdev)) 1493 return queue_limit_alignment_offset(&q->limits, 1494 - bdev->bd_part->start_sect); 1495 return q->limits.alignment_offset; 1496 } 1497 ··· 1532 1533 if (bdev_is_partition(bdev)) 1534 return queue_limit_discard_alignment(&q->limits, 1535 - bdev->bd_part->start_sect); 1536 return q->limits.discard_alignment; 1537 } 1538 ··· 1853 void (*unlock_native_capacity) (struct gendisk *); 1854 int (*revalidate_disk) (struct gendisk *); 1855 int (*getgeo)(struct block_device *, struct hd_geometry *); 1856 /* this callback is with swap_lock and sometimes page table lock held */ 1857 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1858 int (*report_zones)(struct gendisk *, sector_t sector, ··· 1870 #define blkdev_compat_ptr_ioctl NULL 1871 #endif 1872 1873 - extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, 1874 - unsigned long); 1875 extern int bdev_read_page(struct block_device *, sector_t, struct page *); 1876 extern int bdev_write_page(struct block_device *, sector_t, struct page *, 1877 struct writeback_control *); ··· 1946 void disk_end_io_acct(struct gendisk *disk, unsigned int op, 1947 unsigned long start_time); 1948 1949 - unsigned long part_start_io_acct(struct gendisk *disk, struct hd_struct **part, 1950 - struct bio *bio); 1951 - void part_end_io_acct(struct hd_struct *part, struct bio *bio, 1952 unsigned long start_time); 1953 1954 /** ··· 1976 int set_blocksize(struct block_device *bdev, int size); 1977 1978 const char *bdevname(struct block_device *bdev, char *buffer); 1979 - struct block_device *lookup_bdev(const char *); 1980 1981 void blkdev_show(struct seq_file *seqf, off_t offset); 1982 ··· 1991 struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, 1992 void *holder); 1993 struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); 1994 - int bd_prepare_to_claim(struct block_device *bdev, struct block_device *whole, 1995 - void *holder); 1996 - void bd_abort_claiming(struct block_device *bdev, struct block_device *whole, 1997 - void *holder); 1998 void blkdev_put(struct block_device *bdev, fmode_t mode); 1999 2000 struct block_device *I_BDEV(struct inode *inode); 2001 - struct block_device *bdget_part(struct hd_struct *part); 2002 struct block_device *bdgrab(struct block_device *bdev); 2003 void bdput(struct block_device *); 2004 ··· 2026 #endif 2027 int fsync_bdev(struct block_device *bdev); 2028 2029 - struct super_block *freeze_bdev(struct block_device *bdev); 2030 - int thaw_bdev(struct block_device *bdev, struct super_block *sb); 2031 2032 #endif /* _LINUX_BLKDEV_H */
··· 191 }; 192 193 struct gendisk *rq_disk; 194 + struct block_device *part; 195 #ifdef CONFIG_BLK_RQ_ALLOC_TIME 196 /* Time that the first bio started allocating this request. */ 197 u64 alloc_time_ns; ··· 1491 return -1; 1492 if (bdev_is_partition(bdev)) 1493 return queue_limit_alignment_offset(&q->limits, 1494 + bdev->bd_start_sect); 1495 return q->limits.alignment_offset; 1496 } 1497 ··· 1532 1533 if (bdev_is_partition(bdev)) 1534 return queue_limit_discard_alignment(&q->limits, 1535 + bdev->bd_start_sect); 1536 return q->limits.discard_alignment; 1537 } 1538 ··· 1853 void (*unlock_native_capacity) (struct gendisk *); 1854 int (*revalidate_disk) (struct gendisk *); 1855 int (*getgeo)(struct block_device *, struct hd_geometry *); 1856 + int (*set_read_only)(struct block_device *bdev, bool ro); 1857 /* this callback is with swap_lock and sometimes page table lock held */ 1858 void (*swap_slot_free_notify) (struct block_device *, unsigned long); 1859 int (*report_zones)(struct gendisk *, sector_t sector, ··· 1869 #define blkdev_compat_ptr_ioctl NULL 1870 #endif 1871 1872 extern int bdev_read_page(struct block_device *, sector_t, struct page *); 1873 extern int bdev_write_page(struct block_device *, sector_t, struct page *, 1874 struct writeback_control *); ··· 1947 void disk_end_io_acct(struct gendisk *disk, unsigned int op, 1948 unsigned long start_time); 1949 1950 + unsigned long part_start_io_acct(struct gendisk *disk, 1951 + struct block_device **part, struct bio *bio); 1952 + void part_end_io_acct(struct block_device *part, struct bio *bio, 1953 unsigned long start_time); 1954 1955 /** ··· 1977 int set_blocksize(struct block_device *bdev, int size); 1978 1979 const char *bdevname(struct block_device *bdev, char *buffer); 1980 + int lookup_bdev(const char *pathname, dev_t *dev); 1981 1982 void blkdev_show(struct seq_file *seqf, off_t offset); 1983 ··· 1992 struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, 1993 void *holder); 1994 struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder); 1995 + int bd_prepare_to_claim(struct block_device *bdev, void *holder); 1996 + void bd_abort_claiming(struct block_device *bdev, void *holder); 1997 void blkdev_put(struct block_device *bdev, fmode_t mode); 1998 1999 + /* just for blk-cgroup, don't use elsewhere */ 2000 + struct block_device *blkdev_get_no_open(dev_t dev); 2001 + void blkdev_put_no_open(struct block_device *bdev); 2002 + 2003 + struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); 2004 + void bdev_add(struct block_device *bdev, dev_t dev); 2005 struct block_device *I_BDEV(struct inode *inode); 2006 struct block_device *bdgrab(struct block_device *bdev); 2007 void bdput(struct block_device *); 2008 ··· 2024 #endif 2025 int fsync_bdev(struct block_device *bdev); 2026 2027 + int freeze_bdev(struct block_device *bdev); 2028 + int thaw_bdev(struct block_device *bdev); 2029 2030 #endif /* _LINUX_BLKDEV_H */
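part_start_io_acct()/part_end_io_acct() now hand back and accept a struct block_device for the partition, matching the hd_struct removal. A minimal sketch of the accounting pair as a bio-based driver would use it; the function name is illustrative:

    static void my_account_bio(struct gendisk *disk, struct bio *bio)  /* hypothetical */
    {
        struct block_device *part;
        unsigned long start = part_start_io_acct(disk, &part, bio);

        /* ... drive the bio to completion ... */

        part_end_io_acct(part, bio, start);
    }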
+2 -3
include/linux/blktrace_api.h
··· 75 return ret; 76 } 77 78 - extern void blk_add_driver_data(struct request_queue *q, struct request *rq, 79 - void *data, size_t len); 80 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 81 struct block_device *bdev, 82 char __user *arg); ··· 89 #else /* !CONFIG_BLK_DEV_IO_TRACE */ 90 # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 91 # define blk_trace_shutdown(q) do { } while (0) 92 - # define blk_add_driver_data(q, rq, data, len) do {} while (0) 93 # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) 94 # define blk_trace_startstop(q, start) (-ENOTTY) 95 # define blk_trace_remove(q) (-ENOTTY)
··· 75 return ret; 76 } 77 78 + extern void blk_add_driver_data(struct request *rq, void *data, size_t len); 79 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, 80 struct block_device *bdev, 81 char __user *arg); ··· 90 #else /* !CONFIG_BLK_DEV_IO_TRACE */ 91 # define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) 92 # define blk_trace_shutdown(q) do { } while (0) 93 + # define blk_add_driver_data(rq, data, len) do {} while (0) 94 # define blk_trace_setup(q, name, dev, bdev, arg) (-ENOTTY) 95 # define blk_trace_startstop(q, start) (-ENOTTY) 96 # define blk_trace_remove(q) (-ENOTTY)
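blk_add_driver_data() drops its request_queue argument, since the queue is reachable from the request itself. A hedged sketch of a driver attaching extra payload to the blktrace stream; the payload and function name are illustrative:

    static void my_trace_rq(struct request *rq)         /* hypothetical */
    {
        u32 payload = rq->tag;                          /* illustrative driver data */

        blk_add_driver_data(rq, &payload, sizeof(payload));
    }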
+15 -5
include/linux/bvec.h
··· 121 return true; 122 } 123 124 - static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) 125 { 126 - iter->bi_bvec_done = 0; 127 - iter->bi_idx++; 128 } 129 130 #define for_each_bvec(bvl, bio_vec, iter, start) \ 131 for (iter = (start); \ 132 (iter).bi_size && \ 133 ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ 134 - (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ 135 - (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) 136 137 /* for iterating one bio from start to end */ 138 #define BVEC_ITER_ALL_INIT (struct bvec_iter) \
··· 121 return true; 122 } 123 124 + /* 125 + * A simpler version of bvec_iter_advance(), @bytes should not span 126 + * across multiple bvec entries, i.e. bytes <= bv[i->bi_idx].bv_len 127 + */ 128 + static inline void bvec_iter_advance_single(const struct bio_vec *bv, 129 + struct bvec_iter *iter, unsigned int bytes) 130 { 131 + unsigned int done = iter->bi_bvec_done + bytes; 132 + 133 + if (done == bv[iter->bi_idx].bv_len) { 134 + done = 0; 135 + iter->bi_idx++; 136 + } 137 + iter->bi_bvec_done = done; 138 + iter->bi_size -= bytes; 139 } 140 141 #define for_each_bvec(bvl, bio_vec, iter, start) \ 142 for (iter = (start); \ 143 (iter).bi_size && \ 144 ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ 145 + bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) 146 147 /* for iterating one bio from start to end */ 148 #define BVEC_ITER_ALL_INIT (struct bvec_iter) \
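Because bvec_iter_advance_single() bumps bi_idx itself once the current entry is consumed, the zero-length special case (the old bvec_iter_skip_zero_bvec()) is no longer needed and for_each_bvec() shrinks to a plain loop. A minimal sketch of walking a bare bvec array with it; the function is illustrative:

    static void my_walk(const struct bio_vec *bvecs, struct bvec_iter start)   /* hypothetical */
    {
        struct bio_vec bv;
        struct bvec_iter iter;

        for_each_bvec(bv, bvecs, iter, start) {
            /* bv is one contiguous page/offset/len chunk; zero-length entries
             * are simply stepped over by bvec_iter_advance_single() */
        }
    }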
+1 -4
include/linux/fs.h
··· 696 struct list_head i_devices; 697 union { 698 struct pipe_inode_info *i_pipe; 699 - struct block_device *i_bdev; 700 struct cdev *i_cdev; 701 char *i_link; 702 unsigned i_dir_seq; ··· 1407 1408 struct sb_writers { 1409 int frozen; /* Is sb frozen? */ 1410 - wait_queue_head_t wait_unfrozen; /* for get_super_thawed() */ 1411 struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; 1412 }; 1413 ··· 3130 extern void put_filesystem(struct file_system_type *fs); 3131 extern struct file_system_type *get_fs_type(const char *name); 3132 extern struct super_block *get_super(struct block_device *); 3133 - extern struct super_block *get_super_thawed(struct block_device *); 3134 - extern struct super_block *get_super_exclusive_thawed(struct block_device *bdev); 3135 extern struct super_block *get_active_super(struct block_device *bdev); 3136 extern void drop_super(struct super_block *sb); 3137 extern void drop_super_exclusive(struct super_block *sb);
··· 696 struct list_head i_devices; 697 union { 698 struct pipe_inode_info *i_pipe; 699 struct cdev *i_cdev; 700 char *i_link; 701 unsigned i_dir_seq; ··· 1408 1409 struct sb_writers { 1410 int frozen; /* Is sb frozen? */ 1411 + wait_queue_head_t wait_unfrozen; /* wait for thaw */ 1412 struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; 1413 }; 1414 ··· 3131 extern void put_filesystem(struct file_system_type *fs); 3132 extern struct file_system_type *get_fs_type(const char *name); 3133 extern struct super_block *get_super(struct block_device *); 3134 extern struct super_block *get_active_super(struct block_device *bdev); 3135 extern void drop_super(struct super_block *sb); 3136 extern void drop_super_exclusive(struct super_block *sb);
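With the cached inode->i_bdev pointer gone from the inode union, filesystem code no longer reaches a block_device through the inode; a block_device is resolved from its dev_t when needed. A hedged sketch using the long-standing blkdev_get_by_dev() API, which is not part of this hunk (error handling abbreviated):

#include <linux/blkdev.h>

/* open a block device by device number, read-only; returns an ERR_PTR()
 * on failure and must be balanced with blkdev_put(bdev, FMODE_READ) */
static struct block_device *example_open_by_devt(dev_t devt)
{
        return blkdev_get_by_dev(devt, FMODE_READ, NULL);
}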
+33 -96
include/linux/genhd.h
··· 19 #include <linux/blk_types.h> 20 #include <asm/local.h> 21 22 - #define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) 23 - #define dev_to_part(device) container_of((device), struct hd_struct, __dev) 24 - #define disk_to_dev(disk) (&(disk)->part0.__dev) 25 - #define part_to_dev(part) (&((part)->__dev)) 26 - 27 extern const struct device_type disk_type; 28 extern struct device_type part_type; 29 extern struct class block_class; ··· 43 struct partition_meta_info { 44 char uuid[PARTITION_META_INFO_UUIDLTH]; 45 u8 volname[PARTITION_META_INFO_VOLNAMELTH]; 46 - }; 47 - 48 - struct hd_struct { 49 - sector_t start_sect; 50 - /* 51 - * nr_sects is protected by sequence counter. One might extend a 52 - * partition while IO is happening to it and update of nr_sects 53 - * can be non-atomic on 32bit machines with 64bit sector_t. 54 - */ 55 - sector_t nr_sects; 56 - #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 57 - seqcount_t nr_sects_seq; 58 - #endif 59 - unsigned long stamp; 60 - struct disk_stats __percpu *dkstats; 61 - struct percpu_ref ref; 62 - 63 - struct device __dev; 64 - struct kobject *holder_dir; 65 - int policy, partno; 66 - struct partition_meta_info *info; 67 - #ifdef CONFIG_FAIL_MAKE_REQUEST 68 - int make_it_fail; 69 - #endif 70 - struct rcu_work rcu_work; 71 }; 72 73 /** ··· 119 struct disk_part_tbl { 120 struct rcu_head rcu_head; 121 int len; 122 - struct hd_struct __rcu *last_lookup; 123 - struct hd_struct __rcu *part[]; 124 }; 125 126 struct disk_events; ··· 154 * helpers. 155 */ 156 struct disk_part_tbl __rcu *part_tbl; 157 - struct hd_struct part0; 158 159 const struct block_device_operations *fops; 160 struct request_queue *queue; ··· 163 int flags; 164 unsigned long state; 165 #define GD_NEED_PART_SCAN 0 166 - struct rw_semaphore lookup_sem; 167 struct kobject *slave_dir; 168 169 struct timer_rand_state *random; ··· 179 struct lockdep_map lockdep_map; 180 }; 181 182 #if IS_REACHABLE(CONFIG_CDROM) 183 #define disk_to_cdi(disk) ((disk)->cdi) 184 #else 185 #define disk_to_cdi(disk) NULL 186 #endif 187 - 188 - static inline struct gendisk *part_to_disk(struct hd_struct *part) 189 - { 190 - if (likely(part)) { 191 - if (part->partno) 192 - return dev_to_disk(part_to_dev(part)->parent); 193 - else 194 - return dev_to_disk(part_to_dev(part)); 195 - } 196 - return NULL; 197 - } 198 199 static inline int disk_max_parts(struct gendisk *disk) 200 { ··· 212 return MKDEV(disk->major, disk->first_minor); 213 } 214 215 - static inline dev_t part_devt(struct hd_struct *part) 216 - { 217 - return part_to_dev(part)->devt; 218 - } 219 - 220 - extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); 221 - extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); 222 - 223 - static inline void disk_put_part(struct hd_struct *part) 224 - { 225 - if (likely(part)) 226 - put_device(part_to_dev(part)); 227 - } 228 - 229 - static inline void hd_sects_seq_init(struct hd_struct *p) 230 - { 231 - #if BITS_PER_LONG==32 && defined(CONFIG_SMP) 232 - seqcount_init(&p->nr_sects_seq); 233 - #endif 234 - } 235 - 236 /* 237 * Smarter partition iterator without context limits. 
238 */ ··· 222 223 struct disk_part_iter { 224 struct gendisk *disk; 225 - struct hd_struct *part; 226 int idx; 227 unsigned int flags; 228 }; 229 230 extern void disk_part_iter_init(struct disk_part_iter *piter, 231 struct gendisk *disk, unsigned int flags); 232 - extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); 233 extern void disk_part_iter_exit(struct disk_part_iter *piter); 234 extern bool disk_has_partitions(struct gendisk *disk); 235 ··· 247 } 248 249 extern void del_gendisk(struct gendisk *gp); 250 - extern struct gendisk *get_gendisk(dev_t dev, int *partno); 251 extern struct block_device *bdget_disk(struct gendisk *disk, int partno); 252 253 - extern void set_device_ro(struct block_device *bdev, int flag); 254 extern void set_disk_ro(struct gendisk *disk, int flag); 255 256 static inline int get_disk_ro(struct gendisk *disk) 257 { 258 - return disk->part0.policy; 259 } 260 261 extern void disk_block_events(struct gendisk *disk); 262 extern void disk_unblock_events(struct gendisk *disk); 263 extern void disk_flush_events(struct gendisk *disk, unsigned int mask); 264 - bool set_capacity_revalidate_and_notify(struct gendisk *disk, sector_t size, 265 - bool update_bdev); 266 267 /* drivers/char/random.c */ 268 extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; ··· 267 268 static inline sector_t get_start_sect(struct block_device *bdev) 269 { 270 - return bdev->bd_part->start_sect; 271 } 272 static inline sector_t get_capacity(struct gendisk *disk) 273 { 274 - return disk->part0.nr_sects; 275 - } 276 - static inline void set_capacity(struct gendisk *disk, sector_t size) 277 - { 278 - disk->part0.nr_sects = size; 279 } 280 281 int bdev_disk_changed(struct block_device *bdev, bool invalidate); ··· 285 int blk_drop_partitions(struct block_device *bdev); 286 287 extern struct gendisk *__alloc_disk_node(int minors, int node_id); 288 - extern struct kobject *get_disk_and_module(struct gendisk *disk); 289 extern void put_disk(struct gendisk *disk); 290 - extern void put_disk_and_module(struct gendisk *disk); 291 - extern void blk_register_region(dev_t devt, unsigned long range, 292 - struct module *module, 293 - struct kobject *(*probe)(dev_t, int *, void *), 294 - int (*lock)(dev_t, void *), 295 - void *data); 296 - extern void blk_unregister_region(dev_t devt, unsigned long range); 297 298 #define alloc_disk_node(minors, node_id) \ 299 ({ \ ··· 305 306 #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) 307 308 - int register_blkdev(unsigned int major, const char *name); 309 void unregister_blkdev(unsigned int major, const char *name); 310 311 - void revalidate_disk_size(struct gendisk *disk, bool verbose); 312 bool bdev_check_media_change(struct block_device *bdev); 313 int __invalidate_device(struct block_device *bdev, bool kill_dirty); 314 - void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors); 315 316 /* for drivers/char/raw.c: */ 317 int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); ··· 334 } 335 #endif /* CONFIG_SYSFS */ 336 337 #ifdef CONFIG_BLOCK 338 void printk_all_partitions(void); 339 - dev_t blk_lookup_devt(const char *name, int partno); 340 #else /* CONFIG_BLOCK */ 341 static inline void printk_all_partitions(void) 342 { 343 - } 344 - static inline dev_t blk_lookup_devt(const char *name, int partno) 345 - { 346 - dev_t devt = MKDEV(0, 0); 347 - return devt; 348 } 349 #endif /* CONFIG_BLOCK */ 350
··· 19 #include <linux/blk_types.h> 20 #include <asm/local.h> 21 22 extern const struct device_type disk_type; 23 extern struct device_type part_type; 24 extern struct class block_class; ··· 48 struct partition_meta_info { 49 char uuid[PARTITION_META_INFO_UUIDLTH]; 50 u8 volname[PARTITION_META_INFO_VOLNAMELTH]; 51 }; 52 53 /** ··· 149 struct disk_part_tbl { 150 struct rcu_head rcu_head; 151 int len; 152 + struct block_device __rcu *last_lookup; 153 + struct block_device __rcu *part[]; 154 }; 155 156 struct disk_events; ··· 184 * helpers. 185 */ 186 struct disk_part_tbl __rcu *part_tbl; 187 + struct block_device *part0; 188 189 const struct block_device_operations *fops; 190 struct request_queue *queue; ··· 193 int flags; 194 unsigned long state; 195 #define GD_NEED_PART_SCAN 0 196 struct kobject *slave_dir; 197 198 struct timer_rand_state *random; ··· 210 struct lockdep_map lockdep_map; 211 }; 212 213 + /* 214 + * The gendisk is refcounted by the part0 block_device, and the bd_device 215 + * therein is also used for device model presentation in sysfs. 216 + */ 217 + #define dev_to_disk(device) \ 218 + (dev_to_bdev(device)->bd_disk) 219 + #define disk_to_dev(disk) \ 220 + (&((disk)->part0->bd_device)) 221 + 222 #if IS_REACHABLE(CONFIG_CDROM) 223 #define disk_to_cdi(disk) ((disk)->cdi) 224 #else 225 #define disk_to_cdi(disk) NULL 226 #endif 227 228 static inline int disk_max_parts(struct gendisk *disk) 229 { ··· 245 return MKDEV(disk->major, disk->first_minor); 246 } 247 248 /* 249 * Smarter partition iterator without context limits. 250 */ ··· 276 277 struct disk_part_iter { 278 struct gendisk *disk; 279 + struct block_device *part; 280 int idx; 281 unsigned int flags; 282 }; 283 284 extern void disk_part_iter_init(struct disk_part_iter *piter, 285 struct gendisk *disk, unsigned int flags); 286 + struct block_device *disk_part_iter_next(struct disk_part_iter *piter); 287 extern void disk_part_iter_exit(struct disk_part_iter *piter); 288 extern bool disk_has_partitions(struct gendisk *disk); 289 ··· 301 } 302 303 extern void del_gendisk(struct gendisk *gp); 304 extern struct block_device *bdget_disk(struct gendisk *disk, int partno); 305 306 extern void set_disk_ro(struct gendisk *disk, int flag); 307 308 static inline int get_disk_ro(struct gendisk *disk) 309 { 310 + return disk->part0->bd_read_only; 311 } 312 313 extern void disk_block_events(struct gendisk *disk); 314 extern void disk_unblock_events(struct gendisk *disk); 315 extern void disk_flush_events(struct gendisk *disk, unsigned int mask); 316 + bool set_capacity_and_notify(struct gendisk *disk, sector_t size); 317 318 /* drivers/char/random.c */ 319 extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; ··· 324 325 static inline sector_t get_start_sect(struct block_device *bdev) 326 { 327 + return bdev->bd_start_sect; 328 } 329 + 330 + static inline sector_t bdev_nr_sectors(struct block_device *bdev) 331 + { 332 + return i_size_read(bdev->bd_inode) >> 9; 333 + } 334 + 335 static inline sector_t get_capacity(struct gendisk *disk) 336 { 337 + return bdev_nr_sectors(disk->part0); 338 } 339 340 int bdev_disk_changed(struct block_device *bdev, bool invalidate); ··· 340 int blk_drop_partitions(struct block_device *bdev); 341 342 extern struct gendisk *__alloc_disk_node(int minors, int node_id); 343 extern void put_disk(struct gendisk *disk); 344 345 #define alloc_disk_node(minors, node_id) \ 346 ({ \ ··· 368 369 #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) 370 371 + int __register_blkdev(unsigned int 
major, const char *name, 372 + void (*probe)(dev_t devt)); 373 + #define register_blkdev(major, name) \ 374 + __register_blkdev(major, name, NULL) 375 void unregister_blkdev(unsigned int major, const char *name); 376 377 bool bdev_check_media_change(struct block_device *bdev); 378 int __invalidate_device(struct block_device *bdev, bool kill_dirty); 379 + void set_capacity(struct gendisk *disk, sector_t size); 380 381 /* for drivers/char/raw.c: */ 382 int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); ··· 395 } 396 #endif /* CONFIG_SYSFS */ 397 398 + extern struct rw_semaphore bdev_lookup_sem; 399 + 400 + dev_t blk_lookup_devt(const char *name, int partno); 401 + void blk_request_module(dev_t devt); 402 #ifdef CONFIG_BLOCK 403 void printk_all_partitions(void); 404 #else /* CONFIG_BLOCK */ 405 static inline void printk_all_partitions(void) 406 { 407 } 408 #endif /* CONFIG_BLOCK */ 409
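This is the core of the hd_struct/block_device merge: part0 becomes a block_device, capacity is simply the size of the bdev inode (bdev_nr_sectors()), and the old blk_register_region() probe tables are replaced by a probe callback passed to __register_blkdev(), invoked via blk_request_module() when a dev_t with that major is looked up and no gendisk exists yet. A rough sketch of a legacy-style driver after the change; EXAMPLE_MAJOR, the names and the empty probe body are placeholders:

#include <linux/genhd.h>
#include <linux/init.h>

#define EXAMPLE_MAJOR   240     /* placeholder: local/experimental major range */

/* called when an unclaimed EXAMPLE_MAJOR dev_t is first looked up */
static void example_probe(dev_t devt)
{
        /* allocate, size and add_disk() the gendisk for MINOR(devt) here */
}

static int __init example_init(void)
{
        return __register_blkdev(EXAMPLE_MAJOR, "example", example_probe);
}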
-3
include/linux/ide.h
··· 1493 static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} 1494 #endif 1495 1496 - void ide_register_region(struct gendisk *); 1497 - void ide_unregister_region(struct gendisk *); 1498 - 1499 void ide_check_nien_quirk_list(ide_drive_t *); 1500 void ide_undecoded_slave(ide_drive_t *); 1501
··· 1493 static inline void ide_acpi_set_state(ide_hwif_t *hwif, int on) {} 1494 #endif 1495 1496 void ide_check_nien_quirk_list(ide_drive_t *); 1497 void ide_undecoded_slave(ide_drive_t *); 1498
+22 -23
include/linux/part_stat.h
··· 25 #define part_stat_unlock() preempt_enable() 26 27 #define part_stat_get_cpu(part, field, cpu) \ 28 - (per_cpu_ptr((part)->dkstats, (cpu))->field) 29 30 #define part_stat_get(part, field) \ 31 part_stat_get_cpu(part, field, smp_processor_id()) 32 33 #define part_stat_read(part, field) \ 34 ({ \ 35 - typeof((part)->dkstats->field) res = 0; \ 36 unsigned int _cpu; \ 37 for_each_possible_cpu(_cpu) \ 38 - res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ 39 res; \ 40 }) 41 42 - static inline void part_stat_set_all(struct hd_struct *part, int value) 43 { 44 int i; 45 46 for_each_possible_cpu(i) 47 - memset(per_cpu_ptr(part->dkstats, i), value, 48 sizeof(struct disk_stats)); 49 } 50 ··· 54 part_stat_read(part, field[STAT_DISCARD])) 55 56 #define __part_stat_add(part, field, addnd) \ 57 - __this_cpu_add((part)->dkstats->field, addnd) 58 59 #define part_stat_add(part, field, addnd) do { \ 60 __part_stat_add((part), field, addnd); \ 61 - if ((part)->partno) \ 62 - __part_stat_add(&part_to_disk((part))->part0, \ 63 - field, addnd); \ 64 } while (0) 65 66 - #define part_stat_dec(gendiskp, field) \ 67 - part_stat_add(gendiskp, field, -1) 68 - #define part_stat_inc(gendiskp, field) \ 69 - part_stat_add(gendiskp, field, 1) 70 - #define part_stat_sub(gendiskp, field, subnd) \ 71 - part_stat_add(gendiskp, field, -subnd) 72 73 - #define part_stat_local_dec(gendiskp, field) \ 74 - local_dec(&(part_stat_get(gendiskp, field))) 75 - #define part_stat_local_inc(gendiskp, field) \ 76 - local_inc(&(part_stat_get(gendiskp, field))) 77 - #define part_stat_local_read(gendiskp, field) \ 78 - local_read(&(part_stat_get(gendiskp, field))) 79 - #define part_stat_local_read_cpu(gendiskp, field, cpu) \ 80 - local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) 81 82 #endif /* _LINUX_PART_STAT_H */
··· 25 #define part_stat_unlock() preempt_enable() 26 27 #define part_stat_get_cpu(part, field, cpu) \ 28 + (per_cpu_ptr((part)->bd_stats, (cpu))->field) 29 30 #define part_stat_get(part, field) \ 31 part_stat_get_cpu(part, field, smp_processor_id()) 32 33 #define part_stat_read(part, field) \ 34 ({ \ 35 + typeof((part)->bd_stats->field) res = 0; \ 36 unsigned int _cpu; \ 37 for_each_possible_cpu(_cpu) \ 38 + res += per_cpu_ptr((part)->bd_stats, _cpu)->field; \ 39 res; \ 40 }) 41 42 + static inline void part_stat_set_all(struct block_device *part, int value) 43 { 44 int i; 45 46 for_each_possible_cpu(i) 47 + memset(per_cpu_ptr(part->bd_stats, i), value, 48 sizeof(struct disk_stats)); 49 } 50 ··· 54 part_stat_read(part, field[STAT_DISCARD])) 55 56 #define __part_stat_add(part, field, addnd) \ 57 + __this_cpu_add((part)->bd_stats->field, addnd) 58 59 #define part_stat_add(part, field, addnd) do { \ 60 __part_stat_add((part), field, addnd); \ 61 + if ((part)->bd_partno) \ 62 + __part_stat_add(bdev_whole(part), field, addnd); \ 63 } while (0) 64 65 + #define part_stat_dec(part, field) \ 66 + part_stat_add(part, field, -1) 67 + #define part_stat_inc(part, field) \ 68 + part_stat_add(part, field, 1) 69 + #define part_stat_sub(part, field, subnd) \ 70 + part_stat_add(part, field, -subnd) 71 72 + #define part_stat_local_dec(part, field) \ 73 + local_dec(&(part_stat_get(part, field))) 74 + #define part_stat_local_inc(part, field) \ 75 + local_inc(&(part_stat_get(part, field))) 76 + #define part_stat_local_read(part, field) \ 77 + local_read(&(part_stat_get(part, field))) 78 + #define part_stat_local_read_cpu(part, field, cpu) \ 79 + local_read(&(part_stat_get_cpu(part, field, cpu))) 80 81 #endif /* _LINUX_PART_STAT_H */
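All of the accounting macros now take the struct block_device that stands for the partition, and part_stat_add() rolls partition counts up into the whole disk via bdev_whole() rather than chasing hd_struct back-pointers. A minimal sketch of a caller under the new naming (the function itself is illustrative; STAT_READ and the disk_stats layout are unchanged):

#include <linux/blk_types.h>
#include <linux/part_stat.h>

/* account one completed read of @nr_sectors against a partition bdev */
static void example_account_read(struct block_device *part,
                                 unsigned int nr_sectors)
{
        part_stat_lock();
        part_stat_inc(part, ios[STAT_READ]);
        part_stat_add(part, sectors[STAT_READ], nr_sectors);
        part_stat_unlock();
}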
-5
include/linux/sbitmap.h
··· 32 * @cleared: word holding cleared bits 33 */ 34 unsigned long cleared ____cacheline_aligned_in_smp; 35 - 36 - /** 37 - * @swap_lock: Held while swapping word <-> cleared 38 - */ 39 - spinlock_t swap_lock; 40 } ____cacheline_aligned_in_smp; 41 42 /**
··· 32 * @cleared: word holding cleared bits 33 */ 34 unsigned long cleared ____cacheline_aligned_in_smp; 35 } ____cacheline_aligned_in_smp; 36 37 /**
+54 -174
include/trace/events/block.h
··· 64 65 /** 66 * block_rq_requeue - place block IO request back on a queue 67 - * @q: queue holding operation 68 * @rq: block IO operation request 69 * 70 * The block operation request @rq is being placed back into queue ··· 72 */ 73 TRACE_EVENT(block_rq_requeue, 74 75 - TP_PROTO(struct request_queue *q, struct request *rq), 76 77 - TP_ARGS(q, rq), 78 79 TP_STRUCT__entry( 80 __field( dev_t, dev ) ··· 146 147 DECLARE_EVENT_CLASS(block_rq, 148 149 - TP_PROTO(struct request_queue *q, struct request *rq), 150 151 - TP_ARGS(q, rq), 152 153 TP_STRUCT__entry( 154 __field( dev_t, dev ) ··· 180 181 /** 182 * block_rq_insert - insert block operation request into queue 183 - * @q: target queue 184 * @rq: block IO operation request 185 * 186 * Called immediately before block operation request @rq is inserted ··· 189 */ 190 DEFINE_EVENT(block_rq, block_rq_insert, 191 192 - TP_PROTO(struct request_queue *q, struct request *rq), 193 194 - TP_ARGS(q, rq) 195 ); 196 197 /** 198 * block_rq_issue - issue pending block IO request operation to device driver 199 - * @q: queue holding operation 200 * @rq: block IO operation operation request 201 * 202 * Called when block operation request @rq from queue @q is sent to a ··· 203 */ 204 DEFINE_EVENT(block_rq, block_rq_issue, 205 206 - TP_PROTO(struct request_queue *q, struct request *rq), 207 208 - TP_ARGS(q, rq) 209 ); 210 211 /** 212 * block_rq_merge - merge request with another one in the elevator 213 - * @q: queue holding operation 214 * @rq: block IO operation operation request 215 * 216 * Called when block operation request @rq from queue @q is merged to another ··· 217 */ 218 DEFINE_EVENT(block_rq, block_rq_merge, 219 220 - TP_PROTO(struct request_queue *q, struct request *rq), 221 222 - TP_ARGS(q, rq) 223 - ); 224 - 225 - /** 226 - * block_bio_bounce - used bounce buffer when processing block operation 227 - * @q: queue holding the block operation 228 - * @bio: block operation 229 - * 230 - * A bounce buffer was used to handle the block operation @bio in @q. 231 - * This occurs when hardware limitations prevent a direct transfer of 232 - * data between the @bio data memory area and the IO device. Use of a 233 - * bounce buffer requires extra copying of data and decreases 234 - * performance. 
235 - */ 236 - TRACE_EVENT(block_bio_bounce, 237 - 238 - TP_PROTO(struct request_queue *q, struct bio *bio), 239 - 240 - TP_ARGS(q, bio), 241 - 242 - TP_STRUCT__entry( 243 - __field( dev_t, dev ) 244 - __field( sector_t, sector ) 245 - __field( unsigned int, nr_sector ) 246 - __array( char, rwbs, RWBS_LEN ) 247 - __array( char, comm, TASK_COMM_LEN ) 248 - ), 249 - 250 - TP_fast_assign( 251 - __entry->dev = bio_dev(bio); 252 - __entry->sector = bio->bi_iter.bi_sector; 253 - __entry->nr_sector = bio_sectors(bio); 254 - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); 255 - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 256 - ), 257 - 258 - TP_printk("%d,%d %s %llu + %u [%s]", 259 - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 260 - (unsigned long long)__entry->sector, 261 - __entry->nr_sector, __entry->comm) 262 ); 263 264 /** ··· 258 __entry->nr_sector, __entry->error) 259 ); 260 261 - DECLARE_EVENT_CLASS(block_bio_merge, 262 263 - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), 264 265 - TP_ARGS(q, rq, bio), 266 267 TP_STRUCT__entry( 268 __field( dev_t, dev ) ··· 284 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 285 (unsigned long long)__entry->sector, 286 __entry->nr_sector, __entry->comm) 287 ); 288 289 /** 290 * block_bio_backmerge - merging block operation to the end of an existing operation 291 - * @q: queue holding operation 292 - * @rq: request bio is being merged into 293 * @bio: new block operation to merge 294 * 295 - * Merging block request @bio to the end of an existing block request 296 - * in queue @q. 297 */ 298 - DEFINE_EVENT(block_bio_merge, block_bio_backmerge, 299 - 300 - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), 301 - 302 - TP_ARGS(q, rq, bio) 303 ); 304 305 /** 306 * block_bio_frontmerge - merging block operation to the beginning of an existing operation 307 - * @q: queue holding operation 308 - * @rq: request bio is being merged into 309 * @bio: new block operation to merge 310 * 311 - * Merging block IO operation @bio to the beginning of an existing block 312 - * operation in queue @q. 313 */ 314 - DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, 315 - 316 - TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), 317 - 318 - TP_ARGS(q, rq, bio) 319 ); 320 321 /** 322 * block_bio_queue - putting new block IO operation in queue 323 - * @q: queue holding operation 324 * @bio: new block operation 325 * 326 * About to place the block IO operation @bio into queue @q. 
327 */ 328 - TRACE_EVENT(block_bio_queue, 329 - 330 - TP_PROTO(struct request_queue *q, struct bio *bio), 331 - 332 - TP_ARGS(q, bio), 333 - 334 - TP_STRUCT__entry( 335 - __field( dev_t, dev ) 336 - __field( sector_t, sector ) 337 - __field( unsigned int, nr_sector ) 338 - __array( char, rwbs, RWBS_LEN ) 339 - __array( char, comm, TASK_COMM_LEN ) 340 - ), 341 - 342 - TP_fast_assign( 343 - __entry->dev = bio_dev(bio); 344 - __entry->sector = bio->bi_iter.bi_sector; 345 - __entry->nr_sector = bio_sectors(bio); 346 - blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size); 347 - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 348 - ), 349 - 350 - TP_printk("%d,%d %s %llu + %u [%s]", 351 - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 352 - (unsigned long long)__entry->sector, 353 - __entry->nr_sector, __entry->comm) 354 - ); 355 - 356 - DECLARE_EVENT_CLASS(block_get_rq, 357 - 358 - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 359 - 360 - TP_ARGS(q, bio, rw), 361 - 362 - TP_STRUCT__entry( 363 - __field( dev_t, dev ) 364 - __field( sector_t, sector ) 365 - __field( unsigned int, nr_sector ) 366 - __array( char, rwbs, RWBS_LEN ) 367 - __array( char, comm, TASK_COMM_LEN ) 368 - ), 369 - 370 - TP_fast_assign( 371 - __entry->dev = bio ? bio_dev(bio) : 0; 372 - __entry->sector = bio ? bio->bi_iter.bi_sector : 0; 373 - __entry->nr_sector = bio ? bio_sectors(bio) : 0; 374 - blk_fill_rwbs(__entry->rwbs, 375 - bio ? bio->bi_opf : 0, __entry->nr_sector); 376 - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); 377 - ), 378 - 379 - TP_printk("%d,%d %s %llu + %u [%s]", 380 - MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 381 - (unsigned long long)__entry->sector, 382 - __entry->nr_sector, __entry->comm) 383 ); 384 385 /** 386 * block_getrq - get a free request entry in queue for block IO operations 387 - * @q: queue for operations 388 * @bio: pending block IO operation (can be %NULL) 389 - * @rw: low bit indicates a read (%0) or a write (%1) 390 * 391 - * A request struct for queue @q has been allocated to handle the 392 - * block IO operation @bio. 393 */ 394 - DEFINE_EVENT(block_get_rq, block_getrq, 395 - 396 - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 397 - 398 - TP_ARGS(q, bio, rw) 399 - ); 400 - 401 - /** 402 - * block_sleeprq - waiting to get a free request entry in queue for block IO operation 403 - * @q: queue for operation 404 - * @bio: pending block IO operation (can be %NULL) 405 - * @rw: low bit indicates a read (%0) or a write (%1) 406 - * 407 - * In the case where a request struct cannot be provided for queue @q 408 - * the process needs to wait for an request struct to become 409 - * available. This tracepoint event is generated each time the 410 - * process goes to sleep waiting for request struct become available. 411 - */ 412 - DEFINE_EVENT(block_get_rq, block_sleeprq, 413 - 414 - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), 415 - 416 - TP_ARGS(q, bio, rw) 417 ); 418 419 /** ··· 407 408 /** 409 * block_split - split a single bio struct into two bio structs 410 - * @q: queue containing the bio 411 * @bio: block operation being split 412 * @new_sector: The starting sector for the new bio 413 * 414 - * The bio request @bio in request queue @q needs to be split into two 415 - * bio requests. The newly created @bio request starts at 416 - * @new_sector. This split may be required due to hardware limitation 417 - * such as operation crossing device boundaries in a RAID system. 
418 */ 419 TRACE_EVENT(block_split, 420 421 - TP_PROTO(struct request_queue *q, struct bio *bio, 422 - unsigned int new_sector), 423 424 - TP_ARGS(q, bio, new_sector), 425 426 TP_STRUCT__entry( 427 __field( dev_t, dev ) ··· 446 447 /** 448 * block_bio_remap - map request for a logical device to the raw device 449 - * @q: queue holding the operation 450 * @bio: revised operation 451 - * @dev: device for the operation 452 * @from: original sector for the operation 453 * 454 * An operation for a logical device has been mapped to the ··· 455 */ 456 TRACE_EVENT(block_bio_remap, 457 458 - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, 459 - sector_t from), 460 461 - TP_ARGS(q, bio, dev, from), 462 463 TP_STRUCT__entry( 464 __field( dev_t, dev ) ··· 487 488 /** 489 * block_rq_remap - map request for a block operation request 490 - * @q: queue holding the operation 491 * @rq: block IO operation request 492 * @dev: device for the operation 493 * @from: original sector for the operation ··· 497 */ 498 TRACE_EVENT(block_rq_remap, 499 500 - TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev, 501 - sector_t from), 502 503 - TP_ARGS(q, rq, dev, from), 504 505 TP_STRUCT__entry( 506 __field( dev_t, dev )
··· 64 65 /** 66 * block_rq_requeue - place block IO request back on a queue 67 * @rq: block IO operation request 68 * 69 * The block operation request @rq is being placed back into queue ··· 73 */ 74 TRACE_EVENT(block_rq_requeue, 75 76 + TP_PROTO(struct request *rq), 77 78 + TP_ARGS(rq), 79 80 TP_STRUCT__entry( 81 __field( dev_t, dev ) ··· 147 148 DECLARE_EVENT_CLASS(block_rq, 149 150 + TP_PROTO(struct request *rq), 151 152 + TP_ARGS(rq), 153 154 TP_STRUCT__entry( 155 __field( dev_t, dev ) ··· 181 182 /** 183 * block_rq_insert - insert block operation request into queue 184 * @rq: block IO operation request 185 * 186 * Called immediately before block operation request @rq is inserted ··· 191 */ 192 DEFINE_EVENT(block_rq, block_rq_insert, 193 194 + TP_PROTO(struct request *rq), 195 196 + TP_ARGS(rq) 197 ); 198 199 /** 200 * block_rq_issue - issue pending block IO request operation to device driver 201 * @rq: block IO operation operation request 202 * 203 * Called when block operation request @rq from queue @q is sent to a ··· 206 */ 207 DEFINE_EVENT(block_rq, block_rq_issue, 208 209 + TP_PROTO(struct request *rq), 210 211 + TP_ARGS(rq) 212 ); 213 214 /** 215 * block_rq_merge - merge request with another one in the elevator 216 * @rq: block IO operation operation request 217 * 218 * Called when block operation request @rq from queue @q is merged to another ··· 221 */ 222 DEFINE_EVENT(block_rq, block_rq_merge, 223 224 + TP_PROTO(struct request *rq), 225 226 + TP_ARGS(rq) 227 ); 228 229 /** ··· 301 __entry->nr_sector, __entry->error) 302 ); 303 304 + DECLARE_EVENT_CLASS(block_bio, 305 306 + TP_PROTO(struct bio *bio), 307 308 + TP_ARGS(bio), 309 310 TP_STRUCT__entry( 311 __field( dev_t, dev ) ··· 327 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, 328 (unsigned long long)__entry->sector, 329 __entry->nr_sector, __entry->comm) 330 + ); 331 + 332 + /** 333 + * block_bio_bounce - used bounce buffer when processing block operation 334 + * @bio: block operation 335 + * 336 + * A bounce buffer was used to handle the block operation @bio in @q. 337 + * This occurs when hardware limitations prevent a direct transfer of 338 + * data between the @bio data memory area and the IO device. Use of a 339 + * bounce buffer requires extra copying of data and decreases 340 + * performance. 341 + */ 342 + DEFINE_EVENT(block_bio, block_bio_bounce, 343 + TP_PROTO(struct bio *bio), 344 + TP_ARGS(bio) 345 ); 346 347 /** 348 * block_bio_backmerge - merging block operation to the end of an existing operation 349 * @bio: new block operation to merge 350 * 351 + * Merging block request @bio to the end of an existing block request. 352 */ 353 + DEFINE_EVENT(block_bio, block_bio_backmerge, 354 + TP_PROTO(struct bio *bio), 355 + TP_ARGS(bio) 356 ); 357 358 /** 359 * block_bio_frontmerge - merging block operation to the beginning of an existing operation 360 * @bio: new block operation to merge 361 * 362 + * Merging block IO operation @bio to the beginning of an existing block request. 363 */ 364 + DEFINE_EVENT(block_bio, block_bio_frontmerge, 365 + TP_PROTO(struct bio *bio), 366 + TP_ARGS(bio) 367 ); 368 369 /** 370 * block_bio_queue - putting new block IO operation in queue 371 * @bio: new block operation 372 * 373 * About to place the block IO operation @bio into queue @q. 
374 */ 375 + DEFINE_EVENT(block_bio, block_bio_queue, 376 + TP_PROTO(struct bio *bio), 377 + TP_ARGS(bio) 378 ); 379 380 /** 381 * block_getrq - get a free request entry in queue for block IO operations 382 * @bio: pending block IO operation (can be %NULL) 383 * 384 + * A request struct has been allocated to handle the block IO operation @bio. 385 */ 386 + DEFINE_EVENT(block_bio, block_getrq, 387 + TP_PROTO(struct bio *bio), 388 + TP_ARGS(bio) 389 ); 390 391 /** ··· 521 522 /** 523 * block_split - split a single bio struct into two bio structs 524 * @bio: block operation being split 525 * @new_sector: The starting sector for the new bio 526 * 527 + * The bio request @bio needs to be split into two bio requests. The newly 528 + * created @bio request starts at @new_sector. This split may be required due to 529 + * hardware limitations such as operation crossing device boundaries in a RAID 530 + * system. 531 */ 532 TRACE_EVENT(block_split, 533 534 + TP_PROTO(struct bio *bio, unsigned int new_sector), 535 536 + TP_ARGS(bio, new_sector), 537 538 TP_STRUCT__entry( 539 __field( dev_t, dev ) ··· 562 563 /** 564 * block_bio_remap - map request for a logical device to the raw device 565 * @bio: revised operation 566 + * @dev: original device for the operation 567 * @from: original sector for the operation 568 * 569 * An operation for a logical device has been mapped to the ··· 572 */ 573 TRACE_EVENT(block_bio_remap, 574 575 + TP_PROTO(struct bio *bio, dev_t dev, sector_t from), 576 577 + TP_ARGS(bio, dev, from), 578 579 TP_STRUCT__entry( 580 __field( dev_t, dev ) ··· 605 606 /** 607 * block_rq_remap - map request for a block operation request 608 * @rq: block IO operation request 609 * @dev: device for the operation 610 * @from: original sector for the operation ··· 616 */ 617 TRACE_EVENT(block_rq_remap, 618 619 + TP_PROTO(struct request *rq, dev_t dev, sector_t from), 620 621 + TP_ARGS(rq, dev, from), 622 623 TP_STRUCT__entry( 624 __field( dev_t, dev )
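The bio-level events collapse into one block_bio event class and every prototype loses the request_queue, which is always recoverable as rq->q or bio->bi_disk->queue; block_sleeprq and the NULL-bio contortions in block_getrq go away with it. A sketch of what an out-of-line tracepoint consumer looks like against the new prototypes (probe and function names are illustrative):

#include <linux/blkdev.h>
#include <trace/events/block.h>

/* matches the new TP_PROTO(struct request *rq) of block_rq_issue */
static void example_rq_issue_probe(void *ignore, struct request *rq)
{
        pr_debug("issue %llu+%u\n",
                 (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq));
}

static int example_register_probe(void)
{
        return register_trace_block_rq_issue(example_rq_issue_probe, NULL);
}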
+129 -144
init/do_mounts.c
··· 76 */ 77 static int match_dev_by_uuid(struct device *dev, const void *data) 78 { 79 const struct uuidcmp *cmp = data; 80 - struct hd_struct *part = dev_to_part(dev); 81 82 - if (!part->info) 83 - goto no_match; 84 - 85 - if (strncasecmp(cmp->uuid, part->info->uuid, cmp->len)) 86 - goto no_match; 87 - 88 return 1; 89 - no_match: 90 - return 0; 91 } 92 - 93 94 /** 95 * devt_from_partuuid - looks up the dev_t of a partition by its UUID ··· 100 */ 101 static dev_t devt_from_partuuid(const char *uuid_str) 102 { 103 - dev_t res = 0; 104 struct uuidcmp cmp; 105 struct device *dev = NULL; 106 - struct gendisk *disk; 107 - struct hd_struct *part; 108 int offset = 0; 109 - bool clear_root_wait = false; 110 char *slash; 111 112 cmp.uuid = uuid_str; ··· 112 /* Check for optional partition number offset attributes. */ 113 if (slash) { 114 char c = 0; 115 /* Explicitly fail on poor PARTUUID syntax. */ 116 - if (sscanf(slash + 1, 117 - "PARTNROFF=%d%c", &offset, &c) != 1) { 118 - clear_root_wait = true; 119 - goto done; 120 - } 121 cmp.len = slash - uuid_str; 122 } else { 123 cmp.len = strlen(uuid_str); 124 } 125 126 - if (!cmp.len) { 127 - clear_root_wait = true; 128 - goto done; 129 - } 130 131 - dev = class_find_device(&block_class, NULL, &cmp, 132 - &match_dev_by_uuid); 133 if (!dev) 134 - goto done; 135 136 - res = dev->devt; 137 138 - /* Attempt to find the partition by offset. */ 139 - if (!offset) 140 - goto no_offset; 141 - 142 - res = 0; 143 - disk = part_to_disk(dev_to_part(dev)); 144 - part = disk_get_part(disk, dev_to_part(dev)->partno + offset); 145 - if (part) { 146 - res = part_devt(part); 147 - put_device(part_to_dev(part)); 148 } 149 150 - no_offset: 151 put_device(dev); 152 - done: 153 - if (clear_root_wait) { 154 - pr_err("VFS: PARTUUID= is invalid.\n" 155 - "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n"); 156 - if (root_wait) 157 - pr_err("Disabling rootwait; root= is invalid.\n"); 158 - root_wait = 0; 159 - } 160 - return res; 161 } 162 163 /** ··· 166 */ 167 static int match_dev_by_label(struct device *dev, const void *data) 168 { 169 const char *label = data; 170 - struct hd_struct *part = dev_to_part(dev); 171 172 - if (part->info && !strcmp(label, part->info->volname)) 173 - return 1; 174 - 175 - return 0; 176 } 177 - #endif 178 179 /* 180 * Convert a name into device number. We accept the following variants: ··· 281 * name contains slashes, the device name has them replaced with 282 * bangs. 
283 */ 284 - 285 dev_t name_to_dev_t(const char *name) 286 { 287 - char s[32]; 288 - char *p; 289 - dev_t res = 0; 290 - int part; 291 - 292 #ifdef CONFIG_BLOCK 293 - if (strncmp(name, "PARTUUID=", 9) == 0) { 294 - name += 9; 295 - res = devt_from_partuuid(name); 296 - if (!res) 297 - goto fail; 298 - goto done; 299 - } else if (strncmp(name, "PARTLABEL=", 10) == 0) { 300 - struct device *dev; 301 - 302 - dev = class_find_device(&block_class, NULL, name + 10, 303 - &match_dev_by_label); 304 - if (!dev) 305 - goto fail; 306 - 307 - res = dev->devt; 308 - put_device(dev); 309 - goto done; 310 - } 311 #endif 312 - 313 - if (strncmp(name, "/dev/", 5) != 0) { 314 - unsigned maj, min, offset; 315 - char dummy; 316 - 317 - if ((sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2) || 318 - (sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3)) { 319 - res = MKDEV(maj, min); 320 - if (maj != MAJOR(res) || min != MINOR(res)) 321 - goto fail; 322 - } else { 323 - res = new_decode_dev(simple_strtoul(name, &p, 16)); 324 - if (*p) 325 - goto fail; 326 - } 327 - goto done; 328 - } 329 - 330 - name += 5; 331 - res = Root_NFS; 332 - if (strcmp(name, "nfs") == 0) 333 - goto done; 334 - res = Root_CIFS; 335 - if (strcmp(name, "cifs") == 0) 336 - goto done; 337 - res = Root_RAM0; 338 - if (strcmp(name, "ram") == 0) 339 - goto done; 340 - 341 - if (strlen(name) > 31) 342 - goto fail; 343 - strcpy(s, name); 344 - for (p = s; *p; p++) 345 - if (*p == '/') 346 - *p = '!'; 347 - res = blk_lookup_devt(s, 0); 348 - if (res) 349 - goto done; 350 - 351 - /* 352 - * try non-existent, but valid partition, which may only exist 353 - * after revalidating the disk, like partitioned md devices 354 - */ 355 - while (p > s && isdigit(p[-1])) 356 - p--; 357 - if (p == s || !*p || *p == '0') 358 - goto fail; 359 - 360 - /* try disk name without <part number> */ 361 - part = simple_strtoul(p, NULL, 10); 362 - *p = '\0'; 363 - res = blk_lookup_devt(s, part); 364 - if (res) 365 - goto done; 366 - 367 - /* try disk name without p<part number> */ 368 - if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') 369 - goto fail; 370 - p[-1] = '\0'; 371 - res = blk_lookup_devt(s, part); 372 - if (res) 373 - goto done; 374 - 375 - fail: 376 - return 0; 377 - done: 378 - return res; 379 } 380 EXPORT_SYMBOL_GPL(name_to_dev_t); 381
··· 76 */ 77 static int match_dev_by_uuid(struct device *dev, const void *data) 78 { 79 + struct block_device *bdev = dev_to_bdev(dev); 80 const struct uuidcmp *cmp = data; 81 82 + if (!bdev->bd_meta_info || 83 + strncasecmp(cmp->uuid, bdev->bd_meta_info->uuid, cmp->len)) 84 + return 0; 85 return 1; 86 } 87 88 /** 89 * devt_from_partuuid - looks up the dev_t of a partition by its UUID ··· 106 */ 107 static dev_t devt_from_partuuid(const char *uuid_str) 108 { 109 struct uuidcmp cmp; 110 struct device *dev = NULL; 111 + dev_t devt = 0; 112 int offset = 0; 113 char *slash; 114 115 cmp.uuid = uuid_str; ··· 121 /* Check for optional partition number offset attributes. */ 122 if (slash) { 123 char c = 0; 124 + 125 /* Explicitly fail on poor PARTUUID syntax. */ 126 + if (sscanf(slash + 1, "PARTNROFF=%d%c", &offset, &c) != 1) 127 + goto clear_root_wait; 128 cmp.len = slash - uuid_str; 129 } else { 130 cmp.len = strlen(uuid_str); 131 } 132 133 + if (!cmp.len) 134 + goto clear_root_wait; 135 136 + dev = class_find_device(&block_class, NULL, &cmp, &match_dev_by_uuid); 137 if (!dev) 138 + return 0; 139 140 + if (offset) { 141 + /* 142 + * Attempt to find the requested partition by adding an offset 143 + * to the partition number found by UUID. 144 + */ 145 + struct block_device *part; 146 147 + part = bdget_disk(dev_to_disk(dev), 148 + dev_to_bdev(dev)->bd_partno + offset); 149 + if (part) { 150 + devt = part->bd_dev; 151 + bdput(part); 152 + } 153 + } else { 154 + devt = dev->devt; 155 } 156 157 put_device(dev); 158 + return devt; 159 + 160 + clear_root_wait: 161 + pr_err("VFS: PARTUUID= is invalid.\n" 162 + "Expected PARTUUID=<valid-uuid-id>[/PARTNROFF=%%d]\n"); 163 + if (root_wait) 164 + pr_err("Disabling rootwait; root= is invalid.\n"); 165 + root_wait = 0; 166 + return 0; 167 } 168 169 /** ··· 178 */ 179 static int match_dev_by_label(struct device *dev, const void *data) 180 { 181 + struct block_device *bdev = dev_to_bdev(dev); 182 const char *label = data; 183 184 + if (!bdev->bd_meta_info || strcmp(label, bdev->bd_meta_info->volname)) 185 + return 0; 186 + return 1; 187 } 188 + 189 + static dev_t devt_from_partlabel(const char *label) 190 + { 191 + struct device *dev; 192 + dev_t devt = 0; 193 + 194 + dev = class_find_device(&block_class, NULL, label, &match_dev_by_label); 195 + if (dev) { 196 + devt = dev->devt; 197 + put_device(dev); 198 + } 199 + 200 + return devt; 201 + } 202 + 203 + static dev_t devt_from_devname(const char *name) 204 + { 205 + dev_t devt = 0; 206 + int part; 207 + char s[32]; 208 + char *p; 209 + 210 + if (strlen(name) > 31) 211 + return 0; 212 + strcpy(s, name); 213 + for (p = s; *p; p++) { 214 + if (*p == '/') 215 + *p = '!'; 216 + } 217 + 218 + devt = blk_lookup_devt(s, 0); 219 + if (devt) 220 + return devt; 221 + 222 + /* 223 + * Try non-existent, but valid partition, which may only exist after 224 + * opening the device, like partitioned md devices. 
225 + */ 226 + while (p > s && isdigit(p[-1])) 227 + p--; 228 + if (p == s || !*p || *p == '0') 229 + return 0; 230 + 231 + /* try disk name without <part number> */ 232 + part = simple_strtoul(p, NULL, 10); 233 + *p = '\0'; 234 + devt = blk_lookup_devt(s, part); 235 + if (devt) 236 + return devt; 237 + 238 + /* try disk name without p<part number> */ 239 + if (p < s + 2 || !isdigit(p[-2]) || p[-1] != 'p') 240 + return 0; 241 + p[-1] = '\0'; 242 + return blk_lookup_devt(s, part); 243 + } 244 + #endif /* CONFIG_BLOCK */ 245 + 246 + static dev_t devt_from_devnum(const char *name) 247 + { 248 + unsigned maj, min, offset; 249 + dev_t devt = 0; 250 + char *p, dummy; 251 + 252 + if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 || 253 + sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset, &dummy) == 3) { 254 + devt = MKDEV(maj, min); 255 + if (maj != MAJOR(devt) || min != MINOR(devt)) 256 + return 0; 257 + } else { 258 + devt = new_decode_dev(simple_strtoul(name, &p, 16)); 259 + if (*p) 260 + return 0; 261 + } 262 + 263 + return devt; 264 + } 265 266 /* 267 * Convert a name into device number. We accept the following variants: ··· 218 * name contains slashes, the device name has them replaced with 219 * bangs. 220 */ 221 dev_t name_to_dev_t(const char *name) 222 { 223 + if (strcmp(name, "/dev/nfs") == 0) 224 + return Root_NFS; 225 + if (strcmp(name, "/dev/cifs") == 0) 226 + return Root_CIFS; 227 + if (strcmp(name, "/dev/ram") == 0) 228 + return Root_RAM0; 229 #ifdef CONFIG_BLOCK 230 + if (strncmp(name, "PARTUUID=", 9) == 0) 231 + return devt_from_partuuid(name + 9); 232 + if (strncmp(name, "PARTLABEL=", 10) == 0) 233 + return devt_from_partlabel(name + 10); 234 + if (strncmp(name, "/dev/", 5) == 0) 235 + return devt_from_devname(name + 5); 236 #endif 237 + return devt_from_devnum(name); 238 } 239 EXPORT_SYMBOL_GPL(name_to_dev_t); 240
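name_to_dev_t() keeps its external behaviour; the refactor only splits the old goto-heavy parser into devt_from_partuuid(), devt_from_partlabel(), devt_from_devname() and devt_from_devnum() and dispatches between them with early returns. A hedged usage sketch (the UUID string is made up; name_to_dev_t() itself is declared in linux/mount.h):

#include <linux/mount.h>

/* resolve "the partition one past the one whose UUID matches" */
static dev_t example_resolve_root(void)
{
        return name_to_dev_t("PARTUUID=12345678-9abc-def0-1234-56789abcdef0/PARTNROFF=1");
}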
+45 -136
kernel/trace/blktrace.c
··· 458 static void blk_trace_setup_lba(struct blk_trace *bt, 459 struct block_device *bdev) 460 { 461 - struct hd_struct *part = NULL; 462 - 463 - if (bdev) 464 - part = bdev->bd_part; 465 - 466 - if (part) { 467 - bt->start_lba = part->start_sect; 468 - bt->end_lba = part->start_sect + part->nr_sects; 469 } else { 470 bt->start_lba = 0; 471 bt->end_lba = -1ULL; ··· 795 #endif 796 797 static u64 798 - blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) 799 { 800 if (!rq->bio) 801 return 0; 802 /* Use the first bio */ 803 - return blk_trace_bio_get_cgid(q, rq->bio); 804 } 805 806 /* ··· 841 rcu_read_unlock(); 842 } 843 844 - static void blk_add_trace_rq_insert(void *ignore, 845 - struct request_queue *q, struct request *rq) 846 { 847 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, 848 - blk_trace_request_get_cgid(q, rq)); 849 } 850 851 - static void blk_add_trace_rq_issue(void *ignore, 852 - struct request_queue *q, struct request *rq) 853 { 854 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, 855 - blk_trace_request_get_cgid(q, rq)); 856 } 857 858 - static void blk_add_trace_rq_merge(void *ignore, 859 - struct request_queue *q, struct request *rq) 860 { 861 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, 862 - blk_trace_request_get_cgid(q, rq)); 863 } 864 865 - static void blk_add_trace_rq_requeue(void *ignore, 866 - struct request_queue *q, 867 - struct request *rq) 868 { 869 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, 870 - blk_trace_request_get_cgid(q, rq)); 871 } 872 873 static void blk_add_trace_rq_complete(void *ignore, struct request *rq, 874 int error, unsigned int nr_bytes) 875 { 876 blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, 877 - blk_trace_request_get_cgid(rq->q, rq)); 878 } 879 880 /** ··· 901 rcu_read_unlock(); 902 } 903 904 - static void blk_add_trace_bio_bounce(void *ignore, 905 - struct request_queue *q, struct bio *bio) 906 { 907 - blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); 908 } 909 910 static void blk_add_trace_bio_complete(void *ignore, ··· 913 blk_status_to_errno(bio->bi_status)); 914 } 915 916 - static void blk_add_trace_bio_backmerge(void *ignore, 917 - struct request_queue *q, 918 - struct request *rq, 919 - struct bio *bio) 920 { 921 - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); 922 } 923 924 - static void blk_add_trace_bio_frontmerge(void *ignore, 925 - struct request_queue *q, 926 - struct request *rq, 927 - struct bio *bio) 928 { 929 - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); 930 } 931 932 - static void blk_add_trace_bio_queue(void *ignore, 933 - struct request_queue *q, struct bio *bio) 934 { 935 - blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); 936 } 937 938 - static void blk_add_trace_getrq(void *ignore, 939 - struct request_queue *q, 940 - struct bio *bio, int rw) 941 { 942 - if (bio) 943 - blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); 944 - else { 945 - struct blk_trace *bt; 946 - 947 - rcu_read_lock(); 948 - bt = rcu_dereference(q->blk_trace); 949 - if (bt) 950 - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, 951 - NULL, 0); 952 - rcu_read_unlock(); 953 - } 954 - } 955 - 956 - 957 - static void blk_add_trace_sleeprq(void *ignore, 958 - struct request_queue *q, 959 - struct bio *bio, int rw) 960 - { 961 - if (bio) 962 - blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); 963 - else { 964 - struct blk_trace *bt; 965 - 966 - rcu_read_lock(); 967 - bt = rcu_dereference(q->blk_trace); 968 - if (bt) 969 - __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, 970 - 0, 0, NULL, 0); 971 - 
rcu_read_unlock(); 972 - } 973 } 974 975 static void blk_add_trace_plug(void *ignore, struct request_queue *q) ··· 965 rcu_read_unlock(); 966 } 967 968 - static void blk_add_trace_split(void *ignore, 969 - struct request_queue *q, struct bio *bio, 970 - unsigned int pdu) 971 { 972 struct blk_trace *bt; 973 974 rcu_read_lock(); ··· 988 /** 989 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation 990 * @ignore: trace callback data parameter (not used) 991 - * @q: queue the io is for 992 * @bio: the source bio 993 - * @dev: target device 994 * @from: source sector 995 * 996 - * Description: 997 - * Device mapper or raid target sometimes need to split a bio because 998 - * it spans a stripe (or similar). Add a trace for that action. 999 - * 1000 **/ 1001 - static void blk_add_trace_bio_remap(void *ignore, 1002 - struct request_queue *q, struct bio *bio, 1003 - dev_t dev, sector_t from) 1004 { 1005 struct blk_trace *bt; 1006 struct blk_io_trace_remap r; 1007 ··· 1022 /** 1023 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 1024 * @ignore: trace callback data parameter (not used) 1025 - * @q: queue the io is for 1026 * @rq: the source request 1027 * @dev: target device 1028 * @from: source sector ··· 1031 * Add a trace for that action. 1032 * 1033 **/ 1034 - static void blk_add_trace_rq_remap(void *ignore, 1035 - struct request_queue *q, 1036 - struct request *rq, dev_t dev, 1037 sector_t from) 1038 { 1039 struct blk_trace *bt; 1040 struct blk_io_trace_remap r; 1041 1042 rcu_read_lock(); 1043 - bt = rcu_dereference(q->blk_trace); 1044 if (likely(!bt)) { 1045 rcu_read_unlock(); 1046 return; ··· 1050 1051 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 1052 rq_data_dir(rq), 0, BLK_TA_REMAP, 0, 1053 - sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); 1054 rcu_read_unlock(); 1055 } 1056 1057 /** 1058 * blk_add_driver_data - Add binary message with driver-specific data 1059 - * @q: queue the io is for 1060 * @rq: io request 1061 * @data: driver-specific data 1062 * @len: length of driver-specific data ··· 1064 * Some drivers might want to write driver-specific data per request. 
1065 * 1066 **/ 1067 - void blk_add_driver_data(struct request_queue *q, 1068 - struct request *rq, 1069 - void *data, size_t len) 1070 { 1071 struct blk_trace *bt; 1072 1073 rcu_read_lock(); 1074 - bt = rcu_dereference(q->blk_trace); 1075 if (likely(!bt)) { 1076 rcu_read_unlock(); 1077 return; ··· 1077 1078 __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, 1079 BLK_TA_DRV_DATA, 0, len, data, 1080 - blk_trace_request_get_cgid(q, rq)); 1081 rcu_read_unlock(); 1082 } 1083 EXPORT_SYMBOL_GPL(blk_add_driver_data); ··· 1108 WARN_ON(ret); 1109 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); 1110 WARN_ON(ret); 1111 - ret = register_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); 1112 - WARN_ON(ret); 1113 ret = register_trace_block_plug(blk_add_trace_plug, NULL); 1114 WARN_ON(ret); 1115 ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); ··· 1127 unregister_trace_block_split(blk_add_trace_split, NULL); 1128 unregister_trace_block_unplug(blk_add_trace_unplug, NULL); 1129 unregister_trace_block_plug(blk_add_trace_plug, NULL); 1130 - unregister_trace_block_sleeprq(blk_add_trace_sleeprq, NULL); 1131 unregister_trace_block_getrq(blk_add_trace_getrq, NULL); 1132 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); 1133 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); ··· 1751 return p - buf; 1752 } 1753 1754 - static struct request_queue *blk_trace_get_queue(struct block_device *bdev) 1755 - { 1756 - if (bdev->bd_disk == NULL) 1757 - return NULL; 1758 - 1759 - return bdev_get_queue(bdev); 1760 - } 1761 - 1762 static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1763 struct device_attribute *attr, 1764 char *buf) 1765 { 1766 - struct block_device *bdev = bdget_part(dev_to_part(dev)); 1767 - struct request_queue *q; 1768 struct blk_trace *bt; 1769 ssize_t ret = -ENXIO; 1770 - 1771 - if (bdev == NULL) 1772 - goto out; 1773 - 1774 - q = blk_trace_get_queue(bdev); 1775 - if (q == NULL) 1776 - goto out_bdput; 1777 1778 mutex_lock(&q->debugfs_mutex); 1779 ··· 1782 1783 out_unlock_bdev: 1784 mutex_unlock(&q->debugfs_mutex); 1785 - out_bdput: 1786 - bdput(bdev); 1787 - out: 1788 return ret; 1789 } 1790 ··· 1789 struct device_attribute *attr, 1790 const char *buf, size_t count) 1791 { 1792 - struct block_device *bdev; 1793 - struct request_queue *q; 1794 struct blk_trace *bt; 1795 u64 value; 1796 ssize_t ret = -EINVAL; ··· 1806 goto out; 1807 value = ret; 1808 } 1809 - } else if (kstrtoull(buf, 0, &value)) 1810 - goto out; 1811 - 1812 - ret = -ENXIO; 1813 - bdev = bdget_part(dev_to_part(dev)); 1814 - if (bdev == NULL) 1815 - goto out; 1816 - 1817 - q = blk_trace_get_queue(bdev); 1818 - if (q == NULL) 1819 - goto out_bdput; 1820 1821 mutex_lock(&q->debugfs_mutex); 1822 ··· 1847 1848 out_unlock_bdev: 1849 mutex_unlock(&q->debugfs_mutex); 1850 - out_bdput: 1851 - bdput(bdev); 1852 out: 1853 return ret ? ret : count; 1854 }
··· 458 static void blk_trace_setup_lba(struct blk_trace *bt, 459 struct block_device *bdev) 460 { 461 + if (bdev) { 462 + bt->start_lba = bdev->bd_start_sect; 463 + bt->end_lba = bdev->bd_start_sect + bdev_nr_sectors(bdev); 464 } else { 465 bt->start_lba = 0; 466 bt->end_lba = -1ULL; ··· 800 #endif 801 802 static u64 803 + blk_trace_request_get_cgid(struct request *rq) 804 { 805 if (!rq->bio) 806 return 0; 807 /* Use the first bio */ 808 + return blk_trace_bio_get_cgid(rq->q, rq->bio); 809 } 810 811 /* ··· 846 rcu_read_unlock(); 847 } 848 849 + static void blk_add_trace_rq_insert(void *ignore, struct request *rq) 850 { 851 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_INSERT, 852 + blk_trace_request_get_cgid(rq)); 853 } 854 855 + static void blk_add_trace_rq_issue(void *ignore, struct request *rq) 856 { 857 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_ISSUE, 858 + blk_trace_request_get_cgid(rq)); 859 } 860 861 + static void blk_add_trace_rq_merge(void *ignore, struct request *rq) 862 { 863 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_BACKMERGE, 864 + blk_trace_request_get_cgid(rq)); 865 } 866 867 + static void blk_add_trace_rq_requeue(void *ignore, struct request *rq) 868 { 869 blk_add_trace_rq(rq, 0, blk_rq_bytes(rq), BLK_TA_REQUEUE, 870 + blk_trace_request_get_cgid(rq)); 871 } 872 873 static void blk_add_trace_rq_complete(void *ignore, struct request *rq, 874 int error, unsigned int nr_bytes) 875 { 876 blk_add_trace_rq(rq, error, nr_bytes, BLK_TA_COMPLETE, 877 + blk_trace_request_get_cgid(rq)); 878 } 879 880 /** ··· 911 rcu_read_unlock(); 912 } 913 914 + static void blk_add_trace_bio_bounce(void *ignore, struct bio *bio) 915 { 916 + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BOUNCE, 0); 917 } 918 919 static void blk_add_trace_bio_complete(void *ignore, ··· 924 blk_status_to_errno(bio->bi_status)); 925 } 926 927 + static void blk_add_trace_bio_backmerge(void *ignore, struct bio *bio) 928 { 929 + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_BACKMERGE, 0); 930 } 931 932 + static void blk_add_trace_bio_frontmerge(void *ignore, struct bio *bio) 933 { 934 + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_FRONTMERGE, 0); 935 } 936 937 + static void blk_add_trace_bio_queue(void *ignore, struct bio *bio) 938 { 939 + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_QUEUE, 0); 940 } 941 942 + static void blk_add_trace_getrq(void *ignore, struct bio *bio) 943 { 944 + blk_add_trace_bio(bio->bi_disk->queue, bio, BLK_TA_GETRQ, 0); 945 } 946 947 static void blk_add_trace_plug(void *ignore, struct request_queue *q) ··· 1015 rcu_read_unlock(); 1016 } 1017 1018 + static void blk_add_trace_split(void *ignore, struct bio *bio, unsigned int pdu) 1019 { 1020 + struct request_queue *q = bio->bi_disk->queue; 1021 struct blk_trace *bt; 1022 1023 rcu_read_lock(); ··· 1039 /** 1040 * blk_add_trace_bio_remap - Add a trace for a bio-remap operation 1041 * @ignore: trace callback data parameter (not used) 1042 * @bio: the source bio 1043 + * @dev: source device 1044 * @from: source sector 1045 * 1046 + * Called after a bio is remapped to a different device and/or sector. 
1047 **/ 1048 + static void blk_add_trace_bio_remap(void *ignore, struct bio *bio, dev_t dev, 1049 + sector_t from) 1050 { 1051 + struct request_queue *q = bio->bi_disk->queue; 1052 struct blk_trace *bt; 1053 struct blk_io_trace_remap r; 1054 ··· 1077 /** 1078 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 1079 * @ignore: trace callback data parameter (not used) 1080 * @rq: the source request 1081 * @dev: target device 1082 * @from: source sector ··· 1087 * Add a trace for that action. 1088 * 1089 **/ 1090 + static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, 1091 sector_t from) 1092 { 1093 struct blk_trace *bt; 1094 struct blk_io_trace_remap r; 1095 1096 rcu_read_lock(); 1097 + bt = rcu_dereference(rq->q->blk_trace); 1098 if (likely(!bt)) { 1099 rcu_read_unlock(); 1100 return; ··· 1108 1109 __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), 1110 rq_data_dir(rq), 0, BLK_TA_REMAP, 0, 1111 + sizeof(r), &r, blk_trace_request_get_cgid(rq)); 1112 rcu_read_unlock(); 1113 } 1114 1115 /** 1116 * blk_add_driver_data - Add binary message with driver-specific data 1117 * @rq: io request 1118 * @data: driver-specific data 1119 * @len: length of driver-specific data ··· 1123 * Some drivers might want to write driver-specific data per request. 1124 * 1125 **/ 1126 + void blk_add_driver_data(struct request *rq, void *data, size_t len) 1127 { 1128 struct blk_trace *bt; 1129 1130 rcu_read_lock(); 1131 + bt = rcu_dereference(rq->q->blk_trace); 1132 if (likely(!bt)) { 1133 rcu_read_unlock(); 1134 return; ··· 1138 1139 __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, 1140 BLK_TA_DRV_DATA, 0, len, data, 1141 + blk_trace_request_get_cgid(rq)); 1142 rcu_read_unlock(); 1143 } 1144 EXPORT_SYMBOL_GPL(blk_add_driver_data); ··· 1169 WARN_ON(ret); 1170 ret = register_trace_block_getrq(blk_add_trace_getrq, NULL); 1171 WARN_ON(ret); 1172 ret = register_trace_block_plug(blk_add_trace_plug, NULL); 1173 WARN_ON(ret); 1174 ret = register_trace_block_unplug(blk_add_trace_unplug, NULL); ··· 1190 unregister_trace_block_split(blk_add_trace_split, NULL); 1191 unregister_trace_block_unplug(blk_add_trace_unplug, NULL); 1192 unregister_trace_block_plug(blk_add_trace_plug, NULL); 1193 unregister_trace_block_getrq(blk_add_trace_getrq, NULL); 1194 unregister_trace_block_bio_queue(blk_add_trace_bio_queue, NULL); 1195 unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge, NULL); ··· 1815 return p - buf; 1816 } 1817 1818 static ssize_t sysfs_blk_trace_attr_show(struct device *dev, 1819 struct device_attribute *attr, 1820 char *buf) 1821 { 1822 + struct block_device *bdev = dev_to_bdev(dev); 1823 + struct request_queue *q = bdev_get_queue(bdev); 1824 struct blk_trace *bt; 1825 ssize_t ret = -ENXIO; 1826 1827 mutex_lock(&q->debugfs_mutex); 1828 ··· 1861 1862 out_unlock_bdev: 1863 mutex_unlock(&q->debugfs_mutex); 1864 return ret; 1865 } 1866 ··· 1871 struct device_attribute *attr, 1872 const char *buf, size_t count) 1873 { 1874 + struct block_device *bdev = dev_to_bdev(dev); 1875 + struct request_queue *q = bdev_get_queue(bdev); 1876 struct blk_trace *bt; 1877 u64 value; 1878 ssize_t ret = -EINVAL; ··· 1888 goto out; 1889 value = ret; 1890 } 1891 + } else { 1892 + if (kstrtoull(buf, 0, &value)) 1893 + goto out; 1894 + } 1895 1896 mutex_lock(&q->debugfs_mutex); 1897 ··· 1936 1937 out_unlock_bdev: 1938 mutex_unlock(&q->debugfs_mutex); 1939 out: 1940 return ret ? ret : count; 1941 }
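The blktrace probes shrink to match the new tracepoint prototypes, block_sleeprq support disappears, and blk_trace_setup_lba() plus the sysfs attribute handlers read everything they need straight off the block_device instead of bouncing through hd_struct and bdget_part(). A small sketch of that last pattern, assuming a partition bdev (the helper name is illustrative):

#include <linux/blkdev.h>
#include <linux/genhd.h>

/* the [start, end) sector window a partition bdev covers on its disk */
static void example_lba_window(struct block_device *bdev,
                               sector_t *start, sector_t *end)
{
        *start = bdev->bd_start_sect;
        *end   = bdev->bd_start_sect + bdev_nr_sectors(bdev);
}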
+18 -26
lib/sbitmap.c
··· 12 /* 13 * See if we have deferred clears that we can batch move 14 */ 15 - static inline bool sbitmap_deferred_clear(struct sbitmap *sb, int index) 16 { 17 - unsigned long mask, val; 18 - bool ret = false; 19 - unsigned long flags; 20 21 - spin_lock_irqsave(&sb->map[index].swap_lock, flags); 22 - 23 - if (!sb->map[index].cleared) 24 - goto out_unlock; 25 26 /* 27 * First get a stable cleared mask, setting the old mask to 0. 28 */ 29 - mask = xchg(&sb->map[index].cleared, 0); 30 31 /* 32 * Now clear the masked bits in our free word 33 */ 34 - do { 35 - val = sb->map[index].word; 36 - } while (cmpxchg(&sb->map[index].word, val, val & ~mask) != val); 37 - 38 - ret = true; 39 - out_unlock: 40 - spin_unlock_irqrestore(&sb->map[index].swap_lock, flags); 41 - return ret; 42 } 43 44 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, ··· 71 for (i = 0; i < sb->map_nr; i++) { 72 sb->map[i].depth = min(depth, bits_per_word); 73 depth -= sb->map[i].depth; 74 - spin_lock_init(&sb->map[i].swap_lock); 75 } 76 return 0; 77 } ··· 82 unsigned int i; 83 84 for (i = 0; i < sb->map_nr; i++) 85 - sbitmap_deferred_clear(sb, i); 86 87 sb->depth = depth; 88 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word); ··· 97 static int __sbitmap_get_word(unsigned long *word, unsigned long depth, 98 unsigned int hint, bool wrap) 99 { 100 - unsigned int orig_hint = hint; 101 int nr; 102 103 while (1) { 104 nr = find_next_zero_bit(word, depth, hint); ··· 110 * offset to 0 in a failure case, so start from 0 to 111 * exhaust the map. 112 */ 113 - if (orig_hint && hint && wrap) { 114 - hint = orig_hint = 0; 115 continue; 116 } 117 return -1; ··· 131 static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, 132 unsigned int alloc_hint, bool round_robin) 133 { 134 int nr; 135 136 do { 137 - nr = __sbitmap_get_word(&sb->map[index].word, 138 - sb->map[index].depth, alloc_hint, 139 !round_robin); 140 if (nr != -1) 141 break; 142 - if (!sbitmap_deferred_clear(sb, index)) 143 break; 144 } while (1); 145 ··· 199 break; 200 } 201 202 - if (sbitmap_deferred_clear(sb, index)) 203 goto again; 204 205 /* Jump to next index. */
··· 12 /*
13 * See if we have deferred clears that we can batch move
14 */
15 + static inline bool sbitmap_deferred_clear(struct sbitmap_word *map)
16 {
17 + unsigned long mask;
18 
19 + if (!READ_ONCE(map->cleared))
20 + return false;
21 
22 /*
23 * First get a stable cleared mask, setting the old mask to 0.
24 */
25 + mask = xchg(&map->cleared, 0);
26 
27 /*
28 * Now clear the masked bits in our free word
29 */
30 + atomic_long_andnot(mask, (atomic_long_t *)&map->word);
31 + BUILD_BUG_ON(sizeof(atomic_long_t) != sizeof(map->word));
32 + return true;
33 }
34 
35 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
··· 80 for (i = 0; i < sb->map_nr; i++) {
81 sb->map[i].depth = min(depth, bits_per_word);
82 depth -= sb->map[i].depth;
83 }
84 return 0;
85 }
··· 92 unsigned int i;
93 
94 for (i = 0; i < sb->map_nr; i++)
95 + sbitmap_deferred_clear(&sb->map[i]);
96 
97 sb->depth = depth;
98 sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
··· 107 static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
108 unsigned int hint, bool wrap)
109 {
110 int nr;
111 + 
112 + /* don't wrap if starting from 0 */
113 + wrap = wrap && hint;
114 
115 while (1) {
116 nr = find_next_zero_bit(word, depth, hint);
··· 118 * offset to 0 in a failure case, so start from 0 to
119 * exhaust the map.
120 */
121 + if (hint && wrap) {
122 + hint = 0;
123 continue;
124 }
125 return -1;
··· 139 static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index,
140 unsigned int alloc_hint, bool round_robin)
141 {
142 + struct sbitmap_word *map = &sb->map[index];
143 int nr;
144 
145 do {
146 + nr = __sbitmap_get_word(&map->word, map->depth, alloc_hint,
147 !round_robin);
148 if (nr != -1)
149 break;
150 + if (!sbitmap_deferred_clear(map))
151 break;
152 } while (1);
··· 207 break;
208 }
209 
210 + if (sbitmap_deferred_clear(&sb->map[index]))
211 goto again;
212 
213 /* Jump to next index. */
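
The sbitmap rework above drops the per-word swap_lock and the cmpxchg retry loop: the deferred-clear mask is stolen with a single xchg and then removed from the allocation word with one atomic AND-NOT. A userspace sketch of the same pattern using C11 atomics (struct and function names here are illustrative, not the kernel's):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct word_pair {
        atomic_ulong word;      /* allocated bits */
        atomic_ulong cleared;   /* bits freed since the last batch move */
    };

    static bool deferred_clear(struct word_pair *map)
    {
        unsigned long mask;

        if (!atomic_load_explicit(&map->cleared, memory_order_relaxed))
            return false;

        /* First get a stable cleared mask, setting the old mask to 0. */
        mask = atomic_exchange(&map->cleared, 0);

        /* Drop the masked bits from the free word in one atomic step. */
        atomic_fetch_and(&map->word, ~mask);
        return true;
    }

The lock can go away because both steps are single atomic read-modify-writes on independent words, so there is no window in which a half-applied clear is visible.
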
+6 -7
mm/filemap.c
··· 2981 
2982 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2983 {
2984 struct page *page = vmf->page;
2985 - struct inode *inode = file_inode(vmf->vma->vm_file);
2986 vm_fault_t ret = VM_FAULT_LOCKED;
2987 
2988 - sb_start_pagefault(inode->i_sb);
2989 file_update_time(vmf->vma->vm_file);
2990 lock_page(page);
2991 - if (page->mapping != inode->i_mapping) {
2992 unlock_page(page);
2993 ret = VM_FAULT_NOPAGE;
2994 goto out;
··· 3001 set_page_dirty(page);
3002 wait_for_stable_page(page);
3003 out:
3004 - sb_end_pagefault(inode->i_sb);
3005 return ret;
3006 }
3007 
··· 3244 {
3245 static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
3246 char pathname[128];
3247 - struct inode *inode = file_inode(filp);
3248 char *path;
3249 
3250 - errseq_set(&inode->i_mapping->wb_err, -EIO);
3251 if (__ratelimit(&_rs)) {
3252 path = file_path(filp, pathname, sizeof(pathname));
3253 if (IS_ERR(path))
··· 3273 
3274 if (iocb->ki_flags & IOCB_NOWAIT) {
3275 /* If there are pages to writeback, return */
3276 - if (filemap_range_has_page(inode->i_mapping, pos,
3277 pos + write_len - 1))
3278 return -EAGAIN;
3279 } else {
··· 2981 
2982 vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
2983 {
2984 + struct address_space *mapping = vmf->vma->vm_file->f_mapping;
2985 struct page *page = vmf->page;
2986 vm_fault_t ret = VM_FAULT_LOCKED;
2987 
2988 + sb_start_pagefault(mapping->host->i_sb);
2989 file_update_time(vmf->vma->vm_file);
2990 lock_page(page);
2991 + if (page->mapping != mapping) {
2992 unlock_page(page);
2993 ret = VM_FAULT_NOPAGE;
2994 goto out;
··· 3001 set_page_dirty(page);
3002 wait_for_stable_page(page);
3003 out:
3004 + sb_end_pagefault(mapping->host->i_sb);
3005 return ret;
3006 }
3007 
··· 3244 {
3245 static DEFINE_RATELIMIT_STATE(_rs, 86400 * HZ, DEFAULT_RATELIMIT_BURST);
3246 char pathname[128];
3247 char *path;
3248 
3249 + errseq_set(&filp->f_mapping->wb_err, -EIO);
3250 if (__ratelimit(&_rs)) {
3251 path = file_path(filp, pathname, sizeof(pathname));
3252 if (IS_ERR(path))
··· 3274 
3275 if (iocb->ki_flags & IOCB_NOWAIT) {
3276 /* If there are pages to writeback, return */
3277 + if (filemap_range_has_page(file->f_mapping, pos,
3278 pos + write_len - 1))
3279 return -EAGAIN;
3280 } else {
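
The filemap hunks above consistently take the address_space from file->f_mapping rather than re-deriving it via the inode. A rough sketch of the distinction (helper name is invented; for regular files both expressions name the same object, while e.g. block device files point f_mapping at the bdev inode's mapping):

    #include <linux/fs.h>

    /* Illustrative helper, not a kernel API. */
    static inline struct address_space *mapping_of(struct file *file)
    {
        /* preferred: set up at open time, a single dereference */
        return file->f_mapping;
        /* equivalent for regular files: file_inode(file)->i_mapping */
    }
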