Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md: initialize 'writes_pending' while allocating mddev

Currently 'writes_pending' is initialized in pers->run for raid1/5/10,
and it's freed while deleting mddev, instead of in pers->free. pers->run
can be called multiple times before mddev is deleted, and a helper
mddev_init_writes_pending() is used to prevent 'writes_pending' from being
initialized multiple times; this usage is safe but a little weird.

On the other hand, 'writes_pending' is only initialized for raid1/5/10;
however, it's used in the common layer, for example:

array_state_store
 set_in_sync
  if (!mddev->in_sync) -> in_sync is used for all levels
   // access writes_pending

There might be some implicit dependency that I don't recognize which makes
sure 'writes_pending' can only be accessed for raid1/5/10, but there are
no comments about that.

By the way, it makes sense to initialize 'writes_pending' in the common
layer because three levels already use it.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20230825030956.1527023-3-yukuai1@huaweicloud.com

authored by

Yu Kuai and committed by
Song Liu
b8494823 d58eff83

+13 -26
+12 -17
drivers/md/md.c
··· 646 646 wake_up(&mddev->sb_wait); 647 647 } 648 648 649 + static void no_op(struct percpu_ref *r) {} 650 + 649 651 int mddev_init(struct mddev *mddev) 650 652 { 651 653 652 654 if (percpu_ref_init(&mddev->active_io, active_io_release, 653 655 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) 654 656 return -ENOMEM; 657 + 658 + if (percpu_ref_init(&mddev->writes_pending, no_op, 659 + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { 660 + percpu_ref_exit(&mddev->active_io); 661 + return -ENOMEM; 662 + } 663 + 664 + /* We want to start with the refcount at zero */ 665 + percpu_ref_put(&mddev->writes_pending); 655 666 656 667 mutex_init(&mddev->open_mutex); 657 668 mutex_init(&mddev->reconfig_mutex); ··· 696 685 void mddev_destroy(struct mddev *mddev) 697 686 { 698 687 percpu_ref_exit(&mddev->active_io); 688 + percpu_ref_exit(&mddev->writes_pending); 699 689 } 700 690 EXPORT_SYMBOL_GPL(mddev_destroy); 701 691 ··· 5640 5628 kobject_put(&mddev->kobj); 5641 5629 } 5642 5630 5643 - static void no_op(struct percpu_ref *r) {} 5644 - 5645 - int mddev_init_writes_pending(struct mddev *mddev) 5646 - { 5647 - if (mddev->writes_pending.percpu_count_ptr) 5648 - return 0; 5649 - if (percpu_ref_init(&mddev->writes_pending, no_op, 5650 - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0) 5651 - return -ENOMEM; 5652 - /* We want to start with the refcount at zero */ 5653 - percpu_ref_put(&mddev->writes_pending); 5654 - return 0; 5655 - } 5656 - EXPORT_SYMBOL_GPL(mddev_init_writes_pending); 5657 - 5658 5631 struct mddev *md_alloc(dev_t dev, char *name) 5659 5632 { 5660 5633 /* ··· 6320 6323 */ 6321 6324 __md_stop_writes(mddev); 6322 6325 __md_stop(mddev); 6323 - percpu_ref_exit(&mddev->writes_pending); 6324 6326 } 6325 6327 6326 6328 EXPORT_SYMBOL_GPL(md_stop); ··· 7903 7907 { 7904 7908 struct mddev *mddev = disk->private_data; 7905 7909 7906 - percpu_ref_exit(&mddev->writes_pending); 7907 7910 mddev_free(mddev); 7908 7911 } 7909 7912
-1
drivers/md/md.h
··· 771 771 extern void md_wakeup_thread(struct md_thread __rcu *thread); 772 772 extern void md_check_recovery(struct mddev *mddev); 773 773 extern void md_reap_sync_thread(struct mddev *mddev); 774 - extern int mddev_init_writes_pending(struct mddev *mddev); 775 774 extern bool md_write_start(struct mddev *mddev, struct bio *bi); 776 775 extern void md_write_inc(struct mddev *mddev, struct bio *bi); 777 776 extern void md_write_end(struct mddev *mddev);
+1 -2
drivers/md/raid1.c
··· 3122 3122 mdname(mddev)); 3123 3123 return -EIO; 3124 3124 } 3125 - if (mddev_init_writes_pending(mddev) < 0) 3126 - return -ENOMEM; 3125 + 3127 3126 /* 3128 3127 * copy the already verified devices into our private RAID1 3129 3128 * bookkeeping area. [whatever we allocate in run(),
-3
drivers/md/raid10.c
··· 4154 4154 sector_t min_offset_diff = 0; 4155 4155 int first = 1; 4156 4156 4157 - if (mddev_init_writes_pending(mddev) < 0) 4158 - return -ENOMEM; 4159 - 4160 4157 if (mddev->private == NULL) { 4161 4158 conf = setup_conf(mddev); 4162 4159 if (IS_ERR(conf))
-3
drivers/md/raid5.c
··· 7778 7778 long long min_offset_diff = 0; 7779 7779 int first = 1; 7780 7780 7781 - if (mddev_init_writes_pending(mddev) < 0) 7782 - return -ENOMEM; 7783 - 7784 7781 if (mddev->recovery_cp != MaxSector) 7785 7782 pr_notice("md/raid:%s: not clean -- starting background reconstruction\n", 7786 7783 mdname(mddev));