Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge tag 'md-6.15-20250312' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux into for-6.15/block

Merge MD changes from Yu:

"- fix recovery can preempt resync (Li Nan)
- fix md-bitmap IO limit (Su Yue)
- fix raid10 discard with REQ_NOWAIT (Xiao Ni)
- fix raid1 memory leak (Zheng Qixing)
- fix mddev uaf (Yu Kuai)
- fix raid1,raid10 IO flags (Yu Kuai)
- some refactoring and cleanup (Yu Kuai)"

* tag 'md-6.15-20250312' of https://git.kernel.org/pub/scm/linux/kernel/git/mdraid/linux:
md/raid10: wait barrier before returning discard request with REQ_NOWAIT
md/md-bitmap: fix wrong bitmap_limit for clustermd when write sb
md/raid1,raid10: don't ignore IO flags
md/raid5: merge reshape_progress checking inside get_reshape_loc()
md: fix mddev uaf while iterating all_mddevs list
md: switch md-cluster to use md_submodule_head
md: don't export md_cluster_ops
md/md-cluster: cleanup md_cluster_ops reference
md: switch personalities to use md_submodule_head
md: introduce struct md_submodule_head and APIs
md: only include md-cluster.h if necessary
md: merge common code into find_pers()
md/raid1: fix memory leak in raid1_run() if no active rdev
md: ensure resync is prioritized over recovery
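
Most of the churn in the diffs below is one mechanical theme: the old pers_list/pers_lock list of personalities and the md_cluster_ops global are replaced by a single xarray (md_submodule) keyed by submodule id, with every provider embedding a struct md_submodule_head. A minimal userspace model of that registry, with a plain array standing in for the xarray and no locking or module refcounting (names here are illustrative, not the kernel API):

    #include <stdio.h>
    #include <string.h>

    enum sub_type { SUB_PERSONALITY, SUB_CLUSTER };

    struct sub_head {
        enum sub_type type;
        int id;
        const char *name;
    };

    #define MAX_SUBS 16
    static struct sub_head *registry[MAX_SUBS];

    static int register_sub(struct sub_head *h)
    {
        if (h->id < 0 || h->id >= MAX_SUBS || registry[h->id])
            return -1;      /* like xa_insert() hitting a busy slot */
        registry[h->id] = h;
        return 0;
    }

    static struct sub_head *get_pers(int level, const char *clevel)
    {
        for (int i = 0; i < MAX_SUBS; i++) {
            struct sub_head *h = registry[i];

            if (!h || h->type != SUB_PERSONALITY)
                continue;
            if (h->id == level || !strcmp(h->name, clevel))
                return h;   /* the kernel also try_module_get()s here */
        }
        return NULL;
    }

    int main(void)
    {
        struct sub_head raid1 = { SUB_PERSONALITY, 1, "raid1" };

        register_sub(&raid1);
        struct sub_head *p = get_pers(1, "raid1");
        printf("found: %s\n", p ? p->name : "(none)");
        return 0;
    }

The kernel version additionally pins the provider with try_module_get() on head->owner before handing the ops out; see get_pers() in the md.c hunks below.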

+342 -273
+8 -6
drivers/md/md-bitmap.c
··· 29 29 #include <linux/buffer_head.h> 30 30 #include <linux/seq_file.h> 31 31 #include <trace/events/block.h> 32 + 32 33 #include "md.h" 33 34 #include "md-bitmap.h" 35 + #include "md-cluster.h" 34 36 35 37 #define BITMAP_MAJOR_LO 3 36 38 /* version 4 insists the bitmap is in little-endian order ··· 428 426 struct block_device *bdev; 429 427 struct mddev *mddev = bitmap->mddev; 430 428 struct bitmap_storage *store = &bitmap->storage; 431 - unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) << 432 - PAGE_SHIFT; 429 + unsigned long num_pages = bitmap->storage.file_pages; 430 + unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT; 433 431 loff_t sboff, offset = mddev->bitmap_info.offset; 434 432 sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE; 435 433 unsigned int size = PAGE_SIZE; ··· 438 436 439 437 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; 440 438 /* we compare length (page numbers), not page offset. */ 441 - if ((pg_index - store->sb_index) == store->file_pages - 1) { 439 + if ((pg_index - store->sb_index) == num_pages - 1) { 442 440 unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); 443 441 444 442 if (last_page_size == 0) ··· 944 942 bmname(bitmap), err); 945 943 goto out_no_sb; 946 944 } 947 - bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev); 945 + bitmap->cluster_slot = bitmap->mddev->cluster_ops->slot_number(bitmap->mddev); 948 946 goto re_read; 949 947 } 950 948 ··· 2023 2021 sysfs_put(bitmap->sysfs_can_clear); 2024 2022 2025 2023 if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && 2026 - bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev)) 2024 + bitmap->cluster_slot == bitmap->mddev->cluster_ops->slot_number(bitmap->mddev)) 2027 2025 md_cluster_stop(bitmap->mddev); 2028 2026 2029 2027 /* Shouldn't be needed - but just in case.... */ ··· 2231 2229 mddev_create_serial_pool(mddev, rdev); 2232 2230 2233 2231 if (mddev_is_clustered(mddev)) 2234 - md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); 2232 + mddev->cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); 2235 2233 2236 2234 /* Clear out old bitmap info first: Either there is none, or we 2237 2235 * are resuming after someone else has possibly changed things,
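
The __write_sb_page() hunk above is the clustermd bitmap_limit fix: for clustered arrays the page index can legitimately run past file_pages, and the old "file_pages - pg_index" subtraction then underflowed to a huge unsigned limit; the fix wraps the index with a modulo. A runnable sketch of the before/after arithmetic, with made-up values:

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static unsigned int limit_old(unsigned long file_pages, unsigned long pg_index)
    {
        /* underflows to a huge value when pg_index > file_pages */
        return (file_pages - pg_index) << PAGE_SHIFT;
    }

    static unsigned int limit_new(unsigned long file_pages, unsigned long pg_index)
    {
        return (file_pages - pg_index % file_pages) << PAGE_SHIFT;
    }

    int main(void)
    {
        unsigned long file_pages = 2, pg_index = 3;

        printf("old: %u\n", limit_old(file_pages, pg_index)); /* bogus huge limit */
        printf("new: %u\n", limit_new(file_pages, pg_index)); /* 4096: one page */
        return 0;
    }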
+12 -6
drivers/md/md-cluster.c
··· 1166 1166 struct dlm_lock_resource *bm_lockres; 1167 1167 char str[64]; 1168 1168 1169 - if (i == md_cluster_ops->slot_number(mddev)) 1169 + if (i == slot_number(mddev)) 1170 1170 continue; 1171 1171 1172 1172 bitmap = mddev->bitmap_ops->get_from_slot(mddev, i); ··· 1216 1216 */ 1217 1217 static int cluster_check_sync_size(struct mddev *mddev) 1218 1218 { 1219 - int current_slot = md_cluster_ops->slot_number(mddev); 1219 + int current_slot = slot_number(mddev); 1220 1220 int node_num = mddev->bitmap_info.nodes; 1221 1221 struct dlm_lock_resource *bm_lockres; 1222 1222 struct md_bitmap_stats stats; ··· 1612 1612 return err; 1613 1613 } 1614 1614 1615 - static const struct md_cluster_operations cluster_ops = { 1615 + static struct md_cluster_operations cluster_ops = { 1616 + .head = { 1617 + .type = MD_CLUSTER, 1618 + .id = ID_CLUSTER, 1619 + .name = "cluster", 1620 + .owner = THIS_MODULE, 1621 + }, 1622 + 1616 1623 .join = join, 1617 1624 .leave = leave, 1618 1625 .slot_number = slot_number, ··· 1649 1642 { 1650 1643 pr_warn("md-cluster: support raid1 and raid10 (limited support)\n"); 1651 1644 pr_info("Registering Cluster MD functions\n"); 1652 - register_md_cluster_operations(&cluster_ops, THIS_MODULE); 1653 - return 0; 1645 + return register_md_submodule(&cluster_ops.head); 1654 1646 } 1655 1647 1656 1648 static void cluster_exit(void) 1657 1649 { 1658 - unregister_md_cluster_operations(); 1650 + unregister_md_submodule(&cluster_ops.head); 1659 1651 } 1660 1652 1661 1653 module_init(cluster_init);
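
md-cluster now registers through the same interface as the personalities: a struct md_submodule_head embedded as the first member of the ops (likely why cluster_ops drops its const: register_md_submodule() takes a non-const head). The get_pers() hunk in md.c recovers the outer struct from the head pointer with a plain cast, which is valid only while the head stays the first member. A runnable sketch of that layout trick (types and values are illustrative):

    #include <stdio.h>

    struct head { int type; int id; const char *name; };

    struct cluster_ops {
        struct head head;       /* must stay the first member */
        int (*slot_number)(void);
    };

    static int slot(void) { return 0; }

    static struct cluster_ops ops = {
        .head = { .type = 1, .id = 11, .name = "cluster" },
        .slot_number = slot,
    };

    int main(void)
    {
        struct head *h = &ops.head;                       /* what the registry stores */
        struct cluster_ops *c = (struct cluster_ops *)h;  /* head -> outer struct */

        printf("%s slot %d\n", c->head.name, c->slot_number());
        return 0;
    }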
+6
drivers/md/md-cluster.h
··· 10 10 struct md_rdev; 11 11 12 12 struct md_cluster_operations { 13 + struct md_submodule_head head; 14 + 13 15 int (*join)(struct mddev *mddev, int nodes); 14 16 int (*leave)(struct mddev *mddev); 15 17 int (*slot_number)(struct mddev *mddev); ··· 36 34 void (*unlock_all_bitmaps)(struct mddev *mddev); 37 35 void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors); 38 36 }; 37 + 38 + extern int md_setup_cluster(struct mddev *mddev, int nodes); 39 + extern void md_cluster_stop(struct mddev *mddev); 40 + extern void md_reload_sb(struct mddev *mddev, int raid_disk); 39 41 40 42 #endif /* _MD_CLUSTER_H */
+9 -6
drivers/md/md-linear.c
··· 5 5 */ 6 6 7 7 #include <linux/blkdev.h> 8 - #include <linux/raid/md_u.h> 9 8 #include <linux/seq_file.h> 10 9 #include <linux/module.h> 11 10 #include <linux/slab.h> ··· 319 320 } 320 321 321 322 static struct md_personality linear_personality = { 322 - .name = "linear", 323 - .level = LEVEL_LINEAR, 324 - .owner = THIS_MODULE, 323 + .head = { 324 + .type = MD_PERSONALITY, 325 + .id = ID_LINEAR, 326 + .name = "linear", 327 + .owner = THIS_MODULE, 328 + }, 329 + 325 330 .make_request = linear_make_request, 326 331 .run = linear_run, 327 332 .free = linear_free, ··· 338 335 339 336 static int __init linear_init(void) 340 337 { 341 - return register_md_personality(&linear_personality); 338 + return register_md_submodule(&linear_personality.head); 342 339 } 343 340 344 341 static void linear_exit(void) 345 342 { 346 - unregister_md_personality(&linear_personality); 343 + unregister_md_submodule(&linear_personality.head); 347 344 } 348 345 349 346 module_init(linear_init);
+150 -153
drivers/md/md.c
··· 79 79 [ACTION_IDLE] = "idle", 80 80 }; 81 81 82 - /* pers_list is a list of registered personalities protected by pers_lock. */ 83 - static LIST_HEAD(pers_list); 84 - static DEFINE_SPINLOCK(pers_lock); 82 + static DEFINE_XARRAY(md_submodule); 85 83 86 84 static const struct kobj_type md_ktype; 87 - 88 - const struct md_cluster_operations *md_cluster_ops; 89 - EXPORT_SYMBOL(md_cluster_ops); 90 - static struct module *md_cluster_mod; 91 85 92 86 static DECLARE_WAIT_QUEUE_HEAD(resync_wait); 93 87 static struct workqueue_struct *md_wq; ··· 623 629 queue_work(md_misc_wq, &mddev->del_work); 624 630 } 625 631 632 + static void mddev_put_locked(struct mddev *mddev) 633 + { 634 + if (atomic_dec_and_test(&mddev->active)) 635 + __mddev_put(mddev); 636 + } 637 + 626 638 void mddev_put(struct mddev *mddev) 627 639 { 628 640 if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) ··· 888 888 } 889 889 EXPORT_SYMBOL_GPL(md_find_rdev_rcu); 890 890 891 - static struct md_personality *find_pers(int level, char *clevel) 891 + static struct md_personality *get_pers(int level, char *clevel) 892 892 { 893 - struct md_personality *pers; 894 - list_for_each_entry(pers, &pers_list, list) { 895 - if (level != LEVEL_NONE && pers->level == level) 896 - return pers; 897 - if (strcmp(pers->name, clevel)==0) 898 - return pers; 893 + struct md_personality *ret = NULL; 894 + struct md_submodule_head *head; 895 + unsigned long i; 896 + 897 + xa_lock(&md_submodule); 898 + xa_for_each(&md_submodule, i, head) { 899 + if (head->type != MD_PERSONALITY) 900 + continue; 901 + if ((level != LEVEL_NONE && head->id == level) || 902 + !strcmp(head->name, clevel)) { 903 + if (try_module_get(head->owner)) 904 + ret = (void *)head; 905 + break; 906 + } 899 907 } 900 - return NULL; 908 + xa_unlock(&md_submodule); 909 + 910 + if (!ret) { 911 + if (level != LEVEL_NONE) 912 + pr_warn("md: personality for level %d is not loaded!\n", 913 + level); 914 + else 915 + pr_warn("md: personality for level %s is not loaded!\n", 916 + clevel); 917 + } 918 + 919 + return ret; 920 + } 921 + 922 + static void put_pers(struct md_personality *pers) 923 + { 924 + module_put(pers->head.owner); 901 925 } 902 926 903 927 /* return the offset of the super block in 512byte sectors */ ··· 1204 1180 if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset) 1205 1181 return 0; 1206 1182 pr_warn("%s: bitmaps are not supported for %s\n", 1207 - mdname(mddev), mddev->pers->name); 1183 + mdname(mddev), mddev->pers->head.name); 1208 1184 return 1; 1209 1185 } 1210 1186 EXPORT_SYMBOL(md_check_no_bitmap); ··· 2650 2626 force_change = 1; 2651 2627 if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags)) 2652 2628 nospares = 1; 2653 - ret = md_cluster_ops->metadata_update_start(mddev); 2629 + ret = mddev->cluster_ops->metadata_update_start(mddev); 2654 2630 /* Has someone else has updated the sb */ 2655 2631 if (!does_sb_need_changing(mddev)) { 2656 2632 if (ret == 0) 2657 - md_cluster_ops->metadata_update_cancel(mddev); 2633 + mddev->cluster_ops->metadata_update_cancel(mddev); 2658 2634 bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING), 2659 2635 BIT(MD_SB_CHANGE_DEVS) | 2660 2636 BIT(MD_SB_CHANGE_CLEAN)); ··· 2794 2770 /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */ 2795 2771 2796 2772 if (mddev_is_clustered(mddev) && ret == 0) 2797 - md_cluster_ops->metadata_update_finish(mddev); 2773 + mddev->cluster_ops->metadata_update_finish(mddev); 2798 2774 2799 2775 if (mddev->in_sync != sync_req || 2800 2776 
!bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING), ··· 2953 2929 else { 2954 2930 err = 0; 2955 2931 if (mddev_is_clustered(mddev)) 2956 - err = md_cluster_ops->remove_disk(mddev, rdev); 2932 + err = mddev->cluster_ops->remove_disk(mddev, rdev); 2957 2933 2958 2934 if (err == 0) { 2959 2935 md_kick_rdev_from_array(rdev); ··· 3063 3039 * by this node eventually 3064 3040 */ 3065 3041 if (!mddev_is_clustered(rdev->mddev) || 3066 - (err = md_cluster_ops->gather_bitmaps(rdev)) == 0) { 3042 + (err = mddev->cluster_ops->gather_bitmaps(rdev)) == 0) { 3067 3043 clear_bit(Faulty, &rdev->flags); 3068 3044 err = add_bound_rdev(rdev); 3069 3045 } ··· 3871 3847 spin_lock(&mddev->lock); 3872 3848 p = mddev->pers; 3873 3849 if (p) 3874 - ret = sprintf(page, "%s\n", p->name); 3850 + ret = sprintf(page, "%s\n", p->head.name); 3875 3851 else if (mddev->clevel[0]) 3876 3852 ret = sprintf(page, "%s\n", mddev->clevel); 3877 3853 else if (mddev->level != LEVEL_NONE) ··· 3928 3904 rv = -EINVAL; 3929 3905 if (!mddev->pers->quiesce) { 3930 3906 pr_warn("md: %s: %s does not support online personality change\n", 3931 - mdname(mddev), mddev->pers->name); 3907 + mdname(mddev), mddev->pers->head.name); 3932 3908 goto out_unlock; 3933 3909 } 3934 3910 ··· 3942 3918 3943 3919 if (request_module("md-%s", clevel) != 0) 3944 3920 request_module("md-level-%s", clevel); 3945 - spin_lock(&pers_lock); 3946 - pers = find_pers(level, clevel); 3947 - if (!pers || !try_module_get(pers->owner)) { 3948 - spin_unlock(&pers_lock); 3949 - pr_warn("md: personality %s not loaded\n", clevel); 3921 + pers = get_pers(level, clevel); 3922 + if (!pers) { 3950 3923 rv = -EINVAL; 3951 3924 goto out_unlock; 3952 3925 } 3953 - spin_unlock(&pers_lock); 3954 3926 3955 3927 if (pers == mddev->pers) { 3956 3928 /* Nothing to do! 
*/ 3957 - module_put(pers->owner); 3929 + put_pers(pers); 3958 3930 rv = len; 3959 3931 goto out_unlock; 3960 3932 } 3961 3933 if (!pers->takeover) { 3962 - module_put(pers->owner); 3934 + put_pers(pers); 3963 3935 pr_warn("md: %s: %s does not support personality takeover\n", 3964 3936 mdname(mddev), clevel); 3965 3937 rv = -EINVAL; ··· 3976 3956 mddev->raid_disks -= mddev->delta_disks; 3977 3957 mddev->delta_disks = 0; 3978 3958 mddev->reshape_backwards = 0; 3979 - module_put(pers->owner); 3959 + put_pers(pers); 3980 3960 pr_warn("md: %s: %s would not accept array\n", 3981 3961 mdname(mddev), clevel); 3982 3962 rv = PTR_ERR(priv); ··· 3991 3971 oldpriv = mddev->private; 3992 3972 mddev->pers = pers; 3993 3973 mddev->private = priv; 3994 - strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 3974 + strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel)); 3995 3975 mddev->level = mddev->new_level; 3996 3976 mddev->layout = mddev->new_layout; 3997 3977 mddev->chunk_sectors = mddev->new_chunk_sectors; ··· 4033 4013 mddev->to_remove = &md_redundancy_group; 4034 4014 } 4035 4015 4036 - module_put(oldpers->owner); 4016 + put_pers(oldpers); 4037 4017 4038 4018 rdev_for_each(rdev, mddev) { 4039 4019 if (rdev->raid_disk < 0) ··· 5591 5571 5592 5572 static ssize_t serialize_policy_show(struct mddev *mddev, char *page) 5593 5573 { 5594 - if (mddev->pers == NULL || (mddev->pers->level != 1)) 5574 + if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1)) 5595 5575 return sprintf(page, "n/a\n"); 5596 5576 else 5597 5577 return sprintf(page, "%d\n", mddev->serialize_policy); ··· 5617 5597 err = mddev_suspend_and_lock(mddev); 5618 5598 if (err) 5619 5599 return err; 5620 - if (mddev->pers == NULL || (mddev->pers->level != 1)) { 5600 + if (mddev->pers == NULL || (mddev->pers->head.id != ID_RAID1)) { 5621 5601 pr_err("md: serialize_policy is only effective for raid1\n"); 5622 5602 err = -EINVAL; 5623 5603 goto unlock; ··· 6103 6083 goto exit_sync_set; 6104 6084 } 6105 6085 6106 - spin_lock(&pers_lock); 6107 - pers = find_pers(mddev->level, mddev->clevel); 6108 - if (!pers || !try_module_get(pers->owner)) { 6109 - spin_unlock(&pers_lock); 6110 - if (mddev->level != LEVEL_NONE) 6111 - pr_warn("md: personality for level %d is not loaded!\n", 6112 - mddev->level); 6113 - else 6114 - pr_warn("md: personality for level %s is not loaded!\n", 6115 - mddev->clevel); 6086 + pers = get_pers(mddev->level, mddev->clevel); 6087 + if (!pers) { 6116 6088 err = -EINVAL; 6117 6089 goto abort; 6118 6090 } 6119 - spin_unlock(&pers_lock); 6120 - if (mddev->level != pers->level) { 6121 - mddev->level = pers->level; 6122 - mddev->new_level = pers->level; 6091 + if (mddev->level != pers->head.id) { 6092 + mddev->level = pers->head.id; 6093 + mddev->new_level = pers->head.id; 6123 6094 } 6124 - strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); 6095 + strscpy(mddev->clevel, pers->head.name, sizeof(mddev->clevel)); 6125 6096 6126 6097 if (mddev->reshape_position != MaxSector && 6127 6098 pers->start_reshape == NULL) { 6128 6099 /* This personality cannot handle reshaping... 
*/ 6129 - module_put(pers->owner); 6100 + put_pers(pers); 6130 6101 err = -EINVAL; 6131 6102 goto abort; 6132 6103 } ··· 6244 6233 if (mddev->private) 6245 6234 pers->free(mddev, mddev->private); 6246 6235 mddev->private = NULL; 6247 - module_put(pers->owner); 6236 + put_pers(pers); 6248 6237 mddev->bitmap_ops->destroy(mddev); 6249 6238 abort: 6250 6239 bioset_exit(&mddev->io_clone_set); ··· 6465 6454 mddev->private = NULL; 6466 6455 if (pers->sync_request && mddev->to_remove == NULL) 6467 6456 mddev->to_remove = &md_redundancy_group; 6468 - module_put(pers->owner); 6457 + put_pers(pers); 6469 6458 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 6470 6459 6471 6460 bioset_exit(&mddev->bio_set); ··· 6981 6970 set_bit(Candidate, &rdev->flags); 6982 6971 else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) { 6983 6972 /* --add initiated by this node */ 6984 - err = md_cluster_ops->add_new_disk(mddev, rdev); 6973 + err = mddev->cluster_ops->add_new_disk(mddev, rdev); 6985 6974 if (err) { 6986 6975 export_rdev(rdev, mddev); 6987 6976 return err; ··· 6998 6987 if (mddev_is_clustered(mddev)) { 6999 6988 if (info->state & (1 << MD_DISK_CANDIDATE)) { 7000 6989 if (!err) { 7001 - err = md_cluster_ops->new_disk_ack(mddev, 7002 - err == 0); 6990 + err = mddev->cluster_ops->new_disk_ack( 6991 + mddev, err == 0); 7003 6992 if (err) 7004 6993 md_kick_rdev_from_array(rdev); 7005 6994 } 7006 6995 } else { 7007 6996 if (err) 7008 - md_cluster_ops->add_new_disk_cancel(mddev); 6997 + mddev->cluster_ops->add_new_disk_cancel(mddev); 7009 6998 else 7010 6999 err = add_bound_rdev(rdev); 7011 7000 } ··· 7085 7074 goto busy; 7086 7075 7087 7076 kick_rdev: 7088 - if (mddev_is_clustered(mddev)) { 7089 - if (md_cluster_ops->remove_disk(mddev, rdev)) 7090 - goto busy; 7091 - } 7077 + if (mddev_is_clustered(mddev) && 7078 + mddev->cluster_ops->remove_disk(mddev, rdev)) 7079 + goto busy; 7092 7080 7093 7081 md_kick_rdev_from_array(rdev); 7094 7082 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); ··· 7390 7380 rv = mddev->pers->resize(mddev, num_sectors); 7391 7381 if (!rv) { 7392 7382 if (mddev_is_clustered(mddev)) 7393 - md_cluster_ops->update_size(mddev, old_dev_sectors); 7383 + mddev->cluster_ops->update_size(mddev, old_dev_sectors); 7394 7384 else if (!mddev_is_dm(mddev)) 7395 7385 set_capacity_and_notify(mddev->gendisk, 7396 7386 mddev->array_sectors); ··· 7436 7426 mddev->reshape_backwards = 0; 7437 7427 } 7438 7428 return rv; 7429 + } 7430 + 7431 + static int get_cluster_ops(struct mddev *mddev) 7432 + { 7433 + xa_lock(&md_submodule); 7434 + mddev->cluster_ops = xa_load(&md_submodule, ID_CLUSTER); 7435 + if (mddev->cluster_ops && 7436 + !try_module_get(mddev->cluster_ops->head.owner)) 7437 + mddev->cluster_ops = NULL; 7438 + xa_unlock(&md_submodule); 7439 + 7440 + return mddev->cluster_ops == NULL ? 
-ENOENT : 0; 7441 + } 7442 + 7443 + static void put_cluster_ops(struct mddev *mddev) 7444 + { 7445 + if (!mddev->cluster_ops) 7446 + return; 7447 + 7448 + mddev->cluster_ops->leave(mddev); 7449 + module_put(mddev->cluster_ops->head.owner); 7450 + mddev->cluster_ops = NULL; 7439 7451 } 7440 7452 7441 7453 /* ··· 7568 7536 7569 7537 if (mddev->bitmap_info.nodes) { 7570 7538 /* hold PW on all the bitmap lock */ 7571 - if (md_cluster_ops->lock_all_bitmaps(mddev) <= 0) { 7539 + if (mddev->cluster_ops->lock_all_bitmaps(mddev) <= 0) { 7572 7540 pr_warn("md: can't change bitmap to none since the array is in use by more than one node\n"); 7573 7541 rv = -EPERM; 7574 - md_cluster_ops->unlock_all_bitmaps(mddev); 7542 + mddev->cluster_ops->unlock_all_bitmaps(mddev); 7575 7543 goto err; 7576 7544 } 7577 7545 7578 7546 mddev->bitmap_info.nodes = 0; 7579 - md_cluster_ops->leave(mddev); 7580 - module_put(md_cluster_mod); 7547 + put_cluster_ops(mddev); 7581 7548 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY; 7582 7549 } 7583 7550 mddev->bitmap_ops->destroy(mddev); ··· 7860 7829 7861 7830 case CLUSTERED_DISK_NACK: 7862 7831 if (mddev_is_clustered(mddev)) 7863 - md_cluster_ops->new_disk_ack(mddev, false); 7832 + mddev->cluster_ops->new_disk_ack(mddev, false); 7864 7833 else 7865 7834 err = -EINVAL; 7866 7835 goto unlock; ··· 8142 8111 return; 8143 8112 mddev->pers->error_handler(mddev, rdev); 8144 8113 8145 - if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR) 8114 + if (mddev->pers->head.id == ID_RAID0 || 8115 + mddev->pers->head.id == ID_LINEAR) 8146 8116 return; 8147 8117 8148 8118 if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags)) ··· 8181 8149 8182 8150 static void status_personalities(struct seq_file *seq) 8183 8151 { 8184 - struct md_personality *pers; 8152 + struct md_submodule_head *head; 8153 + unsigned long i; 8185 8154 8186 8155 seq_puts(seq, "Personalities : "); 8187 - spin_lock(&pers_lock); 8188 - list_for_each_entry(pers, &pers_list, list) 8189 - seq_printf(seq, "[%s] ", pers->name); 8190 8156 8191 - spin_unlock(&pers_lock); 8157 + xa_lock(&md_submodule); 8158 + xa_for_each(&md_submodule, i, head) 8159 + if (head->type == MD_PERSONALITY) 8160 + seq_printf(seq, "[%s] ", head->name); 8161 + xa_unlock(&md_submodule); 8162 + 8192 8163 seq_puts(seq, "\n"); 8193 8164 } 8194 8165 ··· 8414 8379 seq_printf(seq, " (read-only)"); 8415 8380 if (mddev->ro == MD_AUTO_READ) 8416 8381 seq_printf(seq, " (auto-read-only)"); 8417 - seq_printf(seq, " %s", mddev->pers->name); 8382 + seq_printf(seq, " %s", mddev->pers->head.name); 8418 8383 } else { 8419 8384 seq_printf(seq, "inactive"); 8420 8385 } ··· 8483 8448 if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs)) 8484 8449 status_unused(seq); 8485 8450 8486 - if (atomic_dec_and_test(&mddev->active)) 8487 - __mddev_put(mddev); 8488 - 8451 + mddev_put_locked(mddev); 8489 8452 return 0; 8490 8453 } 8491 8454 ··· 8534 8501 .proc_poll = mdstat_poll, 8535 8502 }; 8536 8503 8537 - int register_md_personality(struct md_personality *p) 8504 + int register_md_submodule(struct md_submodule_head *msh) 8538 8505 { 8539 - pr_debug("md: %s personality registered for level %d\n", 8540 - p->name, p->level); 8541 - spin_lock(&pers_lock); 8542 - list_add_tail(&p->list, &pers_list); 8543 - spin_unlock(&pers_lock); 8544 - return 0; 8506 + return xa_insert(&md_submodule, msh->id, msh, GFP_KERNEL); 8545 8507 } 8546 - EXPORT_SYMBOL(register_md_personality); 8508 + EXPORT_SYMBOL_GPL(register_md_submodule); 8547 8509 8548 - int 
unregister_md_personality(struct md_personality *p) 8510 + void unregister_md_submodule(struct md_submodule_head *msh) 8549 8511 { 8550 - pr_debug("md: %s personality unregistered\n", p->name); 8551 - spin_lock(&pers_lock); 8552 - list_del_init(&p->list); 8553 - spin_unlock(&pers_lock); 8554 - return 0; 8512 + xa_erase(&md_submodule, msh->id); 8555 8513 } 8556 - EXPORT_SYMBOL(unregister_md_personality); 8557 - 8558 - int register_md_cluster_operations(const struct md_cluster_operations *ops, 8559 - struct module *module) 8560 - { 8561 - int ret = 0; 8562 - spin_lock(&pers_lock); 8563 - if (md_cluster_ops != NULL) 8564 - ret = -EALREADY; 8565 - else { 8566 - md_cluster_ops = ops; 8567 - md_cluster_mod = module; 8568 - } 8569 - spin_unlock(&pers_lock); 8570 - return ret; 8571 - } 8572 - EXPORT_SYMBOL(register_md_cluster_operations); 8573 - 8574 - int unregister_md_cluster_operations(void) 8575 - { 8576 - spin_lock(&pers_lock); 8577 - md_cluster_ops = NULL; 8578 - spin_unlock(&pers_lock); 8579 - return 0; 8580 - } 8581 - EXPORT_SYMBOL(unregister_md_cluster_operations); 8514 + EXPORT_SYMBOL_GPL(unregister_md_submodule); 8582 8515 8583 8516 int md_setup_cluster(struct mddev *mddev, int nodes) 8584 8517 { 8585 - int ret; 8586 - if (!md_cluster_ops) 8587 - request_module("md-cluster"); 8588 - spin_lock(&pers_lock); 8589 - /* ensure module won't be unloaded */ 8590 - if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { 8591 - pr_warn("can't find md-cluster module or get its reference.\n"); 8592 - spin_unlock(&pers_lock); 8593 - return -ENOENT; 8594 - } 8595 - spin_unlock(&pers_lock); 8518 + int ret = get_cluster_ops(mddev); 8596 8519 8597 - ret = md_cluster_ops->join(mddev, nodes); 8520 + if (ret) { 8521 + request_module("md-cluster"); 8522 + ret = get_cluster_ops(mddev); 8523 + } 8524 + 8525 + /* ensure module won't be unloaded */ 8526 + if (ret) { 8527 + pr_warn("can't find md-cluster module or get its reference.\n"); 8528 + return ret; 8529 + } 8530 + 8531 + ret = mddev->cluster_ops->join(mddev, nodes); 8598 8532 if (!ret) 8599 8533 mddev->safemode_delay = 0; 8600 8534 return ret; ··· 8569 8569 8570 8570 void md_cluster_stop(struct mddev *mddev) 8571 8571 { 8572 - if (!md_cluster_ops) 8573 - return; 8574 - md_cluster_ops->leave(mddev); 8575 - module_put(md_cluster_mod); 8572 + put_cluster_ops(mddev); 8576 8573 } 8577 8574 8578 8575 static int is_mddev_idle(struct mddev *mddev, int init) ··· 8962 8965 } 8963 8966 8964 8967 if (mddev_is_clustered(mddev)) { 8965 - ret = md_cluster_ops->resync_start(mddev); 8968 + ret = mddev->cluster_ops->resync_start(mddev); 8966 8969 if (ret) 8967 8970 goto skip; 8968 8971 ··· 8989 8992 * 8990 8993 */ 8991 8994 if (mddev_is_clustered(mddev)) 8992 - md_cluster_ops->resync_start_notify(mddev); 8995 + mddev->cluster_ops->resync_start_notify(mddev); 8993 8996 do { 8994 8997 int mddev2_minor = -1; 8995 8998 mddev->curr_resync = MD_RESYNC_DELAYED; ··· 9444 9447 return true; 9445 9448 } 9446 9449 9450 + /* Check if resync is in progress. */ 9451 + if (mddev->recovery_cp < MaxSector) { 9452 + set_bit(MD_RECOVERY_SYNC, &mddev->recovery); 9453 + clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 9454 + return true; 9455 + } 9456 + 9447 9457 /* 9448 9458 * Remove any failed drives, then add spares if possible. Spares are 9449 9459 * also removed and re-added, to allow the personality to fail the ··· 9464 9460 9465 9461 /* Start new recovery. 
*/ 9466 9462 set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 9467 - return true; 9468 - } 9469 - 9470 - /* Check if recovery is in progress. */ 9471 - if (mddev->recovery_cp < MaxSector) { 9472 - set_bit(MD_RECOVERY_SYNC, &mddev->recovery); 9473 - clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); 9474 9463 return true; 9475 9464 } 9476 9465 ··· 9773 9776 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by 9774 9777 * clustered raid */ 9775 9778 if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags)) 9776 - md_cluster_ops->resync_finish(mddev); 9779 + mddev->cluster_ops->resync_finish(mddev); 9777 9780 clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 9778 9781 clear_bit(MD_RECOVERY_DONE, &mddev->recovery); 9779 9782 clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); ··· 9781 9784 clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); 9782 9785 clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); 9783 9786 /* 9784 - * We call md_cluster_ops->update_size here because sync_size could 9787 + * We call mddev->cluster_ops->update_size here because sync_size could 9785 9788 * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared, 9786 9789 * so it is time to update size across cluster. 9787 9790 */ 9788 9791 if (mddev_is_clustered(mddev) && is_reshaped 9789 9792 && !test_bit(MD_CLOSING, &mddev->flags)) 9790 - md_cluster_ops->update_size(mddev, old_dev_sectors); 9793 + mddev->cluster_ops->update_size(mddev, old_dev_sectors); 9791 9794 /* flag recovery needed just to double check */ 9792 9795 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); 9793 9796 sysfs_notify_dirent_safe(mddev->sysfs_completed); ··· 9879 9882 static int md_notify_reboot(struct notifier_block *this, 9880 9883 unsigned long code, void *x) 9881 9884 { 9882 - struct mddev *mddev, *n; 9885 + struct mddev *mddev; 9883 9886 int need_delay = 0; 9884 9887 9885 9888 spin_lock(&all_mddevs_lock); 9886 - list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { 9889 + list_for_each_entry(mddev, &all_mddevs, all_mddevs) { 9887 9890 if (!mddev_get(mddev)) 9888 9891 continue; 9889 9892 spin_unlock(&all_mddevs_lock); ··· 9895 9898 mddev_unlock(mddev); 9896 9899 } 9897 9900 need_delay = 1; 9898 - mddev_put(mddev); 9899 9901 spin_lock(&all_mddevs_lock); 9902 + mddev_put_locked(mddev); 9900 9903 } 9901 9904 spin_unlock(&all_mddevs_lock); 9902 9905 ··· 10013 10016 if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE && 10014 10017 !(le32_to_cpu(sb->feature_map) & 10015 10018 MD_FEATURE_RESHAPE_ACTIVE) && 10016 - !md_cluster_ops->resync_status_get(mddev)) { 10019 + !mddev->cluster_ops->resync_status_get(mddev)) { 10017 10020 /* 10018 10021 * -1 to make raid1_add_disk() set conf->fullsync 10019 10022 * to 1. This could avoid skipping sync when the ··· 10229 10232 10230 10233 static __exit void md_exit(void) 10231 10234 { 10232 - struct mddev *mddev, *n; 10235 + struct mddev *mddev; 10233 10236 int delay = 1; 10234 10237 10235 10238 unregister_blkdev(MD_MAJOR,"md"); ··· 10250 10253 remove_proc_entry("mdstat", NULL); 10251 10254 10252 10255 spin_lock(&all_mddevs_lock); 10253 - list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { 10256 + list_for_each_entry(mddev, &all_mddevs, all_mddevs) { 10254 10257 if (!mddev_get(mddev)) 10255 10258 continue; 10256 10259 spin_unlock(&all_mddevs_lock); ··· 10262 10265 * the mddev for destruction by a workqueue, and the 10263 10266 * destroy_workqueue() below will wait for that to complete. 
10264 10267 */ 10265 - mddev_put(mddev); 10266 10268 spin_lock(&all_mddevs_lock); 10269 + mddev_put_locked(mddev); 10267 10270 } 10268 10271 spin_unlock(&all_mddevs_lock); 10269 10272
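
With the md_cluster_ops/md_cluster_mod globals gone, each mddev pins its own cluster provider: get_cluster_ops() looks the ops up in the registry under the xarray lock and takes a module reference, put_cluster_ops() drops it again. A userspace model of the pin/unpin pairing, with a counter standing in for the module refcount (the real put_cluster_ops() also calls ->leave(mddev)):

    #include <stdio.h>

    struct cluster_ops {
        int refs;                   /* stands in for the module refcount */
        int (*slot_number)(void);
    };

    static struct cluster_ops *registered;  /* xa_load(&md_submodule, ID_CLUSTER) */

    static struct cluster_ops *get_cluster_ops(void)
    {
        struct cluster_ops *ops = registered;   /* under xa_lock in the kernel */

        if (ops)
            ops->refs++;                        /* try_module_get() */
        return ops;
    }

    static void put_cluster_ops(struct cluster_ops *ops)
    {
        if (ops)
            ops->refs--;                        /* module_put() */
    }

    static int my_slot(void) { return 0; }

    int main(void)
    {
        struct cluster_ops cops = { 0, my_slot };

        registered = &cops;
        struct cluster_ops *ops = get_cluster_ops();
        if (ops)
            printf("slot %d, refs %d\n", ops->slot_number(), ops->refs);
        put_cluster_ops(ops);
        return 0;
    }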
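
The reshuffled hunk near the end of md.c is the "ensure resync is prioritized over recovery" fix: the interrupted-resync check (recovery_cp < MaxSector) moves ahead of the spare handling, so a hot-added spare can no longer start recovery while a resync is still pending. Reduced to its decision logic:

    #include <stdio.h>

    #define MaxSector (~0ULL)

    static const char *choose_sync_action(unsigned long long recovery_cp, int spares)
    {
        if (recovery_cp < MaxSector)    /* moved ahead of the spare check */
            return "resync";
        if (spares)
            return "recovery";
        return "idle";
    }

    int main(void)
    {
        /* interrupted resync plus a fresh spare: resync must win */
        printf("%s\n", choose_sync_action(1000, 1));
        printf("%s\n", choose_sync_action(MaxSector, 1));
        return 0;
    }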
+34 -14
drivers/md/md.h
··· 18 18 #include <linux/timer.h> 19 19 #include <linux/wait.h> 20 20 #include <linux/workqueue.h> 21 + #include <linux/raid/md_u.h> 21 22 #include <trace/events/block.h> 22 - #include "md-cluster.h" 23 23 24 24 #define MaxSector (~(sector_t)0) 25 + 26 + enum md_submodule_type { 27 + MD_PERSONALITY = 0, 28 + MD_CLUSTER, 29 + MD_BITMAP, /* TODO */ 30 + }; 31 + 32 + enum md_submodule_id { 33 + ID_LINEAR = LEVEL_LINEAR, 34 + ID_RAID0 = 0, 35 + ID_RAID1 = 1, 36 + ID_RAID4 = 4, 37 + ID_RAID5 = 5, 38 + ID_RAID6 = 6, 39 + ID_RAID10 = 10, 40 + ID_CLUSTER, 41 + ID_BITMAP, /* TODO */ 42 + ID_LLBITMAP, /* TODO */ 43 + }; 44 + 45 + struct md_submodule_head { 46 + enum md_submodule_type type; 47 + enum md_submodule_id id; 48 + const char *name; 49 + struct module *owner; 50 + }; 25 51 26 52 /* 27 53 * These flags should really be called "NO_RETRY" rather than ··· 320 294 extern void rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, 321 295 int is_new); 322 296 struct md_cluster_info; 297 + struct md_cluster_operations; 323 298 324 299 /** 325 300 * enum mddev_flags - md device flags. ··· 603 576 mempool_t *serial_info_pool; 604 577 void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); 605 578 struct md_cluster_info *cluster_info; 579 + struct md_cluster_operations *cluster_ops; 606 580 unsigned int good_device_nr; /* good device num within cluster raid */ 607 581 unsigned int noio_flag; /* for memalloc scope API */ 608 582 ··· 727 699 728 700 struct md_personality 729 701 { 730 - char *name; 731 - int level; 732 - struct list_head list; 733 - struct module *owner; 702 + struct md_submodule_head head; 703 + 734 704 bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio); 735 705 /* 736 706 * start up works that do NOT require md_thread. tasks that ··· 869 843 if (p) put_page(p); 870 844 } 871 845 872 - extern int register_md_personality(struct md_personality *p); 873 - extern int unregister_md_personality(struct md_personality *p); 874 - extern int register_md_cluster_operations(const struct md_cluster_operations *ops, 875 - struct module *module); 876 - extern int unregister_md_cluster_operations(void); 877 - extern int md_setup_cluster(struct mddev *mddev, int nodes); 878 - extern void md_cluster_stop(struct mddev *mddev); 846 + int register_md_submodule(struct md_submodule_head *msh); 847 + void unregister_md_submodule(struct md_submodule_head *msh); 848 + 879 849 extern struct md_thread *md_register_thread( 880 850 void (*run)(struct md_thread *thread), 881 851 struct mddev *mddev, ··· 928 906 extern void md_frozen_sync_thread(struct mddev *mddev); 929 907 extern void md_unfrozen_sync_thread(struct mddev *mddev); 930 908 931 - extern void md_reload_sb(struct mddev *mddev, int raid_disk); 932 909 extern void md_update_sb(struct mddev *mddev, int force); 933 910 extern void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev); 934 911 extern void mddev_destroy_serial_pool(struct mddev *mddev, ··· 949 928 } 950 929 } 951 930 952 - extern const struct md_cluster_operations *md_cluster_ops; 953 931 static inline int mddev_is_clustered(struct mddev *mddev) 954 932 { 955 933 return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
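
The new md_submodule_id enum reuses the classic RAID levels as registry keys, which is what lets get_pers() match on level directly. One quirk worth noting: LEVEL_LINEAR is negative, and xa_insert() takes an unsigned long index, so ID_LINEAR lands at a very high index rather than colliding with the non-negative levels. A small sketch, assuming LEVEL_LINEAR == -1 as defined in include/uapi/linux/raid/md_u.h:

    #include <stdio.h>

    #define LEVEL_LINEAR (-1)   /* from linux/raid/md_u.h */

    enum md_submodule_id {
        ID_LINEAR = LEVEL_LINEAR,
        ID_RAID0  = 0,
        ID_RAID1  = 1,
        ID_RAID10 = 10,
        ID_CLUSTER,             /* 11: first id past the raid levels */
    };

    int main(void)
    {
        /* the negative id wraps to a very high slot as an unsigned index,
         * well away from the raid levels */
        printf("ID_LINEAR as index: %lu\n", (unsigned long)ID_LINEAR);
        printf("ID_CLUSTER = %d\n", ID_CLUSTER);
        return 0;
    }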
+11 -7
drivers/md/raid0.c
··· 811 811 812 812 static struct md_personality raid0_personality= 813 813 { 814 - .name = "raid0", 815 - .level = 0, 816 - .owner = THIS_MODULE, 814 + .head = { 815 + .type = MD_PERSONALITY, 816 + .id = ID_RAID0, 817 + .name = "raid0", 818 + .owner = THIS_MODULE, 819 + }, 820 + 817 821 .make_request = raid0_make_request, 818 822 .run = raid0_run, 819 823 .free = raid0_free, ··· 828 824 .error_handler = raid0_error, 829 825 }; 830 826 831 - static int __init raid0_init (void) 827 + static int __init raid0_init(void) 832 828 { 833 - return register_md_personality (&raid0_personality); 829 + return register_md_submodule(&raid0_personality.head); 834 830 } 835 831 836 - static void raid0_exit (void) 832 + static void __exit raid0_exit(void) 837 833 { 838 - unregister_md_personality (&raid0_personality); 834 + unregister_md_submodule(&raid0_personality.head); 839 835 } 840 836 841 837 module_init(raid0_init);
+2 -2
drivers/md/raid1-10.c
··· 287 287 return true; 288 288 289 289 if (mddev_is_clustered(mddev) && 290 - md_cluster_ops->area_resyncing(mddev, READ, this_sector, 291 - this_sector + len)) 290 + mddev->cluster_ops->area_resyncing(mddev, READ, this_sector, 291 + this_sector + len)) 292 292 return true; 293 293 294 294 return false;
+26 -20
drivers/md/raid1.c
··· 36 36 #include "md.h" 37 37 #include "raid1.h" 38 38 #include "md-bitmap.h" 39 + #include "md-cluster.h" 39 40 40 41 #define UNSUPPORTED_MDDEV_FLAGS \ 41 42 ((1L << MD_HAS_JOURNAL) | \ ··· 46 45 47 46 static void allow_barrier(struct r1conf *conf, sector_t sector_nr); 48 47 static void lower_barrier(struct r1conf *conf, sector_t sector_nr); 48 + static void raid1_free(struct mddev *mddev, void *priv); 49 49 50 50 #define RAID_1_10_NAME "raid1" 51 51 #include "raid1-10.c" ··· 1317 1315 struct r1conf *conf = mddev->private; 1318 1316 struct raid1_info *mirror; 1319 1317 struct bio *read_bio; 1320 - const enum req_op op = bio_op(bio); 1321 - const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; 1322 1318 int max_sectors; 1323 1319 int rdisk, error; 1324 1320 bool r1bio_existed = !!r1_bio; ··· 1404 1404 read_bio->bi_iter.bi_sector = r1_bio->sector + 1405 1405 mirror->rdev->data_offset; 1406 1406 read_bio->bi_end_io = raid1_end_read_request; 1407 - read_bio->bi_opf = op | do_sync; 1408 1407 if (test_bit(FailFast, &mirror->rdev->flags) && 1409 1408 test_bit(R1BIO_FailFast, &r1_bio->state)) 1410 1409 read_bio->bi_opf |= MD_FAILFAST; ··· 1466 1467 bool is_discard = (bio_op(bio) == REQ_OP_DISCARD); 1467 1468 1468 1469 if (mddev_is_clustered(mddev) && 1469 - md_cluster_ops->area_resyncing(mddev, WRITE, 1470 + mddev->cluster_ops->area_resyncing(mddev, WRITE, 1470 1471 bio->bi_iter.bi_sector, bio_end_sector(bio))) { 1471 1472 1472 1473 DEFINE_WAIT(w); ··· 1477 1478 for (;;) { 1478 1479 prepare_to_wait(&conf->wait_barrier, 1479 1480 &w, TASK_IDLE); 1480 - if (!md_cluster_ops->area_resyncing(mddev, WRITE, 1481 + if (!mddev->cluster_ops->area_resyncing(mddev, WRITE, 1481 1482 bio->bi_iter.bi_sector, 1482 1483 bio_end_sector(bio))) 1483 1484 break; ··· 1652 1653 1653 1654 mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset); 1654 1655 mbio->bi_end_io = raid1_end_write_request; 1655 - mbio->bi_opf = bio_op(bio) | 1656 - (bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC)); 1657 1656 if (test_bit(FailFast, &rdev->flags) && 1658 1657 !test_bit(WriteMostly, &rdev->flags) && 1659 1658 conf->raid_disks - mddev->degraded > 1) ··· 3035 3038 conf->cluster_sync_low = mddev->curr_resync_completed; 3036 3039 conf->cluster_sync_high = conf->cluster_sync_low + CLUSTER_RESYNC_WINDOW_SECTORS; 3037 3040 /* Send resync message */ 3038 - md_cluster_ops->resync_info_update(mddev, 3039 - conf->cluster_sync_low, 3040 - conf->cluster_sync_high); 3041 + mddev->cluster_ops->resync_info_update(mddev, 3042 + conf->cluster_sync_low, 3043 + conf->cluster_sync_high); 3041 3044 } 3042 3045 3043 3046 /* For a user-requested sync, we read all readable devices and do a ··· 3255 3258 3256 3259 if (!mddev_is_dm(mddev)) { 3257 3260 ret = raid1_set_limits(mddev); 3258 - if (ret) 3261 + if (ret) { 3262 + if (!mddev->private) 3263 + raid1_free(mddev, conf); 3259 3264 return ret; 3265 + } 3260 3266 } 3261 3267 3262 3268 mddev->degraded = 0; ··· 3273 3273 */ 3274 3274 if (conf->raid_disks - mddev->degraded < 1) { 3275 3275 md_unregister_thread(mddev, &conf->thread); 3276 + if (!mddev->private) 3277 + raid1_free(mddev, conf); 3276 3278 return -EINVAL; 3277 3279 } 3278 3280 ··· 3495 3493 3496 3494 static struct md_personality raid1_personality = 3497 3495 { 3498 - .name = "raid1", 3499 - .level = 1, 3500 - .owner = THIS_MODULE, 3496 + .head = { 3497 + .type = MD_PERSONALITY, 3498 + .id = ID_RAID1, 3499 + .name = "raid1", 3500 + .owner = THIS_MODULE, 3501 + }, 3502 + 3501 3503 .make_request = raid1_make_request, 3502 3504 .run = raid1_run, 
3503 3505 .free = raid1_free, ··· 3518 3512 .takeover = raid1_takeover, 3519 3513 }; 3520 3514 3521 - static int __init raid_init(void) 3515 + static int __init raid1_init(void) 3522 3516 { 3523 - return register_md_personality(&raid1_personality); 3517 + return register_md_submodule(&raid1_personality.head); 3524 3518 } 3525 3519 3526 - static void raid_exit(void) 3520 + static void __exit raid1_exit(void) 3527 3521 { 3528 - unregister_md_personality(&raid1_personality); 3522 + unregister_md_submodule(&raid1_personality.head); 3529 3523 } 3530 3524 3531 - module_init(raid_init); 3532 - module_exit(raid_exit); 3525 + module_init(raid1_init); 3526 + module_exit(raid1_exit); 3533 3527 MODULE_LICENSE("GPL"); 3534 3528 MODULE_DESCRIPTION("RAID1 (mirroring) personality for MD"); 3535 3529 MODULE_ALIAS("md-personality-3"); /* RAID1 */
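
The raid1_run() hunks fix the leak named in the merge summary: conf is allocated by setup_conf() before mddev->private is assigned, so the early error paths (limits setup failure, no active rdev) used to return with nobody owning conf. The fix frees it explicitly whenever mddev->private is still NULL. A userspace model of that ownership rule:

    #include <stdio.h>
    #include <stdlib.h>

    struct mddev { void *private; };
    struct r1conf { int raid_disks; };

    static void raid1_free(struct mddev *mddev, void *priv)
    {
        (void)mddev;
        free(priv);
    }

    static int raid1_run(struct mddev *mddev, int active_rdevs)
    {
        struct r1conf *conf = mddev->private;

        if (!conf)
            conf = calloc(1, sizeof(*conf));    /* setup_conf() */
        if (!conf)
            return -1;

        if (active_rdevs < 1) {                 /* fails before private is set */
            if (!mddev->private)
                raid1_free(mddev, conf);        /* the fix: don't leak conf */
            return -1;
        }

        mddev->private = conf;                  /* success: mddev owns it now */
        return 0;
    }

    int main(void)
    {
        struct mddev md = { NULL };

        printf("run: %d (conf freed on failure)\n", raid1_run(&md, 0));
        return 0;
    }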
+24 -28
drivers/md/raid10.c
··· 24 24 #include "raid10.h" 25 25 #include "raid0.h" 26 26 #include "md-bitmap.h" 27 + #include "md-cluster.h" 27 28 28 29 /* 29 30 * RAID10 provides a combination of RAID0 and RAID1 functionality. ··· 1147 1146 { 1148 1147 struct r10conf *conf = mddev->private; 1149 1148 struct bio *read_bio; 1150 - const enum req_op op = bio_op(bio); 1151 - const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; 1152 1149 int max_sectors; 1153 1150 struct md_rdev *rdev; 1154 1151 char b[BDEVNAME_SIZE]; ··· 1227 1228 read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr + 1228 1229 choose_data_offset(r10_bio, rdev); 1229 1230 read_bio->bi_end_io = raid10_end_read_request; 1230 - read_bio->bi_opf = op | do_sync; 1231 1231 if (test_bit(FailFast, &rdev->flags) && 1232 1232 test_bit(R10BIO_FailFast, &r10_bio->state)) 1233 1233 read_bio->bi_opf |= MD_FAILFAST; ··· 1245 1247 struct bio *bio, bool replacement, 1246 1248 int n_copy) 1247 1249 { 1248 - const enum req_op op = bio_op(bio); 1249 - const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC; 1250 - const blk_opf_t do_fua = bio->bi_opf & REQ_FUA; 1251 - const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC; 1252 1250 unsigned long flags; 1253 1251 struct r10conf *conf = mddev->private; 1254 1252 struct md_rdev *rdev; ··· 1263 1269 mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr + 1264 1270 choose_data_offset(r10_bio, rdev)); 1265 1271 mbio->bi_end_io = raid10_end_write_request; 1266 - mbio->bi_opf = op | do_sync | do_fua | do_atomic; 1267 1272 if (!replacement && test_bit(FailFast, 1268 1273 &conf->mirrors[devnum].rdev->flags) 1269 1274 && enough(conf, devnum)) ··· 1348 1355 int error; 1349 1356 1350 1357 if ((mddev_is_clustered(mddev) && 1351 - md_cluster_ops->area_resyncing(mddev, WRITE, 1352 - bio->bi_iter.bi_sector, 1353 - bio_end_sector(bio)))) { 1358 + mddev->cluster_ops->area_resyncing(mddev, WRITE, 1359 + bio->bi_iter.bi_sector, 1360 + bio_end_sector(bio)))) { 1354 1361 DEFINE_WAIT(w); 1355 1362 /* Bail out if REQ_NOWAIT is set for the bio */ 1356 1363 if (bio->bi_opf & REQ_NOWAIT) { ··· 1360 1367 for (;;) { 1361 1368 prepare_to_wait(&conf->wait_barrier, 1362 1369 &w, TASK_IDLE); 1363 - if (!md_cluster_ops->area_resyncing(mddev, WRITE, 1370 + if (!mddev->cluster_ops->area_resyncing(mddev, WRITE, 1364 1371 bio->bi_iter.bi_sector, bio_end_sector(bio))) 1365 1372 break; 1366 1373 schedule(); ··· 1624 1631 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 1625 1632 return -EAGAIN; 1626 1633 1627 - if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) { 1634 + if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) { 1628 1635 bio_wouldblock_error(bio); 1629 1636 return 0; 1630 1637 } 1631 - wait_barrier(conf, false); 1632 1638 1633 1639 /* 1634 1640 * Check reshape again to avoid reshape happens after checking ··· 3708 3716 conf->cluster_sync_low = mddev->curr_resync_completed; 3709 3717 raid10_set_cluster_sync_high(conf); 3710 3718 /* Send resync message */ 3711 - md_cluster_ops->resync_info_update(mddev, 3719 + mddev->cluster_ops->resync_info_update(mddev, 3712 3720 conf->cluster_sync_low, 3713 3721 conf->cluster_sync_high); 3714 3722 } ··· 3741 3749 } 3742 3750 if (broadcast_msg) { 3743 3751 raid10_set_cluster_sync_high(conf); 3744 - md_cluster_ops->resync_info_update(mddev, 3752 + mddev->cluster_ops->resync_info_update(mddev, 3745 3753 conf->cluster_sync_low, 3746 3754 conf->cluster_sync_high); 3747 3755 } ··· 4535 4543 if (ret) 4536 4544 goto abort; 4537 4545 4538 - ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize); 4546 + ret = 
mddev->cluster_ops->resize_bitmaps(mddev, newsize, oldsize); 4539 4547 if (ret) { 4540 4548 mddev->bitmap_ops->resize(mddev, oldsize, 0, false); 4541 4549 goto abort; ··· 4826 4834 conf->cluster_sync_low = sb_reshape_pos; 4827 4835 } 4828 4836 4829 - md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low, 4837 + mddev->cluster_ops->resync_info_update(mddev, conf->cluster_sync_low, 4830 4838 conf->cluster_sync_high); 4831 4839 } 4832 4840 ··· 4971 4979 struct r10conf *conf = mddev->private; 4972 4980 sector_t lo, hi; 4973 4981 4974 - md_cluster_ops->resync_info_get(mddev, &lo, &hi); 4982 + mddev->cluster_ops->resync_info_get(mddev, &lo, &hi); 4975 4983 if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo)) 4976 4984 || mddev->reshape_position == MaxSector) 4977 4985 conf->reshape_progress = mddev->reshape_position; ··· 5117 5125 5118 5126 static struct md_personality raid10_personality = 5119 5127 { 5120 - .name = "raid10", 5121 - .level = 10, 5122 - .owner = THIS_MODULE, 5128 + .head = { 5129 + .type = MD_PERSONALITY, 5130 + .id = ID_RAID10, 5131 + .name = "raid10", 5132 + .owner = THIS_MODULE, 5133 + }, 5134 + 5123 5135 .make_request = raid10_make_request, 5124 5136 .run = raid10_run, 5125 5137 .free = raid10_free, ··· 5143 5147 .update_reshape_pos = raid10_update_reshape_pos, 5144 5148 }; 5145 5149 5146 - static int __init raid_init(void) 5150 + static int __init raid10_init(void) 5147 5151 { 5148 - return register_md_personality(&raid10_personality); 5152 + return register_md_submodule(&raid10_personality.head); 5149 5153 } 5150 5154 5151 - static void raid_exit(void) 5155 + static void __exit raid10_exit(void) 5152 5156 { 5153 - unregister_md_personality(&raid10_personality); 5157 + unregister_md_submodule(&raid10_personality.head); 5154 5158 } 5155 5159 5156 - module_init(raid_init); 5157 - module_exit(raid_exit); 5160 + module_init(raid10_init); 5161 + module_exit(raid10_exit); 5158 5162 MODULE_LICENSE("GPL"); 5159 5163 MODULE_DESCRIPTION("RAID10 (striped mirror) personality for MD"); 5160 5164 MODULE_ALIAS("md-personality-9"); /* RAID10 */
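
The raid10_handle_discard() hunk replaces the old WARN-and-fail behaviour for REQ_NOWAIT discards: instead of unconditionally erroring the bio, it now tries to take the resync barrier in non-blocking mode and only reports EWOULDBLOCK when that attempt fails. A userspace model of the control flow:

    #include <stdbool.h>
    #include <stdio.h>

    static bool barrier_raised;     /* resync/recovery holds the barrier */

    static bool wait_barrier(bool nowait)
    {
        if (!barrier_raised)
            return true;            /* got it without sleeping */
        if (nowait)
            return false;           /* caller must bio_wouldblock_error() */
        return true;                /* blocking path would sleep here */
    }

    static void handle_discard(bool nowait)
    {
        if (!wait_barrier(nowait)) {
            printf("bio_wouldblock_error()\n");
            return;
        }
        printf("discard proceeds under the barrier\n");
    }

    int main(void)
    {
        barrier_raised = true;
        handle_discard(true);       /* nowait during resync: would block */
        barrier_raised = false;
        handle_discard(true);       /* barrier free: proceeds */
        return 0;
    }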
+60 -31
drivers/md/raid5.c
··· 5858 5858 struct r5conf *conf, sector_t logical_sector) 5859 5859 { 5860 5860 sector_t reshape_progress, reshape_safe; 5861 + 5862 + if (likely(conf->reshape_progress == MaxSector)) 5863 + return LOC_NO_RESHAPE; 5861 5864 /* 5862 5865 * Spinlock is needed as reshape_progress may be 5863 5866 * 64bit on a 32bit platform, and so it might be ··· 5938 5935 const int rw = bio_data_dir(bi); 5939 5936 enum stripe_result ret; 5940 5937 struct stripe_head *sh; 5938 + enum reshape_loc loc; 5941 5939 sector_t new_sector; 5942 5940 int previous = 0, flags = 0; 5943 5941 int seq, dd_idx; 5944 5942 5945 5943 seq = read_seqcount_begin(&conf->gen_lock); 5946 - 5947 - if (unlikely(conf->reshape_progress != MaxSector)) { 5948 - enum reshape_loc loc = get_reshape_loc(mddev, conf, 5949 - logical_sector); 5950 - if (loc == LOC_INSIDE_RESHAPE) { 5951 - ret = STRIPE_SCHEDULE_AND_RETRY; 5952 - goto out; 5953 - } 5954 - if (loc == LOC_AHEAD_OF_RESHAPE) 5955 - previous = 1; 5944 + loc = get_reshape_loc(mddev, conf, logical_sector); 5945 + if (loc == LOC_INSIDE_RESHAPE) { 5946 + ret = STRIPE_SCHEDULE_AND_RETRY; 5947 + goto out; 5956 5948 } 5949 + if (loc == LOC_AHEAD_OF_RESHAPE) 5950 + previous = 1; 5957 5951 5958 5952 new_sector = raid5_compute_sector(conf, logical_sector, previous, 5959 5953 &dd_idx, NULL); ··· 6127 6127 6128 6128 /* Bail out if conflicts with reshape and REQ_NOWAIT is set */ 6129 6129 if ((bi->bi_opf & REQ_NOWAIT) && 6130 - (conf->reshape_progress != MaxSector) && 6131 6130 get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) { 6132 6131 bio_wouldblock_error(bi); 6133 6132 if (rw == WRITE) ··· 8953 8954 8954 8955 static struct md_personality raid6_personality = 8955 8956 { 8956 - .name = "raid6", 8957 - .level = 6, 8958 - .owner = THIS_MODULE, 8957 + .head = { 8958 + .type = MD_PERSONALITY, 8959 + .id = ID_RAID6, 8960 + .name = "raid6", 8961 + .owner = THIS_MODULE, 8962 + }, 8963 + 8959 8964 .make_request = raid5_make_request, 8960 8965 .run = raid5_run, 8961 8966 .start = raid5_start, ··· 8983 8980 }; 8984 8981 static struct md_personality raid5_personality = 8985 8982 { 8986 - .name = "raid5", 8987 - .level = 5, 8988 - .owner = THIS_MODULE, 8983 + .head = { 8984 + .type = MD_PERSONALITY, 8985 + .id = ID_RAID5, 8986 + .name = "raid5", 8987 + .owner = THIS_MODULE, 8988 + }, 8989 + 8989 8990 .make_request = raid5_make_request, 8990 8991 .run = raid5_run, 8991 8992 .start = raid5_start, ··· 9014 9007 9015 9008 static struct md_personality raid4_personality = 9016 9009 { 9017 - .name = "raid4", 9018 - .level = 4, 9019 - .owner = THIS_MODULE, 9010 + .head = { 9011 + .type = MD_PERSONALITY, 9012 + .id = ID_RAID4, 9013 + .name = "raid4", 9014 + .owner = THIS_MODULE, 9015 + }, 9016 + 9020 9017 .make_request = raid5_make_request, 9021 9018 .run = raid5_run, 9022 9019 .start = raid5_start, ··· 9056 9045 "md/raid5:prepare", 9057 9046 raid456_cpu_up_prepare, 9058 9047 raid456_cpu_dead); 9059 - if (ret) { 9060 - destroy_workqueue(raid5_wq); 9061 - return ret; 9062 - } 9063 - register_md_personality(&raid6_personality); 9064 - register_md_personality(&raid5_personality); 9065 - register_md_personality(&raid4_personality); 9048 + if (ret) 9049 + goto err_destroy_wq; 9050 + 9051 + ret = register_md_submodule(&raid6_personality.head); 9052 + if (ret) 9053 + goto err_cpuhp_remove; 9054 + 9055 + ret = register_md_submodule(&raid5_personality.head); 9056 + if (ret) 9057 + goto err_unregister_raid6; 9058 + 9059 + ret = register_md_submodule(&raid4_personality.head); 9060 + if (ret) 9061 + goto 
err_unregister_raid5; 9062 + 9066 9063 return 0; 9064 + 9065 + err_unregister_raid5: 9066 + unregister_md_submodule(&raid5_personality.head); 9067 + err_unregister_raid6: 9068 + unregister_md_submodule(&raid6_personality.head); 9069 + err_cpuhp_remove: 9070 + cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE); 9071 + err_destroy_wq: 9072 + destroy_workqueue(raid5_wq); 9073 + return ret; 9067 9074 } 9068 9075 9069 - static void raid5_exit(void) 9076 + static void __exit raid5_exit(void) 9070 9077 { 9071 - unregister_md_personality(&raid6_personality); 9072 - unregister_md_personality(&raid5_personality); 9073 - unregister_md_personality(&raid4_personality); 9078 + unregister_md_submodule(&raid6_personality.head); 9079 + unregister_md_submodule(&raid5_personality.head); 9080 + unregister_md_submodule(&raid4_personality.head); 9074 9081 cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE); 9075 9082 destroy_workqueue(raid5_wq); 9076 9083 }
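
raid5_init() previously ignored the registration return values; it now unwinds every successful step with a goto ladder when a later one fails. The shape of that pattern, reduced to a runnable sketch (step names echo the real init sequence):

    #include <stdio.h>

    static int step(const char *name, int ok)
    {
        printf("%s: %s\n", name, ok ? "ok" : "failed");
        return ok ? 0 : -1;
    }

    static int init(void)
    {
        if (step("alloc_workqueue", 1))
            return -1;
        if (step("cpuhp_setup", 1))
            goto err_destroy_wq;
        if (step("register raid6", 1))
            goto err_cpuhp_remove;
        if (step("register raid5", 0))  /* simulate a late failure */
            goto err_unregister_raid6;
        return 0;

    err_unregister_raid6:
        printf("unregister raid6\n");
    err_cpuhp_remove:
        printf("cpuhp_remove\n");
    err_destroy_wq:
        printf("destroy_workqueue\n");
        return -1;
    }

    int main(void)
    {
        printf("init -> %d\n", init());
        return 0;
    }

Each label undoes exactly the steps that succeeded before it, so the module no longer leaks the workqueue or the cpuhp state on a partial failure.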