md: Remove deprecated CONFIG_MD_MULTIPATH

-11

drivers/md/Kconfig

··· 159 159 160 160 If unsure, say Y. 161 161 162 - config MD_MULTIPATH 163 - tristate "Multipath I/O support (deprecated)" 164 - depends on BLK_DEV_MD 165 - help 166 - MD_MULTIPATH provides a simple multi-path personality for use 167 - the MD framework. It is not under active development. New 168 - projects should consider using DM_MULTIPATH which has more 169 - features and more testing. 170 - 171 - If unsure, say N. 172 - 173 162 config MD_FAULTY 174 163 tristate "Faulty test module for MD (deprecated)" 175 164 depends on BLK_DEV_MD

-2

drivers/md/Makefile

··· 29 29 30 30 md-mod-y += md.o md-bitmap.o 31 31 raid456-y += raid5.o raid5-cache.o raid5-ppl.o 32 - multipath-y += md-multipath.o 33 32 faulty-y += md-faulty.o 34 33 35 34 # Note: link order is important. All raid personalities ··· 40 41 obj-$(CONFIG_MD_RAID1) += raid1.o 41 42 obj-$(CONFIG_MD_RAID10) += raid10.o 42 43 obj-$(CONFIG_MD_RAID456) += raid456.o 43 - obj-$(CONFIG_MD_MULTIPATH) += multipath.o 44 44 obj-$(CONFIG_MD_FAULTY) += faulty.o 45 45 obj-$(CONFIG_MD_CLUSTER) += md-cluster.o 46 46 obj-$(CONFIG_BCACHE) += bcache/

-463

drivers/md/md-multipath.c

··· 1 - // SPDX-License-Identifier: GPL-2.0-or-later 2 - /* 3 - * multipath.c : Multiple Devices driver for Linux 4 - * 5 - * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat 6 - * 7 - * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman 8 - * 9 - * MULTIPATH management functions. 10 - * 11 - * derived from raid1.c. 12 - */ 13 - 14 - #include <linux/blkdev.h> 15 - #include <linux/module.h> 16 - #include <linux/raid/md_u.h> 17 - #include <linux/seq_file.h> 18 - #include <linux/slab.h> 19 - #include "md.h" 20 - #include "md-multipath.h" 21 - 22 - #define MAX_WORK_PER_DISK 128 23 - 24 - #define NR_RESERVED_BUFS 32 25 - 26 - static int multipath_map (struct mpconf *conf) 27 - { 28 - int i, disks = conf->raid_disks; 29 - 30 - /* 31 - * Later we do read balancing on the read side 32 - * now we use the first available disk. 33 - */ 34 - 35 - for (i = 0; i < disks; i++) { 36 - struct md_rdev *rdev = conf->multipaths[i].rdev; 37 - 38 - if (rdev && test_bit(In_sync, &rdev->flags) && 39 - !test_bit(Faulty, &rdev->flags)) { 40 - atomic_inc(&rdev->nr_pending); 41 - return i; 42 - } 43 - } 44 - 45 - pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n"); 46 - return (-1); 47 - } 48 - 49 - static void multipath_reschedule_retry (struct multipath_bh *mp_bh) 50 - { 51 - unsigned long flags; 52 - struct mddev *mddev = mp_bh->mddev; 53 - struct mpconf *conf = mddev->private; 54 - 55 - spin_lock_irqsave(&conf->device_lock, flags); 56 - list_add(&mp_bh->retry_list, &conf->retry_list); 57 - spin_unlock_irqrestore(&conf->device_lock, flags); 58 - md_wakeup_thread(mddev->thread); 59 - } 60 - 61 - /* 62 - * multipath_end_bh_io() is called when we have finished servicing a multipathed 63 - * operation and are ready to return a success/failure code to the buffer 64 - * cache layer. 65 - */ 66 - static void multipath_end_bh_io(struct multipath_bh *mp_bh, blk_status_t status) 67 - { 68 - struct bio *bio = mp_bh->master_bio; 69 - struct mpconf *conf = mp_bh->mddev->private; 70 - 71 - bio->bi_status = status; 72 - bio_endio(bio); 73 - mempool_free(mp_bh, &conf->pool); 74 - } 75 - 76 - static void multipath_end_request(struct bio *bio) 77 - { 78 - struct multipath_bh *mp_bh = bio->bi_private; 79 - struct mpconf *conf = mp_bh->mddev->private; 80 - struct md_rdev *rdev = conf->multipaths[mp_bh->path].rdev; 81 - 82 - if (!bio->bi_status) 83 - multipath_end_bh_io(mp_bh, 0); 84 - else if (!(bio->bi_opf & REQ_RAHEAD)) { 85 - /* 86 - * oops, IO error: 87 - */ 88 - md_error (mp_bh->mddev, rdev); 89 - pr_info("multipath: %pg: rescheduling sector %llu\n", 90 - rdev->bdev, 91 - (unsigned long long)bio->bi_iter.bi_sector); 92 - multipath_reschedule_retry(mp_bh); 93 - } else 94 - multipath_end_bh_io(mp_bh, bio->bi_status); 95 - rdev_dec_pending(rdev, conf->mddev); 96 - } 97 - 98 - static bool multipath_make_request(struct mddev *mddev, struct bio * bio) 99 - { 100 - struct mpconf *conf = mddev->private; 101 - struct multipath_bh * mp_bh; 102 - struct multipath_info *multipath; 103 - 104 - if (unlikely(bio->bi_opf & REQ_PREFLUSH) 105 - && md_flush_request(mddev, bio)) 106 - return true; 107 - 108 - md_account_bio(mddev, &bio); 109 - mp_bh = mempool_alloc(&conf->pool, GFP_NOIO); 110 - 111 - mp_bh->master_bio = bio; 112 - mp_bh->mddev = mddev; 113 - 114 - mp_bh->path = multipath_map(conf); 115 - if (mp_bh->path < 0) { 116 - bio_io_error(bio); 117 - mempool_free(mp_bh, &conf->pool); 118 - return true; 119 - } 120 - multipath = conf->multipaths + mp_bh->path; 121 - 122 - bio_init_clone(multipath->rdev->bdev, &mp_bh->bio, bio, GFP_NOIO); 123 - 124 - mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset; 125 - mp_bh->bio.bi_opf |= REQ_FAILFAST_TRANSPORT; 126 - mp_bh->bio.bi_end_io = multipath_end_request; 127 - mp_bh->bio.bi_private = mp_bh; 128 - mddev_check_write_zeroes(mddev, &mp_bh->bio); 129 - submit_bio_noacct(&mp_bh->bio); 130 - return true; 131 - } 132 - 133 - static void multipath_status(struct seq_file *seq, struct mddev *mddev) 134 - { 135 - struct mpconf *conf = mddev->private; 136 - int i; 137 - 138 - lockdep_assert_held(&mddev->lock); 139 - 140 - seq_printf (seq, " [%d/%d] [", conf->raid_disks, 141 - conf->raid_disks - mddev->degraded); 142 - for (i = 0; i < conf->raid_disks; i++) { 143 - struct md_rdev *rdev = READ_ONCE(conf->multipaths[i].rdev); 144 - 145 - seq_printf(seq, "%s", 146 - rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_"); 147 - } 148 - seq_putc(seq, ']'); 149 - } 150 - 151 - /* 152 - * Careful, this can execute in IRQ contexts as well! 153 - */ 154 - static void multipath_error (struct mddev *mddev, struct md_rdev *rdev) 155 - { 156 - struct mpconf *conf = mddev->private; 157 - 158 - if (conf->raid_disks - mddev->degraded <= 1) { 159 - /* 160 - * Uh oh, we can do nothing if this is our last path, but 161 - * first check if this is a queued request for a device 162 - * which has just failed. 163 - */ 164 - pr_warn("multipath: only one IO path left and IO error.\n"); 165 - /* leave it active... it's all we have */ 166 - return; 167 - } 168 - /* 169 - * Mark disk as unusable 170 - */ 171 - if (test_and_clear_bit(In_sync, &rdev->flags)) { 172 - unsigned long flags; 173 - spin_lock_irqsave(&conf->device_lock, flags); 174 - mddev->degraded++; 175 - spin_unlock_irqrestore(&conf->device_lock, flags); 176 - } 177 - set_bit(Faulty, &rdev->flags); 178 - set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 179 - pr_err("multipath: IO failure on %pg, disabling IO path.\n" 180 - "multipath: Operation continuing on %d IO paths.\n", 181 - rdev->bdev, 182 - conf->raid_disks - mddev->degraded); 183 - } 184 - 185 - static void print_multipath_conf(struct mpconf *conf) 186 - { 187 - int i; 188 - struct multipath_info *tmp; 189 - 190 - pr_debug("MULTIPATH conf printout:\n"); 191 - if (!conf) { 192 - pr_debug("(conf==NULL)\n"); 193 - return; 194 - } 195 - pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded, 196 - conf->raid_disks); 197 - 198 - lockdep_assert_held(&conf->mddev->reconfig_mutex); 199 - for (i = 0; i < conf->raid_disks; i++) { 200 - tmp = conf->multipaths + i; 201 - if (tmp->rdev) 202 - pr_debug(" disk%d, o:%d, dev:%pg\n", 203 - i,!test_bit(Faulty, &tmp->rdev->flags), 204 - tmp->rdev->bdev); 205 - } 206 - } 207 - 208 - static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev) 209 - { 210 - struct mpconf *conf = mddev->private; 211 - int err = -EEXIST; 212 - int path; 213 - struct multipath_info *p; 214 - int first = 0; 215 - int last = mddev->raid_disks - 1; 216 - 217 - if (rdev->raid_disk >= 0) 218 - first = last = rdev->raid_disk; 219 - 220 - print_multipath_conf(conf); 221 - 222 - for (path = first; path <= last; path++) 223 - if ((p=conf->multipaths+path)->rdev == NULL) { 224 - disk_stack_limits(mddev->gendisk, rdev->bdev, 225 - rdev->data_offset << 9); 226 - 227 - err = md_integrity_add_rdev(rdev, mddev); 228 - if (err) 229 - break; 230 - spin_lock_irq(&conf->device_lock); 231 - mddev->degraded--; 232 - rdev->raid_disk = path; 233 - set_bit(In_sync, &rdev->flags); 234 - spin_unlock_irq(&conf->device_lock); 235 - WRITE_ONCE(p->rdev, rdev); 236 - err = 0; 237 - break; 238 - } 239 - 240 - print_multipath_conf(conf); 241 - 242 - return err; 243 - } 244 - 245 - static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev) 246 - { 247 - struct mpconf *conf = mddev->private; 248 - int err = 0; 249 - int number = rdev->raid_disk; 250 - struct multipath_info *p = conf->multipaths + number; 251 - 252 - print_multipath_conf(conf); 253 - 254 - if (rdev == p->rdev) { 255 - if (test_bit(In_sync, &rdev->flags) || 256 - atomic_read(&rdev->nr_pending)) { 257 - pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number); 258 - err = -EBUSY; 259 - goto abort; 260 - } 261 - WRITE_ONCE(p->rdev, NULL); 262 - err = md_integrity_register(mddev); 263 - } 264 - abort: 265 - 266 - print_multipath_conf(conf); 267 - return err; 268 - } 269 - 270 - /* 271 - * This is a kernel thread which: 272 - * 273 - * 1. Retries failed read operations on working multipaths. 274 - * 2. Updates the raid superblock when problems encounter. 275 - * 3. Performs writes following reads for array syncronising. 276 - */ 277 - 278 - static void multipathd(struct md_thread *thread) 279 - { 280 - struct mddev *mddev = thread->mddev; 281 - struct multipath_bh *mp_bh; 282 - struct bio *bio; 283 - unsigned long flags; 284 - struct mpconf *conf = mddev->private; 285 - struct list_head *head = &conf->retry_list; 286 - 287 - md_check_recovery(mddev); 288 - for (;;) { 289 - spin_lock_irqsave(&conf->device_lock, flags); 290 - if (list_empty(head)) 291 - break; 292 - mp_bh = list_entry(head->prev, struct multipath_bh, retry_list); 293 - list_del(head->prev); 294 - spin_unlock_irqrestore(&conf->device_lock, flags); 295 - 296 - bio = &mp_bh->bio; 297 - bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector; 298 - 299 - if ((mp_bh->path = multipath_map (conf))<0) { 300 - pr_err("multipath: %pg: unrecoverable IO read error for block %llu\n", 301 - bio->bi_bdev, 302 - (unsigned long long)bio->bi_iter.bi_sector); 303 - multipath_end_bh_io(mp_bh, BLK_STS_IOERR); 304 - } else { 305 - pr_err("multipath: %pg: redirecting sector %llu to another IO path\n", 306 - bio->bi_bdev, 307 - (unsigned long long)bio->bi_iter.bi_sector); 308 - *bio = *(mp_bh->master_bio); 309 - bio->bi_iter.bi_sector += 310 - conf->multipaths[mp_bh->path].rdev->data_offset; 311 - bio_set_dev(bio, conf->multipaths[mp_bh->path].rdev->bdev); 312 - bio->bi_opf |= REQ_FAILFAST_TRANSPORT; 313 - bio->bi_end_io = multipath_end_request; 314 - bio->bi_private = mp_bh; 315 - submit_bio_noacct(bio); 316 - } 317 - } 318 - spin_unlock_irqrestore(&conf->device_lock, flags); 319 - } 320 - 321 - static sector_t multipath_size(struct mddev *mddev, sector_t sectors, int raid_disks) 322 - { 323 - WARN_ONCE(sectors || raid_disks, 324 - "%s does not support generic reshape\n", __func__); 325 - 326 - return mddev->dev_sectors; 327 - } 328 - 329 - static int multipath_run (struct mddev *mddev) 330 - { 331 - struct mpconf *conf; 332 - int disk_idx; 333 - struct multipath_info *disk; 334 - struct md_rdev *rdev; 335 - int working_disks; 336 - int ret; 337 - 338 - if (md_check_no_bitmap(mddev)) 339 - return -EINVAL; 340 - 341 - if (mddev->level != LEVEL_MULTIPATH) { 342 - pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n", 343 - mdname(mddev), mddev->level); 344 - goto out; 345 - } 346 - /* 347 - * copy the already verified devices into our private MULTIPATH 348 - * bookkeeping area. [whatever we allocate in multipath_run(), 349 - * should be freed in multipath_free()] 350 - */ 351 - 352 - conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL); 353 - mddev->private = conf; 354 - if (!conf) 355 - goto out; 356 - 357 - conf->multipaths = kcalloc(mddev->raid_disks, 358 - sizeof(struct multipath_info), 359 - GFP_KERNEL); 360 - if (!conf->multipaths) 361 - goto out_free_conf; 362 - 363 - working_disks = 0; 364 - rdev_for_each(rdev, mddev) { 365 - disk_idx = rdev->raid_disk; 366 - if (disk_idx < 0 || 367 - disk_idx >= mddev->raid_disks) 368 - continue; 369 - 370 - disk = conf->multipaths + disk_idx; 371 - disk->rdev = rdev; 372 - disk_stack_limits(mddev->gendisk, rdev->bdev, 373 - rdev->data_offset << 9); 374 - 375 - if (!test_bit(Faulty, &rdev->flags)) 376 - working_disks++; 377 - } 378 - 379 - conf->raid_disks = mddev->raid_disks; 380 - conf->mddev = mddev; 381 - spin_lock_init(&conf->device_lock); 382 - INIT_LIST_HEAD(&conf->retry_list); 383 - 384 - if (!working_disks) { 385 - pr_warn("multipath: no operational IO paths for %s\n", 386 - mdname(mddev)); 387 - goto out_free_conf; 388 - } 389 - mddev->degraded = conf->raid_disks - working_disks; 390 - 391 - ret = mempool_init_kmalloc_pool(&conf->pool, NR_RESERVED_BUFS, 392 - sizeof(struct multipath_bh)); 393 - if (ret) 394 - goto out_free_conf; 395 - 396 - rcu_assign_pointer(mddev->thread, 397 - md_register_thread(multipathd, mddev, "multipath")); 398 - if (!mddev->thread) 399 - goto out_free_conf; 400 - 401 - pr_info("multipath: array %s active with %d out of %d IO paths\n", 402 - mdname(mddev), conf->raid_disks - mddev->degraded, 403 - mddev->raid_disks); 404 - /* 405 - * Ok, everything is just fine now 406 - */ 407 - md_set_array_sectors(mddev, multipath_size(mddev, 0, 0)); 408 - 409 - if (md_integrity_register(mddev)) 410 - goto out_free_conf; 411 - 412 - return 0; 413 - 414 - out_free_conf: 415 - mempool_exit(&conf->pool); 416 - kfree(conf->multipaths); 417 - kfree(conf); 418 - mddev->private = NULL; 419 - out: 420 - return -EIO; 421 - } 422 - 423 - static void multipath_free(struct mddev *mddev, void *priv) 424 - { 425 - struct mpconf *conf = priv; 426 - 427 - mempool_exit(&conf->pool); 428 - kfree(conf->multipaths); 429 - kfree(conf); 430 - } 431 - 432 - static struct md_personality multipath_personality = 433 - { 434 - .name = "multipath", 435 - .level = LEVEL_MULTIPATH, 436 - .owner = THIS_MODULE, 437 - .make_request = multipath_make_request, 438 - .run = multipath_run, 439 - .free = multipath_free, 440 - .status = multipath_status, 441 - .error_handler = multipath_error, 442 - .hot_add_disk = multipath_add_disk, 443 - .hot_remove_disk= multipath_remove_disk, 444 - .size = multipath_size, 445 - }; 446 - 447 - static int __init multipath_init (void) 448 - { 449 - return register_md_personality (&multipath_personality); 450 - } 451 - 452 - static void __exit multipath_exit (void) 453 - { 454 - unregister_md_personality (&multipath_personality); 455 - } 456 - 457 - module_init(multipath_init); 458 - module_exit(multipath_exit); 459 - MODULE_LICENSE("GPL"); 460 - MODULE_DESCRIPTION("simple multi-path personality for MD (deprecated)"); 461 - MODULE_ALIAS("md-personality-7"); /* MULTIPATH */ 462 - MODULE_ALIAS("md-multipath"); 463 - MODULE_ALIAS("md-level--4");

+107 -132

drivers/md/md.c

··· 1287 1287 rdev->sb_size = MD_SB_BYTES; 1288 1288 rdev->badblocks.shift = -1; 1289 1289 1290 - if (sb->level == LEVEL_MULTIPATH) 1291 - rdev->desc_nr = -1; 1292 - else 1293 - rdev->desc_nr = sb->this_disk.number; 1290 + rdev->desc_nr = sb->this_disk.number; 1294 1291 1295 - /* not spare disk, or LEVEL_MULTIPATH */ 1296 - if (sb->level == LEVEL_MULTIPATH || 1297 - (rdev->desc_nr >= 0 && 1298 - rdev->desc_nr < MD_SB_DISKS && 1299 - sb->disks[rdev->desc_nr].state & 1300 - ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)))) 1292 + /* not spare disk */ 1293 + if (rdev->desc_nr >= 0 && rdev->desc_nr < MD_SB_DISKS && 1294 + sb->disks[rdev->desc_nr].state & ((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))) 1301 1295 spare_disk = false; 1302 1296 1303 1297 if (!refdev) { ··· 1438 1444 return 0; 1439 1445 } 1440 1446 1441 - if (mddev->level != LEVEL_MULTIPATH) { 1442 - desc = sb->disks + rdev->desc_nr; 1447 + desc = sb->disks + rdev->desc_nr; 1443 1448 1444 - if (desc->state & (1<<MD_DISK_FAULTY)) 1445 - set_bit(Faulty, &rdev->flags); 1446 - else if (desc->state & (1<<MD_DISK_SYNC) /* && 1447 - desc->raid_disk < mddev->raid_disks */) { 1448 - set_bit(In_sync, &rdev->flags); 1449 - rdev->raid_disk = desc->raid_disk; 1450 - rdev->saved_raid_disk = desc->raid_disk; 1451 - } else if (desc->state & (1<<MD_DISK_ACTIVE)) { 1452 - /* active but not in sync implies recovery up to 1453 - * reshape position. We don't know exactly where 1454 - * that is, so set to zero for now */ 1455 - if (mddev->minor_version >= 91) { 1456 - rdev->recovery_offset = 0; 1457 - rdev->raid_disk = desc->raid_disk; 1458 - } 1459 - } 1460 - if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) 1461 - set_bit(WriteMostly, &rdev->flags); 1462 - if (desc->state & (1<<MD_DISK_FAILFAST)) 1463 - set_bit(FailFast, &rdev->flags); 1464 - } else /* MULTIPATH are always insync */ 1449 + if (desc->state & (1<<MD_DISK_FAULTY)) 1450 + set_bit(Faulty, &rdev->flags); 1451 + else if (desc->state & (1<<MD_DISK_SYNC)) { 1465 1452 set_bit(In_sync, &rdev->flags); 1453 + rdev->raid_disk = desc->raid_disk; 1454 + rdev->saved_raid_disk = desc->raid_disk; 1455 + } else if (desc->state & (1<<MD_DISK_ACTIVE)) { 1456 + /* active but not in sync implies recovery up to 1457 + * reshape position. We don't know exactly where 1458 + * that is, so set to zero for now 1459 + */ 1460 + if (mddev->minor_version >= 91) { 1461 + rdev->recovery_offset = 0; 1462 + rdev->raid_disk = desc->raid_disk; 1463 + } 1464 + } 1465 + if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) 1466 + set_bit(WriteMostly, &rdev->flags); 1467 + if (desc->state & (1<<MD_DISK_FAILFAST)) 1468 + set_bit(FailFast, &rdev->flags); 1466 1469 return 0; 1467 1470 } 1468 1471 ··· 1749 1758 && rdev->new_data_offset < sb_start + (rdev->sb_size/512)) 1750 1759 return -EINVAL; 1751 1760 1752 - if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) 1753 - rdev->desc_nr = -1; 1754 - else 1755 - rdev->desc_nr = le32_to_cpu(sb->dev_number); 1761 + rdev->desc_nr = le32_to_cpu(sb->dev_number); 1756 1762 1757 1763 if (!rdev->bb_page) { 1758 1764 rdev->bb_page = alloc_page(GFP_KERNEL); ··· 1802 1814 sb->level != 0) 1803 1815 return -EINVAL; 1804 1816 1805 - /* not spare disk, or LEVEL_MULTIPATH */ 1806 - if (sb->level == cpu_to_le32(LEVEL_MULTIPATH) || 1807 - (rdev->desc_nr >= 0 && 1808 - rdev->desc_nr < le32_to_cpu(sb->max_dev) && 1809 - (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || 1810 - le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))) 1817 + /* not spare disk */ 1818 + if (rdev->desc_nr >= 0 && rdev->desc_nr < le32_to_cpu(sb->max_dev) && 1819 + (le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX || 1820 + le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL)) 1811 1821 spare_disk = false; 1812 1822 1813 1823 if (!refdev) { ··· 1848 1862 { 1849 1863 struct mdp_superblock_1 *sb = page_address(rdev->sb_page); 1850 1864 __u64 ev1 = le64_to_cpu(sb->events); 1865 + int role; 1851 1866 1852 1867 rdev->raid_disk = -1; 1853 1868 clear_bit(Faulty, &rdev->flags); ··· 1964 1977 /* just a hot-add of a new device, leave raid_disk at -1 */ 1965 1978 return 0; 1966 1979 } 1967 - if (mddev->level != LEVEL_MULTIPATH) { 1968 - int role; 1969 - if (rdev->desc_nr < 0 || 1970 - rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { 1971 - role = MD_DISK_ROLE_SPARE; 1972 - rdev->desc_nr = -1; 1973 - } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { 1974 - /* 1975 - * If we are assembling, and our event counter is smaller than the 1976 - * highest event counter, we cannot trust our superblock about the role. 1977 - * It could happen that our rdev was marked as Faulty, and all other 1978 - * superblocks were updated with +1 event counter. 1979 - * Then, before the next superblock update, which typically happens when 1980 - * remove_and_add_spares() removes the device from the array, there was 1981 - * a crash or reboot. 1982 - * If we allow current rdev without consulting the freshest superblock, 1983 - * we could cause data corruption. 1984 - * Note that in this case our event counter is smaller by 1 than the 1985 - * highest, otherwise, this rdev would not be allowed into array; 1986 - * both kernel and mdadm allow event counter difference of 1. 1987 - */ 1988 - struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); 1989 - u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); 1990 1980 1991 - if (rdev->desc_nr >= freshest_max_dev) { 1992 - /* this is unexpected, better not proceed */ 1993 - pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", 1994 - mdname(mddev), rdev->bdev, rdev->desc_nr, 1995 - freshest->bdev, freshest_max_dev); 1996 - return -EUCLEAN; 1997 - } 1981 + if (rdev->desc_nr < 0 || 1982 + rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { 1983 + role = MD_DISK_ROLE_SPARE; 1984 + rdev->desc_nr = -1; 1985 + } else if (mddev->pers == NULL && freshest && ev1 < mddev->events) { 1986 + /* 1987 + * If we are assembling, and our event counter is smaller than the 1988 + * highest event counter, we cannot trust our superblock about the role. 1989 + * It could happen that our rdev was marked as Faulty, and all other 1990 + * superblocks were updated with +1 event counter. 1991 + * Then, before the next superblock update, which typically happens when 1992 + * remove_and_add_spares() removes the device from the array, there was 1993 + * a crash or reboot. 1994 + * If we allow current rdev without consulting the freshest superblock, 1995 + * we could cause data corruption. 1996 + * Note that in this case our event counter is smaller by 1 than the 1997 + * highest, otherwise, this rdev would not be allowed into array; 1998 + * both kernel and mdadm allow event counter difference of 1. 1999 + */ 2000 + struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page); 2001 + u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev); 1998 2002 1999 - role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); 2000 - pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", 2001 - mdname(mddev), rdev->bdev, role, role, freshest->bdev); 2003 + if (rdev->desc_nr >= freshest_max_dev) { 2004 + /* this is unexpected, better not proceed */ 2005 + pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n", 2006 + mdname(mddev), rdev->bdev, rdev->desc_nr, 2007 + freshest->bdev, freshest_max_dev); 2008 + return -EUCLEAN; 2009 + } 2010 + 2011 + role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]); 2012 + pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n", 2013 + mdname(mddev), rdev->bdev, role, role, freshest->bdev); 2014 + } else { 2015 + role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); 2016 + } 2017 + switch (role) { 2018 + case MD_DISK_ROLE_SPARE: /* spare */ 2019 + break; 2020 + case MD_DISK_ROLE_FAULTY: /* faulty */ 2021 + set_bit(Faulty, &rdev->flags); 2022 + break; 2023 + case MD_DISK_ROLE_JOURNAL: /* journal device */ 2024 + if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) { 2025 + /* journal device without journal feature */ 2026 + pr_warn("md: journal device provided without journal feature, ignoring the device\n"); 2027 + return -EINVAL; 2028 + } 2029 + set_bit(Journal, &rdev->flags); 2030 + rdev->journal_tail = le64_to_cpu(sb->journal_tail); 2031 + rdev->raid_disk = 0; 2032 + break; 2033 + default: 2034 + rdev->saved_raid_disk = role; 2035 + if ((le32_to_cpu(sb->feature_map) & 2036 + MD_FEATURE_RECOVERY_OFFSET)) { 2037 + rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); 2038 + if (!(le32_to_cpu(sb->feature_map) & 2039 + MD_FEATURE_RECOVERY_BITMAP)) 2040 + rdev->saved_raid_disk = -1; 2002 2041 } else { 2003 - role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); 2042 + /* 2043 + * If the array is FROZEN, then the device can't 2044 + * be in_sync with rest of array. 2045 + */ 2046 + if (!test_bit(MD_RECOVERY_FROZEN, 2047 + &mddev->recovery)) 2048 + set_bit(In_sync, &rdev->flags); 2004 2049 } 2005 - switch(role) { 2006 - case MD_DISK_ROLE_SPARE: /* spare */ 2007 - break; 2008 - case MD_DISK_ROLE_FAULTY: /* faulty */ 2009 - set_bit(Faulty, &rdev->flags); 2010 - break; 2011 - case MD_DISK_ROLE_JOURNAL: /* journal device */ 2012 - if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) { 2013 - /* journal device without journal feature */ 2014 - pr_warn("md: journal device provided without journal feature, ignoring the device\n"); 2015 - return -EINVAL; 2016 - } 2017 - set_bit(Journal, &rdev->flags); 2018 - rdev->journal_tail = le64_to_cpu(sb->journal_tail); 2019 - rdev->raid_disk = 0; 2020 - break; 2021 - default: 2022 - rdev->saved_raid_disk = role; 2023 - if ((le32_to_cpu(sb->feature_map) & 2024 - MD_FEATURE_RECOVERY_OFFSET)) { 2025 - rdev->recovery_offset = le64_to_cpu(sb->recovery_offset); 2026 - if (!(le32_to_cpu(sb->feature_map) & 2027 - MD_FEATURE_RECOVERY_BITMAP)) 2028 - rdev->saved_raid_disk = -1; 2029 - } else { 2030 - /* 2031 - * If the array is FROZEN, then the device can't 2032 - * be in_sync with rest of array. 2033 - */ 2034 - if (!test_bit(MD_RECOVERY_FROZEN, 2035 - &mddev->recovery)) 2036 - set_bit(In_sync, &rdev->flags); 2037 - } 2038 - rdev->raid_disk = role; 2039 - break; 2040 - } 2041 - if (sb->devflags & WriteMostly1) 2042 - set_bit(WriteMostly, &rdev->flags); 2043 - if (sb->devflags & FailFast1) 2044 - set_bit(FailFast, &rdev->flags); 2045 - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) 2046 - set_bit(Replacement, &rdev->flags); 2047 - } else /* MULTIPATH are always insync */ 2048 - set_bit(In_sync, &rdev->flags); 2050 + rdev->raid_disk = role; 2051 + break; 2052 + } 2053 + if (sb->devflags & WriteMostly1) 2054 + set_bit(WriteMostly, &rdev->flags); 2055 + if (sb->devflags & FailFast1) 2056 + set_bit(FailFast, &rdev->flags); 2057 + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) 2058 + set_bit(Replacement, &rdev->flags); 2049 2059 2050 2060 return 0; 2051 2061 } ··· 2860 2876 } else 2861 2877 pr_debug("md: %pg (skipping faulty)\n", 2862 2878 rdev->bdev); 2863 - 2864 - if (mddev->level == LEVEL_MULTIPATH) 2865 - /* only need to write one superblock... */ 2866 - break; 2867 2879 } 2868 2880 if (md_super_wait(mddev) < 0) 2869 2881 goto rewrite; ··· 3860 3880 continue; 3861 3881 } 3862 3882 } 3863 - if (mddev->level == LEVEL_MULTIPATH) { 3864 - rdev->desc_nr = i++; 3865 - rdev->raid_disk = rdev->desc_nr; 3866 - set_bit(In_sync, &rdev->flags); 3867 - } else if (rdev->raid_disk >= 3868 - (mddev->raid_disks - min(0, mddev->delta_disks)) && 3869 - !test_bit(Journal, &rdev->flags)) { 3883 + if (rdev->raid_disk >= (mddev->raid_disks - min(0, mddev->delta_disks)) && 3884 + !test_bit(Journal, &rdev->flags)) { 3870 3885 rdev->raid_disk = -1; 3871 3886 clear_bit(In_sync, &rdev->flags); 3872 3887 }

+1 -1

include/uapi/linux/raid/md_p.h

··· 233 233 char set_name[32]; /* set and interpreted by user-space */ 234 234 235 235 __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ 236 - __le32 level; /* -4 (multipath), 0,1,4,5 */ 236 + __le32 level; /* 0,1,4,5 */ 237 237 __le32 layout; /* only for raid5 and raid10 currently */ 238 238 __le64 size; /* used size of component devices, in 512byte sectors */ 239 239

-1

include/uapi/linux/raid/md_u.h

··· 104 104 } mdu_array_info_t; 105 105 106 106 /* non-obvious values for 'level' */ 107 - #define LEVEL_MULTIPATH (-4) 108 107 #define LEVEL_FAULTY (-5) 109 108 110 109 /* we need a value for 'no level specified' and 0