Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md: reintroduce md-linear

md-linear was removed by commit 849d18e27be9 ("md: Remove deprecated
CONFIG_MD_LINEAR") because it had been marked as deprecated for a long
time.

However, md-linear is widely used for underlying disks of different sizes;
sadly, we didn't know this until now. It is truly useful to create
partitions, assemble multiple raid arrays, and then append one to the other.

People have to use dm-linear in this case now, however, they will prefer
to minimize the number of involved modules.

Fixes: 849d18e27be9 ("md: Remove deprecated CONFIG_MD_LINEAR")
Cc: stable@vger.kernel.org
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Coly Li <colyli@kernel.org>
Acked-by: Mike Snitzer <snitzer@kernel.org>
Link: https://lore.kernel.org/r/20250102112841.1227111-1-yukuai1@huaweicloud.com
Signed-off-by: Song Liu <song@kernel.org>

authored by

Yu Kuai and committed by
Song Liu
127186cf e494e451

+379 -4
+13
drivers/md/Kconfig
··· 61 61 various kernel APIs and can only work with files on a file system not 62 62 actually sitting on the MD device. 63 63 64 + config MD_LINEAR 65 + tristate "Linear (append) mode" 66 + depends on BLK_DEV_MD 67 + help 68 + If you say Y here, then your multiple devices driver will be able to 69 + use the so-called linear mode, i.e. it will combine the hard disk 70 + partitions by simply appending one to the other. 71 + 72 + To compile this as a module, choose M here: the module 73 + will be called linear. 74 + 75 + If unsure, say Y. 76 + 64 77 config MD_RAID0 65 78 tristate "RAID-0 (striping) mode" 66 79 depends on BLK_DEV_MD
+2
drivers/md/Makefile
··· 29 29 30 30 md-mod-y += md.o md-bitmap.o 31 31 raid456-y += raid5.o raid5-cache.o raid5-ppl.o 32 + linear-y += md-linear.o 32 33 33 34 # Note: link order is important. All raid personalities 34 35 # and must come before md.o, as they each initialise 35 36 # themselves, and md.o may use the personalities when it 36 37 # auto-initialised. 37 38 39 + obj-$(CONFIG_MD_LINEAR) += linear.o 38 40 obj-$(CONFIG_MD_RAID0) += raid0.o 39 41 obj-$(CONFIG_MD_RAID1) += raid1.o 40 42 obj-$(CONFIG_MD_RAID10) += raid10.o
+6 -2
drivers/md/md-autodetect.c
··· 49 49 * instead of just one. -- KTK 50 50 * 18May2000: Added support for persistent-superblock arrays: 51 51 * md=n,0,factor,fault,device-list uses RAID0 for device n 52 + * md=n,-1,factor,fault,device-list uses LINEAR for device n 52 53 * md=n,device-list reads a RAID superblock from the devices 53 54 * elements in device-list are read by name_to_kdev_t so can be 54 55 * a hex number or something like /dev/hda1 /dev/sdb ··· 88 87 md_setup_ents++; 89 88 switch (get_option(&str, &level)) { /* RAID level */ 90 89 case 2: /* could be 0 or -1.. */ 91 - if (level == 0) { 90 + if (level == 0 || level == LEVEL_LINEAR) { 92 91 if (get_option(&str, &factor) != 2 || /* Chunk Size */ 93 92 get_option(&str, &fault) != 2) { 94 93 printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); ··· 96 95 } 97 96 md_setup_args[ent].level = level; 98 97 md_setup_args[ent].chunk = 1 << (factor+12); 99 - pername = "raid0"; 98 + if (level == LEVEL_LINEAR) 99 + pername = "linear"; 100 + else 101 + pername = "raid0"; 100 102 break; 101 103 } 102 104 fallthrough;
+354
drivers/md/md-linear.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + /* 3 + * linear.c : Multiple Devices driver for Linux Copyright (C) 1994-96 Marc 4 + * ZYNGIER <zyngier@ufr-info-p7.ibp.fr> or <maz@gloups.fdn.fr> 5 + */ 6 + 7 + #include <linux/blkdev.h> 8 + #include <linux/raid/md_u.h> 9 + #include <linux/seq_file.h> 10 + #include <linux/module.h> 11 + #include <linux/slab.h> 12 + #include <trace/events/block.h> 13 + #include "md.h" 14 + 15 + struct dev_info { 16 + struct md_rdev *rdev; 17 + sector_t end_sector; 18 + }; 19 + 20 + struct linear_conf { 21 + struct rcu_head rcu; 22 + sector_t array_sectors; 23 + /* a copy of mddev->raid_disks */ 24 + int raid_disks; 25 + struct dev_info disks[] __counted_by(raid_disks); 26 + }; 27 + 28 + /* 29 + * find which device holds a particular offset 30 + */ 31 + static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector) 32 + { 33 + int lo, mid, hi; 34 + struct linear_conf *conf; 35 + 36 + lo = 0; 37 + hi = mddev->raid_disks - 1; 38 + conf = mddev->private; 39 + 40 + /* 41 + * Binary Search 42 + */ 43 + 44 + while (hi > lo) { 45 + 46 + mid = (hi + lo) / 2; 47 + if (sector < conf->disks[mid].end_sector) 48 + hi = mid; 49 + else 50 + lo = mid + 1; 51 + } 52 + 53 + return conf->disks + lo; 54 + } 55 + 56 + static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks) 57 + { 58 + struct linear_conf *conf; 59 + sector_t array_sectors; 60 + 61 + conf = mddev->private; 62 + WARN_ONCE(sectors || raid_disks, 63 + "%s does not support generic reshape\n", __func__); 64 + array_sectors = conf->array_sectors; 65 + 66 + return array_sectors; 67 + } 68 + 69 + static int linear_set_limits(struct mddev *mddev) 70 + { 71 + struct queue_limits lim; 72 + int err; 73 + 74 + md_init_stacking_limits(&lim); 75 + lim.max_hw_sectors = mddev->chunk_sectors; 76 + lim.max_write_zeroes_sectors = mddev->chunk_sectors; 77 + lim.io_min = mddev->chunk_sectors << 9; 78 + err = mddev_stack_rdev_limits(mddev, &lim, 
MDDEV_STACK_INTEGRITY); 79 + if (err) { 80 + queue_limits_cancel_update(mddev->gendisk->queue); 81 + return err; 82 + } 83 + 84 + return queue_limits_set(mddev->gendisk->queue, &lim); 85 + } 86 + 87 + static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) 88 + { 89 + struct linear_conf *conf; 90 + struct md_rdev *rdev; 91 + int ret = -EINVAL; 92 + int cnt; 93 + int i; 94 + 95 + conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL); 96 + if (!conf) 97 + return ERR_PTR(-ENOMEM); 98 + 99 + /* 100 + * conf->raid_disks is copy of mddev->raid_disks. The reason to 101 + * keep a copy of mddev->raid_disks in struct linear_conf is, 102 + * mddev->raid_disks may not be consistent with pointers number of 103 + * conf->disks[] when it is updated in linear_add() and used to 104 + * iterate old conf->disks[] earray in linear_congested(). 105 + * Here conf->raid_disks is always consitent with number of 106 + * pointers in conf->disks[] array, and mddev->private is updated 107 + * with rcu_assign_pointer() in linear_addr(), such race can be 108 + * avoided. 109 + */ 110 + conf->raid_disks = raid_disks; 111 + 112 + cnt = 0; 113 + conf->array_sectors = 0; 114 + 115 + rdev_for_each(rdev, mddev) { 116 + int j = rdev->raid_disk; 117 + struct dev_info *disk = conf->disks + j; 118 + sector_t sectors; 119 + 120 + if (j < 0 || j >= raid_disks || disk->rdev) { 121 + pr_warn("md/linear:%s: disk numbering problem. Aborting!\n", 122 + mdname(mddev)); 123 + goto out; 124 + } 125 + 126 + disk->rdev = rdev; 127 + if (mddev->chunk_sectors) { 128 + sectors = rdev->sectors; 129 + sector_div(sectors, mddev->chunk_sectors); 130 + rdev->sectors = sectors * mddev->chunk_sectors; 131 + } 132 + 133 + conf->array_sectors += rdev->sectors; 134 + cnt++; 135 + } 136 + if (cnt != raid_disks) { 137 + pr_warn("md/linear:%s: not enough drives present. Aborting!\n", 138 + mdname(mddev)); 139 + goto out; 140 + } 141 + 142 + /* 143 + * Here we calculate the device offsets. 
144 + */ 145 + conf->disks[0].end_sector = conf->disks[0].rdev->sectors; 146 + 147 + for (i = 1; i < raid_disks; i++) 148 + conf->disks[i].end_sector = 149 + conf->disks[i-1].end_sector + 150 + conf->disks[i].rdev->sectors; 151 + 152 + if (!mddev_is_dm(mddev)) { 153 + ret = linear_set_limits(mddev); 154 + if (ret) 155 + goto out; 156 + } 157 + 158 + return conf; 159 + 160 + out: 161 + kfree(conf); 162 + return ERR_PTR(ret); 163 + } 164 + 165 + static int linear_run(struct mddev *mddev) 166 + { 167 + struct linear_conf *conf; 168 + int ret; 169 + 170 + if (md_check_no_bitmap(mddev)) 171 + return -EINVAL; 172 + 173 + conf = linear_conf(mddev, mddev->raid_disks); 174 + if (IS_ERR(conf)) 175 + return PTR_ERR(conf); 176 + 177 + mddev->private = conf; 178 + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); 179 + 180 + ret = md_integrity_register(mddev); 181 + if (ret) { 182 + kfree(conf); 183 + mddev->private = NULL; 184 + } 185 + return ret; 186 + } 187 + 188 + static int linear_add(struct mddev *mddev, struct md_rdev *rdev) 189 + { 190 + /* Adding a drive to a linear array allows the array to grow. 191 + * It is permitted if the new drive has a matching superblock 192 + * already on it, with raid_disk equal to raid_disks. 193 + * It is achieved by creating a new linear_private_data structure 194 + * and swapping it in in-place of the current one. 195 + * The current one is never freed until the array is stopped. 196 + * This avoids races. 197 + */ 198 + struct linear_conf *newconf, *oldconf; 199 + 200 + if (rdev->saved_raid_disk != mddev->raid_disks) 201 + return -EINVAL; 202 + 203 + rdev->raid_disk = rdev->saved_raid_disk; 204 + rdev->saved_raid_disk = -1; 205 + 206 + newconf = linear_conf(mddev, mddev->raid_disks + 1); 207 + if (!newconf) 208 + return -ENOMEM; 209 + 210 + /* newconf->raid_disks already keeps a copy of * the increased 211 + * value of mddev->raid_disks, WARN_ONCE() is just used to make 212 + * sure of this. 
It is possible that oldconf is still referenced 213 + * in linear_congested(), therefore kfree_rcu() is used to free 214 + * oldconf until no one uses it anymore. 215 + */ 216 + oldconf = rcu_dereference_protected(mddev->private, 217 + lockdep_is_held(&mddev->reconfig_mutex)); 218 + mddev->raid_disks++; 219 + WARN_ONCE(mddev->raid_disks != newconf->raid_disks, 220 + "copied raid_disks doesn't match mddev->raid_disks"); 221 + rcu_assign_pointer(mddev->private, newconf); 222 + md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); 223 + set_capacity_and_notify(mddev->gendisk, mddev->array_sectors); 224 + kfree_rcu(oldconf, rcu); 225 + return 0; 226 + } 227 + 228 + static void linear_free(struct mddev *mddev, void *priv) 229 + { 230 + struct linear_conf *conf = priv; 231 + 232 + kfree(conf); 233 + } 234 + 235 + static bool linear_make_request(struct mddev *mddev, struct bio *bio) 236 + { 237 + struct dev_info *tmp_dev; 238 + sector_t start_sector, end_sector, data_offset; 239 + sector_t bio_sector = bio->bi_iter.bi_sector; 240 + 241 + if (unlikely(bio->bi_opf & REQ_PREFLUSH) 242 + && md_flush_request(mddev, bio)) 243 + return true; 244 + 245 + tmp_dev = which_dev(mddev, bio_sector); 246 + start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; 247 + end_sector = tmp_dev->end_sector; 248 + data_offset = tmp_dev->rdev->data_offset; 249 + 250 + if (unlikely(bio_sector >= end_sector || 251 + bio_sector < start_sector)) 252 + goto out_of_bounds; 253 + 254 + if (unlikely(is_rdev_broken(tmp_dev->rdev))) { 255 + md_error(mddev, tmp_dev->rdev); 256 + bio_io_error(bio); 257 + return true; 258 + } 259 + 260 + if (unlikely(bio_end_sector(bio) > end_sector)) { 261 + /* This bio crosses a device boundary, so we have to split it */ 262 + struct bio *split = bio_split(bio, end_sector - bio_sector, 263 + GFP_NOIO, &mddev->bio_set); 264 + 265 + if (IS_ERR(split)) { 266 + bio->bi_status = errno_to_blk_status(PTR_ERR(split)); 267 + bio_endio(bio); 268 + return true; 269 + } 270 + 
271 + bio_chain(split, bio); 272 + submit_bio_noacct(bio); 273 + bio = split; 274 + } 275 + 276 + md_account_bio(mddev, &bio); 277 + bio_set_dev(bio, tmp_dev->rdev->bdev); 278 + bio->bi_iter.bi_sector = bio->bi_iter.bi_sector - 279 + start_sector + data_offset; 280 + 281 + if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 282 + !bdev_max_discard_sectors(bio->bi_bdev))) { 283 + /* Just ignore it */ 284 + bio_endio(bio); 285 + } else { 286 + if (mddev->gendisk) 287 + trace_block_bio_remap(bio, disk_devt(mddev->gendisk), 288 + bio_sector); 289 + mddev_check_write_zeroes(mddev, bio); 290 + submit_bio_noacct(bio); 291 + } 292 + return true; 293 + 294 + out_of_bounds: 295 + pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n", 296 + mdname(mddev), 297 + (unsigned long long)bio->bi_iter.bi_sector, 298 + tmp_dev->rdev->bdev, 299 + (unsigned long long)tmp_dev->rdev->sectors, 300 + (unsigned long long)start_sector); 301 + bio_io_error(bio); 302 + return true; 303 + } 304 + 305 + static void linear_status(struct seq_file *seq, struct mddev *mddev) 306 + { 307 + seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); 308 + } 309 + 310 + static void linear_error(struct mddev *mddev, struct md_rdev *rdev) 311 + { 312 + if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) { 313 + char *md_name = mdname(mddev); 314 + 315 + pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n", 316 + md_name, rdev->bdev); 317 + } 318 + } 319 + 320 + static void linear_quiesce(struct mddev *mddev, int state) 321 + { 322 + } 323 + 324 + static struct md_personality linear_personality = { 325 + .name = "linear", 326 + .level = LEVEL_LINEAR, 327 + .owner = THIS_MODULE, 328 + .make_request = linear_make_request, 329 + .run = linear_run, 330 + .free = linear_free, 331 + .status = linear_status, 332 + .hot_add_disk = linear_add, 333 + .size = linear_size, 334 + .quiesce = linear_quiesce, 335 + .error_handler = linear_error, 336 + }; 337 + 338 
+ static int __init linear_init(void) 339 + { 340 + return register_md_personality(&linear_personality); 341 + } 342 + 343 + static void linear_exit(void) 344 + { 345 + unregister_md_personality(&linear_personality); 346 + } 347 + 348 + module_init(linear_init); 349 + module_exit(linear_exit); 350 + MODULE_LICENSE("GPL"); 351 + MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)"); 352 + MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/ 353 + MODULE_ALIAS("md-linear"); 354 + MODULE_ALIAS("md-level--1");
+1 -1
drivers/md/md.c
··· 8124 8124 return; 8125 8125 mddev->pers->error_handler(mddev, rdev); 8126 8126 8127 - if (mddev->pers->level == 0) 8127 + if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR) 8128 8128 return; 8129 8129 8130 8130 if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
+1 -1
include/uapi/linux/raid/md_p.h
··· 233 233 char set_name[32]; /* set and interpreted by user-space */ 234 234 235 235 __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ 236 - __le32 level; /* 0,1,4,5 */ 236 + __le32 level; /* 0,1,4,5, -1 (linear) */ 237 237 __le32 layout; /* only for raid5 and raid10 currently */ 238 238 __le64 size; /* used size of component devices, in 512byte sectors */ 239 239
+2
include/uapi/linux/raid/md_u.h
··· 103 103 104 104 } mdu_array_info_t; 105 105 106 + #define LEVEL_LINEAR (-1) 107 + 106 108 /* we need a value for 'no level specified' and 0 107 109 * means 'raid0', so we need something else. This is 108 110 * for internal use only