Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

md: allow a reshape operation to be reversed.

Currently a reshape operation always progresses from the start
of the array to the end unless the number of devices is being
reduced, in which case it progresses in the opposite direction.

To reverse a partial reshape which changes the number of devices
you can stop the array and re-assemble with the raid-disks numbers
reversed, and the reshape will be undone.

However for a reshape that does not change the number of devices
it is not possible to reverse the reshape in the middle - you have to
wait until it completes.

So add a 'reshape_direction' attribute which is either 'forwards' or
'backwards' and can be explicitly set when delta_disks is zero.

This will become more important when we allow the data_offset to
change in a reshape. Then the explicit statement of what direction is
being used will be more useful.

This can be enabled in raid5 trivially as it already supports
reverse reshape and just needs to use a different trigger to request it.

Signed-off-by: NeilBrown <neilb@suse.de>

NeilBrown 2c810cdd b5e1b8ce

+84 -14
+65 -2
drivers/md/md.c
··· 607 607 init_waitqueue_head(&mddev->sb_wait); 608 608 init_waitqueue_head(&mddev->recovery_wait); 609 609 mddev->reshape_position = MaxSector; 610 + mddev->reshape_backwards = 0; 610 611 mddev->resync_min = 0; 611 612 mddev->resync_max = MaxSector; 612 613 mddev->level = LEVEL_NONE; ··· 1186 1185 mddev->events = ev1; 1187 1186 mddev->bitmap_info.offset = 0; 1188 1187 mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; 1188 + mddev->reshape_backwards = 0; 1189 1189 1190 1190 if (mddev->minor_version >= 91) { 1191 1191 mddev->reshape_position = sb->reshape_position; ··· 1194 1192 mddev->new_level = sb->new_level; 1195 1193 mddev->new_layout = sb->new_layout; 1196 1194 mddev->new_chunk_sectors = sb->new_chunk >> 9; 1195 + if (mddev->delta_disks < 0) 1196 + mddev->reshape_backwards = 1; 1197 1197 } else { 1198 1198 mddev->reshape_position = MaxSector; 1199 1199 mddev->delta_disks = 0; ··· 1649 1645 mddev->events = ev1; 1650 1646 mddev->bitmap_info.offset = 0; 1651 1647 mddev->bitmap_info.default_offset = 1024 >> 9; 1652 - 1648 + mddev->reshape_backwards = 0; 1649 + 1653 1650 mddev->recovery_cp = le64_to_cpu(sb->resync_offset); 1654 1651 memcpy(mddev->uuid, sb->set_uuid, 16); 1655 1652 ··· 1667 1662 mddev->new_level = le32_to_cpu(sb->new_level); 1668 1663 mddev->new_layout = le32_to_cpu(sb->new_layout); 1669 1664 mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk); 1665 + if (mddev->delta_disks < 0 || 1666 + (mddev->delta_disks == 0 && 1667 + (le32_to_cpu(sb->feature_map) 1668 + & MD_FEATURE_RESHAPE_BACKWARDS))) 1669 + mddev->reshape_backwards = 1; 1670 1670 } else { 1671 1671 mddev->reshape_position = MaxSector; 1672 1672 mddev->delta_disks = 0; ··· 1791 1781 sb->delta_disks = cpu_to_le32(mddev->delta_disks); 1792 1782 sb->new_level = cpu_to_le32(mddev->new_level); 1793 1783 sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); 1784 + if (mddev->delta_disks == 0 && 1785 + mddev->reshape_backwards) 1786 + sb->feature_map 1787 + |= 
cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS); 1794 1788 } 1795 1789 1796 1790 if (rdev->badblocks.count == 0) ··· 3433 3419 mddev->new_chunk_sectors = mddev->chunk_sectors; 3434 3420 mddev->raid_disks -= mddev->delta_disks; 3435 3421 mddev->delta_disks = 0; 3422 + mddev->reshape_backwards = 0; 3436 3423 module_put(pers->owner); 3437 3424 printk(KERN_WARNING "md: %s: %s would not accept array\n", 3438 3425 mdname(mddev), clevel); ··· 3507 3492 mddev->layout = mddev->new_layout; 3508 3493 mddev->chunk_sectors = mddev->new_chunk_sectors; 3509 3494 mddev->delta_disks = 0; 3495 + mddev->reshape_backwards = 0; 3510 3496 mddev->degraded = 0; 3511 3497 if (mddev->pers->sync_request == NULL) { 3512 3498 /* this is now an array without redundancy, so ··· 3601 3585 int olddisks = mddev->raid_disks - mddev->delta_disks; 3602 3586 mddev->delta_disks = n - olddisks; 3603 3587 mddev->raid_disks = n; 3588 + mddev->reshape_backwards = (mddev->delta_disks < 0); 3604 3589 } else 3605 3590 mddev->raid_disks = n; 3606 3591 return rv ? rv : len; ··· 4453 4436 return -EINVAL; 4454 4437 mddev->reshape_position = new; 4455 4438 mddev->delta_disks = 0; 4439 + mddev->reshape_backwards = 0; 4456 4440 mddev->new_level = mddev->level; 4457 4441 mddev->new_layout = mddev->layout; 4458 4442 mddev->new_chunk_sectors = mddev->chunk_sectors; ··· 4463 4445 static struct md_sysfs_entry md_reshape_position = 4464 4446 __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, 4465 4447 reshape_position_store); 4448 + 4449 + static ssize_t 4450 + reshape_direction_show(struct mddev *mddev, char *page) 4451 + { 4452 + return sprintf(page, "%s\n", 4453 + mddev->reshape_backwards ? 
"backwards" : "forwards"); 4454 + } 4455 + 4456 + static ssize_t 4457 + reshape_direction_store(struct mddev *mddev, const char *buf, size_t len) 4458 + { 4459 + int backwards = 0; 4460 + if (cmd_match(buf, "forwards")) 4461 + backwards = 0; 4462 + else if (cmd_match(buf, "backwards")) 4463 + backwards = 1; 4464 + else 4465 + return -EINVAL; 4466 + if (mddev->reshape_backwards == backwards) 4467 + return len; 4468 + 4469 + /* check if we are allowed to change */ 4470 + if (mddev->delta_disks) 4471 + return -EBUSY; 4472 + 4473 + if (mddev->persistent && 4474 + mddev->major_version == 0) 4475 + return -EINVAL; 4476 + 4477 + mddev->reshape_backwards = backwards; 4478 + return len; 4479 + } 4480 + 4481 + static struct md_sysfs_entry md_reshape_direction = 4482 + __ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show, 4483 + reshape_direction_store); 4466 4484 4467 4485 static ssize_t 4468 4486 array_size_show(struct mddev *mddev, char *page) ··· 4555 4501 &md_safe_delay.attr, 4556 4502 &md_array_state.attr, 4557 4503 &md_reshape_position.attr, 4504 + &md_reshape_direction.attr, 4558 4505 &md_array_size.attr, 4559 4506 &max_corr_read_errors.attr, 4560 4507 NULL, ··· 5119 5064 mddev->events = 0; 5120 5065 mddev->can_decrease_events = 0; 5121 5066 mddev->delta_disks = 0; 5067 + mddev->reshape_backwards = 0; 5122 5068 mddev->new_level = LEVEL_NONE; 5123 5069 mddev->new_layout = 0; 5124 5070 mddev->new_chunk_sectors = 0; ··· 5944 5888 mddev->new_chunk_sectors = mddev->chunk_sectors; 5945 5889 mddev->new_layout = mddev->layout; 5946 5890 mddev->delta_disks = 0; 5891 + mddev->reshape_backwards = 0; 5947 5892 5948 5893 return 0; 5949 5894 } ··· 6010 5953 if (mddev->sync_thread || mddev->reshape_position != MaxSector) 6011 5954 return -EBUSY; 6012 5955 mddev->delta_disks = raid_disks - mddev->raid_disks; 5956 + if (mddev->delta_disks < 0) 5957 + mddev->reshape_backwards = 1; 5958 + else if (mddev->delta_disks > 0) 5959 + mddev->reshape_backwards = 0; 6013 5960 6014 
5961 rv = mddev->pers->check_reshape(mddev); 6015 - if (rv < 0) 5962 + if (rv < 0) { 6016 5963 mddev->delta_disks = 0; 5964 + mddev->reshape_backwards = 0; 5965 + } 6017 5966 return rv; 6018 5967 } 6019 5968
+1
drivers/md/md.h
··· 262 262 sector_t reshape_position; 263 263 int delta_disks, new_level, new_layout; 264 264 int new_chunk_sectors; 265 + int reshape_backwards; 265 266 266 267 atomic_t plug_cnt; /* If device is expecting 267 268 * more bios soon.
+12 -11
drivers/md/raid5.c
··· 3970 3970 * to check again. 3971 3971 */ 3972 3972 spin_lock_irq(&conf->device_lock); 3973 - if (mddev->delta_disks < 0 3973 + if (mddev->reshape_backwards 3974 3974 ? logical_sector < conf->reshape_progress 3975 3975 : logical_sector >= conf->reshape_progress) { 3976 3976 disks = conf->previous_raid_disks; 3977 3977 previous = 1; 3978 3978 } else { 3979 - if (mddev->delta_disks < 0 3979 + if (mddev->reshape_backwards 3980 3980 ? logical_sector < conf->reshape_safe 3981 3981 : logical_sector >= conf->reshape_safe) { 3982 3982 spin_unlock_irq(&conf->device_lock); ··· 4009 4009 */ 4010 4010 int must_retry = 0; 4011 4011 spin_lock_irq(&conf->device_lock); 4012 - if (mddev->delta_disks < 0 4012 + if (mddev->reshape_backwards 4013 4013 ? logical_sector >= conf->reshape_progress 4014 4014 : logical_sector < conf->reshape_progress) 4015 4015 /* mismatch, need to try again */ ··· 4108 4108 4109 4109 if (sector_nr == 0) { 4110 4110 /* If restarting in the middle, skip the initial sectors */ 4111 - if (mddev->delta_disks < 0 && 4111 + if (mddev->reshape_backwards && 4112 4112 conf->reshape_progress < raid5_size(mddev, 0, 0)) { 4113 4113 sector_nr = raid5_size(mddev, 0, 0) 4114 4114 - conf->reshape_progress; 4115 - } else if (mddev->delta_disks >= 0 && 4115 + } else if (!mddev->reshape_backwards && 4116 4116 conf->reshape_progress > 0) 4117 4117 sector_nr = conf->reshape_progress; 4118 4118 sector_div(sector_nr, new_data_disks); ··· 4147 4147 sector_div(readpos, data_disks); 4148 4148 safepos = conf->reshape_safe; 4149 4149 sector_div(safepos, data_disks); 4150 - if (mddev->delta_disks < 0) { 4150 + if (mddev->reshape_backwards) { 4151 4151 writepos -= min_t(sector_t, reshape_sectors, writepos); 4152 4152 readpos += reshape_sectors; 4153 4153 safepos += reshape_sectors; ··· 4174 4174 * Maybe that number should be configurable, but I'm not sure it is 4175 4175 * worth it.... maybe it could be a multiple of safemode_delay??? 
4176 4176 */ 4177 - if ((mddev->delta_disks < 0 4177 + if ((mddev->reshape_backwards 4178 4178 ? (safepos > writepos && readpos < writepos) 4179 4179 : (safepos < writepos && readpos > writepos)) || 4180 4180 time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) { ··· 4195 4195 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 4196 4196 } 4197 4197 4198 - if (mddev->delta_disks < 0) { 4198 + if (mddev->reshape_backwards) { 4199 4199 BUG_ON(conf->reshape_progress == 0); 4200 4200 stripe_addr = writepos; 4201 4201 BUG_ON((mddev->dev_sectors & ··· 4239 4239 list_add(&sh->lru, &stripes); 4240 4240 } 4241 4241 spin_lock_irq(&conf->device_lock); 4242 - if (mddev->delta_disks < 0) 4242 + if (mddev->reshape_backwards) 4243 4243 conf->reshape_progress -= reshape_sectors * new_data_disks; 4244 4244 else 4245 4245 conf->reshape_progress += reshape_sectors * new_data_disks; ··· 5008 5008 mdname(mddev)); 5009 5009 return -EINVAL; 5010 5010 } 5011 - } else if (mddev->delta_disks < 0 5011 + } else if (mddev->reshape_backwards 5012 5012 ? (here_new * mddev->new_chunk_sectors <= 5013 5013 here_old * mddev->chunk_sectors) 5014 5014 : (here_new * mddev->new_chunk_sectors >= ··· 5535 5535 conf->chunk_sectors = mddev->new_chunk_sectors; 5536 5536 conf->prev_algo = conf->algorithm; 5537 5537 conf->algorithm = mddev->new_layout; 5538 - if (mddev->delta_disks < 0) 5538 + if (mddev->reshape_backwards) 5539 5539 conf->reshape_progress = raid5_size(mddev, 0, 0); 5540 5540 else 5541 5541 conf->reshape_progress = 0; ··· 5663 5663 mddev->chunk_sectors = conf->chunk_sectors; 5664 5664 mddev->reshape_position = MaxSector; 5665 5665 mddev->delta_disks = 0; 5666 + mddev->reshape_backwards = 0; 5666 5667 } 5667 5668 } 5668 5669
+6 -1
include/linux/raid/md_p.h
··· 281 281 * active device with same 'role'. 282 282 * 'recovery_offset' is also set. 283 283 */ 284 + #define MD_FEATURE_RESHAPE_BACKWARDS 32 /* Reshape doesn't change number 285 + * of devices, but is going 286 + * backwards anyway. 287 + */ 284 288 #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ 285 289 |MD_FEATURE_RECOVERY_OFFSET \ 286 290 |MD_FEATURE_RESHAPE_ACTIVE \ 287 291 |MD_FEATURE_BAD_BLOCKS \ 288 - |MD_FEATURE_REPLACEMENT) 292 + |MD_FEATURE_REPLACEMENT \ 293 + |MD_FEATURE_RESHAPE_BACKWARDS) 289 294 290 295 #endif