Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-5.11/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper updates from Mike Snitzer:

- Add DM verity support for signature verification with 2nd keyring

- Fix DM verity to skip verity work if IO completes with an error while
the system is shutting down

- Add new DM multipath "IO affinity" path selector that selects the path
for an IO based on the CPU the IO is executed on, using a user-provided
CPU-to-path mapping

- Rename DM multipath path selector source files to have "dm-ps" prefix

- Add REQ_NOWAIT support to some other simple DM targets that don't
block in more elaborate ways waiting for IO

- Export DM crypt's kcryptd workqueue via sysfs (WQ_SYSFS)

- Fix error return code in DM's target_message() if an empty message is
received

- A handful of other small cleanups

* tag 'for-5.11/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
dm cache: simplify the return expression of load_mapping()
dm ebs: avoid double unlikely() notation when using IS_ERR()
dm verity: skip verity work if I/O error when system is shutting down
dm crypt: export sysfs of kcryptd workqueue
dm ioctl: fix error return code in target_message
dm crypt: Constify static crypt_iv_operations
dm: add support for REQ_NOWAIT to various targets
dm: rename multipath path selector source files to have "dm-ps" prefix
dm mpath: add IO affinity path selector
dm verity: Add support for signature verification with 2nd keyring
dm: remove unnecessary current->bio_list check when submitting split bio

+345 -27
+6 -1
Documentation/admin-guide/device-mapper/verity.rst
··· 134 134 the pkcs7 signature of the roothash. The pkcs7 signature is used to validate 135 135 the root hash during the creation of the device mapper block device. 136 136 Verification of roothash depends on the config DM_VERITY_VERIFY_ROOTHASH_SIG 137 - being set in the kernel. 137 + being set in the kernel. The signatures are checked against the builtin 138 + trusted keyring by default, or the secondary trusted keyring if 139 + DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING is set. The secondary 140 + trusted keyring includes by default the builtin trusted keyring, and it can 141 + also gain new certificates at run time if they are signed by a certificate 142 + already in the secondary trusted keyring. 138 143 139 144 Theory of operation 140 145 ===================
+21 -1
drivers/md/Kconfig
··· 463 463 464 464 If unsure, say N. 465 465 466 + config DM_MULTIPATH_IOA 467 + tristate "I/O Path Selector based on CPU submission" 468 + depends on DM_MULTIPATH 469 + help 470 + This path selector selects the path based on the CPU the IO is 471 + executed on and the CPU to path mapping setup at path addition time. 472 + 473 + If unsure, say N. 474 + 466 475 config DM_DELAY 467 476 tristate "I/O delaying target" 468 477 depends on BLK_DEV_DM ··· 539 530 bool "Verity data device root hash signature verification support" 540 531 depends on DM_VERITY 541 532 select SYSTEM_DATA_VERIFICATION 542 - help 533 + help 543 534 Add ability for dm-verity device to be validated if the 544 535 pre-generated tree of cryptographic checksums passed has a pkcs#7 545 536 signature file that can validate the roothash of the tree. 537 + 538 + By default, rely on the builtin trusted keyring. 539 + 540 + If unsure, say N. 541 + 542 + config DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING 543 + bool "Verity data device root hash signature verification with secondary keyring" 544 + depends on DM_VERITY_VERIFY_ROOTHASH_SIG 545 + depends on SECONDARY_TRUSTED_KEYRING 546 + help 547 + Rely on the secondary trusted keyring to verify dm-verity signatures. 546 548 547 549 If unsure, say N. 548 550
+13 -7
drivers/md/Makefile
··· 7 7 dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o dm-stats.o \ 8 8 dm-rq.o 9 9 dm-multipath-y += dm-path-selector.o dm-mpath.o 10 + dm-historical-service-time-y += dm-ps-historical-service-time.o 11 + dm-io-affinity-y += dm-ps-io-affinity.o 12 + dm-queue-length-y += dm-ps-queue-length.o 13 + dm-round-robin-y += dm-ps-round-robin.o 14 + dm-service-time-y += dm-ps-service-time.o 10 15 dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \ 11 16 dm-snap-persistent.o 12 17 dm-mirror-y += dm-raid1.o 13 - dm-log-userspace-y \ 14 - += dm-log-userspace-base.o dm-log-userspace-transfer.o 18 + dm-log-userspace-y += dm-log-userspace-base.o dm-log-userspace-transfer.o 15 19 dm-bio-prison-y += dm-bio-prison-v1.o dm-bio-prison-v2.o 16 20 dm-thin-pool-y += dm-thin.o dm-thin-metadata.o 17 21 dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \ 18 22 dm-cache-background-tracker.o 19 - dm-cache-smq-y += dm-cache-policy-smq.o 23 + dm-cache-smq-y += dm-cache-policy-smq.o 20 24 dm-ebs-y += dm-ebs-target.o 21 25 dm-era-y += dm-era-target.o 22 26 dm-clone-y += dm-clone-target.o dm-clone-metadata.o 23 27 dm-verity-y += dm-verity-target.o 28 + dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o 29 + 24 30 md-mod-y += md.o md-bitmap.o 25 31 raid456-y += raid5.o raid5-cache.o raid5-ppl.o 26 - dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o 27 32 linear-y += md-linear.o 28 33 multipath-y += md-multipath.o 29 34 faulty-y += md-faulty.o ··· 64 59 obj-$(CONFIG_DM_MULTIPATH_QL) += dm-queue-length.o 65 60 obj-$(CONFIG_DM_MULTIPATH_ST) += dm-service-time.o 66 61 obj-$(CONFIG_DM_MULTIPATH_HST) += dm-historical-service-time.o 62 + obj-$(CONFIG_DM_MULTIPATH_IOA) += dm-io-affinity.o 67 63 obj-$(CONFIG_DM_SWITCH) += dm-switch.o 68 64 obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o 69 - obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ 65 + obj-$(CONFIG_DM_PERSISTENT_DATA) += persistent-data/ 70 66 obj-$(CONFIG_DM_MIRROR) += 
dm-mirror.o dm-log.o dm-region-hash.o 71 67 obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o 72 68 obj-$(CONFIG_DM_ZERO) += dm-zero.o 73 - obj-$(CONFIG_DM_RAID) += dm-raid.o 74 - obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o 69 + obj-$(CONFIG_DM_RAID) += dm-raid.o 70 + obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o 75 71 obj-$(CONFIG_DM_VERITY) += dm-verity.o 76 72 obj-$(CONFIG_DM_CACHE) += dm-cache.o 77 73 obj-$(CONFIG_DM_CACHE_SMQ) += dm-cache-smq.o
+1 -6
drivers/md/dm-cache-target.c
··· 2840 2840 static int load_mapping(void *context, dm_oblock_t oblock, dm_cblock_t cblock, 2841 2841 bool dirty, uint32_t hint, bool hint_valid) 2842 2842 { 2843 - int r; 2844 2843 struct cache *cache = context; 2845 2844 2846 2845 if (dirty) { ··· 2848 2849 } else 2849 2850 clear_bit(from_cblock(cblock), cache->dirty_bitset); 2850 2851 2851 - r = policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); 2852 - if (r) 2853 - return r; 2854 - 2855 - return 0; 2852 + return policy_load_mapping(cache->policy, oblock, cblock, dirty, hint, hint_valid); 2856 2853 } 2857 2854 2858 2855 /*
+7 -6
drivers/md/dm-crypt.c
··· 1090 1090 .post = crypt_iv_tcw_post 1091 1091 }; 1092 1092 1093 - static struct crypt_iv_operations crypt_iv_random_ops = { 1093 + static const struct crypt_iv_operations crypt_iv_random_ops = { 1094 1094 .generator = crypt_iv_random_gen 1095 1095 }; 1096 1096 1097 - static struct crypt_iv_operations crypt_iv_eboiv_ops = { 1097 + static const struct crypt_iv_operations crypt_iv_eboiv_ops = { 1098 1098 .ctr = crypt_iv_eboiv_ctr, 1099 1099 .generator = crypt_iv_eboiv_gen 1100 1100 }; 1101 1101 1102 - static struct crypt_iv_operations crypt_iv_elephant_ops = { 1102 + static const struct crypt_iv_operations crypt_iv_elephant_ops = { 1103 1103 .ctr = crypt_iv_elephant_ctr, 1104 1104 .dtr = crypt_iv_elephant_dtr, 1105 1105 .init = crypt_iv_elephant_init, ··· 3166 3166 } 3167 3167 3168 3168 if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) 3169 - cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 3169 + cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 3170 3170 1, devname); 3171 3171 else 3172 - cc->crypt_queue = alloc_workqueue("kcryptd/%s", 3173 - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, 3172 + cc->crypt_queue = alloc_workqueue("kcryptd-%s", 3173 + WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | 3174 + WQ_UNBOUND | WQ_SYSFS, 3174 3175 num_online_cpus(), devname); 3175 3176 if (!cc->crypt_queue) { 3176 3177 ti->error = "Couldn't create kcryptd queue";
+1 -1
drivers/md/dm-ebs-target.c
··· 86 86 else 87 87 ba = dm_bufio_new(ec->bufio, block, &b); 88 88 89 - if (unlikely(IS_ERR(ba))) { 89 + if (IS_ERR(ba)) { 90 90 /* 91 91 * Carry on with next buffer, if any, to issue all possible 92 92 * data but return error.
drivers/md/dm-historical-service-time.c drivers/md/dm-ps-historical-service-time.c
+1
drivers/md/dm-ioctl.c
··· 1600 1600 1601 1601 if (!argc) { 1602 1602 DMWARN("Empty message received."); 1603 + r = -EINVAL; 1603 1604 goto out_argv; 1604 1605 } 1605 1606
+272
drivers/md/dm-ps-io-affinity.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * Copyright (C) 2020 Oracle Corporation 4 + * 5 + * Module Author: Mike Christie 6 + */ 7 + #include "dm-path-selector.h" 8 + 9 + #include <linux/device-mapper.h> 10 + #include <linux/module.h> 11 + 12 + #define DM_MSG_PREFIX "multipath io-affinity" 13 + 14 + struct path_info { 15 + struct dm_path *path; 16 + cpumask_var_t cpumask; 17 + refcount_t refcount; 18 + bool failed; 19 + }; 20 + 21 + struct selector { 22 + struct path_info **path_map; 23 + cpumask_var_t path_mask; 24 + atomic_t map_misses; 25 + }; 26 + 27 + static void ioa_free_path(struct selector *s, unsigned int cpu) 28 + { 29 + struct path_info *pi = s->path_map[cpu]; 30 + 31 + if (!pi) 32 + return; 33 + 34 + if (refcount_dec_and_test(&pi->refcount)) { 35 + cpumask_clear_cpu(cpu, s->path_mask); 36 + free_cpumask_var(pi->cpumask); 37 + kfree(pi); 38 + 39 + s->path_map[cpu] = NULL; 40 + } 41 + } 42 + 43 + static int ioa_add_path(struct path_selector *ps, struct dm_path *path, 44 + int argc, char **argv, char **error) 45 + { 46 + struct selector *s = ps->context; 47 + struct path_info *pi = NULL; 48 + unsigned int cpu; 49 + int ret; 50 + 51 + if (argc != 1) { 52 + *error = "io-affinity ps: invalid number of arguments"; 53 + return -EINVAL; 54 + } 55 + 56 + pi = kzalloc(sizeof(*pi), GFP_KERNEL); 57 + if (!pi) { 58 + *error = "io-affinity ps: Error allocating path context"; 59 + return -ENOMEM; 60 + } 61 + 62 + pi->path = path; 63 + path->pscontext = pi; 64 + refcount_set(&pi->refcount, 1); 65 + 66 + if (!zalloc_cpumask_var(&pi->cpumask, GFP_KERNEL)) { 67 + *error = "io-affinity ps: Error allocating cpumask context"; 68 + ret = -ENOMEM; 69 + goto free_pi; 70 + } 71 + 72 + ret = cpumask_parse(argv[0], pi->cpumask); 73 + if (ret) { 74 + *error = "io-affinity ps: invalid cpumask"; 75 + ret = -EINVAL; 76 + goto free_mask; 77 + } 78 + 79 + for_each_cpu(cpu, pi->cpumask) { 80 + if (cpu >= nr_cpu_ids) { 81 + DMWARN_LIMIT("Ignoring mapping for CPU %u. 
Max CPU is %u", 82 + cpu, nr_cpu_ids); 83 + break; 84 + } 85 + 86 + if (s->path_map[cpu]) { 87 + DMWARN("CPU mapping for %u exists. Ignoring.", cpu); 88 + continue; 89 + } 90 + 91 + cpumask_set_cpu(cpu, s->path_mask); 92 + s->path_map[cpu] = pi; 93 + refcount_inc(&pi->refcount); 94 + continue; 95 + } 96 + 97 + if (refcount_dec_and_test(&pi->refcount)) { 98 + *error = "io-affinity ps: No new/valid CPU mapping found"; 99 + ret = -EINVAL; 100 + goto free_mask; 101 + } 102 + 103 + return 0; 104 + 105 + free_mask: 106 + free_cpumask_var(pi->cpumask); 107 + free_pi: 108 + kfree(pi); 109 + return ret; 110 + } 111 + 112 + static int ioa_create(struct path_selector *ps, unsigned argc, char **argv) 113 + { 114 + struct selector *s; 115 + 116 + s = kmalloc(sizeof(*s), GFP_KERNEL); 117 + if (!s) 118 + return -ENOMEM; 119 + 120 + s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *), 121 + GFP_KERNEL); 122 + if (!s->path_map) 123 + goto free_selector; 124 + 125 + if (!zalloc_cpumask_var(&s->path_mask, GFP_KERNEL)) 126 + goto free_map; 127 + 128 + atomic_set(&s->map_misses, 0); 129 + ps->context = s; 130 + return 0; 131 + 132 + free_map: 133 + kfree(s->path_map); 134 + free_selector: 135 + kfree(s); 136 + return -ENOMEM; 137 + } 138 + 139 + static void ioa_destroy(struct path_selector *ps) 140 + { 141 + struct selector *s = ps->context; 142 + unsigned cpu; 143 + 144 + for_each_cpu(cpu, s->path_mask) 145 + ioa_free_path(s, cpu); 146 + 147 + free_cpumask_var(s->path_mask); 148 + kfree(s->path_map); 149 + kfree(s); 150 + 151 + ps->context = NULL; 152 + } 153 + 154 + static int ioa_status(struct path_selector *ps, struct dm_path *path, 155 + status_type_t type, char *result, unsigned int maxlen) 156 + { 157 + struct selector *s = ps->context; 158 + struct path_info *pi; 159 + int sz = 0; 160 + 161 + if (!path) { 162 + DMEMIT("0 "); 163 + return sz; 164 + } 165 + 166 + switch(type) { 167 + case STATUSTYPE_INFO: 168 + DMEMIT("%d ", atomic_read(&s->map_misses)); 169 + break; 
170 + case STATUSTYPE_TABLE: 171 + pi = path->pscontext; 172 + DMEMIT("%*pb ", cpumask_pr_args(pi->cpumask)); 173 + break; 174 + } 175 + 176 + return sz; 177 + } 178 + 179 + static void ioa_fail_path(struct path_selector *ps, struct dm_path *p) 180 + { 181 + struct path_info *pi = p->pscontext; 182 + 183 + pi->failed = true; 184 + } 185 + 186 + static int ioa_reinstate_path(struct path_selector *ps, struct dm_path *p) 187 + { 188 + struct path_info *pi = p->pscontext; 189 + 190 + pi->failed = false; 191 + return 0; 192 + } 193 + 194 + static struct dm_path *ioa_select_path(struct path_selector *ps, 195 + size_t nr_bytes) 196 + { 197 + unsigned int cpu, node; 198 + struct selector *s = ps->context; 199 + const struct cpumask *cpumask; 200 + struct path_info *pi; 201 + int i; 202 + 203 + cpu = get_cpu(); 204 + 205 + pi = s->path_map[cpu]; 206 + if (pi && !pi->failed) 207 + goto done; 208 + 209 + /* 210 + * Perf is not optimal, but we at least try the local node then just 211 + * try not to fail. 212 + */ 213 + if (!pi) 214 + atomic_inc(&s->map_misses); 215 + 216 + node = cpu_to_node(cpu); 217 + cpumask = cpumask_of_node(node); 218 + for_each_cpu(i, cpumask) { 219 + pi = s->path_map[i]; 220 + if (pi && !pi->failed) 221 + goto done; 222 + } 223 + 224 + for_each_cpu(i, s->path_mask) { 225 + pi = s->path_map[i]; 226 + if (pi && !pi->failed) 227 + goto done; 228 + } 229 + pi = NULL; 230 + 231 + done: 232 + put_cpu(); 233 + return pi ? 
pi->path : NULL; 234 + } 235 + 236 + static struct path_selector_type ioa_ps = { 237 + .name = "io-affinity", 238 + .module = THIS_MODULE, 239 + .table_args = 1, 240 + .info_args = 1, 241 + .create = ioa_create, 242 + .destroy = ioa_destroy, 243 + .status = ioa_status, 244 + .add_path = ioa_add_path, 245 + .fail_path = ioa_fail_path, 246 + .reinstate_path = ioa_reinstate_path, 247 + .select_path = ioa_select_path, 248 + }; 249 + 250 + static int __init dm_ioa_init(void) 251 + { 252 + int ret = dm_register_path_selector(&ioa_ps); 253 + 254 + if (ret < 0) 255 + DMERR("register failed %d", ret); 256 + return ret; 257 + } 258 + 259 + static void __exit dm_ioa_exit(void) 260 + { 261 + int ret = dm_unregister_path_selector(&ioa_ps); 262 + 263 + if (ret < 0) 264 + DMERR("unregister failed %d", ret); 265 + } 266 + 267 + module_init(dm_ioa_init); 268 + module_exit(dm_ioa_exit); 269 + 270 + MODULE_DESCRIPTION(DM_NAME " multipath path selector that selects paths based on the CPU IO is being executed on"); 271 + MODULE_AUTHOR("Mike Christie <michael.christie@oracle.com>"); 272 + MODULE_LICENSE("GPL");
drivers/md/dm-queue-length.c drivers/md/dm-ps-queue-length.c
drivers/md/dm-round-robin.c drivers/md/dm-ps-round-robin.c
drivers/md/dm-service-time.c drivers/md/dm-ps-service-time.c
+1 -1
drivers/md/dm-stripe.c
··· 496 496 static struct target_type stripe_target = { 497 497 .name = "striped", 498 498 .version = {1, 6, 0}, 499 - .features = DM_TARGET_PASSES_INTEGRITY, 499 + .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT, 500 500 .module = THIS_MODULE, 501 501 .ctr = stripe_ctr, 502 502 .dtr = stripe_dtr,
+1
drivers/md/dm-switch.c
··· 550 550 static struct target_type switch_target = { 551 551 .name = "switch", 552 552 .version = {1, 1, 0}, 553 + .features = DM_TARGET_NOWAIT, 553 554 .module = THIS_MODULE, 554 555 .ctr = switch_ctr, 555 556 .dtr = switch_dtr,
+1
drivers/md/dm-unstripe.c
··· 178 178 static struct target_type unstripe_target = { 179 179 .name = "unstriped", 180 180 .version = {1, 1, 0}, 181 + .features = DM_TARGET_NOWAIT, 181 182 .module = THIS_MODULE, 182 183 .ctr = unstripe_ctr, 183 184 .dtr = unstripe_dtr,
+11 -1
drivers/md/dm-verity-target.c
··· 538 538 } 539 539 540 540 /* 541 + * Skip verity work in response to I/O error when system is shutting down. 542 + */ 543 + static inline bool verity_is_system_shutting_down(void) 544 + { 545 + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF 546 + || system_state == SYSTEM_RESTART; 547 + } 548 + 549 + /* 541 550 * End one "io" structure with a given error. 542 551 */ 543 552 static void verity_finish_io(struct dm_verity_io *io, blk_status_t status) ··· 573 564 { 574 565 struct dm_verity_io *io = bio->bi_private; 575 566 576 - if (bio->bi_status && !verity_fec_is_enabled(io->v)) { 567 + if (bio->bi_status && 568 + (!verity_fec_is_enabled(io->v) || verity_is_system_shutting_down())) { 577 569 verity_finish_io(io, bio->bi_status); 578 570 return; 579 571 }
+7 -2
drivers/md/dm-verity-verify-sig.c
··· 119 119 } 120 120 121 121 ret = verify_pkcs7_signature(root_hash, root_hash_len, sig_data, 122 - sig_len, NULL, VERIFYING_UNSPECIFIED_SIGNATURE, 123 - NULL, NULL); 122 + sig_len, 123 + #ifdef CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG_SECONDARY_KEYRING 124 + VERIFY_USE_SECONDARY_KEYRING, 125 + #else 126 + NULL, 127 + #endif 128 + VERIFYING_UNSPECIFIED_SIGNATURE, NULL, NULL); 124 129 125 130 return ret; 126 131 }
+1
drivers/md/dm-zero.c
··· 59 59 static struct target_type zero_target = { 60 60 .name = "zero", 61 61 .version = {1, 1, 0}, 62 + .features = DM_TARGET_NOWAIT, 62 63 .module = THIS_MODULE, 63 64 .ctr = zero_ctr, 64 65 .map = zero_map,
+1 -1
drivers/md/dm.c
··· 1586 1586 ci.sector_count = bio_sectors(bio); 1587 1587 while (ci.sector_count && !error) { 1588 1588 error = __split_and_process_non_flush(&ci); 1589 - if (current->bio_list && ci.sector_count && !error) { 1589 + if (ci.sector_count && !error) { 1590 1590 /* 1591 1591 * Remainder must be passed to submit_bio_noacct() 1592 1592 * so that it gets handled *after* bios already submitted