Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dm verity: add support for forward error correction

Add support for correcting corrupted blocks using Reed-Solomon.

This code uses RS(255, N) interleaved across data and hash
blocks. Each error-correcting block covers N bytes evenly
distributed across the combined total data, so that each byte is a
maximum distance away from the others. This makes it possible to
recover from several consecutive corrupted blocks with relatively
small space overhead.

In addition, using verity hashes to locate erasures nearly doubles
the effectiveness of error correction. Being able to detect
corrupted blocks also improves performance, because only corrupted
blocks need to be corrected.

For a 2 GiB partition, RS(255, 253) (two parity bytes for each
253-byte block) can correct up to 16 MiB of consecutive corrupted
blocks if erasures can be located, and 8 MiB if they cannot, with
16 MiB space overhead.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>

authored by

Sami Tolvanen and committed by
Mike Snitzer
a739ff3f bb4d73ac

+1071 -9
+33 -2
Documentation/device-mapper/verity.txt
··· 18 18 19 19 0 is the original format used in the Chromium OS. 20 20 The salt is appended when hashing, digests are stored continuously and 21 - the rest of the block is padded with zeros. 21 + the rest of the block is padded with zeroes. 22 22 23 23 1 is the current format that should be used for new devices. 24 24 The salt is prepended when hashing and each digest is 25 - padded with zeros to the power of two. 25 + padded with zeroes to the power of two. 26 26 27 27 <dev> 28 28 This is the device containing data, the integrity of which needs to be ··· 79 79 not compatible with ignore_corruption and requires user space support to 80 80 avoid restart loops. 81 81 82 + use_fec_from_device <fec_dev> 83 + Use forward error correction (FEC) to recover from corruption if hash 84 + verification fails. Use encoding data from the specified device. This 85 + may be the same device where data and hash blocks reside, in which case 86 + fec_start must be outside data and hash areas. 87 + 88 + If the encoding data covers additional metadata, it must be accessible 89 + on the hash device after the hash blocks. 90 + 91 + Note: block sizes for data and hash devices must match. Also, if the 92 + verity <dev> is encrypted the <fec_dev> should be too. 93 + 94 + fec_roots <num> 95 + Number of generator roots. This equals to the number of parity bytes in 96 + the encoding data. For example, in RS(M, N) encoding, the number of roots 97 + is M-N. 98 + 99 + fec_blocks <num> 100 + The number of encoding data blocks on the FEC device. The block size for 101 + the FEC device is <data_block_size>. 102 + 103 + fec_start <offset> 104 + This is the offset, in <data_block_size> blocks, from the start of the 105 + FEC device to the beginning of the encoding data. 106 + 107 + 82 108 Theory of operation 83 109 =================== 84 110 ··· 123 97 per-block basis. This allows for a lightweight hash computation on first read 124 98 into the page cache. 
Block hashes are stored linearly, aligned to the nearest 125 99 block size. 100 + 101 + If forward error correction (FEC) support is enabled any recovery of 102 + corrupted data will be verified using the cryptographic hash of the 103 + corresponding data. This is why combining error correction with 104 + integrity checking is essential. 126 105 127 106 Hash Tree 128 107 ---------
+12
drivers/md/Kconfig
··· 467 467 468 468 If unsure, say N. 469 469 470 + config DM_VERITY_FEC 471 + bool "Verity forward error correction support" 472 + depends on DM_VERITY 473 + select REED_SOLOMON 474 + select REED_SOLOMON_DEC8 475 + ---help--- 476 + Add forward error correction support to dm-verity. This option 477 + makes it possible to use pre-generated error correction data to 478 + recover from corrupted blocks. 479 + 480 + If unsure, say N. 481 + 470 482 config DM_SWITCH 471 483 tristate "Switch target support (EXPERIMENTAL)" 472 484 depends on BLK_DEV_DM
+4
drivers/md/Makefile
··· 64 64 ifeq ($(CONFIG_DM_UEVENT),y) 65 65 dm-mod-objs += dm-uevent.o 66 66 endif 67 + 68 + ifeq ($(CONFIG_DM_VERITY_FEC),y) 69 + dm-verity-objs += dm-verity-fec.o 70 + endif
+812
drivers/md/dm-verity-fec.c
··· 1 + /* 2 + * Copyright (C) 2015 Google, Inc. 3 + * 4 + * Author: Sami Tolvanen <samitolvanen@google.com> 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms of the GNU General Public License as published by the Free 8 + * Software Foundation; either version 2 of the License, or (at your option) 9 + * any later version. 10 + */ 11 + 12 + #include "dm-verity-fec.h" 13 + #include <linux/math64.h> 14 + 15 + #define DM_MSG_PREFIX "verity-fec" 16 + 17 + /* 18 + * If error correction has been configured, returns true. 19 + */ 20 + bool verity_fec_is_enabled(struct dm_verity *v) 21 + { 22 + return v->fec && v->fec->dev; 23 + } 24 + 25 + /* 26 + * Return a pointer to dm_verity_fec_io after dm_verity_io and its variable 27 + * length fields. 28 + */ 29 + static inline struct dm_verity_fec_io *fec_io(struct dm_verity_io *io) 30 + { 31 + return (struct dm_verity_fec_io *) verity_io_digest_end(io->v, io); 32 + } 33 + 34 + /* 35 + * Return an interleaved offset for a byte in RS block. 36 + */ 37 + static inline u64 fec_interleave(struct dm_verity *v, u64 offset) 38 + { 39 + u32 mod; 40 + 41 + mod = do_div(offset, v->fec->rsn); 42 + return offset + mod * (v->fec->rounds << v->data_dev_block_bits); 43 + } 44 + 45 + /* 46 + * Decode an RS block using Reed-Solomon. 47 + */ 48 + static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio, 49 + u8 *data, u8 *fec, int neras) 50 + { 51 + int i; 52 + uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN]; 53 + 54 + for (i = 0; i < v->fec->roots; i++) 55 + par[i] = fec[i]; 56 + 57 + return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras, 58 + fio->erasures, 0, NULL); 59 + } 60 + 61 + /* 62 + * Read error-correcting codes for the requested RS block. Returns a pointer 63 + * to the data block. Caller is responsible for releasing buf. 
64 + */ 65 + static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, 66 + unsigned *offset, struct dm_buffer **buf) 67 + { 68 + u64 position, block; 69 + u8 *res; 70 + 71 + position = (index + rsb) * v->fec->roots; 72 + block = position >> v->data_dev_block_bits; 73 + *offset = (unsigned)(position - (block << v->data_dev_block_bits)); 74 + 75 + res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); 76 + if (unlikely(IS_ERR(res))) { 77 + DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", 78 + v->data_dev->name, (unsigned long long)rsb, 79 + (unsigned long long)(v->fec->start + block), 80 + PTR_ERR(res)); 81 + *buf = NULL; 82 + } 83 + 84 + return res; 85 + } 86 + 87 + /* Loop over each preallocated buffer slot. */ 88 + #define fec_for_each_prealloc_buffer(__i) \ 89 + for (__i = 0; __i < DM_VERITY_FEC_BUF_PREALLOC; __i++) 90 + 91 + /* Loop over each extra buffer slot. */ 92 + #define fec_for_each_extra_buffer(io, __i) \ 93 + for (__i = DM_VERITY_FEC_BUF_PREALLOC; __i < DM_VERITY_FEC_BUF_MAX; __i++) 94 + 95 + /* Loop over each allocated buffer. */ 96 + #define fec_for_each_buffer(io, __i) \ 97 + for (__i = 0; __i < (io)->nbufs; __i++) 98 + 99 + /* Loop over each RS block in each allocated buffer. */ 100 + #define fec_for_each_buffer_rs_block(io, __i, __j) \ 101 + fec_for_each_buffer(io, __i) \ 102 + for (__j = 0; __j < 1 << DM_VERITY_FEC_BUF_RS_BITS; __j++) 103 + 104 + /* 105 + * Return a pointer to the current RS block when called inside 106 + * fec_for_each_buffer_rs_block. 107 + */ 108 + static inline u8 *fec_buffer_rs_block(struct dm_verity *v, 109 + struct dm_verity_fec_io *fio, 110 + unsigned i, unsigned j) 111 + { 112 + return &fio->bufs[i][j * v->fec->rsn]; 113 + } 114 + 115 + /* 116 + * Return an index to the current RS block when called inside 117 + * fec_for_each_buffer_rs_block. 
118 + */ 119 + static inline unsigned fec_buffer_rs_index(unsigned i, unsigned j) 120 + { 121 + return (i << DM_VERITY_FEC_BUF_RS_BITS) + j; 122 + } 123 + 124 + /* 125 + * Decode all RS blocks from buffers and copy corrected bytes into fio->output 126 + * starting from block_offset. 127 + */ 128 + static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, 129 + u64 rsb, int byte_index, unsigned block_offset, 130 + int neras) 131 + { 132 + int r, corrected = 0, res; 133 + struct dm_buffer *buf; 134 + unsigned n, i, offset; 135 + u8 *par, *block; 136 + 137 + par = fec_read_parity(v, rsb, block_offset, &offset, &buf); 138 + if (IS_ERR(par)) 139 + return PTR_ERR(par); 140 + 141 + /* 142 + * Decode the RS blocks we have in bufs. Each RS block results in 143 + * one corrected target byte and consumes fec->roots parity bytes. 144 + */ 145 + fec_for_each_buffer_rs_block(fio, n, i) { 146 + block = fec_buffer_rs_block(v, fio, n, i); 147 + res = fec_decode_rs8(v, fio, block, &par[offset], neras); 148 + if (res < 0) { 149 + dm_bufio_release(buf); 150 + 151 + r = res; 152 + goto error; 153 + } 154 + 155 + corrected += res; 156 + fio->output[block_offset] = block[byte_index]; 157 + 158 + block_offset++; 159 + if (block_offset >= 1 << v->data_dev_block_bits) 160 + goto done; 161 + 162 + /* read the next block when we run out of parity bytes */ 163 + offset += v->fec->roots; 164 + if (offset >= 1 << v->data_dev_block_bits) { 165 + dm_bufio_release(buf); 166 + 167 + par = fec_read_parity(v, rsb, block_offset, &offset, &buf); 168 + if (unlikely(IS_ERR(par))) 169 + return PTR_ERR(par); 170 + } 171 + } 172 + done: 173 + r = corrected; 174 + error: 175 + if (r < 0 && neras) 176 + DMERR_LIMIT("%s: FEC %llu: failed to correct: %d", 177 + v->data_dev->name, (unsigned long long)rsb, r); 178 + else if (r > 0) 179 + DMWARN_LIMIT("%s: FEC %llu: corrected %d errors", 180 + v->data_dev->name, (unsigned long long)rsb, r); 181 + 182 + return r; 183 + } 184 + 185 + /* 186 + * 
Locate data block erasures using verity hashes. 187 + */ 188 + static int fec_is_erasure(struct dm_verity *v, struct dm_verity_io *io, 189 + u8 *want_digest, u8 *data) 190 + { 191 + if (unlikely(verity_hash(v, verity_io_hash_desc(v, io), 192 + data, 1 << v->data_dev_block_bits, 193 + verity_io_real_digest(v, io)))) 194 + return 0; 195 + 196 + return memcmp(verity_io_real_digest(v, io), want_digest, 197 + v->digest_size) != 0; 198 + } 199 + 200 + /* 201 + * Read data blocks that are part of the RS block and deinterleave as much as 202 + * fits into buffers. Check for erasure locations if @neras is non-NULL. 203 + */ 204 + static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io, 205 + u64 rsb, u64 target, unsigned block_offset, 206 + int *neras) 207 + { 208 + int i, j, target_index = -1; 209 + struct dm_buffer *buf; 210 + struct dm_bufio_client *bufio; 211 + struct dm_verity_fec_io *fio = fec_io(io); 212 + u64 block, ileaved; 213 + u8 *bbuf, *rs_block; 214 + u8 want_digest[v->digest_size]; 215 + unsigned n, k; 216 + 217 + if (neras) 218 + *neras = 0; 219 + 220 + /* 221 + * read each of the rsn data blocks that are part of the RS block, and 222 + * interleave contents to available bufs 223 + */ 224 + for (i = 0; i < v->fec->rsn; i++) { 225 + ileaved = fec_interleave(v, rsb * v->fec->rsn + i); 226 + 227 + /* 228 + * target is the data block we want to correct, target_index is 229 + * the index of this block within the rsn RS blocks 230 + */ 231 + if (ileaved == target) 232 + target_index = i; 233 + 234 + block = ileaved >> v->data_dev_block_bits; 235 + bufio = v->fec->data_bufio; 236 + 237 + if (block >= v->data_blocks) { 238 + block -= v->data_blocks; 239 + 240 + /* 241 + * blocks outside the area were assumed to contain 242 + * zeros when encoding data was generated 243 + */ 244 + if (unlikely(block >= v->fec->hash_blocks)) 245 + continue; 246 + 247 + block += v->hash_start; 248 + bufio = v->bufio; 249 + } 250 + 251 + bbuf = dm_bufio_read(bufio, block, 
&buf); 252 + if (unlikely(IS_ERR(bbuf))) { 253 + DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld", 254 + v->data_dev->name, 255 + (unsigned long long)rsb, 256 + (unsigned long long)block, PTR_ERR(bbuf)); 257 + 258 + /* assume the block is corrupted */ 259 + if (neras && *neras <= v->fec->roots) 260 + fio->erasures[(*neras)++] = i; 261 + 262 + continue; 263 + } 264 + 265 + /* locate erasures if the block is on the data device */ 266 + if (bufio == v->fec->data_bufio && 267 + verity_hash_for_block(v, io, block, want_digest) == 0) { 268 + /* 269 + * skip if we have already found the theoretical 270 + * maximum number (i.e. fec->roots) of erasures 271 + */ 272 + if (neras && *neras <= v->fec->roots && 273 + fec_is_erasure(v, io, want_digest, bbuf)) 274 + fio->erasures[(*neras)++] = i; 275 + } 276 + 277 + /* 278 + * deinterleave and copy the bytes that fit into bufs, 279 + * starting from block_offset 280 + */ 281 + fec_for_each_buffer_rs_block(fio, n, j) { 282 + k = fec_buffer_rs_index(n, j) + block_offset; 283 + 284 + if (k >= 1 << v->data_dev_block_bits) 285 + goto done; 286 + 287 + rs_block = fec_buffer_rs_block(v, fio, n, j); 288 + rs_block[i] = bbuf[k]; 289 + } 290 + done: 291 + dm_bufio_release(buf); 292 + } 293 + 294 + return target_index; 295 + } 296 + 297 + /* 298 + * Allocate RS control structure and FEC buffers from preallocated mempools, 299 + * and attempt to allocate as many extra buffers as available. 
300 + */ 301 + static int fec_alloc_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) 302 + { 303 + unsigned n; 304 + 305 + if (!fio->rs) { 306 + fio->rs = mempool_alloc(v->fec->rs_pool, 0); 307 + if (unlikely(!fio->rs)) { 308 + DMERR("failed to allocate RS"); 309 + return -ENOMEM; 310 + } 311 + } 312 + 313 + fec_for_each_prealloc_buffer(n) { 314 + if (fio->bufs[n]) 315 + continue; 316 + 317 + fio->bufs[n] = mempool_alloc(v->fec->prealloc_pool, GFP_NOIO); 318 + if (unlikely(!fio->bufs[n])) { 319 + DMERR("failed to allocate FEC buffer"); 320 + return -ENOMEM; 321 + } 322 + } 323 + 324 + /* try to allocate the maximum number of buffers */ 325 + fec_for_each_extra_buffer(fio, n) { 326 + if (fio->bufs[n]) 327 + continue; 328 + 329 + fio->bufs[n] = mempool_alloc(v->fec->extra_pool, GFP_NOIO); 330 + /* we can manage with even one buffer if necessary */ 331 + if (unlikely(!fio->bufs[n])) 332 + break; 333 + } 334 + fio->nbufs = n; 335 + 336 + if (!fio->output) { 337 + fio->output = mempool_alloc(v->fec->output_pool, GFP_NOIO); 338 + 339 + if (!fio->output) { 340 + DMERR("failed to allocate FEC page"); 341 + return -ENOMEM; 342 + } 343 + } 344 + 345 + return 0; 346 + } 347 + 348 + /* 349 + * Initialize buffers and clear erasures. fec_read_bufs() assumes buffers are 350 + * zeroed before deinterleaving. 351 + */ 352 + static void fec_init_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio) 353 + { 354 + unsigned n; 355 + 356 + fec_for_each_buffer(fio, n) 357 + memset(fio->bufs[n], 0, v->fec->rsn << DM_VERITY_FEC_BUF_RS_BITS); 358 + 359 + memset(fio->erasures, 0, sizeof(fio->erasures)); 360 + } 361 + 362 + /* 363 + * Decode all RS blocks in a single data block and return the target block 364 + * (indicated by @offset) in fio->output. If @use_erasures is non-zero, uses 365 + * hashes to locate erasures. 
366 + */ 367 + static int fec_decode_rsb(struct dm_verity *v, struct dm_verity_io *io, 368 + struct dm_verity_fec_io *fio, u64 rsb, u64 offset, 369 + bool use_erasures) 370 + { 371 + int r, neras = 0; 372 + unsigned pos; 373 + 374 + r = fec_alloc_bufs(v, fio); 375 + if (unlikely(r < 0)) 376 + return r; 377 + 378 + for (pos = 0; pos < 1 << v->data_dev_block_bits; ) { 379 + fec_init_bufs(v, fio); 380 + 381 + r = fec_read_bufs(v, io, rsb, offset, pos, 382 + use_erasures ? &neras : NULL); 383 + if (unlikely(r < 0)) 384 + return r; 385 + 386 + r = fec_decode_bufs(v, fio, rsb, r, pos, neras); 387 + if (r < 0) 388 + return r; 389 + 390 + pos += fio->nbufs << DM_VERITY_FEC_BUF_RS_BITS; 391 + } 392 + 393 + /* Always re-validate the corrected block against the expected hash */ 394 + r = verity_hash(v, verity_io_hash_desc(v, io), fio->output, 395 + 1 << v->data_dev_block_bits, 396 + verity_io_real_digest(v, io)); 397 + if (unlikely(r < 0)) 398 + return r; 399 + 400 + if (memcmp(verity_io_real_digest(v, io), verity_io_want_digest(v, io), 401 + v->digest_size)) { 402 + DMERR_LIMIT("%s: FEC %llu: failed to correct (%d erasures)", 403 + v->data_dev->name, (unsigned long long)rsb, neras); 404 + return -EILSEQ; 405 + } 406 + 407 + return 0; 408 + } 409 + 410 + static int fec_bv_copy(struct dm_verity *v, struct dm_verity_io *io, u8 *data, 411 + size_t len) 412 + { 413 + struct dm_verity_fec_io *fio = fec_io(io); 414 + 415 + memcpy(data, &fio->output[fio->output_pos], len); 416 + fio->output_pos += len; 417 + 418 + return 0; 419 + } 420 + 421 + /* 422 + * Correct errors in a block. Copies corrected block to dest if non-NULL, 423 + * otherwise to a bio_vec starting from iter. 
424 + */ 425 + int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, 426 + enum verity_block_type type, sector_t block, u8 *dest, 427 + struct bvec_iter *iter) 428 + { 429 + int r; 430 + struct dm_verity_fec_io *fio = fec_io(io); 431 + u64 offset, res, rsb; 432 + 433 + if (!verity_fec_is_enabled(v)) 434 + return -EOPNOTSUPP; 435 + 436 + if (type == DM_VERITY_BLOCK_TYPE_METADATA) 437 + block += v->data_blocks; 438 + 439 + /* 440 + * For RS(M, N), the continuous FEC data is divided into blocks of N 441 + * bytes. Since block size may not be divisible by N, the last block 442 + * is zero padded when decoding. 443 + * 444 + * Each byte of the block is covered by a different RS(M, N) code, 445 + * and each code is interleaved over N blocks to make it less likely 446 + * that bursty corruption will leave us in unrecoverable state. 447 + */ 448 + 449 + offset = block << v->data_dev_block_bits; 450 + 451 + res = offset; 452 + div64_u64(res, v->fec->rounds << v->data_dev_block_bits); 453 + 454 + /* 455 + * The base RS block we can feed to the interleaver to find out all 456 + * blocks required for decoding. 457 + */ 458 + rsb = offset - res * (v->fec->rounds << v->data_dev_block_bits); 459 + 460 + /* 461 + * Locating erasures is slow, so attempt to recover the block without 462 + * them first. Do a second attempt with erasures if the corruption is 463 + * bad enough. 464 + */ 465 + r = fec_decode_rsb(v, io, fio, rsb, offset, false); 466 + if (r < 0) { 467 + r = fec_decode_rsb(v, io, fio, rsb, offset, true); 468 + if (r < 0) 469 + return r; 470 + } 471 + 472 + if (dest) 473 + memcpy(dest, fio->output, 1 << v->data_dev_block_bits); 474 + else if (iter) { 475 + fio->output_pos = 0; 476 + r = verity_for_bv_block(v, io, iter, fec_bv_copy); 477 + } 478 + 479 + return r; 480 + } 481 + 482 + /* 483 + * Clean up per-bio data. 
484 + */ 485 + void verity_fec_finish_io(struct dm_verity_io *io) 486 + { 487 + unsigned n; 488 + struct dm_verity_fec *f = io->v->fec; 489 + struct dm_verity_fec_io *fio = fec_io(io); 490 + 491 + if (!verity_fec_is_enabled(io->v)) 492 + return; 493 + 494 + mempool_free(fio->rs, f->rs_pool); 495 + 496 + fec_for_each_prealloc_buffer(n) 497 + mempool_free(fio->bufs[n], f->prealloc_pool); 498 + 499 + fec_for_each_extra_buffer(fio, n) 500 + mempool_free(fio->bufs[n], f->extra_pool); 501 + 502 + mempool_free(fio->output, f->output_pool); 503 + } 504 + 505 + /* 506 + * Initialize per-bio data. 507 + */ 508 + void verity_fec_init_io(struct dm_verity_io *io) 509 + { 510 + struct dm_verity_fec_io *fio = fec_io(io); 511 + 512 + if (!verity_fec_is_enabled(io->v)) 513 + return; 514 + 515 + fio->rs = NULL; 516 + memset(fio->bufs, 0, sizeof(fio->bufs)); 517 + fio->nbufs = 0; 518 + fio->output = NULL; 519 + } 520 + 521 + /* 522 + * Append feature arguments and values to the status table. 523 + */ 524 + unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, 525 + char *result, unsigned maxlen) 526 + { 527 + if (!verity_fec_is_enabled(v)) 528 + return sz; 529 + 530 + DMEMIT(" " DM_VERITY_OPT_FEC_DEV " %s " 531 + DM_VERITY_OPT_FEC_BLOCKS " %llu " 532 + DM_VERITY_OPT_FEC_START " %llu " 533 + DM_VERITY_OPT_FEC_ROOTS " %d", 534 + v->fec->dev->name, 535 + (unsigned long long)v->fec->blocks, 536 + (unsigned long long)v->fec->start, 537 + v->fec->roots); 538 + 539 + return sz; 540 + } 541 + 542 + void verity_fec_dtr(struct dm_verity *v) 543 + { 544 + struct dm_verity_fec *f = v->fec; 545 + 546 + if (!verity_fec_is_enabled(v)) 547 + goto out; 548 + 549 + mempool_destroy(f->rs_pool); 550 + mempool_destroy(f->prealloc_pool); 551 + mempool_destroy(f->extra_pool); 552 + kmem_cache_destroy(f->cache); 553 + 554 + if (f->data_bufio) 555 + dm_bufio_client_destroy(f->data_bufio); 556 + if (f->bufio) 557 + dm_bufio_client_destroy(f->bufio); 558 + 559 + if (f->dev) 560 + 
dm_put_device(v->ti, f->dev); 561 + out: 562 + kfree(f); 563 + v->fec = NULL; 564 + } 565 + 566 + static void *fec_rs_alloc(gfp_t gfp_mask, void *pool_data) 567 + { 568 + struct dm_verity *v = (struct dm_verity *)pool_data; 569 + 570 + return init_rs(8, 0x11d, 0, 1, v->fec->roots); 571 + } 572 + 573 + static void fec_rs_free(void *element, void *pool_data) 574 + { 575 + struct rs_control *rs = (struct rs_control *)element; 576 + 577 + if (rs) 578 + free_rs(rs); 579 + } 580 + 581 + bool verity_is_fec_opt_arg(const char *arg_name) 582 + { 583 + return (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV) || 584 + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS) || 585 + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_START) || 586 + !strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)); 587 + } 588 + 589 + int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v, 590 + unsigned *argc, const char *arg_name) 591 + { 592 + int r; 593 + struct dm_target *ti = v->ti; 594 + const char *arg_value; 595 + unsigned long long num_ll; 596 + unsigned char num_c; 597 + char dummy; 598 + 599 + if (!*argc) { 600 + ti->error = "FEC feature arguments require a value"; 601 + return -EINVAL; 602 + } 603 + 604 + arg_value = dm_shift_arg(as); 605 + (*argc)--; 606 + 607 + if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) { 608 + r = dm_get_device(ti, arg_value, FMODE_READ, &v->fec->dev); 609 + if (r) { 610 + ti->error = "FEC device lookup failed"; 611 + return r; 612 + } 613 + 614 + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_BLOCKS)) { 615 + if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 || 616 + ((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) 617 + >> (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) { 618 + ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS; 619 + return -EINVAL; 620 + } 621 + v->fec->blocks = num_ll; 622 + 623 + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_START)) { 624 + if (sscanf(arg_value, "%llu%c", &num_ll, &dummy) != 1 || 625 + 
((sector_t)(num_ll << (v->data_dev_block_bits - SECTOR_SHIFT)) >> 626 + (v->data_dev_block_bits - SECTOR_SHIFT) != num_ll)) { 627 + ti->error = "Invalid " DM_VERITY_OPT_FEC_START; 628 + return -EINVAL; 629 + } 630 + v->fec->start = num_ll; 631 + 632 + } else if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_ROOTS)) { 633 + if (sscanf(arg_value, "%hhu%c", &num_c, &dummy) != 1 || !num_c || 634 + num_c < (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MAX_RSN) || 635 + num_c > (DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN)) { 636 + ti->error = "Invalid " DM_VERITY_OPT_FEC_ROOTS; 637 + return -EINVAL; 638 + } 639 + v->fec->roots = num_c; 640 + 641 + } else { 642 + ti->error = "Unrecognized verity FEC feature request"; 643 + return -EINVAL; 644 + } 645 + 646 + return 0; 647 + } 648 + 649 + /* 650 + * Allocate dm_verity_fec for v->fec. Must be called before verity_fec_ctr. 651 + */ 652 + int verity_fec_ctr_alloc(struct dm_verity *v) 653 + { 654 + struct dm_verity_fec *f; 655 + 656 + f = kzalloc(sizeof(struct dm_verity_fec), GFP_KERNEL); 657 + if (!f) { 658 + v->ti->error = "Cannot allocate FEC structure"; 659 + return -ENOMEM; 660 + } 661 + v->fec = f; 662 + 663 + return 0; 664 + } 665 + 666 + /* 667 + * Validate arguments and preallocate memory. Must be called after arguments 668 + * have been parsed using verity_fec_parse_opt_args. 669 + */ 670 + int verity_fec_ctr(struct dm_verity *v) 671 + { 672 + struct dm_verity_fec *f = v->fec; 673 + struct dm_target *ti = v->ti; 674 + u64 hash_blocks; 675 + 676 + if (!verity_fec_is_enabled(v)) { 677 + verity_fec_dtr(v); 678 + return 0; 679 + } 680 + 681 + /* 682 + * FEC is computed over data blocks, possible metadata, and 683 + * hash blocks. 
In other words, FEC covers total of fec_blocks 684 + * blocks consisting of the following: 685 + * 686 + * data blocks | hash blocks | metadata (optional) 687 + * 688 + * We allow metadata after hash blocks to support a use case 689 + * where all data is stored on the same device and FEC covers 690 + * the entire area. 691 + * 692 + * If metadata is included, we require it to be available on the 693 + * hash device after the hash blocks. 694 + */ 695 + 696 + hash_blocks = v->hash_blocks - v->hash_start; 697 + 698 + /* 699 + * Require matching block sizes for data and hash devices for 700 + * simplicity. 701 + */ 702 + if (v->data_dev_block_bits != v->hash_dev_block_bits) { 703 + ti->error = "Block sizes must match to use FEC"; 704 + return -EINVAL; 705 + } 706 + 707 + if (!f->roots) { 708 + ti->error = "Missing " DM_VERITY_OPT_FEC_ROOTS; 709 + return -EINVAL; 710 + } 711 + f->rsn = DM_VERITY_FEC_RSM - f->roots; 712 + 713 + if (!f->blocks) { 714 + ti->error = "Missing " DM_VERITY_OPT_FEC_BLOCKS; 715 + return -EINVAL; 716 + } 717 + 718 + f->rounds = f->blocks; 719 + if (sector_div(f->rounds, f->rsn)) 720 + f->rounds++; 721 + 722 + /* 723 + * Due to optional metadata, f->blocks can be larger than 724 + * data_blocks and hash_blocks combined. 725 + */ 726 + if (f->blocks < v->data_blocks + hash_blocks || !f->rounds) { 727 + ti->error = "Invalid " DM_VERITY_OPT_FEC_BLOCKS; 728 + return -EINVAL; 729 + } 730 + 731 + /* 732 + * Metadata is accessed through the hash device, so we require 733 + * it to be large enough. 
734 + */ 735 + f->hash_blocks = f->blocks - v->data_blocks; 736 + if (dm_bufio_get_device_size(v->bufio) < f->hash_blocks) { 737 + ti->error = "Hash device is too small for " 738 + DM_VERITY_OPT_FEC_BLOCKS; 739 + return -E2BIG; 740 + } 741 + 742 + f->bufio = dm_bufio_client_create(f->dev->bdev, 743 + 1 << v->data_dev_block_bits, 744 + 1, 0, NULL, NULL); 745 + if (IS_ERR(f->bufio)) { 746 + ti->error = "Cannot initialize FEC bufio client"; 747 + return PTR_ERR(f->bufio); 748 + } 749 + 750 + if (dm_bufio_get_device_size(f->bufio) < 751 + ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) { 752 + ti->error = "FEC device is too small"; 753 + return -E2BIG; 754 + } 755 + 756 + f->data_bufio = dm_bufio_client_create(v->data_dev->bdev, 757 + 1 << v->data_dev_block_bits, 758 + 1, 0, NULL, NULL); 759 + if (IS_ERR(f->data_bufio)) { 760 + ti->error = "Cannot initialize FEC data bufio client"; 761 + return PTR_ERR(f->data_bufio); 762 + } 763 + 764 + if (dm_bufio_get_device_size(f->data_bufio) < v->data_blocks) { 765 + ti->error = "Data device is too small"; 766 + return -E2BIG; 767 + } 768 + 769 + /* Preallocate an rs_control structure for each worker thread */ 770 + f->rs_pool = mempool_create(num_online_cpus(), fec_rs_alloc, 771 + fec_rs_free, (void *) v); 772 + if (!f->rs_pool) { 773 + ti->error = "Cannot allocate RS pool"; 774 + return -ENOMEM; 775 + } 776 + 777 + f->cache = kmem_cache_create("dm_verity_fec_buffers", 778 + f->rsn << DM_VERITY_FEC_BUF_RS_BITS, 779 + 0, 0, NULL); 780 + if (!f->cache) { 781 + ti->error = "Cannot create FEC buffer cache"; 782 + return -ENOMEM; 783 + } 784 + 785 + /* Preallocate DM_VERITY_FEC_BUF_PREALLOC buffers for each thread */ 786 + f->prealloc_pool = mempool_create_slab_pool(num_online_cpus() * 787 + DM_VERITY_FEC_BUF_PREALLOC, 788 + f->cache); 789 + if (!f->prealloc_pool) { 790 + ti->error = "Cannot allocate FEC buffer prealloc pool"; 791 + return -ENOMEM; 792 + } 793 + 794 + f->extra_pool = mempool_create_slab_pool(0, 
f->cache); 795 + if (!f->extra_pool) { 796 + ti->error = "Cannot allocate FEC buffer extra pool"; 797 + return -ENOMEM; 798 + } 799 + 800 + /* Preallocate an output buffer for each thread */ 801 + f->output_pool = mempool_create_kmalloc_pool(num_online_cpus(), 802 + 1 << v->data_dev_block_bits); 803 + if (!f->output_pool) { 804 + ti->error = "Cannot allocate FEC output pool"; 805 + return -ENOMEM; 806 + } 807 + 808 + /* Reserve space for our per-bio data */ 809 + ti->per_bio_data_size += sizeof(struct dm_verity_fec_io); 810 + 811 + return 0; 812 + }
+152
drivers/md/dm-verity-fec.h
··· 1 + /* 2 + * Copyright (C) 2015 Google, Inc. 3 + * 4 + * Author: Sami Tolvanen <samitolvanen@google.com> 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms of the GNU General Public License as published by the Free 8 + * Software Foundation; either version 2 of the License, or (at your option) 9 + * any later version. 10 + */ 11 + 12 + #ifndef DM_VERITY_FEC_H 13 + #define DM_VERITY_FEC_H 14 + 15 + #include "dm-verity.h" 16 + #include <linux/rslib.h> 17 + 18 + /* Reed-Solomon(M, N) parameters */ 19 + #define DM_VERITY_FEC_RSM 255 20 + #define DM_VERITY_FEC_MAX_RSN 253 21 + #define DM_VERITY_FEC_MIN_RSN 231 /* ~10% space overhead */ 22 + 23 + /* buffers for deinterleaving and decoding */ 24 + #define DM_VERITY_FEC_BUF_PREALLOC 1 /* buffers to preallocate */ 25 + #define DM_VERITY_FEC_BUF_RS_BITS 4 /* 1 << RS blocks per buffer */ 26 + /* we need buffers for at most 1 << block size RS blocks */ 27 + #define DM_VERITY_FEC_BUF_MAX \ 28 + (1 << (PAGE_SHIFT - DM_VERITY_FEC_BUF_RS_BITS)) 29 + 30 + #define DM_VERITY_OPT_FEC_DEV "use_fec_from_device" 31 + #define DM_VERITY_OPT_FEC_BLOCKS "fec_blocks" 32 + #define DM_VERITY_OPT_FEC_START "fec_start" 33 + #define DM_VERITY_OPT_FEC_ROOTS "fec_roots" 34 + 35 + /* configuration */ 36 + struct dm_verity_fec { 37 + struct dm_dev *dev; /* parity data device */ 38 + struct dm_bufio_client *data_bufio; /* for data dev access */ 39 + struct dm_bufio_client *bufio; /* for parity data access */ 40 + sector_t start; /* parity data start in blocks */ 41 + sector_t blocks; /* number of blocks covered */ 42 + sector_t rounds; /* number of interleaving rounds */ 43 + sector_t hash_blocks; /* blocks covered after v->hash_start */ 44 + unsigned char roots; /* number of parity bytes, M-N of RS(M, N) */ 45 + unsigned char rsn; /* N of RS(M, N) */ 46 + mempool_t *rs_pool; /* mempool for fio->rs */ 47 + mempool_t *prealloc_pool; /* mempool for preallocated buffers */ 48 + mempool_t 
*extra_pool; /* mempool for extra buffers */ 49 + mempool_t *output_pool; /* mempool for output */ 50 + struct kmem_cache *cache; /* cache for buffers */ 51 + }; 52 + 53 + /* per-bio data */ 54 + struct dm_verity_fec_io { 55 + struct rs_control *rs; /* Reed-Solomon state */ 56 + int erasures[DM_VERITY_FEC_MAX_RSN]; /* erasures for decode_rs8 */ 57 + u8 *bufs[DM_VERITY_FEC_BUF_MAX]; /* bufs for deinterleaving */ 58 + unsigned nbufs; /* number of buffers allocated */ 59 + u8 *output; /* buffer for corrected output */ 60 + size_t output_pos; 61 + }; 62 + 63 + #ifdef CONFIG_DM_VERITY_FEC 64 + 65 + /* each feature parameter requires a value */ 66 + #define DM_VERITY_OPTS_FEC 8 67 + 68 + extern bool verity_fec_is_enabled(struct dm_verity *v); 69 + 70 + extern int verity_fec_decode(struct dm_verity *v, struct dm_verity_io *io, 71 + enum verity_block_type type, sector_t block, 72 + u8 *dest, struct bvec_iter *iter); 73 + 74 + extern unsigned verity_fec_status_table(struct dm_verity *v, unsigned sz, 75 + char *result, unsigned maxlen); 76 + 77 + extern void verity_fec_finish_io(struct dm_verity_io *io); 78 + extern void verity_fec_init_io(struct dm_verity_io *io); 79 + 80 + extern bool verity_is_fec_opt_arg(const char *arg_name); 81 + extern int verity_fec_parse_opt_args(struct dm_arg_set *as, 82 + struct dm_verity *v, unsigned *argc, 83 + const char *arg_name); 84 + 85 + extern void verity_fec_dtr(struct dm_verity *v); 86 + 87 + extern int verity_fec_ctr_alloc(struct dm_verity *v); 88 + extern int verity_fec_ctr(struct dm_verity *v); 89 + 90 + #else /* !CONFIG_DM_VERITY_FEC */ 91 + 92 + #define DM_VERITY_OPTS_FEC 0 93 + 94 + static inline bool verity_fec_is_enabled(struct dm_verity *v) 95 + { 96 + return false; 97 + } 98 + 99 + static inline int verity_fec_decode(struct dm_verity *v, 100 + struct dm_verity_io *io, 101 + enum verity_block_type type, 102 + sector_t block, u8 *dest, 103 + struct bvec_iter *iter) 104 + { 105 + return -EOPNOTSUPP; 106 + } 107 + 108 + static 
inline unsigned verity_fec_status_table(struct dm_verity *v, 109 + unsigned sz, char *result, 110 + unsigned maxlen) 111 + { 112 + return sz; 113 + } 114 + 115 + static inline void verity_fec_finish_io(struct dm_verity_io *io) 116 + { 117 + } 118 + 119 + static inline void verity_fec_init_io(struct dm_verity_io *io) 120 + { 121 + } 122 + 123 + static inline bool verity_is_fec_opt_arg(const char *arg_name) 124 + { 125 + return false; 126 + } 127 + 128 + static inline int verity_fec_parse_opt_args(struct dm_arg_set *as, 129 + struct dm_verity *v, 130 + unsigned *argc, 131 + const char *arg_name) 132 + { 133 + return -EINVAL; 134 + } 135 + 136 + static inline void verity_fec_dtr(struct dm_verity *v) 137 + { 138 + } 139 + 140 + static inline int verity_fec_ctr_alloc(struct dm_verity *v) 141 + { 142 + return 0; 143 + } 144 + 145 + static inline int verity_fec_ctr(struct dm_verity *v) 146 + { 147 + return 0; 148 + } 149 + 150 + #endif /* CONFIG_DM_VERITY_FEC */ 151 + 152 + #endif /* DM_VERITY_FEC_H */
+48 -7
drivers/md/dm-verity-target.c
··· 15 15 */ 16 16 17 17 #include "dm-verity.h" 18 + #include "dm-verity-fec.h" 18 19 19 20 #include <linux/module.h> 20 21 #include <linux/reboot.h> ··· 32 31 #define DM_VERITY_OPT_LOGGING "ignore_corruption" 33 32 #define DM_VERITY_OPT_RESTART "restart_on_corruption" 34 33 35 - #define DM_VERITY_OPTS_MAX 1 34 + #define DM_VERITY_OPTS_MAX (1 + DM_VERITY_OPTS_FEC) 36 35 37 36 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; 38 37 ··· 283 282 if (likely(memcmp(verity_io_real_digest(v, io), want_digest, 284 283 v->digest_size) == 0)) 285 284 aux->hash_verified = 1; 285 + else if (verity_fec_decode(v, io, 286 + DM_VERITY_BLOCK_TYPE_METADATA, 287 + hash_block, data, NULL) == 0) 288 + aux->hash_verified = 1; 286 289 else if (verity_handle_err(v, 287 290 DM_VERITY_BLOCK_TYPE_METADATA, 288 291 hash_block)) { ··· 416 411 if (likely(memcmp(verity_io_real_digest(v, io), 417 412 verity_io_want_digest(v, io), v->digest_size) == 0)) 418 413 continue; 414 + else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, 415 + io->block + b, NULL, &start) == 0) 416 + continue; 419 417 else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, 420 - io->block + b)) 418 + io->block + b)) 421 419 return -EIO; 422 420 } 423 421 ··· 438 430 bio->bi_end_io = io->orig_bi_end_io; 439 431 bio->bi_error = error; 440 432 433 + verity_fec_finish_io(io); 434 + 441 435 bio_endio(bio); 442 436 } 443 437 ··· 454 444 { 455 445 struct dm_verity_io *io = bio->bi_private; 456 446 457 - if (bio->bi_error) { 447 + if (bio->bi_error && !verity_fec_is_enabled(io->v)) { 458 448 verity_finish_io(io, bio->bi_error); 459 449 return; 460 450 } ··· 557 547 bio->bi_private = io; 558 548 io->iter = bio->bi_iter; 559 549 550 + verity_fec_init_io(io); 551 + 560 552 verity_submit_prefetch(v, io); 561 553 562 554 generic_make_request(bio); ··· 573 561 unsigned status_flags, char *result, unsigned maxlen) 574 562 { 575 563 struct dm_verity *v = ti->private; 564 + unsigned args = 0; 576 565 
unsigned sz = 0; 577 566 unsigned x; 578 567 ··· 600 587 else 601 588 for (x = 0; x < v->salt_size; x++) 602 589 DMEMIT("%02x", v->salt[x]); 590 + if (v->mode != DM_VERITY_MODE_EIO) 591 + args++; 592 + if (verity_fec_is_enabled(v)) 593 + args += DM_VERITY_OPTS_FEC; 594 + if (!args) 595 + return; 596 + DMEMIT(" %u", args); 603 597 if (v->mode != DM_VERITY_MODE_EIO) { 604 - DMEMIT(" 1 "); 598 + DMEMIT(" "); 605 599 switch (v->mode) { 606 600 case DM_VERITY_MODE_LOGGING: 607 601 DMEMIT(DM_VERITY_OPT_LOGGING); ··· 620 600 BUG(); 621 601 } 622 602 } 603 + sz = verity_fec_status_table(v, sz, result, maxlen); 623 604 break; 624 605 } 625 606 } ··· 683 662 if (v->data_dev) 684 663 dm_put_device(ti, v->data_dev); 685 664 665 + verity_fec_dtr(v); 666 + 686 667 kfree(v); 687 668 } 688 669 ··· 716 693 717 694 } else if (!strcasecmp(arg_name, DM_VERITY_OPT_RESTART)) { 718 695 v->mode = DM_VERITY_MODE_RESTART; 696 + continue; 697 + 698 + } else if (verity_is_fec_opt_arg(arg_name)) { 699 + r = verity_fec_parse_opt_args(as, v, &argc, arg_name); 700 + if (r) 701 + return r; 719 702 continue; 720 703 } 721 704 ··· 764 735 } 765 736 ti->private = v; 766 737 v->ti = ti; 738 + 739 + r = verity_fec_ctr_alloc(v); 740 + if (r) 741 + goto bad; 767 742 768 743 if ((dm_table_get_mode(ti->table) & ~FMODE_READ)) { 769 744 ti->error = "Device must be readonly"; ··· 957 924 goto bad; 958 925 } 959 926 960 - ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); 961 - 962 927 /* WQ_UNBOUND greatly improves performance when running on ramdisk */ 963 928 v->verify_wq = alloc_workqueue("kverityd", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, num_online_cpus()); 964 929 if (!v->verify_wq) { ··· 964 933 r = -ENOMEM; 965 934 goto bad; 966 935 } 936 + 937 + ti->per_bio_data_size = sizeof(struct dm_verity_io) + 938 + v->shash_descsize + v->digest_size * 2; 939 + 940 + r = verity_fec_ctr(v); 941 + if (r) 942 + goto bad; 
943 + 944 + ti->per_bio_data_size = roundup(ti->per_bio_data_size, 945 + __alignof__(struct dm_verity_io)); 967 946 968 947 return 0; 969 948 ··· 985 944 986 945 static struct target_type verity_target = { 987 946 .name = "verity", 988 - .version = {1, 2, 0}, 947 + .version = {1, 3, 0}, 989 948 .module = THIS_MODULE, 990 949 .ctr = verity_ctr, 991 950 .dtr = verity_dtr,
+10
drivers/md/dm-verity.h
··· 29 29 DM_VERITY_BLOCK_TYPE_METADATA 30 30 }; 31 31 32 + struct dm_verity_fec; 33 + 32 34 struct dm_verity { 33 35 struct dm_dev *data_dev; 34 36 struct dm_dev *hash_dev; ··· 60 58 61 59 /* starting blocks for each tree level. 0 is the lowest level. */ 62 60 sector_t hash_level_block[DM_VERITY_MAX_LEVELS]; 61 + 62 + struct dm_verity_fec *fec; /* forward error correction */ 63 63 }; 64 64 65 65 struct dm_verity_io { ··· 105 101 struct dm_verity_io *io) 106 102 { 107 103 return (u8 *)(io + 1) + v->shash_descsize + v->digest_size; 104 + } 105 + 106 + static inline u8 *verity_io_digest_end(struct dm_verity *v, 107 + struct dm_verity_io *io) 108 + { 109 + return verity_io_want_digest(v, io) + v->digest_size; 108 110 } 109 111 110 112 extern int verity_for_bv_block(struct dm_verity *v, struct dm_verity_io *io,