Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm:
dm snapshot: extend exception store functions
dm snapshot: split out exception store implementations
dm snapshot: rename struct exception_store
dm snapshot: separate out exception store interface
dm mpath: move trigger_event to system workqueue
dm: add name and uuid to sysfs
dm table: rework reference counting
dm: support barriers on simple devices
dm request: extend target interface
dm request: add caches
dm ioctl: allow dm_copy_name_and_uuid to return only one field
dm log: ensure log bitmap fits on log device
dm log: move region_size validation
dm log: avoid reinitialising io_req on every operation
dm: consolidate target deregistration error handling
dm raid1: fix error count
dm log: fix dm_io_client leak on error paths
dm snapshot: change yield to msleep
dm table: drop reference at unbind

+1321 -985
+3 -2
drivers/md/Makefile
··· 3 # 4 5 dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ 6 - dm-ioctl.o dm-io.o dm-kcopyd.o 7 dm-multipath-objs := dm-path-selector.o dm-mpath.o 8 - dm-snapshot-objs := dm-snap.o dm-exception-store.o 9 dm-mirror-objs := dm-raid1.o 10 md-mod-objs := md.o bitmap.o 11 raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
··· 3 # 4 5 dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ 6 + dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o 7 dm-multipath-objs := dm-path-selector.o dm-mpath.o 8 + dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \ 9 + dm-snap-persistent.o 10 dm-mirror-objs := dm-raid1.o 11 md-mod-objs := md.o bitmap.o 12 raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
+1 -5
drivers/md/dm-crypt.c
··· 1322 1323 static void __exit dm_crypt_exit(void) 1324 { 1325 - int r = dm_unregister_target(&crypt_target); 1326 - 1327 - if (r < 0) 1328 - DMERR("unregister failed %d", r); 1329 - 1330 kmem_cache_destroy(_crypt_io_pool); 1331 } 1332
··· 1322 1323 static void __exit dm_crypt_exit(void) 1324 { 1325 + dm_unregister_target(&crypt_target); 1326 kmem_cache_destroy(_crypt_io_pool); 1327 } 1328
+1 -5
drivers/md/dm-delay.c
··· 364 365 static void __exit dm_delay_exit(void) 366 { 367 - int r = dm_unregister_target(&delay_target); 368 - 369 - if (r < 0) 370 - DMERR("unregister failed %d", r); 371 - 372 kmem_cache_destroy(delayed_cache); 373 destroy_workqueue(kdelayd_wq); 374 }
··· 364 365 static void __exit dm_delay_exit(void) 366 { 367 + dm_unregister_target(&delay_target); 368 kmem_cache_destroy(delayed_cache); 369 destroy_workqueue(kdelayd_wq); 370 }
+20 -731
drivers/md/dm-exception-store.c
··· 1 /* 2 - * dm-exception-store.c 3 - * 4 * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 5 - * Copyright (C) 2006 Red Hat GmbH 6 * 7 * This file is released under the GPL. 8 */ 9 10 - #include "dm-snap.h" 11 12 #include <linux/mm.h> 13 #include <linux/pagemap.h> 14 #include <linux/vmalloc.h> 15 #include <linux/slab.h> 16 - #include <linux/dm-io.h> 17 - #include <linux/dm-kcopyd.h> 18 19 - #define DM_MSG_PREFIX "snapshots" 20 - #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ 21 22 - /*----------------------------------------------------------------- 23 - * Persistent snapshots, by persistent we mean that the snapshot 24 - * will survive a reboot. 25 - *---------------------------------------------------------------*/ 26 - 27 - /* 28 - * We need to store a record of which parts of the origin have 29 - * been copied to the snapshot device. The snapshot code 30 - * requires that we copy exception chunks to chunk aligned areas 31 - * of the COW store. It makes sense therefore, to store the 32 - * metadata in chunk size blocks. 33 - * 34 - * There is no backward or forward compatibility implemented, 35 - * snapshots with different disk versions than the kernel will 36 - * not be usable. It is expected that "lvcreate" will blank out 37 - * the start of a fresh COW device before calling the snapshot 38 - * constructor. 39 - * 40 - * The first chunk of the COW device just contains the header. 41 - * After this there is a chunk filled with exception metadata, 42 - * followed by as many exception chunks as can fit in the 43 - * metadata areas. 44 - * 45 - * All on disk structures are in little-endian format. The end 46 - * of the exceptions info is indicated by an exception with a 47 - * new_chunk of 0, which is invalid since it would point to the 48 - * header chunk. 49 - */ 50 - 51 - /* 52 - * Magic for persistent snapshots: "SnAp" - Feeble isn't it. 53 - */ 54 - #define SNAP_MAGIC 0x70416e53 55 - 56 - /* 57 - * The on-disk version of the metadata. 58 - */ 59 - #define SNAPSHOT_DISK_VERSION 1 60 - 61 - struct disk_header { 62 - uint32_t magic; 63 - 64 - /* 65 - * Is this snapshot valid. There is no way of recovering 66 - * an invalid snapshot. 67 - */ 68 - uint32_t valid; 69 - 70 - /* 71 - * Simple, incrementing version. no backward 72 - * compatibility. 73 - */ 74 - uint32_t version; 75 - 76 - /* In sectors */ 77 - uint32_t chunk_size; 78 - }; 79 - 80 - struct disk_exception { 81 - uint64_t old_chunk; 82 - uint64_t new_chunk; 83 - }; 84 - 85 - struct commit_callback { 86 - void (*callback)(void *, int success); 87 - void *context; 88 - }; 89 - 90 - /* 91 - * The top level structure for a persistent exception store. 92 - */ 93 - struct pstore { 94 - struct dm_snapshot *snap; /* up pointer to my snapshot */ 95 - int version; 96 - int valid; 97 - uint32_t exceptions_per_area; 98 - 99 - /* 100 - * Now that we have an asynchronous kcopyd there is no 101 - * need for large chunk sizes, so it wont hurt to have a 102 - * whole chunks worth of metadata in memory at once. 103 - */ 104 - void *area; 105 - 106 - /* 107 - * An area of zeros used to clear the next area. 108 - */ 109 - void *zero_area; 110 - 111 - /* 112 - * Used to keep track of which metadata area the data in 113 - * 'chunk' refers to. 114 - */ 115 - chunk_t current_area; 116 - 117 - /* 118 - * The next free chunk for an exception. 119 - */ 120 - chunk_t next_free; 121 - 122 - /* 123 - * The index of next free exception in the current 124 - * metadata area. 
125 - */ 126 - uint32_t current_committed; 127 - 128 - atomic_t pending_count; 129 - uint32_t callback_count; 130 - struct commit_callback *callbacks; 131 - struct dm_io_client *io_client; 132 - 133 - struct workqueue_struct *metadata_wq; 134 - }; 135 - 136 - static unsigned sectors_to_pages(unsigned sectors) 137 - { 138 - return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); 139 - } 140 - 141 - static int alloc_area(struct pstore *ps) 142 - { 143 - int r = -ENOMEM; 144 - size_t len; 145 - 146 - len = ps->snap->chunk_size << SECTOR_SHIFT; 147 - 148 - /* 149 - * Allocate the chunk_size block of memory that will hold 150 - * a single metadata area. 151 - */ 152 - ps->area = vmalloc(len); 153 - if (!ps->area) 154 - return r; 155 - 156 - ps->zero_area = vmalloc(len); 157 - if (!ps->zero_area) { 158 - vfree(ps->area); 159 - return r; 160 - } 161 - memset(ps->zero_area, 0, len); 162 - 163 - return 0; 164 - } 165 - 166 - static void free_area(struct pstore *ps) 167 - { 168 - vfree(ps->area); 169 - ps->area = NULL; 170 - vfree(ps->zero_area); 171 - ps->zero_area = NULL; 172 - } 173 - 174 - struct mdata_req { 175 - struct dm_io_region *where; 176 - struct dm_io_request *io_req; 177 - struct work_struct work; 178 - int result; 179 - }; 180 - 181 - static void do_metadata(struct work_struct *work) 182 - { 183 - struct mdata_req *req = container_of(work, struct mdata_req, work); 184 - 185 - req->result = dm_io(req->io_req, 1, req->where, NULL); 186 - } 187 - 188 - /* 189 - * Read or write a chunk aligned and sized block of data from a device. 190 - */ 191 - static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) 192 - { 193 - struct dm_io_region where = { 194 - .bdev = ps->snap->cow->bdev, 195 - .sector = ps->snap->chunk_size * chunk, 196 - .count = ps->snap->chunk_size, 197 - }; 198 - struct dm_io_request io_req = { 199 - .bi_rw = rw, 200 - .mem.type = DM_IO_VMA, 201 - .mem.ptr.vma = ps->area, 202 - .client = ps->io_client, 203 - .notify.fn = NULL, 204 - }; 205 - struct mdata_req req; 206 - 207 - if (!metadata) 208 - return dm_io(&io_req, 1, &where, NULL); 209 - 210 - req.where = &where; 211 - req.io_req = &io_req; 212 - 213 - /* 214 - * Issue the synchronous I/O from a different thread 215 - * to avoid generic_make_request recursion. 216 - */ 217 - INIT_WORK(&req.work, do_metadata); 218 - queue_work(ps->metadata_wq, &req.work); 219 - flush_workqueue(ps->metadata_wq); 220 - 221 - return req.result; 222 - } 223 - 224 - /* 225 - * Convert a metadata area index to a chunk index. 226 - */ 227 - static chunk_t area_location(struct pstore *ps, chunk_t area) 228 - { 229 - return 1 + ((ps->exceptions_per_area + 1) * area); 230 - } 231 - 232 - /* 233 - * Read or write a metadata area. Remembering to skip the first 234 - * chunk which holds the header. 
235 - */ 236 - static int area_io(struct pstore *ps, int rw) 237 { 238 int r; 239 - chunk_t chunk; 240 241 - chunk = area_location(ps, ps->current_area); 242 243 - r = chunk_io(ps, chunk, rw, 0); 244 - if (r) 245 - return r; 246 247 return 0; 248 - } 249 250 - static void zero_memory_area(struct pstore *ps) 251 - { 252 - memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 253 - } 254 - 255 - static int zero_disk_area(struct pstore *ps, chunk_t area) 256 - { 257 - struct dm_io_region where = { 258 - .bdev = ps->snap->cow->bdev, 259 - .sector = ps->snap->chunk_size * area_location(ps, area), 260 - .count = ps->snap->chunk_size, 261 - }; 262 - struct dm_io_request io_req = { 263 - .bi_rw = WRITE, 264 - .mem.type = DM_IO_VMA, 265 - .mem.ptr.vma = ps->zero_area, 266 - .client = ps->io_client, 267 - .notify.fn = NULL, 268 - }; 269 - 270 - return dm_io(&io_req, 1, &where, NULL); 271 - } 272 - 273 - static int read_header(struct pstore *ps, int *new_snapshot) 274 - { 275 - int r; 276 - struct disk_header *dh; 277 - chunk_t chunk_size; 278 - int chunk_size_supplied = 1; 279 - 280 - /* 281 - * Use default chunk size (or hardsect_size, if larger) if none supplied 282 - */ 283 - if (!ps->snap->chunk_size) { 284 - ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, 285 - bdev_hardsect_size(ps->snap->cow->bdev) >> 9); 286 - ps->snap->chunk_mask = ps->snap->chunk_size - 1; 287 - ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; 288 - chunk_size_supplied = 0; 289 - } 290 - 291 - ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> 292 - chunk_size)); 293 - if (IS_ERR(ps->io_client)) 294 - return PTR_ERR(ps->io_client); 295 - 296 - r = alloc_area(ps); 297 - if (r) 298 - return r; 299 - 300 - r = chunk_io(ps, 0, READ, 1); 301 - if (r) 302 - goto bad; 303 - 304 - dh = (struct disk_header *) ps->area; 305 - 306 - if (le32_to_cpu(dh->magic) == 0) { 307 - *new_snapshot = 1; 308 - return 0; 309 - } 310 - 311 - if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { 312 - DMWARN("Invalid or corrupt snapshot"); 313 - r = -ENXIO; 314 - goto bad; 315 - } 316 - 317 - *new_snapshot = 0; 318 - ps->valid = le32_to_cpu(dh->valid); 319 - ps->version = le32_to_cpu(dh->version); 320 - chunk_size = le32_to_cpu(dh->chunk_size); 321 - 322 - if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) 323 - return 0; 324 - 325 - DMWARN("chunk size %llu in device metadata overrides " 326 - "table chunk size of %llu.", 327 - (unsigned long long)chunk_size, 328 - (unsigned long long)ps->snap->chunk_size); 329 - 330 - /* We had a bogus chunk_size. Fix stuff up. */ 331 - free_area(ps); 332 - 333 - ps->snap->chunk_size = chunk_size; 334 - ps->snap->chunk_mask = chunk_size - 1; 335 - ps->snap->chunk_shift = ffs(chunk_size) - 1; 336 - 337 - r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), 338 - ps->io_client); 339 - if (r) 340 - return r; 341 - 342 - r = alloc_area(ps); 343 - return r; 344 - 345 - bad: 346 - free_area(ps); 347 return r; 348 } 349 350 - static int write_header(struct pstore *ps) 351 { 352 - struct disk_header *dh; 353 - 354 - memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 355 - 356 - dh = (struct disk_header *) ps->area; 357 - dh->magic = cpu_to_le32(SNAP_MAGIC); 358 - dh->valid = cpu_to_le32(ps->valid); 359 - dh->version = cpu_to_le32(ps->version); 360 - dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); 361 - 362 - return chunk_io(ps, 0, WRITE, 1); 363 - } 364 - 365 - /* 366 - * Access functions for the disk exceptions, these do the endian conversions. 
367 - */ 368 - static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) 369 - { 370 - BUG_ON(index >= ps->exceptions_per_area); 371 - 372 - return ((struct disk_exception *) ps->area) + index; 373 - } 374 - 375 - static void read_exception(struct pstore *ps, 376 - uint32_t index, struct disk_exception *result) 377 - { 378 - struct disk_exception *e = get_exception(ps, index); 379 - 380 - /* copy it */ 381 - result->old_chunk = le64_to_cpu(e->old_chunk); 382 - result->new_chunk = le64_to_cpu(e->new_chunk); 383 - } 384 - 385 - static void write_exception(struct pstore *ps, 386 - uint32_t index, struct disk_exception *de) 387 - { 388 - struct disk_exception *e = get_exception(ps, index); 389 - 390 - /* copy it */ 391 - e->old_chunk = cpu_to_le64(de->old_chunk); 392 - e->new_chunk = cpu_to_le64(de->new_chunk); 393 - } 394 - 395 - /* 396 - * Registers the exceptions that are present in the current area. 397 - * 'full' is filled in to indicate if the area has been 398 - * filled. 399 - */ 400 - static int insert_exceptions(struct pstore *ps, int *full) 401 - { 402 - int r; 403 - unsigned int i; 404 - struct disk_exception de; 405 - 406 - /* presume the area is full */ 407 - *full = 1; 408 - 409 - for (i = 0; i < ps->exceptions_per_area; i++) { 410 - read_exception(ps, i, &de); 411 - 412 - /* 413 - * If the new_chunk is pointing at the start of 414 - * the COW device, where the first metadata area 415 - * is we know that we've hit the end of the 416 - * exceptions. Therefore the area is not full. 417 - */ 418 - if (de.new_chunk == 0LL) { 419 - ps->current_committed = i; 420 - *full = 0; 421 - break; 422 - } 423 - 424 - /* 425 - * Keep track of the start of the free chunks. 426 - */ 427 - if (ps->next_free <= de.new_chunk) 428 - ps->next_free = de.new_chunk + 1; 429 - 430 - /* 431 - * Otherwise we add the exception to the snapshot. 432 - */ 433 - r = dm_add_exception(ps->snap, de.old_chunk, de.new_chunk); 434 - if (r) 435 - return r; 436 - } 437 - 438 - return 0; 439 - } 440 - 441 - static int read_exceptions(struct pstore *ps) 442 - { 443 - int r, full = 1; 444 - 445 - /* 446 - * Keeping reading chunks and inserting exceptions until 447 - * we find a partially full area. 448 - */ 449 - for (ps->current_area = 0; full; ps->current_area++) { 450 - r = area_io(ps, READ); 451 - if (r) 452 - return r; 453 - 454 - r = insert_exceptions(ps, &full); 455 - if (r) 456 - return r; 457 - } 458 - 459 - ps->current_area--; 460 - 461 - return 0; 462 - } 463 - 464 - static struct pstore *get_info(struct exception_store *store) 465 - { 466 - return (struct pstore *) store->context; 467 - } 468 - 469 - static void persistent_fraction_full(struct exception_store *store, 470 - sector_t *numerator, sector_t *denominator) 471 - { 472 - *numerator = get_info(store)->next_free * store->snap->chunk_size; 473 - *denominator = get_dev_size(store->snap->cow->bdev); 474 - } 475 - 476 - static void persistent_destroy(struct exception_store *store) 477 - { 478 - struct pstore *ps = get_info(store); 479 - 480 - destroy_workqueue(ps->metadata_wq); 481 - dm_io_client_destroy(ps->io_client); 482 - vfree(ps->callbacks); 483 - free_area(ps); 484 - kfree(ps); 485 - } 486 - 487 - static int persistent_read_metadata(struct exception_store *store) 488 - { 489 - int r, uninitialized_var(new_snapshot); 490 - struct pstore *ps = get_info(store); 491 - 492 - /* 493 - * Read the snapshot header. 
494 - */ 495 - r = read_header(ps, &new_snapshot); 496 - if (r) 497 - return r; 498 - 499 - /* 500 - * Now we know correct chunk_size, complete the initialisation. 501 - */ 502 - ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / 503 - sizeof(struct disk_exception); 504 - ps->callbacks = dm_vcalloc(ps->exceptions_per_area, 505 - sizeof(*ps->callbacks)); 506 - if (!ps->callbacks) 507 - return -ENOMEM; 508 - 509 - /* 510 - * Do we need to setup a new snapshot ? 511 - */ 512 - if (new_snapshot) { 513 - r = write_header(ps); 514 - if (r) { 515 - DMWARN("write_header failed"); 516 - return r; 517 - } 518 - 519 - ps->current_area = 0; 520 - zero_memory_area(ps); 521 - r = zero_disk_area(ps, 0); 522 - if (r) { 523 - DMWARN("zero_disk_area(0) failed"); 524 - return r; 525 - } 526 - } else { 527 - /* 528 - * Sanity checks. 529 - */ 530 - if (ps->version != SNAPSHOT_DISK_VERSION) { 531 - DMWARN("unable to handle snapshot disk version %d", 532 - ps->version); 533 - return -EINVAL; 534 - } 535 - 536 - /* 537 - * Metadata are valid, but snapshot is invalidated 538 - */ 539 - if (!ps->valid) 540 - return 1; 541 - 542 - /* 543 - * Read the metadata. 544 - */ 545 - r = read_exceptions(ps); 546 - if (r) 547 - return r; 548 - } 549 - 550 - return 0; 551 - } 552 - 553 - static int persistent_prepare(struct exception_store *store, 554 - struct dm_snap_exception *e) 555 - { 556 - struct pstore *ps = get_info(store); 557 - uint32_t stride; 558 - chunk_t next_free; 559 - sector_t size = get_dev_size(store->snap->cow->bdev); 560 - 561 - /* Is there enough room ? */ 562 - if (size < ((ps->next_free + 1) * store->snap->chunk_size)) 563 - return -ENOSPC; 564 - 565 - e->new_chunk = ps->next_free; 566 - 567 - /* 568 - * Move onto the next free pending, making sure to take 569 - * into account the location of the metadata chunks. 570 - */ 571 - stride = (ps->exceptions_per_area + 1); 572 - next_free = ++ps->next_free; 573 - if (sector_div(next_free, stride) == 1) 574 - ps->next_free++; 575 - 576 - atomic_inc(&ps->pending_count); 577 - return 0; 578 - } 579 - 580 - static void persistent_commit(struct exception_store *store, 581 - struct dm_snap_exception *e, 582 - void (*callback) (void *, int success), 583 - void *callback_context) 584 - { 585 - unsigned int i; 586 - struct pstore *ps = get_info(store); 587 - struct disk_exception de; 588 - struct commit_callback *cb; 589 - 590 - de.old_chunk = e->old_chunk; 591 - de.new_chunk = e->new_chunk; 592 - write_exception(ps, ps->current_committed++, &de); 593 - 594 - /* 595 - * Add the callback to the back of the array. This code 596 - * is the only place where the callback array is 597 - * manipulated, and we know that it will never be called 598 - * multiple times concurrently. 599 - */ 600 - cb = ps->callbacks + ps->callback_count++; 601 - cb->callback = callback; 602 - cb->context = callback_context; 603 - 604 - /* 605 - * If there are exceptions in flight and we have not yet 606 - * filled this metadata area there's nothing more to do. 607 - */ 608 - if (!atomic_dec_and_test(&ps->pending_count) && 609 - (ps->current_committed != ps->exceptions_per_area)) 610 - return; 611 - 612 - /* 613 - * If we completely filled the current area, then wipe the next one. 614 - */ 615 - if ((ps->current_committed == ps->exceptions_per_area) && 616 - zero_disk_area(ps, ps->current_area + 1)) 617 - ps->valid = 0; 618 - 619 - /* 620 - * Commit exceptions to disk. 
621 - */ 622 - if (ps->valid && area_io(ps, WRITE)) 623 - ps->valid = 0; 624 - 625 - /* 626 - * Advance to the next area if this one is full. 627 - */ 628 - if (ps->current_committed == ps->exceptions_per_area) { 629 - ps->current_committed = 0; 630 - ps->current_area++; 631 - zero_memory_area(ps); 632 - } 633 - 634 - for (i = 0; i < ps->callback_count; i++) { 635 - cb = ps->callbacks + i; 636 - cb->callback(cb->context, ps->valid); 637 - } 638 - 639 - ps->callback_count = 0; 640 - } 641 - 642 - static void persistent_drop(struct exception_store *store) 643 - { 644 - struct pstore *ps = get_info(store); 645 - 646 - ps->valid = 0; 647 - if (write_header(ps)) 648 - DMWARN("write header failed"); 649 - } 650 - 651 - int dm_create_persistent(struct exception_store *store) 652 - { 653 - struct pstore *ps; 654 - 655 - /* allocate the pstore */ 656 - ps = kmalloc(sizeof(*ps), GFP_KERNEL); 657 - if (!ps) 658 - return -ENOMEM; 659 - 660 - ps->snap = store->snap; 661 - ps->valid = 1; 662 - ps->version = SNAPSHOT_DISK_VERSION; 663 - ps->area = NULL; 664 - ps->next_free = 2; /* skipping the header and first area */ 665 - ps->current_committed = 0; 666 - 667 - ps->callback_count = 0; 668 - atomic_set(&ps->pending_count, 0); 669 - ps->callbacks = NULL; 670 - 671 - ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); 672 - if (!ps->metadata_wq) { 673 - kfree(ps); 674 - DMERR("couldn't start header metadata update thread"); 675 - return -ENOMEM; 676 - } 677 - 678 - store->destroy = persistent_destroy; 679 - store->read_metadata = persistent_read_metadata; 680 - store->prepare_exception = persistent_prepare; 681 - store->commit_exception = persistent_commit; 682 - store->drop_snapshot = persistent_drop; 683 - store->fraction_full = persistent_fraction_full; 684 - store->context = ps; 685 - 686 - return 0; 687 - } 688 - 689 - /*----------------------------------------------------------------- 690 - * Implementation of the store for non-persistent snapshots. 
691 - *---------------------------------------------------------------*/ 692 - struct transient_c { 693 - sector_t next_free; 694 - }; 695 - 696 - static void transient_destroy(struct exception_store *store) 697 - { 698 - kfree(store->context); 699 - } 700 - 701 - static int transient_read_metadata(struct exception_store *store) 702 - { 703 - return 0; 704 - } 705 - 706 - static int transient_prepare(struct exception_store *store, 707 - struct dm_snap_exception *e) 708 - { 709 - struct transient_c *tc = (struct transient_c *) store->context; 710 - sector_t size = get_dev_size(store->snap->cow->bdev); 711 - 712 - if (size < (tc->next_free + store->snap->chunk_size)) 713 - return -1; 714 - 715 - e->new_chunk = sector_to_chunk(store->snap, tc->next_free); 716 - tc->next_free += store->snap->chunk_size; 717 - 718 - return 0; 719 - } 720 - 721 - static void transient_commit(struct exception_store *store, 722 - struct dm_snap_exception *e, 723 - void (*callback) (void *, int success), 724 - void *callback_context) 725 - { 726 - /* Just succeed */ 727 - callback(callback_context, 1); 728 - } 729 - 730 - static void transient_fraction_full(struct exception_store *store, 731 - sector_t *numerator, sector_t *denominator) 732 - { 733 - *numerator = ((struct transient_c *) store->context)->next_free; 734 - *denominator = get_dev_size(store->snap->cow->bdev); 735 - } 736 - 737 - int dm_create_transient(struct exception_store *store) 738 - { 739 - struct transient_c *tc; 740 - 741 - store->destroy = transient_destroy; 742 - store->read_metadata = transient_read_metadata; 743 - store->prepare_exception = transient_prepare; 744 - store->commit_exception = transient_commit; 745 - store->drop_snapshot = NULL; 746 - store->fraction_full = transient_fraction_full; 747 - 748 - tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); 749 - if (!tc) 750 - return -ENOMEM; 751 - 752 - tc->next_free = 0; 753 - store->context = tc; 754 - 755 - return 0; 756 }
··· 1 /* 2 * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 3 + * Copyright (C) 2006-2008 Red Hat GmbH 4 * 5 * This file is released under the GPL. 6 */ 7 8 + #include "dm-exception-store.h" 9 10 #include <linux/mm.h> 11 #include <linux/pagemap.h> 12 #include <linux/vmalloc.h> 13 #include <linux/slab.h> 14 15 + #define DM_MSG_PREFIX "snapshot exception stores" 16 17 + int dm_exception_store_init(void) 18 { 19 int r; 20 21 + r = dm_transient_snapshot_init(); 22 + if (r) { 23 + DMERR("Unable to register transient exception store type."); 24 + goto transient_fail; 25 + } 26 27 + r = dm_persistent_snapshot_init(); 28 + if (r) { 29 + DMERR("Unable to register persistent exception store type"); 30 + goto persistent_fail; 31 + } 32 33 return 0; 34 35 + persistent_fail: 36 + dm_persistent_snapshot_exit(); 37 + transient_fail: 38 return r; 39 } 40 41 + void dm_exception_store_exit(void) 42 { 43 + dm_persistent_snapshot_exit(); 44 + dm_transient_snapshot_exit(); 45 }
+148
drivers/md/dm-exception-store.h
···
··· 1 + /* 2 + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 3 + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. 4 + * 5 + * Device-mapper snapshot exception store. 6 + * 7 + * This file is released under the GPL. 8 + */ 9 + 10 + #ifndef _LINUX_DM_EXCEPTION_STORE 11 + #define _LINUX_DM_EXCEPTION_STORE 12 + 13 + #include <linux/blkdev.h> 14 + #include <linux/device-mapper.h> 15 + 16 + /* 17 + * The snapshot code deals with largish chunks of the disk at a 18 + * time. Typically 32k - 512k. 19 + */ 20 + typedef sector_t chunk_t; 21 + 22 + /* 23 + * An exception is used where an old chunk of data has been 24 + * replaced by a new one. 25 + * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number 26 + * of chunks that follow contiguously. Remaining bits hold the number of the 27 + * chunk within the device. 28 + */ 29 + struct dm_snap_exception { 30 + struct list_head hash_list; 31 + 32 + chunk_t old_chunk; 33 + chunk_t new_chunk; 34 + }; 35 + 36 + /* 37 + * Abstraction to handle the meta/layout of exception stores (the 38 + * COW device). 39 + */ 40 + struct dm_exception_store { 41 + /* 42 + * Destroys this object when you've finished with it. 43 + */ 44 + void (*destroy) (struct dm_exception_store *store); 45 + 46 + /* 47 + * The target shouldn't read the COW device until this is 48 + * called. As exceptions are read from the COW, they are 49 + * reported back via the callback. 50 + */ 51 + int (*read_metadata) (struct dm_exception_store *store, 52 + int (*callback)(void *callback_context, 53 + chunk_t old, chunk_t new), 54 + void *callback_context); 55 + 56 + /* 57 + * Find somewhere to store the next exception. 58 + */ 59 + int (*prepare_exception) (struct dm_exception_store *store, 60 + struct dm_snap_exception *e); 61 + 62 + /* 63 + * Update the metadata with this exception. 64 + */ 65 + void (*commit_exception) (struct dm_exception_store *store, 66 + struct dm_snap_exception *e, 67 + void (*callback) (void *, int success), 68 + void *callback_context); 69 + 70 + /* 71 + * The snapshot is invalid, note this in the metadata. 72 + */ 73 + void (*drop_snapshot) (struct dm_exception_store *store); 74 + 75 + int (*status) (struct dm_exception_store *store, status_type_t status, 76 + char *result, unsigned int maxlen); 77 + 78 + /* 79 + * Return how full the snapshot is. 
80 + */ 81 + void (*fraction_full) (struct dm_exception_store *store, 82 + sector_t *numerator, 83 + sector_t *denominator); 84 + 85 + struct dm_snapshot *snap; 86 + void *context; 87 + }; 88 + 89 + /* 90 + * Funtions to manipulate consecutive chunks 91 + */ 92 + # if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) 93 + # define DM_CHUNK_CONSECUTIVE_BITS 8 94 + # define DM_CHUNK_NUMBER_BITS 56 95 + 96 + static inline chunk_t dm_chunk_number(chunk_t chunk) 97 + { 98 + return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); 99 + } 100 + 101 + static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) 102 + { 103 + return e->new_chunk >> DM_CHUNK_NUMBER_BITS; 104 + } 105 + 106 + static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) 107 + { 108 + e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); 109 + 110 + BUG_ON(!dm_consecutive_chunk_count(e)); 111 + } 112 + 113 + # else 114 + # define DM_CHUNK_CONSECUTIVE_BITS 0 115 + 116 + static inline chunk_t dm_chunk_number(chunk_t chunk) 117 + { 118 + return chunk; 119 + } 120 + 121 + static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) 122 + { 123 + return 0; 124 + } 125 + 126 + static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) 127 + { 128 + } 129 + 130 + # endif 131 + 132 + int dm_exception_store_init(void); 133 + void dm_exception_store_exit(void); 134 + 135 + /* 136 + * Two exception store implementations. 137 + */ 138 + int dm_persistent_snapshot_init(void); 139 + void dm_persistent_snapshot_exit(void); 140 + 141 + int dm_transient_snapshot_init(void); 142 + void dm_transient_snapshot_exit(void); 143 + 144 + int dm_create_persistent(struct dm_exception_store *store); 145 + 146 + int dm_create_transient(struct dm_exception_store *store); 147 + 148 + #endif /* _LINUX_DM_EXCEPTION_STORE */
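
The header above documents the packed new_chunk encoding: with a 64-bit chunk_t, the top 8 bits count chunks that follow contiguously and the low 56 bits hold the chunk number. A minimal userspace sketch of that bit layout (illustrative only; the names below are stand-ins, not the kernel's dm_chunk_number()/dm_consecutive_chunk_count() symbols):

#include <stdint.h>
#include <stdio.h>

#define CHUNK_NUMBER_BITS 56

static uint64_t chunk_number(uint64_t chunk)
{
	/* low 56 bits: the chunk number within the device */
	return chunk & ((1ULL << CHUNK_NUMBER_BITS) - 1ULL);
}

static unsigned consecutive_count(uint64_t new_chunk)
{
	/* top 8 bits: how many further chunks follow contiguously */
	return new_chunk >> CHUNK_NUMBER_BITS;
}

int main(void)
{
	/* chunk 1000 with 3 further contiguous chunks recorded after it */
	uint64_t new_chunk = 1000ULL | (3ULL << CHUNK_NUMBER_BITS);

	printf("number=%llu consecutive=%u\n",
	       (unsigned long long)chunk_number(new_chunk),
	       consecutive_count(new_chunk));
	return 0;
}
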
+8 -8
drivers/md/dm-ioctl.c
··· 233 } 234 235 if (hc->new_map) 236 - dm_table_put(hc->new_map); 237 dm_put(hc->md); 238 free_cell(hc); 239 } ··· 827 828 r = dm_swap_table(md, new_map); 829 if (r) { 830 dm_put(md); 831 - dm_table_put(new_map); 832 return r; 833 } 834 ··· 836 set_disk_ro(dm_disk(md), 0); 837 else 838 set_disk_ro(dm_disk(md), 1); 839 - 840 - dm_table_put(new_map); 841 } 842 843 if (dm_suspended(md)) ··· 1078 } 1079 1080 if (hc->new_map) 1081 - dm_table_put(hc->new_map); 1082 hc->new_map = t; 1083 up_write(&_hash_lock); 1084 ··· 1107 } 1108 1109 if (hc->new_map) { 1110 - dm_table_put(hc->new_map); 1111 hc->new_map = NULL; 1112 } 1113 ··· 1548 goto out; 1549 } 1550 1551 - strcpy(name, hc->name); 1552 - strcpy(uuid, hc->uuid ? : ""); 1553 1554 out: 1555 up_read(&_hash_lock);
··· 233 } 234 235 if (hc->new_map) 236 + dm_table_destroy(hc->new_map); 237 dm_put(hc->md); 238 free_cell(hc); 239 } ··· 827 828 r = dm_swap_table(md, new_map); 829 if (r) { 830 + dm_table_destroy(new_map); 831 dm_put(md); 832 return r; 833 } 834 ··· 836 set_disk_ro(dm_disk(md), 0); 837 else 838 set_disk_ro(dm_disk(md), 1); 839 } 840 841 if (dm_suspended(md)) ··· 1080 } 1081 1082 if (hc->new_map) 1083 + dm_table_destroy(hc->new_map); 1084 hc->new_map = t; 1085 up_write(&_hash_lock); 1086 ··· 1109 } 1110 1111 if (hc->new_map) { 1112 + dm_table_destroy(hc->new_map); 1113 hc->new_map = NULL; 1114 } 1115 ··· 1550 goto out; 1551 } 1552 1553 + if (name) 1554 + strcpy(name, hc->name); 1555 + if (uuid) 1556 + strcpy(uuid, hc->uuid ? : ""); 1557 1558 out: 1559 up_read(&_hash_lock);
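
The dm_copy_name_and_uuid() hunk above adds NULL checks so a caller can ask for only one of the two fields. A minimal userspace model of that calling convention (illustrative only; copy_name_and_uuid() and this struct hash_cell are stand-ins for the kernel code):

#include <stdio.h>
#include <string.h>

struct hash_cell {		/* stand-in for dm-ioctl's hash cell */
	const char *name;
	const char *uuid;	/* may be NULL */
};

static int copy_name_and_uuid(const struct hash_cell *hc,
			      char *name, char *uuid)
{
	/* either output buffer may be NULL when the field is not wanted */
	if (name)
		strcpy(name, hc->name);
	if (uuid)
		strcpy(uuid, hc->uuid ? hc->uuid : "");
	return 0;
}

int main(void)
{
	struct hash_cell hc = { .name = "vg0-lv0", .uuid = NULL };
	char uuid[129];

	/* caller only wants the uuid, so it passes NULL for the name */
	copy_name_and_uuid(&hc, NULL, uuid);
	printf("uuid: '%s'\n", uuid);
	return 0;
}
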
+2 -4
drivers/md/dm-linear.c
··· 142 .status = linear_status, 143 .ioctl = linear_ioctl, 144 .merge = linear_merge, 145 }; 146 147 int __init dm_linear_init(void) ··· 157 158 void dm_linear_exit(void) 159 { 160 - int r = dm_unregister_target(&linear_target); 161 - 162 - if (r < 0) 163 - DMERR("unregister failed %d", r); 164 }
··· 142 .status = linear_status, 143 .ioctl = linear_ioctl, 144 .merge = linear_merge, 145 + .features = DM_TARGET_SUPPORTS_BARRIERS, 146 }; 147 148 int __init dm_linear_init(void) ··· 156 157 void dm_linear_exit(void) 158 { 159 + dm_unregister_target(&linear_target); 160 }
+31 -9
drivers/md/dm-log.c
··· 326 static int rw_header(struct log_c *lc, int rw) 327 { 328 lc->io_req.bi_rw = rw; 329 - lc->io_req.mem.ptr.vma = lc->disk_header; 330 - lc->io_req.notify.fn = NULL; 331 332 return dm_io(&lc->io_req, 1, &lc->header_location, NULL); 333 } ··· 360 return 0; 361 } 362 363 - static inline int write_header(struct log_c *log) 364 { 365 - header_to_disk(&log->header, log->disk_header); 366 - return rw_header(log, WRITE); 367 } 368 369 /*---------------------------------------------------------------- ··· 406 } 407 } 408 409 - if (sscanf(argv[0], "%u", &region_size) != 1) { 410 - DMWARN("invalid region size string"); 411 return -EINVAL; 412 } 413 ··· 457 */ 458 buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + 459 bitset_size, ti->limits.hardsect_size); 460 lc->header_location.count = buf_size >> SECTOR_SHIFT; 461 lc->io_req.mem.type = DM_IO_VMA; 462 lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, 463 PAGE_SIZE)); 464 if (IS_ERR(lc->io_req.client)) { ··· 481 lc->disk_header = vmalloc(buf_size); 482 if (!lc->disk_header) { 483 DMWARN("couldn't allocate disk log buffer"); 484 kfree(lc); 485 return -ENOMEM; 486 } 487 488 lc->clean_bits = (void *)lc->disk_header + 489 (LOG_OFFSET << SECTOR_SHIFT); 490 } ··· 498 DMWARN("couldn't allocate sync bitset"); 499 if (!dev) 500 vfree(lc->clean_bits); 501 vfree(lc->disk_header); 502 kfree(lc); 503 return -ENOMEM; ··· 513 vfree(lc->sync_bits); 514 if (!dev) 515 vfree(lc->clean_bits); 516 vfree(lc->disk_header); 517 kfree(lc); 518 return -ENOMEM; ··· 651 /* set the correct number of regions in the header */ 652 lc->header.nr_regions = lc->region_count; 653 654 /* write the new header */ 655 - r = write_header(lc); 656 if (r) { 657 DMWARN("%s: Failed to write header on dirty region log device", 658 lc->log_dev->name); ··· 704 if (!lc->touched) 705 return 0; 706 707 - r = write_header(lc); 708 if (r) 709 fail_log_device(lc); 710 else
··· 326 static int rw_header(struct log_c *lc, int rw) 327 { 328 lc->io_req.bi_rw = rw; 329 330 return dm_io(&lc->io_req, 1, &lc->header_location, NULL); 331 } ··· 362 return 0; 363 } 364 365 + static int _check_region_size(struct dm_target *ti, uint32_t region_size) 366 { 367 + if (region_size < 2 || region_size > ti->len) 368 + return 0; 369 + 370 + if (!is_power_of_2(region_size)) 371 + return 0; 372 + 373 + return 1; 374 } 375 376 /*---------------------------------------------------------------- ··· 403 } 404 } 405 406 + if (sscanf(argv[0], "%u", &region_size) != 1 || 407 + !_check_region_size(ti, region_size)) { 408 + DMWARN("invalid region size %s", argv[0]); 409 return -EINVAL; 410 } 411 ··· 453 */ 454 buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) + 455 bitset_size, ti->limits.hardsect_size); 456 + 457 + if (buf_size > dev->bdev->bd_inode->i_size) { 458 + DMWARN("log device %s too small: need %llu bytes", 459 + dev->name, (unsigned long long)buf_size); 460 + kfree(lc); 461 + return -EINVAL; 462 + } 463 + 464 lc->header_location.count = buf_size >> SECTOR_SHIFT; 465 + 466 lc->io_req.mem.type = DM_IO_VMA; 467 + lc->io_req.notify.fn = NULL; 468 lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, 469 PAGE_SIZE)); 470 if (IS_ERR(lc->io_req.client)) { ··· 467 lc->disk_header = vmalloc(buf_size); 468 if (!lc->disk_header) { 469 DMWARN("couldn't allocate disk log buffer"); 470 + dm_io_client_destroy(lc->io_req.client); 471 kfree(lc); 472 return -ENOMEM; 473 } 474 475 + lc->io_req.mem.ptr.vma = lc->disk_header; 476 lc->clean_bits = (void *)lc->disk_header + 477 (LOG_OFFSET << SECTOR_SHIFT); 478 } ··· 482 DMWARN("couldn't allocate sync bitset"); 483 if (!dev) 484 vfree(lc->clean_bits); 485 + else 486 + dm_io_client_destroy(lc->io_req.client); 487 vfree(lc->disk_header); 488 kfree(lc); 489 return -ENOMEM; ··· 495 vfree(lc->sync_bits); 496 if (!dev) 497 vfree(lc->clean_bits); 498 + else 499 + dm_io_client_destroy(lc->io_req.client); 500 vfree(lc->disk_header); 501 kfree(lc); 502 return -ENOMEM; ··· 631 /* set the correct number of regions in the header */ 632 lc->header.nr_regions = lc->region_count; 633 634 + header_to_disk(&lc->header, lc->disk_header); 635 + 636 /* write the new header */ 637 + r = rw_header(lc, WRITE); 638 if (r) { 639 DMWARN("%s: Failed to write header on dirty region log device", 640 lc->log_dev->name); ··· 682 if (!lc->touched) 683 return 0; 684 685 + r = rw_header(lc, WRITE); 686 if (r) 687 fail_log_device(lc); 688 else
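
The new _check_region_size() above rejects region sizes smaller than 2 sectors, larger than the target, or not a power of two. A small standalone sketch of the same rules (illustrative only; not the kernel function):

#include <stdint.h>
#include <stdio.h>

static int is_power_of_2(uint64_t n)
{
	return n != 0 && (n & (n - 1)) == 0;
}

static int check_region_size(uint64_t ti_len, uint32_t region_size)
{
	if (region_size < 2 || region_size > ti_len)
		return 0;

	return is_power_of_2(region_size);
}

int main(void)
{
	printf("%d %d %d\n",
	       check_region_size(1 << 20, 1024),	/* ok */
	       check_region_size(1 << 20, 1000),	/* not a power of 2 */
	       check_region_size(512, 1024));		/* larger than target */
	return 0;
}
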
+5 -9
drivers/md/dm-mpath.c
··· 889 dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, 890 pgpath->path.dev->name, m->nr_valid_paths); 891 892 - queue_work(kmultipathd, &m->trigger_event); 893 queue_work(kmultipathd, &pgpath->deactivate_path); 894 895 out: ··· 932 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, 933 pgpath->path.dev->name, m->nr_valid_paths); 934 935 - queue_work(kmultipathd, &m->trigger_event); 936 937 out: 938 spin_unlock_irqrestore(&m->lock, flags); ··· 976 977 spin_unlock_irqrestore(&m->lock, flags); 978 979 - queue_work(kmultipathd, &m->trigger_event); 980 } 981 982 /* ··· 1006 } 1007 spin_unlock_irqrestore(&m->lock, flags); 1008 1009 - queue_work(kmultipathd, &m->trigger_event); 1010 return 0; 1011 } 1012 ··· 1495 1496 static void __exit dm_multipath_exit(void) 1497 { 1498 - int r; 1499 - 1500 destroy_workqueue(kmpath_handlerd); 1501 destroy_workqueue(kmultipathd); 1502 1503 - r = dm_unregister_target(&multipath_target); 1504 - if (r < 0) 1505 - DMERR("target unregister failed %d", r); 1506 kmem_cache_destroy(_mpio_cache); 1507 } 1508
··· 889 dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, 890 pgpath->path.dev->name, m->nr_valid_paths); 891 892 + schedule_work(&m->trigger_event); 893 queue_work(kmultipathd, &pgpath->deactivate_path); 894 895 out: ··· 932 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, 933 pgpath->path.dev->name, m->nr_valid_paths); 934 935 + schedule_work(&m->trigger_event); 936 937 out: 938 spin_unlock_irqrestore(&m->lock, flags); ··· 976 977 spin_unlock_irqrestore(&m->lock, flags); 978 979 + schedule_work(&m->trigger_event); 980 } 981 982 /* ··· 1006 } 1007 spin_unlock_irqrestore(&m->lock, flags); 1008 1009 + schedule_work(&m->trigger_event); 1010 return 0; 1011 } 1012 ··· 1495 1496 static void __exit dm_multipath_exit(void) 1497 { 1498 destroy_workqueue(kmpath_handlerd); 1499 destroy_workqueue(kmultipathd); 1500 1501 + dm_unregister_target(&multipath_target); 1502 kmem_cache_destroy(_mpio_cache); 1503 } 1504
+4 -20
drivers/md/dm-raid1.c
··· 197 struct mirror_set *ms = m->ms; 198 struct mirror *new; 199 200 - if (!errors_handled(ms)) 201 - return; 202 - 203 /* 204 * error_count is used for nothing more than a 205 * simple way to tell if a device has encountered ··· 205 atomic_inc(&m->error_count); 206 207 if (test_and_set_bit(error_type, &m->error_type)) 208 return; 209 210 if (m != get_default_mirror(ms)) ··· 808 kfree(ms); 809 } 810 811 - static inline int _check_region_size(struct dm_target *ti, uint32_t size) 812 - { 813 - return !(size % (PAGE_SIZE >> 9) || !is_power_of_2(size) || 814 - size > ti->len); 815 - } 816 - 817 static int get_mirror(struct mirror_set *ms, struct dm_target *ti, 818 unsigned int mirror, char **argv) 819 { ··· 863 dl = dm_dirty_log_create(argv[0], ti, param_count, argv + 2); 864 if (!dl) { 865 ti->error = "Error creating mirror dirty log"; 866 - return NULL; 867 - } 868 - 869 - if (!_check_region_size(ti, dl->type->get_region_size(dl))) { 870 - ti->error = "Invalid region size"; 871 - dm_dirty_log_destroy(dl); 872 return NULL; 873 } 874 ··· 1288 1289 static void __exit dm_mirror_exit(void) 1290 { 1291 - int r; 1292 - 1293 - r = dm_unregister_target(&mirror_target); 1294 - if (r < 0) 1295 - DMERR("unregister failed %d", r); 1296 } 1297 1298 /* Module hooks */
··· 197 struct mirror_set *ms = m->ms; 198 struct mirror *new; 199 200 /* 201 * error_count is used for nothing more than a 202 * simple way to tell if a device has encountered ··· 208 atomic_inc(&m->error_count); 209 210 if (test_and_set_bit(error_type, &m->error_type)) 211 + return; 212 + 213 + if (!errors_handled(ms)) 214 return; 215 216 if (m != get_default_mirror(ms)) ··· 808 kfree(ms); 809 } 810 811 static int get_mirror(struct mirror_set *ms, struct dm_target *ti, 812 unsigned int mirror, char **argv) 813 { ··· 869 dl = dm_dirty_log_create(argv[0], ti, param_count, argv + 2); 870 if (!dl) { 871 ti->error = "Error creating mirror dirty log"; 872 return NULL; 873 } 874 ··· 1300 1301 static void __exit dm_mirror_exit(void) 1302 { 1303 + dm_unregister_target(&mirror_target); 1304 } 1305 1306 /* Module hooks */
+704
drivers/md/dm-snap-persistent.c
···
··· 1 + /* 2 + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 3 + * Copyright (C) 2006-2008 Red Hat GmbH 4 + * 5 + * This file is released under the GPL. 6 + */ 7 + 8 + #include "dm-exception-store.h" 9 + #include "dm-snap.h" 10 + 11 + #include <linux/mm.h> 12 + #include <linux/pagemap.h> 13 + #include <linux/vmalloc.h> 14 + #include <linux/slab.h> 15 + #include <linux/dm-io.h> 16 + 17 + #define DM_MSG_PREFIX "persistent snapshot" 18 + #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ 19 + 20 + /*----------------------------------------------------------------- 21 + * Persistent snapshots, by persistent we mean that the snapshot 22 + * will survive a reboot. 23 + *---------------------------------------------------------------*/ 24 + 25 + /* 26 + * We need to store a record of which parts of the origin have 27 + * been copied to the snapshot device. The snapshot code 28 + * requires that we copy exception chunks to chunk aligned areas 29 + * of the COW store. It makes sense therefore, to store the 30 + * metadata in chunk size blocks. 31 + * 32 + * There is no backward or forward compatibility implemented, 33 + * snapshots with different disk versions than the kernel will 34 + * not be usable. It is expected that "lvcreate" will blank out 35 + * the start of a fresh COW device before calling the snapshot 36 + * constructor. 37 + * 38 + * The first chunk of the COW device just contains the header. 39 + * After this there is a chunk filled with exception metadata, 40 + * followed by as many exception chunks as can fit in the 41 + * metadata areas. 42 + * 43 + * All on disk structures are in little-endian format. The end 44 + * of the exceptions info is indicated by an exception with a 45 + * new_chunk of 0, which is invalid since it would point to the 46 + * header chunk. 47 + */ 48 + 49 + /* 50 + * Magic for persistent snapshots: "SnAp" - Feeble isn't it. 51 + */ 52 + #define SNAP_MAGIC 0x70416e53 53 + 54 + /* 55 + * The on-disk version of the metadata. 56 + */ 57 + #define SNAPSHOT_DISK_VERSION 1 58 + 59 + struct disk_header { 60 + uint32_t magic; 61 + 62 + /* 63 + * Is this snapshot valid. There is no way of recovering 64 + * an invalid snapshot. 65 + */ 66 + uint32_t valid; 67 + 68 + /* 69 + * Simple, incrementing version. no backward 70 + * compatibility. 71 + */ 72 + uint32_t version; 73 + 74 + /* In sectors */ 75 + uint32_t chunk_size; 76 + }; 77 + 78 + struct disk_exception { 79 + uint64_t old_chunk; 80 + uint64_t new_chunk; 81 + }; 82 + 83 + struct commit_callback { 84 + void (*callback)(void *, int success); 85 + void *context; 86 + }; 87 + 88 + /* 89 + * The top level structure for a persistent exception store. 90 + */ 91 + struct pstore { 92 + struct dm_snapshot *snap; /* up pointer to my snapshot */ 93 + int version; 94 + int valid; 95 + uint32_t exceptions_per_area; 96 + 97 + /* 98 + * Now that we have an asynchronous kcopyd there is no 99 + * need for large chunk sizes, so it wont hurt to have a 100 + * whole chunks worth of metadata in memory at once. 101 + */ 102 + void *area; 103 + 104 + /* 105 + * An area of zeros used to clear the next area. 106 + */ 107 + void *zero_area; 108 + 109 + /* 110 + * Used to keep track of which metadata area the data in 111 + * 'chunk' refers to. 112 + */ 113 + chunk_t current_area; 114 + 115 + /* 116 + * The next free chunk for an exception. 117 + */ 118 + chunk_t next_free; 119 + 120 + /* 121 + * The index of next free exception in the current 122 + * metadata area. 
123 + */ 124 + uint32_t current_committed; 125 + 126 + atomic_t pending_count; 127 + uint32_t callback_count; 128 + struct commit_callback *callbacks; 129 + struct dm_io_client *io_client; 130 + 131 + struct workqueue_struct *metadata_wq; 132 + }; 133 + 134 + static unsigned sectors_to_pages(unsigned sectors) 135 + { 136 + return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); 137 + } 138 + 139 + static int alloc_area(struct pstore *ps) 140 + { 141 + int r = -ENOMEM; 142 + size_t len; 143 + 144 + len = ps->snap->chunk_size << SECTOR_SHIFT; 145 + 146 + /* 147 + * Allocate the chunk_size block of memory that will hold 148 + * a single metadata area. 149 + */ 150 + ps->area = vmalloc(len); 151 + if (!ps->area) 152 + return r; 153 + 154 + ps->zero_area = vmalloc(len); 155 + if (!ps->zero_area) { 156 + vfree(ps->area); 157 + return r; 158 + } 159 + memset(ps->zero_area, 0, len); 160 + 161 + return 0; 162 + } 163 + 164 + static void free_area(struct pstore *ps) 165 + { 166 + vfree(ps->area); 167 + ps->area = NULL; 168 + vfree(ps->zero_area); 169 + ps->zero_area = NULL; 170 + } 171 + 172 + struct mdata_req { 173 + struct dm_io_region *where; 174 + struct dm_io_request *io_req; 175 + struct work_struct work; 176 + int result; 177 + }; 178 + 179 + static void do_metadata(struct work_struct *work) 180 + { 181 + struct mdata_req *req = container_of(work, struct mdata_req, work); 182 + 183 + req->result = dm_io(req->io_req, 1, req->where, NULL); 184 + } 185 + 186 + /* 187 + * Read or write a chunk aligned and sized block of data from a device. 188 + */ 189 + static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) 190 + { 191 + struct dm_io_region where = { 192 + .bdev = ps->snap->cow->bdev, 193 + .sector = ps->snap->chunk_size * chunk, 194 + .count = ps->snap->chunk_size, 195 + }; 196 + struct dm_io_request io_req = { 197 + .bi_rw = rw, 198 + .mem.type = DM_IO_VMA, 199 + .mem.ptr.vma = ps->area, 200 + .client = ps->io_client, 201 + .notify.fn = NULL, 202 + }; 203 + struct mdata_req req; 204 + 205 + if (!metadata) 206 + return dm_io(&io_req, 1, &where, NULL); 207 + 208 + req.where = &where; 209 + req.io_req = &io_req; 210 + 211 + /* 212 + * Issue the synchronous I/O from a different thread 213 + * to avoid generic_make_request recursion. 214 + */ 215 + INIT_WORK(&req.work, do_metadata); 216 + queue_work(ps->metadata_wq, &req.work); 217 + flush_workqueue(ps->metadata_wq); 218 + 219 + return req.result; 220 + } 221 + 222 + /* 223 + * Convert a metadata area index to a chunk index. 224 + */ 225 + static chunk_t area_location(struct pstore *ps, chunk_t area) 226 + { 227 + return 1 + ((ps->exceptions_per_area + 1) * area); 228 + } 229 + 230 + /* 231 + * Read or write a metadata area. Remembering to skip the first 232 + * chunk which holds the header. 
233 + */ 234 + static int area_io(struct pstore *ps, int rw) 235 + { 236 + int r; 237 + chunk_t chunk; 238 + 239 + chunk = area_location(ps, ps->current_area); 240 + 241 + r = chunk_io(ps, chunk, rw, 0); 242 + if (r) 243 + return r; 244 + 245 + return 0; 246 + } 247 + 248 + static void zero_memory_area(struct pstore *ps) 249 + { 250 + memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 251 + } 252 + 253 + static int zero_disk_area(struct pstore *ps, chunk_t area) 254 + { 255 + struct dm_io_region where = { 256 + .bdev = ps->snap->cow->bdev, 257 + .sector = ps->snap->chunk_size * area_location(ps, area), 258 + .count = ps->snap->chunk_size, 259 + }; 260 + struct dm_io_request io_req = { 261 + .bi_rw = WRITE, 262 + .mem.type = DM_IO_VMA, 263 + .mem.ptr.vma = ps->zero_area, 264 + .client = ps->io_client, 265 + .notify.fn = NULL, 266 + }; 267 + 268 + return dm_io(&io_req, 1, &where, NULL); 269 + } 270 + 271 + static int read_header(struct pstore *ps, int *new_snapshot) 272 + { 273 + int r; 274 + struct disk_header *dh; 275 + chunk_t chunk_size; 276 + int chunk_size_supplied = 1; 277 + 278 + /* 279 + * Use default chunk size (or hardsect_size, if larger) if none supplied 280 + */ 281 + if (!ps->snap->chunk_size) { 282 + ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, 283 + bdev_hardsect_size(ps->snap->cow->bdev) >> 9); 284 + ps->snap->chunk_mask = ps->snap->chunk_size - 1; 285 + ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; 286 + chunk_size_supplied = 0; 287 + } 288 + 289 + ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> 290 + chunk_size)); 291 + if (IS_ERR(ps->io_client)) 292 + return PTR_ERR(ps->io_client); 293 + 294 + r = alloc_area(ps); 295 + if (r) 296 + return r; 297 + 298 + r = chunk_io(ps, 0, READ, 1); 299 + if (r) 300 + goto bad; 301 + 302 + dh = (struct disk_header *) ps->area; 303 + 304 + if (le32_to_cpu(dh->magic) == 0) { 305 + *new_snapshot = 1; 306 + return 0; 307 + } 308 + 309 + if (le32_to_cpu(dh->magic) != SNAP_MAGIC) { 310 + DMWARN("Invalid or corrupt snapshot"); 311 + r = -ENXIO; 312 + goto bad; 313 + } 314 + 315 + *new_snapshot = 0; 316 + ps->valid = le32_to_cpu(dh->valid); 317 + ps->version = le32_to_cpu(dh->version); 318 + chunk_size = le32_to_cpu(dh->chunk_size); 319 + 320 + if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) 321 + return 0; 322 + 323 + DMWARN("chunk size %llu in device metadata overrides " 324 + "table chunk size of %llu.", 325 + (unsigned long long)chunk_size, 326 + (unsigned long long)ps->snap->chunk_size); 327 + 328 + /* We had a bogus chunk_size. Fix stuff up. 
*/ 329 + free_area(ps); 330 + 331 + ps->snap->chunk_size = chunk_size; 332 + ps->snap->chunk_mask = chunk_size - 1; 333 + ps->snap->chunk_shift = ffs(chunk_size) - 1; 334 + 335 + r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), 336 + ps->io_client); 337 + if (r) 338 + return r; 339 + 340 + r = alloc_area(ps); 341 + return r; 342 + 343 + bad: 344 + free_area(ps); 345 + return r; 346 + } 347 + 348 + static int write_header(struct pstore *ps) 349 + { 350 + struct disk_header *dh; 351 + 352 + memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); 353 + 354 + dh = (struct disk_header *) ps->area; 355 + dh->magic = cpu_to_le32(SNAP_MAGIC); 356 + dh->valid = cpu_to_le32(ps->valid); 357 + dh->version = cpu_to_le32(ps->version); 358 + dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); 359 + 360 + return chunk_io(ps, 0, WRITE, 1); 361 + } 362 + 363 + /* 364 + * Access functions for the disk exceptions, these do the endian conversions. 365 + */ 366 + static struct disk_exception *get_exception(struct pstore *ps, uint32_t index) 367 + { 368 + BUG_ON(index >= ps->exceptions_per_area); 369 + 370 + return ((struct disk_exception *) ps->area) + index; 371 + } 372 + 373 + static void read_exception(struct pstore *ps, 374 + uint32_t index, struct disk_exception *result) 375 + { 376 + struct disk_exception *e = get_exception(ps, index); 377 + 378 + /* copy it */ 379 + result->old_chunk = le64_to_cpu(e->old_chunk); 380 + result->new_chunk = le64_to_cpu(e->new_chunk); 381 + } 382 + 383 + static void write_exception(struct pstore *ps, 384 + uint32_t index, struct disk_exception *de) 385 + { 386 + struct disk_exception *e = get_exception(ps, index); 387 + 388 + /* copy it */ 389 + e->old_chunk = cpu_to_le64(de->old_chunk); 390 + e->new_chunk = cpu_to_le64(de->new_chunk); 391 + } 392 + 393 + /* 394 + * Registers the exceptions that are present in the current area. 395 + * 'full' is filled in to indicate if the area has been 396 + * filled. 397 + */ 398 + static int insert_exceptions(struct pstore *ps, 399 + int (*callback)(void *callback_context, 400 + chunk_t old, chunk_t new), 401 + void *callback_context, 402 + int *full) 403 + { 404 + int r; 405 + unsigned int i; 406 + struct disk_exception de; 407 + 408 + /* presume the area is full */ 409 + *full = 1; 410 + 411 + for (i = 0; i < ps->exceptions_per_area; i++) { 412 + read_exception(ps, i, &de); 413 + 414 + /* 415 + * If the new_chunk is pointing at the start of 416 + * the COW device, where the first metadata area 417 + * is we know that we've hit the end of the 418 + * exceptions. Therefore the area is not full. 419 + */ 420 + if (de.new_chunk == 0LL) { 421 + ps->current_committed = i; 422 + *full = 0; 423 + break; 424 + } 425 + 426 + /* 427 + * Keep track of the start of the free chunks. 428 + */ 429 + if (ps->next_free <= de.new_chunk) 430 + ps->next_free = de.new_chunk + 1; 431 + 432 + /* 433 + * Otherwise we add the exception to the snapshot. 434 + */ 435 + r = callback(callback_context, de.old_chunk, de.new_chunk); 436 + if (r) 437 + return r; 438 + } 439 + 440 + return 0; 441 + } 442 + 443 + static int read_exceptions(struct pstore *ps, 444 + int (*callback)(void *callback_context, chunk_t old, 445 + chunk_t new), 446 + void *callback_context) 447 + { 448 + int r, full = 1; 449 + 450 + /* 451 + * Keeping reading chunks and inserting exceptions until 452 + * we find a partially full area. 
453 + */ 454 + for (ps->current_area = 0; full; ps->current_area++) { 455 + r = area_io(ps, READ); 456 + if (r) 457 + return r; 458 + 459 + r = insert_exceptions(ps, callback, callback_context, &full); 460 + if (r) 461 + return r; 462 + } 463 + 464 + ps->current_area--; 465 + 466 + return 0; 467 + } 468 + 469 + static struct pstore *get_info(struct dm_exception_store *store) 470 + { 471 + return (struct pstore *) store->context; 472 + } 473 + 474 + static void persistent_fraction_full(struct dm_exception_store *store, 475 + sector_t *numerator, sector_t *denominator) 476 + { 477 + *numerator = get_info(store)->next_free * store->snap->chunk_size; 478 + *denominator = get_dev_size(store->snap->cow->bdev); 479 + } 480 + 481 + static void persistent_destroy(struct dm_exception_store *store) 482 + { 483 + struct pstore *ps = get_info(store); 484 + 485 + destroy_workqueue(ps->metadata_wq); 486 + dm_io_client_destroy(ps->io_client); 487 + vfree(ps->callbacks); 488 + free_area(ps); 489 + kfree(ps); 490 + } 491 + 492 + static int persistent_read_metadata(struct dm_exception_store *store, 493 + int (*callback)(void *callback_context, 494 + chunk_t old, chunk_t new), 495 + void *callback_context) 496 + { 497 + int r, uninitialized_var(new_snapshot); 498 + struct pstore *ps = get_info(store); 499 + 500 + /* 501 + * Read the snapshot header. 502 + */ 503 + r = read_header(ps, &new_snapshot); 504 + if (r) 505 + return r; 506 + 507 + /* 508 + * Now we know correct chunk_size, complete the initialisation. 509 + */ 510 + ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / 511 + sizeof(struct disk_exception); 512 + ps->callbacks = dm_vcalloc(ps->exceptions_per_area, 513 + sizeof(*ps->callbacks)); 514 + if (!ps->callbacks) 515 + return -ENOMEM; 516 + 517 + /* 518 + * Do we need to setup a new snapshot ? 519 + */ 520 + if (new_snapshot) { 521 + r = write_header(ps); 522 + if (r) { 523 + DMWARN("write_header failed"); 524 + return r; 525 + } 526 + 527 + ps->current_area = 0; 528 + zero_memory_area(ps); 529 + r = zero_disk_area(ps, 0); 530 + if (r) { 531 + DMWARN("zero_disk_area(0) failed"); 532 + return r; 533 + } 534 + } else { 535 + /* 536 + * Sanity checks. 537 + */ 538 + if (ps->version != SNAPSHOT_DISK_VERSION) { 539 + DMWARN("unable to handle snapshot disk version %d", 540 + ps->version); 541 + return -EINVAL; 542 + } 543 + 544 + /* 545 + * Metadata are valid, but snapshot is invalidated 546 + */ 547 + if (!ps->valid) 548 + return 1; 549 + 550 + /* 551 + * Read the metadata. 552 + */ 553 + r = read_exceptions(ps, callback, callback_context); 554 + if (r) 555 + return r; 556 + } 557 + 558 + return 0; 559 + } 560 + 561 + static int persistent_prepare_exception(struct dm_exception_store *store, 562 + struct dm_snap_exception *e) 563 + { 564 + struct pstore *ps = get_info(store); 565 + uint32_t stride; 566 + chunk_t next_free; 567 + sector_t size = get_dev_size(store->snap->cow->bdev); 568 + 569 + /* Is there enough room ? */ 570 + if (size < ((ps->next_free + 1) * store->snap->chunk_size)) 571 + return -ENOSPC; 572 + 573 + e->new_chunk = ps->next_free; 574 + 575 + /* 576 + * Move onto the next free pending, making sure to take 577 + * into account the location of the metadata chunks. 
578 + */ 579 + stride = (ps->exceptions_per_area + 1); 580 + next_free = ++ps->next_free; 581 + if (sector_div(next_free, stride) == 1) 582 + ps->next_free++; 583 + 584 + atomic_inc(&ps->pending_count); 585 + return 0; 586 + } 587 + 588 + static void persistent_commit_exception(struct dm_exception_store *store, 589 + struct dm_snap_exception *e, 590 + void (*callback) (void *, int success), 591 + void *callback_context) 592 + { 593 + unsigned int i; 594 + struct pstore *ps = get_info(store); 595 + struct disk_exception de; 596 + struct commit_callback *cb; 597 + 598 + de.old_chunk = e->old_chunk; 599 + de.new_chunk = e->new_chunk; 600 + write_exception(ps, ps->current_committed++, &de); 601 + 602 + /* 603 + * Add the callback to the back of the array. This code 604 + * is the only place where the callback array is 605 + * manipulated, and we know that it will never be called 606 + * multiple times concurrently. 607 + */ 608 + cb = ps->callbacks + ps->callback_count++; 609 + cb->callback = callback; 610 + cb->context = callback_context; 611 + 612 + /* 613 + * If there are exceptions in flight and we have not yet 614 + * filled this metadata area there's nothing more to do. 615 + */ 616 + if (!atomic_dec_and_test(&ps->pending_count) && 617 + (ps->current_committed != ps->exceptions_per_area)) 618 + return; 619 + 620 + /* 621 + * If we completely filled the current area, then wipe the next one. 622 + */ 623 + if ((ps->current_committed == ps->exceptions_per_area) && 624 + zero_disk_area(ps, ps->current_area + 1)) 625 + ps->valid = 0; 626 + 627 + /* 628 + * Commit exceptions to disk. 629 + */ 630 + if (ps->valid && area_io(ps, WRITE)) 631 + ps->valid = 0; 632 + 633 + /* 634 + * Advance to the next area if this one is full. 635 + */ 636 + if (ps->current_committed == ps->exceptions_per_area) { 637 + ps->current_committed = 0; 638 + ps->current_area++; 639 + zero_memory_area(ps); 640 + } 641 + 642 + for (i = 0; i < ps->callback_count; i++) { 643 + cb = ps->callbacks + i; 644 + cb->callback(cb->context, ps->valid); 645 + } 646 + 647 + ps->callback_count = 0; 648 + } 649 + 650 + static void persistent_drop_snapshot(struct dm_exception_store *store) 651 + { 652 + struct pstore *ps = get_info(store); 653 + 654 + ps->valid = 0; 655 + if (write_header(ps)) 656 + DMWARN("write header failed"); 657 + } 658 + 659 + int dm_create_persistent(struct dm_exception_store *store) 660 + { 661 + struct pstore *ps; 662 + 663 + /* allocate the pstore */ 664 + ps = kmalloc(sizeof(*ps), GFP_KERNEL); 665 + if (!ps) 666 + return -ENOMEM; 667 + 668 + ps->snap = store->snap; 669 + ps->valid = 1; 670 + ps->version = SNAPSHOT_DISK_VERSION; 671 + ps->area = NULL; 672 + ps->next_free = 2; /* skipping the header and first area */ 673 + ps->current_committed = 0; 674 + 675 + ps->callback_count = 0; 676 + atomic_set(&ps->pending_count, 0); 677 + ps->callbacks = NULL; 678 + 679 + ps->metadata_wq = create_singlethread_workqueue("ksnaphd"); 680 + if (!ps->metadata_wq) { 681 + kfree(ps); 682 + DMERR("couldn't start header metadata update thread"); 683 + return -ENOMEM; 684 + } 685 + 686 + store->destroy = persistent_destroy; 687 + store->read_metadata = persistent_read_metadata; 688 + store->prepare_exception = persistent_prepare_exception; 689 + store->commit_exception = persistent_commit_exception; 690 + store->drop_snapshot = persistent_drop_snapshot; 691 + store->fraction_full = persistent_fraction_full; 692 + store->context = ps; 693 + 694 + return 0; 695 + } 696 + 697 + int dm_persistent_snapshot_init(void) 698 + { 699 + 
return 0; 700 + } 701 + 702 + void dm_persistent_snapshot_exit(void) 703 + { 704 + }
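The split-out persistent and transient stores now fill in the same set of function pointers on struct dm_exception_store, and read_metadata() takes a callback plus context instead of the store calling back into the snapshot core directly. The sketch below shows how a caller might drive that interface once a store has been set up with dm_create_persistent() or dm_create_transient(); load_exception(), commit_done() and use_store() are illustrative names, not part of this merge, and the include assumes building inside drivers/md.

    /*
     * Sketch only: driving a dm_exception_store through its function
     * pointers. Helper names here are hypothetical.
     */
    #include "dm-exception-store.h"

    static int load_exception(void *context, chunk_t old, chunk_t new)
    {
            /* Record one old -> new chunk mapping read back from the store. */
            return 0;
    }

    static void commit_done(void *context, int success)
    {
            /* Release whatever was waiting for this exception to hit disk. */
    }

    static int use_store(struct dm_exception_store *store, void *context)
    {
            struct dm_snap_exception e;
            int r;

            /* Replay exceptions already committed to the COW device. */
            r = store->read_metadata(store, load_exception, context);
            if (r)
                    return r;

            /* Reserve the next free chunk, then persist the mapping. */
            e.old_chunk = 0;
            r = store->prepare_exception(store, &e);
            if (r)
                    return r;

            store->commit_exception(store, &e, commit_done, context);

            /* Tear the store down when the snapshot goes away. */
            store->destroy(store);
            return 0;
    }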
+98
drivers/md/dm-snap-transient.c
···
··· 1 + /* 2 + * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 3 + * Copyright (C) 2006-2008 Red Hat GmbH 4 + * 5 + * This file is released under the GPL. 6 + */ 7 + 8 + #include "dm-exception-store.h" 9 + #include "dm-snap.h" 10 + 11 + #include <linux/mm.h> 12 + #include <linux/pagemap.h> 13 + #include <linux/vmalloc.h> 14 + #include <linux/slab.h> 15 + #include <linux/dm-io.h> 16 + 17 + #define DM_MSG_PREFIX "transient snapshot" 18 + 19 + /*----------------------------------------------------------------- 20 + * Implementation of the store for non-persistent snapshots. 21 + *---------------------------------------------------------------*/ 22 + struct transient_c { 23 + sector_t next_free; 24 + }; 25 + 26 + static void transient_destroy(struct dm_exception_store *store) 27 + { 28 + kfree(store->context); 29 + } 30 + 31 + static int transient_read_metadata(struct dm_exception_store *store, 32 + int (*callback)(void *callback_context, 33 + chunk_t old, chunk_t new), 34 + void *callback_context) 35 + { 36 + return 0; 37 + } 38 + 39 + static int transient_prepare_exception(struct dm_exception_store *store, 40 + struct dm_snap_exception *e) 41 + { 42 + struct transient_c *tc = (struct transient_c *) store->context; 43 + sector_t size = get_dev_size(store->snap->cow->bdev); 44 + 45 + if (size < (tc->next_free + store->snap->chunk_size)) 46 + return -1; 47 + 48 + e->new_chunk = sector_to_chunk(store->snap, tc->next_free); 49 + tc->next_free += store->snap->chunk_size; 50 + 51 + return 0; 52 + } 53 + 54 + static void transient_commit_exception(struct dm_exception_store *store, 55 + struct dm_snap_exception *e, 56 + void (*callback) (void *, int success), 57 + void *callback_context) 58 + { 59 + /* Just succeed */ 60 + callback(callback_context, 1); 61 + } 62 + 63 + static void transient_fraction_full(struct dm_exception_store *store, 64 + sector_t *numerator, sector_t *denominator) 65 + { 66 + *numerator = ((struct transient_c *) store->context)->next_free; 67 + *denominator = get_dev_size(store->snap->cow->bdev); 68 + } 69 + 70 + int dm_create_transient(struct dm_exception_store *store) 71 + { 72 + struct transient_c *tc; 73 + 74 + store->destroy = transient_destroy; 75 + store->read_metadata = transient_read_metadata; 76 + store->prepare_exception = transient_prepare_exception; 77 + store->commit_exception = transient_commit_exception; 78 + store->drop_snapshot = NULL; 79 + store->fraction_full = transient_fraction_full; 80 + 81 + tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); 82 + if (!tc) 83 + return -ENOMEM; 84 + 85 + tc->next_free = 0; 86 + store->context = tc; 87 + 88 + return 0; 89 + } 90 + 91 + int dm_transient_snapshot_init(void) 92 + { 93 + return 0; 94 + } 95 + 96 + void dm_transient_snapshot_exit(void) 97 + { 98 + }
+28 -20
drivers/md/dm-snap.c
··· 9 #include <linux/blkdev.h> 10 #include <linux/ctype.h> 11 #include <linux/device-mapper.h> 12 #include <linux/fs.h> 13 #include <linux/init.h> 14 #include <linux/kdev_t.h> ··· 21 #include <linux/log2.h> 22 #include <linux/dm-kcopyd.h> 23 24 #include "dm-snap.h" 25 #include "dm-bio-list.h" 26 ··· 430 list_add(&new_e->hash_list, e ? &e->hash_list : l); 431 } 432 433 - int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new) 434 { 435 struct dm_snap_exception *e; 436 437 e = alloc_exception(); ··· 665 spin_lock_init(&s->tracked_chunk_lock); 666 667 /* Metadata must only be loaded into one table at once */ 668 - r = s->store.read_metadata(&s->store); 669 if (r < 0) { 670 ti->error = "Failed to read snapshot metadata"; 671 goto bad_load_and_register; ··· 742 unregister_snapshot(s); 743 744 while (atomic_read(&s->pending_exceptions_count)) 745 - yield(); 746 /* 747 * Ensure instructions in mempool_destroy aren't reordered 748 * before atomic_read. ··· 895 896 /* 897 * Check for conflicting reads. This is extremely improbable, 898 - * so yield() is sufficient and there is no need for a wait queue. 899 */ 900 while (__chunk_is_tracked(s, pe->e.old_chunk)) 901 - yield(); 902 903 /* 904 * Add a proper exception, and remove the ··· 1411 { 1412 int r; 1413 1414 r = dm_register_target(&snapshot_target); 1415 if (r) { 1416 DMERR("snapshot target register failed %d", r); ··· 1465 1466 return 0; 1467 1468 - bad_pending_pool: 1469 kmem_cache_destroy(tracked_chunk_cache); 1470 - bad5: 1471 kmem_cache_destroy(pending_cache); 1472 - bad4: 1473 kmem_cache_destroy(exception_cache); 1474 - bad3: 1475 exit_origin_hash(); 1476 - bad2: 1477 dm_unregister_target(&origin_target); 1478 - bad1: 1479 dm_unregister_target(&snapshot_target); 1480 return r; 1481 } 1482 1483 static void __exit dm_snapshot_exit(void) 1484 { 1485 - int r; 1486 - 1487 destroy_workqueue(ksnapd); 1488 1489 - r = dm_unregister_target(&snapshot_target); 1490 - if (r) 1491 - DMERR("snapshot unregister failed %d", r); 1492 - 1493 - r = dm_unregister_target(&origin_target); 1494 - if (r) 1495 - DMERR("origin unregister failed %d", r); 1496 1497 exit_origin_hash(); 1498 kmem_cache_destroy(pending_cache); 1499 kmem_cache_destroy(exception_cache); 1500 kmem_cache_destroy(tracked_chunk_cache); 1501 } 1502 1503 /* Module hooks */
··· 9 #include <linux/blkdev.h> 10 #include <linux/ctype.h> 11 #include <linux/device-mapper.h> 12 + #include <linux/delay.h> 13 #include <linux/fs.h> 14 #include <linux/init.h> 15 #include <linux/kdev_t.h> ··· 20 #include <linux/log2.h> 21 #include <linux/dm-kcopyd.h> 22 23 + #include "dm-exception-store.h" 24 #include "dm-snap.h" 25 #include "dm-bio-list.h" 26 ··· 428 list_add(&new_e->hash_list, e ? &e->hash_list : l); 429 } 430 431 + /* 432 + * Callback used by the exception stores to load exceptions when 433 + * initialising. 434 + */ 435 + static int dm_add_exception(void *context, chunk_t old, chunk_t new) 436 { 437 + struct dm_snapshot *s = context; 438 struct dm_snap_exception *e; 439 440 e = alloc_exception(); ··· 658 spin_lock_init(&s->tracked_chunk_lock); 659 660 /* Metadata must only be loaded into one table at once */ 661 + r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); 662 if (r < 0) { 663 ti->error = "Failed to read snapshot metadata"; 664 goto bad_load_and_register; ··· 735 unregister_snapshot(s); 736 737 while (atomic_read(&s->pending_exceptions_count)) 738 + msleep(1); 739 /* 740 * Ensure instructions in mempool_destroy aren't reordered 741 * before atomic_read. ··· 888 889 /* 890 * Check for conflicting reads. This is extremely improbable, 891 + * so msleep(1) is sufficient and there is no need for a wait queue. 892 */ 893 while (__chunk_is_tracked(s, pe->e.old_chunk)) 894 + msleep(1); 895 896 /* 897 * Add a proper exception, and remove the ··· 1404 { 1405 int r; 1406 1407 + r = dm_exception_store_init(); 1408 + if (r) { 1409 + DMERR("Failed to initialize exception stores"); 1410 + return r; 1411 + } 1412 + 1413 r = dm_register_target(&snapshot_target); 1414 if (r) { 1415 DMERR("snapshot target register failed %d", r); ··· 1452 1453 return 0; 1454 1455 + bad_pending_pool: 1456 kmem_cache_destroy(tracked_chunk_cache); 1457 + bad5: 1458 kmem_cache_destroy(pending_cache); 1459 + bad4: 1460 kmem_cache_destroy(exception_cache); 1461 + bad3: 1462 exit_origin_hash(); 1463 + bad2: 1464 dm_unregister_target(&origin_target); 1465 + bad1: 1466 dm_unregister_target(&snapshot_target); 1467 return r; 1468 } 1469 1470 static void __exit dm_snapshot_exit(void) 1471 { 1472 destroy_workqueue(ksnapd); 1473 1474 + dm_unregister_target(&snapshot_target); 1475 + dm_unregister_target(&origin_target); 1476 1477 exit_origin_hash(); 1478 kmem_cache_destroy(pending_cache); 1479 kmem_cache_destroy(exception_cache); 1480 kmem_cache_destroy(tracked_chunk_cache); 1481 + 1482 + dm_exception_store_exit(); 1483 } 1484 1485 /* Module hooks */
+2 -127
drivers/md/dm-snap.h
··· 1 /* 2 - * dm-snapshot.c 3 - * 4 * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 5 * 6 * This file is released under the GPL. ··· 8 #define DM_SNAPSHOT_H 9 10 #include <linux/device-mapper.h> 11 #include "dm-bio-list.h" 12 #include <linux/blkdev.h> 13 #include <linux/workqueue.h> ··· 17 uint32_t hash_mask; 18 unsigned hash_shift; 19 struct list_head *table; 20 - }; 21 - 22 - /* 23 - * The snapshot code deals with largish chunks of the disk at a 24 - * time. Typically 32k - 512k. 25 - */ 26 - typedef sector_t chunk_t; 27 - 28 - /* 29 - * An exception is used where an old chunk of data has been 30 - * replaced by a new one. 31 - * If chunk_t is 64 bits in size, the top 8 bits of new_chunk hold the number 32 - * of chunks that follow contiguously. Remaining bits hold the number of the 33 - * chunk within the device. 34 - */ 35 - struct dm_snap_exception { 36 - struct list_head hash_list; 37 - 38 - chunk_t old_chunk; 39 - chunk_t new_chunk; 40 - }; 41 - 42 - /* 43 - * Funtions to manipulate consecutive chunks 44 - */ 45 - # if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) 46 - # define DM_CHUNK_CONSECUTIVE_BITS 8 47 - # define DM_CHUNK_NUMBER_BITS 56 48 - 49 - static inline chunk_t dm_chunk_number(chunk_t chunk) 50 - { 51 - return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL); 52 - } 53 - 54 - static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) 55 - { 56 - return e->new_chunk >> DM_CHUNK_NUMBER_BITS; 57 - } 58 - 59 - static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) 60 - { 61 - e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS); 62 - 63 - BUG_ON(!dm_consecutive_chunk_count(e)); 64 - } 65 - 66 - # else 67 - # define DM_CHUNK_CONSECUTIVE_BITS 0 68 - 69 - static inline chunk_t dm_chunk_number(chunk_t chunk) 70 - { 71 - return chunk; 72 - } 73 - 74 - static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e) 75 - { 76 - return 0; 77 - } 78 - 79 - static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) 80 - { 81 - } 82 - 83 - # endif 84 - 85 - /* 86 - * Abstraction to handle the meta/layout of exception stores (the 87 - * COW device). 88 - */ 89 - struct exception_store { 90 - 91 - /* 92 - * Destroys this object when you've finished with it. 93 - */ 94 - void (*destroy) (struct exception_store *store); 95 - 96 - /* 97 - * The target shouldn't read the COW device until this is 98 - * called. 99 - */ 100 - int (*read_metadata) (struct exception_store *store); 101 - 102 - /* 103 - * Find somewhere to store the next exception. 104 - */ 105 - int (*prepare_exception) (struct exception_store *store, 106 - struct dm_snap_exception *e); 107 - 108 - /* 109 - * Update the metadata with this exception. 110 - */ 111 - void (*commit_exception) (struct exception_store *store, 112 - struct dm_snap_exception *e, 113 - void (*callback) (void *, int success), 114 - void *callback_context); 115 - 116 - /* 117 - * The snapshot is invalid, note this in the metadata. 118 - */ 119 - void (*drop_snapshot) (struct exception_store *store); 120 - 121 - /* 122 - * Return how full the snapshot is. 
123 - */ 124 - void (*fraction_full) (struct exception_store *store, 125 - sector_t *numerator, 126 - sector_t *denominator); 127 - 128 - struct dm_snapshot *snap; 129 - void *context; 130 }; 131 132 #define DM_TRACKED_CHUNK_HASH_SIZE 16 ··· 61 spinlock_t pe_lock; 62 63 /* The on disk metadata handler */ 64 - struct exception_store store; 65 66 struct dm_kcopyd_client *kcopyd_client; 67 ··· 74 spinlock_t tracked_chunk_lock; 75 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 76 }; 77 - 78 - /* 79 - * Used by the exception stores to load exceptions hen 80 - * initialising. 81 - */ 82 - int dm_add_exception(struct dm_snapshot *s, chunk_t old, chunk_t new); 83 - 84 - /* 85 - * Constructor and destructor for the default persistent 86 - * store. 87 - */ 88 - int dm_create_persistent(struct exception_store *store); 89 - 90 - int dm_create_transient(struct exception_store *store); 91 92 /* 93 * Return the number of sectors in the device.
··· 1 /* 2 * Copyright (C) 2001-2002 Sistina Software (UK) Limited. 3 * 4 * This file is released under the GPL. ··· 10 #define DM_SNAPSHOT_H 11 12 #include <linux/device-mapper.h> 13 + #include "dm-exception-store.h" 14 #include "dm-bio-list.h" 15 #include <linux/blkdev.h> 16 #include <linux/workqueue.h> ··· 18 uint32_t hash_mask; 19 unsigned hash_shift; 20 struct list_head *table; 21 }; 22 23 #define DM_TRACKED_CHUNK_HASH_SIZE 16 ··· 172 spinlock_t pe_lock; 173 174 /* The on disk metadata handler */ 175 + struct dm_exception_store store; 176 177 struct dm_kcopyd_client *kcopyd_client; 178 ··· 185 spinlock_t tracked_chunk_lock; 186 struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; 187 }; 188 189 /* 190 * Return the number of sectors in the device.
+1 -3
drivers/md/dm-stripe.c
··· 337 338 void dm_stripe_exit(void) 339 { 340 - if (dm_unregister_target(&stripe_target)) 341 - DMWARN("target unregistration failed"); 342 - 343 destroy_workqueue(kstriped); 344 345 return;
··· 337 338 void dm_stripe_exit(void) 339 { 340 + dm_unregister_target(&stripe_target); 341 destroy_workqueue(kstriped); 342 343 return;
+99
drivers/md/dm-sysfs.c
···
··· 1 + /* 2 + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. 3 + * 4 + * This file is released under the GPL. 5 + */ 6 + 7 + #include <linux/sysfs.h> 8 + #include <linux/dm-ioctl.h> 9 + #include "dm.h" 10 + 11 + struct dm_sysfs_attr { 12 + struct attribute attr; 13 + ssize_t (*show)(struct mapped_device *, char *); 14 + ssize_t (*store)(struct mapped_device *, char *); 15 + }; 16 + 17 + #define DM_ATTR_RO(_name) \ 18 + struct dm_sysfs_attr dm_attr_##_name = \ 19 + __ATTR(_name, S_IRUGO, dm_attr_##_name##_show, NULL) 20 + 21 + static ssize_t dm_attr_show(struct kobject *kobj, struct attribute *attr, 22 + char *page) 23 + { 24 + struct dm_sysfs_attr *dm_attr; 25 + struct mapped_device *md; 26 + ssize_t ret; 27 + 28 + dm_attr = container_of(attr, struct dm_sysfs_attr, attr); 29 + if (!dm_attr->show) 30 + return -EIO; 31 + 32 + md = dm_get_from_kobject(kobj); 33 + if (!md) 34 + return -EINVAL; 35 + 36 + ret = dm_attr->show(md, page); 37 + dm_put(md); 38 + 39 + return ret; 40 + } 41 + 42 + static ssize_t dm_attr_name_show(struct mapped_device *md, char *buf) 43 + { 44 + if (dm_copy_name_and_uuid(md, buf, NULL)) 45 + return -EIO; 46 + 47 + strcat(buf, "\n"); 48 + return strlen(buf); 49 + } 50 + 51 + static ssize_t dm_attr_uuid_show(struct mapped_device *md, char *buf) 52 + { 53 + if (dm_copy_name_and_uuid(md, NULL, buf)) 54 + return -EIO; 55 + 56 + strcat(buf, "\n"); 57 + return strlen(buf); 58 + } 59 + 60 + static DM_ATTR_RO(name); 61 + static DM_ATTR_RO(uuid); 62 + 63 + static struct attribute *dm_attrs[] = { 64 + &dm_attr_name.attr, 65 + &dm_attr_uuid.attr, 66 + NULL, 67 + }; 68 + 69 + static struct sysfs_ops dm_sysfs_ops = { 70 + .show = dm_attr_show, 71 + }; 72 + 73 + /* 74 + * dm kobject is embedded in mapped_device structure 75 + * no need to define release function here 76 + */ 77 + static struct kobj_type dm_ktype = { 78 + .sysfs_ops = &dm_sysfs_ops, 79 + .default_attrs = dm_attrs, 80 + }; 81 + 82 + /* 83 + * Initialize kobj 84 + * because nobody using md yet, no need to call explicit dm_get/put 85 + */ 86 + int dm_sysfs_init(struct mapped_device *md) 87 + { 88 + return kobject_init_and_add(dm_kobject(md), &dm_ktype, 89 + &disk_to_dev(dm_disk(md))->kobj, 90 + "%s", "dm"); 91 + } 92 + 93 + /* 94 + * Remove kobj, called after all references removed 95 + */ 96 + void dm_sysfs_exit(struct mapped_device *md) 97 + { 98 + kobject_put(dm_kobject(md)); 99 + }
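dm-sysfs.c hangs a "dm" kobject with read-only name and uuid attributes off each mapped device's gendisk, so the attributes should appear as /sys/block/dm-<N>/dm/name and /sys/block/dm-<N>/dm/uuid; that path is an assumption here, not spelled out in the patch. A small userspace sketch that reads both files for dm-0:

    /* Userspace sketch: read the new dm sysfs attributes (paths assumed). */
    #include <stdio.h>

    int main(void)
    {
            const char *files[] = {
                    "/sys/block/dm-0/dm/name",
                    "/sys/block/dm-0/dm/uuid",
            };
            char line[256];
            unsigned i;

            for (i = 0; i < 2; i++) {
                    FILE *f = fopen(files[i], "r");

                    if (!f) {
                            perror(files[i]);
                            continue;
                    }
                    /* The kernel appends a newline, so print the line as-is. */
                    if (fgets(line, sizeof(line), f))
                            printf("%s: %s", files[i], line);
                    fclose(f);
            }
            return 0;
    }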
+42 -5
drivers/md/dm-table.c
··· 1 /* 2 * Copyright (C) 2001 Sistina Software (UK) Limited. 3 - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. 4 * 5 * This file is released under the GPL. 6 */ ··· 15 #include <linux/slab.h> 16 #include <linux/interrupt.h> 17 #include <linux/mutex.h> 18 #include <asm/atomic.h> 19 20 #define DM_MSG_PREFIX "table" ··· 24 #define NODE_SIZE L1_CACHE_BYTES 25 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) 26 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) 27 28 struct dm_table { 29 struct mapped_device *md; ··· 51 unsigned int num_allocated; 52 sector_t *highs; 53 struct dm_target *targets; 54 55 /* 56 * Indicates the rw permissions for the new logical ··· 242 return -ENOMEM; 243 244 INIT_LIST_HEAD(&t->devices); 245 - atomic_set(&t->holders, 1); 246 247 if (!num_targets) 248 num_targets = KEYS_PER_NODE; ··· 273 } 274 } 275 276 - static void table_destroy(struct dm_table *t) 277 { 278 unsigned int i; 279 280 /* free the indexes (see dm_table_complete) */ 281 if (t->depth >= 2) ··· 318 if (!t) 319 return; 320 321 - if (atomic_dec_and_test(&t->holders)) 322 - table_destroy(t); 323 } 324 325 /* ··· 749 /* FIXME: the plan is to combine high here and then have 750 * the merge fn apply the target level restrictions. */ 751 combine_restrictions_low(&t->limits, &tgt->limits); 752 return 0; 753 754 bad: ··· 796 unsigned int leaf_nodes; 797 798 check_for_valid_limits(&t->limits); 799 800 /* how many indexes will the btree have ? */ 801 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); ··· 1016 1017 return t->md; 1018 } 1019 1020 EXPORT_SYMBOL(dm_vcalloc); 1021 EXPORT_SYMBOL(dm_get_device);
··· 1 /* 2 * Copyright (C) 2001 Sistina Software (UK) Limited. 3 + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This file is released under the GPL. 6 */ ··· 15 #include <linux/slab.h> 16 #include <linux/interrupt.h> 17 #include <linux/mutex.h> 18 + #include <linux/delay.h> 19 #include <asm/atomic.h> 20 21 #define DM_MSG_PREFIX "table" ··· 23 #define NODE_SIZE L1_CACHE_BYTES 24 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) 25 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) 26 + 27 + /* 28 + * The table has always exactly one reference from either mapped_device->map 29 + * or hash_cell->new_map. This reference is not counted in table->holders. 30 + * A pair of dm_create_table/dm_destroy_table functions is used for table 31 + * creation/destruction. 32 + * 33 + * Temporary references from the other code increase table->holders. A pair 34 + * of dm_table_get/dm_table_put functions is used to manipulate it. 35 + * 36 + * When the table is about to be destroyed, we wait for table->holders to 37 + * drop to zero. 38 + */ 39 40 struct dm_table { 41 struct mapped_device *md; ··· 37 unsigned int num_allocated; 38 sector_t *highs; 39 struct dm_target *targets; 40 + 41 + unsigned barriers_supported:1; 42 43 /* 44 * Indicates the rw permissions for the new logical ··· 226 return -ENOMEM; 227 228 INIT_LIST_HEAD(&t->devices); 229 + atomic_set(&t->holders, 0); 230 + t->barriers_supported = 1; 231 232 if (!num_targets) 233 num_targets = KEYS_PER_NODE; ··· 256 } 257 } 258 259 + void dm_table_destroy(struct dm_table *t) 260 { 261 unsigned int i; 262 + 263 + while (atomic_read(&t->holders)) 264 + msleep(1); 265 + smp_mb(); 266 267 /* free the indexes (see dm_table_complete) */ 268 if (t->depth >= 2) ··· 297 if (!t) 298 return; 299 300 + smp_mb__before_atomic_dec(); 301 + atomic_dec(&t->holders); 302 } 303 304 /* ··· 728 /* FIXME: the plan is to combine high here and then have 729 * the merge fn apply the target level restrictions. */ 730 combine_restrictions_low(&t->limits, &tgt->limits); 731 + 732 + if (!(tgt->type->features & DM_TARGET_SUPPORTS_BARRIERS)) 733 + t->barriers_supported = 0; 734 + 735 return 0; 736 737 bad: ··· 771 unsigned int leaf_nodes; 772 773 check_for_valid_limits(&t->limits); 774 + 775 + /* 776 + * We only support barriers if there is exactly one underlying device. 777 + */ 778 + if (!list_is_singular(&t->devices)) 779 + t->barriers_supported = 0; 780 781 /* how many indexes will the btree have ? */ 782 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); ··· 985 986 return t->md; 987 } 988 + 989 + int dm_table_barrier_ok(struct dm_table *t) 990 + { 991 + return t->barriers_supported; 992 + } 993 + EXPORT_SYMBOL(dm_table_barrier_ok); 994 995 EXPORT_SYMBOL(dm_vcalloc); 996 EXPORT_SYMBOL(dm_get_device);
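Under the reworked counting the table has exactly one owning reference, released with dm_table_destroy(), while temporary users take and drop holders with dm_table_get()/dm_table_put(); dm_table_destroy() sleeps in msleep(1) until holders drains to zero. A sketch of the temporary-reference pattern, plus the new barrier query; inspect_table() is illustrative only and the includes assume building inside drivers/md.

    /*
     * Sketch only: taking a temporary table reference.
     * inspect_table() is hypothetical.
     */
    #include <linux/device-mapper.h>
    #include "dm.h"                 /* dm_table_barrier_ok() */

    #define DM_MSG_PREFIX "example"

    static void inspect_table(struct mapped_device *md)
    {
            struct dm_table *map = dm_get_table(md);        /* holders++ */

            if (!map)
                    return;

            if (!dm_table_barrier_ok(map))
                    DMWARN("barriers are not supported by this table");

            dm_table_put(map);      /* holders--; never frees the table */
    }

Only the owner (mapped_device->map or hash_cell->new_map) calls dm_table_destroy(), which waits for any such holders before freeing.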
+7 -8
drivers/md/dm-target.c
··· 130 return rv; 131 } 132 133 - int dm_unregister_target(struct target_type *t) 134 { 135 struct tt_internal *ti; 136 137 down_write(&_lock); 138 if (!(ti = __find_target_type(t->name))) { 139 - up_write(&_lock); 140 - return -EINVAL; 141 } 142 143 if (ti->use) { 144 - up_write(&_lock); 145 - return -ETXTBSY; 146 } 147 148 list_del(&ti->list); 149 kfree(ti); 150 151 up_write(&_lock); 152 - return 0; 153 } 154 155 /* ··· 187 188 void dm_target_exit(void) 189 { 190 - if (dm_unregister_target(&error_target)) 191 - DMWARN("error target unregistration failed"); 192 } 193 194 EXPORT_SYMBOL(dm_register_target);
··· 130 return rv; 131 } 132 133 + void dm_unregister_target(struct target_type *t) 134 { 135 struct tt_internal *ti; 136 137 down_write(&_lock); 138 if (!(ti = __find_target_type(t->name))) { 139 + DMCRIT("Unregistering unrecognised target: %s", t->name); 140 + BUG(); 141 } 142 143 if (ti->use) { 144 + DMCRIT("Attempt to unregister target still in use: %s", 145 + t->name); 146 + BUG(); 147 } 148 149 list_del(&ti->list); 150 kfree(ti); 151 152 up_write(&_lock); 153 } 154 155 /* ··· 187 188 void dm_target_exit(void) 189 { 190 + dm_unregister_target(&error_target); 191 } 192 193 EXPORT_SYMBOL(dm_register_target);
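Because dm_unregister_target() now returns void and BUG()s on misuse, callers no longer check a status; error unwinding in a module init simplifies accordingly, mirroring the converted exit paths above. A minimal sketch, where first_target and second_target are assumed to be target_type objects defined elsewhere in the module:

    /* Sketch only: init-time unwinding with the void dm_unregister_target(). */
    static int __init example_init(void)
    {
            int r;

            r = dm_register_target(&first_target);
            if (r)
                    return r;

            r = dm_register_target(&second_target);
            if (r)
                    goto bad;

            return 0;

    bad:
            dm_unregister_target(&first_target);    /* no status to check */
            return r;
    }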
+1 -4
drivers/md/dm-zero.c
··· 69 70 static void __exit dm_zero_exit(void) 71 { 72 - int r = dm_unregister_target(&zero_target); 73 - 74 - if (r < 0) 75 - DMERR("unregister failed %d", r); 76 } 77 78 module_init(dm_zero_init)
··· 69 70 static void __exit dm_zero_exit(void) 71 { 72 + dm_unregister_target(&zero_target); 73 } 74 75 module_init(dm_zero_init)
+80 -23
drivers/md/dm.c
··· 1 /* 2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. 3 - * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This file is released under the GPL. 6 */ ··· 32 33 static DEFINE_SPINLOCK(_minor_lock); 34 /* 35 * One of these is allocated per bio. 36 */ 37 struct dm_io { ··· 44 }; 45 46 /* 47 * One of these is allocated per target within a bio. Hopefully 48 * this will be simplified out one day. 49 */ ··· 55 }; 56 57 DEFINE_TRACE(block_bio_complete); 58 59 union map_info *dm_get_mapinfo(struct bio *bio) 60 { ··· 167 168 /* forced geometry settings */ 169 struct hd_geometry geometry; 170 }; 171 172 #define MIN_IOS 256 173 static struct kmem_cache *_io_cache; 174 static struct kmem_cache *_tio_cache; 175 176 static int __init local_init(void) 177 { ··· 192 if (!_tio_cache) 193 goto out_free_io_cache; 194 195 r = dm_uevent_init(); 196 if (r) 197 - goto out_free_tio_cache; 198 199 _major = major; 200 r = register_blkdev(_major, _name); ··· 216 217 out_uevent_exit: 218 dm_uevent_exit(); 219 out_free_tio_cache: 220 kmem_cache_destroy(_tio_cache); 221 out_free_io_cache: ··· 230 231 static void local_exit(void) 232 { 233 kmem_cache_destroy(_tio_cache); 234 kmem_cache_destroy(_io_cache); 235 unregister_blkdev(_major, _name); ··· 838 ci.map = dm_get_table(md); 839 if (unlikely(!ci.map)) 840 return -EIO; 841 - 842 ci.md = md; 843 ci.bio = bio; 844 ci.io = alloc_io(md); ··· 926 struct mapped_device *md = q->queuedata; 927 int cpu; 928 929 - /* 930 - * There is no use in forwarding any barrier request since we can't 931 - * guarantee it is (or can be) handled by the targets correctly. 932 - */ 933 - if (unlikely(bio_barrier(bio))) { 934 - bio_endio(bio, -EOPNOTSUPP); 935 - return 0; 936 - } 937 - 938 down_read(&md->io_lock); 939 940 cpu = part_stat_lock(); ··· 980 struct mapped_device *md = congested_data; 981 struct dm_table *map; 982 983 - atomic_inc(&md->pending); 984 - 985 if (!test_bit(DMF_BLOCK_IO, &md->flags)) { 986 map = dm_get_table(md); 987 if (map) { ··· 987 dm_table_put(map); 988 } 989 } 990 - 991 - if (!atomic_dec_return(&md->pending)) 992 - /* nudge anyone waiting on suspend queue */ 993 - wake_up(&md->wait); 994 995 return r; 996 } ··· 1247 1248 if (md->suspended_bdev) 1249 __set_size(md, size); 1250 - if (size == 0) 1251 - return 0; 1252 1253 - dm_table_get(t); 1254 dm_table_event_callback(t, event_callback, md); 1255 1256 write_lock(&md->map_lock); ··· 1274 write_lock(&md->map_lock); 1275 md->map = NULL; 1276 write_unlock(&md->map_lock); 1277 - dm_table_put(map); 1278 } 1279 1280 /* ··· 1287 md = alloc_dev(minor); 1288 if (!md) 1289 return -ENXIO; 1290 1291 *result = md; 1292 return 0; ··· 1365 dm_table_presuspend_targets(map); 1366 dm_table_postsuspend_targets(map); 1367 } 1368 - __unbind(md); 1369 dm_table_put(map); 1370 free_dev(md); 1371 } 1372 } ··· 1703 struct gendisk *dm_disk(struct mapped_device *md) 1704 { 1705 return md->disk; 1706 } 1707 1708 int dm_suspended(struct mapped_device *md)
··· 1 /* 2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. 3 + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This file is released under the GPL. 6 */ ··· 32 33 static DEFINE_SPINLOCK(_minor_lock); 34 /* 35 + * For bio-based dm. 36 * One of these is allocated per bio. 37 */ 38 struct dm_io { ··· 43 }; 44 45 /* 46 + * For bio-based dm. 47 * One of these is allocated per target within a bio. Hopefully 48 * this will be simplified out one day. 49 */ ··· 53 }; 54 55 DEFINE_TRACE(block_bio_complete); 56 + 57 + /* 58 + * For request-based dm. 59 + * One of these is allocated per request. 60 + */ 61 + struct dm_rq_target_io { 62 + struct mapped_device *md; 63 + struct dm_target *ti; 64 + struct request *orig, clone; 65 + int error; 66 + union map_info info; 67 + }; 68 + 69 + /* 70 + * For request-based dm. 71 + * One of these is allocated per bio. 72 + */ 73 + struct dm_rq_clone_bio_info { 74 + struct bio *orig; 75 + struct request *rq; 76 + }; 77 78 union map_info *dm_get_mapinfo(struct bio *bio) 79 { ··· 144 145 /* forced geometry settings */ 146 struct hd_geometry geometry; 147 + 148 + /* sysfs handle */ 149 + struct kobject kobj; 150 }; 151 152 #define MIN_IOS 256 153 static struct kmem_cache *_io_cache; 154 static struct kmem_cache *_tio_cache; 155 + static struct kmem_cache *_rq_tio_cache; 156 + static struct kmem_cache *_rq_bio_info_cache; 157 158 static int __init local_init(void) 159 { ··· 164 if (!_tio_cache) 165 goto out_free_io_cache; 166 167 + _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0); 168 + if (!_rq_tio_cache) 169 + goto out_free_tio_cache; 170 + 171 + _rq_bio_info_cache = KMEM_CACHE(dm_rq_clone_bio_info, 0); 172 + if (!_rq_bio_info_cache) 173 + goto out_free_rq_tio_cache; 174 + 175 r = dm_uevent_init(); 176 if (r) 177 + goto out_free_rq_bio_info_cache; 178 179 _major = major; 180 r = register_blkdev(_major, _name); ··· 180 181 out_uevent_exit: 182 dm_uevent_exit(); 183 + out_free_rq_bio_info_cache: 184 + kmem_cache_destroy(_rq_bio_info_cache); 185 + out_free_rq_tio_cache: 186 + kmem_cache_destroy(_rq_tio_cache); 187 out_free_tio_cache: 188 kmem_cache_destroy(_tio_cache); 189 out_free_io_cache: ··· 190 191 static void local_exit(void) 192 { 193 + kmem_cache_destroy(_rq_bio_info_cache); 194 + kmem_cache_destroy(_rq_tio_cache); 195 kmem_cache_destroy(_tio_cache); 196 kmem_cache_destroy(_io_cache); 197 unregister_blkdev(_major, _name); ··· 796 ci.map = dm_get_table(md); 797 if (unlikely(!ci.map)) 798 return -EIO; 799 + if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { 800 + dm_table_put(ci.map); 801 + bio_endio(bio, -EOPNOTSUPP); 802 + return 0; 803 + } 804 ci.md = md; 805 ci.bio = bio; 806 ci.io = alloc_io(md); ··· 880 struct mapped_device *md = q->queuedata; 881 int cpu; 882 883 down_read(&md->io_lock); 884 885 cpu = part_stat_lock(); ··· 943 struct mapped_device *md = congested_data; 944 struct dm_table *map; 945 946 if (!test_bit(DMF_BLOCK_IO, &md->flags)) { 947 map = dm_get_table(md); 948 if (map) { ··· 952 dm_table_put(map); 953 } 954 } 955 956 return r; 957 } ··· 1216 1217 if (md->suspended_bdev) 1218 __set_size(md, size); 1219 1220 + if (!size) { 1221 + dm_table_destroy(t); 1222 + return 0; 1223 + } 1224 + 1225 dm_table_event_callback(t, event_callback, md); 1226 1227 write_lock(&md->map_lock); ··· 1241 write_lock(&md->map_lock); 1242 md->map = NULL; 1243 write_unlock(&md->map_lock); 1244 + dm_table_destroy(map); 1245 } 1246 1247 /* ··· 1254 md = alloc_dev(minor); 1255 if (!md) 1256 return -ENXIO; 1257 + 1258 + 
dm_sysfs_init(md); 1259 1260 *result = md; 1261 return 0; ··· 1330 dm_table_presuspend_targets(map); 1331 dm_table_postsuspend_targets(map); 1332 } 1333 + dm_sysfs_exit(md); 1334 dm_table_put(map); 1335 + __unbind(md); 1336 free_dev(md); 1337 } 1338 } ··· 1667 struct gendisk *dm_disk(struct mapped_device *md) 1668 { 1669 return md->disk; 1670 + } 1671 + 1672 + struct kobject *dm_kobject(struct mapped_device *md) 1673 + { 1674 + return &md->kobj; 1675 + } 1676 + 1677 + /* 1678 + * struct mapped_device should not be exported outside of dm.c 1679 + * so use this check to verify that kobj is part of md structure 1680 + */ 1681 + struct mapped_device *dm_get_from_kobject(struct kobject *kobj) 1682 + { 1683 + struct mapped_device *md; 1684 + 1685 + md = container_of(kobj, struct mapped_device, kobj); 1686 + if (&md->kobj != kobj) 1687 + return NULL; 1688 + 1689 + dm_get(md); 1690 + return md; 1691 } 1692 1693 int dm_suspended(struct mapped_device *md)
+10
drivers/md/dm.h
··· 36 /*----------------------------------------------------------------- 37 * Internal table functions. 38 *---------------------------------------------------------------*/ 39 void dm_table_event_callback(struct dm_table *t, 40 void (*fn)(void *), void *context); 41 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); ··· 52 * To check the return value from dm_table_find_target(). 53 */ 54 #define dm_target_is_valid(t) ((t)->table) 55 56 /*----------------------------------------------------------------- 57 * A registry of target types. ··· 72 */ 73 int dm_interface_init(void); 74 void dm_interface_exit(void); 75 76 /* 77 * Targets for linear and striped mappings
··· 36 /*----------------------------------------------------------------- 37 * Internal table functions. 38 *---------------------------------------------------------------*/ 39 + void dm_table_destroy(struct dm_table *t); 40 void dm_table_event_callback(struct dm_table *t, 41 void (*fn)(void *), void *context); 42 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); ··· 51 * To check the return value from dm_table_find_target(). 52 */ 53 #define dm_target_is_valid(t) ((t)->table) 54 + int dm_table_barrier_ok(struct dm_table *t); 55 56 /*----------------------------------------------------------------- 57 * A registry of target types. ··· 70 */ 71 int dm_interface_init(void); 72 void dm_interface_exit(void); 73 + 74 + /* 75 + * sysfs interface 76 + */ 77 + int dm_sysfs_init(struct mapped_device *md); 78 + void dm_sysfs_exit(struct mapped_device *md); 79 + struct kobject *dm_kobject(struct mapped_device *md); 80 + struct mapped_device *dm_get_from_kobject(struct kobject *kobj); 81 82 /* 83 * Targets for linear and striped mappings
+26 -2
include/linux/device-mapper.h
··· 45 */ 46 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, 47 union map_info *map_context); 48 49 /* 50 * Returns: ··· 59 typedef int (*dm_endio_fn) (struct dm_target *ti, 60 struct bio *bio, int error, 61 union map_info *map_context); 62 63 typedef void (*dm_flush_fn) (struct dm_target *ti); 64 typedef void (*dm_presuspend_fn) (struct dm_target *ti); ··· 79 80 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, 81 struct bio_vec *biovec, int max_size); 82 83 void dm_error(const char *message); 84 ··· 112 /* 113 * Information about a target type 114 */ 115 struct target_type { 116 const char *name; 117 struct module *module; 118 unsigned version[3]; 119 dm_ctr_fn ctr; 120 dm_dtr_fn dtr; 121 dm_map_fn map; 122 dm_endio_fn end_io; 123 dm_flush_fn flush; 124 dm_presuspend_fn presuspend; 125 dm_postsuspend_fn postsuspend; ··· 138 dm_message_fn message; 139 dm_ioctl_fn ioctl; 140 dm_merge_fn merge; 141 }; 142 143 struct io_restrictions { ··· 179 }; 180 181 int dm_register_target(struct target_type *t); 182 - int dm_unregister_target(struct target_type *t); 183 - 184 185 /*----------------------------------------------------------------- 186 * Functions for creating and manipulating mapped devices. ··· 296 * Macros. 297 *---------------------------------------------------------------*/ 298 #define DM_NAME "device-mapper" 299 300 #define DMERR(f, arg...) \ 301 printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
··· 45 */ 46 typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio, 47 union map_info *map_context); 48 + typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, 49 + union map_info *map_context); 50 51 /* 52 * Returns: ··· 57 typedef int (*dm_endio_fn) (struct dm_target *ti, 58 struct bio *bio, int error, 59 union map_info *map_context); 60 + typedef int (*dm_request_endio_fn) (struct dm_target *ti, 61 + struct request *clone, int error, 62 + union map_info *map_context); 63 64 typedef void (*dm_flush_fn) (struct dm_target *ti); 65 typedef void (*dm_presuspend_fn) (struct dm_target *ti); ··· 74 75 typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, 76 struct bio_vec *biovec, int max_size); 77 + 78 + /* 79 + * Returns: 80 + * 0: The target can handle the next I/O immediately. 81 + * 1: The target can't handle the next I/O immediately. 82 + */ 83 + typedef int (*dm_busy_fn) (struct dm_target *ti); 84 85 void dm_error(const char *message); 86 ··· 100 /* 101 * Information about a target type 102 */ 103 + 104 + /* 105 + * Target features 106 + */ 107 + #define DM_TARGET_SUPPORTS_BARRIERS 0x00000001 108 + 109 struct target_type { 110 + uint64_t features; 111 const char *name; 112 struct module *module; 113 unsigned version[3]; 114 dm_ctr_fn ctr; 115 dm_dtr_fn dtr; 116 dm_map_fn map; 117 + dm_map_request_fn map_rq; 118 dm_endio_fn end_io; 119 + dm_request_endio_fn rq_end_io; 120 dm_flush_fn flush; 121 dm_presuspend_fn presuspend; 122 dm_postsuspend_fn postsuspend; ··· 117 dm_message_fn message; 118 dm_ioctl_fn ioctl; 119 dm_merge_fn merge; 120 + dm_busy_fn busy; 121 }; 122 123 struct io_restrictions { ··· 157 }; 158 159 int dm_register_target(struct target_type *t); 160 + void dm_unregister_target(struct target_type *t); 161 162 /*----------------------------------------------------------------- 163 * Functions for creating and manipulating mapped devices. ··· 275 * Macros. 276 *---------------------------------------------------------------*/ 277 #define DM_NAME "device-mapper" 278 + 279 + #define DMCRIT(f, arg...) \ 280 + printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) 281 282 #define DMERR(f, arg...) \ 283 printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg)
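device-mapper.h now carries a features word (currently only DM_TARGET_SUPPORTS_BARRIERS), the request-based hooks map_rq, rq_end_io and busy, and the void dm_unregister_target(). The sketch below shows a trivial bio-based target opting into barrier support; the "example" name, handlers and behaviour are illustrative only, and the request-based hooks are left unset since nothing in this merge drives them yet.

    /*
     * Sketch only: a bio-based target using the extended target_type.
     */
    #include <linux/module.h>
    #include <linux/init.h>
    #include <linux/bio.h>
    #include <linux/device-mapper.h>

    #define DM_MSG_PREFIX "example"

    static int example_ctr(struct dm_target *ti, unsigned argc, char **argv)
    {
            return 0;
    }

    static int example_map(struct dm_target *ti, struct bio *bio,
                           union map_info *map_context)
    {
            /* Complete everything immediately; a real target remaps instead. */
            bio_endio(bio, 0);
            return DM_MAPIO_SUBMITTED;
    }

    static struct target_type example_target = {
            .name     = "example",
            .version  = {1, 0, 0},
            .module   = THIS_MODULE,
            .features = DM_TARGET_SUPPORTS_BARRIERS,  /* new in this merge */
            .ctr      = example_ctr,
            .map      = example_map,
    };

    static int __init dm_example_init(void)
    {
            return dm_register_target(&example_target);
    }

    static void __exit dm_example_exit(void)
    {
            dm_unregister_target(&example_target);    /* returns void now */
    }

    module_init(dm_example_init);
    module_exit(dm_example_exit);
    MODULE_DESCRIPTION(DM_NAME " illustrative example target");
    MODULE_LICENSE("GPL");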