Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.18-rc1 1396 lines 28 kB view raw
1/* 2 * Copyright (C) 2001, 2002 Sistina Software (UK) Limited. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This file is released under the GPL. 6 */ 7 8#include "dm.h" 9#include "dm-bio-list.h" 10 11#include <linux/init.h> 12#include <linux/module.h> 13#include <linux/mutex.h> 14#include <linux/moduleparam.h> 15#include <linux/blkpg.h> 16#include <linux/bio.h> 17#include <linux/buffer_head.h> 18#include <linux/mempool.h> 19#include <linux/slab.h> 20#include <linux/idr.h> 21#include <linux/hdreg.h> 22#include <linux/blktrace_api.h> 23 24#define DM_MSG_PREFIX "core" 25 26static const char *_name = DM_NAME; 27 28static unsigned int major = 0; 29static unsigned int _major = 0; 30 31static DEFINE_SPINLOCK(_minor_lock); 32/* 33 * One of these is allocated per bio. 34 */ 35struct dm_io { 36 struct mapped_device *md; 37 int error; 38 struct bio *bio; 39 atomic_t io_count; 40 unsigned long start_time; 41}; 42 43/* 44 * One of these is allocated per target within a bio. Hopefully 45 * this will be simplified out one day. 46 */ 47struct target_io { 48 struct dm_io *io; 49 struct dm_target *ti; 50 union map_info info; 51}; 52 53union map_info *dm_get_mapinfo(struct bio *bio) 54{ 55 if (bio && bio->bi_private) 56 return &((struct target_io *)bio->bi_private)->info; 57 return NULL; 58} 59 60#define MINOR_ALLOCED ((void *)-1) 61 62/* 63 * Bits for the md->flags field. 64 */ 65#define DMF_BLOCK_IO 0 66#define DMF_SUSPENDED 1 67#define DMF_FROZEN 2 68#define DMF_FREEING 3 69#define DMF_DELETING 4 70 71struct mapped_device { 72 struct rw_semaphore io_lock; 73 struct semaphore suspend_lock; 74 rwlock_t map_lock; 75 atomic_t holders; 76 atomic_t open_count; 77 78 unsigned long flags; 79 80 request_queue_t *queue; 81 struct gendisk *disk; 82 char name[16]; 83 84 void *interface_ptr; 85 86 /* 87 * A list of ios that arrived while we were suspended. 88 */ 89 atomic_t pending; 90 wait_queue_head_t wait; 91 struct bio_list deferred; 92 93 /* 94 * The current mapping. 95 */ 96 struct dm_table *map; 97 98 /* 99 * io objects are allocated from here. 100 */ 101 mempool_t *io_pool; 102 mempool_t *tio_pool; 103 104 /* 105 * Event handling. 106 */ 107 atomic_t event_nr; 108 wait_queue_head_t eventq; 109 110 /* 111 * freeze/thaw support require holding onto a super block 112 */ 113 struct super_block *frozen_sb; 114 struct block_device *suspended_bdev; 115 116 /* forced geometry settings */ 117 struct hd_geometry geometry; 118}; 119 120#define MIN_IOS 256 121static kmem_cache_t *_io_cache; 122static kmem_cache_t *_tio_cache; 123 124static struct bio_set *dm_set; 125 126static int __init local_init(void) 127{ 128 int r; 129 130 dm_set = bioset_create(16, 16, 4); 131 if (!dm_set) 132 return -ENOMEM; 133 134 /* allocate a slab for the dm_ios */ 135 _io_cache = kmem_cache_create("dm_io", 136 sizeof(struct dm_io), 0, 0, NULL, NULL); 137 if (!_io_cache) 138 return -ENOMEM; 139 140 /* allocate a slab for the target ios */ 141 _tio_cache = kmem_cache_create("dm_tio", sizeof(struct target_io), 142 0, 0, NULL, NULL); 143 if (!_tio_cache) { 144 kmem_cache_destroy(_io_cache); 145 return -ENOMEM; 146 } 147 148 _major = major; 149 r = register_blkdev(_major, _name); 150 if (r < 0) { 151 kmem_cache_destroy(_tio_cache); 152 kmem_cache_destroy(_io_cache); 153 return r; 154 } 155 156 if (!_major) 157 _major = r; 158 159 return 0; 160} 161 162static void local_exit(void) 163{ 164 kmem_cache_destroy(_tio_cache); 165 kmem_cache_destroy(_io_cache); 166 167 bioset_free(dm_set); 168 169 if (unregister_blkdev(_major, _name) < 0) 170 DMERR("unregister_blkdev failed"); 171 172 _major = 0; 173 174 DMINFO("cleaned up"); 175} 176 177int (*_inits[])(void) __initdata = { 178 local_init, 179 dm_target_init, 180 dm_linear_init, 181 dm_stripe_init, 182 dm_interface_init, 183}; 184 185void (*_exits[])(void) = { 186 local_exit, 187 dm_target_exit, 188 dm_linear_exit, 189 dm_stripe_exit, 190 dm_interface_exit, 191}; 192 193static int __init dm_init(void) 194{ 195 const int count = ARRAY_SIZE(_inits); 196 197 int r, i; 198 199 for (i = 0; i < count; i++) { 200 r = _inits[i](); 201 if (r) 202 goto bad; 203 } 204 205 return 0; 206 207 bad: 208 while (i--) 209 _exits[i](); 210 211 return r; 212} 213 214static void __exit dm_exit(void) 215{ 216 int i = ARRAY_SIZE(_exits); 217 218 while (i--) 219 _exits[i](); 220} 221 222/* 223 * Block device functions 224 */ 225static int dm_blk_open(struct inode *inode, struct file *file) 226{ 227 struct mapped_device *md; 228 229 spin_lock(&_minor_lock); 230 231 md = inode->i_bdev->bd_disk->private_data; 232 if (!md) 233 goto out; 234 235 if (test_bit(DMF_FREEING, &md->flags) || 236 test_bit(DMF_DELETING, &md->flags)) { 237 md = NULL; 238 goto out; 239 } 240 241 dm_get(md); 242 atomic_inc(&md->open_count); 243 244out: 245 spin_unlock(&_minor_lock); 246 247 return md ? 0 : -ENXIO; 248} 249 250static int dm_blk_close(struct inode *inode, struct file *file) 251{ 252 struct mapped_device *md; 253 254 md = inode->i_bdev->bd_disk->private_data; 255 atomic_dec(&md->open_count); 256 dm_put(md); 257 return 0; 258} 259 260int dm_open_count(struct mapped_device *md) 261{ 262 return atomic_read(&md->open_count); 263} 264 265/* 266 * Guarantees nothing is using the device before it's deleted. 267 */ 268int dm_lock_for_deletion(struct mapped_device *md) 269{ 270 int r = 0; 271 272 spin_lock(&_minor_lock); 273 274 if (dm_open_count(md)) 275 r = -EBUSY; 276 else 277 set_bit(DMF_DELETING, &md->flags); 278 279 spin_unlock(&_minor_lock); 280 281 return r; 282} 283 284static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) 285{ 286 struct mapped_device *md = bdev->bd_disk->private_data; 287 288 return dm_get_geometry(md, geo); 289} 290 291static inline struct dm_io *alloc_io(struct mapped_device *md) 292{ 293 return mempool_alloc(md->io_pool, GFP_NOIO); 294} 295 296static inline void free_io(struct mapped_device *md, struct dm_io *io) 297{ 298 mempool_free(io, md->io_pool); 299} 300 301static inline struct target_io *alloc_tio(struct mapped_device *md) 302{ 303 return mempool_alloc(md->tio_pool, GFP_NOIO); 304} 305 306static inline void free_tio(struct mapped_device *md, struct target_io *tio) 307{ 308 mempool_free(tio, md->tio_pool); 309} 310 311static void start_io_acct(struct dm_io *io) 312{ 313 struct mapped_device *md = io->md; 314 315 io->start_time = jiffies; 316 317 preempt_disable(); 318 disk_round_stats(dm_disk(md)); 319 preempt_enable(); 320 dm_disk(md)->in_flight = atomic_inc_return(&md->pending); 321} 322 323static int end_io_acct(struct dm_io *io) 324{ 325 struct mapped_device *md = io->md; 326 struct bio *bio = io->bio; 327 unsigned long duration = jiffies - io->start_time; 328 int pending; 329 int rw = bio_data_dir(bio); 330 331 preempt_disable(); 332 disk_round_stats(dm_disk(md)); 333 preempt_enable(); 334 dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); 335 336 disk_stat_add(dm_disk(md), ticks[rw], duration); 337 338 return !pending; 339} 340 341/* 342 * Add the bio to the list of deferred io. 343 */ 344static int queue_io(struct mapped_device *md, struct bio *bio) 345{ 346 down_write(&md->io_lock); 347 348 if (!test_bit(DMF_BLOCK_IO, &md->flags)) { 349 up_write(&md->io_lock); 350 return 1; 351 } 352 353 bio_list_add(&md->deferred, bio); 354 355 up_write(&md->io_lock); 356 return 0; /* deferred successfully */ 357} 358 359/* 360 * Everyone (including functions in this file), should use this 361 * function to access the md->map field, and make sure they call 362 * dm_table_put() when finished. 363 */ 364struct dm_table *dm_get_table(struct mapped_device *md) 365{ 366 struct dm_table *t; 367 368 read_lock(&md->map_lock); 369 t = md->map; 370 if (t) 371 dm_table_get(t); 372 read_unlock(&md->map_lock); 373 374 return t; 375} 376 377/* 378 * Get the geometry associated with a dm device 379 */ 380int dm_get_geometry(struct mapped_device *md, struct hd_geometry *geo) 381{ 382 *geo = md->geometry; 383 384 return 0; 385} 386 387/* 388 * Set the geometry of a device. 389 */ 390int dm_set_geometry(struct mapped_device *md, struct hd_geometry *geo) 391{ 392 sector_t sz = (sector_t)geo->cylinders * geo->heads * geo->sectors; 393 394 if (geo->start > sz) { 395 DMWARN("Start sector is beyond the geometry limits."); 396 return -EINVAL; 397 } 398 399 md->geometry = *geo; 400 401 return 0; 402} 403 404/*----------------------------------------------------------------- 405 * CRUD START: 406 * A more elegant soln is in the works that uses the queue 407 * merge fn, unfortunately there are a couple of changes to 408 * the block layer that I want to make for this. So in the 409 * interests of getting something for people to use I give 410 * you this clearly demarcated crap. 411 *---------------------------------------------------------------*/ 412 413/* 414 * Decrements the number of outstanding ios that a bio has been 415 * cloned into, completing the original io if necc. 416 */ 417static void dec_pending(struct dm_io *io, int error) 418{ 419 if (error) 420 io->error = error; 421 422 if (atomic_dec_and_test(&io->io_count)) { 423 if (end_io_acct(io)) 424 /* nudge anyone waiting on suspend queue */ 425 wake_up(&io->md->wait); 426 427 blk_add_trace_bio(io->md->queue, io->bio, BLK_TA_COMPLETE); 428 429 bio_endio(io->bio, io->bio->bi_size, io->error); 430 free_io(io->md, io); 431 } 432} 433 434static int clone_endio(struct bio *bio, unsigned int done, int error) 435{ 436 int r = 0; 437 struct target_io *tio = bio->bi_private; 438 struct dm_io *io = tio->io; 439 dm_endio_fn endio = tio->ti->type->end_io; 440 441 if (bio->bi_size) 442 return 1; 443 444 if (!bio_flagged(bio, BIO_UPTODATE) && !error) 445 error = -EIO; 446 447 if (endio) { 448 r = endio(tio->ti, bio, error, &tio->info); 449 if (r < 0) 450 error = r; 451 452 else if (r > 0) 453 /* the target wants another shot at the io */ 454 return 1; 455 } 456 457 free_tio(io->md, tio); 458 dec_pending(io, error); 459 bio_put(bio); 460 return r; 461} 462 463static sector_t max_io_len(struct mapped_device *md, 464 sector_t sector, struct dm_target *ti) 465{ 466 sector_t offset = sector - ti->begin; 467 sector_t len = ti->len - offset; 468 469 /* 470 * Does the target need to split even further ? 471 */ 472 if (ti->split_io) { 473 sector_t boundary; 474 boundary = ((offset + ti->split_io) & ~(ti->split_io - 1)) 475 - offset; 476 if (len > boundary) 477 len = boundary; 478 } 479 480 return len; 481} 482 483static void __map_bio(struct dm_target *ti, struct bio *clone, 484 struct target_io *tio) 485{ 486 int r; 487 sector_t sector; 488 489 /* 490 * Sanity checks. 491 */ 492 BUG_ON(!clone->bi_size); 493 494 clone->bi_end_io = clone_endio; 495 clone->bi_private = tio; 496 497 /* 498 * Map the clone. If r == 0 we don't need to do 499 * anything, the target has assumed ownership of 500 * this io. 501 */ 502 atomic_inc(&tio->io->io_count); 503 sector = clone->bi_sector; 504 r = ti->type->map(ti, clone, &tio->info); 505 if (r > 0) { 506 /* the bio has been remapped so dispatch it */ 507 508 blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, 509 tio->io->bio->bi_bdev->bd_dev, sector, 510 clone->bi_sector); 511 512 generic_make_request(clone); 513 } 514 515 else if (r < 0) { 516 /* error the io and bail out */ 517 struct dm_io *io = tio->io; 518 free_tio(tio->io->md, tio); 519 dec_pending(io, r); 520 bio_put(clone); 521 } 522} 523 524struct clone_info { 525 struct mapped_device *md; 526 struct dm_table *map; 527 struct bio *bio; 528 struct dm_io *io; 529 sector_t sector; 530 sector_t sector_count; 531 unsigned short idx; 532}; 533 534static void dm_bio_destructor(struct bio *bio) 535{ 536 bio_free(bio, dm_set); 537} 538 539/* 540 * Creates a little bio that is just does part of a bvec. 541 */ 542static struct bio *split_bvec(struct bio *bio, sector_t sector, 543 unsigned short idx, unsigned int offset, 544 unsigned int len) 545{ 546 struct bio *clone; 547 struct bio_vec *bv = bio->bi_io_vec + idx; 548 549 clone = bio_alloc_bioset(GFP_NOIO, 1, dm_set); 550 clone->bi_destructor = dm_bio_destructor; 551 *clone->bi_io_vec = *bv; 552 553 clone->bi_sector = sector; 554 clone->bi_bdev = bio->bi_bdev; 555 clone->bi_rw = bio->bi_rw; 556 clone->bi_vcnt = 1; 557 clone->bi_size = to_bytes(len); 558 clone->bi_io_vec->bv_offset = offset; 559 clone->bi_io_vec->bv_len = clone->bi_size; 560 561 return clone; 562} 563 564/* 565 * Creates a bio that consists of range of complete bvecs. 566 */ 567static struct bio *clone_bio(struct bio *bio, sector_t sector, 568 unsigned short idx, unsigned short bv_count, 569 unsigned int len) 570{ 571 struct bio *clone; 572 573 clone = bio_clone(bio, GFP_NOIO); 574 clone->bi_sector = sector; 575 clone->bi_idx = idx; 576 clone->bi_vcnt = idx + bv_count; 577 clone->bi_size = to_bytes(len); 578 clone->bi_flags &= ~(1 << BIO_SEG_VALID); 579 580 return clone; 581} 582 583static void __clone_and_map(struct clone_info *ci) 584{ 585 struct bio *clone, *bio = ci->bio; 586 struct dm_target *ti = dm_table_find_target(ci->map, ci->sector); 587 sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti); 588 struct target_io *tio; 589 590 /* 591 * Allocate a target io object. 592 */ 593 tio = alloc_tio(ci->md); 594 tio->io = ci->io; 595 tio->ti = ti; 596 memset(&tio->info, 0, sizeof(tio->info)); 597 598 if (ci->sector_count <= max) { 599 /* 600 * Optimise for the simple case where we can do all of 601 * the remaining io with a single clone. 602 */ 603 clone = clone_bio(bio, ci->sector, ci->idx, 604 bio->bi_vcnt - ci->idx, ci->sector_count); 605 __map_bio(ti, clone, tio); 606 ci->sector_count = 0; 607 608 } else if (to_sector(bio->bi_io_vec[ci->idx].bv_len) <= max) { 609 /* 610 * There are some bvecs that don't span targets. 611 * Do as many of these as possible. 612 */ 613 int i; 614 sector_t remaining = max; 615 sector_t bv_len; 616 617 for (i = ci->idx; remaining && (i < bio->bi_vcnt); i++) { 618 bv_len = to_sector(bio->bi_io_vec[i].bv_len); 619 620 if (bv_len > remaining) 621 break; 622 623 remaining -= bv_len; 624 len += bv_len; 625 } 626 627 clone = clone_bio(bio, ci->sector, ci->idx, i - ci->idx, len); 628 __map_bio(ti, clone, tio); 629 630 ci->sector += len; 631 ci->sector_count -= len; 632 ci->idx = i; 633 634 } else { 635 /* 636 * Handle a bvec that must be split between two or more targets. 637 */ 638 struct bio_vec *bv = bio->bi_io_vec + ci->idx; 639 sector_t remaining = to_sector(bv->bv_len); 640 unsigned int offset = 0; 641 642 do { 643 if (offset) { 644 ti = dm_table_find_target(ci->map, ci->sector); 645 max = max_io_len(ci->md, ci->sector, ti); 646 647 tio = alloc_tio(ci->md); 648 tio->io = ci->io; 649 tio->ti = ti; 650 memset(&tio->info, 0, sizeof(tio->info)); 651 } 652 653 len = min(remaining, max); 654 655 clone = split_bvec(bio, ci->sector, ci->idx, 656 bv->bv_offset + offset, len); 657 658 __map_bio(ti, clone, tio); 659 660 ci->sector += len; 661 ci->sector_count -= len; 662 offset += to_bytes(len); 663 } while (remaining -= len); 664 665 ci->idx++; 666 } 667} 668 669/* 670 * Split the bio into several clones. 671 */ 672static void __split_bio(struct mapped_device *md, struct bio *bio) 673{ 674 struct clone_info ci; 675 676 ci.map = dm_get_table(md); 677 if (!ci.map) { 678 bio_io_error(bio, bio->bi_size); 679 return; 680 } 681 682 ci.md = md; 683 ci.bio = bio; 684 ci.io = alloc_io(md); 685 ci.io->error = 0; 686 atomic_set(&ci.io->io_count, 1); 687 ci.io->bio = bio; 688 ci.io->md = md; 689 ci.sector = bio->bi_sector; 690 ci.sector_count = bio_sectors(bio); 691 ci.idx = bio->bi_idx; 692 693 start_io_acct(ci.io); 694 while (ci.sector_count) 695 __clone_and_map(&ci); 696 697 /* drop the extra reference count */ 698 dec_pending(ci.io, 0); 699 dm_table_put(ci.map); 700} 701/*----------------------------------------------------------------- 702 * CRUD END 703 *---------------------------------------------------------------*/ 704 705/* 706 * The request function that just remaps the bio built up by 707 * dm_merge_bvec. 708 */ 709static int dm_request(request_queue_t *q, struct bio *bio) 710{ 711 int r; 712 int rw = bio_data_dir(bio); 713 struct mapped_device *md = q->queuedata; 714 715 down_read(&md->io_lock); 716 717 disk_stat_inc(dm_disk(md), ios[rw]); 718 disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); 719 720 /* 721 * If we're suspended we have to queue 722 * this io for later. 723 */ 724 while (test_bit(DMF_BLOCK_IO, &md->flags)) { 725 up_read(&md->io_lock); 726 727 if (bio_rw(bio) == READA) { 728 bio_io_error(bio, bio->bi_size); 729 return 0; 730 } 731 732 r = queue_io(md, bio); 733 if (r < 0) { 734 bio_io_error(bio, bio->bi_size); 735 return 0; 736 737 } else if (r == 0) 738 return 0; /* deferred successfully */ 739 740 /* 741 * We're in a while loop, because someone could suspend 742 * before we get to the following read lock. 743 */ 744 down_read(&md->io_lock); 745 } 746 747 __split_bio(md, bio); 748 up_read(&md->io_lock); 749 return 0; 750} 751 752static int dm_flush_all(request_queue_t *q, struct gendisk *disk, 753 sector_t *error_sector) 754{ 755 struct mapped_device *md = q->queuedata; 756 struct dm_table *map = dm_get_table(md); 757 int ret = -ENXIO; 758 759 if (map) { 760 ret = dm_table_flush_all(map); 761 dm_table_put(map); 762 } 763 764 return ret; 765} 766 767static void dm_unplug_all(request_queue_t *q) 768{ 769 struct mapped_device *md = q->queuedata; 770 struct dm_table *map = dm_get_table(md); 771 772 if (map) { 773 dm_table_unplug_all(map); 774 dm_table_put(map); 775 } 776} 777 778static int dm_any_congested(void *congested_data, int bdi_bits) 779{ 780 int r; 781 struct mapped_device *md = (struct mapped_device *) congested_data; 782 struct dm_table *map = dm_get_table(md); 783 784 if (!map || test_bit(DMF_BLOCK_IO, &md->flags)) 785 r = bdi_bits; 786 else 787 r = dm_table_any_congested(map, bdi_bits); 788 789 dm_table_put(map); 790 return r; 791} 792 793/*----------------------------------------------------------------- 794 * An IDR is used to keep track of allocated minor numbers. 795 *---------------------------------------------------------------*/ 796static DEFINE_IDR(_minor_idr); 797 798static void free_minor(int minor) 799{ 800 spin_lock(&_minor_lock); 801 idr_remove(&_minor_idr, minor); 802 spin_unlock(&_minor_lock); 803} 804 805/* 806 * See if the device with a specific minor # is free. 807 */ 808static int specific_minor(struct mapped_device *md, int minor) 809{ 810 int r, m; 811 812 if (minor >= (1 << MINORBITS)) 813 return -EINVAL; 814 815 r = idr_pre_get(&_minor_idr, GFP_KERNEL); 816 if (!r) 817 return -ENOMEM; 818 819 spin_lock(&_minor_lock); 820 821 if (idr_find(&_minor_idr, minor)) { 822 r = -EBUSY; 823 goto out; 824 } 825 826 r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m); 827 if (r) 828 goto out; 829 830 if (m != minor) { 831 idr_remove(&_minor_idr, m); 832 r = -EBUSY; 833 goto out; 834 } 835 836out: 837 spin_unlock(&_minor_lock); 838 return r; 839} 840 841static int next_free_minor(struct mapped_device *md, int *minor) 842{ 843 int r, m; 844 845 r = idr_pre_get(&_minor_idr, GFP_KERNEL); 846 if (!r) 847 return -ENOMEM; 848 849 spin_lock(&_minor_lock); 850 851 r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); 852 if (r) { 853 goto out; 854 } 855 856 if (m >= (1 << MINORBITS)) { 857 idr_remove(&_minor_idr, m); 858 r = -ENOSPC; 859 goto out; 860 } 861 862 *minor = m; 863 864out: 865 spin_unlock(&_minor_lock); 866 return r; 867} 868 869static struct block_device_operations dm_blk_dops; 870 871/* 872 * Allocate and initialise a blank device with a given minor. 873 */ 874static struct mapped_device *alloc_dev(int minor) 875{ 876 int r; 877 struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); 878 void *old_md; 879 880 if (!md) { 881 DMWARN("unable to allocate device, out of memory."); 882 return NULL; 883 } 884 885 if (!try_module_get(THIS_MODULE)) 886 goto bad0; 887 888 /* get a minor number for the dev */ 889 if (minor == DM_ANY_MINOR) 890 r = next_free_minor(md, &minor); 891 else 892 r = specific_minor(md, minor); 893 if (r < 0) 894 goto bad1; 895 896 memset(md, 0, sizeof(*md)); 897 init_rwsem(&md->io_lock); 898 init_MUTEX(&md->suspend_lock); 899 rwlock_init(&md->map_lock); 900 atomic_set(&md->holders, 1); 901 atomic_set(&md->open_count, 0); 902 atomic_set(&md->event_nr, 0); 903 904 md->queue = blk_alloc_queue(GFP_KERNEL); 905 if (!md->queue) 906 goto bad1; 907 908 md->queue->queuedata = md; 909 md->queue->backing_dev_info.congested_fn = dm_any_congested; 910 md->queue->backing_dev_info.congested_data = md; 911 blk_queue_make_request(md->queue, dm_request); 912 blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); 913 md->queue->unplug_fn = dm_unplug_all; 914 md->queue->issue_flush_fn = dm_flush_all; 915 916 md->io_pool = mempool_create_slab_pool(MIN_IOS, _io_cache); 917 if (!md->io_pool) 918 goto bad2; 919 920 md->tio_pool = mempool_create_slab_pool(MIN_IOS, _tio_cache); 921 if (!md->tio_pool) 922 goto bad3; 923 924 md->disk = alloc_disk(1); 925 if (!md->disk) 926 goto bad4; 927 928 atomic_set(&md->pending, 0); 929 init_waitqueue_head(&md->wait); 930 init_waitqueue_head(&md->eventq); 931 932 md->disk->major = _major; 933 md->disk->first_minor = minor; 934 md->disk->fops = &dm_blk_dops; 935 md->disk->queue = md->queue; 936 md->disk->private_data = md; 937 sprintf(md->disk->disk_name, "dm-%d", minor); 938 add_disk(md->disk); 939 format_dev_t(md->name, MKDEV(_major, minor)); 940 941 /* Populate the mapping, nobody knows we exist yet */ 942 spin_lock(&_minor_lock); 943 old_md = idr_replace(&_minor_idr, md, minor); 944 spin_unlock(&_minor_lock); 945 946 BUG_ON(old_md != MINOR_ALLOCED); 947 948 return md; 949 950 bad4: 951 mempool_destroy(md->tio_pool); 952 bad3: 953 mempool_destroy(md->io_pool); 954 bad2: 955 blk_cleanup_queue(md->queue); 956 free_minor(minor); 957 bad1: 958 module_put(THIS_MODULE); 959 bad0: 960 kfree(md); 961 return NULL; 962} 963 964static void free_dev(struct mapped_device *md) 965{ 966 int minor = md->disk->first_minor; 967 968 if (md->suspended_bdev) { 969 thaw_bdev(md->suspended_bdev, NULL); 970 bdput(md->suspended_bdev); 971 } 972 mempool_destroy(md->tio_pool); 973 mempool_destroy(md->io_pool); 974 del_gendisk(md->disk); 975 free_minor(minor); 976 977 spin_lock(&_minor_lock); 978 md->disk->private_data = NULL; 979 spin_unlock(&_minor_lock); 980 981 put_disk(md->disk); 982 blk_cleanup_queue(md->queue); 983 module_put(THIS_MODULE); 984 kfree(md); 985} 986 987/* 988 * Bind a table to the device. 989 */ 990static void event_callback(void *context) 991{ 992 struct mapped_device *md = (struct mapped_device *) context; 993 994 atomic_inc(&md->event_nr); 995 wake_up(&md->eventq); 996} 997 998static void __set_size(struct mapped_device *md, sector_t size) 999{ 1000 set_capacity(md->disk, size); 1001 1002 mutex_lock(&md->suspended_bdev->bd_inode->i_mutex); 1003 i_size_write(md->suspended_bdev->bd_inode, (loff_t)size << SECTOR_SHIFT); 1004 mutex_unlock(&md->suspended_bdev->bd_inode->i_mutex); 1005} 1006 1007static int __bind(struct mapped_device *md, struct dm_table *t) 1008{ 1009 request_queue_t *q = md->queue; 1010 sector_t size; 1011 1012 size = dm_table_get_size(t); 1013 1014 /* 1015 * Wipe any geometry if the size of the table changed. 1016 */ 1017 if (size != get_capacity(md->disk)) 1018 memset(&md->geometry, 0, sizeof(md->geometry)); 1019 1020 __set_size(md, size); 1021 if (size == 0) 1022 return 0; 1023 1024 dm_table_get(t); 1025 dm_table_event_callback(t, event_callback, md); 1026 1027 write_lock(&md->map_lock); 1028 md->map = t; 1029 dm_table_set_restrictions(t, q); 1030 write_unlock(&md->map_lock); 1031 1032 return 0; 1033} 1034 1035static void __unbind(struct mapped_device *md) 1036{ 1037 struct dm_table *map = md->map; 1038 1039 if (!map) 1040 return; 1041 1042 dm_table_event_callback(map, NULL, NULL); 1043 write_lock(&md->map_lock); 1044 md->map = NULL; 1045 write_unlock(&md->map_lock); 1046 dm_table_put(map); 1047} 1048 1049/* 1050 * Constructor for a new device. 1051 */ 1052int dm_create(int minor, struct mapped_device **result) 1053{ 1054 struct mapped_device *md; 1055 1056 md = alloc_dev(minor); 1057 if (!md) 1058 return -ENXIO; 1059 1060 *result = md; 1061 return 0; 1062} 1063 1064static struct mapped_device *dm_find_md(dev_t dev) 1065{ 1066 struct mapped_device *md; 1067 unsigned minor = MINOR(dev); 1068 1069 if (MAJOR(dev) != _major || minor >= (1 << MINORBITS)) 1070 return NULL; 1071 1072 spin_lock(&_minor_lock); 1073 1074 md = idr_find(&_minor_idr, minor); 1075 if (md && (md == MINOR_ALLOCED || 1076 (dm_disk(md)->first_minor != minor) || 1077 test_bit(DMF_FREEING, &md->flags))) { 1078 md = NULL; 1079 goto out; 1080 } 1081 1082out: 1083 spin_unlock(&_minor_lock); 1084 1085 return md; 1086} 1087 1088struct mapped_device *dm_get_md(dev_t dev) 1089{ 1090 struct mapped_device *md = dm_find_md(dev); 1091 1092 if (md) 1093 dm_get(md); 1094 1095 return md; 1096} 1097 1098void *dm_get_mdptr(struct mapped_device *md) 1099{ 1100 return md->interface_ptr; 1101} 1102 1103void dm_set_mdptr(struct mapped_device *md, void *ptr) 1104{ 1105 md->interface_ptr = ptr; 1106} 1107 1108void dm_get(struct mapped_device *md) 1109{ 1110 atomic_inc(&md->holders); 1111} 1112 1113const char *dm_device_name(struct mapped_device *md) 1114{ 1115 return md->name; 1116} 1117EXPORT_SYMBOL_GPL(dm_device_name); 1118 1119void dm_put(struct mapped_device *md) 1120{ 1121 struct dm_table *map; 1122 1123 BUG_ON(test_bit(DMF_FREEING, &md->flags)); 1124 1125 if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { 1126 map = dm_get_table(md); 1127 idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); 1128 set_bit(DMF_FREEING, &md->flags); 1129 spin_unlock(&_minor_lock); 1130 if (!dm_suspended(md)) { 1131 dm_table_presuspend_targets(map); 1132 dm_table_postsuspend_targets(map); 1133 } 1134 __unbind(md); 1135 dm_table_put(map); 1136 free_dev(md); 1137 } 1138} 1139 1140/* 1141 * Process the deferred bios 1142 */ 1143static void __flush_deferred_io(struct mapped_device *md, struct bio *c) 1144{ 1145 struct bio *n; 1146 1147 while (c) { 1148 n = c->bi_next; 1149 c->bi_next = NULL; 1150 __split_bio(md, c); 1151 c = n; 1152 } 1153} 1154 1155/* 1156 * Swap in a new table (destroying old one). 1157 */ 1158int dm_swap_table(struct mapped_device *md, struct dm_table *table) 1159{ 1160 int r = -EINVAL; 1161 1162 down(&md->suspend_lock); 1163 1164 /* device must be suspended */ 1165 if (!dm_suspended(md)) 1166 goto out; 1167 1168 __unbind(md); 1169 r = __bind(md, table); 1170 1171out: 1172 up(&md->suspend_lock); 1173 return r; 1174} 1175 1176/* 1177 * Functions to lock and unlock any filesystem running on the 1178 * device. 1179 */ 1180static int lock_fs(struct mapped_device *md) 1181{ 1182 int r; 1183 1184 WARN_ON(md->frozen_sb); 1185 1186 md->frozen_sb = freeze_bdev(md->suspended_bdev); 1187 if (IS_ERR(md->frozen_sb)) { 1188 r = PTR_ERR(md->frozen_sb); 1189 md->frozen_sb = NULL; 1190 return r; 1191 } 1192 1193 set_bit(DMF_FROZEN, &md->flags); 1194 1195 /* don't bdput right now, we don't want the bdev 1196 * to go away while it is locked. 1197 */ 1198 return 0; 1199} 1200 1201static void unlock_fs(struct mapped_device *md) 1202{ 1203 if (!test_bit(DMF_FROZEN, &md->flags)) 1204 return; 1205 1206 thaw_bdev(md->suspended_bdev, md->frozen_sb); 1207 md->frozen_sb = NULL; 1208 clear_bit(DMF_FROZEN, &md->flags); 1209} 1210 1211/* 1212 * We need to be able to change a mapping table under a mounted 1213 * filesystem. For example we might want to move some data in 1214 * the background. Before the table can be swapped with 1215 * dm_bind_table, dm_suspend must be called to flush any in 1216 * flight bios and ensure that any further io gets deferred. 1217 */ 1218int dm_suspend(struct mapped_device *md, int do_lockfs) 1219{ 1220 struct dm_table *map = NULL; 1221 DECLARE_WAITQUEUE(wait, current); 1222 struct bio *def; 1223 int r = -EINVAL; 1224 1225 down(&md->suspend_lock); 1226 1227 if (dm_suspended(md)) 1228 goto out; 1229 1230 map = dm_get_table(md); 1231 1232 /* This does not get reverted if there's an error later. */ 1233 dm_table_presuspend_targets(map); 1234 1235 md->suspended_bdev = bdget_disk(md->disk, 0); 1236 if (!md->suspended_bdev) { 1237 DMWARN("bdget failed in dm_suspend"); 1238 r = -ENOMEM; 1239 goto out; 1240 } 1241 1242 /* Flush I/O to the device. */ 1243 if (do_lockfs) { 1244 r = lock_fs(md); 1245 if (r) 1246 goto out; 1247 } 1248 1249 /* 1250 * First we set the BLOCK_IO flag so no more ios will be mapped. 1251 */ 1252 down_write(&md->io_lock); 1253 set_bit(DMF_BLOCK_IO, &md->flags); 1254 1255 add_wait_queue(&md->wait, &wait); 1256 up_write(&md->io_lock); 1257 1258 /* unplug */ 1259 if (map) 1260 dm_table_unplug_all(map); 1261 1262 /* 1263 * Then we wait for the already mapped ios to 1264 * complete. 1265 */ 1266 while (1) { 1267 set_current_state(TASK_INTERRUPTIBLE); 1268 1269 if (!atomic_read(&md->pending) || signal_pending(current)) 1270 break; 1271 1272 io_schedule(); 1273 } 1274 set_current_state(TASK_RUNNING); 1275 1276 down_write(&md->io_lock); 1277 remove_wait_queue(&md->wait, &wait); 1278 1279 /* were we interrupted ? */ 1280 r = -EINTR; 1281 if (atomic_read(&md->pending)) { 1282 clear_bit(DMF_BLOCK_IO, &md->flags); 1283 def = bio_list_get(&md->deferred); 1284 __flush_deferred_io(md, def); 1285 up_write(&md->io_lock); 1286 unlock_fs(md); 1287 goto out; 1288 } 1289 up_write(&md->io_lock); 1290 1291 dm_table_postsuspend_targets(map); 1292 1293 set_bit(DMF_SUSPENDED, &md->flags); 1294 1295 r = 0; 1296 1297out: 1298 if (r && md->suspended_bdev) { 1299 bdput(md->suspended_bdev); 1300 md->suspended_bdev = NULL; 1301 } 1302 1303 dm_table_put(map); 1304 up(&md->suspend_lock); 1305 return r; 1306} 1307 1308int dm_resume(struct mapped_device *md) 1309{ 1310 int r = -EINVAL; 1311 struct bio *def; 1312 struct dm_table *map = NULL; 1313 1314 down(&md->suspend_lock); 1315 if (!dm_suspended(md)) 1316 goto out; 1317 1318 map = dm_get_table(md); 1319 if (!map || !dm_table_get_size(map)) 1320 goto out; 1321 1322 dm_table_resume_targets(map); 1323 1324 down_write(&md->io_lock); 1325 clear_bit(DMF_BLOCK_IO, &md->flags); 1326 1327 def = bio_list_get(&md->deferred); 1328 __flush_deferred_io(md, def); 1329 up_write(&md->io_lock); 1330 1331 unlock_fs(md); 1332 1333 bdput(md->suspended_bdev); 1334 md->suspended_bdev = NULL; 1335 1336 clear_bit(DMF_SUSPENDED, &md->flags); 1337 1338 dm_table_unplug_all(map); 1339 1340 r = 0; 1341 1342out: 1343 dm_table_put(map); 1344 up(&md->suspend_lock); 1345 1346 return r; 1347} 1348 1349/*----------------------------------------------------------------- 1350 * Event notification. 1351 *---------------------------------------------------------------*/ 1352uint32_t dm_get_event_nr(struct mapped_device *md) 1353{ 1354 return atomic_read(&md->event_nr); 1355} 1356 1357int dm_wait_event(struct mapped_device *md, int event_nr) 1358{ 1359 return wait_event_interruptible(md->eventq, 1360 (event_nr != atomic_read(&md->event_nr))); 1361} 1362 1363/* 1364 * The gendisk is only valid as long as you have a reference 1365 * count on 'md'. 1366 */ 1367struct gendisk *dm_disk(struct mapped_device *md) 1368{ 1369 return md->disk; 1370} 1371 1372int dm_suspended(struct mapped_device *md) 1373{ 1374 return test_bit(DMF_SUSPENDED, &md->flags); 1375} 1376 1377static struct block_device_operations dm_blk_dops = { 1378 .open = dm_blk_open, 1379 .release = dm_blk_close, 1380 .getgeo = dm_blk_getgeo, 1381 .owner = THIS_MODULE 1382}; 1383 1384EXPORT_SYMBOL(dm_get_mapinfo); 1385 1386/* 1387 * module hooks 1388 */ 1389module_init(dm_init); 1390module_exit(dm_exit); 1391 1392module_param(major, uint, 0); 1393MODULE_PARM_DESC(major, "The major number of the device mapper"); 1394MODULE_DESCRIPTION(DM_NAME " driver"); 1395MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); 1396MODULE_LICENSE("GPL");