Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v2.6.27-rc2 1618 lines 43 kB view raw
1/* 2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 3 * 4 * bitmap_create - sets up the bitmap structure 5 * bitmap_destroy - destroys the bitmap structure 6 * 7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 8 * - added disk storage for bitmap 9 * - changes to allow various bitmap chunk sizes 10 */ 11 12/* 13 * Still to do: 14 * 15 * flush after percent set rather than just time based. (maybe both). 16 * wait if count gets too high, wake when it drops to half. 17 */ 18 19#include <linux/module.h> 20#include <linux/errno.h> 21#include <linux/slab.h> 22#include <linux/init.h> 23#include <linux/timer.h> 24#include <linux/sched.h> 25#include <linux/list.h> 26#include <linux/file.h> 27#include <linux/mount.h> 28#include <linux/buffer_head.h> 29#include <linux/raid/md.h> 30#include <linux/raid/bitmap.h> 31 32/* debug macros */ 33 34#define DEBUG 0 35 36#if DEBUG 37/* these are for debugging purposes only! */ 38 39/* define one and only one of these */ 40#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */ 41#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/ 42#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */ 43#define INJECT_FAULTS_4 0 /* undef */ 44#define INJECT_FAULTS_5 0 /* undef */ 45#define INJECT_FAULTS_6 0 46 47/* if these are defined, the driver will fail! debug only */ 48#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */ 49#define INJECT_FATAL_FAULT_2 0 /* undef */ 50#define INJECT_FATAL_FAULT_3 0 /* undef */ 51#endif 52 53//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ 54#define DPRINTK(x...) do { } while(0) 55 56#ifndef PRINTK 57# if DEBUG > 0 58# define PRINTK(x...) printk(KERN_DEBUG x) 59# else 60# define PRINTK(x...) 61# endif 62#endif 63 64static inline char * bmname(struct bitmap *bitmap) 65{ 66 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 67} 68 69 70/* 71 * just a placeholder - calls kmalloc for bitmap pages 72 */ 73static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) 74{ 75 unsigned char *page; 76 77#ifdef INJECT_FAULTS_1 78 page = NULL; 79#else 80 page = kmalloc(PAGE_SIZE, GFP_NOIO); 81#endif 82 if (!page) 83 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); 84 else 85 PRINTK("%s: bitmap_alloc_page: allocated page at %p\n", 86 bmname(bitmap), page); 87 return page; 88} 89 90/* 91 * for now just a placeholder -- just calls kfree for bitmap pages 92 */ 93static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) 94{ 95 PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page); 96 kfree(page); 97} 98 99/* 100 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 101 * 102 * 1) check to see if this page is allocated, if it's not then try to alloc 103 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 104 * page pointer directly as a counter 105 * 106 * if we find our page, we increment the page's refcount so that it stays 107 * allocated while we're using it 108 */ 109static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) 110{ 111 unsigned char *mappage; 112 113 if (page >= bitmap->pages) { 114 printk(KERN_ALERT 115 "%s: invalid bitmap page request: %lu (> %lu)\n", 116 bmname(bitmap), page, bitmap->pages-1); 117 return -EINVAL; 118 } 119 120 121 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 122 return 0; 123 124 if (bitmap->bp[page].map) /* page is already allocated, just return */ 125 return 0; 126 127 if (!create) 128 return -ENOENT; 129 130 spin_unlock_irq(&bitmap->lock); 131 132 /* this page has not been allocated yet */ 133 134 if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { 135 PRINTK("%s: bitmap map page allocation failed, hijacking\n", 136 bmname(bitmap)); 137 /* failed - set the hijacked flag so that we can use the 138 * pointer as a counter */ 139 spin_lock_irq(&bitmap->lock); 140 if (!bitmap->bp[page].map) 141 bitmap->bp[page].hijacked = 1; 142 goto out; 143 } 144 145 /* got a page */ 146 147 spin_lock_irq(&bitmap->lock); 148 149 /* recheck the page */ 150 151 if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { 152 /* somebody beat us to getting the page */ 153 bitmap_free_page(bitmap, mappage); 154 return 0; 155 } 156 157 /* no page was in place and we have one, so install it */ 158 159 memset(mappage, 0, PAGE_SIZE); 160 bitmap->bp[page].map = mappage; 161 bitmap->missing_pages--; 162out: 163 return 0; 164} 165 166 167/* if page is completely empty, put it back on the free list, or dealloc it */ 168/* if page was hijacked, unmark the flag so it might get alloced next time */ 169/* Note: lock should be held when calling this */ 170static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) 171{ 172 char *ptr; 173 174 if (bitmap->bp[page].count) /* page is still busy */ 175 return; 176 177 /* page is no longer in use, it can be released */ 178 179 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 180 bitmap->bp[page].hijacked = 0; 181 bitmap->bp[page].map = NULL; 182 return; 183 } 184 185 /* normal case, free the page */ 186 187#if 0 188/* actually ... let's not. We will probably need the page again exactly when 189 * memory is tight and we are flusing to disk 190 */ 191 return; 192#else 193 ptr = bitmap->bp[page].map; 194 bitmap->bp[page].map = NULL; 195 bitmap->missing_pages++; 196 bitmap_free_page(bitmap, ptr); 197 return; 198#endif 199} 200 201 202/* 203 * bitmap file handling - read and write the bitmap file and its superblock 204 */ 205 206/* 207 * basic page I/O operations 208 */ 209 210/* IO operations when bitmap is stored near all superblocks */ 211static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index) 212{ 213 /* choose a good rdev and read the page from there */ 214 215 mdk_rdev_t *rdev; 216 struct list_head *tmp; 217 struct page *page = alloc_page(GFP_KERNEL); 218 sector_t target; 219 220 if (!page) 221 return ERR_PTR(-ENOMEM); 222 223 rdev_for_each(rdev, tmp, mddev) { 224 if (! test_bit(In_sync, &rdev->flags) 225 || test_bit(Faulty, &rdev->flags)) 226 continue; 227 228 target = rdev->sb_start + offset + index * (PAGE_SIZE/512); 229 230 if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) { 231 page->index = index; 232 attach_page_buffers(page, NULL); /* so that free_buffer will 233 * quietly no-op */ 234 return page; 235 } 236 } 237 return ERR_PTR(-EIO); 238 239} 240 241static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) 242{ 243 mdk_rdev_t *rdev; 244 mddev_t *mddev = bitmap->mddev; 245 246 rcu_read_lock(); 247 rdev_for_each_rcu(rdev, mddev) 248 if (test_bit(In_sync, &rdev->flags) 249 && !test_bit(Faulty, &rdev->flags)) { 250 int size = PAGE_SIZE; 251 if (page->index == bitmap->file_pages-1) 252 size = roundup(bitmap->last_page_size, 253 bdev_hardsect_size(rdev->bdev)); 254 /* Just make sure we aren't corrupting data or 255 * metadata 256 */ 257 if (bitmap->offset < 0) { 258 /* DATA BITMAP METADATA */ 259 if (bitmap->offset 260 + (long)(page->index * (PAGE_SIZE/512)) 261 + size/512 > 0) 262 /* bitmap runs in to metadata */ 263 goto bad_alignment; 264 if (rdev->data_offset + mddev->size*2 265 > rdev->sb_start + bitmap->offset) 266 /* data runs in to bitmap */ 267 goto bad_alignment; 268 } else if (rdev->sb_start < rdev->data_offset) { 269 /* METADATA BITMAP DATA */ 270 if (rdev->sb_start 271 + bitmap->offset 272 + page->index*(PAGE_SIZE/512) + size/512 273 > rdev->data_offset) 274 /* bitmap runs in to data */ 275 goto bad_alignment; 276 } else { 277 /* DATA METADATA BITMAP - no problems */ 278 } 279 md_super_write(mddev, rdev, 280 rdev->sb_start + bitmap->offset 281 + page->index * (PAGE_SIZE/512), 282 size, 283 page); 284 } 285 rcu_read_unlock(); 286 287 if (wait) 288 md_super_wait(mddev); 289 return 0; 290 291 bad_alignment: 292 rcu_read_unlock(); 293 return -EINVAL; 294} 295 296static void bitmap_file_kick(struct bitmap *bitmap); 297/* 298 * write out a page to a file 299 */ 300static void write_page(struct bitmap *bitmap, struct page *page, int wait) 301{ 302 struct buffer_head *bh; 303 304 if (bitmap->file == NULL) { 305 switch (write_sb_page(bitmap, page, wait)) { 306 case -EINVAL: 307 bitmap->flags |= BITMAP_WRITE_ERROR; 308 } 309 } else { 310 311 bh = page_buffers(page); 312 313 while (bh && bh->b_blocknr) { 314 atomic_inc(&bitmap->pending_writes); 315 set_buffer_locked(bh); 316 set_buffer_mapped(bh); 317 submit_bh(WRITE, bh); 318 bh = bh->b_this_page; 319 } 320 321 if (wait) { 322 wait_event(bitmap->write_wait, 323 atomic_read(&bitmap->pending_writes)==0); 324 } 325 } 326 if (bitmap->flags & BITMAP_WRITE_ERROR) 327 bitmap_file_kick(bitmap); 328} 329 330static void end_bitmap_write(struct buffer_head *bh, int uptodate) 331{ 332 struct bitmap *bitmap = bh->b_private; 333 unsigned long flags; 334 335 if (!uptodate) { 336 spin_lock_irqsave(&bitmap->lock, flags); 337 bitmap->flags |= BITMAP_WRITE_ERROR; 338 spin_unlock_irqrestore(&bitmap->lock, flags); 339 } 340 if (atomic_dec_and_test(&bitmap->pending_writes)) 341 wake_up(&bitmap->write_wait); 342} 343 344/* copied from buffer.c */ 345static void 346__clear_page_buffers(struct page *page) 347{ 348 ClearPagePrivate(page); 349 set_page_private(page, 0); 350 page_cache_release(page); 351} 352static void free_buffers(struct page *page) 353{ 354 struct buffer_head *bh = page_buffers(page); 355 356 while (bh) { 357 struct buffer_head *next = bh->b_this_page; 358 free_buffer_head(bh); 359 bh = next; 360 } 361 __clear_page_buffers(page); 362 put_page(page); 363} 364 365/* read a page from a file. 366 * We both read the page, and attach buffers to the page to record the 367 * address of each block (using bmap). These addresses will be used 368 * to write the block later, completely bypassing the filesystem. 369 * This usage is similar to how swap files are handled, and allows us 370 * to write to a file with no concerns of memory allocation failing. 371 */ 372static struct page *read_page(struct file *file, unsigned long index, 373 struct bitmap *bitmap, 374 unsigned long count) 375{ 376 struct page *page = NULL; 377 struct inode *inode = file->f_path.dentry->d_inode; 378 struct buffer_head *bh; 379 sector_t block; 380 381 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, 382 (unsigned long long)index << PAGE_SHIFT); 383 384 page = alloc_page(GFP_KERNEL); 385 if (!page) 386 page = ERR_PTR(-ENOMEM); 387 if (IS_ERR(page)) 388 goto out; 389 390 bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0); 391 if (!bh) { 392 put_page(page); 393 page = ERR_PTR(-ENOMEM); 394 goto out; 395 } 396 attach_page_buffers(page, bh); 397 block = index << (PAGE_SHIFT - inode->i_blkbits); 398 while (bh) { 399 if (count == 0) 400 bh->b_blocknr = 0; 401 else { 402 bh->b_blocknr = bmap(inode, block); 403 if (bh->b_blocknr == 0) { 404 /* Cannot use this file! */ 405 free_buffers(page); 406 page = ERR_PTR(-EINVAL); 407 goto out; 408 } 409 bh->b_bdev = inode->i_sb->s_bdev; 410 if (count < (1<<inode->i_blkbits)) 411 count = 0; 412 else 413 count -= (1<<inode->i_blkbits); 414 415 bh->b_end_io = end_bitmap_write; 416 bh->b_private = bitmap; 417 atomic_inc(&bitmap->pending_writes); 418 set_buffer_locked(bh); 419 set_buffer_mapped(bh); 420 submit_bh(READ, bh); 421 } 422 block++; 423 bh = bh->b_this_page; 424 } 425 page->index = index; 426 427 wait_event(bitmap->write_wait, 428 atomic_read(&bitmap->pending_writes)==0); 429 if (bitmap->flags & BITMAP_WRITE_ERROR) { 430 free_buffers(page); 431 page = ERR_PTR(-EIO); 432 } 433out: 434 if (IS_ERR(page)) 435 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", 436 (int)PAGE_SIZE, 437 (unsigned long long)index << PAGE_SHIFT, 438 PTR_ERR(page)); 439 return page; 440} 441 442/* 443 * bitmap file superblock operations 444 */ 445 446/* update the event counter and sync the superblock to disk */ 447void bitmap_update_sb(struct bitmap *bitmap) 448{ 449 bitmap_super_t *sb; 450 unsigned long flags; 451 452 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 453 return; 454 spin_lock_irqsave(&bitmap->lock, flags); 455 if (!bitmap->sb_page) { /* no superblock */ 456 spin_unlock_irqrestore(&bitmap->lock, flags); 457 return; 458 } 459 spin_unlock_irqrestore(&bitmap->lock, flags); 460 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 461 sb->events = cpu_to_le64(bitmap->mddev->events); 462 if (bitmap->mddev->events < bitmap->events_cleared) { 463 /* rocking back to read-only */ 464 bitmap->events_cleared = bitmap->mddev->events; 465 sb->events_cleared = cpu_to_le64(bitmap->events_cleared); 466 } 467 kunmap_atomic(sb, KM_USER0); 468 write_page(bitmap, bitmap->sb_page, 1); 469} 470 471/* print out the bitmap file superblock */ 472void bitmap_print_sb(struct bitmap *bitmap) 473{ 474 bitmap_super_t *sb; 475 476 if (!bitmap || !bitmap->sb_page) 477 return; 478 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 479 printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap)); 480 printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); 481 printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); 482 printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n", 483 *(__u32 *)(sb->uuid+0), 484 *(__u32 *)(sb->uuid+4), 485 *(__u32 *)(sb->uuid+8), 486 *(__u32 *)(sb->uuid+12)); 487 printk(KERN_DEBUG " events: %llu\n", 488 (unsigned long long) le64_to_cpu(sb->events)); 489 printk(KERN_DEBUG "events cleared: %llu\n", 490 (unsigned long long) le64_to_cpu(sb->events_cleared)); 491 printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state)); 492 printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize)); 493 printk(KERN_DEBUG " daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); 494 printk(KERN_DEBUG " sync size: %llu KB\n", 495 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 496 printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind)); 497 kunmap_atomic(sb, KM_USER0); 498} 499 500/* read the superblock from the bitmap file and initialize some bitmap fields */ 501static int bitmap_read_sb(struct bitmap *bitmap) 502{ 503 char *reason = NULL; 504 bitmap_super_t *sb; 505 unsigned long chunksize, daemon_sleep, write_behind; 506 unsigned long long events; 507 int err = -EINVAL; 508 509 /* page 0 is the superblock, read it... */ 510 if (bitmap->file) { 511 loff_t isize = i_size_read(bitmap->file->f_mapping->host); 512 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; 513 514 bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes); 515 } else { 516 bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0); 517 } 518 if (IS_ERR(bitmap->sb_page)) { 519 err = PTR_ERR(bitmap->sb_page); 520 bitmap->sb_page = NULL; 521 return err; 522 } 523 524 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 525 526 chunksize = le32_to_cpu(sb->chunksize); 527 daemon_sleep = le32_to_cpu(sb->daemon_sleep); 528 write_behind = le32_to_cpu(sb->write_behind); 529 530 /* verify that the bitmap-specific fields are valid */ 531 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 532 reason = "bad magic"; 533 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 534 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI) 535 reason = "unrecognized superblock version"; 536 else if (chunksize < PAGE_SIZE) 537 reason = "bitmap chunksize too small"; 538 else if ((1 << ffz(~chunksize)) != chunksize) 539 reason = "bitmap chunksize not a power of 2"; 540 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) 541 reason = "daemon sleep period out of range"; 542 else if (write_behind > COUNTER_MAX) 543 reason = "write-behind limit out of range (0 - 16383)"; 544 if (reason) { 545 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n", 546 bmname(bitmap), reason); 547 goto out; 548 } 549 550 /* keep the array size field of the bitmap superblock up to date */ 551 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 552 553 if (!bitmap->mddev->persistent) 554 goto success; 555 556 /* 557 * if we have a persistent array superblock, compare the 558 * bitmap's UUID and event counter to the mddev's 559 */ 560 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 561 printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n", 562 bmname(bitmap)); 563 goto out; 564 } 565 events = le64_to_cpu(sb->events); 566 if (events < bitmap->mddev->events) { 567 printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " 568 "-- forcing full recovery\n", bmname(bitmap), events, 569 (unsigned long long) bitmap->mddev->events); 570 sb->state |= cpu_to_le32(BITMAP_STALE); 571 } 572success: 573 /* assign fields using values from superblock */ 574 bitmap->chunksize = chunksize; 575 bitmap->daemon_sleep = daemon_sleep; 576 bitmap->daemon_lastrun = jiffies; 577 bitmap->max_write_behind = write_behind; 578 bitmap->flags |= le32_to_cpu(sb->state); 579 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 580 bitmap->flags |= BITMAP_HOSTENDIAN; 581 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 582 if (sb->state & cpu_to_le32(BITMAP_STALE)) 583 bitmap->events_cleared = bitmap->mddev->events; 584 err = 0; 585out: 586 kunmap_atomic(sb, KM_USER0); 587 if (err) 588 bitmap_print_sb(bitmap); 589 return err; 590} 591 592enum bitmap_mask_op { 593 MASK_SET, 594 MASK_UNSET 595}; 596 597/* record the state of the bitmap in the superblock. Return the old value */ 598static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, 599 enum bitmap_mask_op op) 600{ 601 bitmap_super_t *sb; 602 unsigned long flags; 603 int old; 604 605 spin_lock_irqsave(&bitmap->lock, flags); 606 if (!bitmap->sb_page) { /* can't set the state */ 607 spin_unlock_irqrestore(&bitmap->lock, flags); 608 return 0; 609 } 610 spin_unlock_irqrestore(&bitmap->lock, flags); 611 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 612 old = le32_to_cpu(sb->state) & bits; 613 switch (op) { 614 case MASK_SET: sb->state |= cpu_to_le32(bits); 615 break; 616 case MASK_UNSET: sb->state &= cpu_to_le32(~bits); 617 break; 618 default: BUG(); 619 } 620 kunmap_atomic(sb, KM_USER0); 621 return old; 622} 623 624/* 625 * general bitmap file operations 626 */ 627 628/* calculate the index of the page that contains this bit */ 629static inline unsigned long file_page_index(unsigned long chunk) 630{ 631 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT; 632} 633 634/* calculate the (bit) offset of this bit within a page */ 635static inline unsigned long file_page_offset(unsigned long chunk) 636{ 637 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1); 638} 639 640/* 641 * return a pointer to the page in the filemap that contains the given bit 642 * 643 * this lookup is complicated by the fact that the bitmap sb might be exactly 644 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page 645 * 0 or page 1 646 */ 647static inline struct page *filemap_get_page(struct bitmap *bitmap, 648 unsigned long chunk) 649{ 650 if (file_page_index(chunk) >= bitmap->file_pages) return NULL; 651 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)]; 652} 653 654 655static void bitmap_file_unmap(struct bitmap *bitmap) 656{ 657 struct page **map, *sb_page; 658 unsigned long *attr; 659 int pages; 660 unsigned long flags; 661 662 spin_lock_irqsave(&bitmap->lock, flags); 663 map = bitmap->filemap; 664 bitmap->filemap = NULL; 665 attr = bitmap->filemap_attr; 666 bitmap->filemap_attr = NULL; 667 pages = bitmap->file_pages; 668 bitmap->file_pages = 0; 669 sb_page = bitmap->sb_page; 670 bitmap->sb_page = NULL; 671 spin_unlock_irqrestore(&bitmap->lock, flags); 672 673 while (pages--) 674 if (map[pages]->index != 0) /* 0 is sb_page, release it below */ 675 free_buffers(map[pages]); 676 kfree(map); 677 kfree(attr); 678 679 if (sb_page) 680 free_buffers(sb_page); 681} 682 683static void bitmap_file_put(struct bitmap *bitmap) 684{ 685 struct file *file; 686 unsigned long flags; 687 688 spin_lock_irqsave(&bitmap->lock, flags); 689 file = bitmap->file; 690 bitmap->file = NULL; 691 spin_unlock_irqrestore(&bitmap->lock, flags); 692 693 if (file) 694 wait_event(bitmap->write_wait, 695 atomic_read(&bitmap->pending_writes)==0); 696 bitmap_file_unmap(bitmap); 697 698 if (file) { 699 struct inode *inode = file->f_path.dentry->d_inode; 700 invalidate_mapping_pages(inode->i_mapping, 0, -1); 701 fput(file); 702 } 703} 704 705 706/* 707 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 708 * then it is no longer reliable, so we stop using it and we mark the file 709 * as failed in the superblock 710 */ 711static void bitmap_file_kick(struct bitmap *bitmap) 712{ 713 char *path, *ptr = NULL; 714 715 if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) { 716 bitmap_update_sb(bitmap); 717 718 if (bitmap->file) { 719 path = kmalloc(PAGE_SIZE, GFP_KERNEL); 720 if (path) 721 ptr = d_path(&bitmap->file->f_path, path, 722 PAGE_SIZE); 723 724 725 printk(KERN_ALERT 726 "%s: kicking failed bitmap file %s from array!\n", 727 bmname(bitmap), IS_ERR(ptr) ? "" : ptr); 728 729 kfree(path); 730 } else 731 printk(KERN_ALERT 732 "%s: disabling internal bitmap due to errors\n", 733 bmname(bitmap)); 734 } 735 736 bitmap_file_put(bitmap); 737 738 return; 739} 740 741enum bitmap_page_attr { 742 BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced 743 BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared 744 BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced 745}; 746 747static inline void set_page_attr(struct bitmap *bitmap, struct page *page, 748 enum bitmap_page_attr attr) 749{ 750 __set_bit((page->index<<2) + attr, bitmap->filemap_attr); 751} 752 753static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, 754 enum bitmap_page_attr attr) 755{ 756 __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); 757} 758 759static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, 760 enum bitmap_page_attr attr) 761{ 762 return test_bit((page->index<<2) + attr, bitmap->filemap_attr); 763} 764 765/* 766 * bitmap_file_set_bit -- called before performing a write to the md device 767 * to set (and eventually sync) a particular bit in the bitmap file 768 * 769 * we set the bit immediately, then we record the page number so that 770 * when an unplug occurs, we can flush the dirty pages out to disk 771 */ 772static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 773{ 774 unsigned long bit; 775 struct page *page; 776 void *kaddr; 777 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); 778 779 if (!bitmap->filemap) { 780 return; 781 } 782 783 page = filemap_get_page(bitmap, chunk); 784 if (!page) return; 785 bit = file_page_offset(chunk); 786 787 /* set the bit */ 788 kaddr = kmap_atomic(page, KM_USER0); 789 if (bitmap->flags & BITMAP_HOSTENDIAN) 790 set_bit(bit, kaddr); 791 else 792 ext2_set_bit(bit, kaddr); 793 kunmap_atomic(kaddr, KM_USER0); 794 PRINTK("set file bit %lu page %lu\n", bit, page->index); 795 796 /* record page number so it gets flushed to disk when unplug occurs */ 797 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 798 799} 800 801/* this gets called when the md device is ready to unplug its underlying 802 * (slave) device queues -- before we let any writes go down, we need to 803 * sync the dirty pages of the bitmap file to disk */ 804void bitmap_unplug(struct bitmap *bitmap) 805{ 806 unsigned long i, flags; 807 int dirty, need_write; 808 struct page *page; 809 int wait = 0; 810 811 if (!bitmap) 812 return; 813 814 /* look at each page to see if there are any set bits that need to be 815 * flushed out to disk */ 816 for (i = 0; i < bitmap->file_pages; i++) { 817 spin_lock_irqsave(&bitmap->lock, flags); 818 if (!bitmap->filemap) { 819 spin_unlock_irqrestore(&bitmap->lock, flags); 820 return; 821 } 822 page = bitmap->filemap[i]; 823 dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 824 need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 825 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 826 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 827 if (dirty) 828 wait = 1; 829 spin_unlock_irqrestore(&bitmap->lock, flags); 830 831 if (dirty | need_write) 832 write_page(bitmap, page, 0); 833 } 834 if (wait) { /* if any writes were performed, we need to wait on them */ 835 if (bitmap->file) 836 wait_event(bitmap->write_wait, 837 atomic_read(&bitmap->pending_writes)==0); 838 else 839 md_super_wait(bitmap->mddev); 840 } 841 if (bitmap->flags & BITMAP_WRITE_ERROR) 842 bitmap_file_kick(bitmap); 843} 844 845static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 846/* * bitmap_init_from_disk -- called at bitmap_create time to initialize 847 * the in-memory bitmap from the on-disk bitmap -- also, sets up the 848 * memory mapping of the bitmap file 849 * Special cases: 850 * if there's no bitmap file, or if the bitmap file had been 851 * previously kicked from the array, we mark all the bits as 852 * 1's in order to cause a full resync. 853 * 854 * We ignore all bits for sectors that end earlier than 'start'. 855 * This is used when reading an out-of-date bitmap... 856 */ 857static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 858{ 859 unsigned long i, chunks, index, oldindex, bit; 860 struct page *page = NULL, *oldpage = NULL; 861 unsigned long num_pages, bit_cnt = 0; 862 struct file *file; 863 unsigned long bytes, offset; 864 int outofdate; 865 int ret = -ENOSPC; 866 void *paddr; 867 868 chunks = bitmap->chunks; 869 file = bitmap->file; 870 871 BUG_ON(!file && !bitmap->offset); 872 873#ifdef INJECT_FAULTS_3 874 outofdate = 1; 875#else 876 outofdate = bitmap->flags & BITMAP_STALE; 877#endif 878 if (outofdate) 879 printk(KERN_INFO "%s: bitmap file is out of date, doing full " 880 "recovery\n", bmname(bitmap)); 881 882 bytes = (chunks + 7) / 8; 883 884 num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; 885 886 if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { 887 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", 888 bmname(bitmap), 889 (unsigned long) i_size_read(file->f_mapping->host), 890 bytes + sizeof(bitmap_super_t)); 891 goto err; 892 } 893 894 ret = -ENOMEM; 895 896 bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); 897 if (!bitmap->filemap) 898 goto err; 899 900 /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ 901 bitmap->filemap_attr = kzalloc( 902 roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 903 GFP_KERNEL); 904 if (!bitmap->filemap_attr) 905 goto err; 906 907 oldindex = ~0L; 908 909 for (i = 0; i < chunks; i++) { 910 int b; 911 index = file_page_index(i); 912 bit = file_page_offset(i); 913 if (index != oldindex) { /* this is a new page, read it in */ 914 int count; 915 /* unmap the old page, we're done with it */ 916 if (index == num_pages-1) 917 count = bytes + sizeof(bitmap_super_t) 918 - index * PAGE_SIZE; 919 else 920 count = PAGE_SIZE; 921 if (index == 0) { 922 /* 923 * if we're here then the superblock page 924 * contains some bits (PAGE_SIZE != sizeof sb) 925 * we've already read it in, so just use it 926 */ 927 page = bitmap->sb_page; 928 offset = sizeof(bitmap_super_t); 929 } else if (file) { 930 page = read_page(file, index, bitmap, count); 931 offset = 0; 932 } else { 933 page = read_sb_page(bitmap->mddev, bitmap->offset, index); 934 offset = 0; 935 } 936 if (IS_ERR(page)) { /* read error */ 937 ret = PTR_ERR(page); 938 goto err; 939 } 940 941 oldindex = index; 942 oldpage = page; 943 944 if (outofdate) { 945 /* 946 * if bitmap is out of date, dirty the 947 * whole page and write it out 948 */ 949 paddr = kmap_atomic(page, KM_USER0); 950 memset(paddr + offset, 0xff, 951 PAGE_SIZE - offset); 952 kunmap_atomic(paddr, KM_USER0); 953 write_page(bitmap, page, 1); 954 955 ret = -EIO; 956 if (bitmap->flags & BITMAP_WRITE_ERROR) { 957 /* release, page not in filemap yet */ 958 put_page(page); 959 goto err; 960 } 961 } 962 963 bitmap->filemap[bitmap->file_pages++] = page; 964 bitmap->last_page_size = count; 965 } 966 paddr = kmap_atomic(page, KM_USER0); 967 if (bitmap->flags & BITMAP_HOSTENDIAN) 968 b = test_bit(bit, paddr); 969 else 970 b = ext2_test_bit(bit, paddr); 971 kunmap_atomic(paddr, KM_USER0); 972 if (b) { 973 /* if the disk bit is set, set the memory bit */ 974 bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), 975 ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) 976 ); 977 bit_cnt++; 978 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 979 } 980 } 981 982 /* everything went OK */ 983 ret = 0; 984 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); 985 986 if (bit_cnt) { /* Kick recovery if any bits were set */ 987 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 988 md_wakeup_thread(bitmap->mddev->thread); 989 } 990 991 printk(KERN_INFO "%s: bitmap initialized from disk: " 992 "read %lu/%lu pages, set %lu bits\n", 993 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt); 994 995 return 0; 996 997 err: 998 printk(KERN_INFO "%s: bitmap initialisation failed: %d\n", 999 bmname(bitmap), ret); 1000 return ret; 1001} 1002 1003void bitmap_write_all(struct bitmap *bitmap) 1004{ 1005 /* We don't actually write all bitmap blocks here, 1006 * just flag them as needing to be written 1007 */ 1008 int i; 1009 1010 for (i=0; i < bitmap->file_pages; i++) 1011 set_page_attr(bitmap, bitmap->filemap[i], 1012 BITMAP_PAGE_NEEDWRITE); 1013} 1014 1015 1016static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) 1017{ 1018 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); 1019 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1020 bitmap->bp[page].count += inc; 1021/* 1022 if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", 1023 (unsigned long long)offset, inc, bitmap->bp[page].count); 1024*/ 1025 bitmap_checkfree(bitmap, page); 1026} 1027static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1028 sector_t offset, int *blocks, 1029 int create); 1030 1031/* 1032 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1033 * out to disk 1034 */ 1035 1036void bitmap_daemon_work(struct bitmap *bitmap) 1037{ 1038 unsigned long j; 1039 unsigned long flags; 1040 struct page *page = NULL, *lastpage = NULL; 1041 int blocks; 1042 void *paddr; 1043 1044 if (bitmap == NULL) 1045 return; 1046 if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) 1047 goto done; 1048 1049 bitmap->daemon_lastrun = jiffies; 1050 if (bitmap->allclean) { 1051 bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1052 return; 1053 } 1054 bitmap->allclean = 1; 1055 1056 for (j = 0; j < bitmap->chunks; j++) { 1057 bitmap_counter_t *bmc; 1058 spin_lock_irqsave(&bitmap->lock, flags); 1059 if (!bitmap->filemap) { 1060 /* error or shutdown */ 1061 spin_unlock_irqrestore(&bitmap->lock, flags); 1062 break; 1063 } 1064 1065 page = filemap_get_page(bitmap, j); 1066 1067 if (page != lastpage) { 1068 /* skip this page unless it's marked as needing cleaning */ 1069 if (!test_page_attr(bitmap, page, BITMAP_PAGE_CLEAN)) { 1070 int need_write = test_page_attr(bitmap, page, 1071 BITMAP_PAGE_NEEDWRITE); 1072 if (need_write) 1073 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 1074 1075 spin_unlock_irqrestore(&bitmap->lock, flags); 1076 if (need_write) { 1077 write_page(bitmap, page, 0); 1078 bitmap->allclean = 0; 1079 } 1080 continue; 1081 } 1082 1083 /* grab the new page, sync and release the old */ 1084 if (lastpage != NULL) { 1085 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1086 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1087 spin_unlock_irqrestore(&bitmap->lock, flags); 1088 write_page(bitmap, lastpage, 0); 1089 } else { 1090 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1091 spin_unlock_irqrestore(&bitmap->lock, flags); 1092 } 1093 } else 1094 spin_unlock_irqrestore(&bitmap->lock, flags); 1095 lastpage = page; 1096 1097 /* We are possibly going to clear some bits, so make 1098 * sure that events_cleared is up-to-date. 1099 */ 1100 if (bitmap->need_sync) { 1101 bitmap_super_t *sb; 1102 bitmap->need_sync = 0; 1103 sb = kmap_atomic(bitmap->sb_page, KM_USER0); 1104 sb->events_cleared = 1105 cpu_to_le64(bitmap->events_cleared); 1106 kunmap_atomic(sb, KM_USER0); 1107 write_page(bitmap, bitmap->sb_page, 1); 1108 } 1109 spin_lock_irqsave(&bitmap->lock, flags); 1110 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1111 } 1112 bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1113 &blocks, 0); 1114 if (bmc) { 1115/* 1116 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); 1117*/ 1118 if (*bmc) 1119 bitmap->allclean = 0; 1120 1121 if (*bmc == 2) { 1122 *bmc=1; /* maybe clear the bit next time */ 1123 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1124 } else if (*bmc == 1) { 1125 /* we can clear the bit */ 1126 *bmc = 0; 1127 bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1128 -1); 1129 1130 /* clear the bit */ 1131 paddr = kmap_atomic(page, KM_USER0); 1132 if (bitmap->flags & BITMAP_HOSTENDIAN) 1133 clear_bit(file_page_offset(j), paddr); 1134 else 1135 ext2_clear_bit(file_page_offset(j), paddr); 1136 kunmap_atomic(paddr, KM_USER0); 1137 } 1138 } 1139 spin_unlock_irqrestore(&bitmap->lock, flags); 1140 } 1141 1142 /* now sync the final page */ 1143 if (lastpage != NULL) { 1144 spin_lock_irqsave(&bitmap->lock, flags); 1145 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1146 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1147 spin_unlock_irqrestore(&bitmap->lock, flags); 1148 write_page(bitmap, lastpage, 0); 1149 } else { 1150 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1151 spin_unlock_irqrestore(&bitmap->lock, flags); 1152 } 1153 } 1154 1155 done: 1156 if (bitmap->allclean == 0) 1157 bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; 1158} 1159 1160static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1161 sector_t offset, int *blocks, 1162 int create) 1163{ 1164 /* If 'create', we might release the lock and reclaim it. 1165 * The lock must have been taken with interrupts enabled. 1166 * If !create, we don't release the lock. 1167 */ 1168 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); 1169 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1170 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1171 sector_t csize; 1172 1173 if (bitmap_checkpage(bitmap, page, create) < 0) { 1174 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); 1175 *blocks = csize - (offset & (csize- 1)); 1176 return NULL; 1177 } 1178 /* now locked ... */ 1179 1180 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1181 /* should we use the first or second counter field 1182 * of the hijacked pointer? */ 1183 int hi = (pageoff > PAGE_COUNTER_MASK); 1184 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + 1185 PAGE_COUNTER_SHIFT - 1); 1186 *blocks = csize - (offset & (csize- 1)); 1187 return &((bitmap_counter_t *) 1188 &bitmap->bp[page].map)[hi]; 1189 } else { /* page is allocated */ 1190 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); 1191 *blocks = csize - (offset & (csize- 1)); 1192 return (bitmap_counter_t *) 1193 &(bitmap->bp[page].map[pageoff]); 1194 } 1195} 1196 1197int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) 1198{ 1199 if (!bitmap) return 0; 1200 1201 if (behind) { 1202 atomic_inc(&bitmap->behind_writes); 1203 PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n", 1204 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); 1205 } 1206 1207 while (sectors) { 1208 int blocks; 1209 bitmap_counter_t *bmc; 1210 1211 spin_lock_irq(&bitmap->lock); 1212 bmc = bitmap_get_counter(bitmap, offset, &blocks, 1); 1213 if (!bmc) { 1214 spin_unlock_irq(&bitmap->lock); 1215 return 0; 1216 } 1217 1218 if (unlikely((*bmc & COUNTER_MAX) == COUNTER_MAX)) { 1219 DEFINE_WAIT(__wait); 1220 /* note that it is safe to do the prepare_to_wait 1221 * after the test as long as we do it before dropping 1222 * the spinlock. 1223 */ 1224 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1225 TASK_UNINTERRUPTIBLE); 1226 spin_unlock_irq(&bitmap->lock); 1227 blk_unplug(bitmap->mddev->queue); 1228 schedule(); 1229 finish_wait(&bitmap->overflow_wait, &__wait); 1230 continue; 1231 } 1232 1233 switch(*bmc) { 1234 case 0: 1235 bitmap_file_set_bit(bitmap, offset); 1236 bitmap_count_page(bitmap,offset, 1); 1237 blk_plug_device_unlocked(bitmap->mddev->queue); 1238 /* fall through */ 1239 case 1: 1240 *bmc = 2; 1241 } 1242 1243 (*bmc)++; 1244 1245 spin_unlock_irq(&bitmap->lock); 1246 1247 offset += blocks; 1248 if (sectors > blocks) 1249 sectors -= blocks; 1250 else sectors = 0; 1251 } 1252 bitmap->allclean = 0; 1253 return 0; 1254} 1255 1256void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, 1257 int success, int behind) 1258{ 1259 if (!bitmap) return; 1260 if (behind) { 1261 atomic_dec(&bitmap->behind_writes); 1262 PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", 1263 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); 1264 } 1265 1266 while (sectors) { 1267 int blocks; 1268 unsigned long flags; 1269 bitmap_counter_t *bmc; 1270 1271 spin_lock_irqsave(&bitmap->lock, flags); 1272 bmc = bitmap_get_counter(bitmap, offset, &blocks, 0); 1273 if (!bmc) { 1274 spin_unlock_irqrestore(&bitmap->lock, flags); 1275 return; 1276 } 1277 1278 if (success && 1279 bitmap->events_cleared < bitmap->mddev->events) { 1280 bitmap->events_cleared = bitmap->mddev->events; 1281 bitmap->need_sync = 1; 1282 } 1283 1284 if (!success && ! (*bmc & NEEDED_MASK)) 1285 *bmc |= NEEDED_MASK; 1286 1287 if ((*bmc & COUNTER_MAX) == COUNTER_MAX) 1288 wake_up(&bitmap->overflow_wait); 1289 1290 (*bmc)--; 1291 if (*bmc <= 2) { 1292 set_page_attr(bitmap, 1293 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1294 BITMAP_PAGE_CLEAN); 1295 } 1296 spin_unlock_irqrestore(&bitmap->lock, flags); 1297 offset += blocks; 1298 if (sectors > blocks) 1299 sectors -= blocks; 1300 else sectors = 0; 1301 } 1302} 1303 1304int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, 1305 int degraded) 1306{ 1307 bitmap_counter_t *bmc; 1308 int rv; 1309 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ 1310 *blocks = 1024; 1311 return 1; /* always resync if no bitmap */ 1312 } 1313 spin_lock_irq(&bitmap->lock); 1314 bmc = bitmap_get_counter(bitmap, offset, blocks, 0); 1315 rv = 0; 1316 if (bmc) { 1317 /* locked */ 1318 if (RESYNC(*bmc)) 1319 rv = 1; 1320 else if (NEEDED(*bmc)) { 1321 rv = 1; 1322 if (!degraded) { /* don't set/clear bits if degraded */ 1323 *bmc |= RESYNC_MASK; 1324 *bmc &= ~NEEDED_MASK; 1325 } 1326 } 1327 } 1328 spin_unlock_irq(&bitmap->lock); 1329 bitmap->allclean = 0; 1330 return rv; 1331} 1332 1333void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) 1334{ 1335 bitmap_counter_t *bmc; 1336 unsigned long flags; 1337/* 1338 if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted); 1339*/ if (bitmap == NULL) { 1340 *blocks = 1024; 1341 return; 1342 } 1343 spin_lock_irqsave(&bitmap->lock, flags); 1344 bmc = bitmap_get_counter(bitmap, offset, blocks, 0); 1345 if (bmc == NULL) 1346 goto unlock; 1347 /* locked */ 1348/* 1349 if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks); 1350*/ 1351 if (RESYNC(*bmc)) { 1352 *bmc &= ~RESYNC_MASK; 1353 1354 if (!NEEDED(*bmc) && aborted) 1355 *bmc |= NEEDED_MASK; 1356 else { 1357 if (*bmc <= 2) { 1358 set_page_attr(bitmap, 1359 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1360 BITMAP_PAGE_CLEAN); 1361 } 1362 } 1363 } 1364 unlock: 1365 spin_unlock_irqrestore(&bitmap->lock, flags); 1366 bitmap->allclean = 0; 1367} 1368 1369void bitmap_close_sync(struct bitmap *bitmap) 1370{ 1371 /* Sync has finished, and any bitmap chunks that weren't synced 1372 * properly have been aborted. It remains to us to clear the 1373 * RESYNC bit wherever it is still on 1374 */ 1375 sector_t sector = 0; 1376 int blocks; 1377 if (!bitmap) 1378 return; 1379 while (sector < bitmap->mddev->resync_max_sectors) { 1380 bitmap_end_sync(bitmap, sector, &blocks, 0); 1381 sector += blocks; 1382 } 1383} 1384 1385void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) 1386{ 1387 sector_t s = 0; 1388 int blocks; 1389 1390 if (!bitmap) 1391 return; 1392 if (sector == 0) { 1393 bitmap->last_end_sync = jiffies; 1394 return; 1395 } 1396 if (time_before(jiffies, (bitmap->last_end_sync 1397 + bitmap->daemon_sleep * HZ))) 1398 return; 1399 wait_event(bitmap->mddev->recovery_wait, 1400 atomic_read(&bitmap->mddev->recovery_active) == 0); 1401 1402 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); 1403 s = 0; 1404 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1405 bitmap_end_sync(bitmap, s, &blocks, 0); 1406 s += blocks; 1407 } 1408 bitmap->last_end_sync = jiffies; 1409} 1410 1411static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1412{ 1413 /* For each chunk covered by any of these sectors, set the 1414 * counter to 1 and set resync_needed. They should all 1415 * be 0 at this point 1416 */ 1417 1418 int secs; 1419 bitmap_counter_t *bmc; 1420 spin_lock_irq(&bitmap->lock); 1421 bmc = bitmap_get_counter(bitmap, offset, &secs, 1); 1422 if (!bmc) { 1423 spin_unlock_irq(&bitmap->lock); 1424 return; 1425 } 1426 if (! *bmc) { 1427 struct page *page; 1428 *bmc = 1 | (needed?NEEDED_MASK:0); 1429 bitmap_count_page(bitmap, offset, 1); 1430 page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); 1431 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1432 } 1433 spin_unlock_irq(&bitmap->lock); 1434 bitmap->allclean = 0; 1435} 1436 1437/* dirty the memory and file bits for bitmap chunks "s" to "e" */ 1438void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) 1439{ 1440 unsigned long chunk; 1441 1442 for (chunk = s; chunk <= e; chunk++) { 1443 sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); 1444 bitmap_set_memory_bits(bitmap, sec, 1); 1445 bitmap_file_set_bit(bitmap, sec); 1446 } 1447} 1448 1449/* 1450 * flush out any pending updates 1451 */ 1452void bitmap_flush(mddev_t *mddev) 1453{ 1454 struct bitmap *bitmap = mddev->bitmap; 1455 int sleep; 1456 1457 if (!bitmap) /* there was no bitmap */ 1458 return; 1459 1460 /* run the daemon_work three time to ensure everything is flushed 1461 * that can be 1462 */ 1463 sleep = bitmap->daemon_sleep; 1464 bitmap->daemon_sleep = 0; 1465 bitmap_daemon_work(bitmap); 1466 bitmap_daemon_work(bitmap); 1467 bitmap_daemon_work(bitmap); 1468 bitmap->daemon_sleep = sleep; 1469 bitmap_update_sb(bitmap); 1470} 1471 1472/* 1473 * free memory that was allocated 1474 */ 1475static void bitmap_free(struct bitmap *bitmap) 1476{ 1477 unsigned long k, pages; 1478 struct bitmap_page *bp; 1479 1480 if (!bitmap) /* there was no bitmap */ 1481 return; 1482 1483 /* release the bitmap file and kill the daemon */ 1484 bitmap_file_put(bitmap); 1485 1486 bp = bitmap->bp; 1487 pages = bitmap->pages; 1488 1489 /* free all allocated memory */ 1490 1491 if (bp) /* deallocate the page memory */ 1492 for (k = 0; k < pages; k++) 1493 if (bp[k].map && !bp[k].hijacked) 1494 kfree(bp[k].map); 1495 kfree(bp); 1496 kfree(bitmap); 1497} 1498void bitmap_destroy(mddev_t *mddev) 1499{ 1500 struct bitmap *bitmap = mddev->bitmap; 1501 1502 if (!bitmap) /* there was no bitmap */ 1503 return; 1504 1505 mddev->bitmap = NULL; /* disconnect from the md device */ 1506 if (mddev->thread) 1507 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1508 1509 bitmap_free(bitmap); 1510} 1511 1512/* 1513 * initialize the bitmap structure 1514 * if this returns an error, bitmap_destroy must be called to do clean up 1515 */ 1516int bitmap_create(mddev_t *mddev) 1517{ 1518 struct bitmap *bitmap; 1519 unsigned long blocks = mddev->resync_max_sectors; 1520 unsigned long chunks; 1521 unsigned long pages; 1522 struct file *file = mddev->bitmap_file; 1523 int err; 1524 sector_t start; 1525 1526 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1527 1528 if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */ 1529 return 0; 1530 1531 BUG_ON(file && mddev->bitmap_offset); 1532 1533 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 1534 if (!bitmap) 1535 return -ENOMEM; 1536 1537 spin_lock_init(&bitmap->lock); 1538 atomic_set(&bitmap->pending_writes, 0); 1539 init_waitqueue_head(&bitmap->write_wait); 1540 init_waitqueue_head(&bitmap->overflow_wait); 1541 1542 bitmap->mddev = mddev; 1543 1544 bitmap->file = file; 1545 bitmap->offset = mddev->bitmap_offset; 1546 if (file) { 1547 get_file(file); 1548 do_sync_mapping_range(file->f_mapping, 0, LLONG_MAX, 1549 SYNC_FILE_RANGE_WAIT_BEFORE | 1550 SYNC_FILE_RANGE_WRITE | 1551 SYNC_FILE_RANGE_WAIT_AFTER); 1552 } 1553 /* read superblock from bitmap file (this sets bitmap->chunksize) */ 1554 err = bitmap_read_sb(bitmap); 1555 if (err) 1556 goto error; 1557 1558 bitmap->chunkshift = ffz(~bitmap->chunksize); 1559 1560 /* now that chunksize and chunkshift are set, we can use these macros */ 1561 chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) / 1562 CHUNK_BLOCK_RATIO(bitmap); 1563 pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; 1564 1565 BUG_ON(!pages); 1566 1567 bitmap->chunks = chunks; 1568 bitmap->pages = pages; 1569 bitmap->missing_pages = pages; 1570 bitmap->counter_bits = COUNTER_BITS; 1571 1572 bitmap->syncchunk = ~0UL; 1573 1574#ifdef INJECT_FATAL_FAULT_1 1575 bitmap->bp = NULL; 1576#else 1577 bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); 1578#endif 1579 err = -ENOMEM; 1580 if (!bitmap->bp) 1581 goto error; 1582 1583 /* now that we have some pages available, initialize the in-memory 1584 * bitmap from the on-disk bitmap */ 1585 start = 0; 1586 if (mddev->degraded == 0 1587 || bitmap->events_cleared == mddev->events) 1588 /* no need to keep dirty bits to optimise a re-add of a missing device */ 1589 start = mddev->recovery_cp; 1590 err = bitmap_init_from_disk(bitmap, start); 1591 1592 if (err) 1593 goto error; 1594 1595 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", 1596 pages, bmname(bitmap)); 1597 1598 mddev->bitmap = bitmap; 1599 1600 mddev->thread->timeout = bitmap->daemon_sleep * HZ; 1601 1602 bitmap_update_sb(bitmap); 1603 1604 return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; 1605 1606 error: 1607 bitmap_free(bitmap); 1608 return err; 1609} 1610 1611/* the bitmap API -- for raid personalities */ 1612EXPORT_SYMBOL(bitmap_startwrite); 1613EXPORT_SYMBOL(bitmap_endwrite); 1614EXPORT_SYMBOL(bitmap_start_sync); 1615EXPORT_SYMBOL(bitmap_end_sync); 1616EXPORT_SYMBOL(bitmap_unplug); 1617EXPORT_SYMBOL(bitmap_close_sync); 1618EXPORT_SYMBOL(bitmap_cond_end_sync);