/* bitmap.c at v2.6.26-rc2 (1603 lines, 42 kB) -- raw view */
1/* 2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 3 * 4 * bitmap_create - sets up the bitmap structure 5 * bitmap_destroy - destroys the bitmap structure 6 * 7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 8 * - added disk storage for bitmap 9 * - changes to allow various bitmap chunk sizes 10 */ 11 12/* 13 * Still to do: 14 * 15 * flush after percent set rather than just time based. (maybe both). 16 * wait if count gets too high, wake when it drops to half. 17 */ 18 19#include <linux/module.h> 20#include <linux/errno.h> 21#include <linux/slab.h> 22#include <linux/init.h> 23#include <linux/timer.h> 24#include <linux/sched.h> 25#include <linux/list.h> 26#include <linux/file.h> 27#include <linux/mount.h> 28#include <linux/buffer_head.h> 29#include <linux/raid/md.h> 30#include <linux/raid/bitmap.h> 31 32/* debug macros */ 33 34#define DEBUG 0 35 36#if DEBUG 37/* these are for debugging purposes only! */ 38 39/* define one and only one of these */ 40#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */ 41#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/ 42#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */ 43#define INJECT_FAULTS_4 0 /* undef */ 44#define INJECT_FAULTS_5 0 /* undef */ 45#define INJECT_FAULTS_6 0 46 47/* if these are defined, the driver will fail! debug only */ 48#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */ 49#define INJECT_FATAL_FAULT_2 0 /* undef */ 50#define INJECT_FATAL_FAULT_3 0 /* undef */ 51#endif 52 53//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ 54#define DPRINTK(x...) do { } while(0) 55 56#ifndef PRINTK 57# if DEBUG > 0 58# define PRINTK(x...) printk(KERN_DEBUG x) 59# else 60# define PRINTK(x...) 61# endif 62#endif 63 64static inline char * bmname(struct bitmap *bitmap) 65{ 66 return bitmap->mddev ? 
mdname(bitmap->mddev) : "mdX"; 67} 68 69 70/* 71 * just a placeholder - calls kmalloc for bitmap pages 72 */ 73static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) 74{ 75 unsigned char *page; 76 77#ifdef INJECT_FAULTS_1 78 page = NULL; 79#else 80 page = kmalloc(PAGE_SIZE, GFP_NOIO); 81#endif 82 if (!page) 83 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); 84 else 85 PRINTK("%s: bitmap_alloc_page: allocated page at %p\n", 86 bmname(bitmap), page); 87 return page; 88} 89 90/* 91 * for now just a placeholder -- just calls kfree for bitmap pages 92 */ 93static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) 94{ 95 PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page); 96 kfree(page); 97} 98 99/* 100 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 101 * 102 * 1) check to see if this page is allocated, if it's not then try to alloc 103 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 104 * page pointer directly as a counter 105 * 106 * if we find our page, we increment the page's refcount so that it stays 107 * allocated while we're using it 108 */ 109static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) 110{ 111 unsigned char *mappage; 112 113 if (page >= bitmap->pages) { 114 printk(KERN_ALERT 115 "%s: invalid bitmap page request: %lu (> %lu)\n", 116 bmname(bitmap), page, bitmap->pages-1); 117 return -EINVAL; 118 } 119 120 121 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 122 return 0; 123 124 if (bitmap->bp[page].map) /* page is already allocated, just return */ 125 return 0; 126 127 if (!create) 128 return -ENOENT; 129 130 spin_unlock_irq(&bitmap->lock); 131 132 /* this page has not been allocated yet */ 133 134 if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { 135 PRINTK("%s: bitmap map page allocation failed, hijacking\n", 136 bmname(bitmap)); 137 /* failed - set the hijacked flag so that we can use the 
138 * pointer as a counter */ 139 spin_lock_irq(&bitmap->lock); 140 if (!bitmap->bp[page].map) 141 bitmap->bp[page].hijacked = 1; 142 goto out; 143 } 144 145 /* got a page */ 146 147 spin_lock_irq(&bitmap->lock); 148 149 /* recheck the page */ 150 151 if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { 152 /* somebody beat us to getting the page */ 153 bitmap_free_page(bitmap, mappage); 154 return 0; 155 } 156 157 /* no page was in place and we have one, so install it */ 158 159 memset(mappage, 0, PAGE_SIZE); 160 bitmap->bp[page].map = mappage; 161 bitmap->missing_pages--; 162out: 163 return 0; 164} 165 166 167/* if page is completely empty, put it back on the free list, or dealloc it */ 168/* if page was hijacked, unmark the flag so it might get alloced next time */ 169/* Note: lock should be held when calling this */ 170static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) 171{ 172 char *ptr; 173 174 if (bitmap->bp[page].count) /* page is still busy */ 175 return; 176 177 /* page is no longer in use, it can be released */ 178 179 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 180 bitmap->bp[page].hijacked = 0; 181 bitmap->bp[page].map = NULL; 182 return; 183 } 184 185 /* normal case, free the page */ 186 187#if 0 188/* actually ... let's not. We will probably need the page again exactly when 189 * memory is tight and we are flusing to disk 190 */ 191 return; 192#else 193 ptr = bitmap->bp[page].map; 194 bitmap->bp[page].map = NULL; 195 bitmap->missing_pages++; 196 bitmap_free_page(bitmap, ptr); 197 return; 198#endif 199} 200 201 202/* 203 * bitmap file handling - read and write the bitmap file and its superblock 204 */ 205 206/* copy the pathname of a file to a buffer */ 207char *file_path(struct file *file, char *buf, int count) 208{ 209 if (!buf) 210 return NULL; 211 212 buf = d_path(&file->f_path, buf, count); 213 214 return IS_ERR(buf) ? 
NULL : buf; 215} 216 217/* 218 * basic page I/O operations 219 */ 220 221/* IO operations when bitmap is stored near all superblocks */ 222static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index) 223{ 224 /* choose a good rdev and read the page from there */ 225 226 mdk_rdev_t *rdev; 227 struct list_head *tmp; 228 struct page *page = alloc_page(GFP_KERNEL); 229 sector_t target; 230 231 if (!page) 232 return ERR_PTR(-ENOMEM); 233 234 rdev_for_each(rdev, tmp, mddev) { 235 if (! test_bit(In_sync, &rdev->flags) 236 || test_bit(Faulty, &rdev->flags)) 237 continue; 238 239 target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512); 240 241 if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) { 242 page->index = index; 243 attach_page_buffers(page, NULL); /* so that free_buffer will 244 * quietly no-op */ 245 return page; 246 } 247 } 248 return ERR_PTR(-EIO); 249 250} 251 252static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) 253{ 254 mdk_rdev_t *rdev; 255 struct list_head *tmp; 256 mddev_t *mddev = bitmap->mddev; 257 258 rdev_for_each(rdev, tmp, mddev) 259 if (test_bit(In_sync, &rdev->flags) 260 && !test_bit(Faulty, &rdev->flags)) { 261 int size = PAGE_SIZE; 262 if (page->index == bitmap->file_pages-1) 263 size = roundup(bitmap->last_page_size, 264 bdev_hardsect_size(rdev->bdev)); 265 /* Just make sure we aren't corrupting data or 266 * metadata 267 */ 268 if (bitmap->offset < 0) { 269 /* DATA BITMAP METADATA */ 270 if (bitmap->offset 271 + (long)(page->index * (PAGE_SIZE/512)) 272 + size/512 > 0) 273 /* bitmap runs in to metadata */ 274 return -EINVAL; 275 if (rdev->data_offset + mddev->size*2 276 > rdev->sb_offset*2 + bitmap->offset) 277 /* data runs in to bitmap */ 278 return -EINVAL; 279 } else if (rdev->sb_offset*2 < rdev->data_offset) { 280 /* METADATA BITMAP DATA */ 281 if (rdev->sb_offset*2 282 + bitmap->offset 283 + page->index*(PAGE_SIZE/512) + size/512 284 > rdev->data_offset) 285 /* bitmap 
runs in to data */ 286 return -EINVAL; 287 } else { 288 /* DATA METADATA BITMAP - no problems */ 289 } 290 md_super_write(mddev, rdev, 291 (rdev->sb_offset<<1) + bitmap->offset 292 + page->index * (PAGE_SIZE/512), 293 size, 294 page); 295 } 296 297 if (wait) 298 md_super_wait(mddev); 299 return 0; 300} 301 302static void bitmap_file_kick(struct bitmap *bitmap); 303/* 304 * write out a page to a file 305 */ 306static void write_page(struct bitmap *bitmap, struct page *page, int wait) 307{ 308 struct buffer_head *bh; 309 310 if (bitmap->file == NULL) { 311 switch (write_sb_page(bitmap, page, wait)) { 312 case -EINVAL: 313 bitmap->flags |= BITMAP_WRITE_ERROR; 314 } 315 } else { 316 317 bh = page_buffers(page); 318 319 while (bh && bh->b_blocknr) { 320 atomic_inc(&bitmap->pending_writes); 321 set_buffer_locked(bh); 322 set_buffer_mapped(bh); 323 submit_bh(WRITE, bh); 324 bh = bh->b_this_page; 325 } 326 327 if (wait) { 328 wait_event(bitmap->write_wait, 329 atomic_read(&bitmap->pending_writes)==0); 330 } 331 } 332 if (bitmap->flags & BITMAP_WRITE_ERROR) 333 bitmap_file_kick(bitmap); 334} 335 336static void end_bitmap_write(struct buffer_head *bh, int uptodate) 337{ 338 struct bitmap *bitmap = bh->b_private; 339 unsigned long flags; 340 341 if (!uptodate) { 342 spin_lock_irqsave(&bitmap->lock, flags); 343 bitmap->flags |= BITMAP_WRITE_ERROR; 344 spin_unlock_irqrestore(&bitmap->lock, flags); 345 } 346 if (atomic_dec_and_test(&bitmap->pending_writes)) 347 wake_up(&bitmap->write_wait); 348} 349 350/* copied from buffer.c */ 351static void 352__clear_page_buffers(struct page *page) 353{ 354 ClearPagePrivate(page); 355 set_page_private(page, 0); 356 page_cache_release(page); 357} 358static void free_buffers(struct page *page) 359{ 360 struct buffer_head *bh = page_buffers(page); 361 362 while (bh) { 363 struct buffer_head *next = bh->b_this_page; 364 free_buffer_head(bh); 365 bh = next; 366 } 367 __clear_page_buffers(page); 368 put_page(page); 369} 370 371/* read a page from 
a file. 372 * We both read the page, and attach buffers to the page to record the 373 * address of each block (using bmap). These addresses will be used 374 * to write the block later, completely bypassing the filesystem. 375 * This usage is similar to how swap files are handled, and allows us 376 * to write to a file with no concerns of memory allocation failing. 377 */ 378static struct page *read_page(struct file *file, unsigned long index, 379 struct bitmap *bitmap, 380 unsigned long count) 381{ 382 struct page *page = NULL; 383 struct inode *inode = file->f_path.dentry->d_inode; 384 struct buffer_head *bh; 385 sector_t block; 386 387 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, 388 (unsigned long long)index << PAGE_SHIFT); 389 390 page = alloc_page(GFP_KERNEL); 391 if (!page) 392 page = ERR_PTR(-ENOMEM); 393 if (IS_ERR(page)) 394 goto out; 395 396 bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0); 397 if (!bh) { 398 put_page(page); 399 page = ERR_PTR(-ENOMEM); 400 goto out; 401 } 402 attach_page_buffers(page, bh); 403 block = index << (PAGE_SHIFT - inode->i_blkbits); 404 while (bh) { 405 if (count == 0) 406 bh->b_blocknr = 0; 407 else { 408 bh->b_blocknr = bmap(inode, block); 409 if (bh->b_blocknr == 0) { 410 /* Cannot use this file! 
*/ 411 free_buffers(page); 412 page = ERR_PTR(-EINVAL); 413 goto out; 414 } 415 bh->b_bdev = inode->i_sb->s_bdev; 416 if (count < (1<<inode->i_blkbits)) 417 count = 0; 418 else 419 count -= (1<<inode->i_blkbits); 420 421 bh->b_end_io = end_bitmap_write; 422 bh->b_private = bitmap; 423 atomic_inc(&bitmap->pending_writes); 424 set_buffer_locked(bh); 425 set_buffer_mapped(bh); 426 submit_bh(READ, bh); 427 } 428 block++; 429 bh = bh->b_this_page; 430 } 431 page->index = index; 432 433 wait_event(bitmap->write_wait, 434 atomic_read(&bitmap->pending_writes)==0); 435 if (bitmap->flags & BITMAP_WRITE_ERROR) { 436 free_buffers(page); 437 page = ERR_PTR(-EIO); 438 } 439out: 440 if (IS_ERR(page)) 441 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", 442 (int)PAGE_SIZE, 443 (unsigned long long)index << PAGE_SHIFT, 444 PTR_ERR(page)); 445 return page; 446} 447 448/* 449 * bitmap file superblock operations 450 */ 451 452/* update the event counter and sync the superblock to disk */ 453void bitmap_update_sb(struct bitmap *bitmap) 454{ 455 bitmap_super_t *sb; 456 unsigned long flags; 457 458 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 459 return; 460 spin_lock_irqsave(&bitmap->lock, flags); 461 if (!bitmap->sb_page) { /* no superblock */ 462 spin_unlock_irqrestore(&bitmap->lock, flags); 463 return; 464 } 465 spin_unlock_irqrestore(&bitmap->lock, flags); 466 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 467 sb->events = cpu_to_le64(bitmap->mddev->events); 468 if (!bitmap->mddev->degraded) 469 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 470 kunmap_atomic(sb, KM_USER0); 471 write_page(bitmap, bitmap->sb_page, 1); 472} 473 474/* print out the bitmap file superblock */ 475void bitmap_print_sb(struct bitmap *bitmap) 476{ 477 bitmap_super_t *sb; 478 479 if (!bitmap || !bitmap->sb_page) 480 return; 481 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 482 printk(KERN_DEBUG "%s: bitmap file superblock:\n", 
bmname(bitmap)); 483 printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); 484 printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); 485 printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n", 486 *(__u32 *)(sb->uuid+0), 487 *(__u32 *)(sb->uuid+4), 488 *(__u32 *)(sb->uuid+8), 489 *(__u32 *)(sb->uuid+12)); 490 printk(KERN_DEBUG " events: %llu\n", 491 (unsigned long long) le64_to_cpu(sb->events)); 492 printk(KERN_DEBUG "events cleared: %llu\n", 493 (unsigned long long) le64_to_cpu(sb->events_cleared)); 494 printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state)); 495 printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize)); 496 printk(KERN_DEBUG " daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); 497 printk(KERN_DEBUG " sync size: %llu KB\n", 498 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 499 printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind)); 500 kunmap_atomic(sb, KM_USER0); 501} 502 503/* read the superblock from the bitmap file and initialize some bitmap fields */ 504static int bitmap_read_sb(struct bitmap *bitmap) 505{ 506 char *reason = NULL; 507 bitmap_super_t *sb; 508 unsigned long chunksize, daemon_sleep, write_behind; 509 unsigned long long events; 510 int err = -EINVAL; 511 512 /* page 0 is the superblock, read it... */ 513 if (bitmap->file) { 514 loff_t isize = i_size_read(bitmap->file->f_mapping->host); 515 int bytes = isize > PAGE_SIZE ? 
PAGE_SIZE : isize; 516 517 bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes); 518 } else { 519 bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0); 520 } 521 if (IS_ERR(bitmap->sb_page)) { 522 err = PTR_ERR(bitmap->sb_page); 523 bitmap->sb_page = NULL; 524 return err; 525 } 526 527 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 528 529 chunksize = le32_to_cpu(sb->chunksize); 530 daemon_sleep = le32_to_cpu(sb->daemon_sleep); 531 write_behind = le32_to_cpu(sb->write_behind); 532 533 /* verify that the bitmap-specific fields are valid */ 534 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 535 reason = "bad magic"; 536 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 537 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI) 538 reason = "unrecognized superblock version"; 539 else if (chunksize < PAGE_SIZE) 540 reason = "bitmap chunksize too small"; 541 else if ((1 << ffz(~chunksize)) != chunksize) 542 reason = "bitmap chunksize not a power of 2"; 543 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) 544 reason = "daemon sleep period out of range"; 545 else if (write_behind > COUNTER_MAX) 546 reason = "write-behind limit out of range (0 - 16383)"; 547 if (reason) { 548 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n", 549 bmname(bitmap), reason); 550 goto out; 551 } 552 553 /* keep the array size field of the bitmap superblock up to date */ 554 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 555 556 if (!bitmap->mddev->persistent) 557 goto success; 558 559 /* 560 * if we have a persistent array superblock, compare the 561 * bitmap's UUID and event counter to the mddev's 562 */ 563 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 564 printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n", 565 bmname(bitmap)); 566 goto out; 567 } 568 events = le64_to_cpu(sb->events); 569 if (events < bitmap->mddev->events) { 570 printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " 571 "-- forcing 
full recovery\n", bmname(bitmap), events, 572 (unsigned long long) bitmap->mddev->events); 573 sb->state |= cpu_to_le32(BITMAP_STALE); 574 } 575success: 576 /* assign fields using values from superblock */ 577 bitmap->chunksize = chunksize; 578 bitmap->daemon_sleep = daemon_sleep; 579 bitmap->daemon_lastrun = jiffies; 580 bitmap->max_write_behind = write_behind; 581 bitmap->flags |= le32_to_cpu(sb->state); 582 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 583 bitmap->flags |= BITMAP_HOSTENDIAN; 584 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 585 if (sb->state & cpu_to_le32(BITMAP_STALE)) 586 bitmap->events_cleared = bitmap->mddev->events; 587 err = 0; 588out: 589 kunmap_atomic(sb, KM_USER0); 590 if (err) 591 bitmap_print_sb(bitmap); 592 return err; 593} 594 595enum bitmap_mask_op { 596 MASK_SET, 597 MASK_UNSET 598}; 599 600/* record the state of the bitmap in the superblock. Return the old value */ 601static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, 602 enum bitmap_mask_op op) 603{ 604 bitmap_super_t *sb; 605 unsigned long flags; 606 int old; 607 608 spin_lock_irqsave(&bitmap->lock, flags); 609 if (!bitmap->sb_page) { /* can't set the state */ 610 spin_unlock_irqrestore(&bitmap->lock, flags); 611 return 0; 612 } 613 spin_unlock_irqrestore(&bitmap->lock, flags); 614 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 615 old = le32_to_cpu(sb->state) & bits; 616 switch (op) { 617 case MASK_SET: sb->state |= cpu_to_le32(bits); 618 break; 619 case MASK_UNSET: sb->state &= cpu_to_le32(~bits); 620 break; 621 default: BUG(); 622 } 623 kunmap_atomic(sb, KM_USER0); 624 return old; 625} 626 627/* 628 * general bitmap file operations 629 */ 630 631/* calculate the index of the page that contains this bit */ 632static inline unsigned long file_page_index(unsigned long chunk) 633{ 634 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT; 635} 636 637/* calculate the (bit) offset of this bit within a page */ 638static 
inline unsigned long file_page_offset(unsigned long chunk) 639{ 640 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1); 641} 642 643/* 644 * return a pointer to the page in the filemap that contains the given bit 645 * 646 * this lookup is complicated by the fact that the bitmap sb might be exactly 647 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page 648 * 0 or page 1 649 */ 650static inline struct page *filemap_get_page(struct bitmap *bitmap, 651 unsigned long chunk) 652{ 653 if (file_page_index(chunk) >= bitmap->file_pages) return NULL; 654 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)]; 655} 656 657 658static void bitmap_file_unmap(struct bitmap *bitmap) 659{ 660 struct page **map, *sb_page; 661 unsigned long *attr; 662 int pages; 663 unsigned long flags; 664 665 spin_lock_irqsave(&bitmap->lock, flags); 666 map = bitmap->filemap; 667 bitmap->filemap = NULL; 668 attr = bitmap->filemap_attr; 669 bitmap->filemap_attr = NULL; 670 pages = bitmap->file_pages; 671 bitmap->file_pages = 0; 672 sb_page = bitmap->sb_page; 673 bitmap->sb_page = NULL; 674 spin_unlock_irqrestore(&bitmap->lock, flags); 675 676 while (pages--) 677 if (map[pages]->index != 0) /* 0 is sb_page, release it below */ 678 free_buffers(map[pages]); 679 kfree(map); 680 kfree(attr); 681 682 if (sb_page) 683 free_buffers(sb_page); 684} 685 686static void bitmap_file_put(struct bitmap *bitmap) 687{ 688 struct file *file; 689 unsigned long flags; 690 691 spin_lock_irqsave(&bitmap->lock, flags); 692 file = bitmap->file; 693 bitmap->file = NULL; 694 spin_unlock_irqrestore(&bitmap->lock, flags); 695 696 if (file) 697 wait_event(bitmap->write_wait, 698 atomic_read(&bitmap->pending_writes)==0); 699 bitmap_file_unmap(bitmap); 700 701 if (file) { 702 struct inode *inode = file->f_path.dentry->d_inode; 703 invalidate_mapping_pages(inode->i_mapping, 0, -1); 704 fput(file); 705 } 706} 707 708 709/* 710 * bitmap_file_kick - if an error occurs while manipulating the 
bitmap file 711 * then it is no longer reliable, so we stop using it and we mark the file 712 * as failed in the superblock 713 */ 714static void bitmap_file_kick(struct bitmap *bitmap) 715{ 716 char *path, *ptr = NULL; 717 718 if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) { 719 bitmap_update_sb(bitmap); 720 721 if (bitmap->file) { 722 path = kmalloc(PAGE_SIZE, GFP_KERNEL); 723 if (path) 724 ptr = file_path(bitmap->file, path, PAGE_SIZE); 725 726 printk(KERN_ALERT 727 "%s: kicking failed bitmap file %s from array!\n", 728 bmname(bitmap), ptr ? ptr : ""); 729 730 kfree(path); 731 } else 732 printk(KERN_ALERT 733 "%s: disabling internal bitmap due to errors\n", 734 bmname(bitmap)); 735 } 736 737 bitmap_file_put(bitmap); 738 739 return; 740} 741 742enum bitmap_page_attr { 743 BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced 744 BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared 745 BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced 746}; 747 748static inline void set_page_attr(struct bitmap *bitmap, struct page *page, 749 enum bitmap_page_attr attr) 750{ 751 __set_bit((page->index<<2) + attr, bitmap->filemap_attr); 752} 753 754static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, 755 enum bitmap_page_attr attr) 756{ 757 __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); 758} 759 760static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, 761 enum bitmap_page_attr attr) 762{ 763 return test_bit((page->index<<2) + attr, bitmap->filemap_attr); 764} 765 766/* 767 * bitmap_file_set_bit -- called before performing a write to the md device 768 * to set (and eventually sync) a particular bit in the bitmap file 769 * 770 * we set the bit immediately, then we record the page number so that 771 * when an unplug occurs, we can flush the dirty pages out to disk 772 */ 773static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 774{ 
775 unsigned long bit; 776 struct page *page; 777 void *kaddr; 778 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); 779 780 if (!bitmap->filemap) { 781 return; 782 } 783 784 page = filemap_get_page(bitmap, chunk); 785 if (!page) return; 786 bit = file_page_offset(chunk); 787 788 /* set the bit */ 789 kaddr = kmap_atomic(page, KM_USER0); 790 if (bitmap->flags & BITMAP_HOSTENDIAN) 791 set_bit(bit, kaddr); 792 else 793 ext2_set_bit(bit, kaddr); 794 kunmap_atomic(kaddr, KM_USER0); 795 PRINTK("set file bit %lu page %lu\n", bit, page->index); 796 797 /* record page number so it gets flushed to disk when unplug occurs */ 798 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 799 800} 801 802/* this gets called when the md device is ready to unplug its underlying 803 * (slave) device queues -- before we let any writes go down, we need to 804 * sync the dirty pages of the bitmap file to disk */ 805void bitmap_unplug(struct bitmap *bitmap) 806{ 807 unsigned long i, flags; 808 int dirty, need_write; 809 struct page *page; 810 int wait = 0; 811 812 if (!bitmap) 813 return; 814 815 /* look at each page to see if there are any set bits that need to be 816 * flushed out to disk */ 817 for (i = 0; i < bitmap->file_pages; i++) { 818 spin_lock_irqsave(&bitmap->lock, flags); 819 if (!bitmap->filemap) { 820 spin_unlock_irqrestore(&bitmap->lock, flags); 821 return; 822 } 823 page = bitmap->filemap[i]; 824 dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 825 need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 826 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 827 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 828 if (dirty) 829 wait = 1; 830 spin_unlock_irqrestore(&bitmap->lock, flags); 831 832 if (dirty | need_write) 833 write_page(bitmap, page, 0); 834 } 835 if (wait) { /* if any writes were performed, we need to wait on them */ 836 if (bitmap->file) 837 wait_event(bitmap->write_wait, 838 atomic_read(&bitmap->pending_writes)==0); 839 else 840 
md_super_wait(bitmap->mddev); 841 } 842 if (bitmap->flags & BITMAP_WRITE_ERROR) 843 bitmap_file_kick(bitmap); 844} 845 846static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 847/* * bitmap_init_from_disk -- called at bitmap_create time to initialize 848 * the in-memory bitmap from the on-disk bitmap -- also, sets up the 849 * memory mapping of the bitmap file 850 * Special cases: 851 * if there's no bitmap file, or if the bitmap file had been 852 * previously kicked from the array, we mark all the bits as 853 * 1's in order to cause a full resync. 854 * 855 * We ignore all bits for sectors that end earlier than 'start'. 856 * This is used when reading an out-of-date bitmap... 857 */ 858static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 859{ 860 unsigned long i, chunks, index, oldindex, bit; 861 struct page *page = NULL, *oldpage = NULL; 862 unsigned long num_pages, bit_cnt = 0; 863 struct file *file; 864 unsigned long bytes, offset; 865 int outofdate; 866 int ret = -ENOSPC; 867 void *paddr; 868 869 chunks = bitmap->chunks; 870 file = bitmap->file; 871 872 BUG_ON(!file && !bitmap->offset); 873 874#ifdef INJECT_FAULTS_3 875 outofdate = 1; 876#else 877 outofdate = bitmap->flags & BITMAP_STALE; 878#endif 879 if (outofdate) 880 printk(KERN_INFO "%s: bitmap file is out of date, doing full " 881 "recovery\n", bmname(bitmap)); 882 883 bytes = (chunks + 7) / 8; 884 885 num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; 886 887 if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { 888 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", 889 bmname(bitmap), 890 (unsigned long) i_size_read(file->f_mapping->host), 891 bytes + sizeof(bitmap_super_t)); 892 goto err; 893 } 894 895 ret = -ENOMEM; 896 897 bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); 898 if (!bitmap->filemap) 899 goto err; 900 901 /* We need 4 bits per page, rounded up to a 
multiple of sizeof(unsigned long) */ 902 bitmap->filemap_attr = kzalloc( 903 roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 904 GFP_KERNEL); 905 if (!bitmap->filemap_attr) 906 goto err; 907 908 oldindex = ~0L; 909 910 for (i = 0; i < chunks; i++) { 911 int b; 912 index = file_page_index(i); 913 bit = file_page_offset(i); 914 if (index != oldindex) { /* this is a new page, read it in */ 915 int count; 916 /* unmap the old page, we're done with it */ 917 if (index == num_pages-1) 918 count = bytes + sizeof(bitmap_super_t) 919 - index * PAGE_SIZE; 920 else 921 count = PAGE_SIZE; 922 if (index == 0) { 923 /* 924 * if we're here then the superblock page 925 * contains some bits (PAGE_SIZE != sizeof sb) 926 * we've already read it in, so just use it 927 */ 928 page = bitmap->sb_page; 929 offset = sizeof(bitmap_super_t); 930 } else if (file) { 931 page = read_page(file, index, bitmap, count); 932 offset = 0; 933 } else { 934 page = read_sb_page(bitmap->mddev, bitmap->offset, index); 935 offset = 0; 936 } 937 if (IS_ERR(page)) { /* read error */ 938 ret = PTR_ERR(page); 939 goto err; 940 } 941 942 oldindex = index; 943 oldpage = page; 944 945 if (outofdate) { 946 /* 947 * if bitmap is out of date, dirty the 948 * whole page and write it out 949 */ 950 paddr = kmap_atomic(page, KM_USER0); 951 memset(paddr + offset, 0xff, 952 PAGE_SIZE - offset); 953 kunmap_atomic(paddr, KM_USER0); 954 write_page(bitmap, page, 1); 955 956 ret = -EIO; 957 if (bitmap->flags & BITMAP_WRITE_ERROR) { 958 /* release, page not in filemap yet */ 959 put_page(page); 960 goto err; 961 } 962 } 963 964 bitmap->filemap[bitmap->file_pages++] = page; 965 bitmap->last_page_size = count; 966 } 967 paddr = kmap_atomic(page, KM_USER0); 968 if (bitmap->flags & BITMAP_HOSTENDIAN) 969 b = test_bit(bit, paddr); 970 else 971 b = ext2_test_bit(bit, paddr); 972 kunmap_atomic(paddr, KM_USER0); 973 if (b) { 974 /* if the disk bit is set, set the memory bit */ 975 bitmap_set_memory_bits(bitmap, i << 
CHUNK_BLOCK_SHIFT(bitmap), 976 ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) 977 ); 978 bit_cnt++; 979 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 980 } 981 } 982 983 /* everything went OK */ 984 ret = 0; 985 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); 986 987 if (bit_cnt) { /* Kick recovery if any bits were set */ 988 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 989 md_wakeup_thread(bitmap->mddev->thread); 990 } 991 992 printk(KERN_INFO "%s: bitmap initialized from disk: " 993 "read %lu/%lu pages, set %lu bits\n", 994 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt); 995 996 return 0; 997 998 err: 999 printk(KERN_INFO "%s: bitmap initialisation failed: %d\n", 1000 bmname(bitmap), ret); 1001 return ret; 1002} 1003 1004void bitmap_write_all(struct bitmap *bitmap) 1005{ 1006 /* We don't actually write all bitmap blocks here, 1007 * just flag them as needing to be written 1008 */ 1009 int i; 1010 1011 for (i=0; i < bitmap->file_pages; i++) 1012 set_page_attr(bitmap, bitmap->filemap[i], 1013 BITMAP_PAGE_NEEDWRITE); 1014} 1015 1016 1017static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) 1018{ 1019 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); 1020 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1021 bitmap->bp[page].count += inc; 1022/* 1023 if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", 1024 (unsigned long long)offset, inc, bitmap->bp[page].count); 1025*/ 1026 bitmap_checkfree(bitmap, page); 1027} 1028static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1029 sector_t offset, int *blocks, 1030 int create); 1031 1032/* 1033 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1034 * out to disk 1035 */ 1036 1037void bitmap_daemon_work(struct bitmap *bitmap) 1038{ 1039 unsigned long j; 1040 unsigned long flags; 1041 struct page *page = NULL, *lastpage = NULL; 1042 int blocks; 1043 void *paddr; 1044 1045 if (bitmap == NULL) 1046 return; 1047 if 
(time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) 1048 goto done; 1049 1050 bitmap->daemon_lastrun = jiffies; 1051 if (bitmap->allclean) { 1052 bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1053 return; 1054 } 1055 bitmap->allclean = 1; 1056 1057 for (j = 0; j < bitmap->chunks; j++) { 1058 bitmap_counter_t *bmc; 1059 spin_lock_irqsave(&bitmap->lock, flags); 1060 if (!bitmap->filemap) { 1061 /* error or shutdown */ 1062 spin_unlock_irqrestore(&bitmap->lock, flags); 1063 break; 1064 } 1065 1066 page = filemap_get_page(bitmap, j); 1067 1068 if (page != lastpage) { 1069 /* skip this page unless it's marked as needing cleaning */ 1070 if (!test_page_attr(bitmap, page, BITMAP_PAGE_CLEAN)) { 1071 int need_write = test_page_attr(bitmap, page, 1072 BITMAP_PAGE_NEEDWRITE); 1073 if (need_write) 1074 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 1075 1076 spin_unlock_irqrestore(&bitmap->lock, flags); 1077 if (need_write) { 1078 write_page(bitmap, page, 0); 1079 bitmap->allclean = 0; 1080 } 1081 continue; 1082 } 1083 1084 /* grab the new page, sync and release the old */ 1085 if (lastpage != NULL) { 1086 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1087 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1088 spin_unlock_irqrestore(&bitmap->lock, flags); 1089 write_page(bitmap, lastpage, 0); 1090 } else { 1091 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1092 spin_unlock_irqrestore(&bitmap->lock, flags); 1093 } 1094 } else 1095 spin_unlock_irqrestore(&bitmap->lock, flags); 1096 lastpage = page; 1097/* 1098 printk("bitmap clean at page %lu\n", j); 1099*/ 1100 spin_lock_irqsave(&bitmap->lock, flags); 1101 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1102 } 1103 bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1104 &blocks, 0); 1105 if (bmc) { 1106/* 1107 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); 1108*/ 1109 if (*bmc) 1110 bitmap->allclean = 0; 1111 1112 if (*bmc == 
2) { 1113 *bmc=1; /* maybe clear the bit next time */ 1114 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1115 } else if (*bmc == 1) { 1116 /* we can clear the bit */ 1117 *bmc = 0; 1118 bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1119 -1); 1120 1121 /* clear the bit */ 1122 paddr = kmap_atomic(page, KM_USER0); 1123 if (bitmap->flags & BITMAP_HOSTENDIAN) 1124 clear_bit(file_page_offset(j), paddr); 1125 else 1126 ext2_clear_bit(file_page_offset(j), paddr); 1127 kunmap_atomic(paddr, KM_USER0); 1128 } 1129 } 1130 spin_unlock_irqrestore(&bitmap->lock, flags); 1131 } 1132 1133 /* now sync the final page */ 1134 if (lastpage != NULL) { 1135 spin_lock_irqsave(&bitmap->lock, flags); 1136 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1137 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1138 spin_unlock_irqrestore(&bitmap->lock, flags); 1139 write_page(bitmap, lastpage, 0); 1140 } else { 1141 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1142 spin_unlock_irqrestore(&bitmap->lock, flags); 1143 } 1144 } 1145 1146 done: 1147 if (bitmap->allclean == 0) 1148 bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; 1149}

/*
 * bitmap_get_counter - find the in-memory counter covering sector 'offset'
 * @bitmap: the bitmap to look in
 * @offset: sector whose chunk counter is wanted
 * @blocks: out: number of sectors from 'offset' to the end of the range
 *          covered by the returned counter (also set when NULL is returned)
 * @create: passed through to bitmap_checkpage(); non-zero asks it to
 *          allocate the counter page if it does not exist yet
 *
 * Must be called with bitmap->lock held.
 *
 * Returns a pointer to the counter, or NULL if bitmap_checkpage() fails.
 * When the page is "hijacked" the page pointer itself is used as two
 * counters, each covering half of the chunks that a full page would
 * cover, so *blocks is computed from that coarser granularity.
 */
static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
					    sector_t offset, int *blocks,
					    int create)
{
	/* If 'create', we might release the lock and reclaim it.
	 * The lock must have been taken with interrupts enabled.
	 * If !create, we don't release the lock.
	 */
	sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap);
	unsigned long page = chunk >> PAGE_COUNTER_SHIFT;
	unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT;
	sector_t csize;

	if (bitmap_checkpage(bitmap, page, create) < 0) {
		/* no counter available: still tell the caller how far
		 * it is to the end of this chunk so it can advance */
		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
		*blocks = csize - (offset & (csize - 1));
		return NULL;
	}
	/* now locked ... */

	if (bitmap->bp[page].hijacked) { /* hijacked pointer */
		/* should we use the first or second counter field
		 * of the hijacked pointer? */
		int hi = (pageoff > PAGE_COUNTER_MASK);

		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) +
					  PAGE_COUNTER_SHIFT - 1);
		*blocks = csize - (offset & (csize - 1));
		return &((bitmap_counter_t *)
			 &bitmap->bp[page].map)[hi];
	} else { /* page is allocated */
		csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap));
		*blocks = csize - (offset & (csize - 1));
		return (bitmap_counter_t *)
			&(bitmap->bp[page].map[pageoff]);
	}
}

/*
 * bitmap_startwrite - account for a write that is about to be issued
 * @bitmap:  the bitmap, may be NULL (then nothing is done)
 * @offset:  first sector of the write
 * @sectors: length of the write in sectors
 * @behind:  non-zero if this is a write-behind request; bumps
 *           bitmap->behind_writes
 *
 * Walks the range one counter at a time.  A counter that is 0 gets its
 * on-disk bit set and the queue plugged; counters 0 and 1 are raised to 2
 * before the per-write increment.  If a counter has reached COUNTER_MAX
 * the caller sleeps on overflow_wait until bitmap_endwrite() wakes it.
 *
 * Always returns 0.  If a counter cannot be obtained the walk stops early.
 */
int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind)
{
	if (!bitmap)
		return 0;

	if (behind) {
		atomic_inc(&bitmap->behind_writes);
		PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n",
		       atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
	}

	while (sectors) {
		int blocks;
		bitmap_counter_t *bmc;

		spin_lock_irq(&bitmap->lock);
		bmc = bitmap_get_counter(bitmap, offset, &blocks, 1);
		if (!bmc) {
			spin_unlock_irq(&bitmap->lock);
			return 0;
		}

		if (unlikely((*bmc & COUNTER_MAX) == COUNTER_MAX)) {
			DEFINE_WAIT(__wait);
			/* note that it is safe to do the prepare_to_wait
			 * after the test as long as we do it before dropping
			 * the spinlock.
			 */
			prepare_to_wait(&bitmap->overflow_wait, &__wait,
					TASK_UNINTERRUPTIBLE);
			spin_unlock_irq(&bitmap->lock);
			blk_unplug(bitmap->mddev->queue);
			schedule();
			finish_wait(&bitmap->overflow_wait, &__wait);
			/* retry the same offset */
			continue;
		}

		switch (*bmc) {
		case 0:
			bitmap_file_set_bit(bitmap, offset);
			bitmap_count_page(bitmap, offset, 1);
			blk_plug_device(bitmap->mddev->queue);
			/* fall through */
		case 1:
			*bmc = 2;
		}

		(*bmc)++;

		spin_unlock_irq(&bitmap->lock);

		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
	bitmap->allclean = 0;
	return 0;
}

/*
 * bitmap_endwrite - account for a write that has completed
 * @bitmap:  the bitmap, may be NULL (then nothing is done)
 * @offset:  first sector of the write
 * @sectors: length of the write in sectors
 * @success: zero if the write failed; the chunk is then flagged with
 *           NEEDED_MASK
 * @behind:  non-zero if this was a write-behind request; drops
 *           bitmap->behind_writes
 *
 * Decrements each counter in the range.  Wakes any writer sleeping in
 * bitmap_startwrite() when a saturated counter is about to drop, and
 * marks the file page as needing cleaning once the counter is <= 2.
 */
void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors,
		     int success, int behind)
{
	if (!bitmap)
		return;
	if (behind) {
		atomic_dec(&bitmap->behind_writes);
		PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
		       atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
	}

	while (sectors) {
		int blocks;
		unsigned long flags;
		bitmap_counter_t *bmc;

		spin_lock_irqsave(&bitmap->lock, flags);
		bmc = bitmap_get_counter(bitmap, offset, &blocks, 0);
		if (!bmc) {
			spin_unlock_irqrestore(&bitmap->lock, flags);
			return;
		}

		if (!success && !(*bmc & NEEDED_MASK))
			*bmc |= NEEDED_MASK;

		if ((*bmc & COUNTER_MAX) == COUNTER_MAX)
			wake_up(&bitmap->overflow_wait);

		(*bmc)--;
		if (*bmc <= 2) {
			set_page_attr(bitmap,
				      filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
				      BITMAP_PAGE_CLEAN);
		}
		spin_unlock_irqrestore(&bitmap->lock, flags);
		offset += blocks;
		if (sectors > blocks)
			sectors -= blocks;
		else
			sectors = 0;
	}
}

/*
 * bitmap_start_sync - ask whether the chunk at 'offset' needs resyncing
 * @bitmap:   the bitmap, may be NULL
 * @offset:   sector being considered
 * @blocks:   out: sectors until the end of the counter's range
 *            (1024 when there is no bitmap)
 * @degraded: if non-zero the counter flags are left untouched
 *
 * Returns 1 if the chunk must be resynced (always, when there is no
 * bitmap), 0 otherwise.  Unless @degraded, a chunk flagged NEEDED is
 * moved to the RESYNC state.
 */
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
		      int degraded)
{
	bitmap_counter_t *bmc;
	int rv;

	if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */
		*blocks = 1024;
		return 1; /* always resync if no bitmap */
	}
	spin_lock_irq(&bitmap->lock);
	bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
	rv = 0;
	if (bmc) {
		/* locked */
		if (RESYNC(*bmc))
			rv = 1;
		else if (NEEDED(*bmc)) {
			rv = 1;
			if (!degraded) { /* don't set/clear bits if degraded */
				*bmc |= RESYNC_MASK;
				*bmc &= ~NEEDED_MASK;
			}
		}
	}
	spin_unlock_irq(&bitmap->lock);
	bitmap->allclean = 0;
	return rv;
}

/*
 * bitmap_end_sync - finish (or abort) resync of the chunk at 'offset'
 * @bitmap:  the bitmap, may be NULL
 * @offset:  sector inside the chunk
 * @blocks:  out: sectors until the end of the counter's range
 *           (1024 when there is no bitmap)
 * @aborted: non-zero if the resync of this chunk did not complete
 *
 * Clears the RESYNC flag if it is set.  An aborted chunk that is not
 * already flagged NEEDED is flagged NEEDED again; otherwise, once the
 * counter is <= 2, the file page is marked as needing cleaning.
 */
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
{
	bitmap_counter_t *bmc;
	unsigned long flags;

	if (bitmap == NULL) {
		*blocks = 1024;
		return;
	}
	spin_lock_irqsave(&bitmap->lock, flags);
	bmc = bitmap_get_counter(bitmap, offset, blocks, 0);
	if (bmc == NULL)
		goto unlock;
	/* locked */
	if (RESYNC(*bmc)) {
		*bmc &= ~RESYNC_MASK;

		if (!NEEDED(*bmc) && aborted)
			*bmc |= NEEDED_MASK;
		else {
			if (*bmc <= 2) {
				set_page_attr(bitmap,
					      filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)),
					      BITMAP_PAGE_CLEAN);
			}
		}
	}
 unlock:
	spin_unlock_irqrestore(&bitmap->lock, flags);
	bitmap->allclean = 0;
}

/*
 * bitmap_close_sync - clear any RESYNC flags left over after a sync
 * @bitmap: the bitmap, may be NULL (then nothing is done)
 */
void bitmap_close_sync(struct bitmap *bitmap)
{
	/* Sync has finished, and any bitmap chunks that weren't synced
	 * properly have been aborted.  It remains to us to clear the
	 * RESYNC bit wherever it is still on
	 */
	sector_t sector = 0;
	int blocks;

	if (!bitmap)
		return;
	while (sector < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, sector, &blocks, 0);
		sector += blocks;
	}
}

/*
 * bitmap_cond_end_sync - periodically end sync on everything below 'sector'
 * @bitmap: the bitmap, may be NULL (then nothing is done)
 * @sector: current resync position; 0 just restarts the interval timer
 *
 * At most once per daemon_sleep seconds: waits for
 * mddev->recovery_active to reach zero, then calls bitmap_end_sync()
 * (not aborted) on every counter range below 'sector' rounded down to
 * a chunk boundary.
 */
void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
{
	sector_t s = 0;
	int blocks;

	if (!bitmap)
		return;
	if (sector == 0) {
		bitmap->last_end_sync = jiffies;
		return;
	}
	if (time_before(jiffies, (bitmap->last_end_sync
				  + bitmap->daemon_sleep * HZ)))
		return;
	wait_event(bitmap->mddev->recovery_wait,
		   atomic_read(&bitmap->mddev->recovery_active) == 0);

	/* round down to the start of the chunk containing 'sector' */
	sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
	while (s < sector && s < bitmap->mddev->resync_max_sectors) {
		bitmap_end_sync(bitmap, s, &blocks, 0);
		s += blocks;
	}
	bitmap->last_end_sync = jiffies;
}

/*
 * bitmap_set_memory_bits - mark the chunk containing 'offset' dirty in memory
 * @bitmap: the bitmap
 * @offset: sector inside the chunk
 * @needed: non-zero to also flag the chunk with NEEDED_MASK
 *
 * Only acts if the counter is currently 0.
 */
static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed)
{
	/* For each chunk covered by any of these sectors, set the
	 * counter to 1 and set resync_needed.  They should all
	 * be 0 at this point
	 */

	int secs;
	bitmap_counter_t *bmc;

	spin_lock_irq(&bitmap->lock);
	bmc = bitmap_get_counter(bitmap, offset, &secs, 1);
	if (!bmc) {
		spin_unlock_irq(&bitmap->lock);
		return;
	}
	if (!*bmc) {
		struct page *page;

		*bmc = 1 | (needed ? NEEDED_MASK : 0);
		bitmap_count_page(bitmap, offset, 1);
		page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap));
		set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
	}
	spin_unlock_irq(&bitmap->lock);
	bitmap->allclean = 0;
}

/* dirty the memory and file bits for bitmap chunks "s" to "e" (inclusive) */
void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
{
	unsigned long chunk;

	for (chunk = s; chunk <= e; chunk++) {
		/* widen before shifting: 'chunk' is unsigned long, so on
		 * 32-bit builds with a 64-bit sector_t the shift would
		 * otherwise wrap for chunks beyond 2^32 sectors */
		sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);

		bitmap_set_memory_bits(bitmap, sec, 1);
		bitmap_file_set_bit(bitmap, sec);
	}
}

/*
 * flush out any pending updates
 */
void bitmap_flush(mddev_t *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;
	int sleep;

	if (!bitmap) /* there was no bitmap */
		return;

	/* run the daemon_work three times to ensure everything is flushed
	 * that can be; daemon_sleep is zeroed meanwhile so that the
	 * daemon's "too soon since last run" check does not skip the work
	 */
	sleep = bitmap->daemon_sleep;
	bitmap->daemon_sleep = 0;
	bitmap_daemon_work(bitmap);
	bitmap_daemon_work(bitmap);
	bitmap_daemon_work(bitmap);
	bitmap->daemon_sleep = sleep;
	bitmap_update_sb(bitmap);
}

/*
 * free memory that was allocated
 *
 * Accepts NULL.  Also drops the bitmap file via bitmap_file_put().
 */
static void bitmap_free(struct bitmap *bitmap)
{
	unsigned long k, pages;
	struct bitmap_page *bp;

	if (!bitmap) /* there was no bitmap */
		return;

	/* release the bitmap file and kill the daemon */
	bitmap_file_put(bitmap);

	bp = bitmap->bp;
	pages = bitmap->pages;

	/* free all allocated memory */

	if (bp) /* deallocate the page memory */
		for (k = 0; k < pages; k++)
			/* a hijacked 'map' holds counters, not a pointer */
			if (bp[k].map && !bp[k].hijacked)
				kfree(bp[k].map);
	kfree(bp);
	kfree(bitmap);
}

/*
 * bitmap_destroy - detach the bitmap from the md device and free it
 * @mddev: the md device; a device without a bitmap is left untouched
 */
void bitmap_destroy(mddev_t *mddev)
{
	struct bitmap *bitmap = mddev->bitmap;

	if (!bitmap) /* there was no bitmap */
		return;

	mddev->bitmap = NULL; /* disconnect from the md device */
	if (mddev->thread)
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;

	bitmap_free(bitmap);
}

/*
 * initialize the bitmap structure
 *
 * Returns 0 on success, or when no bitmap is configured for the device.
 *
 * Failures that jump to 'error' free everything here and leave
 * mddev->bitmap unset.  The one exception is the final -EIO return
 * (BITMAP_WRITE_ERROR set after the superblock update): by then the
 * bitmap has been attached to mddev, so the caller must call
 * bitmap_destroy to clean up.
 */
int bitmap_create(mddev_t *mddev)
{
	struct bitmap *bitmap;
	/* sector_t, not unsigned long: resync_max_sectors does not fit in
	 * 32 bits for large arrays on 32-bit builds */
	sector_t blocks = mddev->resync_max_sectors;
	unsigned long chunks;
	unsigned long pages;
	struct file *file = mddev->bitmap_file;
	int err;
	sector_t start;

	BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

	if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
		return 0;

	BUG_ON(file && mddev->bitmap_offset);

	bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
	if (!bitmap)
		return -ENOMEM;

	spin_lock_init(&bitmap->lock);
	atomic_set(&bitmap->pending_writes, 0);
	init_waitqueue_head(&bitmap->write_wait);
	init_waitqueue_head(&bitmap->overflow_wait);

	bitmap->mddev = mddev;

	bitmap->file = file;
	bitmap->offset = mddev->bitmap_offset;
	if (file) {
		get_file(file);
		do_sync_mapping_range(file->f_mapping, 0, LLONG_MAX,
				      SYNC_FILE_RANGE_WAIT_BEFORE |
				      SYNC_FILE_RANGE_WRITE |
				      SYNC_FILE_RANGE_WAIT_AFTER);
	}
	/* read superblock from bitmap file (this sets bitmap->chunksize) */
	err = bitmap_read_sb(bitmap);
	if (err)
		goto error;

	bitmap->chunkshift = ffz(~bitmap->chunksize);

	/* now that chunksize and chunkshift are set, we can use these macros.
	 * Round up to whole chunks using a shift rather than a division so
	 * no 64-bit divide is needed on 32-bit builds.
	 * NOTE(review): assumes CHUNK_BLOCK_RATIO == 1 << CHUNK_BLOCK_SHIFT,
	 * i.e. chunksize is a power of two -- confirm against bitmap.h.
	 */
	chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >>
			CHUNK_BLOCK_SHIFT(bitmap);
	pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO;

	BUG_ON(!pages);

	bitmap->chunks = chunks;
	bitmap->pages = pages;
	bitmap->missing_pages = pages;
	bitmap->counter_bits = COUNTER_BITS;

	bitmap->syncchunk = ~0UL;

#ifdef INJECT_FATAL_FAULT_1
	bitmap->bp = NULL;
#else
	/* kcalloc zeroes the array and checks pages * size for overflow */
	bitmap->bp = kcalloc(pages, sizeof(*bitmap->bp), GFP_KERNEL);
#endif
	err = -ENOMEM;
	if (!bitmap->bp)
		goto error;

	/* now that we have some pages available, initialize the in-memory
	 * bitmap from the on-disk bitmap */
	start = 0;
	if (mddev->degraded == 0
	    || bitmap->events_cleared == mddev->events)
		/* no need to keep dirty bits to optimise a re-add of a missing device */
		start = mddev->recovery_cp;
	err = bitmap_init_from_disk(bitmap, start);

	if (err)
		goto error;

	printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
	       pages, bmname(bitmap));

	mddev->bitmap = bitmap;

	mddev->thread->timeout = bitmap->daemon_sleep * HZ;

	bitmap_update_sb(bitmap);

	return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0;

 error:
	bitmap_free(bitmap);
	return err;
}

/* the bitmap API -- for raid personalities */
EXPORT_SYMBOL(bitmap_startwrite);
EXPORT_SYMBOL(bitmap_endwrite);
EXPORT_SYMBOL(bitmap_start_sync);
EXPORT_SYMBOL(bitmap_end_sync);
EXPORT_SYMBOL(bitmap_unplug);
EXPORT_SYMBOL(bitmap_close_sync);
EXPORT_SYMBOL(bitmap_cond_end_sync);