1/* 2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 3 * 4 * bitmap_create - sets up the bitmap structure 5 * bitmap_destroy - destroys the bitmap structure 6 * 7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 8 * - added disk storage for bitmap 9 * - changes to allow various bitmap chunk sizes 10 */ 11 12/* 13 * Still to do: 14 * 15 * flush after percent set rather than just time based. (maybe both). 16 * wait if count gets too high, wake when it drops to half. 17 */ 18 19#include <linux/module.h> 20#include <linux/errno.h> 21#include <linux/slab.h> 22#include <linux/init.h> 23#include <linux/timer.h> 24#include <linux/sched.h> 25#include <linux/list.h> 26#include <linux/file.h> 27#include <linux/mount.h> 28#include <linux/buffer_head.h> 29#include <linux/raid/md.h> 30#include <linux/raid/bitmap.h> 31 32/* debug macros */ 33 34#define DEBUG 0 35 36#if DEBUG 37/* these are for debugging purposes only! */ 38 39/* define one and only one of these */ 40#define INJECT_FAULTS_1 0 /* cause bitmap_alloc_page to fail always */ 41#define INJECT_FAULTS_2 0 /* cause bitmap file to be kicked when first bit set*/ 42#define INJECT_FAULTS_3 0 /* treat bitmap file as kicked at init time */ 43#define INJECT_FAULTS_4 0 /* undef */ 44#define INJECT_FAULTS_5 0 /* undef */ 45#define INJECT_FAULTS_6 0 46 47/* if these are defined, the driver will fail! debug only */ 48#define INJECT_FATAL_FAULT_1 0 /* fail kmalloc, causing bitmap_create to fail */ 49#define INJECT_FATAL_FAULT_2 0 /* undef */ 50#define INJECT_FATAL_FAULT_3 0 /* undef */ 51#endif 52 53//#define DPRINTK PRINTK /* set this NULL to avoid verbose debug output */ 54#define DPRINTK(x...) do { } while(0) 55 56#ifndef PRINTK 57# if DEBUG > 0 58# define PRINTK(x...) printk(KERN_DEBUG x) 59# else 60# define PRINTK(x...) 61# endif 62#endif 63 64static inline char * bmname(struct bitmap *bitmap) 65{ 66 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 67} 68 69 70/* 71 * just a placeholder - calls kmalloc for bitmap pages 72 */ 73static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) 74{ 75 unsigned char *page; 76 77#ifdef INJECT_FAULTS_1 78 page = NULL; 79#else 80 page = kmalloc(PAGE_SIZE, GFP_NOIO); 81#endif 82 if (!page) 83 printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); 84 else 85 PRINTK("%s: bitmap_alloc_page: allocated page at %p\n", 86 bmname(bitmap), page); 87 return page; 88} 89 90/* 91 * for now just a placeholder -- just calls kfree for bitmap pages 92 */ 93static void bitmap_free_page(struct bitmap *bitmap, unsigned char *page) 94{ 95 PRINTK("%s: bitmap_free_page: free page %p\n", bmname(bitmap), page); 96 kfree(page); 97} 98 99/* 100 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 101 * 102 * 1) check to see if this page is allocated, if it's not then try to alloc 103 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 104 * page pointer directly as a counter 105 * 106 * if we find our page, we increment the page's refcount so that it stays 107 * allocated while we're using it 108 */ 109static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int create) 110{ 111 unsigned char *mappage; 112 113 if (page >= bitmap->pages) { 114 printk(KERN_ALERT 115 "%s: invalid bitmap page request: %lu (> %lu)\n", 116 bmname(bitmap), page, bitmap->pages-1); 117 return -EINVAL; 118 } 119 120 121 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 122 return 0; 123 124 if (bitmap->bp[page].map) /* page is already allocated, just return */ 125 return 0; 126 127 if (!create) 128 return -ENOENT; 129 130 spin_unlock_irq(&bitmap->lock); 131 132 /* this page has not been allocated yet */ 133 134 if ((mappage = bitmap_alloc_page(bitmap)) == NULL) { 135 PRINTK("%s: bitmap map page allocation failed, hijacking\n", 136 bmname(bitmap)); 137 /* failed - set the hijacked flag so that we can use the 138 * pointer as a counter */ 139 spin_lock_irq(&bitmap->lock); 140 if (!bitmap->bp[page].map) 141 bitmap->bp[page].hijacked = 1; 142 goto out; 143 } 144 145 /* got a page */ 146 147 spin_lock_irq(&bitmap->lock); 148 149 /* recheck the page */ 150 151 if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { 152 /* somebody beat us to getting the page */ 153 bitmap_free_page(bitmap, mappage); 154 return 0; 155 } 156 157 /* no page was in place and we have one, so install it */ 158 159 memset(mappage, 0, PAGE_SIZE); 160 bitmap->bp[page].map = mappage; 161 bitmap->missing_pages--; 162out: 163 return 0; 164} 165 166 167/* if page is completely empty, put it back on the free list, or dealloc it */ 168/* if page was hijacked, unmark the flag so it might get alloced next time */ 169/* Note: lock should be held when calling this */ 170static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page) 171{ 172 char *ptr; 173 174 if (bitmap->bp[page].count) /* page is still busy */ 175 return; 176 177 /* page is no longer in use, it can be released */ 178 179 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 180 bitmap->bp[page].hijacked = 0; 181 bitmap->bp[page].map = NULL; 182 return; 183 } 184 185 /* normal case, free the page */ 186 187#if 0 188/* actually ... let's not. We will probably need the page again exactly when 189 * memory is tight and we are flusing to disk 190 */ 191 return; 192#else 193 ptr = bitmap->bp[page].map; 194 bitmap->bp[page].map = NULL; 195 bitmap->missing_pages++; 196 bitmap_free_page(bitmap, ptr); 197 return; 198#endif 199} 200 201 202/* 203 * bitmap file handling - read and write the bitmap file and its superblock 204 */ 205 206/* copy the pathname of a file to a buffer */ 207char *file_path(struct file *file, char *buf, int count) 208{ 209 if (!buf) 210 return NULL; 211 212 buf = d_path(&file->f_path, buf, count); 213 214 return IS_ERR(buf) ? NULL : buf; 215} 216 217/* 218 * basic page I/O operations 219 */ 220 221/* IO operations when bitmap is stored near all superblocks */ 222static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index) 223{ 224 /* choose a good rdev and read the page from there */ 225 226 mdk_rdev_t *rdev; 227 struct list_head *tmp; 228 struct page *page = alloc_page(GFP_KERNEL); 229 sector_t target; 230 231 if (!page) 232 return ERR_PTR(-ENOMEM); 233 234 rdev_for_each(rdev, tmp, mddev) { 235 if (! test_bit(In_sync, &rdev->flags) 236 || test_bit(Faulty, &rdev->flags)) 237 continue; 238 239 target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512); 240 241 if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) { 242 page->index = index; 243 attach_page_buffers(page, NULL); /* so that free_buffer will 244 * quietly no-op */ 245 return page; 246 } 247 } 248 return ERR_PTR(-EIO); 249 250} 251 252static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) 253{ 254 mdk_rdev_t *rdev; 255 struct list_head *tmp; 256 mddev_t *mddev = bitmap->mddev; 257 258 rdev_for_each(rdev, tmp, mddev) 259 if (test_bit(In_sync, &rdev->flags) 260 && !test_bit(Faulty, &rdev->flags)) { 261 int size = PAGE_SIZE; 262 if (page->index == bitmap->file_pages-1) 263 size = roundup(bitmap->last_page_size, 264 bdev_hardsect_size(rdev->bdev)); 265 /* Just make sure we aren't corrupting data or 266 * metadata 267 */ 268 if (bitmap->offset < 0) { 269 /* DATA BITMAP METADATA */ 270 if (bitmap->offset 271 + (long)(page->index * (PAGE_SIZE/512)) 272 + size/512 > 0) 273 /* bitmap runs in to metadata */ 274 return -EINVAL; 275 if (rdev->data_offset + mddev->size*2 276 > rdev->sb_offset*2 + bitmap->offset) 277 /* data runs in to bitmap */ 278 return -EINVAL; 279 } else if (rdev->sb_offset*2 < rdev->data_offset) { 280 /* METADATA BITMAP DATA */ 281 if (rdev->sb_offset*2 282 + bitmap->offset 283 + page->index*(PAGE_SIZE/512) + size/512 284 > rdev->data_offset) 285 /* bitmap runs in to data */ 286 return -EINVAL; 287 } else { 288 /* DATA METADATA BITMAP - no problems */ 289 } 290 md_super_write(mddev, rdev, 291 (rdev->sb_offset<<1) + bitmap->offset 292 + page->index * (PAGE_SIZE/512), 293 size, 294 page); 295 } 296 297 if (wait) 298 md_super_wait(mddev); 299 return 0; 300} 301 302static void bitmap_file_kick(struct bitmap *bitmap); 303/* 304 * write out a page to a file 305 */ 306static void write_page(struct bitmap *bitmap, struct page *page, int wait) 307{ 308 struct buffer_head *bh; 309 310 if (bitmap->file == NULL) { 311 switch (write_sb_page(bitmap, page, wait)) { 312 case -EINVAL: 313 bitmap->flags |= BITMAP_WRITE_ERROR; 314 } 315 } else { 316 317 bh = page_buffers(page); 318 319 while (bh && bh->b_blocknr) { 320 atomic_inc(&bitmap->pending_writes); 321 set_buffer_locked(bh); 322 set_buffer_mapped(bh); 323 submit_bh(WRITE, bh); 324 bh = bh->b_this_page; 325 } 326 327 if (wait) { 328 wait_event(bitmap->write_wait, 329 atomic_read(&bitmap->pending_writes)==0); 330 } 331 } 332 if (bitmap->flags & BITMAP_WRITE_ERROR) 333 bitmap_file_kick(bitmap); 334} 335 336static void end_bitmap_write(struct buffer_head *bh, int uptodate) 337{ 338 struct bitmap *bitmap = bh->b_private; 339 unsigned long flags; 340 341 if (!uptodate) { 342 spin_lock_irqsave(&bitmap->lock, flags); 343 bitmap->flags |= BITMAP_WRITE_ERROR; 344 spin_unlock_irqrestore(&bitmap->lock, flags); 345 } 346 if (atomic_dec_and_test(&bitmap->pending_writes)) 347 wake_up(&bitmap->write_wait); 348} 349 350/* copied from buffer.c */ 351static void 352__clear_page_buffers(struct page *page) 353{ 354 ClearPagePrivate(page); 355 set_page_private(page, 0); 356 page_cache_release(page); 357} 358static void free_buffers(struct page *page) 359{ 360 struct buffer_head *bh = page_buffers(page); 361 362 while (bh) { 363 struct buffer_head *next = bh->b_this_page; 364 free_buffer_head(bh); 365 bh = next; 366 } 367 __clear_page_buffers(page); 368 put_page(page); 369} 370 371/* read a page from a file. 372 * We both read the page, and attach buffers to the page to record the 373 * address of each block (using bmap). These addresses will be used 374 * to write the block later, completely bypassing the filesystem. 375 * This usage is similar to how swap files are handled, and allows us 376 * to write to a file with no concerns of memory allocation failing. 377 */ 378static struct page *read_page(struct file *file, unsigned long index, 379 struct bitmap *bitmap, 380 unsigned long count) 381{ 382 struct page *page = NULL; 383 struct inode *inode = file->f_path.dentry->d_inode; 384 struct buffer_head *bh; 385 sector_t block; 386 387 PRINTK("read bitmap file (%dB @ %Lu)\n", (int)PAGE_SIZE, 388 (unsigned long long)index << PAGE_SHIFT); 389 390 page = alloc_page(GFP_KERNEL); 391 if (!page) 392 page = ERR_PTR(-ENOMEM); 393 if (IS_ERR(page)) 394 goto out; 395 396 bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0); 397 if (!bh) { 398 put_page(page); 399 page = ERR_PTR(-ENOMEM); 400 goto out; 401 } 402 attach_page_buffers(page, bh); 403 block = index << (PAGE_SHIFT - inode->i_blkbits); 404 while (bh) { 405 if (count == 0) 406 bh->b_blocknr = 0; 407 else { 408 bh->b_blocknr = bmap(inode, block); 409 if (bh->b_blocknr == 0) { 410 /* Cannot use this file! */ 411 free_buffers(page); 412 page = ERR_PTR(-EINVAL); 413 goto out; 414 } 415 bh->b_bdev = inode->i_sb->s_bdev; 416 if (count < (1<<inode->i_blkbits)) 417 count = 0; 418 else 419 count -= (1<<inode->i_blkbits); 420 421 bh->b_end_io = end_bitmap_write; 422 bh->b_private = bitmap; 423 atomic_inc(&bitmap->pending_writes); 424 set_buffer_locked(bh); 425 set_buffer_mapped(bh); 426 submit_bh(READ, bh); 427 } 428 block++; 429 bh = bh->b_this_page; 430 } 431 page->index = index; 432 433 wait_event(bitmap->write_wait, 434 atomic_read(&bitmap->pending_writes)==0); 435 if (bitmap->flags & BITMAP_WRITE_ERROR) { 436 free_buffers(page); 437 page = ERR_PTR(-EIO); 438 } 439out: 440 if (IS_ERR(page)) 441 printk(KERN_ALERT "md: bitmap read error: (%dB @ %Lu): %ld\n", 442 (int)PAGE_SIZE, 443 (unsigned long long)index << PAGE_SHIFT, 444 PTR_ERR(page)); 445 return page; 446} 447 448/* 449 * bitmap file superblock operations 450 */ 451 452/* update the event counter and sync the superblock to disk */ 453void bitmap_update_sb(struct bitmap *bitmap) 454{ 455 bitmap_super_t *sb; 456 unsigned long flags; 457 458 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 459 return; 460 spin_lock_irqsave(&bitmap->lock, flags); 461 if (!bitmap->sb_page) { /* no superblock */ 462 spin_unlock_irqrestore(&bitmap->lock, flags); 463 return; 464 } 465 spin_unlock_irqrestore(&bitmap->lock, flags); 466 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 467 sb->events = cpu_to_le64(bitmap->mddev->events); 468 if (!bitmap->mddev->degraded) 469 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 470 kunmap_atomic(sb, KM_USER0); 471 write_page(bitmap, bitmap->sb_page, 1); 472} 473 474/* print out the bitmap file superblock */ 475void bitmap_print_sb(struct bitmap *bitmap) 476{ 477 bitmap_super_t *sb; 478 479 if (!bitmap || !bitmap->sb_page) 480 return; 481 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 482 printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap)); 483 printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); 484 printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); 485 printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n", 486 *(__u32 *)(sb->uuid+0), 487 *(__u32 *)(sb->uuid+4), 488 *(__u32 *)(sb->uuid+8), 489 *(__u32 *)(sb->uuid+12)); 490 printk(KERN_DEBUG " events: %llu\n", 491 (unsigned long long) le64_to_cpu(sb->events)); 492 printk(KERN_DEBUG "events cleared: %llu\n", 493 (unsigned long long) le64_to_cpu(sb->events_cleared)); 494 printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state)); 495 printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize)); 496 printk(KERN_DEBUG " daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); 497 printk(KERN_DEBUG " sync size: %llu KB\n", 498 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 499 printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind)); 500 kunmap_atomic(sb, KM_USER0); 501} 502 503/* read the superblock from the bitmap file and initialize some bitmap fields */ 504static int bitmap_read_sb(struct bitmap *bitmap) 505{ 506 char *reason = NULL; 507 bitmap_super_t *sb; 508 unsigned long chunksize, daemon_sleep, write_behind; 509 unsigned long long events; 510 int err = -EINVAL; 511 512 /* page 0 is the superblock, read it... */ 513 if (bitmap->file) { 514 loff_t isize = i_size_read(bitmap->file->f_mapping->host); 515 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; 516 517 bitmap->sb_page = read_page(bitmap->file, 0, bitmap, bytes); 518 } else { 519 bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0); 520 } 521 if (IS_ERR(bitmap->sb_page)) { 522 err = PTR_ERR(bitmap->sb_page); 523 bitmap->sb_page = NULL; 524 return err; 525 } 526 527 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 528 529 chunksize = le32_to_cpu(sb->chunksize); 530 daemon_sleep = le32_to_cpu(sb->daemon_sleep); 531 write_behind = le32_to_cpu(sb->write_behind); 532 533 /* verify that the bitmap-specific fields are valid */ 534 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 535 reason = "bad magic"; 536 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 537 le32_to_cpu(sb->version) > BITMAP_MAJOR_HI) 538 reason = "unrecognized superblock version"; 539 else if (chunksize < PAGE_SIZE) 540 reason = "bitmap chunksize too small"; 541 else if ((1 << ffz(~chunksize)) != chunksize) 542 reason = "bitmap chunksize not a power of 2"; 543 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) 544 reason = "daemon sleep period out of range"; 545 else if (write_behind > COUNTER_MAX) 546 reason = "write-behind limit out of range (0 - 16383)"; 547 if (reason) { 548 printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n", 549 bmname(bitmap), reason); 550 goto out; 551 } 552 553 /* keep the array size field of the bitmap superblock up to date */ 554 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 555 556 if (!bitmap->mddev->persistent) 557 goto success; 558 559 /* 560 * if we have a persistent array superblock, compare the 561 * bitmap's UUID and event counter to the mddev's 562 */ 563 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 564 printk(KERN_INFO "%s: bitmap superblock UUID mismatch\n", 565 bmname(bitmap)); 566 goto out; 567 } 568 events = le64_to_cpu(sb->events); 569 if (events < bitmap->mddev->events) { 570 printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " 571 "-- forcing full recovery\n", bmname(bitmap), events, 572 (unsigned long long) bitmap->mddev->events); 573 sb->state |= cpu_to_le32(BITMAP_STALE); 574 } 575success: 576 /* assign fields using values from superblock */ 577 bitmap->chunksize = chunksize; 578 bitmap->daemon_sleep = daemon_sleep; 579 bitmap->daemon_lastrun = jiffies; 580 bitmap->max_write_behind = write_behind; 581 bitmap->flags |= le32_to_cpu(sb->state); 582 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 583 bitmap->flags |= BITMAP_HOSTENDIAN; 584 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 585 if (sb->state & cpu_to_le32(BITMAP_STALE)) 586 bitmap->events_cleared = bitmap->mddev->events; 587 err = 0; 588out: 589 kunmap_atomic(sb, KM_USER0); 590 if (err) 591 bitmap_print_sb(bitmap); 592 return err; 593} 594 595enum bitmap_mask_op { 596 MASK_SET, 597 MASK_UNSET 598}; 599 600/* record the state of the bitmap in the superblock. Return the old value */ 601static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, 602 enum bitmap_mask_op op) 603{ 604 bitmap_super_t *sb; 605 unsigned long flags; 606 int old; 607 608 spin_lock_irqsave(&bitmap->lock, flags); 609 if (!bitmap->sb_page) { /* can't set the state */ 610 spin_unlock_irqrestore(&bitmap->lock, flags); 611 return 0; 612 } 613 spin_unlock_irqrestore(&bitmap->lock, flags); 614 sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); 615 old = le32_to_cpu(sb->state) & bits; 616 switch (op) { 617 case MASK_SET: sb->state |= cpu_to_le32(bits); 618 break; 619 case MASK_UNSET: sb->state &= cpu_to_le32(~bits); 620 break; 621 default: BUG(); 622 } 623 kunmap_atomic(sb, KM_USER0); 624 return old; 625} 626 627/* 628 * general bitmap file operations 629 */ 630 631/* calculate the index of the page that contains this bit */ 632static inline unsigned long file_page_index(unsigned long chunk) 633{ 634 return CHUNK_BIT_OFFSET(chunk) >> PAGE_BIT_SHIFT; 635} 636 637/* calculate the (bit) offset of this bit within a page */ 638static inline unsigned long file_page_offset(unsigned long chunk) 639{ 640 return CHUNK_BIT_OFFSET(chunk) & (PAGE_BITS - 1); 641} 642 643/* 644 * return a pointer to the page in the filemap that contains the given bit 645 * 646 * this lookup is complicated by the fact that the bitmap sb might be exactly 647 * 1 page (e.g., x86) or less than 1 page -- so the bitmap might start on page 648 * 0 or page 1 649 */ 650static inline struct page *filemap_get_page(struct bitmap *bitmap, 651 unsigned long chunk) 652{ 653 if (file_page_index(chunk) >= bitmap->file_pages) return NULL; 654 return bitmap->filemap[file_page_index(chunk) - file_page_index(0)]; 655} 656 657 658static void bitmap_file_unmap(struct bitmap *bitmap) 659{ 660 struct page **map, *sb_page; 661 unsigned long *attr; 662 int pages; 663 unsigned long flags; 664 665 spin_lock_irqsave(&bitmap->lock, flags); 666 map = bitmap->filemap; 667 bitmap->filemap = NULL; 668 attr = bitmap->filemap_attr; 669 bitmap->filemap_attr = NULL; 670 pages = bitmap->file_pages; 671 bitmap->file_pages = 0; 672 sb_page = bitmap->sb_page; 673 bitmap->sb_page = NULL; 674 spin_unlock_irqrestore(&bitmap->lock, flags); 675 676 while (pages--) 677 if (map[pages]->index != 0) /* 0 is sb_page, release it below */ 678 free_buffers(map[pages]); 679 kfree(map); 680 kfree(attr); 681 682 if (sb_page) 683 free_buffers(sb_page); 684} 685 686static void bitmap_file_put(struct bitmap *bitmap) 687{ 688 struct file *file; 689 unsigned long flags; 690 691 spin_lock_irqsave(&bitmap->lock, flags); 692 file = bitmap->file; 693 bitmap->file = NULL; 694 spin_unlock_irqrestore(&bitmap->lock, flags); 695 696 if (file) 697 wait_event(bitmap->write_wait, 698 atomic_read(&bitmap->pending_writes)==0); 699 bitmap_file_unmap(bitmap); 700 701 if (file) { 702 struct inode *inode = file->f_path.dentry->d_inode; 703 invalidate_mapping_pages(inode->i_mapping, 0, -1); 704 fput(file); 705 } 706} 707 708 709/* 710 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 711 * then it is no longer reliable, so we stop using it and we mark the file 712 * as failed in the superblock 713 */ 714static void bitmap_file_kick(struct bitmap *bitmap) 715{ 716 char *path, *ptr = NULL; 717 718 if (bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET) == 0) { 719 bitmap_update_sb(bitmap); 720 721 if (bitmap->file) { 722 path = kmalloc(PAGE_SIZE, GFP_KERNEL); 723 if (path) 724 ptr = file_path(bitmap->file, path, PAGE_SIZE); 725 726 printk(KERN_ALERT 727 "%s: kicking failed bitmap file %s from array!\n", 728 bmname(bitmap), ptr ? ptr : ""); 729 730 kfree(path); 731 } else 732 printk(KERN_ALERT 733 "%s: disabling internal bitmap due to errors\n", 734 bmname(bitmap)); 735 } 736 737 bitmap_file_put(bitmap); 738 739 return; 740} 741 742enum bitmap_page_attr { 743 BITMAP_PAGE_DIRTY = 0, // there are set bits that need to be synced 744 BITMAP_PAGE_CLEAN = 1, // there are bits that might need to be cleared 745 BITMAP_PAGE_NEEDWRITE=2, // there are cleared bits that need to be synced 746}; 747 748static inline void set_page_attr(struct bitmap *bitmap, struct page *page, 749 enum bitmap_page_attr attr) 750{ 751 __set_bit((page->index<<2) + attr, bitmap->filemap_attr); 752} 753 754static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, 755 enum bitmap_page_attr attr) 756{ 757 __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); 758} 759 760static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, 761 enum bitmap_page_attr attr) 762{ 763 return test_bit((page->index<<2) + attr, bitmap->filemap_attr); 764} 765 766/* 767 * bitmap_file_set_bit -- called before performing a write to the md device 768 * to set (and eventually sync) a particular bit in the bitmap file 769 * 770 * we set the bit immediately, then we record the page number so that 771 * when an unplug occurs, we can flush the dirty pages out to disk 772 */ 773static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 774{ 775 unsigned long bit; 776 struct page *page; 777 void *kaddr; 778 unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); 779 780 if (!bitmap->filemap) { 781 return; 782 } 783 784 page = filemap_get_page(bitmap, chunk); 785 if (!page) return; 786 bit = file_page_offset(chunk); 787 788 /* set the bit */ 789 kaddr = kmap_atomic(page, KM_USER0); 790 if (bitmap->flags & BITMAP_HOSTENDIAN) 791 set_bit(bit, kaddr); 792 else 793 ext2_set_bit(bit, kaddr); 794 kunmap_atomic(kaddr, KM_USER0); 795 PRINTK("set file bit %lu page %lu\n", bit, page->index); 796 797 /* record page number so it gets flushed to disk when unplug occurs */ 798 set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 799 800} 801 802/* this gets called when the md device is ready to unplug its underlying 803 * (slave) device queues -- before we let any writes go down, we need to 804 * sync the dirty pages of the bitmap file to disk */ 805void bitmap_unplug(struct bitmap *bitmap) 806{ 807 unsigned long i, flags; 808 int dirty, need_write; 809 struct page *page; 810 int wait = 0; 811 812 if (!bitmap) 813 return; 814 815 /* look at each page to see if there are any set bits that need to be 816 * flushed out to disk */ 817 for (i = 0; i < bitmap->file_pages; i++) { 818 spin_lock_irqsave(&bitmap->lock, flags); 819 if (!bitmap->filemap) { 820 spin_unlock_irqrestore(&bitmap->lock, flags); 821 return; 822 } 823 page = bitmap->filemap[i]; 824 dirty = test_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 825 need_write = test_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 826 clear_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); 827 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 828 if (dirty) 829 wait = 1; 830 spin_unlock_irqrestore(&bitmap->lock, flags); 831 832 if (dirty | need_write) 833 write_page(bitmap, page, 0); 834 } 835 if (wait) { /* if any writes were performed, we need to wait on them */ 836 if (bitmap->file) 837 wait_event(bitmap->write_wait, 838 atomic_read(&bitmap->pending_writes)==0); 839 else 840 md_super_wait(bitmap->mddev); 841 } 842 if (bitmap->flags & BITMAP_WRITE_ERROR) 843 bitmap_file_kick(bitmap); 844} 845 846static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 847/* * bitmap_init_from_disk -- called at bitmap_create time to initialize 848 * the in-memory bitmap from the on-disk bitmap -- also, sets up the 849 * memory mapping of the bitmap file 850 * Special cases: 851 * if there's no bitmap file, or if the bitmap file had been 852 * previously kicked from the array, we mark all the bits as 853 * 1's in order to cause a full resync. 854 * 855 * We ignore all bits for sectors that end earlier than 'start'. 856 * This is used when reading an out-of-date bitmap... 857 */ 858static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 859{ 860 unsigned long i, chunks, index, oldindex, bit; 861 struct page *page = NULL, *oldpage = NULL; 862 unsigned long num_pages, bit_cnt = 0; 863 struct file *file; 864 unsigned long bytes, offset; 865 int outofdate; 866 int ret = -ENOSPC; 867 void *paddr; 868 869 chunks = bitmap->chunks; 870 file = bitmap->file; 871 872 BUG_ON(!file && !bitmap->offset); 873 874#ifdef INJECT_FAULTS_3 875 outofdate = 1; 876#else 877 outofdate = bitmap->flags & BITMAP_STALE; 878#endif 879 if (outofdate) 880 printk(KERN_INFO "%s: bitmap file is out of date, doing full " 881 "recovery\n", bmname(bitmap)); 882 883 bytes = (chunks + 7) / 8; 884 885 num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; 886 887 if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { 888 printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", 889 bmname(bitmap), 890 (unsigned long) i_size_read(file->f_mapping->host), 891 bytes + sizeof(bitmap_super_t)); 892 goto err; 893 } 894 895 ret = -ENOMEM; 896 897 bitmap->filemap = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL); 898 if (!bitmap->filemap) 899 goto err; 900 901 /* We need 4 bits per page, rounded up to a multiple of sizeof(unsigned long) */ 902 bitmap->filemap_attr = kzalloc( 903 roundup( DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 904 GFP_KERNEL); 905 if (!bitmap->filemap_attr) 906 goto err; 907 908 oldindex = ~0L; 909 910 for (i = 0; i < chunks; i++) { 911 int b; 912 index = file_page_index(i); 913 bit = file_page_offset(i); 914 if (index != oldindex) { /* this is a new page, read it in */ 915 int count; 916 /* unmap the old page, we're done with it */ 917 if (index == num_pages-1) 918 count = bytes + sizeof(bitmap_super_t) 919 - index * PAGE_SIZE; 920 else 921 count = PAGE_SIZE; 922 if (index == 0) { 923 /* 924 * if we're here then the superblock page 925 * contains some bits (PAGE_SIZE != sizeof sb) 926 * we've already read it in, so just use it 927 */ 928 page = bitmap->sb_page; 929 offset = sizeof(bitmap_super_t); 930 } else if (file) { 931 page = read_page(file, index, bitmap, count); 932 offset = 0; 933 } else { 934 page = read_sb_page(bitmap->mddev, bitmap->offset, index); 935 offset = 0; 936 } 937 if (IS_ERR(page)) { /* read error */ 938 ret = PTR_ERR(page); 939 goto err; 940 } 941 942 oldindex = index; 943 oldpage = page; 944 945 if (outofdate) { 946 /* 947 * if bitmap is out of date, dirty the 948 * whole page and write it out 949 */ 950 paddr = kmap_atomic(page, KM_USER0); 951 memset(paddr + offset, 0xff, 952 PAGE_SIZE - offset); 953 kunmap_atomic(paddr, KM_USER0); 954 write_page(bitmap, page, 1); 955 956 ret = -EIO; 957 if (bitmap->flags & BITMAP_WRITE_ERROR) { 958 /* release, page not in filemap yet */ 959 put_page(page); 960 goto err; 961 } 962 } 963 964 bitmap->filemap[bitmap->file_pages++] = page; 965 bitmap->last_page_size = count; 966 } 967 paddr = kmap_atomic(page, KM_USER0); 968 if (bitmap->flags & BITMAP_HOSTENDIAN) 969 b = test_bit(bit, paddr); 970 else 971 b = ext2_test_bit(bit, paddr); 972 kunmap_atomic(paddr, KM_USER0); 973 if (b) { 974 /* if the disk bit is set, set the memory bit */ 975 bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), 976 ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) 977 ); 978 bit_cnt++; 979 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 980 } 981 } 982 983 /* everything went OK */ 984 ret = 0; 985 bitmap_mask_state(bitmap, BITMAP_STALE, MASK_UNSET); 986 987 if (bit_cnt) { /* Kick recovery if any bits were set */ 988 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 989 md_wakeup_thread(bitmap->mddev->thread); 990 } 991 992 printk(KERN_INFO "%s: bitmap initialized from disk: " 993 "read %lu/%lu pages, set %lu bits\n", 994 bmname(bitmap), bitmap->file_pages, num_pages, bit_cnt); 995 996 return 0; 997 998 err: 999 printk(KERN_INFO "%s: bitmap initialisation failed: %d\n", 1000 bmname(bitmap), ret); 1001 return ret; 1002} 1003 1004void bitmap_write_all(struct bitmap *bitmap) 1005{ 1006 /* We don't actually write all bitmap blocks here, 1007 * just flag them as needing to be written 1008 */ 1009 int i; 1010 1011 for (i=0; i < bitmap->file_pages; i++) 1012 set_page_attr(bitmap, bitmap->filemap[i], 1013 BITMAP_PAGE_NEEDWRITE); 1014} 1015 1016 1017static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) 1018{ 1019 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); 1020 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1021 bitmap->bp[page].count += inc; 1022/* 1023 if (page == 0) printk("count page 0, offset %llu: %d gives %d\n", 1024 (unsigned long long)offset, inc, bitmap->bp[page].count); 1025*/ 1026 bitmap_checkfree(bitmap, page); 1027} 1028static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1029 sector_t offset, int *blocks, 1030 int create); 1031 1032/* 1033 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1034 * out to disk 1035 */ 1036 1037void bitmap_daemon_work(struct bitmap *bitmap) 1038{ 1039 unsigned long j; 1040 unsigned long flags; 1041 struct page *page = NULL, *lastpage = NULL; 1042 int blocks; 1043 void *paddr; 1044 1045 if (bitmap == NULL) 1046 return; 1047 if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) 1048 return; 1049 bitmap->daemon_lastrun = jiffies; 1050 if (bitmap->allclean) { 1051 bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1052 return; 1053 } 1054 bitmap->allclean = 1; 1055 1056 for (j = 0; j < bitmap->chunks; j++) { 1057 bitmap_counter_t *bmc; 1058 spin_lock_irqsave(&bitmap->lock, flags); 1059 if (!bitmap->filemap) { 1060 /* error or shutdown */ 1061 spin_unlock_irqrestore(&bitmap->lock, flags); 1062 break; 1063 } 1064 1065 page = filemap_get_page(bitmap, j); 1066 1067 if (page != lastpage) { 1068 /* skip this page unless it's marked as needing cleaning */ 1069 if (!test_page_attr(bitmap, page, BITMAP_PAGE_CLEAN)) { 1070 int need_write = test_page_attr(bitmap, page, 1071 BITMAP_PAGE_NEEDWRITE); 1072 if (need_write) 1073 clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); 1074 1075 spin_unlock_irqrestore(&bitmap->lock, flags); 1076 if (need_write) { 1077 write_page(bitmap, page, 0); 1078 bitmap->allclean = 0; 1079 } 1080 continue; 1081 } 1082 1083 /* grab the new page, sync and release the old */ 1084 if (lastpage != NULL) { 1085 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1086 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1087 spin_unlock_irqrestore(&bitmap->lock, flags); 1088 write_page(bitmap, lastpage, 0); 1089 } else { 1090 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1091 spin_unlock_irqrestore(&bitmap->lock, flags); 1092 } 1093 } else 1094 spin_unlock_irqrestore(&bitmap->lock, flags); 1095 lastpage = page; 1096/* 1097 printk("bitmap clean at page %lu\n", j); 1098*/ 1099 spin_lock_irqsave(&bitmap->lock, flags); 1100 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1101 } 1102 bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1103 &blocks, 0); 1104 if (bmc) { 1105/* 1106 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); 1107*/ 1108 if (*bmc) 1109 bitmap->allclean = 0; 1110 1111 if (*bmc == 2) { 1112 *bmc=1; /* maybe clear the bit next time */ 1113 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1114 } else if (*bmc == 1) { 1115 /* we can clear the bit */ 1116 *bmc = 0; 1117 bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1118 -1); 1119 1120 /* clear the bit */ 1121 paddr = kmap_atomic(page, KM_USER0); 1122 if (bitmap->flags & BITMAP_HOSTENDIAN) 1123 clear_bit(file_page_offset(j), paddr); 1124 else 1125 ext2_clear_bit(file_page_offset(j), paddr); 1126 kunmap_atomic(paddr, KM_USER0); 1127 } 1128 } 1129 spin_unlock_irqrestore(&bitmap->lock, flags); 1130 } 1131 1132 /* now sync the final page */ 1133 if (lastpage != NULL) { 1134 spin_lock_irqsave(&bitmap->lock, flags); 1135 if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { 1136 clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1137 spin_unlock_irqrestore(&bitmap->lock, flags); 1138 write_page(bitmap, lastpage, 0); 1139 } else { 1140 set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); 1141 spin_unlock_irqrestore(&bitmap->lock, flags); 1142 } 1143 } 1144 1145 if (bitmap->allclean == 0) 1146 bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; 1147} 1148 1149static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, 1150 sector_t offset, int *blocks, 1151 int create) 1152{ 1153 /* If 'create', we might release the lock and reclaim it. 1154 * The lock must have been taken with interrupts enabled. 1155 * If !create, we don't release the lock. 1156 */ 1157 sector_t chunk = offset >> CHUNK_BLOCK_SHIFT(bitmap); 1158 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1159 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1160 sector_t csize; 1161 1162 if (bitmap_checkpage(bitmap, page, create) < 0) { 1163 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); 1164 *blocks = csize - (offset & (csize- 1)); 1165 return NULL; 1166 } 1167 /* now locked ... */ 1168 1169 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1170 /* should we use the first or second counter field 1171 * of the hijacked pointer? */ 1172 int hi = (pageoff > PAGE_COUNTER_MASK); 1173 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap) + 1174 PAGE_COUNTER_SHIFT - 1); 1175 *blocks = csize - (offset & (csize- 1)); 1176 return &((bitmap_counter_t *) 1177 &bitmap->bp[page].map)[hi]; 1178 } else { /* page is allocated */ 1179 csize = ((sector_t)1) << (CHUNK_BLOCK_SHIFT(bitmap)); 1180 *blocks = csize - (offset & (csize- 1)); 1181 return (bitmap_counter_t *) 1182 &(bitmap->bp[page].map[pageoff]); 1183 } 1184} 1185 1186int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) 1187{ 1188 if (!bitmap) return 0; 1189 1190 if (behind) { 1191 atomic_inc(&bitmap->behind_writes); 1192 PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n", 1193 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); 1194 } 1195 1196 while (sectors) { 1197 int blocks; 1198 bitmap_counter_t *bmc; 1199 1200 spin_lock_irq(&bitmap->lock); 1201 bmc = bitmap_get_counter(bitmap, offset, &blocks, 1); 1202 if (!bmc) { 1203 spin_unlock_irq(&bitmap->lock); 1204 return 0; 1205 } 1206 1207 if (unlikely((*bmc & COUNTER_MAX) == COUNTER_MAX)) { 1208 DEFINE_WAIT(__wait); 1209 /* note that it is safe to do the prepare_to_wait 1210 * after the test as long as we do it before dropping 1211 * the spinlock. 1212 */ 1213 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1214 TASK_UNINTERRUPTIBLE); 1215 spin_unlock_irq(&bitmap->lock); 1216 blk_unplug(bitmap->mddev->queue); 1217 schedule(); 1218 finish_wait(&bitmap->overflow_wait, &__wait); 1219 continue; 1220 } 1221 1222 switch(*bmc) { 1223 case 0: 1224 bitmap_file_set_bit(bitmap, offset); 1225 bitmap_count_page(bitmap,offset, 1); 1226 blk_plug_device(bitmap->mddev->queue); 1227 /* fall through */ 1228 case 1: 1229 *bmc = 2; 1230 } 1231 1232 (*bmc)++; 1233 1234 spin_unlock_irq(&bitmap->lock); 1235 1236 offset += blocks; 1237 if (sectors > blocks) 1238 sectors -= blocks; 1239 else sectors = 0; 1240 } 1241 bitmap->allclean = 0; 1242 return 0; 1243} 1244 1245void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, 1246 int success, int behind) 1247{ 1248 if (!bitmap) return; 1249 if (behind) { 1250 atomic_dec(&bitmap->behind_writes); 1251 PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", 1252 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); 1253 } 1254 1255 while (sectors) { 1256 int blocks; 1257 unsigned long flags; 1258 bitmap_counter_t *bmc; 1259 1260 spin_lock_irqsave(&bitmap->lock, flags); 1261 bmc = bitmap_get_counter(bitmap, offset, &blocks, 0); 1262 if (!bmc) { 1263 spin_unlock_irqrestore(&bitmap->lock, flags); 1264 return; 1265 } 1266 1267 if (!success && ! (*bmc & NEEDED_MASK)) 1268 *bmc |= NEEDED_MASK; 1269 1270 if ((*bmc & COUNTER_MAX) == COUNTER_MAX) 1271 wake_up(&bitmap->overflow_wait); 1272 1273 (*bmc)--; 1274 if (*bmc <= 2) { 1275 set_page_attr(bitmap, 1276 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1277 BITMAP_PAGE_CLEAN); 1278 } 1279 spin_unlock_irqrestore(&bitmap->lock, flags); 1280 offset += blocks; 1281 if (sectors > blocks) 1282 sectors -= blocks; 1283 else sectors = 0; 1284 } 1285} 1286 1287int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, 1288 int degraded) 1289{ 1290 bitmap_counter_t *bmc; 1291 int rv; 1292 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ 1293 *blocks = 1024; 1294 return 1; /* always resync if no bitmap */ 1295 } 1296 spin_lock_irq(&bitmap->lock); 1297 bmc = bitmap_get_counter(bitmap, offset, blocks, 0); 1298 rv = 0; 1299 if (bmc) { 1300 /* locked */ 1301 if (RESYNC(*bmc)) 1302 rv = 1; 1303 else if (NEEDED(*bmc)) { 1304 rv = 1; 1305 if (!degraded) { /* don't set/clear bits if degraded */ 1306 *bmc |= RESYNC_MASK; 1307 *bmc &= ~NEEDED_MASK; 1308 } 1309 } 1310 } 1311 spin_unlock_irq(&bitmap->lock); 1312 bitmap->allclean = 0; 1313 return rv; 1314} 1315 1316void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted) 1317{ 1318 bitmap_counter_t *bmc; 1319 unsigned long flags; 1320/* 1321 if (offset == 0) printk("bitmap_end_sync 0 (%d)\n", aborted); 1322*/ if (bitmap == NULL) { 1323 *blocks = 1024; 1324 return; 1325 } 1326 spin_lock_irqsave(&bitmap->lock, flags); 1327 bmc = bitmap_get_counter(bitmap, offset, blocks, 0); 1328 if (bmc == NULL) 1329 goto unlock; 1330 /* locked */ 1331/* 1332 if (offset == 0) printk("bitmap_end sync found 0x%x, blocks %d\n", *bmc, *blocks); 1333*/ 1334 if (RESYNC(*bmc)) { 1335 *bmc &= ~RESYNC_MASK; 1336 1337 if (!NEEDED(*bmc) && aborted) 1338 *bmc |= NEEDED_MASK; 1339 else { 1340 if (*bmc <= 2) { 1341 set_page_attr(bitmap, 1342 filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)), 1343 BITMAP_PAGE_CLEAN); 1344 } 1345 } 1346 } 1347 unlock: 1348 spin_unlock_irqrestore(&bitmap->lock, flags); 1349 bitmap->allclean = 0; 1350} 1351 1352void bitmap_close_sync(struct bitmap *bitmap) 1353{ 1354 /* Sync has finished, and any bitmap chunks that weren't synced 1355 * properly have been aborted. It remains to us to clear the 1356 * RESYNC bit wherever it is still on 1357 */ 1358 sector_t sector = 0; 1359 int blocks; 1360 if (!bitmap) 1361 return; 1362 while (sector < bitmap->mddev->resync_max_sectors) { 1363 bitmap_end_sync(bitmap, sector, &blocks, 0); 1364 sector += blocks; 1365 } 1366} 1367 1368void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) 1369{ 1370 sector_t s = 0; 1371 int blocks; 1372 1373 if (!bitmap) 1374 return; 1375 if (sector == 0) { 1376 bitmap->last_end_sync = jiffies; 1377 return; 1378 } 1379 if (time_before(jiffies, (bitmap->last_end_sync 1380 + bitmap->daemon_sleep * HZ))) 1381 return; 1382 wait_event(bitmap->mddev->recovery_wait, 1383 atomic_read(&bitmap->mddev->recovery_active) == 0); 1384 1385 sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); 1386 s = 0; 1387 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1388 bitmap_end_sync(bitmap, s, &blocks, 0); 1389 s += blocks; 1390 } 1391 bitmap->last_end_sync = jiffies; 1392} 1393 1394static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1395{ 1396 /* For each chunk covered by any of these sectors, set the 1397 * counter to 1 and set resync_needed. They should all 1398 * be 0 at this point 1399 */ 1400 1401 int secs; 1402 bitmap_counter_t *bmc; 1403 spin_lock_irq(&bitmap->lock); 1404 bmc = bitmap_get_counter(bitmap, offset, &secs, 1); 1405 if (!bmc) { 1406 spin_unlock_irq(&bitmap->lock); 1407 return; 1408 } 1409 if (! *bmc) { 1410 struct page *page; 1411 *bmc = 1 | (needed?NEEDED_MASK:0); 1412 bitmap_count_page(bitmap, offset, 1); 1413 page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); 1414 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1415 } 1416 spin_unlock_irq(&bitmap->lock); 1417 bitmap->allclean = 0; 1418} 1419 1420/* dirty the memory and file bits for bitmap chunks "s" to "e" */ 1421void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) 1422{ 1423 unsigned long chunk; 1424 1425 for (chunk = s; chunk <= e; chunk++) { 1426 sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); 1427 bitmap_set_memory_bits(bitmap, sec, 1); 1428 bitmap_file_set_bit(bitmap, sec); 1429 } 1430} 1431 1432/* 1433 * flush out any pending updates 1434 */ 1435void bitmap_flush(mddev_t *mddev) 1436{ 1437 struct bitmap *bitmap = mddev->bitmap; 1438 int sleep; 1439 1440 if (!bitmap) /* there was no bitmap */ 1441 return; 1442 1443 /* run the daemon_work three time to ensure everything is flushed 1444 * that can be 1445 */ 1446 sleep = bitmap->daemon_sleep; 1447 bitmap->daemon_sleep = 0; 1448 bitmap_daemon_work(bitmap); 1449 bitmap_daemon_work(bitmap); 1450 bitmap_daemon_work(bitmap); 1451 bitmap->daemon_sleep = sleep; 1452 bitmap_update_sb(bitmap); 1453} 1454 1455/* 1456 * free memory that was allocated 1457 */ 1458static void bitmap_free(struct bitmap *bitmap) 1459{ 1460 unsigned long k, pages; 1461 struct bitmap_page *bp; 1462 1463 if (!bitmap) /* there was no bitmap */ 1464 return; 1465 1466 /* release the bitmap file and kill the daemon */ 1467 bitmap_file_put(bitmap); 1468 1469 bp = bitmap->bp; 1470 pages = bitmap->pages; 1471 1472 /* free all allocated memory */ 1473 1474 if (bp) /* deallocate the page memory */ 1475 for (k = 0; k < pages; k++) 1476 if (bp[k].map && !bp[k].hijacked) 1477 kfree(bp[k].map); 1478 kfree(bp); 1479 kfree(bitmap); 1480} 1481void bitmap_destroy(mddev_t *mddev) 1482{ 1483 struct bitmap *bitmap = mddev->bitmap; 1484 1485 if (!bitmap) /* there was no bitmap */ 1486 return; 1487 1488 mddev->bitmap = NULL; /* disconnect from the md device */ 1489 if (mddev->thread) 1490 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1491 1492 bitmap_free(bitmap); 1493} 1494 1495/* 1496 * initialize the bitmap structure 1497 * if this returns an error, bitmap_destroy must be called to do clean up 1498 */ 1499int bitmap_create(mddev_t *mddev) 1500{ 1501 struct bitmap *bitmap; 1502 unsigned long blocks = mddev->resync_max_sectors; 1503 unsigned long chunks; 1504 unsigned long pages; 1505 struct file *file = mddev->bitmap_file; 1506 int err; 1507 sector_t start; 1508 1509 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1510 1511 if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */ 1512 return 0; 1513 1514 BUG_ON(file && mddev->bitmap_offset); 1515 1516 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 1517 if (!bitmap) 1518 return -ENOMEM; 1519 1520 spin_lock_init(&bitmap->lock); 1521 atomic_set(&bitmap->pending_writes, 0); 1522 init_waitqueue_head(&bitmap->write_wait); 1523 init_waitqueue_head(&bitmap->overflow_wait); 1524 1525 bitmap->mddev = mddev; 1526 1527 bitmap->file = file; 1528 bitmap->offset = mddev->bitmap_offset; 1529 if (file) { 1530 get_file(file); 1531 do_sync_mapping_range(file->f_mapping, 0, LLONG_MAX, 1532 SYNC_FILE_RANGE_WAIT_BEFORE | 1533 SYNC_FILE_RANGE_WRITE | 1534 SYNC_FILE_RANGE_WAIT_AFTER); 1535 } 1536 /* read superblock from bitmap file (this sets bitmap->chunksize) */ 1537 err = bitmap_read_sb(bitmap); 1538 if (err) 1539 goto error; 1540 1541 bitmap->chunkshift = ffz(~bitmap->chunksize); 1542 1543 /* now that chunksize and chunkshift are set, we can use these macros */ 1544 chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) / 1545 CHUNK_BLOCK_RATIO(bitmap); 1546 pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; 1547 1548 BUG_ON(!pages); 1549 1550 bitmap->chunks = chunks; 1551 bitmap->pages = pages; 1552 bitmap->missing_pages = pages; 1553 bitmap->counter_bits = COUNTER_BITS; 1554 1555 bitmap->syncchunk = ~0UL; 1556 1557#ifdef INJECT_FATAL_FAULT_1 1558 bitmap->bp = NULL; 1559#else 1560 bitmap->bp = kzalloc(pages * sizeof(*bitmap->bp), GFP_KERNEL); 1561#endif 1562 err = -ENOMEM; 1563 if (!bitmap->bp) 1564 goto error; 1565 1566 /* now that we have some pages available, initialize the in-memory 1567 * bitmap from the on-disk bitmap */ 1568 start = 0; 1569 if (mddev->degraded == 0 1570 || bitmap->events_cleared == mddev->events) 1571 /* no need to keep dirty bits to optimise a re-add of a missing device */ 1572 start = mddev->recovery_cp; 1573 err = bitmap_init_from_disk(bitmap, start); 1574 1575 if (err) 1576 goto error; 1577 1578 printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", 1579 pages, bmname(bitmap)); 1580 1581 mddev->bitmap = bitmap; 1582 1583 mddev->thread->timeout = bitmap->daemon_sleep * HZ; 1584 1585 bitmap_update_sb(bitmap); 1586 1587 return (bitmap->flags & BITMAP_WRITE_ERROR) ? -EIO : 0; 1588 1589 error: 1590 bitmap_free(bitmap); 1591 return err; 1592} 1593 1594/* the bitmap API -- for raid personalities */ 1595EXPORT_SYMBOL(bitmap_startwrite); 1596EXPORT_SYMBOL(bitmap_endwrite); 1597EXPORT_SYMBOL(bitmap_start_sync); 1598EXPORT_SYMBOL(bitmap_end_sync); 1599EXPORT_SYMBOL(bitmap_unplug); 1600EXPORT_SYMBOL(bitmap_close_sync); 1601EXPORT_SYMBOL(bitmap_cond_end_sync);