at v4.13 70 kB view raw
1/* 2 * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 3 * 4 * bitmap_create - sets up the bitmap structure 5 * bitmap_destroy - destroys the bitmap structure 6 * 7 * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: 8 * - added disk storage for bitmap 9 * - changes to allow various bitmap chunk sizes 10 */ 11 12/* 13 * Still to do: 14 * 15 * flush after percent set rather than just time based. (maybe both). 16 */ 17 18#include <linux/blkdev.h> 19#include <linux/module.h> 20#include <linux/errno.h> 21#include <linux/slab.h> 22#include <linux/init.h> 23#include <linux/timer.h> 24#include <linux/sched.h> 25#include <linux/list.h> 26#include <linux/file.h> 27#include <linux/mount.h> 28#include <linux/buffer_head.h> 29#include <linux/seq_file.h> 30#include <trace/events/block.h> 31#include "md.h" 32#include "bitmap.h" 33 34static inline char *bmname(struct bitmap *bitmap) 35{ 36 return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; 37} 38 39/* 40 * check a page and, if necessary, allocate it (or hijack it if the alloc fails) 41 * 42 * 1) check to see if this page is allocated, if it's not then try to alloc 43 * 2) if the alloc fails, set the page's hijacked flag so we'll use the 44 * page pointer directly as a counter 45 * 46 * if we find our page, we increment the page's refcount so that it stays 47 * allocated while we're using it 48 */ 49static int bitmap_checkpage(struct bitmap_counts *bitmap, 50 unsigned long page, int create, int no_hijack) 51__releases(bitmap->lock) 52__acquires(bitmap->lock) 53{ 54 unsigned char *mappage; 55 56 if (page >= bitmap->pages) { 57 /* This can happen if bitmap_start_sync goes beyond 58 * End-of-device while looking for a whole page. 59 * It is harmless. 60 */ 61 return -EINVAL; 62 } 63 64 if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ 65 return 0; 66 67 if (bitmap->bp[page].map) /* page is already allocated, just return */ 68 return 0; 69 70 if (!create) 71 return -ENOENT; 72 73 /* this page has not been allocated yet */ 74 75 spin_unlock_irq(&bitmap->lock); 76 /* It is possible that this is being called inside a 77 * prepare_to_wait/finish_wait loop from raid5c:make_request(). 78 * In general it is not permitted to sleep in that context as it 79 * can cause the loop to spin freely. 80 * That doesn't apply here as we can only reach this point 81 * once with any loop. 82 * When this function completes, either bp[page].map or 83 * bp[page].hijacked. In either case, this function will 84 * abort before getting to this point again. So there is 85 * no risk of a free-spin, and so it is safe to assert 86 * that sleeping here is allowed. 87 */ 88 sched_annotate_sleep(); 89 mappage = kzalloc(PAGE_SIZE, GFP_NOIO); 90 spin_lock_irq(&bitmap->lock); 91 92 if (mappage == NULL) { 93 pr_debug("md/bitmap: map page allocation failed, hijacking\n"); 94 /* We don't support hijack for cluster raid */ 95 if (no_hijack) 96 return -ENOMEM; 97 /* failed - set the hijacked flag so that we can use the 98 * pointer as a counter */ 99 if (!bitmap->bp[page].map) 100 bitmap->bp[page].hijacked = 1; 101 } else if (bitmap->bp[page].map || 102 bitmap->bp[page].hijacked) { 103 /* somebody beat us to getting the page */ 104 kfree(mappage); 105 } else { 106 107 /* no page was in place and we have one, so install it */ 108 109 bitmap->bp[page].map = mappage; 110 bitmap->missing_pages--; 111 } 112 return 0; 113} 114 115/* if page is completely empty, put it back on the free list, or dealloc it */ 116/* if page was hijacked, unmark the flag so it might get alloced next time */ 117/* Note: lock should be held when calling this */ 118static void bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) 119{ 120 char *ptr; 121 122 if (bitmap->bp[page].count) /* page is still busy */ 123 return; 124 125 /* page is no longer in use, it can be released */ 126 127 if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ 128 bitmap->bp[page].hijacked = 0; 129 bitmap->bp[page].map = NULL; 130 } else { 131 /* normal case, free the page */ 132 ptr = bitmap->bp[page].map; 133 bitmap->bp[page].map = NULL; 134 bitmap->missing_pages++; 135 kfree(ptr); 136 } 137} 138 139/* 140 * bitmap file handling - read and write the bitmap file and its superblock 141 */ 142 143/* 144 * basic page I/O operations 145 */ 146 147/* IO operations when bitmap is stored near all superblocks */ 148static int read_sb_page(struct mddev *mddev, loff_t offset, 149 struct page *page, 150 unsigned long index, int size) 151{ 152 /* choose a good rdev and read the page from there */ 153 154 struct md_rdev *rdev; 155 sector_t target; 156 157 rdev_for_each(rdev, mddev) { 158 if (! test_bit(In_sync, &rdev->flags) 159 || test_bit(Faulty, &rdev->flags) 160 || test_bit(Bitmap_sync, &rdev->flags)) 161 continue; 162 163 target = offset + index * (PAGE_SIZE/512); 164 165 if (sync_page_io(rdev, target, 166 roundup(size, bdev_logical_block_size(rdev->bdev)), 167 page, REQ_OP_READ, 0, true)) { 168 page->index = index; 169 return 0; 170 } 171 } 172 return -EIO; 173} 174 175static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) 176{ 177 /* Iterate the disks of an mddev, using rcu to protect access to the 178 * linked list, and raising the refcount of devices we return to ensure 179 * they don't disappear while in use. 180 * As devices are only added or removed when raid_disk is < 0 and 181 * nr_pending is 0 and In_sync is clear, the entries we return will 182 * still be in the same position on the list when we re-enter 183 * list_for_each_entry_continue_rcu. 184 * 185 * Note that if entered with 'rdev == NULL' to start at the 186 * beginning, we temporarily assign 'rdev' to an address which 187 * isn't really an rdev, but which can be used by 188 * list_for_each_entry_continue_rcu() to find the first entry. 189 */ 190 rcu_read_lock(); 191 if (rdev == NULL) 192 /* start at the beginning */ 193 rdev = list_entry(&mddev->disks, struct md_rdev, same_set); 194 else { 195 /* release the previous rdev and start from there. */ 196 rdev_dec_pending(rdev, mddev); 197 } 198 list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) { 199 if (rdev->raid_disk >= 0 && 200 !test_bit(Faulty, &rdev->flags)) { 201 /* this is a usable devices */ 202 atomic_inc(&rdev->nr_pending); 203 rcu_read_unlock(); 204 return rdev; 205 } 206 } 207 rcu_read_unlock(); 208 return NULL; 209} 210 211static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) 212{ 213 struct md_rdev *rdev; 214 struct block_device *bdev; 215 struct mddev *mddev = bitmap->mddev; 216 struct bitmap_storage *store = &bitmap->storage; 217 218restart: 219 rdev = NULL; 220 while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { 221 int size = PAGE_SIZE; 222 loff_t offset = mddev->bitmap_info.offset; 223 224 bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; 225 226 if (page->index == store->file_pages-1) { 227 int last_page_size = store->bytes & (PAGE_SIZE-1); 228 if (last_page_size == 0) 229 last_page_size = PAGE_SIZE; 230 size = roundup(last_page_size, 231 bdev_logical_block_size(bdev)); 232 } 233 /* Just make sure we aren't corrupting data or 234 * metadata 235 */ 236 if (mddev->external) { 237 /* Bitmap could be anywhere. */ 238 if (rdev->sb_start + offset + (page->index 239 * (PAGE_SIZE/512)) 240 > rdev->data_offset 241 && 242 rdev->sb_start + offset 243 < (rdev->data_offset + mddev->dev_sectors 244 + (PAGE_SIZE/512))) 245 goto bad_alignment; 246 } else if (offset < 0) { 247 /* DATA BITMAP METADATA */ 248 if (offset 249 + (long)(page->index * (PAGE_SIZE/512)) 250 + size/512 > 0) 251 /* bitmap runs in to metadata */ 252 goto bad_alignment; 253 if (rdev->data_offset + mddev->dev_sectors 254 > rdev->sb_start + offset) 255 /* data runs in to bitmap */ 256 goto bad_alignment; 257 } else if (rdev->sb_start < rdev->data_offset) { 258 /* METADATA BITMAP DATA */ 259 if (rdev->sb_start 260 + offset 261 + page->index*(PAGE_SIZE/512) + size/512 262 > rdev->data_offset) 263 /* bitmap runs in to data */ 264 goto bad_alignment; 265 } else { 266 /* DATA METADATA BITMAP - no problems */ 267 } 268 md_super_write(mddev, rdev, 269 rdev->sb_start + offset 270 + page->index * (PAGE_SIZE/512), 271 size, 272 page); 273 } 274 275 if (wait && md_super_wait(mddev) < 0) 276 goto restart; 277 return 0; 278 279 bad_alignment: 280 return -EINVAL; 281} 282 283static void bitmap_file_kick(struct bitmap *bitmap); 284/* 285 * write out a page to a file 286 */ 287static void write_page(struct bitmap *bitmap, struct page *page, int wait) 288{ 289 struct buffer_head *bh; 290 291 if (bitmap->storage.file == NULL) { 292 switch (write_sb_page(bitmap, page, wait)) { 293 case -EINVAL: 294 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 295 } 296 } else { 297 298 bh = page_buffers(page); 299 300 while (bh && bh->b_blocknr) { 301 atomic_inc(&bitmap->pending_writes); 302 set_buffer_locked(bh); 303 set_buffer_mapped(bh); 304 submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); 305 bh = bh->b_this_page; 306 } 307 308 if (wait) 309 wait_event(bitmap->write_wait, 310 atomic_read(&bitmap->pending_writes)==0); 311 } 312 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 313 bitmap_file_kick(bitmap); 314} 315 316static void end_bitmap_write(struct buffer_head *bh, int uptodate) 317{ 318 struct bitmap *bitmap = bh->b_private; 319 320 if (!uptodate) 321 set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); 322 if (atomic_dec_and_test(&bitmap->pending_writes)) 323 wake_up(&bitmap->write_wait); 324} 325 326/* copied from buffer.c */ 327static void 328__clear_page_buffers(struct page *page) 329{ 330 ClearPagePrivate(page); 331 set_page_private(page, 0); 332 put_page(page); 333} 334static void free_buffers(struct page *page) 335{ 336 struct buffer_head *bh; 337 338 if (!PagePrivate(page)) 339 return; 340 341 bh = page_buffers(page); 342 while (bh) { 343 struct buffer_head *next = bh->b_this_page; 344 free_buffer_head(bh); 345 bh = next; 346 } 347 __clear_page_buffers(page); 348 put_page(page); 349} 350 351/* read a page from a file. 352 * We both read the page, and attach buffers to the page to record the 353 * address of each block (using bmap). These addresses will be used 354 * to write the block later, completely bypassing the filesystem. 355 * This usage is similar to how swap files are handled, and allows us 356 * to write to a file with no concerns of memory allocation failing. 357 */ 358static int read_page(struct file *file, unsigned long index, 359 struct bitmap *bitmap, 360 unsigned long count, 361 struct page *page) 362{ 363 int ret = 0; 364 struct inode *inode = file_inode(file); 365 struct buffer_head *bh; 366 sector_t block; 367 368 pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, 369 (unsigned long long)index << PAGE_SHIFT); 370 371 bh = alloc_page_buffers(page, 1<<inode->i_blkbits, 0); 372 if (!bh) { 373 ret = -ENOMEM; 374 goto out; 375 } 376 attach_page_buffers(page, bh); 377 block = index << (PAGE_SHIFT - inode->i_blkbits); 378 while (bh) { 379 if (count == 0) 380 bh->b_blocknr = 0; 381 else { 382 bh->b_blocknr = bmap(inode, block); 383 if (bh->b_blocknr == 0) { 384 /* Cannot use this file! */ 385 ret = -EINVAL; 386 goto out; 387 } 388 bh->b_bdev = inode->i_sb->s_bdev; 389 if (count < (1<<inode->i_blkbits)) 390 count = 0; 391 else 392 count -= (1<<inode->i_blkbits); 393 394 bh->b_end_io = end_bitmap_write; 395 bh->b_private = bitmap; 396 atomic_inc(&bitmap->pending_writes); 397 set_buffer_locked(bh); 398 set_buffer_mapped(bh); 399 submit_bh(REQ_OP_READ, 0, bh); 400 } 401 block++; 402 bh = bh->b_this_page; 403 } 404 page->index = index; 405 406 wait_event(bitmap->write_wait, 407 atomic_read(&bitmap->pending_writes)==0); 408 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 409 ret = -EIO; 410out: 411 if (ret) 412 pr_err("md: bitmap read error: (%dB @ %llu): %d\n", 413 (int)PAGE_SIZE, 414 (unsigned long long)index << PAGE_SHIFT, 415 ret); 416 return ret; 417} 418 419/* 420 * bitmap file superblock operations 421 */ 422 423/* 424 * bitmap_wait_writes() should be called before writing any bitmap 425 * blocks, to ensure previous writes, particularly from 426 * bitmap_daemon_work(), have completed. 427 */ 428static void bitmap_wait_writes(struct bitmap *bitmap) 429{ 430 if (bitmap->storage.file) 431 wait_event(bitmap->write_wait, 432 atomic_read(&bitmap->pending_writes)==0); 433 else 434 /* Note that we ignore the return value. The writes 435 * might have failed, but that would just mean that 436 * some bits which should be cleared haven't been, 437 * which is safe. The relevant bitmap blocks will 438 * probably get written again, but there is no great 439 * loss if they aren't. 440 */ 441 md_super_wait(bitmap->mddev); 442} 443 444 445/* update the event counter and sync the superblock to disk */ 446void bitmap_update_sb(struct bitmap *bitmap) 447{ 448 bitmap_super_t *sb; 449 450 if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ 451 return; 452 if (bitmap->mddev->bitmap_info.external) 453 return; 454 if (!bitmap->storage.sb_page) /* no superblock */ 455 return; 456 sb = kmap_atomic(bitmap->storage.sb_page); 457 sb->events = cpu_to_le64(bitmap->mddev->events); 458 if (bitmap->mddev->events < bitmap->events_cleared) 459 /* rocking back to read-only */ 460 bitmap->events_cleared = bitmap->mddev->events; 461 sb->events_cleared = cpu_to_le64(bitmap->events_cleared); 462 sb->state = cpu_to_le32(bitmap->flags); 463 /* Just in case these have been changed via sysfs: */ 464 sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); 465 sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); 466 /* This might have been changed by a reshape */ 467 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 468 sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); 469 sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes); 470 sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> 471 bitmap_info.space); 472 kunmap_atomic(sb); 473 write_page(bitmap, bitmap->storage.sb_page, 1); 474} 475EXPORT_SYMBOL(bitmap_update_sb); 476 477/* print out the bitmap file superblock */ 478void bitmap_print_sb(struct bitmap *bitmap) 479{ 480 bitmap_super_t *sb; 481 482 if (!bitmap || !bitmap->storage.sb_page) 483 return; 484 sb = kmap_atomic(bitmap->storage.sb_page); 485 pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); 486 pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); 487 pr_debug(" version: %d\n", le32_to_cpu(sb->version)); 488 pr_debug(" uuid: %08x.%08x.%08x.%08x\n", 489 le32_to_cpu(*(__u32 *)(sb->uuid+0)), 490 le32_to_cpu(*(__u32 *)(sb->uuid+4)), 491 le32_to_cpu(*(__u32 *)(sb->uuid+8)), 492 le32_to_cpu(*(__u32 *)(sb->uuid+12))); 493 pr_debug(" events: %llu\n", 494 (unsigned long long) le64_to_cpu(sb->events)); 495 pr_debug("events cleared: %llu\n", 496 (unsigned long long) le64_to_cpu(sb->events_cleared)); 497 pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); 498 pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); 499 pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); 500 pr_debug(" sync size: %llu KB\n", 501 (unsigned long long)le64_to_cpu(sb->sync_size)/2); 502 pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); 503 kunmap_atomic(sb); 504} 505 506/* 507 * bitmap_new_disk_sb 508 * @bitmap 509 * 510 * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb 511 * reads and verifies the on-disk bitmap superblock and populates bitmap_info. 512 * This function verifies 'bitmap_info' and populates the on-disk bitmap 513 * structure, which is to be written to disk. 514 * 515 * Returns: 0 on success, -Exxx on error 516 */ 517static int bitmap_new_disk_sb(struct bitmap *bitmap) 518{ 519 bitmap_super_t *sb; 520 unsigned long chunksize, daemon_sleep, write_behind; 521 522 bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); 523 if (bitmap->storage.sb_page == NULL) 524 return -ENOMEM; 525 bitmap->storage.sb_page->index = 0; 526 527 sb = kmap_atomic(bitmap->storage.sb_page); 528 529 sb->magic = cpu_to_le32(BITMAP_MAGIC); 530 sb->version = cpu_to_le32(BITMAP_MAJOR_HI); 531 532 chunksize = bitmap->mddev->bitmap_info.chunksize; 533 BUG_ON(!chunksize); 534 if (!is_power_of_2(chunksize)) { 535 kunmap_atomic(sb); 536 pr_warn("bitmap chunksize not a power of 2\n"); 537 return -EINVAL; 538 } 539 sb->chunksize = cpu_to_le32(chunksize); 540 541 daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; 542 if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { 543 pr_debug("Choosing daemon_sleep default (5 sec)\n"); 544 daemon_sleep = 5 * HZ; 545 } 546 sb->daemon_sleep = cpu_to_le32(daemon_sleep); 547 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 548 549 /* 550 * FIXME: write_behind for RAID1. If not specified, what 551 * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. 552 */ 553 write_behind = bitmap->mddev->bitmap_info.max_write_behind; 554 if (write_behind > COUNTER_MAX) 555 write_behind = COUNTER_MAX / 2; 556 sb->write_behind = cpu_to_le32(write_behind); 557 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 558 559 /* keep the array size field of the bitmap superblock up to date */ 560 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 561 562 memcpy(sb->uuid, bitmap->mddev->uuid, 16); 563 564 set_bit(BITMAP_STALE, &bitmap->flags); 565 sb->state = cpu_to_le32(bitmap->flags); 566 bitmap->events_cleared = bitmap->mddev->events; 567 sb->events_cleared = cpu_to_le64(bitmap->mddev->events); 568 bitmap->mddev->bitmap_info.nodes = 0; 569 570 kunmap_atomic(sb); 571 572 return 0; 573} 574 575/* read the superblock from the bitmap file and initialize some bitmap fields */ 576static int bitmap_read_sb(struct bitmap *bitmap) 577{ 578 char *reason = NULL; 579 bitmap_super_t *sb; 580 unsigned long chunksize, daemon_sleep, write_behind; 581 unsigned long long events; 582 int nodes = 0; 583 unsigned long sectors_reserved = 0; 584 int err = -EINVAL; 585 struct page *sb_page; 586 loff_t offset = bitmap->mddev->bitmap_info.offset; 587 588 if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { 589 chunksize = 128 * 1024 * 1024; 590 daemon_sleep = 5 * HZ; 591 write_behind = 0; 592 set_bit(BITMAP_STALE, &bitmap->flags); 593 err = 0; 594 goto out_no_sb; 595 } 596 /* page 0 is the superblock, read it... */ 597 sb_page = alloc_page(GFP_KERNEL); 598 if (!sb_page) 599 return -ENOMEM; 600 bitmap->storage.sb_page = sb_page; 601 602re_read: 603 /* If cluster_slot is set, the cluster is setup */ 604 if (bitmap->cluster_slot >= 0) { 605 sector_t bm_blocks = bitmap->mddev->resync_max_sectors; 606 607 sector_div(bm_blocks, 608 bitmap->mddev->bitmap_info.chunksize >> 9); 609 /* bits to bytes */ 610 bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); 611 /* to 4k blocks */ 612 bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); 613 offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3)); 614 pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, 615 bitmap->cluster_slot, offset); 616 } 617 618 if (bitmap->storage.file) { 619 loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); 620 int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; 621 622 err = read_page(bitmap->storage.file, 0, 623 bitmap, bytes, sb_page); 624 } else { 625 err = read_sb_page(bitmap->mddev, 626 offset, 627 sb_page, 628 0, sizeof(bitmap_super_t)); 629 } 630 if (err) 631 return err; 632 633 err = -EINVAL; 634 sb = kmap_atomic(sb_page); 635 636 chunksize = le32_to_cpu(sb->chunksize); 637 daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; 638 write_behind = le32_to_cpu(sb->write_behind); 639 sectors_reserved = le32_to_cpu(sb->sectors_reserved); 640 /* Setup nodes/clustername only if bitmap version is 641 * cluster-compatible 642 */ 643 if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { 644 nodes = le32_to_cpu(sb->nodes); 645 strlcpy(bitmap->mddev->bitmap_info.cluster_name, 646 sb->cluster_name, 64); 647 } 648 649 /* verify that the bitmap-specific fields are valid */ 650 if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) 651 reason = "bad magic"; 652 else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || 653 le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED) 654 reason = "unrecognized superblock version"; 655 else if (chunksize < 512) 656 reason = "bitmap chunksize too small"; 657 else if (!is_power_of_2(chunksize)) 658 reason = "bitmap chunksize not a power of 2"; 659 else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) 660 reason = "daemon sleep period out of range"; 661 else if (write_behind > COUNTER_MAX) 662 reason = "write-behind limit out of range (0 - 16383)"; 663 if (reason) { 664 pr_warn("%s: invalid bitmap file superblock: %s\n", 665 bmname(bitmap), reason); 666 goto out; 667 } 668 669 /* keep the array size field of the bitmap superblock up to date */ 670 sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); 671 672 if (bitmap->mddev->persistent) { 673 /* 674 * We have a persistent array superblock, so compare the 675 * bitmap's UUID and event counter to the mddev's 676 */ 677 if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { 678 pr_warn("%s: bitmap superblock UUID mismatch\n", 679 bmname(bitmap)); 680 goto out; 681 } 682 events = le64_to_cpu(sb->events); 683 if (!nodes && (events < bitmap->mddev->events)) { 684 pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n", 685 bmname(bitmap), events, 686 (unsigned long long) bitmap->mddev->events); 687 set_bit(BITMAP_STALE, &bitmap->flags); 688 } 689 } 690 691 /* assign fields using values from superblock */ 692 bitmap->flags |= le32_to_cpu(sb->state); 693 if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) 694 set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); 695 bitmap->events_cleared = le64_to_cpu(sb->events_cleared); 696 strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); 697 err = 0; 698 699out: 700 kunmap_atomic(sb); 701 /* Assigning chunksize is required for "re_read" */ 702 bitmap->mddev->bitmap_info.chunksize = chunksize; 703 if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { 704 err = md_setup_cluster(bitmap->mddev, nodes); 705 if (err) { 706 pr_warn("%s: Could not setup cluster service (%d)\n", 707 bmname(bitmap), err); 708 goto out_no_sb; 709 } 710 bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev); 711 goto re_read; 712 } 713 714 715out_no_sb: 716 if (test_bit(BITMAP_STALE, &bitmap->flags)) 717 bitmap->events_cleared = bitmap->mddev->events; 718 bitmap->mddev->bitmap_info.chunksize = chunksize; 719 bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; 720 bitmap->mddev->bitmap_info.max_write_behind = write_behind; 721 bitmap->mddev->bitmap_info.nodes = nodes; 722 if (bitmap->mddev->bitmap_info.space == 0 || 723 bitmap->mddev->bitmap_info.space > sectors_reserved) 724 bitmap->mddev->bitmap_info.space = sectors_reserved; 725 if (err) { 726 bitmap_print_sb(bitmap); 727 if (bitmap->cluster_slot < 0) 728 md_cluster_stop(bitmap->mddev); 729 } 730 return err; 731} 732 733/* 734 * general bitmap file operations 735 */ 736 737/* 738 * on-disk bitmap: 739 * 740 * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap 741 * file a page at a time. There's a superblock at the start of the file. 742 */ 743/* calculate the index of the page that contains this bit */ 744static inline unsigned long file_page_index(struct bitmap_storage *store, 745 unsigned long chunk) 746{ 747 if (store->sb_page) 748 chunk += sizeof(bitmap_super_t) << 3; 749 return chunk >> PAGE_BIT_SHIFT; 750} 751 752/* calculate the (bit) offset of this bit within a page */ 753static inline unsigned long file_page_offset(struct bitmap_storage *store, 754 unsigned long chunk) 755{ 756 if (store->sb_page) 757 chunk += sizeof(bitmap_super_t) << 3; 758 return chunk & (PAGE_BITS - 1); 759} 760 761/* 762 * return a pointer to the page in the filemap that contains the given bit 763 * 764 */ 765static inline struct page *filemap_get_page(struct bitmap_storage *store, 766 unsigned long chunk) 767{ 768 if (file_page_index(store, chunk) >= store->file_pages) 769 return NULL; 770 return store->filemap[file_page_index(store, chunk)]; 771} 772 773static int bitmap_storage_alloc(struct bitmap_storage *store, 774 unsigned long chunks, int with_super, 775 int slot_number) 776{ 777 int pnum, offset = 0; 778 unsigned long num_pages; 779 unsigned long bytes; 780 781 bytes = DIV_ROUND_UP(chunks, 8); 782 if (with_super) 783 bytes += sizeof(bitmap_super_t); 784 785 num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); 786 offset = slot_number * num_pages; 787 788 store->filemap = kmalloc(sizeof(struct page *) 789 * num_pages, GFP_KERNEL); 790 if (!store->filemap) 791 return -ENOMEM; 792 793 if (with_super && !store->sb_page) { 794 store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); 795 if (store->sb_page == NULL) 796 return -ENOMEM; 797 } 798 799 pnum = 0; 800 if (store->sb_page) { 801 store->filemap[0] = store->sb_page; 802 pnum = 1; 803 store->sb_page->index = offset; 804 } 805 806 for ( ; pnum < num_pages; pnum++) { 807 store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); 808 if (!store->filemap[pnum]) { 809 store->file_pages = pnum; 810 return -ENOMEM; 811 } 812 store->filemap[pnum]->index = pnum + offset; 813 } 814 store->file_pages = pnum; 815 816 /* We need 4 bits per page, rounded up to a multiple 817 * of sizeof(unsigned long) */ 818 store->filemap_attr = kzalloc( 819 roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), 820 GFP_KERNEL); 821 if (!store->filemap_attr) 822 return -ENOMEM; 823 824 store->bytes = bytes; 825 826 return 0; 827} 828 829static void bitmap_file_unmap(struct bitmap_storage *store) 830{ 831 struct page **map, *sb_page; 832 int pages; 833 struct file *file; 834 835 file = store->file; 836 map = store->filemap; 837 pages = store->file_pages; 838 sb_page = store->sb_page; 839 840 while (pages--) 841 if (map[pages] != sb_page) /* 0 is sb_page, release it below */ 842 free_buffers(map[pages]); 843 kfree(map); 844 kfree(store->filemap_attr); 845 846 if (sb_page) 847 free_buffers(sb_page); 848 849 if (file) { 850 struct inode *inode = file_inode(file); 851 invalidate_mapping_pages(inode->i_mapping, 0, -1); 852 fput(file); 853 } 854} 855 856/* 857 * bitmap_file_kick - if an error occurs while manipulating the bitmap file 858 * then it is no longer reliable, so we stop using it and we mark the file 859 * as failed in the superblock 860 */ 861static void bitmap_file_kick(struct bitmap *bitmap) 862{ 863 char *path, *ptr = NULL; 864 865 if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { 866 bitmap_update_sb(bitmap); 867 868 if (bitmap->storage.file) { 869 path = kmalloc(PAGE_SIZE, GFP_KERNEL); 870 if (path) 871 ptr = file_path(bitmap->storage.file, 872 path, PAGE_SIZE); 873 874 pr_warn("%s: kicking failed bitmap file %s from array!\n", 875 bmname(bitmap), IS_ERR(ptr) ? "" : ptr); 876 877 kfree(path); 878 } else 879 pr_warn("%s: disabling internal bitmap due to errors\n", 880 bmname(bitmap)); 881 } 882} 883 884enum bitmap_page_attr { 885 BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ 886 BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. 887 * i.e. counter is 1 or 2. */ 888 BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ 889}; 890 891static inline void set_page_attr(struct bitmap *bitmap, int pnum, 892 enum bitmap_page_attr attr) 893{ 894 set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 895} 896 897static inline void clear_page_attr(struct bitmap *bitmap, int pnum, 898 enum bitmap_page_attr attr) 899{ 900 clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 901} 902 903static inline int test_page_attr(struct bitmap *bitmap, int pnum, 904 enum bitmap_page_attr attr) 905{ 906 return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); 907} 908 909static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, 910 enum bitmap_page_attr attr) 911{ 912 return test_and_clear_bit((pnum<<2) + attr, 913 bitmap->storage.filemap_attr); 914} 915/* 916 * bitmap_file_set_bit -- called before performing a write to the md device 917 * to set (and eventually sync) a particular bit in the bitmap file 918 * 919 * we set the bit immediately, then we record the page number so that 920 * when an unplug occurs, we can flush the dirty pages out to disk 921 */ 922static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) 923{ 924 unsigned long bit; 925 struct page *page; 926 void *kaddr; 927 unsigned long chunk = block >> bitmap->counts.chunkshift; 928 struct bitmap_storage *store = &bitmap->storage; 929 unsigned long node_offset = 0; 930 931 if (mddev_is_clustered(bitmap->mddev)) 932 node_offset = bitmap->cluster_slot * store->file_pages; 933 934 page = filemap_get_page(&bitmap->storage, chunk); 935 if (!page) 936 return; 937 bit = file_page_offset(&bitmap->storage, chunk); 938 939 /* set the bit */ 940 kaddr = kmap_atomic(page); 941 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 942 set_bit(bit, kaddr); 943 else 944 set_bit_le(bit, kaddr); 945 kunmap_atomic(kaddr); 946 pr_debug("set file bit %lu page %lu\n", bit, page->index); 947 /* record page number so it gets flushed to disk when unplug occurs */ 948 set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_DIRTY); 949} 950 951static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) 952{ 953 unsigned long bit; 954 struct page *page; 955 void *paddr; 956 unsigned long chunk = block >> bitmap->counts.chunkshift; 957 struct bitmap_storage *store = &bitmap->storage; 958 unsigned long node_offset = 0; 959 960 if (mddev_is_clustered(bitmap->mddev)) 961 node_offset = bitmap->cluster_slot * store->file_pages; 962 963 page = filemap_get_page(&bitmap->storage, chunk); 964 if (!page) 965 return; 966 bit = file_page_offset(&bitmap->storage, chunk); 967 paddr = kmap_atomic(page); 968 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 969 clear_bit(bit, paddr); 970 else 971 clear_bit_le(bit, paddr); 972 kunmap_atomic(paddr); 973 if (!test_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_NEEDWRITE)) { 974 set_page_attr(bitmap, page->index - node_offset, BITMAP_PAGE_PENDING); 975 bitmap->allclean = 0; 976 } 977} 978 979static int bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) 980{ 981 unsigned long bit; 982 struct page *page; 983 void *paddr; 984 unsigned long chunk = block >> bitmap->counts.chunkshift; 985 int set = 0; 986 987 page = filemap_get_page(&bitmap->storage, chunk); 988 if (!page) 989 return -EINVAL; 990 bit = file_page_offset(&bitmap->storage, chunk); 991 paddr = kmap_atomic(page); 992 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 993 set = test_bit(bit, paddr); 994 else 995 set = test_bit_le(bit, paddr); 996 kunmap_atomic(paddr); 997 return set; 998} 999 1000 1001/* this gets called when the md device is ready to unplug its underlying 1002 * (slave) device queues -- before we let any writes go down, we need to 1003 * sync the dirty pages of the bitmap file to disk */ 1004void bitmap_unplug(struct bitmap *bitmap) 1005{ 1006 unsigned long i; 1007 int dirty, need_write; 1008 int writing = 0; 1009 1010 if (!bitmap || !bitmap->storage.filemap || 1011 test_bit(BITMAP_STALE, &bitmap->flags)) 1012 return; 1013 1014 /* look at each page to see if there are any set bits that need to be 1015 * flushed out to disk */ 1016 for (i = 0; i < bitmap->storage.file_pages; i++) { 1017 if (!bitmap->storage.filemap) 1018 return; 1019 dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 1020 need_write = test_and_clear_page_attr(bitmap, i, 1021 BITMAP_PAGE_NEEDWRITE); 1022 if (dirty || need_write) { 1023 if (!writing) { 1024 bitmap_wait_writes(bitmap); 1025 if (bitmap->mddev->queue) 1026 blk_add_trace_msg(bitmap->mddev->queue, 1027 "md bitmap_unplug"); 1028 } 1029 clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); 1030 write_page(bitmap, bitmap->storage.filemap[i], 0); 1031 writing = 1; 1032 } 1033 } 1034 if (writing) 1035 bitmap_wait_writes(bitmap); 1036 1037 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 1038 bitmap_file_kick(bitmap); 1039} 1040EXPORT_SYMBOL(bitmap_unplug); 1041 1042static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); 1043/* * bitmap_init_from_disk -- called at bitmap_create time to initialize 1044 * the in-memory bitmap from the on-disk bitmap -- also, sets up the 1045 * memory mapping of the bitmap file 1046 * Special cases: 1047 * if there's no bitmap file, or if the bitmap file had been 1048 * previously kicked from the array, we mark all the bits as 1049 * 1's in order to cause a full resync. 1050 * 1051 * We ignore all bits for sectors that end earlier than 'start'. 1052 * This is used when reading an out-of-date bitmap... 1053 */ 1054static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) 1055{ 1056 unsigned long i, chunks, index, oldindex, bit, node_offset = 0; 1057 struct page *page = NULL; 1058 unsigned long bit_cnt = 0; 1059 struct file *file; 1060 unsigned long offset; 1061 int outofdate; 1062 int ret = -ENOSPC; 1063 void *paddr; 1064 struct bitmap_storage *store = &bitmap->storage; 1065 1066 chunks = bitmap->counts.chunks; 1067 file = store->file; 1068 1069 if (!file && !bitmap->mddev->bitmap_info.offset) { 1070 /* No permanent bitmap - fill with '1s'. */ 1071 store->filemap = NULL; 1072 store->file_pages = 0; 1073 for (i = 0; i < chunks ; i++) { 1074 /* if the disk bit is set, set the memory bit */ 1075 int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) 1076 >= start); 1077 bitmap_set_memory_bits(bitmap, 1078 (sector_t)i << bitmap->counts.chunkshift, 1079 needed); 1080 } 1081 return 0; 1082 } 1083 1084 outofdate = test_bit(BITMAP_STALE, &bitmap->flags); 1085 if (outofdate) 1086 pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap)); 1087 1088 if (file && i_size_read(file->f_mapping->host) < store->bytes) { 1089 pr_warn("%s: bitmap file too short %lu < %lu\n", 1090 bmname(bitmap), 1091 (unsigned long) i_size_read(file->f_mapping->host), 1092 store->bytes); 1093 goto err; 1094 } 1095 1096 oldindex = ~0L; 1097 offset = 0; 1098 if (!bitmap->mddev->bitmap_info.external) 1099 offset = sizeof(bitmap_super_t); 1100 1101 if (mddev_is_clustered(bitmap->mddev)) 1102 node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); 1103 1104 for (i = 0; i < chunks; i++) { 1105 int b; 1106 index = file_page_index(&bitmap->storage, i); 1107 bit = file_page_offset(&bitmap->storage, i); 1108 if (index != oldindex) { /* this is a new page, read it in */ 1109 int count; 1110 /* unmap the old page, we're done with it */ 1111 if (index == store->file_pages-1) 1112 count = store->bytes - index * PAGE_SIZE; 1113 else 1114 count = PAGE_SIZE; 1115 page = store->filemap[index]; 1116 if (file) 1117 ret = read_page(file, index, bitmap, 1118 count, page); 1119 else 1120 ret = read_sb_page( 1121 bitmap->mddev, 1122 bitmap->mddev->bitmap_info.offset, 1123 page, 1124 index + node_offset, count); 1125 1126 if (ret) 1127 goto err; 1128 1129 oldindex = index; 1130 1131 if (outofdate) { 1132 /* 1133 * if bitmap is out of date, dirty the 1134 * whole page and write it out 1135 */ 1136 paddr = kmap_atomic(page); 1137 memset(paddr + offset, 0xff, 1138 PAGE_SIZE - offset); 1139 kunmap_atomic(paddr); 1140 write_page(bitmap, page, 1); 1141 1142 ret = -EIO; 1143 if (test_bit(BITMAP_WRITE_ERROR, 1144 &bitmap->flags)) 1145 goto err; 1146 } 1147 } 1148 paddr = kmap_atomic(page); 1149 if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) 1150 b = test_bit(bit, paddr); 1151 else 1152 b = test_bit_le(bit, paddr); 1153 kunmap_atomic(paddr); 1154 if (b) { 1155 /* if the disk bit is set, set the memory bit */ 1156 int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift 1157 >= start); 1158 bitmap_set_memory_bits(bitmap, 1159 (sector_t)i << bitmap->counts.chunkshift, 1160 needed); 1161 bit_cnt++; 1162 } 1163 offset = 0; 1164 } 1165 1166 pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n", 1167 bmname(bitmap), store->file_pages, 1168 bit_cnt, chunks); 1169 1170 return 0; 1171 1172 err: 1173 pr_warn("%s: bitmap initialisation failed: %d\n", 1174 bmname(bitmap), ret); 1175 return ret; 1176} 1177 1178void bitmap_write_all(struct bitmap *bitmap) 1179{ 1180 /* We don't actually write all bitmap blocks here, 1181 * just flag them as needing to be written 1182 */ 1183 int i; 1184 1185 if (!bitmap || !bitmap->storage.filemap) 1186 return; 1187 if (bitmap->storage.file) 1188 /* Only one copy, so nothing needed */ 1189 return; 1190 1191 for (i = 0; i < bitmap->storage.file_pages; i++) 1192 set_page_attr(bitmap, i, 1193 BITMAP_PAGE_NEEDWRITE); 1194 bitmap->allclean = 0; 1195} 1196 1197static void bitmap_count_page(struct bitmap_counts *bitmap, 1198 sector_t offset, int inc) 1199{ 1200 sector_t chunk = offset >> bitmap->chunkshift; 1201 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1202 bitmap->bp[page].count += inc; 1203 bitmap_checkfree(bitmap, page); 1204} 1205 1206static void bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) 1207{ 1208 sector_t chunk = offset >> bitmap->chunkshift; 1209 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1210 struct bitmap_page *bp = &bitmap->bp[page]; 1211 1212 if (!bp->pending) 1213 bp->pending = 1; 1214} 1215 1216static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap, 1217 sector_t offset, sector_t *blocks, 1218 int create); 1219 1220/* 1221 * bitmap daemon -- periodically wakes up to clean bits and flush pages 1222 * out to disk 1223 */ 1224 1225void bitmap_daemon_work(struct mddev *mddev) 1226{ 1227 struct bitmap *bitmap; 1228 unsigned long j; 1229 unsigned long nextpage; 1230 sector_t blocks; 1231 struct bitmap_counts *counts; 1232 1233 /* Use a mutex to guard daemon_work against 1234 * bitmap_destroy. 1235 */ 1236 mutex_lock(&mddev->bitmap_info.mutex); 1237 bitmap = mddev->bitmap; 1238 if (bitmap == NULL) { 1239 mutex_unlock(&mddev->bitmap_info.mutex); 1240 return; 1241 } 1242 if (time_before(jiffies, bitmap->daemon_lastrun 1243 + mddev->bitmap_info.daemon_sleep)) 1244 goto done; 1245 1246 bitmap->daemon_lastrun = jiffies; 1247 if (bitmap->allclean) { 1248 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1249 goto done; 1250 } 1251 bitmap->allclean = 1; 1252 1253 if (bitmap->mddev->queue) 1254 blk_add_trace_msg(bitmap->mddev->queue, 1255 "md bitmap_daemon_work"); 1256 1257 /* Any file-page which is PENDING now needs to be written. 1258 * So set NEEDWRITE now, then after we make any last-minute changes 1259 * we will write it. 1260 */ 1261 for (j = 0; j < bitmap->storage.file_pages; j++) 1262 if (test_and_clear_page_attr(bitmap, j, 1263 BITMAP_PAGE_PENDING)) 1264 set_page_attr(bitmap, j, 1265 BITMAP_PAGE_NEEDWRITE); 1266 1267 if (bitmap->need_sync && 1268 mddev->bitmap_info.external == 0) { 1269 /* Arrange for superblock update as well as 1270 * other changes */ 1271 bitmap_super_t *sb; 1272 bitmap->need_sync = 0; 1273 if (bitmap->storage.filemap) { 1274 sb = kmap_atomic(bitmap->storage.sb_page); 1275 sb->events_cleared = 1276 cpu_to_le64(bitmap->events_cleared); 1277 kunmap_atomic(sb); 1278 set_page_attr(bitmap, 0, 1279 BITMAP_PAGE_NEEDWRITE); 1280 } 1281 } 1282 /* Now look at the bitmap counters and if any are '2' or '1', 1283 * decrement and handle accordingly. 1284 */ 1285 counts = &bitmap->counts; 1286 spin_lock_irq(&counts->lock); 1287 nextpage = 0; 1288 for (j = 0; j < counts->chunks; j++) { 1289 bitmap_counter_t *bmc; 1290 sector_t block = (sector_t)j << counts->chunkshift; 1291 1292 if (j == nextpage) { 1293 nextpage += PAGE_COUNTER_RATIO; 1294 if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) { 1295 j |= PAGE_COUNTER_MASK; 1296 continue; 1297 } 1298 counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; 1299 } 1300 bmc = bitmap_get_counter(counts, 1301 block, 1302 &blocks, 0); 1303 1304 if (!bmc) { 1305 j |= PAGE_COUNTER_MASK; 1306 continue; 1307 } 1308 if (*bmc == 1 && !bitmap->need_sync) { 1309 /* We can clear the bit */ 1310 *bmc = 0; 1311 bitmap_count_page(counts, block, -1); 1312 bitmap_file_clear_bit(bitmap, block); 1313 } else if (*bmc && *bmc <= 2) { 1314 *bmc = 1; 1315 bitmap_set_pending(counts, block); 1316 bitmap->allclean = 0; 1317 } 1318 } 1319 spin_unlock_irq(&counts->lock); 1320 1321 bitmap_wait_writes(bitmap); 1322 /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. 1323 * DIRTY pages need to be written by bitmap_unplug so it can wait 1324 * for them. 1325 * If we find any DIRTY page we stop there and let bitmap_unplug 1326 * handle all the rest. This is important in the case where 1327 * the first blocking holds the superblock and it has been updated. 1328 * We mustn't write any other blocks before the superblock. 1329 */ 1330 for (j = 0; 1331 j < bitmap->storage.file_pages 1332 && !test_bit(BITMAP_STALE, &bitmap->flags); 1333 j++) { 1334 if (test_page_attr(bitmap, j, 1335 BITMAP_PAGE_DIRTY)) 1336 /* bitmap_unplug will handle the rest */ 1337 break; 1338 if (test_and_clear_page_attr(bitmap, j, 1339 BITMAP_PAGE_NEEDWRITE)) { 1340 write_page(bitmap, bitmap->storage.filemap[j], 0); 1341 } 1342 } 1343 1344 done: 1345 if (bitmap->allclean == 0) 1346 mddev->thread->timeout = 1347 mddev->bitmap_info.daemon_sleep; 1348 mutex_unlock(&mddev->bitmap_info.mutex); 1349} 1350 1351static bitmap_counter_t *bitmap_get_counter(struct bitmap_counts *bitmap, 1352 sector_t offset, sector_t *blocks, 1353 int create) 1354__releases(bitmap->lock) 1355__acquires(bitmap->lock) 1356{ 1357 /* If 'create', we might release the lock and reclaim it. 1358 * The lock must have been taken with interrupts enabled. 1359 * If !create, we don't release the lock. 1360 */ 1361 sector_t chunk = offset >> bitmap->chunkshift; 1362 unsigned long page = chunk >> PAGE_COUNTER_SHIFT; 1363 unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; 1364 sector_t csize; 1365 int err; 1366 1367 err = bitmap_checkpage(bitmap, page, create, 0); 1368 1369 if (bitmap->bp[page].hijacked || 1370 bitmap->bp[page].map == NULL) 1371 csize = ((sector_t)1) << (bitmap->chunkshift + 1372 PAGE_COUNTER_SHIFT - 1); 1373 else 1374 csize = ((sector_t)1) << bitmap->chunkshift; 1375 *blocks = csize - (offset & (csize - 1)); 1376 1377 if (err < 0) 1378 return NULL; 1379 1380 /* now locked ... */ 1381 1382 if (bitmap->bp[page].hijacked) { /* hijacked pointer */ 1383 /* should we use the first or second counter field 1384 * of the hijacked pointer? */ 1385 int hi = (pageoff > PAGE_COUNTER_MASK); 1386 return &((bitmap_counter_t *) 1387 &bitmap->bp[page].map)[hi]; 1388 } else /* page is allocated */ 1389 return (bitmap_counter_t *) 1390 &(bitmap->bp[page].map[pageoff]); 1391} 1392 1393int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, int behind) 1394{ 1395 if (!bitmap) 1396 return 0; 1397 1398 if (behind) { 1399 int bw; 1400 atomic_inc(&bitmap->behind_writes); 1401 bw = atomic_read(&bitmap->behind_writes); 1402 if (bw > bitmap->behind_writes_used) 1403 bitmap->behind_writes_used = bw; 1404 1405 pr_debug("inc write-behind count %d/%lu\n", 1406 bw, bitmap->mddev->bitmap_info.max_write_behind); 1407 } 1408 1409 while (sectors) { 1410 sector_t blocks; 1411 bitmap_counter_t *bmc; 1412 1413 spin_lock_irq(&bitmap->counts.lock); 1414 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); 1415 if (!bmc) { 1416 spin_unlock_irq(&bitmap->counts.lock); 1417 return 0; 1418 } 1419 1420 if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { 1421 DEFINE_WAIT(__wait); 1422 /* note that it is safe to do the prepare_to_wait 1423 * after the test as long as we do it before dropping 1424 * the spinlock. 1425 */ 1426 prepare_to_wait(&bitmap->overflow_wait, &__wait, 1427 TASK_UNINTERRUPTIBLE); 1428 spin_unlock_irq(&bitmap->counts.lock); 1429 schedule(); 1430 finish_wait(&bitmap->overflow_wait, &__wait); 1431 continue; 1432 } 1433 1434 switch (*bmc) { 1435 case 0: 1436 bitmap_file_set_bit(bitmap, offset); 1437 bitmap_count_page(&bitmap->counts, offset, 1); 1438 /* fall through */ 1439 case 1: 1440 *bmc = 2; 1441 } 1442 1443 (*bmc)++; 1444 1445 spin_unlock_irq(&bitmap->counts.lock); 1446 1447 offset += blocks; 1448 if (sectors > blocks) 1449 sectors -= blocks; 1450 else 1451 sectors = 0; 1452 } 1453 return 0; 1454} 1455EXPORT_SYMBOL(bitmap_startwrite); 1456 1457void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors, 1458 int success, int behind) 1459{ 1460 if (!bitmap) 1461 return; 1462 if (behind) { 1463 if (atomic_dec_and_test(&bitmap->behind_writes)) 1464 wake_up(&bitmap->behind_wait); 1465 pr_debug("dec write-behind count %d/%lu\n", 1466 atomic_read(&bitmap->behind_writes), 1467 bitmap->mddev->bitmap_info.max_write_behind); 1468 } 1469 1470 while (sectors) { 1471 sector_t blocks; 1472 unsigned long flags; 1473 bitmap_counter_t *bmc; 1474 1475 spin_lock_irqsave(&bitmap->counts.lock, flags); 1476 bmc = bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); 1477 if (!bmc) { 1478 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1479 return; 1480 } 1481 1482 if (success && !bitmap->mddev->degraded && 1483 bitmap->events_cleared < bitmap->mddev->events) { 1484 bitmap->events_cleared = bitmap->mddev->events; 1485 bitmap->need_sync = 1; 1486 sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); 1487 } 1488 1489 if (!success && !NEEDED(*bmc)) 1490 *bmc |= NEEDED_MASK; 1491 1492 if (COUNTER(*bmc) == COUNTER_MAX) 1493 wake_up(&bitmap->overflow_wait); 1494 1495 (*bmc)--; 1496 if (*bmc <= 2) { 1497 bitmap_set_pending(&bitmap->counts, offset); 1498 bitmap->allclean = 0; 1499 } 1500 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1501 offset += blocks; 1502 if (sectors > blocks) 1503 sectors -= blocks; 1504 else 1505 sectors = 0; 1506 } 1507} 1508EXPORT_SYMBOL(bitmap_endwrite); 1509 1510static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, 1511 int degraded) 1512{ 1513 bitmap_counter_t *bmc; 1514 int rv; 1515 if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ 1516 *blocks = 1024; 1517 return 1; /* always resync if no bitmap */ 1518 } 1519 spin_lock_irq(&bitmap->counts.lock); 1520 bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1521 rv = 0; 1522 if (bmc) { 1523 /* locked */ 1524 if (RESYNC(*bmc)) 1525 rv = 1; 1526 else if (NEEDED(*bmc)) { 1527 rv = 1; 1528 if (!degraded) { /* don't set/clear bits if degraded */ 1529 *bmc |= RESYNC_MASK; 1530 *bmc &= ~NEEDED_MASK; 1531 } 1532 } 1533 } 1534 spin_unlock_irq(&bitmap->counts.lock); 1535 return rv; 1536} 1537 1538int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, 1539 int degraded) 1540{ 1541 /* bitmap_start_sync must always report on multiples of whole 1542 * pages, otherwise resync (which is very PAGE_SIZE based) will 1543 * get confused. 1544 * So call __bitmap_start_sync repeatedly (if needed) until 1545 * At least PAGE_SIZE>>9 blocks are covered. 1546 * Return the 'or' of the result. 1547 */ 1548 int rv = 0; 1549 sector_t blocks1; 1550 1551 *blocks = 0; 1552 while (*blocks < (PAGE_SIZE>>9)) { 1553 rv |= __bitmap_start_sync(bitmap, offset, 1554 &blocks1, degraded); 1555 offset += blocks1; 1556 *blocks += blocks1; 1557 } 1558 return rv; 1559} 1560EXPORT_SYMBOL(bitmap_start_sync); 1561 1562void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, int aborted) 1563{ 1564 bitmap_counter_t *bmc; 1565 unsigned long flags; 1566 1567 if (bitmap == NULL) { 1568 *blocks = 1024; 1569 return; 1570 } 1571 spin_lock_irqsave(&bitmap->counts.lock, flags); 1572 bmc = bitmap_get_counter(&bitmap->counts, offset, blocks, 0); 1573 if (bmc == NULL) 1574 goto unlock; 1575 /* locked */ 1576 if (RESYNC(*bmc)) { 1577 *bmc &= ~RESYNC_MASK; 1578 1579 if (!NEEDED(*bmc) && aborted) 1580 *bmc |= NEEDED_MASK; 1581 else { 1582 if (*bmc <= 2) { 1583 bitmap_set_pending(&bitmap->counts, offset); 1584 bitmap->allclean = 0; 1585 } 1586 } 1587 } 1588 unlock: 1589 spin_unlock_irqrestore(&bitmap->counts.lock, flags); 1590} 1591EXPORT_SYMBOL(bitmap_end_sync); 1592 1593void bitmap_close_sync(struct bitmap *bitmap) 1594{ 1595 /* Sync has finished, and any bitmap chunks that weren't synced 1596 * properly have been aborted. It remains to us to clear the 1597 * RESYNC bit wherever it is still on 1598 */ 1599 sector_t sector = 0; 1600 sector_t blocks; 1601 if (!bitmap) 1602 return; 1603 while (sector < bitmap->mddev->resync_max_sectors) { 1604 bitmap_end_sync(bitmap, sector, &blocks, 0); 1605 sector += blocks; 1606 } 1607} 1608EXPORT_SYMBOL(bitmap_close_sync); 1609 1610void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) 1611{ 1612 sector_t s = 0; 1613 sector_t blocks; 1614 1615 if (!bitmap) 1616 return; 1617 if (sector == 0) { 1618 bitmap->last_end_sync = jiffies; 1619 return; 1620 } 1621 if (!force && time_before(jiffies, (bitmap->last_end_sync 1622 + bitmap->mddev->bitmap_info.daemon_sleep))) 1623 return; 1624 wait_event(bitmap->mddev->recovery_wait, 1625 atomic_read(&bitmap->mddev->recovery_active) == 0); 1626 1627 bitmap->mddev->curr_resync_completed = sector; 1628 set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags); 1629 sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); 1630 s = 0; 1631 while (s < sector && s < bitmap->mddev->resync_max_sectors) { 1632 bitmap_end_sync(bitmap, s, &blocks, 0); 1633 s += blocks; 1634 } 1635 bitmap->last_end_sync = jiffies; 1636 sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); 1637} 1638EXPORT_SYMBOL(bitmap_cond_end_sync); 1639 1640void bitmap_sync_with_cluster(struct mddev *mddev, 1641 sector_t old_lo, sector_t old_hi, 1642 sector_t new_lo, sector_t new_hi) 1643{ 1644 struct bitmap *bitmap = mddev->bitmap; 1645 sector_t sector, blocks = 0; 1646 1647 for (sector = old_lo; sector < new_lo; ) { 1648 bitmap_end_sync(bitmap, sector, &blocks, 0); 1649 sector += blocks; 1650 } 1651 WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); 1652 1653 for (sector = old_hi; sector < new_hi; ) { 1654 bitmap_start_sync(bitmap, sector, &blocks, 0); 1655 sector += blocks; 1656 } 1657 WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); 1658} 1659EXPORT_SYMBOL(bitmap_sync_with_cluster); 1660 1661static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) 1662{ 1663 /* For each chunk covered by any of these sectors, set the 1664 * counter to 2 and possibly set resync_needed. They should all 1665 * be 0 at this point 1666 */ 1667 1668 sector_t secs; 1669 bitmap_counter_t *bmc; 1670 spin_lock_irq(&bitmap->counts.lock); 1671 bmc = bitmap_get_counter(&bitmap->counts, offset, &secs, 1); 1672 if (!bmc) { 1673 spin_unlock_irq(&bitmap->counts.lock); 1674 return; 1675 } 1676 if (!*bmc) { 1677 *bmc = 2; 1678 bitmap_count_page(&bitmap->counts, offset, 1); 1679 bitmap_set_pending(&bitmap->counts, offset); 1680 bitmap->allclean = 0; 1681 } 1682 if (needed) 1683 *bmc |= NEEDED_MASK; 1684 spin_unlock_irq(&bitmap->counts.lock); 1685} 1686 1687/* dirty the memory and file bits for bitmap chunks "s" to "e" */ 1688void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) 1689{ 1690 unsigned long chunk; 1691 1692 for (chunk = s; chunk <= e; chunk++) { 1693 sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; 1694 bitmap_set_memory_bits(bitmap, sec, 1); 1695 bitmap_file_set_bit(bitmap, sec); 1696 if (sec < bitmap->mddev->recovery_cp) 1697 /* We are asserting that the array is dirty, 1698 * so move the recovery_cp address back so 1699 * that it is obvious that it is dirty 1700 */ 1701 bitmap->mddev->recovery_cp = sec; 1702 } 1703} 1704 1705/* 1706 * flush out any pending updates 1707 */ 1708void bitmap_flush(struct mddev *mddev) 1709{ 1710 struct bitmap *bitmap = mddev->bitmap; 1711 long sleep; 1712 1713 if (!bitmap) /* there was no bitmap */ 1714 return; 1715 1716 /* run the daemon_work three time to ensure everything is flushed 1717 * that can be 1718 */ 1719 sleep = mddev->bitmap_info.daemon_sleep * 2; 1720 bitmap->daemon_lastrun -= sleep; 1721 bitmap_daemon_work(mddev); 1722 bitmap->daemon_lastrun -= sleep; 1723 bitmap_daemon_work(mddev); 1724 bitmap->daemon_lastrun -= sleep; 1725 bitmap_daemon_work(mddev); 1726 bitmap_update_sb(bitmap); 1727} 1728 1729/* 1730 * free memory that was allocated 1731 */ 1732void bitmap_free(struct bitmap *bitmap) 1733{ 1734 unsigned long k, pages; 1735 struct bitmap_page *bp; 1736 1737 if (!bitmap) /* there was no bitmap */ 1738 return; 1739 1740 if (bitmap->sysfs_can_clear) 1741 sysfs_put(bitmap->sysfs_can_clear); 1742 1743 if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && 1744 bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev)) 1745 md_cluster_stop(bitmap->mddev); 1746 1747 /* Shouldn't be needed - but just in case.... */ 1748 wait_event(bitmap->write_wait, 1749 atomic_read(&bitmap->pending_writes) == 0); 1750 1751 /* release the bitmap file */ 1752 bitmap_file_unmap(&bitmap->storage); 1753 1754 bp = bitmap->counts.bp; 1755 pages = bitmap->counts.pages; 1756 1757 /* free all allocated memory */ 1758 1759 if (bp) /* deallocate the page memory */ 1760 for (k = 0; k < pages; k++) 1761 if (bp[k].map && !bp[k].hijacked) 1762 kfree(bp[k].map); 1763 kfree(bp); 1764 kfree(bitmap); 1765} 1766EXPORT_SYMBOL(bitmap_free); 1767 1768void bitmap_wait_behind_writes(struct mddev *mddev) 1769{ 1770 struct bitmap *bitmap = mddev->bitmap; 1771 1772 /* wait for behind writes to complete */ 1773 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { 1774 pr_debug("md:%s: behind writes in progress - waiting to stop.\n", 1775 mdname(mddev)); 1776 /* need to kick something here to make sure I/O goes? */ 1777 wait_event(bitmap->behind_wait, 1778 atomic_read(&bitmap->behind_writes) == 0); 1779 } 1780} 1781 1782void bitmap_destroy(struct mddev *mddev) 1783{ 1784 struct bitmap *bitmap = mddev->bitmap; 1785 1786 if (!bitmap) /* there was no bitmap */ 1787 return; 1788 1789 bitmap_wait_behind_writes(mddev); 1790 1791 mutex_lock(&mddev->bitmap_info.mutex); 1792 spin_lock(&mddev->lock); 1793 mddev->bitmap = NULL; /* disconnect from the md device */ 1794 spin_unlock(&mddev->lock); 1795 mutex_unlock(&mddev->bitmap_info.mutex); 1796 if (mddev->thread) 1797 mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; 1798 1799 bitmap_free(bitmap); 1800} 1801 1802/* 1803 * initialize the bitmap structure 1804 * if this returns an error, bitmap_destroy must be called to do clean up 1805 * once mddev->bitmap is set 1806 */ 1807struct bitmap *bitmap_create(struct mddev *mddev, int slot) 1808{ 1809 struct bitmap *bitmap; 1810 sector_t blocks = mddev->resync_max_sectors; 1811 struct file *file = mddev->bitmap_info.file; 1812 int err; 1813 struct kernfs_node *bm = NULL; 1814 1815 BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); 1816 1817 BUG_ON(file && mddev->bitmap_info.offset); 1818 1819 bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); 1820 if (!bitmap) 1821 return ERR_PTR(-ENOMEM); 1822 1823 spin_lock_init(&bitmap->counts.lock); 1824 atomic_set(&bitmap->pending_writes, 0); 1825 init_waitqueue_head(&bitmap->write_wait); 1826 init_waitqueue_head(&bitmap->overflow_wait); 1827 init_waitqueue_head(&bitmap->behind_wait); 1828 1829 bitmap->mddev = mddev; 1830 bitmap->cluster_slot = slot; 1831 1832 if (mddev->kobj.sd) 1833 bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); 1834 if (bm) { 1835 bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); 1836 sysfs_put(bm); 1837 } else 1838 bitmap->sysfs_can_clear = NULL; 1839 1840 bitmap->storage.file = file; 1841 if (file) { 1842 get_file(file); 1843 /* As future accesses to this file will use bmap, 1844 * and bypass the page cache, we must sync the file 1845 * first. 1846 */ 1847 vfs_fsync(file, 1); 1848 } 1849 /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ 1850 if (!mddev->bitmap_info.external) { 1851 /* 1852 * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is 1853 * instructing us to create a new on-disk bitmap instance. 1854 */ 1855 if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) 1856 err = bitmap_new_disk_sb(bitmap); 1857 else 1858 err = bitmap_read_sb(bitmap); 1859 } else { 1860 err = 0; 1861 if (mddev->bitmap_info.chunksize == 0 || 1862 mddev->bitmap_info.daemon_sleep == 0) 1863 /* chunksize and time_base need to be 1864 * set first. */ 1865 err = -EINVAL; 1866 } 1867 if (err) 1868 goto error; 1869 1870 bitmap->daemon_lastrun = jiffies; 1871 err = bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, 1); 1872 if (err) 1873 goto error; 1874 1875 pr_debug("created bitmap (%lu pages) for device %s\n", 1876 bitmap->counts.pages, bmname(bitmap)); 1877 1878 err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; 1879 if (err) 1880 goto error; 1881 1882 return bitmap; 1883 error: 1884 bitmap_free(bitmap); 1885 return ERR_PTR(err); 1886} 1887 1888int bitmap_load(struct mddev *mddev) 1889{ 1890 int err = 0; 1891 sector_t start = 0; 1892 sector_t sector = 0; 1893 struct bitmap *bitmap = mddev->bitmap; 1894 1895 if (!bitmap) 1896 goto out; 1897 1898 if (mddev_is_clustered(mddev)) 1899 md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); 1900 1901 /* Clear out old bitmap info first: Either there is none, or we 1902 * are resuming after someone else has possibly changed things, 1903 * so we should forget old cached info. 1904 * All chunks should be clean, but some might need_sync. 1905 */ 1906 while (sector < mddev->resync_max_sectors) { 1907 sector_t blocks; 1908 bitmap_start_sync(bitmap, sector, &blocks, 0); 1909 sector += blocks; 1910 } 1911 bitmap_close_sync(bitmap); 1912 1913 if (mddev->degraded == 0 1914 || bitmap->events_cleared == mddev->events) 1915 /* no need to keep dirty bits to optimise a 1916 * re-add of a missing device */ 1917 start = mddev->recovery_cp; 1918 1919 mutex_lock(&mddev->bitmap_info.mutex); 1920 err = bitmap_init_from_disk(bitmap, start); 1921 mutex_unlock(&mddev->bitmap_info.mutex); 1922 1923 if (err) 1924 goto out; 1925 clear_bit(BITMAP_STALE, &bitmap->flags); 1926 1927 /* Kick recovery in case any bits were set */ 1928 set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); 1929 1930 mddev->thread->timeout = mddev->bitmap_info.daemon_sleep; 1931 md_wakeup_thread(mddev->thread); 1932 1933 bitmap_update_sb(bitmap); 1934 1935 if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) 1936 err = -EIO; 1937out: 1938 return err; 1939} 1940EXPORT_SYMBOL_GPL(bitmap_load); 1941 1942struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) 1943{ 1944 int rv = 0; 1945 struct bitmap *bitmap; 1946 1947 bitmap = bitmap_create(mddev, slot); 1948 if (IS_ERR(bitmap)) { 1949 rv = PTR_ERR(bitmap); 1950 return ERR_PTR(rv); 1951 } 1952 1953 rv = bitmap_init_from_disk(bitmap, 0); 1954 if (rv) { 1955 bitmap_free(bitmap); 1956 return ERR_PTR(rv); 1957 } 1958 1959 return bitmap; 1960} 1961EXPORT_SYMBOL(get_bitmap_from_slot); 1962 1963/* Loads the bitmap associated with slot and copies the resync information 1964 * to our bitmap 1965 */ 1966int bitmap_copy_from_slot(struct mddev *mddev, int slot, 1967 sector_t *low, sector_t *high, bool clear_bits) 1968{ 1969 int rv = 0, i, j; 1970 sector_t block, lo = 0, hi = 0; 1971 struct bitmap_counts *counts; 1972 struct bitmap *bitmap; 1973 1974 bitmap = get_bitmap_from_slot(mddev, slot); 1975 if (IS_ERR(bitmap)) { 1976 pr_err("%s can't get bitmap from slot %d\n", __func__, slot); 1977 return -1; 1978 } 1979 1980 counts = &bitmap->counts; 1981 for (j = 0; j < counts->chunks; j++) { 1982 block = (sector_t)j << counts->chunkshift; 1983 if (bitmap_file_test_bit(bitmap, block)) { 1984 if (!lo) 1985 lo = block; 1986 hi = block; 1987 bitmap_file_clear_bit(bitmap, block); 1988 bitmap_set_memory_bits(mddev->bitmap, block, 1); 1989 bitmap_file_set_bit(mddev->bitmap, block); 1990 } 1991 } 1992 1993 if (clear_bits) { 1994 bitmap_update_sb(bitmap); 1995 /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs 1996 * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ 1997 for (i = 0; i < bitmap->storage.file_pages; i++) 1998 if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) 1999 set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); 2000 bitmap_unplug(bitmap); 2001 } 2002 bitmap_unplug(mddev->bitmap); 2003 *low = lo; 2004 *high = hi; 2005 2006 return rv; 2007} 2008EXPORT_SYMBOL_GPL(bitmap_copy_from_slot); 2009 2010 2011void bitmap_status(struct seq_file *seq, struct bitmap *bitmap) 2012{ 2013 unsigned long chunk_kb; 2014 struct bitmap_counts *counts; 2015 2016 if (!bitmap) 2017 return; 2018 2019 counts = &bitmap->counts; 2020 2021 chunk_kb = bitmap->mddev->bitmap_info.chunksize >> 10; 2022 seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], " 2023 "%lu%s chunk", 2024 counts->pages - counts->missing_pages, 2025 counts->pages, 2026 (counts->pages - counts->missing_pages) 2027 << (PAGE_SHIFT - 10), 2028 chunk_kb ? chunk_kb : bitmap->mddev->bitmap_info.chunksize, 2029 chunk_kb ? "KB" : "B"); 2030 if (bitmap->storage.file) { 2031 seq_printf(seq, ", file: "); 2032 seq_file_path(seq, bitmap->storage.file, " \t\n"); 2033 } 2034 2035 seq_printf(seq, "\n"); 2036} 2037 2038int bitmap_resize(struct bitmap *bitmap, sector_t blocks, 2039 int chunksize, int init) 2040{ 2041 /* If chunk_size is 0, choose an appropriate chunk size. 2042 * Then possibly allocate new storage space. 2043 * Then quiesce, copy bits, replace bitmap, and re-start 2044 * 2045 * This function is called both to set up the initial bitmap 2046 * and to resize the bitmap while the array is active. 2047 * If this happens as a result of the array being resized, 2048 * chunksize will be zero, and we need to choose a suitable 2049 * chunksize, otherwise we use what we are given. 2050 */ 2051 struct bitmap_storage store; 2052 struct bitmap_counts old_counts; 2053 unsigned long chunks; 2054 sector_t block; 2055 sector_t old_blocks, new_blocks; 2056 int chunkshift; 2057 int ret = 0; 2058 long pages; 2059 struct bitmap_page *new_bp; 2060 2061 if (chunksize == 0) { 2062 /* If there is enough space, leave the chunk size unchanged, 2063 * else increase by factor of two until there is enough space. 2064 */ 2065 long bytes; 2066 long space = bitmap->mddev->bitmap_info.space; 2067 2068 if (space == 0) { 2069 /* We don't know how much space there is, so limit 2070 * to current size - in sectors. 2071 */ 2072 bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8); 2073 if (!bitmap->mddev->bitmap_info.external) 2074 bytes += sizeof(bitmap_super_t); 2075 space = DIV_ROUND_UP(bytes, 512); 2076 bitmap->mddev->bitmap_info.space = space; 2077 } 2078 chunkshift = bitmap->counts.chunkshift; 2079 chunkshift--; 2080 do { 2081 /* 'chunkshift' is shift from block size to chunk size */ 2082 chunkshift++; 2083 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2084 bytes = DIV_ROUND_UP(chunks, 8); 2085 if (!bitmap->mddev->bitmap_info.external) 2086 bytes += sizeof(bitmap_super_t); 2087 } while (bytes > (space << 9)); 2088 } else 2089 chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; 2090 2091 chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); 2092 memset(&store, 0, sizeof(store)); 2093 if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) 2094 ret = bitmap_storage_alloc(&store, chunks, 2095 !bitmap->mddev->bitmap_info.external, 2096 mddev_is_clustered(bitmap->mddev) 2097 ? bitmap->cluster_slot : 0); 2098 if (ret) { 2099 bitmap_file_unmap(&store); 2100 goto err; 2101 } 2102 2103 pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); 2104 2105 new_bp = kzalloc(pages * sizeof(*new_bp), GFP_KERNEL); 2106 ret = -ENOMEM; 2107 if (!new_bp) { 2108 bitmap_file_unmap(&store); 2109 goto err; 2110 } 2111 2112 if (!init) 2113 bitmap->mddev->pers->quiesce(bitmap->mddev, 1); 2114 2115 store.file = bitmap->storage.file; 2116 bitmap->storage.file = NULL; 2117 2118 if (store.sb_page && bitmap->storage.sb_page) 2119 memcpy(page_address(store.sb_page), 2120 page_address(bitmap->storage.sb_page), 2121 sizeof(bitmap_super_t)); 2122 bitmap_file_unmap(&bitmap->storage); 2123 bitmap->storage = store; 2124 2125 old_counts = bitmap->counts; 2126 bitmap->counts.bp = new_bp; 2127 bitmap->counts.pages = pages; 2128 bitmap->counts.missing_pages = pages; 2129 bitmap->counts.chunkshift = chunkshift; 2130 bitmap->counts.chunks = chunks; 2131 bitmap->mddev->bitmap_info.chunksize = 1 << (chunkshift + 2132 BITMAP_BLOCK_SHIFT); 2133 2134 blocks = min(old_counts.chunks << old_counts.chunkshift, 2135 chunks << chunkshift); 2136 2137 spin_lock_irq(&bitmap->counts.lock); 2138 /* For cluster raid, need to pre-allocate bitmap */ 2139 if (mddev_is_clustered(bitmap->mddev)) { 2140 unsigned long page; 2141 for (page = 0; page < pages; page++) { 2142 ret = bitmap_checkpage(&bitmap->counts, page, 1, 1); 2143 if (ret) { 2144 unsigned long k; 2145 2146 /* deallocate the page memory */ 2147 for (k = 0; k < page; k++) { 2148 kfree(new_bp[k].map); 2149 } 2150 2151 /* restore some fields from old_counts */ 2152 bitmap->counts.bp = old_counts.bp; 2153 bitmap->counts.pages = old_counts.pages; 2154 bitmap->counts.missing_pages = old_counts.pages; 2155 bitmap->counts.chunkshift = old_counts.chunkshift; 2156 bitmap->counts.chunks = old_counts.chunks; 2157 bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + 2158 BITMAP_BLOCK_SHIFT); 2159 blocks = old_counts.chunks << old_counts.chunkshift; 2160 pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); 2161 break; 2162 } else 2163 bitmap->counts.bp[page].count += 1; 2164 } 2165 } 2166 2167 for (block = 0; block < blocks; ) { 2168 bitmap_counter_t *bmc_old, *bmc_new; 2169 int set; 2170 2171 bmc_old = bitmap_get_counter(&old_counts, block, 2172 &old_blocks, 0); 2173 set = bmc_old && NEEDED(*bmc_old); 2174 2175 if (set) { 2176 bmc_new = bitmap_get_counter(&bitmap->counts, block, 2177 &new_blocks, 1); 2178 if (*bmc_new == 0) { 2179 /* need to set on-disk bits too. */ 2180 sector_t end = block + new_blocks; 2181 sector_t start = block >> chunkshift; 2182 start <<= chunkshift; 2183 while (start < end) { 2184 bitmap_file_set_bit(bitmap, block); 2185 start += 1 << chunkshift; 2186 } 2187 *bmc_new = 2; 2188 bitmap_count_page(&bitmap->counts, 2189 block, 1); 2190 bitmap_set_pending(&bitmap->counts, 2191 block); 2192 } 2193 *bmc_new |= NEEDED_MASK; 2194 if (new_blocks < old_blocks) 2195 old_blocks = new_blocks; 2196 } 2197 block += old_blocks; 2198 } 2199 2200 if (!init) { 2201 int i; 2202 while (block < (chunks << chunkshift)) { 2203 bitmap_counter_t *bmc; 2204 bmc = bitmap_get_counter(&bitmap->counts, block, 2205 &new_blocks, 1); 2206 if (bmc) { 2207 /* new space. It needs to be resynced, so 2208 * we set NEEDED_MASK. 2209 */ 2210 if (*bmc == 0) { 2211 *bmc = NEEDED_MASK | 2; 2212 bitmap_count_page(&bitmap->counts, 2213 block, 1); 2214 bitmap_set_pending(&bitmap->counts, 2215 block); 2216 } 2217 } 2218 block += new_blocks; 2219 } 2220 for (i = 0; i < bitmap->storage.file_pages; i++) 2221 set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); 2222 } 2223 spin_unlock_irq(&bitmap->counts.lock); 2224 2225 if (!init) { 2226 bitmap_unplug(bitmap); 2227 bitmap->mddev->pers->quiesce(bitmap->mddev, 0); 2228 } 2229 ret = 0; 2230err: 2231 return ret; 2232} 2233EXPORT_SYMBOL_GPL(bitmap_resize); 2234 2235static ssize_t 2236location_show(struct mddev *mddev, char *page) 2237{ 2238 ssize_t len; 2239 if (mddev->bitmap_info.file) 2240 len = sprintf(page, "file"); 2241 else if (mddev->bitmap_info.offset) 2242 len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); 2243 else 2244 len = sprintf(page, "none"); 2245 len += sprintf(page+len, "\n"); 2246 return len; 2247} 2248 2249static ssize_t 2250location_store(struct mddev *mddev, const char *buf, size_t len) 2251{ 2252 int rv; 2253 2254 rv = mddev_lock(mddev); 2255 if (rv) 2256 return rv; 2257 if (mddev->pers) { 2258 if (!mddev->pers->quiesce) { 2259 rv = -EBUSY; 2260 goto out; 2261 } 2262 if (mddev->recovery || mddev->sync_thread) { 2263 rv = -EBUSY; 2264 goto out; 2265 } 2266 } 2267 2268 if (mddev->bitmap || mddev->bitmap_info.file || 2269 mddev->bitmap_info.offset) { 2270 /* bitmap already configured. Only option is to clear it */ 2271 if (strncmp(buf, "none", 4) != 0) { 2272 rv = -EBUSY; 2273 goto out; 2274 } 2275 if (mddev->pers) { 2276 mddev->pers->quiesce(mddev, 1); 2277 bitmap_destroy(mddev); 2278 mddev->pers->quiesce(mddev, 0); 2279 } 2280 mddev->bitmap_info.offset = 0; 2281 if (mddev->bitmap_info.file) { 2282 struct file *f = mddev->bitmap_info.file; 2283 mddev->bitmap_info.file = NULL; 2284 fput(f); 2285 } 2286 } else { 2287 /* No bitmap, OK to set a location */ 2288 long long offset; 2289 if (strncmp(buf, "none", 4) == 0) 2290 /* nothing to be done */; 2291 else if (strncmp(buf, "file:", 5) == 0) { 2292 /* Not supported yet */ 2293 rv = -EINVAL; 2294 goto out; 2295 } else { 2296 if (buf[0] == '+') 2297 rv = kstrtoll(buf+1, 10, &offset); 2298 else 2299 rv = kstrtoll(buf, 10, &offset); 2300 if (rv) 2301 goto out; 2302 if (offset == 0) { 2303 rv = -EINVAL; 2304 goto out; 2305 } 2306 if (mddev->bitmap_info.external == 0 && 2307 mddev->major_version == 0 && 2308 offset != mddev->bitmap_info.default_offset) { 2309 rv = -EINVAL; 2310 goto out; 2311 } 2312 mddev->bitmap_info.offset = offset; 2313 if (mddev->pers) { 2314 struct bitmap *bitmap; 2315 mddev->pers->quiesce(mddev, 1); 2316 bitmap = bitmap_create(mddev, -1); 2317 if (IS_ERR(bitmap)) 2318 rv = PTR_ERR(bitmap); 2319 else { 2320 mddev->bitmap = bitmap; 2321 rv = bitmap_load(mddev); 2322 if (rv) 2323 mddev->bitmap_info.offset = 0; 2324 } 2325 mddev->pers->quiesce(mddev, 0); 2326 if (rv) { 2327 bitmap_destroy(mddev); 2328 goto out; 2329 } 2330 } 2331 } 2332 } 2333 if (!mddev->external) { 2334 /* Ensure new bitmap info is stored in 2335 * metadata promptly. 2336 */ 2337 set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); 2338 md_wakeup_thread(mddev->thread); 2339 } 2340 rv = 0; 2341out: 2342 mddev_unlock(mddev); 2343 if (rv) 2344 return rv; 2345 return len; 2346} 2347 2348static struct md_sysfs_entry bitmap_location = 2349__ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); 2350 2351/* 'bitmap/space' is the space available at 'location' for the 2352 * bitmap. This allows the kernel to know when it is safe to 2353 * resize the bitmap to match a resized array. 2354 */ 2355static ssize_t 2356space_show(struct mddev *mddev, char *page) 2357{ 2358 return sprintf(page, "%lu\n", mddev->bitmap_info.space); 2359} 2360 2361static ssize_t 2362space_store(struct mddev *mddev, const char *buf, size_t len) 2363{ 2364 unsigned long sectors; 2365 int rv; 2366 2367 rv = kstrtoul(buf, 10, &sectors); 2368 if (rv) 2369 return rv; 2370 2371 if (sectors == 0) 2372 return -EINVAL; 2373 2374 if (mddev->bitmap && 2375 sectors < (mddev->bitmap->storage.bytes + 511) >> 9) 2376 return -EFBIG; /* Bitmap is too big for this small space */ 2377 2378 /* could make sure it isn't too big, but that isn't really 2379 * needed - user-space should be careful. 2380 */ 2381 mddev->bitmap_info.space = sectors; 2382 return len; 2383} 2384 2385static struct md_sysfs_entry bitmap_space = 2386__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); 2387 2388static ssize_t 2389timeout_show(struct mddev *mddev, char *page) 2390{ 2391 ssize_t len; 2392 unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; 2393 unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; 2394 2395 len = sprintf(page, "%lu", secs); 2396 if (jifs) 2397 len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); 2398 len += sprintf(page+len, "\n"); 2399 return len; 2400} 2401 2402static ssize_t 2403timeout_store(struct mddev *mddev, const char *buf, size_t len) 2404{ 2405 /* timeout can be set at any time */ 2406 unsigned long timeout; 2407 int rv = strict_strtoul_scaled(buf, &timeout, 4); 2408 if (rv) 2409 return rv; 2410 2411 /* just to make sure we don't overflow... */ 2412 if (timeout >= LONG_MAX / HZ) 2413 return -EINVAL; 2414 2415 timeout = timeout * HZ / 10000; 2416 2417 if (timeout >= MAX_SCHEDULE_TIMEOUT) 2418 timeout = MAX_SCHEDULE_TIMEOUT-1; 2419 if (timeout < 1) 2420 timeout = 1; 2421 mddev->bitmap_info.daemon_sleep = timeout; 2422 if (mddev->thread) { 2423 /* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then 2424 * the bitmap is all clean and we don't need to 2425 * adjust the timeout right now 2426 */ 2427 if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) { 2428 mddev->thread->timeout = timeout; 2429 md_wakeup_thread(mddev->thread); 2430 } 2431 } 2432 return len; 2433} 2434 2435static struct md_sysfs_entry bitmap_timeout = 2436__ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); 2437 2438static ssize_t 2439backlog_show(struct mddev *mddev, char *page) 2440{ 2441 return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); 2442} 2443 2444static ssize_t 2445backlog_store(struct mddev *mddev, const char *buf, size_t len) 2446{ 2447 unsigned long backlog; 2448 int rv = kstrtoul(buf, 10, &backlog); 2449 if (rv) 2450 return rv; 2451 if (backlog > COUNTER_MAX) 2452 return -EINVAL; 2453 mddev->bitmap_info.max_write_behind = backlog; 2454 return len; 2455} 2456 2457static struct md_sysfs_entry bitmap_backlog = 2458__ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); 2459 2460static ssize_t 2461chunksize_show(struct mddev *mddev, char *page) 2462{ 2463 return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); 2464} 2465 2466static ssize_t 2467chunksize_store(struct mddev *mddev, const char *buf, size_t len) 2468{ 2469 /* Can only be changed when no bitmap is active */ 2470 int rv; 2471 unsigned long csize; 2472 if (mddev->bitmap) 2473 return -EBUSY; 2474 rv = kstrtoul(buf, 10, &csize); 2475 if (rv) 2476 return rv; 2477 if (csize < 512 || 2478 !is_power_of_2(csize)) 2479 return -EINVAL; 2480 mddev->bitmap_info.chunksize = csize; 2481 return len; 2482} 2483 2484static struct md_sysfs_entry bitmap_chunksize = 2485__ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); 2486 2487static ssize_t metadata_show(struct mddev *mddev, char *page) 2488{ 2489 if (mddev_is_clustered(mddev)) 2490 return sprintf(page, "clustered\n"); 2491 return sprintf(page, "%s\n", (mddev->bitmap_info.external 2492 ? "external" : "internal")); 2493} 2494 2495static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) 2496{ 2497 if (mddev->bitmap || 2498 mddev->bitmap_info.file || 2499 mddev->bitmap_info.offset) 2500 return -EBUSY; 2501 if (strncmp(buf, "external", 8) == 0) 2502 mddev->bitmap_info.external = 1; 2503 else if ((strncmp(buf, "internal", 8) == 0) || 2504 (strncmp(buf, "clustered", 9) == 0)) 2505 mddev->bitmap_info.external = 0; 2506 else 2507 return -EINVAL; 2508 return len; 2509} 2510 2511static struct md_sysfs_entry bitmap_metadata = 2512__ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); 2513 2514static ssize_t can_clear_show(struct mddev *mddev, char *page) 2515{ 2516 int len; 2517 spin_lock(&mddev->lock); 2518 if (mddev->bitmap) 2519 len = sprintf(page, "%s\n", (mddev->bitmap->need_sync ? 2520 "false" : "true")); 2521 else 2522 len = sprintf(page, "\n"); 2523 spin_unlock(&mddev->lock); 2524 return len; 2525} 2526 2527static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) 2528{ 2529 if (mddev->bitmap == NULL) 2530 return -ENOENT; 2531 if (strncmp(buf, "false", 5) == 0) 2532 mddev->bitmap->need_sync = 1; 2533 else if (strncmp(buf, "true", 4) == 0) { 2534 if (mddev->degraded) 2535 return -EBUSY; 2536 mddev->bitmap->need_sync = 0; 2537 } else 2538 return -EINVAL; 2539 return len; 2540} 2541 2542static struct md_sysfs_entry bitmap_can_clear = 2543__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); 2544 2545static ssize_t 2546behind_writes_used_show(struct mddev *mddev, char *page) 2547{ 2548 ssize_t ret; 2549 spin_lock(&mddev->lock); 2550 if (mddev->bitmap == NULL) 2551 ret = sprintf(page, "0\n"); 2552 else 2553 ret = sprintf(page, "%lu\n", 2554 mddev->bitmap->behind_writes_used); 2555 spin_unlock(&mddev->lock); 2556 return ret; 2557} 2558 2559static ssize_t 2560behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) 2561{ 2562 if (mddev->bitmap) 2563 mddev->bitmap->behind_writes_used = 0; 2564 return len; 2565} 2566 2567static struct md_sysfs_entry max_backlog_used = 2568__ATTR(max_backlog_used, S_IRUGO | S_IWUSR, 2569 behind_writes_used_show, behind_writes_used_reset); 2570 2571static struct attribute *md_bitmap_attrs[] = { 2572 &bitmap_location.attr, 2573 &bitmap_space.attr, 2574 &bitmap_timeout.attr, 2575 &bitmap_backlog.attr, 2576 &bitmap_chunksize.attr, 2577 &bitmap_metadata.attr, 2578 &bitmap_can_clear.attr, 2579 &max_backlog_used.attr, 2580 NULL 2581}; 2582struct attribute_group md_bitmap_group = { 2583 .name = "bitmap", 2584 .attrs = md_bitmap_attrs, 2585}; 2586