Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'md/4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:

- Several bug fixes for raid5-cache from Song Liu, mainly handling
journal disk errors

- Fix bad block handling in choosing raid1 disk from Tomasz Majchrzak

- Simplify external metadata array sysfs handling from Artur
Paszkiewicz

- Optimize raid0 discard handling from me; raid0 now dispatches
large discard IOs directly to the underlying disks.

* tag 'md/4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
raid1: prefer disk without bad blocks
md/r5cache: handle sync with data in write back cache
md/r5cache: gracefully handle journal device errors for writeback mode
md/raid1/10: avoid unnecessary locking
md/raid5-cache: in r5l_do_submit_io(), submit io->split_bio first
md/md0: optimize raid0 discard handling
md: don't return -EAGAIN in md_allow_write for external metadata arrays
md/raid5: make use of spin_lock_irq over local_irq_disable + spin_lock

+209 -86
+8 -12
drivers/md/md.c
··· 8022 8022 * may proceed without blocking. It is important to call this before 8023 8023 * attempting a GFP_KERNEL allocation while holding the mddev lock. 8024 8024 * Must be called with mddev_lock held. 8025 - * 8026 - * In the ->external case MD_SB_CHANGE_PENDING can not be cleared until mddev->lock 8027 - * is dropped, so return -EAGAIN after notifying userspace. 8028 8025 */ 8029 - int md_allow_write(struct mddev *mddev) 8026 + void md_allow_write(struct mddev *mddev) 8030 8027 { 8031 8028 if (!mddev->pers) 8032 - return 0; 8029 + return; 8033 8030 if (mddev->ro) 8034 - return 0; 8031 + return; 8035 8032 if (!mddev->pers->sync_request) 8036 - return 0; 8033 + return; 8037 8034 8038 8035 spin_lock(&mddev->lock); 8039 8036 if (mddev->in_sync) { ··· 8043 8046 spin_unlock(&mddev->lock); 8044 8047 md_update_sb(mddev, 0); 8045 8048 sysfs_notify_dirent_safe(mddev->sysfs_state); 8049 + /* wait for the dirty state to be recorded in the metadata */ 8050 + wait_event(mddev->sb_wait, 8051 + !test_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags) && 8052 + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); 8046 8053 } else 8047 8054 spin_unlock(&mddev->lock); 8048 - 8049 - if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) 8050 - return -EAGAIN; 8051 - else 8052 - return 0; 8053 8055 } 8054 8056 EXPORT_SYMBOL_GPL(md_allow_write); 8055 8057
+1 -1
drivers/md/md.h
··· 665 665 bool metadata_op); 666 666 extern void md_do_sync(struct md_thread *thread); 667 667 extern void md_new_event(struct mddev *mddev); 668 - extern int md_allow_write(struct mddev *mddev); 668 + extern void md_allow_write(struct mddev *mddev); 669 669 extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); 670 670 extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); 671 671 extern int md_check_no_bitmap(struct mddev *mddev);
+102 -14
drivers/md/raid0.c
··· 385 385 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); 386 386 blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); 387 387 blk_queue_max_write_zeroes_sectors(mddev->queue, mddev->chunk_sectors); 388 - blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors); 388 + blk_queue_max_discard_sectors(mddev->queue, UINT_MAX); 389 389 390 390 blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); 391 391 blk_queue_io_opt(mddev->queue, ··· 459 459 } 460 460 } 461 461 462 + static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) 463 + { 464 + struct r0conf *conf = mddev->private; 465 + struct strip_zone *zone; 466 + sector_t start = bio->bi_iter.bi_sector; 467 + sector_t end; 468 + unsigned int stripe_size; 469 + sector_t first_stripe_index, last_stripe_index; 470 + sector_t start_disk_offset; 471 + unsigned int start_disk_index; 472 + sector_t end_disk_offset; 473 + unsigned int end_disk_index; 474 + unsigned int disk; 475 + 476 + zone = find_zone(conf, &start); 477 + 478 + if (bio_end_sector(bio) > zone->zone_end) { 479 + struct bio *split = bio_split(bio, 480 + zone->zone_end - bio->bi_iter.bi_sector, GFP_NOIO, 481 + mddev->bio_set); 482 + bio_chain(split, bio); 483 + generic_make_request(bio); 484 + bio = split; 485 + end = zone->zone_end; 486 + } else 487 + end = bio_end_sector(bio); 488 + 489 + if (zone != conf->strip_zone) 490 + end = end - zone[-1].zone_end; 491 + 492 + /* Now start and end is the offset in zone */ 493 + stripe_size = zone->nb_dev * mddev->chunk_sectors; 494 + 495 + first_stripe_index = start; 496 + sector_div(first_stripe_index, stripe_size); 497 + last_stripe_index = end; 498 + sector_div(last_stripe_index, stripe_size); 499 + 500 + start_disk_index = (int)(start - first_stripe_index * stripe_size) / 501 + mddev->chunk_sectors; 502 + start_disk_offset = ((int)(start - first_stripe_index * stripe_size) % 503 + mddev->chunk_sectors) + 504 + first_stripe_index * mddev->chunk_sectors; 505 
+ end_disk_index = (int)(end - last_stripe_index * stripe_size) / 506 + mddev->chunk_sectors; 507 + end_disk_offset = ((int)(end - last_stripe_index * stripe_size) % 508 + mddev->chunk_sectors) + 509 + last_stripe_index * mddev->chunk_sectors; 510 + 511 + for (disk = 0; disk < zone->nb_dev; disk++) { 512 + sector_t dev_start, dev_end; 513 + struct bio *discard_bio = NULL; 514 + struct md_rdev *rdev; 515 + 516 + if (disk < start_disk_index) 517 + dev_start = (first_stripe_index + 1) * 518 + mddev->chunk_sectors; 519 + else if (disk > start_disk_index) 520 + dev_start = first_stripe_index * mddev->chunk_sectors; 521 + else 522 + dev_start = start_disk_offset; 523 + 524 + if (disk < end_disk_index) 525 + dev_end = (last_stripe_index + 1) * mddev->chunk_sectors; 526 + else if (disk > end_disk_index) 527 + dev_end = last_stripe_index * mddev->chunk_sectors; 528 + else 529 + dev_end = end_disk_offset; 530 + 531 + if (dev_end <= dev_start) 532 + continue; 533 + 534 + rdev = conf->devlist[(zone - conf->strip_zone) * 535 + conf->strip_zone[0].nb_dev + disk]; 536 + if (__blkdev_issue_discard(rdev->bdev, 537 + dev_start + zone->dev_start + rdev->data_offset, 538 + dev_end - dev_start, GFP_NOIO, 0, &discard_bio) || 539 + !discard_bio) 540 + continue; 541 + bio_chain(discard_bio, bio); 542 + if (mddev->gendisk) 543 + trace_block_bio_remap(bdev_get_queue(rdev->bdev), 544 + discard_bio, disk_devt(mddev->gendisk), 545 + bio->bi_iter.bi_sector); 546 + generic_make_request(discard_bio); 547 + } 548 + bio_endio(bio); 549 + } 550 + 462 551 static void raid0_make_request(struct mddev *mddev, struct bio *bio) 463 552 { 464 553 struct strip_zone *zone; ··· 559 470 560 471 if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { 561 472 md_flush_request(mddev, bio); 473 + return; 474 + } 475 + 476 + if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) { 477 + raid0_handle_discard(mddev, bio); 562 478 return; 563 479 } 564 480 ··· 592 498 bio->bi_iter.bi_sector = sector + zone->dev_start + 593 499 
tmp_dev->data_offset; 594 500 595 - if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && 596 - !blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) { 597 - /* Just ignore it */ 598 - bio_endio(bio); 599 - } else { 600 - if (mddev->gendisk) 601 - trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), 602 - bio, disk_devt(mddev->gendisk), 603 - bio_sector); 604 - mddev_check_writesame(mddev, bio); 605 - mddev_check_write_zeroes(mddev, bio); 606 - generic_make_request(bio); 607 - } 501 + if (mddev->gendisk) 502 + trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), 503 + bio, disk_devt(mddev->gendisk), 504 + bio_sector); 505 + mddev_check_writesame(mddev, bio); 506 + mddev_check_write_zeroes(mddev, bio); 507 + generic_make_request(bio); 608 508 } 609 509 610 510 static void raid0_status(struct seq_file *seq, struct mddev *mddev)
+10 -11
drivers/md/raid1.c
··· 666 666 break; 667 667 } 668 668 continue; 669 - } else 669 + } else { 670 + if ((sectors > best_good_sectors) && (best_disk >= 0)) 671 + best_disk = -1; 670 672 best_good_sectors = sectors; 673 + } 671 674 672 675 if (best_disk >= 0) 673 676 /* At least two disks to choose from so failfast is OK */ ··· 1532 1529 plug = container_of(cb, struct raid1_plug_cb, cb); 1533 1530 else 1534 1531 plug = NULL; 1535 - spin_lock_irqsave(&conf->device_lock, flags); 1536 1532 if (plug) { 1537 1533 bio_list_add(&plug->pending, mbio); 1538 1534 plug->pending_cnt++; 1539 1535 } else { 1536 + spin_lock_irqsave(&conf->device_lock, flags); 1540 1537 bio_list_add(&conf->pending_bio_list, mbio); 1541 1538 conf->pending_count++; 1542 - } 1543 - spin_unlock_irqrestore(&conf->device_lock, flags); 1544 - if (!plug) 1539 + spin_unlock_irqrestore(&conf->device_lock, flags); 1545 1540 md_wakeup_thread(mddev->thread); 1541 + } 1546 1542 } 1547 1543 1548 1544 r1_bio_write_done(r1_bio); ··· 3199 3197 struct r1conf *conf = mddev->private; 3200 3198 int cnt, raid_disks; 3201 3199 unsigned long flags; 3202 - int d, d2, err; 3200 + int d, d2; 3203 3201 3204 3202 /* Cannot change chunk_size, layout, or level */ 3205 3203 if (mddev->chunk_sectors != mddev->new_chunk_sectors || ··· 3211 3209 return -EINVAL; 3212 3210 } 3213 3211 3214 - if (!mddev_is_clustered(mddev)) { 3215 - err = md_allow_write(mddev); 3216 - if (err) 3217 - return err; 3218 - } 3212 + if (!mddev_is_clustered(mddev)) 3213 + md_allow_write(mddev); 3219 3214 3220 3215 raid_disks = mddev->raid_disks + mddev->delta_disks; 3221 3216
+3 -4
drivers/md/raid10.c
··· 1282 1282 plug = container_of(cb, struct raid10_plug_cb, cb); 1283 1283 else 1284 1284 plug = NULL; 1285 - spin_lock_irqsave(&conf->device_lock, flags); 1286 1285 if (plug) { 1287 1286 bio_list_add(&plug->pending, mbio); 1288 1287 plug->pending_cnt++; 1289 1288 } else { 1289 + spin_lock_irqsave(&conf->device_lock, flags); 1290 1290 bio_list_add(&conf->pending_bio_list, mbio); 1291 1291 conf->pending_count++; 1292 - } 1293 - spin_unlock_irqrestore(&conf->device_lock, flags); 1294 - if (!plug) 1292 + spin_unlock_irqrestore(&conf->device_lock, flags); 1295 1293 md_wakeup_thread(mddev->thread); 1294 + } 1296 1295 } 1297 1296 1298 1297 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
+35 -12
drivers/md/raid5-cache.c
··· 24 24 #include "md.h" 25 25 #include "raid5.h" 26 26 #include "bitmap.h" 27 + #include "raid5-log.h" 27 28 28 29 /* 29 30 * metadata/data stored in disk with 4k size unit (a block) regardless ··· 623 622 __r5l_set_io_unit_state(io, IO_UNIT_IO_START); 624 623 spin_unlock_irqrestore(&log->io_list_lock, flags); 625 624 625 + /* 626 + * In case of journal device failures, submit_bio will get error 627 + * and calls endio, then active stripes will continue write 628 + * process. Therefore, it is not necessary to check Faulty bit 629 + * of journal device here. 630 + * 631 + * We can't check split_bio after current_bio is submitted. If 632 + * io->split_bio is null, after current_bio is submitted, current_bio 633 + * might already be completed and the io_unit is freed. We submit 634 + * split_bio first to avoid the issue. 635 + */ 636 + if (io->split_bio) { 637 + if (io->has_flush) 638 + io->split_bio->bi_opf |= REQ_PREFLUSH; 639 + if (io->has_fua) 640 + io->split_bio->bi_opf |= REQ_FUA; 641 + submit_bio(io->split_bio); 642 + } 643 + 626 644 if (io->has_flush) 627 645 io->current_bio->bi_opf |= REQ_PREFLUSH; 628 646 if (io->has_fua) 629 647 io->current_bio->bi_opf |= REQ_FUA; 630 648 submit_bio(io->current_bio); 631 - 632 - if (!io->split_bio) 633 - return; 634 - 635 - if (io->has_flush) 636 - io->split_bio->bi_opf |= REQ_PREFLUSH; 637 - if (io->has_fua) 638 - io->split_bio->bi_opf |= REQ_FUA; 639 - submit_bio(io->split_bio); 640 649 } 641 650 642 651 /* deferred io_unit will be dispatched here */ ··· 681 670 return; 682 671 pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n", 683 672 mdname(mddev)); 673 + 674 + /* wait superblock change before suspend */ 675 + wait_event(mddev->sb_wait, 676 + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); 677 + 684 678 mddev_suspend(mddev); 685 679 log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; 686 680 mddev_resume(mddev); ··· 2637 2621 * When run in degraded mode, array is set to write-through mode. 
2638 2622 * This check helps drain pending write safely in the transition to 2639 2623 * write-through mode. 2624 + * 2625 + * When a stripe is syncing, the write is also handled in write 2626 + * through mode. 2640 2627 */ 2641 - if (s->failed) { 2628 + if (s->failed || test_bit(STRIPE_SYNCING, &sh->state)) { 2642 2629 r5c_make_stripe_write_out(sh); 2643 2630 return -EAGAIN; 2644 2631 } ··· 2844 2825 } 2845 2826 2846 2827 r5l_append_flush_payload(log, sh->sector); 2828 + /* stripe is flused to raid disks, we can do resync now */ 2829 + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) 2830 + set_bit(STRIPE_HANDLE, &sh->state); 2847 2831 } 2848 2832 2849 2833 int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh) ··· 2995 2973 return ret; 2996 2974 } 2997 2975 2998 - void r5c_update_on_rdev_error(struct mddev *mddev) 2976 + void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev) 2999 2977 { 3000 2978 struct r5conf *conf = mddev->private; 3001 2979 struct r5l_log *log = conf->log; ··· 3003 2981 if (!log) 3004 2982 return; 3005 2983 3006 - if (raid5_calc_degraded(conf) > 0 && 2984 + if ((raid5_calc_degraded(conf) > 0 || 2985 + test_bit(Journal, &rdev->flags)) && 3007 2986 conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK) 3008 2987 schedule_work(&log->disable_writeback_work); 3009 2988 }
+2 -1
drivers/md/raid5-log.h
··· 28 28 extern void r5c_check_stripe_cache_usage(struct r5conf *conf); 29 29 extern void r5c_check_cached_full_stripe(struct r5conf *conf); 30 30 extern struct md_sysfs_entry r5c_journal_mode; 31 - extern void r5c_update_on_rdev_error(struct mddev *mddev); 31 + extern void r5c_update_on_rdev_error(struct mddev *mddev, 32 + struct md_rdev *rdev); 32 33 extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect); 33 34 34 35 extern struct dma_async_tx_descriptor *
+48 -31
drivers/md/raid5.c
··· 103 103 static inline void lock_all_device_hash_locks_irq(struct r5conf *conf) 104 104 { 105 105 int i; 106 - local_irq_disable(); 107 - spin_lock(conf->hash_locks); 106 + spin_lock_irq(conf->hash_locks); 108 107 for (i = 1; i < NR_STRIPE_HASH_LOCKS; i++) 109 108 spin_lock_nest_lock(conf->hash_locks + i, conf->hash_locks); 110 109 spin_lock(&conf->device_lock); ··· 113 114 { 114 115 int i; 115 116 spin_unlock(&conf->device_lock); 116 - for (i = NR_STRIPE_HASH_LOCKS; i; i--) 117 - spin_unlock(conf->hash_locks + i - 1); 118 - local_irq_enable(); 117 + for (i = NR_STRIPE_HASH_LOCKS - 1; i; i--) 118 + spin_unlock(conf->hash_locks + i); 119 + spin_unlock_irq(conf->hash_locks); 119 120 } 120 121 121 122 /* Find first data disk in a raid6 stripe */ ··· 233 234 if (test_bit(R5_InJournal, &sh->dev[i].flags)) 234 235 injournal++; 235 236 /* 236 - * When quiesce in r5c write back, set STRIPE_HANDLE for stripes with 237 - * data in journal, so they are not released to cached lists 237 + * In the following cases, the stripe cannot be released to cached 238 + * lists. Therefore, we make the stripe write out and set 239 + * STRIPE_HANDLE: 240 + * 1. when quiesce in r5c write back; 241 + * 2. when resync is requested fot the stripe. 
238 242 */ 239 - if (conf->quiesce && r5c_is_writeback(conf->log) && 240 - !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0) { 243 + if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) || 244 + (conf->quiesce && r5c_is_writeback(conf->log) && 245 + !test_bit(STRIPE_HANDLE, &sh->state) && injournal != 0)) { 241 246 if (test_bit(STRIPE_R5C_CACHING, &sh->state)) 242 247 r5c_make_stripe_write_out(sh); 243 248 set_bit(STRIPE_HANDLE, &sh->state); ··· 717 714 718 715 static void lock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) 719 716 { 720 - local_irq_disable(); 721 717 if (sh1 > sh2) { 722 - spin_lock(&sh2->stripe_lock); 718 + spin_lock_irq(&sh2->stripe_lock); 723 719 spin_lock_nested(&sh1->stripe_lock, 1); 724 720 } else { 725 - spin_lock(&sh1->stripe_lock); 721 + spin_lock_irq(&sh1->stripe_lock); 726 722 spin_lock_nested(&sh2->stripe_lock, 1); 727 723 } 728 724 } ··· 729 727 static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) 730 728 { 731 729 spin_unlock(&sh1->stripe_lock); 732 - spin_unlock(&sh2->stripe_lock); 733 - local_irq_enable(); 730 + spin_unlock_irq(&sh2->stripe_lock); 734 731 } 735 732 736 733 /* Only freshly new full stripe normal write stripe can be added to a batch list */ ··· 2313 2312 struct stripe_head *osh, *nsh; 2314 2313 LIST_HEAD(newstripes); 2315 2314 struct disk_info *ndisks; 2316 - int err; 2315 + int err = 0; 2317 2316 struct kmem_cache *sc; 2318 2317 int i; 2319 2318 int hash, cnt; 2320 2319 2321 - err = md_allow_write(conf->mddev); 2322 - if (err) 2323 - return err; 2320 + md_allow_write(conf->mddev); 2324 2321 2325 2322 /* Step 1 */ 2326 2323 sc = kmem_cache_create(conf->cache_name[1-conf->active_name], ··· 2693 2694 bdevname(rdev->bdev, b), 2694 2695 mdname(mddev), 2695 2696 conf->raid_disks - mddev->degraded); 2696 - r5c_update_on_rdev_error(mddev); 2697 + r5c_update_on_rdev_error(mddev, rdev); 2697 2698 } 2698 2699 2699 2700 /* ··· 3054 3055 * When LOG_CRITICAL, stripes with injournal == 0 
will be sent to 3055 3056 * no_space_stripes list. 3056 3057 * 3058 + * 3. during journal failure 3059 + * In journal failure, we try to flush all cached data to raid disks 3060 + * based on data in stripe cache. The array is read-only to upper 3061 + * layers, so we would skip all pending writes. 3062 + * 3057 3063 */ 3058 3064 static inline bool delay_towrite(struct r5conf *conf, 3059 3065 struct r5dev *dev, ··· 3071 3067 /* case 2 above */ 3072 3068 if (test_bit(R5C_LOG_CRITICAL, &conf->cache_state) && 3073 3069 s->injournal > 0) 3070 + return true; 3071 + /* case 3 above */ 3072 + if (s->log_failed && s->injournal) 3074 3073 return true; 3075 3074 return false; 3076 3075 } ··· 4660 4653 4661 4654 if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state) && !sh->batch_head) { 4662 4655 spin_lock(&sh->stripe_lock); 4663 - /* Cannot process 'sync' concurrently with 'discard' */ 4664 - if (!test_bit(STRIPE_DISCARD, &sh->state) && 4656 + /* 4657 + * Cannot process 'sync' concurrently with 'discard'. 4658 + * Flush data in r5cache before 'sync'. 4659 + */ 4660 + if (!test_bit(STRIPE_R5C_PARTIAL_STRIPE, &sh->state) && 4661 + !test_bit(STRIPE_R5C_FULL_STRIPE, &sh->state) && 4662 + !test_bit(STRIPE_DISCARD, &sh->state) && 4665 4663 test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { 4666 4664 set_bit(STRIPE_SYNCING, &sh->state); 4667 4665 clear_bit(STRIPE_INSYNC, &sh->state); ··· 4713 4701 " to_write=%d failed=%d failed_num=%d,%d\n", 4714 4702 s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 4715 4703 s.failed_num[0], s.failed_num[1]); 4716 - /* check if the array has lost more than max_degraded devices and, 4704 + /* 4705 + * check if the array has lost more than max_degraded devices and, 4717 4706 * if so, some requests might need to be failed. 
4707 + * 4708 + * When journal device failed (log_failed), we will only process 4709 + * the stripe if there is data need write to raid disks 4718 4710 */ 4719 - if (s.failed > conf->max_degraded || s.log_failed) { 4711 + if (s.failed > conf->max_degraded || 4712 + (s.log_failed && s.injournal == 0)) { 4720 4713 sh->check_state = 0; 4721 4714 sh->reconstruct_state = 0; 4722 4715 break_stripe_batch_list(sh, 0); ··· 5294 5277 struct stripe_head *sh, *tmp; 5295 5278 struct list_head *handle_list = NULL; 5296 5279 struct r5worker_group *wg; 5297 - bool second_try = !r5c_is_writeback(conf->log); 5298 - bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state); 5280 + bool second_try = !r5c_is_writeback(conf->log) && 5281 + !r5l_log_disk_error(conf); 5282 + bool try_loprio = test_bit(R5C_LOG_TIGHT, &conf->cache_state) || 5283 + r5l_log_disk_error(conf); 5299 5284 5300 5285 again: 5301 5286 wg = NULL; ··· 6332 6313 raid5_set_cache_size(struct mddev *mddev, int size) 6333 6314 { 6334 6315 struct r5conf *conf = mddev->private; 6335 - int err; 6336 6316 6337 6317 if (size <= 16 || size > 32768) 6338 6318 return -EINVAL; ··· 6343 6325 ; 6344 6326 mutex_unlock(&conf->cache_size_mutex); 6345 6327 6346 - 6347 - err = md_allow_write(mddev); 6348 - if (err) 6349 - return err; 6328 + md_allow_write(mddev); 6350 6329 6351 6330 mutex_lock(&conf->cache_size_mutex); 6352 6331 while (size > conf->max_nr_stripes) ··· 7545 7530 * neilb: there is no locking about new writes here, 7546 7531 * so this cannot be safe. 7547 7532 */ 7548 - if (atomic_read(&conf->active_stripes)) { 7533 + if (atomic_read(&conf->active_stripes) || 7534 + atomic_read(&conf->r5c_cached_full_stripes) || 7535 + atomic_read(&conf->r5c_cached_partial_stripes)) { 7549 7536 return -EBUSY; 7550 7537 } 7551 7538 log_exit(conf);