// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/timer.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <trace/events/block.h>

#include "md.h"
#include "md-bitmap.h"
/*
 * #### Background
 *
 * Redundant data is used to enhance data fault tolerance, and the storage
 * method for redundant data varies depending on the RAID level. It's
 * important to maintain the consistency of redundant data.
 *
 * A bitmap is used to record which data blocks have been synchronized and
 * which ones need to be resynchronized or recovered. Each bit in the bitmap
 * represents a segment of data in the array. When a bit is set, it indicates
 * that the multiple redundant copies of that data segment may not be
 * consistent. Data synchronization can be performed based on the bitmap after
 * a power failure or after re-adding a disk. If there is no bitmap, a full
 * disk synchronization is required.
 *
 * #### Key Features
 *
 * - The IO fastpath is lockless; if the user issues lots of write IO to the
 * same bitmap bit in a short time, only the first write has the additional
 * overhead of updating the bitmap bit, and there is no additional overhead
 * for the following writes;
 * - Support resyncing or recovering only written data; this means that when
 * creating a new array or replacing with a new disk, there is no need to do
 * a full disk resync/recovery;
 *
 * #### Key Concept
 *
 * ##### State Machine
 *
 * Each bit is one byte and has 6 different states, see llbitmap_state. There
 * are a total of 8 different actions, see llbitmap_action, that can change a
 * bit's state:
 *
 * llbitmap state machine: transitions between states
 *
 * |           | Startwrite | Startsync | Endsync | Abortsync |
 * | --------- | ---------- | --------- | ------- | --------- |
 * | Unwritten | Dirty      | x         | x       | x         |
 * | Clean     | Dirty      | x         | x       | x         |
 * | Dirty     | x          | x         | x       | x         |
 * | NeedSync  | x          | Syncing   | x       | x         |
 * | Syncing   | x          | Syncing   | Dirty   | NeedSync  |
 *
 * |           | Reload   | Daemon | Discard   | Stale     |
 * | --------- | -------- | ------ | --------- | --------- |
 * | Unwritten | x        | x      | x         | x         |
 * | Clean     | x        | x      | Unwritten | NeedSync  |
 * | Dirty     | NeedSync | Clean  | Unwritten | NeedSync  |
 * | NeedSync  | x        | x      | Unwritten | x         |
 * | Syncing   | NeedSync | x      | Unwritten | NeedSync  |
 *
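 * For example, a plain write to a Clean bit follows
 *
 *   state_machine[BitClean][BitmapActionStartwrite] == BitDirty
 *
 * while entries marked 'x' above map to BitNone, meaning the action leaves
 * the bit unchanged.
 *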
 * Typical scenarios:
 *
 * 1) Create new array
 * All bits will be set to Unwritten by default; if --assume-clean is set,
 * all bits will be set to Clean instead.
 *
 * 2) write data, raid1/raid10 have a full copy of data, while raid456
 * doesn't and relies on xor data
 *
 * 2.1) write new data to raid1/raid10:
 * Unwritten --StartWrite--> Dirty
 *
 * 2.2) write new data to raid456:
 * Unwritten --StartWrite--> NeedSync
 *
 * Because the initial recovery for raid456 is skipped, the xor data is not
 * built yet; the bit must be set to NeedSync first, and after the lazy
 * initial recovery is finished, the bit will finally be set to Dirty
 * (see 5.1 and 5.4);
 *
 * 2.3) cover write (overwrite existing data)
 * Clean --StartWrite--> Dirty
 *
 * 3) daemon, if the array is not degraded:
 * Dirty --Daemon--> Clean
 *
 * 4) discard
 * {Clean, Dirty, NeedSync, Syncing} --Discard--> Unwritten
 *
 * 5) resync and recover
 *
 * 5.1) common process
 * NeedSync --Startsync--> Syncing --Endsync--> Dirty --Daemon--> Clean
 *
 * 5.2) resync after power failure
 * Dirty --Reload--> NeedSync
 *
 * 5.3) recover while replacing with a new disk
 * By default, the old bitmap framework will recover all data, and llbitmap
 * implements this with a new helper, see llbitmap_skip_sync_blocks:
 *
 * skip recovery for bits other than dirty or clean;
 *
 * 5.4) lazy initial recover for raid5:
 * By default, the old bitmap framework only allows a new recovery when there
 * are spares (new disks); a new recovery flag, MD_RECOVERY_LAZY_RECOVER, is
 * added to perform raid456 lazy recovery for set bits (from 2.2).
 *
 * 6) special handling for degraded array:
 *
 * - Dirty bits will never be cleared, the daemon will just do nothing, so
 * that if a disk is re-added, Clean bits can be skipped during recovery;
 * - Dirty bits will convert to Syncing from start sync, to do data recovery
 * for newly added disks;
 * - New writes will convert bits to NeedSync directly;
 *
 * ##### Bitmap IO
 *
 * ##### Chunksize
 *
 * The default bitmap size is 128k, including a 1k bitmap super block, and
 * the default size of the segment of array data covered by each bit
 * (chunksize) is 64k; the chunksize will double each time while the total
 * number of bits exceeds 127k (see llbitmap_init).
 *
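 * As an illustrative example (not spelled out in the code): 127k of bitmap
 * data is 130048 bits, one byte each; with the default 64k chunksize this
 * covers arrays up to roughly 8GiB, so e.g. a 1TiB array ends up with a
 * 16MiB chunksize and 65536 chunks.
 *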
 * ##### READ
 *
 * While creating the bitmap, all pages will be allocated and read for
 * llbitmap; there won't be any reads afterwards.
 *
 * ##### WRITE
 *
 * WRITE IO is divided into blocks of the array's logical_block_size, and the
 * dirty state of each block is tracked independently. For example:
 *
 * each page is 4k and contains 8 blocks; each block is 512 bytes and
 * contains 512 bits;
 *
 * | page0 | page1 | ... | page 31 |
 * |       |
 * |        \-----------------------\
 * |                                 |
 * | block0 | block1 | ... | block7  |
 * |        |
 * |         \-----------------\
 * |                            |
 * | bit0 | bit1 | ... | bit511 |
 *
 * From the IO path, if one bit is changed to Dirty or NeedSync, the
 * corresponding subpage will be marked dirty, and such a block must be
 * written first before the data IO is issued. This behaviour will affect IO
 * performance; to reduce the impact, if multiple bits are changed in the
 * same block in a short time, all bits in this block will be changed to
 * Dirty/NeedSync, so that there won't be any additional overhead until the
 * daemon clears the dirty bits.
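 *
 * For example (illustrative numbers): with 4k pages and 512-byte blocks,
 * chunk 5000 lives at pos = 5000 + BITMAP_DATA_OFFSET = 6024, which maps to
 * page 6024 >> PAGE_SHIFT = 1, in-page offset 1928, block 1928 / 512 = 3
 * (see llbitmap_read and llbitmap_set_page_dirty).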
 *
 * ##### Dirty Bits synchronization
 *
 * The IO fast path sets bits to Dirty, and those dirty bits will be cleared
 * by the daemon after the IO is done. llbitmap_page_ctl is used to
 * synchronize between the IO path and the daemon;
 *
 * IO path:
 * 1) try to grab a reference; if this succeeds, set the expire time after 5s
 * and return;
 * 2) if grabbing a reference fails, wait for the daemon to finish clearing
 * dirty bits;
 *
 * Daemon (woken up every daemon_sleep seconds):
 * For each page:
 * 1) check if the page expired; if not, skip this page; for an expired page:
 * 2) suspend the page and wait for inflight write IO to be done;
 * 3) change the dirty page to clean;
 * 4) resume the page;
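 *
 * For example (an illustrative timeline using the defaults below): a write
 * at t=0 dirties a bit and sets the page expire time to t=5s (barrier_idle);
 * the next daemon run after that, at most daemon_sleep=30s later, suspends
 * the page, clears the dirty bits, and resumes the page.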
 */

#define BITMAP_DATA_OFFSET 1024

/* 64k is the max IO size of sync IO for raid1/raid10 */
#define MIN_CHUNK_SIZE (64 * 2)

/* By default, the daemon will be woken up every 30s */
#define DEFAULT_DAEMON_SLEEP 30

/*
 * Dirtied bits that have not been accessed for more than 5s will be cleared
 * by the daemon.
 */
#define DEFAULT_BARRIER_IDLE 5

enum llbitmap_state {
	/* No valid data, init state after assembling the array */
	BitUnwritten = 0,
	/* data is consistent */
	BitClean,
	/* data will be consistent after IO is done, set directly for writes */
	BitDirty,
	/*
	 * data needs to be resynchronized:
	 * 1) set directly for writes if the array is degraded, to prevent a
	 *    full disk synchronization after re-adding a disk;
	 * 2) reassemble the array after power failure, and dirty bits are
	 *    found after reloading the bitmap;
	 * 3) set for first write for raid5, to build initial xor data lazily
	 */
	BitNeedSync,
	/* data is synchronizing */
	BitSyncing,
	BitStateCount,
	BitNone = 0xff,
};

enum llbitmap_action {
	/* User writes new data, this is the only action from the IO fast path */
	BitmapActionStartwrite = 0,
	/* Start recovery */
	BitmapActionStartsync,
	/* Finish recovery */
	BitmapActionEndsync,
	/* Failed recovery */
	BitmapActionAbortsync,
	/* Reassemble the array */
	BitmapActionReload,
	/* Daemon thread is trying to clear dirty bits */
	BitmapActionDaemon,
	/* Data is deleted */
	BitmapActionDiscard,
	/*
	 * Bitmap is stale, mark all bits except BitUnwritten as
	 * BitNeedSync.
	 */
	BitmapActionStale,
	BitmapActionCount,
	/* Init state is BitUnwritten */
	BitmapActionInit,
};

enum llbitmap_page_state {
	LLPageFlush = 0,
	LLPageDirty,
};

struct llbitmap_page_ctl {
	char *state;
	struct page *page;
	unsigned long expire;
	unsigned long flags;
	wait_queue_head_t wait;
	struct percpu_ref active;
	/* Per block size dirty state, maximum 64k page / 1 sector = 128 */
	unsigned long dirty[];
};

struct llbitmap {
	struct mddev *mddev;
	struct llbitmap_page_ctl **pctl;

	unsigned int nr_pages;
	unsigned int io_size;
	unsigned int blocks_per_page;

	/* shift of one chunk */
	unsigned long chunkshift;
	/* size of one chunk in sectors */
	unsigned long chunksize;
	/* total number of chunks */
	unsigned long chunks;
	unsigned long last_end_sync;
	/*
	 * time in seconds after which dirty bits will be cleared if the page
	 * is not accessed.
	 */
	unsigned long barrier_idle;
	/* fires on first BitDirty state */
	struct timer_list pending_timer;
	struct work_struct daemon_work;

	unsigned long flags;
	__u64 events_cleared;

	/* for slow disks */
	atomic_t behind_writes;
	wait_queue_head_t behind_wait;
};

struct llbitmap_unplug_work {
	struct work_struct work;
	struct llbitmap *llbitmap;
	struct completion *done;
};

static struct workqueue_struct *md_llbitmap_io_wq;
static struct workqueue_struct *md_llbitmap_unplug_wq;

static char state_machine[BitStateCount][BitmapActionCount] = {
	[BitUnwritten] = {
		[BitmapActionStartwrite] = BitDirty,
		[BitmapActionStartsync] = BitNone,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNone,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitNone,
		[BitmapActionStale] = BitNone,
	},
	[BitClean] = {
		[BitmapActionStartwrite] = BitDirty,
		[BitmapActionStartsync] = BitNone,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNone,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNeedSync,
	},
	[BitDirty] = {
		[BitmapActionStartwrite] = BitNone,
		[BitmapActionStartsync] = BitNone,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNeedSync,
		[BitmapActionDaemon] = BitClean,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNeedSync,
	},
	[BitNeedSync] = {
		[BitmapActionStartwrite] = BitNone,
		[BitmapActionStartsync] = BitSyncing,
		[BitmapActionEndsync] = BitNone,
		[BitmapActionAbortsync] = BitNone,
		[BitmapActionReload] = BitNone,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNone,
	},
	[BitSyncing] = {
		[BitmapActionStartwrite] = BitNone,
		[BitmapActionStartsync] = BitSyncing,
		[BitmapActionEndsync] = BitDirty,
		[BitmapActionAbortsync] = BitNeedSync,
		[BitmapActionReload] = BitNeedSync,
		[BitmapActionDaemon] = BitNone,
		[BitmapActionDiscard] = BitUnwritten,
		[BitmapActionStale] = BitNeedSync,
	},
};

static void __llbitmap_flush(struct mddev *mddev);

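/*
 * Return the state byte for chunk index @pos; BITMAP_DATA_OFFSET skips the
 * on-disk super block at the start of page 0.
 */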
static enum llbitmap_state llbitmap_read(struct llbitmap *llbitmap, loff_t pos)
{
	unsigned int idx;
	unsigned int offset;

	pos += BITMAP_DATA_OFFSET;
	idx = pos >> PAGE_SHIFT;
	offset = offset_in_page(pos);

	return llbitmap->pctl[idx]->state[offset];
}

/* set all the bits in the subpage as dirty */
static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
				       struct llbitmap_page_ctl *pctl,
				       unsigned int block)
{
	bool level_456 = raid_is_456(llbitmap->mddev);
	unsigned int io_size = llbitmap->io_size;
	int pos;

	for (pos = block * io_size; pos < (block + 1) * io_size; pos++) {
		switch (pctl->state[pos]) {
		case BitUnwritten:
			pctl->state[pos] = level_456 ? BitNeedSync : BitDirty;
			break;
		case BitClean:
			pctl->state[pos] = BitDirty;
			break;
		}
	}
}

static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx,
				    int offset)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];
	unsigned int io_size = llbitmap->io_size;
	int block = offset / io_size;
	int pos;

	if (!test_bit(LLPageDirty, &pctl->flags))
		set_bit(LLPageDirty, &pctl->flags);

	/*
	 * For a degraded array, dirty bits will never be cleared, and we must
	 * resync all the dirty bits; hence skip infecting new dirty bits to
	 * prevent resyncing unnecessary data.
	 */
	if (llbitmap->mddev->degraded) {
		set_bit(block, pctl->dirty);
		return;
	}

	/*
	 * The subpage usually contains a total of 512 bits. If any single bit
	 * within the subpage is marked as dirty, the entire sector will be
	 * written. To avoid impacting write performance, when multiple bits
	 * within the same sector are modified within llbitmap->barrier_idle,
	 * all bits in the sector will be collectively marked as dirty at once.
	 */
	if (test_and_set_bit(block, pctl->dirty)) {
		llbitmap_infect_dirty_bits(llbitmap, pctl, block);
		return;
	}

	for (pos = block * io_size; pos < (block + 1) * io_size; pos++) {
		if (pos == offset)
			continue;
		if (pctl->state[pos] == BitDirty ||
		    pctl->state[pos] == BitNeedSync) {
			llbitmap_infect_dirty_bits(llbitmap, pctl, block);
			return;
		}
	}
}

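/*
 * Store @state for chunk index @pos; the Dirty/NeedSync states also mark the
 * containing block dirty so it gets flushed before data IO.
 */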
static void llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
			   loff_t pos)
{
	unsigned int idx;
	unsigned int bit;

	pos += BITMAP_DATA_OFFSET;
	idx = pos >> PAGE_SHIFT;
	bit = offset_in_page(pos);

	llbitmap->pctl[idx]->state[bit] = state;
	if (state == BitDirty || state == BitNeedSync)
		llbitmap_set_page_dirty(llbitmap, idx, bit);
}

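/*
 * Return the cached page at @idx if it exists; otherwise read it from the
 * first healthy rdev, trying the next rdev (and flagging the failed one via
 * md_error) on IO error.
 */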
static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
{
	struct mddev *mddev = llbitmap->mddev;
	struct page *page = NULL;
	struct md_rdev *rdev;

	if (llbitmap->pctl && llbitmap->pctl[idx])
		page = llbitmap->pctl[idx]->page;
	if (page)
		return page;

	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		return ERR_PTR(-ENOMEM);

	rdev_for_each(rdev, mddev) {
		sector_t sector;

		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
			continue;

		sector = mddev->bitmap_info.offset +
			 (idx << PAGE_SECTORS_SHIFT);

		if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ,
				 true))
			return page;

		md_error(mddev, rdev);
	}

	__free_page(page);
	return ERR_PTR(-EIO);
}

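/*
 * Write back the dirty io_size blocks of page @idx to every in-sync rdev;
 * blocks whose dirty bit is clear are skipped, so only modified sectors hit
 * the disks.
 */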
static void llbitmap_write_page(struct llbitmap *llbitmap, int idx)
{
	struct page *page = llbitmap->pctl[idx]->page;
	struct mddev *mddev = llbitmap->mddev;
	struct md_rdev *rdev;
	int block;

	for (block = 0; block < llbitmap->blocks_per_page; block++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

		if (!test_and_clear_bit(block, pctl->dirty))
			continue;

		rdev_for_each(rdev, mddev) {
			sector_t sector;
			sector_t bit_sector = llbitmap->io_size >> SECTOR_SHIFT;

			if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
				continue;

			sector = mddev->bitmap_info.offset + rdev->sb_start +
				 (idx << PAGE_SECTORS_SHIFT) +
				 block * bit_sector;
			md_write_metadata(mddev, rdev, sector,
					  llbitmap->io_size, page,
					  block * llbitmap->io_size);
		}
	}
}

static void active_release(struct percpu_ref *ref)
{
	struct llbitmap_page_ctl *pctl =
		container_of(ref, struct llbitmap_page_ctl, active);

	wake_up(&pctl->wait);
}

static void llbitmap_free_pages(struct llbitmap *llbitmap)
{
	int i;

	if (!llbitmap->pctl)
		return;

	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		if (!pctl || !pctl->page)
			break;

		__free_page(pctl->page);
		percpu_ref_exit(&pctl->active);
	}

	kfree(llbitmap->pctl[0]);
	kfree(llbitmap->pctl);
	llbitmap->pctl = NULL;
}

static int llbitmap_cache_pages(struct llbitmap *llbitmap)
{
	struct llbitmap_page_ctl *pctl;
	unsigned int nr_pages = DIV_ROUND_UP(llbitmap->chunks +
					     BITMAP_DATA_OFFSET, PAGE_SIZE);
	unsigned int size = struct_size(pctl, dirty, BITS_TO_LONGS(
						llbitmap->blocks_per_page));
	int i;

	llbitmap->pctl = kmalloc_array(nr_pages, sizeof(void *),
				       GFP_KERNEL | __GFP_ZERO);
	if (!llbitmap->pctl)
		return -ENOMEM;

	size = round_up(size, cache_line_size());
	pctl = kmalloc_array(nr_pages, size, GFP_KERNEL | __GFP_ZERO);
	if (!pctl) {
		kfree(llbitmap->pctl);
		return -ENOMEM;
	}

	llbitmap->nr_pages = nr_pages;

	for (i = 0; i < nr_pages; i++, pctl = (void *)pctl + size) {
		struct page *page = llbitmap_read_page(llbitmap, i);

		llbitmap->pctl[i] = pctl;

		if (IS_ERR(page)) {
			llbitmap_free_pages(llbitmap);
			return PTR_ERR(page);
		}

		if (percpu_ref_init(&pctl->active, active_release,
				    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
			__free_page(page);
			llbitmap_free_pages(llbitmap);
			return -ENOMEM;
		}

		pctl->page = page;
		pctl->state = page_address(page);
		init_waitqueue_head(&pctl->wait);
	}

	return 0;
}

static void llbitmap_init_state(struct llbitmap *llbitmap)
{
	enum llbitmap_state state = BitUnwritten;
	unsigned long i;

	if (test_and_clear_bit(BITMAP_CLEAN, &llbitmap->flags))
		state = BitClean;

	for (i = 0; i < llbitmap->chunks; i++)
		llbitmap_write(llbitmap, state, i);
}

/* The return value is only used from resync, where @start == @end. */
static enum llbitmap_state llbitmap_state_machine(struct llbitmap *llbitmap,
						  unsigned long start,
						  unsigned long end,
						  enum llbitmap_action action)
{
	struct mddev *mddev = llbitmap->mddev;
	enum llbitmap_state state = BitNone;
	bool level_456 = raid_is_456(llbitmap->mddev);
	bool need_resync = false;
	bool need_recovery = false;

	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
		return BitNone;

	if (action == BitmapActionInit) {
		llbitmap_init_state(llbitmap);
		return BitNone;
	}

	while (start <= end) {
		enum llbitmap_state c = llbitmap_read(llbitmap, start);

		if (c < 0 || c >= BitStateCount) {
			pr_err("%s: invalid bit %lu state %d action %d, forcing resync\n",
			       __func__, start, c, action);
			state = BitNeedSync;
			goto write_bitmap;
		}

		if (c == BitNeedSync)
			need_resync = !mddev->degraded;

		state = state_machine[c][action];

write_bitmap:
		if (unlikely(mddev->degraded)) {
			/* For a degraded array, mark new data as need sync. */
			if (state == BitDirty &&
			    action == BitmapActionStartwrite)
				state = BitNeedSync;
			/*
			 * For a degraded array, resync dirty data as well.
			 * Note that if the array is still degraded after the
			 * resync is done, all new data will still be dirty
			 * until the array is clean.
			 */
			else if (c == BitDirty &&
				 action == BitmapActionStartsync)
				state = BitSyncing;
		} else if (c == BitUnwritten && state == BitDirty &&
			   action == BitmapActionStartwrite && level_456) {
			/* Delay raid456 initial recovery to first write. */
			state = BitNeedSync;
		}

		if (state == BitNone) {
			start++;
			continue;
		}

		llbitmap_write(llbitmap, state, start);

		if (state == BitNeedSync)
			need_resync = !mddev->degraded;
		else if (state == BitDirty &&
			 !timer_pending(&llbitmap->pending_timer))
			mod_timer(&llbitmap->pending_timer,
				  jiffies + mddev->bitmap_info.daemon_sleep * HZ);

		start++;
	}

	if (need_resync && level_456)
		need_recovery = true;

	if (need_recovery) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		set_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	} else if (need_resync) {
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
		md_wakeup_thread(mddev->thread);
	}

	return state;
}

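/*
 * Pin the page against the daemon: the IO path holds a reference on
 * pctl->active for the duration of the write and refreshes the page's expire
 * time; if the ref is being killed by the daemon, wait for the daemon to
 * finish and retry.
 */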
static void llbitmap_raise_barrier(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

retry:
	if (likely(percpu_ref_tryget_live(&pctl->active))) {
		WRITE_ONCE(pctl->expire, jiffies + llbitmap->barrier_idle * HZ);
		return;
	}

	wait_event(pctl->wait, !percpu_ref_is_dying(&pctl->active));
	goto retry;
}

static void llbitmap_release_barrier(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

	percpu_ref_put(&pctl->active);
}

static int llbitmap_suspend_timeout(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

	percpu_ref_kill(&pctl->active);

	if (!wait_event_timeout(pctl->wait, percpu_ref_is_zero(&pctl->active),
				llbitmap->mddev->bitmap_info.daemon_sleep * HZ))
		return -ETIMEDOUT;

	return 0;
}

static void llbitmap_resume(struct llbitmap *llbitmap, int page_idx)
{
	struct llbitmap_page_ctl *pctl = llbitmap->pctl[page_idx];

	pctl->expire = LONG_MAX;
	percpu_ref_resurrect(&pctl->active);
	wake_up(&pctl->wait);
}

static int llbitmap_check_support(struct mddev *mddev)
{
	if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
		pr_notice("md/llbitmap: %s: array with journal cannot have bitmap\n",
			  mdname(mddev));
		return -EBUSY;
	}

	if (mddev->bitmap_info.space == 0) {
		if (mddev->bitmap_info.default_space == 0) {
			pr_notice("md/llbitmap: %s: no space for bitmap\n",
				  mdname(mddev));
			return -ENOSPC;
		}
	}

	if (!mddev->persistent) {
		pr_notice("md/llbitmap: %s: array must be persistent\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	if (mddev->bitmap_info.file) {
		pr_notice("md/llbitmap: %s: doesn't support bitmap file\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	if (mddev->bitmap_info.external) {
		pr_notice("md/llbitmap: %s: doesn't support external metadata\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	if (mddev_is_dm(mddev)) {
		pr_notice("md/llbitmap: %s: doesn't support dm-raid\n",
			  mdname(mddev));
		return -EOPNOTSUPP;
	}

	return 0;
}

static int llbitmap_init(struct llbitmap *llbitmap)
{
	struct mddev *mddev = llbitmap->mddev;
	sector_t blocks = mddev->resync_max_sectors;
	unsigned long chunksize = MIN_CHUNK_SIZE;
	unsigned long chunks = DIV_ROUND_UP(blocks, chunksize);
	unsigned long space = mddev->bitmap_info.space << SECTOR_SHIFT;
	int ret;

	while (chunks > space) {
		chunksize = chunksize << 1;
		chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	}

	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
	llbitmap->chunkshift = ffz(~chunksize);
	llbitmap->chunksize = chunksize;
	llbitmap->chunks = chunks;
	mddev->bitmap_info.daemon_sleep = DEFAULT_DAEMON_SLEEP;

	ret = llbitmap_cache_pages(llbitmap);
	if (ret)
		return ret;

	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
			       BitmapActionInit);
	/* flush initial llbitmap to disk */
	__llbitmap_flush(mddev);

	return 0;
}

static int llbitmap_read_sb(struct llbitmap *llbitmap)
{
	struct mddev *mddev = llbitmap->mddev;
	unsigned long daemon_sleep;
	unsigned long chunksize;
	unsigned long events;
	struct page *sb_page;
	bitmap_super_t *sb;
	int ret = -EINVAL;

	if (!mddev->bitmap_info.offset) {
		pr_err("md/llbitmap: %s: no super block found\n",
		       mdname(mddev));
		return -EINVAL;
	}

	sb_page = llbitmap_read_page(llbitmap, 0);
	if (IS_ERR(sb_page)) {
		pr_err("md/llbitmap: %s: read super block failed\n",
		       mdname(mddev));
		return -EIO;
	}

	sb = kmap_local_page(sb_page);
	if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) {
		pr_err("md/llbitmap: %s: invalid super block magic number\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (sb->version != cpu_to_le32(BITMAP_MAJOR_LOCKLESS)) {
		pr_err("md/llbitmap: %s: invalid super block version\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (memcmp(sb->uuid, mddev->uuid, 16)) {
		pr_err("md/llbitmap: %s: bitmap superblock UUID mismatch\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (mddev->bitmap_info.space == 0) {
		int room = le32_to_cpu(sb->sectors_reserved);

		if (room)
			mddev->bitmap_info.space = room;
		else
			mddev->bitmap_info.space = mddev->bitmap_info.default_space;
	}
	llbitmap->flags = le32_to_cpu(sb->state);
	if (test_and_clear_bit(BITMAP_FIRST_USE, &llbitmap->flags)) {
		ret = llbitmap_init(llbitmap);
		goto out_put_page;
	}

	chunksize = le32_to_cpu(sb->chunksize);
	if (!is_power_of_2(chunksize)) {
		pr_err("md/llbitmap: %s: chunksize not a power of 2\n",
		       mdname(mddev));
		goto out_put_page;
	}

	if (chunksize < DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors,
					      mddev->bitmap_info.space << SECTOR_SHIFT)) {
		pr_err("md/llbitmap: %s: chunksize too small %lu < %llu / %lu\n",
		       mdname(mddev), chunksize, mddev->resync_max_sectors,
		       mddev->bitmap_info.space);
		goto out_put_page;
	}

	daemon_sleep = le32_to_cpu(sb->daemon_sleep);
	if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT / HZ) {
		pr_err("md/llbitmap: %s: daemon sleep %lu period out of range\n",
		       mdname(mddev), daemon_sleep);
		goto out_put_page;
	}

	events = le64_to_cpu(sb->events);
	if (events < mddev->events) {
		pr_warn("md/llbitmap: %s: bitmap file is out of date (%lu < %llu) -- forcing full recovery\n",
			mdname(mddev), events, mddev->events);
		set_bit(BITMAP_STALE, &llbitmap->flags);
	}

	sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
	mddev->bitmap_info.chunksize = chunksize;
	mddev->bitmap_info.daemon_sleep = daemon_sleep;

	llbitmap->barrier_idle = DEFAULT_BARRIER_IDLE;
	llbitmap->chunksize = chunksize;
	llbitmap->chunks = DIV_ROUND_UP_SECTOR_T(mddev->resync_max_sectors, chunksize);
	llbitmap->chunkshift = ffz(~chunksize);
	ret = llbitmap_cache_pages(llbitmap);

out_put_page:
	kunmap_local(sb);
	__free_page(sb_page);
	return ret;
}

static void llbitmap_pending_timer_fn(struct timer_list *pending_timer)
{
	struct llbitmap *llbitmap =
		container_of(pending_timer, struct llbitmap, pending_timer);

	if (work_busy(&llbitmap->daemon_work)) {
		pr_warn("md/llbitmap: %s daemon_work not finished in %lu seconds\n",
			mdname(llbitmap->mddev),
			llbitmap->mddev->bitmap_info.daemon_sleep);
		set_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags);
		return;
	}

	queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
}

static void md_llbitmap_daemon_fn(struct work_struct *work)
{
	struct llbitmap *llbitmap =
		container_of(work, struct llbitmap, daemon_work);
	unsigned long start;
	unsigned long end;
	bool restart;
	int idx;

	if (llbitmap->mddev->degraded)
		return;
retry:
	start = 0;
	end = min(llbitmap->chunks, PAGE_SIZE - BITMAP_DATA_OFFSET) - 1;
	restart = false;

	for (idx = 0; idx < llbitmap->nr_pages; idx++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[idx];

		if (idx > 0) {
			start = end + 1;
			end = min(end + PAGE_SIZE, llbitmap->chunks - 1);
		}

		if (!test_bit(LLPageFlush, &pctl->flags) &&
		    time_before(jiffies, pctl->expire)) {
			restart = true;
			continue;
		}

		if (llbitmap_suspend_timeout(llbitmap, idx) < 0) {
			pr_warn("md/llbitmap: %s: %s waiting for page %d timeout\n",
				mdname(llbitmap->mddev), __func__, idx);
			continue;
		}

		llbitmap_state_machine(llbitmap, start, end, BitmapActionDaemon);
		llbitmap_resume(llbitmap, idx);
	}

	/*
	 * If the daemon took a long time to finish, retry to avoid missing
	 * the clearing of dirty bits.
	 */
	if (test_and_clear_bit(BITMAP_DAEMON_BUSY, &llbitmap->flags))
		goto retry;

	/* If some page is dirty but not expired, set up the timer again */
	if (restart)
		mod_timer(&llbitmap->pending_timer,
			  jiffies + llbitmap->mddev->bitmap_info.daemon_sleep * HZ);
}

static int llbitmap_create(struct mddev *mddev)
{
	struct llbitmap *llbitmap;
	int ret;

	ret = llbitmap_check_support(mddev);
	if (ret)
		return ret;

	llbitmap = kzalloc(sizeof(*llbitmap), GFP_KERNEL);
	if (!llbitmap)
		return -ENOMEM;

	llbitmap->mddev = mddev;
	llbitmap->io_size = bdev_logical_block_size(mddev->gendisk->part0);
	llbitmap->blocks_per_page = PAGE_SIZE / llbitmap->io_size;

	timer_setup(&llbitmap->pending_timer, llbitmap_pending_timer_fn, 0);
	INIT_WORK(&llbitmap->daemon_work, md_llbitmap_daemon_fn);
	atomic_set(&llbitmap->behind_writes, 0);
	init_waitqueue_head(&llbitmap->behind_wait);

	mutex_lock(&mddev->bitmap_info.mutex);
	mddev->bitmap = llbitmap;
	ret = llbitmap_read_sb(llbitmap);
	mutex_unlock(&mddev->bitmap_info.mutex);
	if (ret) {
		kfree(llbitmap);
		mddev->bitmap = NULL;
	}

	return ret;
}

static int llbitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long chunks;

	if (chunksize == 0)
		chunksize = llbitmap->chunksize;

	/* If there is enough space, leave the chunksize unchanged. */
	chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	while (chunks > mddev->bitmap_info.space << SECTOR_SHIFT) {
		chunksize = chunksize << 1;
		chunks = DIV_ROUND_UP_SECTOR_T(blocks, chunksize);
	}

	llbitmap->chunkshift = ffz(~chunksize);
	llbitmap->chunksize = chunksize;
	llbitmap->chunks = chunks;

	return 0;
}

static int llbitmap_load(struct mddev *mddev)
{
	enum llbitmap_action action = BitmapActionReload;
	struct llbitmap *llbitmap = mddev->bitmap;

	if (test_and_clear_bit(BITMAP_STALE, &llbitmap->flags))
		action = BitmapActionStale;

	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1, action);
	return 0;
}

static void llbitmap_destroy(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (!llbitmap)
		return;

	mutex_lock(&mddev->bitmap_info.mutex);

	timer_delete_sync(&llbitmap->pending_timer);
	flush_workqueue(md_llbitmap_io_wq);
	flush_workqueue(md_llbitmap_unplug_wq);

	mddev->bitmap = NULL;
	llbitmap_free_pages(llbitmap);
	kfree(llbitmap);
	mutex_unlock(&mddev->bitmap_info.mutex);
}

static void llbitmap_start_write(struct mddev *mddev, sector_t offset,
				 unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = offset >> llbitmap->chunkshift;
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	llbitmap_state_machine(llbitmap, start, end, BitmapActionStartwrite);

	while (page_start <= page_end) {
		llbitmap_raise_barrier(llbitmap, page_start);
		page_start++;
	}
}

static void llbitmap_end_write(struct mddev *mddev, sector_t offset,
			       unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = offset >> llbitmap->chunkshift;
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	while (page_start <= page_end) {
		llbitmap_release_barrier(llbitmap, page_start);
		page_start++;
	}
}

static void llbitmap_start_discard(struct mddev *mddev, sector_t offset,
				   unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	llbitmap_state_machine(llbitmap, start, end, BitmapActionDiscard);

	while (page_start <= page_end) {
		llbitmap_raise_barrier(llbitmap, page_start);
		page_start++;
	}
}

static void llbitmap_end_discard(struct mddev *mddev, sector_t offset,
				 unsigned long sectors)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long start = DIV_ROUND_UP_SECTOR_T(offset, llbitmap->chunksize);
	unsigned long end = (offset + sectors - 1) >> llbitmap->chunkshift;
	int page_start = (start + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;
	int page_end = (end + BITMAP_DATA_OFFSET) >> PAGE_SHIFT;

	while (page_start <= page_end) {
		llbitmap_release_barrier(llbitmap, page_start);
		page_start++;
	}
}

static void llbitmap_unplug_fn(struct work_struct *work)
{
	struct llbitmap_unplug_work *unplug_work =
		container_of(work, struct llbitmap_unplug_work, work);
	struct llbitmap *llbitmap = unplug_work->llbitmap;
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);

	for (i = 0; i < llbitmap->nr_pages; i++) {
		if (!test_bit(LLPageDirty, &llbitmap->pctl[i]->flags) ||
		    !test_and_clear_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
			continue;

		llbitmap_write_page(llbitmap, i);
	}

	blk_finish_plug(&plug);
	md_super_wait(llbitmap->mddev);
	complete(unplug_work->done);
}

static bool llbitmap_dirty(struct llbitmap *llbitmap)
{
	int i;

	for (i = 0; i < llbitmap->nr_pages; i++)
		if (test_bit(LLPageDirty, &llbitmap->pctl[i]->flags))
			return true;

	return false;
}

static void llbitmap_unplug(struct mddev *mddev, bool sync)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct llbitmap *llbitmap = mddev->bitmap;
	struct llbitmap_unplug_work unplug_work = {
		.llbitmap = llbitmap,
		.done = &done,
	};

	if (!llbitmap_dirty(llbitmap))
		return;

	/*
	 * Issuing new bitmap IO under the submit_bio() context will deadlock:
	 * - the bio will wait for the bitmap bio to be done, before it can be
	 *   issued;
	 * - the bitmap bio will be added to current->bio_list and wait for
	 *   this bio to be issued;
	 */
	INIT_WORK_ONSTACK(&unplug_work.work, llbitmap_unplug_fn);
	queue_work(md_llbitmap_unplug_wq, &unplug_work.work);
	wait_for_completion(&done);
	destroy_work_on_stack(&unplug_work.work);
}

/*
 * Force writing all bitmap pages to disk; called when stopping the array, or
 * every daemon_sleep seconds when sync_thread is running.
 */
static void __llbitmap_flush(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);
	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		/* mark all blocks as dirty */
		set_bit(LLPageDirty, &pctl->flags);
		bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
		llbitmap_write_page(llbitmap, i);
	}
	blk_finish_plug(&plug);
	md_super_wait(llbitmap->mddev);
}

static void llbitmap_flush(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	int i;

	for (i = 0; i < llbitmap->nr_pages; i++)
		set_bit(LLPageFlush, &llbitmap->pctl[i]->flags);

	timer_delete_sync(&llbitmap->pending_timer);
	queue_work(md_llbitmap_io_wq, &llbitmap->daemon_work);
	flush_work(&llbitmap->daemon_work);

	__llbitmap_flush(mddev);
}

/* This is used for raid5 lazy initial recovery */
static bool llbitmap_blocks_synced(struct mddev *mddev, sector_t offset)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;
	enum llbitmap_state c = llbitmap_read(llbitmap, p);

	return c == BitClean || c == BitDirty;
}

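/*
 * Return the number of sectors from @offset that sync can skip, or 0 if the
 * chunk must be synced; see 5.3 in the header comment.
 */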
static sector_t llbitmap_skip_sync_blocks(struct mddev *mddev, sector_t offset)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;
	int blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	enum llbitmap_state c = llbitmap_read(llbitmap, p);

	/* always skip unwritten blocks */
	if (c == BitUnwritten)
		return blocks;

	/* For a degraded array, don't skip */
	if (mddev->degraded)
		return 0;

	/* For resync, also skip clean/dirty blocks */
	if ((c == BitClean || c == BitDirty) &&
	    test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
		return blocks;

	return 0;
}

static bool llbitmap_start_sync(struct mddev *mddev, sector_t offset,
				sector_t *blocks, bool degraded)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;

	/*
	 * Handle one bit at a time; this is much simpler, and it doesn't
	 * matter if md_do_sync() loops a few more times.
	 */
	*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	return llbitmap_state_machine(llbitmap, p, p,
				      BitmapActionStartsync) == BitSyncing;
}

/* Something went wrong, sync_thread stopped at @offset */
static void llbitmap_end_sync(struct mddev *mddev, sector_t offset,
			      sector_t *blocks)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long p = offset >> llbitmap->chunkshift;

	*blocks = llbitmap->chunksize - (offset & (llbitmap->chunksize - 1));
	llbitmap_state_machine(llbitmap, p, llbitmap->chunks - 1,
			       BitmapActionAbortsync);
}

/* A full sync_thread is finished */
static void llbitmap_close_sync(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	int i;

	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		/* let daemon_fn clear dirty bits immediately */
		WRITE_ONCE(pctl->expire, jiffies);
	}

	llbitmap_state_machine(llbitmap, 0, llbitmap->chunks - 1,
			       BitmapActionEndsync);
}

/*
 * sync_thread has reached @sector; update metadata every daemon_sleep
 * seconds, just in case sync_thread has to restart after a power failure.
 */
static void llbitmap_cond_end_sync(struct mddev *mddev, sector_t sector,
				   bool force)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (sector == 0) {
		llbitmap->last_end_sync = jiffies;
		return;
	}

	if (time_before(jiffies, llbitmap->last_end_sync +
			HZ * mddev->bitmap_info.daemon_sleep))
		return;

	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

	mddev->curr_resync_completed = sector;
	set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
	llbitmap_state_machine(llbitmap, 0, sector >> llbitmap->chunkshift,
			       BitmapActionEndsync);
	__llbitmap_flush(mddev);

	llbitmap->last_end_sync = jiffies;
	sysfs_notify_dirent_safe(mddev->sysfs_completed);
}

static bool llbitmap_enabled(void *data, bool flush)
{
	struct llbitmap *llbitmap = data;

	return llbitmap && !test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
}

static void llbitmap_dirty_bits(struct mddev *mddev, unsigned long s,
				unsigned long e)
{
	llbitmap_state_machine(mddev->bitmap, s, e, BitmapActionStartwrite);
}

static void llbitmap_write_sb(struct llbitmap *llbitmap)
{
	int nr_blocks = DIV_ROUND_UP(BITMAP_DATA_OFFSET, llbitmap->io_size);

	bitmap_fill(llbitmap->pctl[0]->dirty, nr_blocks);
	llbitmap_write_page(llbitmap, 0);
	md_super_wait(llbitmap->mddev);
}

static void llbitmap_update_sb(void *data)
{
	struct llbitmap *llbitmap = data;
	struct mddev *mddev = llbitmap->mddev;
	struct page *sb_page;
	bitmap_super_t *sb;

	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags))
		return;

	sb_page = llbitmap_read_page(llbitmap, 0);
	if (IS_ERR(sb_page)) {
		pr_err("%s: %s: read super block failed\n", __func__,
		       mdname(mddev));
		set_bit(BITMAP_WRITE_ERROR, &llbitmap->flags);
		return;
	}

	if (mddev->events < llbitmap->events_cleared)
		llbitmap->events_cleared = mddev->events;

	sb = kmap_local_page(sb_page);
	sb->events = cpu_to_le64(mddev->events);
	sb->state = cpu_to_le32(llbitmap->flags);
	sb->chunksize = cpu_to_le32(llbitmap->chunksize);
	sb->sync_size = cpu_to_le64(mddev->resync_max_sectors);
	sb->events_cleared = cpu_to_le64(llbitmap->events_cleared);
	sb->sectors_reserved = cpu_to_le32(mddev->bitmap_info.space);
	sb->daemon_sleep = cpu_to_le32(mddev->bitmap_info.daemon_sleep);

	kunmap_local(sb);
	llbitmap_write_sb(llbitmap);
}

static int llbitmap_get_stats(void *data, struct md_bitmap_stats *stats)
{
	struct llbitmap *llbitmap = data;

	memset(stats, 0, sizeof(*stats));

	stats->missing_pages = 0;
	stats->pages = llbitmap->nr_pages;
	stats->file_pages = llbitmap->nr_pages;

	stats->behind_writes = atomic_read(&llbitmap->behind_writes);
	stats->behind_wait = wq_has_sleeper(&llbitmap->behind_wait);
	stats->events_cleared = llbitmap->events_cleared;

	return 0;
}

/* just flag all pages as needing to be written */
static void llbitmap_write_all(struct mddev *mddev)
{
	int i;
	struct llbitmap *llbitmap = mddev->bitmap;

	for (i = 0; i < llbitmap->nr_pages; i++) {
		struct llbitmap_page_ctl *pctl = llbitmap->pctl[i];

		set_bit(LLPageDirty, &pctl->flags);
		bitmap_fill(pctl->dirty, llbitmap->blocks_per_page);
	}
}

static void llbitmap_start_behind_write(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	atomic_inc(&llbitmap->behind_writes);
}

static void llbitmap_end_behind_write(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (atomic_dec_and_test(&llbitmap->behind_writes))
		wake_up(&llbitmap->behind_wait);
}

static void llbitmap_wait_behind_writes(struct mddev *mddev)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	if (!llbitmap)
		return;

	wait_event(llbitmap->behind_wait,
		   atomic_read(&llbitmap->behind_writes) == 0);
}

static ssize_t bits_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap;
	int bits[BitStateCount] = {0};
	loff_t start = 0;

	mutex_lock(&mddev->bitmap_info.mutex);
	llbitmap = mddev->bitmap;
	if (!llbitmap || !llbitmap->pctl) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "no bitmap\n");
	}

	if (test_bit(BITMAP_WRITE_ERROR, &llbitmap->flags)) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "bitmap io error\n");
	}

	while (start < llbitmap->chunks) {
		enum llbitmap_state c = llbitmap_read(llbitmap, start);

		if (c < 0 || c >= BitStateCount)
			pr_err("%s: invalid bit %lld state %d\n",
			       __func__, start, c);
		else
			bits[c]++;
		start++;
	}

	mutex_unlock(&mddev->bitmap_info.mutex);
	return sprintf(page, "unwritten %d\nclean %d\ndirty %d\nneed sync %d\nsyncing %d\n",
		       bits[BitUnwritten], bits[BitClean], bits[BitDirty],
		       bits[BitNeedSync], bits[BitSyncing]);
}

static struct md_sysfs_entry llbitmap_bits = __ATTR_RO(bits);

static ssize_t metadata_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap;
	ssize_t ret;

	mutex_lock(&mddev->bitmap_info.mutex);
	llbitmap = mddev->bitmap;
	if (!llbitmap) {
		mutex_unlock(&mddev->bitmap_info.mutex);
		return sprintf(page, "no bitmap\n");
	}

	ret = sprintf(page, "chunksize %lu\nchunkshift %lu\nchunks %lu\noffset %llu\ndaemon_sleep %lu\n",
		      llbitmap->chunksize, llbitmap->chunkshift,
		      llbitmap->chunks, mddev->bitmap_info.offset,
		      llbitmap->mddev->bitmap_info.daemon_sleep);
	mutex_unlock(&mddev->bitmap_info.mutex);

	return ret;
}

static struct md_sysfs_entry llbitmap_metadata = __ATTR_RO(metadata);

static ssize_t
daemon_sleep_show(struct mddev *mddev, char *page)
{
	return sprintf(page, "%lu\n", mddev->bitmap_info.daemon_sleep);
}

static ssize_t
daemon_sleep_store(struct mddev *mddev, const char *buf, size_t len)
{
	unsigned long timeout;
	int rv = kstrtoul(buf, 10, &timeout);

	if (rv)
		return rv;

	mddev->bitmap_info.daemon_sleep = timeout;
	return len;
}

static struct md_sysfs_entry llbitmap_daemon_sleep = __ATTR_RW(daemon_sleep);

static ssize_t
barrier_idle_show(struct mddev *mddev, char *page)
{
	struct llbitmap *llbitmap = mddev->bitmap;

	return sprintf(page, "%lu\n", llbitmap->barrier_idle);
}

static ssize_t
barrier_idle_store(struct mddev *mddev, const char *buf, size_t len)
{
	struct llbitmap *llbitmap = mddev->bitmap;
	unsigned long timeout;
	int rv = kstrtoul(buf, 10, &timeout);

	if (rv)
		return rv;

	llbitmap->barrier_idle = timeout;
	return len;
}

static struct md_sysfs_entry llbitmap_barrier_idle = __ATTR_RW(barrier_idle);

static struct attribute *md_llbitmap_attrs[] = {
	&llbitmap_bits.attr,
	&llbitmap_metadata.attr,
	&llbitmap_daemon_sleep.attr,
	&llbitmap_barrier_idle.attr,
	NULL
};

static struct attribute_group md_llbitmap_group = {
	.name = "llbitmap",
	.attrs = md_llbitmap_attrs,
};

static struct bitmap_operations llbitmap_ops = {
	.head = {
		.type = MD_BITMAP,
		.id = ID_LLBITMAP,
		.name = "llbitmap",
	},

	.enabled = llbitmap_enabled,
	.create = llbitmap_create,
	.resize = llbitmap_resize,
	.load = llbitmap_load,
	.destroy = llbitmap_destroy,

	.start_write = llbitmap_start_write,
	.end_write = llbitmap_end_write,
	.start_discard = llbitmap_start_discard,
	.end_discard = llbitmap_end_discard,
	.unplug = llbitmap_unplug,
	.flush = llbitmap_flush,

	.start_behind_write = llbitmap_start_behind_write,
	.end_behind_write = llbitmap_end_behind_write,
	.wait_behind_writes = llbitmap_wait_behind_writes,

	.blocks_synced = llbitmap_blocks_synced,
	.skip_sync_blocks = llbitmap_skip_sync_blocks,
	.start_sync = llbitmap_start_sync,
	.end_sync = llbitmap_end_sync,
	.close_sync = llbitmap_close_sync,
	.cond_end_sync = llbitmap_cond_end_sync,

	.update_sb = llbitmap_update_sb,
	.get_stats = llbitmap_get_stats,
	.dirty_bits = llbitmap_dirty_bits,
	.write_all = llbitmap_write_all,

	.group = &md_llbitmap_group,
};

int md_llbitmap_init(void)
{
	md_llbitmap_io_wq = alloc_workqueue("md_llbitmap_io",
					    WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!md_llbitmap_io_wq)
		return -ENOMEM;

	md_llbitmap_unplug_wq = alloc_workqueue("md_llbitmap_unplug",
						WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!md_llbitmap_unplug_wq) {
		destroy_workqueue(md_llbitmap_io_wq);
		md_llbitmap_io_wq = NULL;
		return -ENOMEM;
	}

	return register_md_submodule(&llbitmap_ops.head);
}

void md_llbitmap_exit(void)
{
	destroy_workqueue(md_llbitmap_io_wq);
	md_llbitmap_io_wq = NULL;
	destroy_workqueue(md_llbitmap_unplug_wq);
	md_llbitmap_unplug_wq = NULL;
	unregister_md_submodule(&llbitmap_ops.head);
}