Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

writeback: bdi_for_each_wb() iteration is memcg ID based not blkcg

wb's (bdi_writeback's) are currently keyed by memcg ID; however, in an
earlier implementation, wb's were keyed by blkcg ID.
bdi_for_each_wb() walks bdi->cgwb_tree in the ascending ID order and
allows iterations to start from an arbitrary ID which is used to
interrupt and resume iterations.

Unfortunately, while changing wb to be keyed by memcg ID instead of
blkcg ID, bdi_for_each_wb() was missed and still assumes that wb's
are keyed by blkcg ID. This doesn't affect iterations which don't get
interrupted, but bdi_split_work_to_wbs() makes use of iteration
resuming on allocation failures and thus may incorrectly skip or
repeat wb's.

Fix it by changing bdi_for_each_wb() to take memcg IDs instead of
blkcg IDs and updating bdi_split_work_to_wbs() accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>

Authored by Tejun Heo; committed by Jens Axboe.
1ed8d48c 11743ee0

+15 -15
+3 -3
fs/fs-writeback.c
··· 839 839 bool skip_if_busy) 840 840 { 841 841 long nr_pages = base_work->nr_pages; 842 - int next_blkcg_id = 0; 842 + int next_memcg_id = 0; 843 843 struct bdi_writeback *wb; 844 844 struct wb_iter iter; 845 845 ··· 849 849 return; 850 850 restart: 851 851 rcu_read_lock(); 852 - bdi_for_each_wb(wb, bdi, &iter, next_blkcg_id) { 852 + bdi_for_each_wb(wb, bdi, &iter, next_memcg_id) { 853 853 if (!wb_has_dirty_io(wb) || 854 854 (skip_if_busy && writeback_in_progress(wb))) 855 855 continue; 856 856 857 857 base_work->nr_pages = wb_split_bdi_pages(wb, nr_pages); 858 858 if (!wb_clone_and_queue_work(wb, base_work)) { 859 - next_blkcg_id = wb->blkcg_css->id + 1; 859 + next_memcg_id = wb->memcg_css->id + 1; 860 860 rcu_read_unlock(); 861 861 wb_wait_for_single_work(bdi, base_work); 862 862 goto restart;
+12 -12
include/linux/backing-dev.h
··· 402 402 } 403 403 404 404 struct wb_iter { 405 - int start_blkcg_id; 405 + int start_memcg_id; 406 406 struct radix_tree_iter tree_iter; 407 407 void **slot; 408 408 }; ··· 414 414 415 415 WARN_ON_ONCE(!rcu_read_lock_held()); 416 416 417 - if (iter->start_blkcg_id >= 0) { 418 - iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id); 419 - iter->start_blkcg_id = -1; 417 + if (iter->start_memcg_id >= 0) { 418 + iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id); 419 + iter->start_memcg_id = -1; 420 420 } else { 421 421 iter->slot = radix_tree_next_slot(iter->slot, titer, 0); 422 422 } ··· 430 430 431 431 static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, 432 432 struct backing_dev_info *bdi, 433 - int start_blkcg_id) 433 + int start_memcg_id) 434 434 { 435 - iter->start_blkcg_id = start_blkcg_id; 435 + iter->start_memcg_id = start_memcg_id; 436 436 437 - if (start_blkcg_id) 437 + if (start_memcg_id) 438 438 return __wb_iter_next(iter, bdi); 439 439 else 440 440 return &bdi->wb; 441 441 } 442 442 443 443 /** 444 - * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order 444 + * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order 445 445 * @wb_cur: cursor struct bdi_writeback pointer 446 446 * @bdi: bdi to walk wb's of 447 447 * @iter: pointer to struct wb_iter to be used as iteration buffer 448 - * @start_blkcg_id: blkcg ID to start iteration from 448 + * @start_memcg_id: memcg ID to start iteration from 449 449 * 450 450 * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending 451 - * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter 451 + * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter 452 452 * to be used as temp storage during iteration. rcu_read_lock() must be 453 453 * held throughout iteration. 
454 454 */ 455 - #define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ 456 - for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id); \ 455 + #define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \ 456 + for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \ 457 457 (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) 458 458 459 459 #else /* CONFIG_CGROUP_WRITEBACK */