Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

writeback: support retrieving per group debug writeback stats of bdi

Add /sys/kernel/debug/bdi/xxx/wb_stats to show per group writeback stats
of bdi.

The following domain hierarchy is tested:
                  global domain (320G)
                  /                  \
    cgroup domain1(10G)        cgroup domain2(10G)
                  |                  |
bdi              wb1                wb2

/* per wb writeback info of bdi is collected */
cat wb_stats
WbCgIno: 1
WbWriteback: 0 kB
WbReclaimable: 0 kB
WbDirtyThresh: 0 kB
WbDirtied: 0 kB
WbWritten: 0 kB
WbWriteBandwidth: 102400 kBps
b_dirty: 0
b_io: 0
b_more_io: 0
b_dirty_time: 0
state: 1

WbCgIno: 4091
WbWriteback: 1792 kB
WbReclaimable: 820512 kB
WbDirtyThresh: 6004692 kB
WbDirtied: 1820448 kB
WbWritten: 999488 kB
WbWriteBandwidth: 169020 kBps
b_dirty: 0
b_io: 0
b_more_io: 1
b_dirty_time: 0
state: 5

WbCgIno: 4131
WbWriteback: 1120 kB
WbReclaimable: 820064 kB
WbDirtyThresh: 6004728 kB
WbDirtied: 1822688 kB
WbWritten: 1002400 kB
WbWriteBandwidth: 153520 kBps
b_dirty: 0
b_io: 0
b_more_io: 1
b_dirty_time: 0
state: 5

[shikemeng@huaweicloud.com: fix build problems]
Link: https://lkml.kernel.org/r/20240423034643.141219-4-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20240423034643.141219-3-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Brian Foster <bfoster@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: David Sterba <dsterba@suse.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: SeongJae Park <sj@kernel.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Kemeng Shi and committed by
Andrew Morton
4b5bbc39 e32e2700

+99 -2
+1
include/linux/writeback.h
··· 355 355 356 356 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); 357 357 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); 358 + unsigned long cgwb_calc_thresh(struct bdi_writeback *wb); 358 359 359 360 void wb_update_bandwidth(struct bdi_writeback *wb); 360 361
+79 -2
mm/backing-dev.c
··· 155 155 } 156 156 DEFINE_SHOW_ATTRIBUTE(bdi_debug_stats); 157 157 158 + static void wb_stats_show(struct seq_file *m, struct bdi_writeback *wb, 159 + struct wb_stats *stats) 160 + { 161 + 162 + seq_printf(m, 163 + "WbCgIno: %10lu\n" 164 + "WbWriteback: %10lu kB\n" 165 + "WbReclaimable: %10lu kB\n" 166 + "WbDirtyThresh: %10lu kB\n" 167 + "WbDirtied: %10lu kB\n" 168 + "WbWritten: %10lu kB\n" 169 + "WbWriteBandwidth: %10lu kBps\n" 170 + "b_dirty: %10lu\n" 171 + "b_io: %10lu\n" 172 + "b_more_io: %10lu\n" 173 + "b_dirty_time: %10lu\n" 174 + "state: %10lx\n\n", 175 + #ifdef CONFIG_CGROUP_WRITEBACK 176 + cgroup_ino(wb->memcg_css->cgroup), 177 + #else 178 + 1ul, 179 + #endif 180 + K(stats->nr_writeback), 181 + K(stats->nr_reclaimable), 182 + K(stats->wb_thresh), 183 + K(stats->nr_dirtied), 184 + K(stats->nr_written), 185 + K(wb->avg_write_bandwidth), 186 + stats->nr_dirty, 187 + stats->nr_io, 188 + stats->nr_more_io, 189 + stats->nr_dirty_time, 190 + wb->state); 191 + } 192 + 193 + static int cgwb_debug_stats_show(struct seq_file *m, void *v) 194 + { 195 + struct backing_dev_info *bdi = m->private; 196 + unsigned long background_thresh; 197 + unsigned long dirty_thresh; 198 + struct bdi_writeback *wb; 199 + 200 + global_dirty_limits(&background_thresh, &dirty_thresh); 201 + 202 + rcu_read_lock(); 203 + list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node) { 204 + struct wb_stats stats = { .dirty_thresh = dirty_thresh }; 205 + 206 + if (!wb_tryget(wb)) 207 + continue; 208 + 209 + collect_wb_stats(&stats, wb); 210 + 211 + /* 212 + * Calculate thresh of wb in writeback cgroup which is min of 213 + * thresh in global domain and thresh in cgroup domain. Drop 214 + * rcu lock because cgwb_calc_thresh may sleep in 215 + * cgroup_rstat_flush. We can do so here because we have a ref. 
216 + */ 217 + if (mem_cgroup_wb_domain(wb)) { 218 + rcu_read_unlock(); 219 + stats.wb_thresh = min(stats.wb_thresh, cgwb_calc_thresh(wb)); 220 + rcu_read_lock(); 221 + } 222 + 223 + wb_stats_show(m, wb, &stats); 224 + 225 + wb_put(wb); 226 + } 227 + rcu_read_unlock(); 228 + 229 + return 0; 230 + } 231 + DEFINE_SHOW_ATTRIBUTE(cgwb_debug_stats); 232 + 158 233 static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) 159 234 { 160 235 bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); 161 236 162 237 debugfs_create_file("stats", 0444, bdi->debug_dir, bdi, 163 238 &bdi_debug_stats_fops); 239 + debugfs_create_file("wb_stats", 0444, bdi->debug_dir, bdi, 240 + &cgwb_debug_stats_fops); 164 241 } 165 242 166 243 static void bdi_debug_unregister(struct backing_dev_info *bdi) 167 244 { 168 245 debugfs_remove_recursive(bdi->debug_dir); 169 246 } 170 - #else 247 + #else /* CONFIG_DEBUG_FS */ 171 248 static inline void bdi_debug_init(void) 172 249 { 173 250 } ··· 255 178 static inline void bdi_debug_unregister(struct backing_dev_info *bdi) 256 179 { 257 180 } 258 - #endif 181 + #endif /* CONFIG_DEBUG_FS */ 259 182 260 183 static ssize_t read_ahead_kb_store(struct device *dev, 261 184 struct device_attribute *attr,
+19
mm/page-writeback.c
··· 892 892 return __wb_calc_thresh(&gdtc); 893 893 } 894 894 895 + unsigned long cgwb_calc_thresh(struct bdi_writeback *wb) 896 + { 897 + struct dirty_throttle_control gdtc = { GDTC_INIT_NO_WB }; 898 + struct dirty_throttle_control mdtc = { MDTC_INIT(wb, &gdtc) }; 899 + unsigned long filepages = 0, headroom = 0, writeback = 0; 900 + 901 + gdtc.avail = global_dirtyable_memory(); 902 + gdtc.dirty = global_node_page_state(NR_FILE_DIRTY) + 903 + global_node_page_state(NR_WRITEBACK); 904 + 905 + mem_cgroup_wb_stats(wb, &filepages, &headroom, 906 + &mdtc.dirty, &writeback); 907 + mdtc.dirty += writeback; 908 + mdtc_calc_avail(&mdtc, filepages, headroom); 909 + domain_dirty_limits(&mdtc); 910 + 911 + return __wb_calc_thresh(&mdtc); 912 + } 913 + 895 914 /* 896 915 * setpoint - dirty 3 897 916 * f(dirty) := 1.0 + (----------------)