/* SPDX-License-Identifier: GPL-2.0 */
/*
 * include/linux/backing-dev.h
 *
 * low-level device information and state which is propagated up through
 * to high-level code.
 */

#ifndef _LINUX_BACKING_DEV_H
#define _LINUX_BACKING_DEV_H

#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/blk-cgroup.h>
#include <linux/backing-dev-defs.h>
#include <linux/slab.h>

static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi)
{
        kref_get(&bdi->refcnt);
        return bdi;
}

struct backing_dev_info *bdi_get_by_id(u64 id);
void bdi_put(struct backing_dev_info *bdi);

__printf(2, 3)
int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
__printf(2, 0)
int bdi_register_va(struct backing_dev_info *bdi, const char *fmt,
                    va_list args);
int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner);
void bdi_unregister(struct backing_dev_info *bdi);

struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id);
static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask)
{
        return bdi_alloc_node(gfp_mask, NUMA_NO_NODE);
}

void wb_start_background_writeback(struct bdi_writeback *wb);
void wb_workfn(struct work_struct *work);
void wb_wakeup_delayed(struct bdi_writeback *wb);

void wb_wait_for_completion(struct wb_completion *done);

extern spinlock_t bdi_lock;
extern struct list_head bdi_list;

extern struct workqueue_struct *bdi_wq;
extern struct workqueue_struct *bdi_async_bio_wq;

static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
{
        return test_bit(WB_has_dirty_io, &wb->state);
}

static inline bool bdi_has_dirty_io(struct backing_dev_info *bdi)
{
        /*
         * @bdi->tot_write_bandwidth is guaranteed to be > 0 if there are
         * any dirty wbs. See wb_update_write_bandwidth().
         */
        return atomic_long_read(&bdi->tot_write_bandwidth);
}

static inline void __add_wb_stat(struct bdi_writeback *wb,
                                 enum wb_stat_item item, s64 amount)
{
        percpu_counter_add_batch(&wb->stat[item], amount, WB_STAT_BATCH);
}

static inline void inc_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, 1);
}

static inline void dec_wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        __add_wb_stat(wb, item, -1);
}

static inline s64 wb_stat(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_read_positive(&wb->stat[item]);
}

static inline s64 wb_stat_sum(struct bdi_writeback *wb, enum wb_stat_item item)
{
        return percpu_counter_sum_positive(&wb->stat[item]);
}

extern void wb_writeout_inc(struct bdi_writeback *wb);

/*
 * maximal error of a stat counter.
 */
static inline unsigned long wb_stat_error(void)
{
#ifdef CONFIG_SMP
        return nr_cpu_ids * WB_STAT_BATCH;
#else
        return 1;
#endif
}

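/*
 * Illustrative sketch, not part of the mainline header: writeback
 * accounting code bumps the per-wb counters above around page state
 * transitions, roughly as below.  The helper name is hypothetical; the
 * real callers live in mm/page-writeback.c.
 */
static inline void example_wb_account_written(struct bdi_writeback *wb)
{
        dec_wb_stat(wb, WB_WRITEBACK);          /* page has left writeback... */
        inc_wb_stat(wb, WB_WRITTEN);            /* ...and reached the device */
}
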
int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);

/*
 * Flags in backing_dev_info::capability
 *
 * The first three flags control whether dirty pages will contribute to the
 * VM's accounting and whether writepages() should be called for dirty pages
 * (something that would not, for example, be appropriate for ramfs)
 *
 * WARNING: these flags are closely related and should not normally be
 * used separately. The BDI_CAP_NO_ACCT_AND_WRITEBACK combines these
 * three flags into a single convenience macro.
 *
 * BDI_CAP_NO_ACCT_DIRTY: Dirty pages shouldn't contribute to accounting
 * BDI_CAP_NO_WRITEBACK: Don't write pages back
 * BDI_CAP_NO_ACCT_WB: Don't automatically account writeback pages
 * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold.
 *
 * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback.
 * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be
 *                         inefficient.
 */
#define BDI_CAP_NO_ACCT_DIRTY   0x00000001
#define BDI_CAP_NO_WRITEBACK    0x00000002
#define BDI_CAP_NO_ACCT_WB      0x00000004
#define BDI_CAP_STABLE_WRITES   0x00000008
#define BDI_CAP_STRICTLIMIT     0x00000010
#define BDI_CAP_CGROUP_WRITEBACK 0x00000020
#define BDI_CAP_SYNCHRONOUS_IO  0x00000040

#define BDI_CAP_NO_ACCT_AND_WRITEBACK \
        (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB)

extern struct backing_dev_info noop_backing_dev_info;

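/*
 * Illustrative sketch, not part of the mainline header: a typical
 * driver-side sequence for setting up a private bdi with the interfaces
 * declared above.  The function name, the "example%d" name format and
 * the choice of capabilities are hypothetical.
 */
static inline struct backing_dev_info *example_bdi_setup(int minor)
{
        struct backing_dev_info *bdi;

        bdi = bdi_alloc(GFP_KERNEL);
        if (!bdi)
                return NULL;

        /* e.g. a fast, synchronous device that also wants strict limiting */
        bdi->capabilities = BDI_CAP_SYNCHRONOUS_IO | BDI_CAP_STRICTLIMIT;

        if (bdi_register(bdi, "example%d", minor)) {
                bdi_put(bdi);           /* drops the reference from bdi_alloc() */
                return NULL;
        }
        return bdi;
}
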
/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @wb: bdi_writeback of interest
 *
 * Determine whether there is writeback waiting to be handled against a
 * bdi_writeback.
 */
static inline bool writeback_in_progress(struct bdi_writeback *wb)
{
        return test_bit(WB_writeback_running, &wb->state);
}

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
        struct super_block *sb;

        if (!inode)
                return &noop_backing_dev_info;

        sb = inode->i_sb;
#ifdef CONFIG_BLOCK
        if (sb_is_blkdev_sb(sb))
                return I_BDEV(inode)->bd_bdi;
#endif
        return sb->s_bdi;
}

static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
{
        struct backing_dev_info *bdi = wb->bdi;

        if (bdi->congested_fn)
                return bdi->congested_fn(bdi->congested_data, cong_bits);
        return wb->congested->state & cong_bits;
}

long congestion_wait(int sync, long timeout);
long wait_iff_congested(int sync, long timeout);

static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO;
}

static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi)
{
        return bdi->capabilities & BDI_CAP_STABLE_WRITES;
}

static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_WRITEBACK);
}

static inline bool bdi_cap_account_dirty(struct backing_dev_info *bdi)
{
        return !(bdi->capabilities & BDI_CAP_NO_ACCT_DIRTY);
}

static inline bool bdi_cap_account_writeback(struct backing_dev_info *bdi)
{
        /* Paranoia: BDI_CAP_NO_WRITEBACK implies BDI_CAP_NO_ACCT_WB */
        return !(bdi->capabilities & (BDI_CAP_NO_ACCT_WB |
                                      BDI_CAP_NO_WRITEBACK));
}

static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
        return bdi_cap_writeback_dirty(inode_to_bdi(mapping->host));
}

static inline bool mapping_cap_account_dirty(struct address_space *mapping)
{
        return bdi_cap_account_dirty(inode_to_bdi(mapping->host));
}

static inline int bdi_sched_wait(void *word)
{
        schedule();
        return 0;
}

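/*
 * Illustrative sketch, not part of the mainline header: writers that can
 * back off usually test the async (write) congestion bit and then sleep
 * for a while, roughly as below.  The helper name is hypothetical; see
 * the congestion_wait() callers in mm/vmscan.c for the real pattern.
 */
static inline void example_backoff_if_write_congested(struct bdi_writeback *wb)
{
        if (wb_congested(wb, 1 << WB_async_congested))
                congestion_wait(BLK_RW_ASYNC, HZ / 10);  /* sleep up to 100ms */
}
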
#ifdef CONFIG_CGROUP_WRITEBACK

struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
                                    struct cgroup_subsys_state *memcg_css,
                                    gfp_t gfp);
void wb_memcg_offline(struct mem_cgroup *memcg);
void wb_blkcg_offline(struct blkcg *blkcg);
int inode_congested(struct inode *inode, int cong_bits);

/**
 * inode_cgwb_enabled - test whether cgroup writeback is enabled on an inode
 * @inode: inode of interest
 *
 * cgroup writeback requires support from both the bdi and filesystem.
 * Also, both memcg and iocg have to be on the default hierarchy. Test
 * whether all conditions are met.
 *
 * Note that the test result may change dynamically on the same inode
 * depending on how memcg and iocg are configured.
 */
static inline bool inode_cgwb_enabled(struct inode *inode)
{
        struct backing_dev_info *bdi = inode_to_bdi(inode);

        return cgroup_subsys_on_dfl(memory_cgrp_subsys) &&
                cgroup_subsys_on_dfl(io_cgrp_subsys) &&
                bdi_cap_account_dirty(bdi) &&
                (bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
                (inode->i_sb->s_iflags & SB_I_CGROUPWB);
}

/**
 * wb_find_current - find wb for %current on a bdi
 * @bdi: bdi of interest
 *
 * Find the wb of @bdi which matches both the memcg and blkcg of %current.
 * Must be called under rcu_read_lock() which protects the returned wb.
 * NULL if not found.
 */
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        struct cgroup_subsys_state *memcg_css;
        struct bdi_writeback *wb;

        memcg_css = task_css(current, memory_cgrp_id);
        if (!memcg_css->parent)
                return &bdi->wb;

        wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);

        /*
         * %current's blkcg equals the effective blkcg of its memcg. No
         * need to use the relatively expensive cgroup_get_e_css().
         */
        if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
                return wb;
        return NULL;
}

/**
 * wb_get_create_current - get or create wb for %current on a bdi
 * @bdi: bdi of interest
 * @gfp: allocation mask
 *
 * Equivalent to wb_get_create() on %current's memcg. This function is
 * called from a relatively hot path and optimizes the common cases using
 * wb_find_current().
 */
static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        struct bdi_writeback *wb;

        rcu_read_lock();
        wb = wb_find_current(bdi);
        if (wb && unlikely(!wb_tryget(wb)))
                wb = NULL;
        rcu_read_unlock();

        if (unlikely(!wb)) {
                struct cgroup_subsys_state *memcg_css;

                memcg_css = task_get_css(current, memory_cgrp_id);
                wb = wb_get_create(bdi, memcg_css, gfp);
                css_put(memcg_css);
        }
        return wb;
}

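/*
 * Illustrative sketch, not part of the mainline header: a dirty-throttling
 * path would typically pin %current's wb like this.  The function name is
 * hypothetical; balance_dirty_pages_ratelimited() is the real caller of
 * wb_get_create_current().
 */
static inline void example_throttle_current(struct backing_dev_info *bdi)
{
        struct bdi_writeback *wb;

        wb = wb_get_create_current(bdi, GFP_KERNEL);
        if (!wb)
                return;                 /* allocation failed; caller may retry */

        /* ... account or throttle against @wb here ... */

        wb_put(wb);                     /* drop the reference taken above */
}
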
/**
 * inode_to_wb_is_valid - test whether an inode has a wb associated
 * @inode: inode of interest
 *
 * Returns %true if @inode has a wb associated. May be called without any
 * locking.
 */
static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return inode->i_wb;
}

/**
 * inode_to_wb - determine the wb of an inode
 * @inode: inode of interest
 *
 * Returns the wb @inode is currently associated with. The caller must be
 * holding either @inode->i_lock, the i_pages lock, or the
 * associated wb's list_lock.
 */
static inline struct bdi_writeback *inode_to_wb(const struct inode *inode)
{
#ifdef CONFIG_LOCKDEP
        WARN_ON_ONCE(debug_locks &&
                     (!lockdep_is_held(&inode->i_lock) &&
                      !lockdep_is_held(&inode->i_mapping->i_pages.xa_lock) &&
                      !lockdep_is_held(&inode->i_wb->list_lock)));
#endif
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
 * @inode: target inode
 * @cookie: output param, to be passed to the end function
 *
 * The caller wants to access the wb associated with @inode but isn't
 * holding inode->i_lock, the i_pages lock or wb->list_lock. This
 * function determines the wb associated with @inode and ensures that the
 * association doesn't change until the transaction is finished with
 * unlocked_inode_to_wb_end().
 *
 * The caller must call unlocked_inode_to_wb_end() with *@cookie afterwards and
 * can't sleep during the transaction. IRQs may or may not be disabled on
 * return.
 */
static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        rcu_read_lock();

        /*
         * Paired with store_release in inode_switch_wbs_work_fn() and
         * ensures that we see the new wb if we see cleared I_WB_SWITCH.
         */
        cookie->locked = smp_load_acquire(&inode->i_state) & I_WB_SWITCH;

        if (unlikely(cookie->locked))
                xa_lock_irqsave(&inode->i_mapping->i_pages, cookie->flags);

        /*
         * Protected by either !I_WB_SWITCH + rcu_read_lock() or the i_pages
         * lock. inode_to_wb() will bark. Deref directly.
         */
        return inode->i_wb;
}

/**
 * unlocked_inode_to_wb_end - end inode wb access transaction
 * @inode: target inode
 * @cookie: @cookie from unlocked_inode_to_wb_begin()
 */
static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
        if (unlikely(cookie->locked))
                xa_unlock_irqrestore(&inode->i_mapping->i_pages, cookie->flags);

        rcu_read_unlock();
}

#else /* CONFIG_CGROUP_WRITEBACK */

static inline bool inode_cgwb_enabled(struct inode *inode)
{
        return false;
}

static inline struct bdi_writeback_congested *
wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
{
        refcount_inc(&bdi->wb_congested->refcnt);
        return bdi->wb_congested;
}

static inline void wb_congested_put(struct bdi_writeback_congested *congested)
{
        if (refcount_dec_and_test(&congested->refcnt))
                kfree(congested);
}

static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
        return &bdi->wb;
}

static inline struct bdi_writeback *
wb_get_create_current(struct backing_dev_info *bdi, gfp_t gfp)
{
        return &bdi->wb;
}

static inline bool inode_to_wb_is_valid(struct inode *inode)
{
        return true;
}

static inline struct bdi_writeback *inode_to_wb(struct inode *inode)
{
        return &inode_to_bdi(inode)->wb;
}

static inline struct bdi_writeback *
unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
{
        return inode_to_wb(inode);
}

static inline void unlocked_inode_to_wb_end(struct inode *inode,
                                            struct wb_lock_cookie *cookie)
{
}

static inline void wb_memcg_offline(struct mem_cgroup *memcg)
{
}

static inline void wb_blkcg_offline(struct blkcg *blkcg)
{
}

static inline int inode_congested(struct inode *inode, int cong_bits)
{
        return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
}

#endif /* CONFIG_CGROUP_WRITEBACK */

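/*
 * Illustrative sketch, not part of the mainline header: the usual shape of
 * an unlocked wb access transaction.  The function name and the choice of
 * counter are hypothetical; account_page_redirty() is a real user of this
 * begin/end pattern.
 */
static inline void example_account_redirty(struct inode *inode)
{
        struct wb_lock_cookie cookie = {};
        struct bdi_writeback *wb;

        wb = unlocked_inode_to_wb_begin(inode, &cookie);
        dec_wb_stat(wb, WB_DIRTIED);    /* wb association is stable until _end() */
        unlocked_inode_to_wb_end(inode, &cookie);
}
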
static inline int inode_read_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_sync_congested);
}

static inline int inode_write_congested(struct inode *inode)
{
        return inode_congested(inode, 1 << WB_async_congested);
}

static inline int inode_rw_congested(struct inode *inode)
{
        return inode_congested(inode, (1 << WB_sync_congested) |
                                      (1 << WB_async_congested));
}

static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits)
{
        return wb_congested(&bdi->wb, cong_bits);
}

static inline int bdi_read_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_sync_congested);
}

static inline int bdi_write_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, 1 << WB_async_congested);
}

static inline int bdi_rw_congested(struct backing_dev_info *bdi)
{
        return bdi_congested(bdi, (1 << WB_sync_congested) |
                                  (1 << WB_async_congested));
}

#endif /* _LINUX_BACKING_DEV_H */