at v4.14-rc8 13 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2/* 3 * include/linux/writeback.h 4 */ 5#ifndef WRITEBACK_H 6#define WRITEBACK_H 7 8#include <linux/sched.h> 9#include <linux/workqueue.h> 10#include <linux/fs.h> 11#include <linux/flex_proportions.h> 12#include <linux/backing-dev-defs.h> 13#include <linux/blk_types.h> 14 15struct bio; 16 17DECLARE_PER_CPU(int, dirty_throttle_leaks); 18 19/* 20 * The 1/4 region under the global dirty thresh is for smooth dirty throttling: 21 * 22 * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) 23 * 24 * Further beyond, all dirtier tasks will enter a loop waiting (possibly long 25 * time) for the dirty pages to drop, unless written enough pages. 26 * 27 * The global dirty threshold is normally equal to the global dirty limit, 28 * except when the system suddenly allocates a lot of anonymous memory and 29 * knocks down the global dirty threshold quickly, in which case the global 30 * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. 31 */ 32#define DIRTY_SCOPE 8 33#define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) 34 35struct backing_dev_info; 36 37/* 38 * fs/fs-writeback.c 39 */ 40enum writeback_sync_modes { 41 WB_SYNC_NONE, /* Don't wait on anything */ 42 WB_SYNC_ALL, /* Wait on every mapping */ 43}; 44 45/* 46 * why some writeback work was initiated 47 */ 48enum wb_reason { 49 WB_REASON_BACKGROUND, 50 WB_REASON_VMSCAN, 51 WB_REASON_SYNC, 52 WB_REASON_PERIODIC, 53 WB_REASON_LAPTOP_TIMER, 54 WB_REASON_FREE_MORE_MEM, 55 WB_REASON_FS_FREE_SPACE, 56 /* 57 * There is no bdi forker thread any more and works are done 58 * by emergency worker, however, this is TPs userland visible 59 * and we'll be exposing exactly the same information, 60 * so it has a mismatch name. 61 */ 62 WB_REASON_FORKER_THREAD, 63 64 WB_REASON_MAX, 65}; 66 67/* 68 * A control structure which tells the writeback code what to do. These are 69 * always on the stack, and hence need no locking. They are always initialised 70 * in a manner such that unspecified fields are set to zero. 71 */ 72struct writeback_control { 73 long nr_to_write; /* Write this many pages, and decrement 74 this for each page written */ 75 long pages_skipped; /* Pages which were not written */ 76 77 /* 78 * For a_ops->writepages(): if start or end are non-zero then this is 79 * a hint that the filesystem need only write out the pages inside that 80 * byterange. The byte at `end' is included in the writeout request. 81 */ 82 loff_t range_start; 83 loff_t range_end; 84 85 enum writeback_sync_modes sync_mode; 86 87 unsigned for_kupdate:1; /* A kupdate writeback */ 88 unsigned for_background:1; /* A background writeback */ 89 unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ 90 unsigned for_reclaim:1; /* Invoked from the page allocator */ 91 unsigned range_cyclic:1; /* range_start is cyclic */ 92 unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ 93#ifdef CONFIG_CGROUP_WRITEBACK 94 struct bdi_writeback *wb; /* wb this writeback is issued under */ 95 struct inode *inode; /* inode being written out */ 96 97 /* foreign inode detection, see wbc_detach_inode() */ 98 int wb_id; /* current wb id */ 99 int wb_lcand_id; /* last foreign candidate wb id */ 100 int wb_tcand_id; /* this foreign candidate wb id */ 101 size_t wb_bytes; /* bytes written by current wb */ 102 size_t wb_lcand_bytes; /* bytes written by last candidate */ 103 size_t wb_tcand_bytes; /* bytes written by this candidate */ 104#endif 105}; 106 107static inline int wbc_to_write_flags(struct writeback_control *wbc) 108{ 109 if (wbc->sync_mode == WB_SYNC_ALL) 110 return REQ_SYNC; 111 else if (wbc->for_kupdate || wbc->for_background) 112 return REQ_BACKGROUND; 113 114 return 0; 115} 116 117/* 118 * A wb_domain represents a domain that wb's (bdi_writeback's) belong to 119 * and are measured against each other in. There always is one global 120 * domain, global_wb_domain, that every wb in the system is a member of. 121 * This allows measuring the relative bandwidth of each wb to distribute 122 * dirtyable memory accordingly. 123 */ 124struct wb_domain { 125 spinlock_t lock; 126 127 /* 128 * Scale the writeback cache size proportional to the relative 129 * writeout speed. 130 * 131 * We do this by keeping a floating proportion between BDIs, based 132 * on page writeback completions [end_page_writeback()]. Those 133 * devices that write out pages fastest will get the larger share, 134 * while the slower will get a smaller share. 135 * 136 * We use page writeout completions because we are interested in 137 * getting rid of dirty pages. Having them written out is the 138 * primary goal. 139 * 140 * We introduce a concept of time, a period over which we measure 141 * these events, because demand can/will vary over time. The length 142 * of this period itself is measured in page writeback completions. 143 */ 144 struct fprop_global completions; 145 struct timer_list period_timer; /* timer for aging of completions */ 146 unsigned long period_time; 147 148 /* 149 * The dirtyable memory and dirty threshold could be suddenly 150 * knocked down by a large amount (eg. on the startup of KVM in a 151 * swapless system). This may throw the system into deep dirty 152 * exceeded state and throttle heavy/light dirtiers alike. To 153 * retain good responsiveness, maintain global_dirty_limit for 154 * tracking slowly down to the knocked down dirty threshold. 155 * 156 * Both fields are protected by ->lock. 157 */ 158 unsigned long dirty_limit_tstamp; 159 unsigned long dirty_limit; 160}; 161 162/** 163 * wb_domain_size_changed - memory available to a wb_domain has changed 164 * @dom: wb_domain of interest 165 * 166 * This function should be called when the amount of memory available to 167 * @dom has changed. It resets @dom's dirty limit parameters to prevent 168 * the past values which don't match the current configuration from skewing 169 * dirty throttling. Without this, when memory size of a wb_domain is 170 * greatly reduced, the dirty throttling logic may allow too many pages to 171 * be dirtied leading to consecutive unnecessary OOMs and may get stuck in 172 * that situation. 173 */ 174static inline void wb_domain_size_changed(struct wb_domain *dom) 175{ 176 spin_lock(&dom->lock); 177 dom->dirty_limit_tstamp = jiffies; 178 dom->dirty_limit = 0; 179 spin_unlock(&dom->lock); 180} 181 182/* 183 * fs/fs-writeback.c 184 */ 185struct bdi_writeback; 186void writeback_inodes_sb(struct super_block *, enum wb_reason reason); 187void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, 188 enum wb_reason reason); 189bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); 190bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, 191 enum wb_reason reason); 192void sync_inodes_sb(struct super_block *); 193void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); 194void inode_wait_for_writeback(struct inode *inode); 195 196/* writeback.h requires fs.h; it, too, is not included from here. */ 197static inline void wait_on_inode(struct inode *inode) 198{ 199 might_sleep(); 200 wait_on_bit(&inode->i_state, __I_NEW, TASK_UNINTERRUPTIBLE); 201} 202 203#ifdef CONFIG_CGROUP_WRITEBACK 204 205#include <linux/cgroup.h> 206#include <linux/bio.h> 207 208void __inode_attach_wb(struct inode *inode, struct page *page); 209void wbc_attach_and_unlock_inode(struct writeback_control *wbc, 210 struct inode *inode) 211 __releases(&inode->i_lock); 212void wbc_detach_inode(struct writeback_control *wbc); 213void wbc_account_io(struct writeback_control *wbc, struct page *page, 214 size_t bytes); 215void cgroup_writeback_umount(void); 216 217/** 218 * inode_attach_wb - associate an inode with its wb 219 * @inode: inode of interest 220 * @page: page being dirtied (may be NULL) 221 * 222 * If @inode doesn't have its wb, associate it with the wb matching the 223 * memcg of @page or, if @page is NULL, %current. May be called w/ or w/o 224 * @inode->i_lock. 225 */ 226static inline void inode_attach_wb(struct inode *inode, struct page *page) 227{ 228 if (!inode->i_wb) 229 __inode_attach_wb(inode, page); 230} 231 232/** 233 * inode_detach_wb - disassociate an inode from its wb 234 * @inode: inode of interest 235 * 236 * @inode is being freed. Detach from its wb. 237 */ 238static inline void inode_detach_wb(struct inode *inode) 239{ 240 if (inode->i_wb) { 241 WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); 242 wb_put(inode->i_wb); 243 inode->i_wb = NULL; 244 } 245} 246 247/** 248 * wbc_attach_fdatawrite_inode - associate wbc and inode for fdatawrite 249 * @wbc: writeback_control of interest 250 * @inode: target inode 251 * 252 * This function is to be used by __filemap_fdatawrite_range(), which is an 253 * alternative entry point into writeback code, and first ensures @inode is 254 * associated with a bdi_writeback and attaches it to @wbc. 255 */ 256static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, 257 struct inode *inode) 258{ 259 spin_lock(&inode->i_lock); 260 inode_attach_wb(inode, NULL); 261 wbc_attach_and_unlock_inode(wbc, inode); 262} 263 264/** 265 * wbc_init_bio - writeback specific initializtion of bio 266 * @wbc: writeback_control for the writeback in progress 267 * @bio: bio to be initialized 268 * 269 * @bio is a part of the writeback in progress controlled by @wbc. Perform 270 * writeback specific initialization. This is used to apply the cgroup 271 * writeback context. 272 */ 273static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) 274{ 275 /* 276 * pageout() path doesn't attach @wbc to the inode being written 277 * out. This is intentional as we don't want the function to block 278 * behind a slow cgroup. Ultimately, we want pageout() to kick off 279 * regular writeback instead of writing things out itself. 280 */ 281 if (wbc->wb) 282 bio_associate_blkcg(bio, wbc->wb->blkcg_css); 283} 284 285#else /* CONFIG_CGROUP_WRITEBACK */ 286 287static inline void inode_attach_wb(struct inode *inode, struct page *page) 288{ 289} 290 291static inline void inode_detach_wb(struct inode *inode) 292{ 293} 294 295static inline void wbc_attach_and_unlock_inode(struct writeback_control *wbc, 296 struct inode *inode) 297 __releases(&inode->i_lock) 298{ 299 spin_unlock(&inode->i_lock); 300} 301 302static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, 303 struct inode *inode) 304{ 305} 306 307static inline void wbc_detach_inode(struct writeback_control *wbc) 308{ 309} 310 311static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) 312{ 313} 314 315static inline void wbc_account_io(struct writeback_control *wbc, 316 struct page *page, size_t bytes) 317{ 318} 319 320static inline void cgroup_writeback_umount(void) 321{ 322} 323 324#endif /* CONFIG_CGROUP_WRITEBACK */ 325 326/* 327 * mm/page-writeback.c 328 */ 329#ifdef CONFIG_BLOCK 330void laptop_io_completion(struct backing_dev_info *info); 331void laptop_sync_completion(void); 332void laptop_mode_sync(struct work_struct *work); 333void laptop_mode_timer_fn(unsigned long data); 334#else 335static inline void laptop_sync_completion(void) { } 336#endif 337bool node_dirty_ok(struct pglist_data *pgdat); 338int wb_domain_init(struct wb_domain *dom, gfp_t gfp); 339#ifdef CONFIG_CGROUP_WRITEBACK 340void wb_domain_exit(struct wb_domain *dom); 341#endif 342 343extern struct wb_domain global_wb_domain; 344 345/* These are exported to sysctl. */ 346extern int dirty_background_ratio; 347extern unsigned long dirty_background_bytes; 348extern int vm_dirty_ratio; 349extern unsigned long vm_dirty_bytes; 350extern unsigned int dirty_writeback_interval; 351extern unsigned int dirty_expire_interval; 352extern unsigned int dirtytime_expire_interval; 353extern int vm_highmem_is_dirtyable; 354extern int block_dump; 355extern int laptop_mode; 356 357extern int dirty_background_ratio_handler(struct ctl_table *table, int write, 358 void __user *buffer, size_t *lenp, 359 loff_t *ppos); 360extern int dirty_background_bytes_handler(struct ctl_table *table, int write, 361 void __user *buffer, size_t *lenp, 362 loff_t *ppos); 363extern int dirty_ratio_handler(struct ctl_table *table, int write, 364 void __user *buffer, size_t *lenp, 365 loff_t *ppos); 366extern int dirty_bytes_handler(struct ctl_table *table, int write, 367 void __user *buffer, size_t *lenp, 368 loff_t *ppos); 369int dirtytime_interval_handler(struct ctl_table *table, int write, 370 void __user *buffer, size_t *lenp, loff_t *ppos); 371 372struct ctl_table; 373int dirty_writeback_centisecs_handler(struct ctl_table *, int, 374 void __user *, size_t *, loff_t *); 375 376void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); 377unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); 378 379void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time); 380void balance_dirty_pages_ratelimited(struct address_space *mapping); 381bool wb_over_bg_thresh(struct bdi_writeback *wb); 382 383typedef int (*writepage_t)(struct page *page, struct writeback_control *wbc, 384 void *data); 385 386int generic_writepages(struct address_space *mapping, 387 struct writeback_control *wbc); 388void tag_pages_for_writeback(struct address_space *mapping, 389 pgoff_t start, pgoff_t end); 390int write_cache_pages(struct address_space *mapping, 391 struct writeback_control *wbc, writepage_t writepage, 392 void *data); 393int do_writepages(struct address_space *mapping, struct writeback_control *wbc); 394void writeback_set_ratelimit(void); 395void tag_pages_for_writeback(struct address_space *mapping, 396 pgoff_t start, pgoff_t end); 397 398void account_page_redirty(struct page *page); 399 400void sb_mark_inode_writeback(struct inode *inode); 401void sb_clear_inode_writeback(struct inode *inode); 402 403#endif /* WRITEBACK_H */