Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs: convert mpage_readpages to mpage_readahead

Implement the new readahead aop and convert all callers (block_dev,
exfat, ext2, fat, gfs2, hpfs, isofs, jfs, nilfs2, ocfs2, omfs, qnx6,
reiserfs & udf).

The callers are all trivial except for GFS2 & OCFS2.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com> # ocfs2
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com> # ocfs2
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Cc: Chao Yu <yuchao0@huawei.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Darrick J. Wong <darrick.wong@oracle.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Gao Xiang <gaoxiang25@huawei.com>
Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Link: http://lkml.kernel.org/r/20200414150233.24495-17-willy@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Matthew Wilcox (Oracle) and committed by Linus Torvalds.
Commit d4388340 (parent f2c817be).

+73 -126
+3 -4
fs/block_dev.c
··· 614 614 return block_read_full_page(page, blkdev_get_block); 615 615 } 616 616 617 - static int blkdev_readpages(struct file *file, struct address_space *mapping, 618 - struct list_head *pages, unsigned nr_pages) 617 + static void blkdev_readahead(struct readahead_control *rac) 619 618 { 620 - return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); 619 + mpage_readahead(rac, blkdev_get_block); 621 620 } 622 621 623 622 static int blkdev_write_begin(struct file *file, struct address_space *mapping, ··· 2084 2085 2085 2086 static const struct address_space_operations def_blk_aops = { 2086 2087 .readpage = blkdev_readpage, 2087 - .readpages = blkdev_readpages, 2088 + .readahead = blkdev_readahead, 2088 2089 .writepage = blkdev_writepage, 2089 2090 .write_begin = blkdev_write_begin, 2090 2091 .write_end = blkdev_write_end,
+3 -4
fs/exfat/inode.c
··· 372 372 return mpage_readpage(page, exfat_get_block); 373 373 } 374 374 375 - static int exfat_readpages(struct file *file, struct address_space *mapping, 376 - struct list_head *pages, unsigned int nr_pages) 375 + static void exfat_readahead(struct readahead_control *rac) 377 376 { 378 - return mpage_readpages(mapping, pages, nr_pages, exfat_get_block); 377 + mpage_readahead(rac, exfat_get_block); 379 378 } 380 379 381 380 static int exfat_writepage(struct page *page, struct writeback_control *wbc) ··· 501 502 502 503 static const struct address_space_operations exfat_aops = { 503 504 .readpage = exfat_readpage, 504 - .readpages = exfat_readpages, 505 + .readahead = exfat_readahead, 505 506 .writepage = exfat_writepage, 506 507 .writepages = exfat_writepages, 507 508 .write_begin = exfat_write_begin,
+4 -6
fs/ext2/inode.c
··· 877 877 return mpage_readpage(page, ext2_get_block); 878 878 } 879 879 880 - static int 881 - ext2_readpages(struct file *file, struct address_space *mapping, 882 - struct list_head *pages, unsigned nr_pages) 880 + static void ext2_readahead(struct readahead_control *rac) 883 881 { 884 - return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); 882 + mpage_readahead(rac, ext2_get_block); 885 883 } 886 884 887 885 static int ··· 965 967 966 968 const struct address_space_operations ext2_aops = { 967 969 .readpage = ext2_readpage, 968 - .readpages = ext2_readpages, 970 + .readahead = ext2_readahead, 969 971 .writepage = ext2_writepage, 970 972 .write_begin = ext2_write_begin, 971 973 .write_end = ext2_write_end, ··· 979 981 980 982 const struct address_space_operations ext2_nobh_aops = { 981 983 .readpage = ext2_readpage, 982 - .readpages = ext2_readpages, 984 + .readahead = ext2_readahead, 983 985 .writepage = ext2_nobh_writepage, 984 986 .write_begin = ext2_nobh_write_begin, 985 987 .write_end = nobh_write_end,
+3 -4
fs/fat/inode.c
··· 210 210 return mpage_readpage(page, fat_get_block); 211 211 } 212 212 213 - static int fat_readpages(struct file *file, struct address_space *mapping, 214 - struct list_head *pages, unsigned nr_pages) 213 + static void fat_readahead(struct readahead_control *rac) 215 214 { 216 - return mpage_readpages(mapping, pages, nr_pages, fat_get_block); 215 + mpage_readahead(rac, fat_get_block); 217 216 } 218 217 219 218 static void fat_write_failed(struct address_space *mapping, loff_t to) ··· 343 344 344 345 static const struct address_space_operations fat_aops = { 345 346 .readpage = fat_readpage, 346 - .readpages = fat_readpages, 347 + .readahead = fat_readahead, 347 348 .writepage = fat_writepage, 348 349 .writepages = fat_writepages, 349 350 .write_begin = fat_write_begin,
+8 -15
fs/gfs2/aops.c
··· 577 577 } 578 578 579 579 /** 580 - * gfs2_readpages - Read a bunch of pages at once 580 + * gfs2_readahead - Read a bunch of pages at once 581 581 * @file: The file to read from 582 582 * @mapping: Address space info 583 583 * @pages: List of pages to read ··· 590 590 * obviously not something we'd want to do on too regular a basis. 591 591 * Any I/O we ignore at this time will be done via readpage later. 592 592 * 2. We don't handle stuffed files here we let readpage do the honours. 593 - * 3. mpage_readpages() does most of the heavy lifting in the common case. 593 + * 3. mpage_readahead() does most of the heavy lifting in the common case. 594 594 * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. 595 595 */ 596 596 597 - static int gfs2_readpages(struct file *file, struct address_space *mapping, 598 - struct list_head *pages, unsigned nr_pages) 597 + static void gfs2_readahead(struct readahead_control *rac) 599 598 { 600 - struct inode *inode = mapping->host; 599 + struct inode *inode = rac->mapping->host; 601 600 struct gfs2_inode *ip = GFS2_I(inode); 602 - struct gfs2_sbd *sdp = GFS2_SB(inode); 603 601 struct gfs2_holder gh; 604 - int ret; 605 602 606 603 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); 607 - ret = gfs2_glock_nq(&gh); 608 - if (unlikely(ret)) 604 + if (gfs2_glock_nq(&gh)) 609 605 goto out_uninit; 610 606 if (!gfs2_is_stuffed(ip)) 611 - ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); 607 + mpage_readahead(rac, gfs2_block_map); 612 608 gfs2_glock_dq(&gh); 613 609 out_uninit: 614 610 gfs2_holder_uninit(&gh); 615 - if (unlikely(gfs2_withdrawn(sdp))) 616 - ret = -EIO; 617 - return ret; 618 611 } 619 612 620 613 /** ··· 826 833 .writepage = gfs2_writepage, 827 834 .writepages = gfs2_writepages, 828 835 .readpage = gfs2_readpage, 829 - .readpages = gfs2_readpages, 836 + .readahead = gfs2_readahead, 830 837 .bmap = gfs2_bmap, 831 838 .invalidatepage = gfs2_invalidatepage, 832 839 .releasepage = gfs2_releasepage, ··· 840 847 .writepage = gfs2_jdata_writepage, 841 848 .writepages = gfs2_jdata_writepages, 842 849 .readpage = gfs2_readpage, 843 - .readpages = gfs2_readpages, 850 + .readahead = gfs2_readahead, 844 851 .set_page_dirty = jdata_set_page_dirty, 845 852 .bmap = gfs2_bmap, 846 853 .invalidatepage = gfs2_invalidatepage,
+3 -4
fs/hpfs/file.c
··· 125 125 return block_write_full_page(page, hpfs_get_block, wbc); 126 126 } 127 127 128 - static int hpfs_readpages(struct file *file, struct address_space *mapping, 129 - struct list_head *pages, unsigned nr_pages) 128 + static void hpfs_readahead(struct readahead_control *rac) 130 129 { 131 - return mpage_readpages(mapping, pages, nr_pages, hpfs_get_block); 130 + mpage_readahead(rac, hpfs_get_block); 132 131 } 133 132 134 133 static int hpfs_writepages(struct address_space *mapping, ··· 197 198 const struct address_space_operations hpfs_aops = { 198 199 .readpage = hpfs_readpage, 199 200 .writepage = hpfs_writepage, 200 - .readpages = hpfs_readpages, 201 + .readahead = hpfs_readahead, 201 202 .writepages = hpfs_writepages, 202 203 .write_begin = hpfs_write_begin, 203 204 .write_end = hpfs_write_end,
+1 -1
fs/iomap/buffered-io.c
··· 367 367 } 368 368 369 369 /* 370 - * Just like mpage_readpages and block_read_full_page we always 370 + * Just like mpage_readahead and block_read_full_page we always 371 371 * return 0 and just mark the page as PageError on errors. This 372 372 * should be cleaned up all through the stack eventually. 373 373 */
+3 -4
fs/isofs/inode.c
··· 1185 1185 return mpage_readpage(page, isofs_get_block); 1186 1186 } 1187 1187 1188 - static int isofs_readpages(struct file *file, struct address_space *mapping, 1189 - struct list_head *pages, unsigned nr_pages) 1188 + static void isofs_readahead(struct readahead_control *rac) 1190 1189 { 1191 - return mpage_readpages(mapping, pages, nr_pages, isofs_get_block); 1190 + mpage_readahead(rac, isofs_get_block); 1192 1191 } 1193 1192 1194 1193 static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) ··· 1197 1198 1198 1199 static const struct address_space_operations isofs_aops = { 1199 1200 .readpage = isofs_readpage, 1200 - .readpages = isofs_readpages, 1201 + .readahead = isofs_readahead, 1201 1202 .bmap = _isofs_bmap 1202 1203 }; 1203 1204
+3 -4
fs/jfs/inode.c
··· 296 296 return mpage_readpage(page, jfs_get_block); 297 297 } 298 298 299 - static int jfs_readpages(struct file *file, struct address_space *mapping, 300 - struct list_head *pages, unsigned nr_pages) 299 + static void jfs_readahead(struct readahead_control *rac) 301 300 { 302 - return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); 301 + mpage_readahead(rac, jfs_get_block); 303 302 } 304 303 305 304 static void jfs_write_failed(struct address_space *mapping, loff_t to) ··· 357 358 358 359 const struct address_space_operations jfs_aops = { 359 360 .readpage = jfs_readpage, 360 - .readpages = jfs_readpages, 361 + .readahead = jfs_readahead, 361 362 .writepage = jfs_writepage, 362 363 .writepages = jfs_writepages, 363 364 .write_begin = jfs_write_begin,
+11 -27
fs/mpage.c
··· 91 91 } 92 92 93 93 /* 94 - * support function for mpage_readpages. The fs supplied get_block might 94 + * support function for mpage_readahead. The fs supplied get_block might 95 95 * return an up to date buffer. This is used to map that buffer into 96 96 * the page, which allows readpage to avoid triggering a duplicate call 97 97 * to get_block. ··· 338 338 } 339 339 340 340 /** 341 - * mpage_readpages - populate an address space with some pages & start reads against them 342 - * @mapping: the address_space 343 - * @pages: The address of a list_head which contains the target pages. These 344 - * pages have their ->index populated and are otherwise uninitialised. 345 - * The page at @pages->prev has the lowest file offset, and reads should be 346 - * issued in @pages->prev to @pages->next order. 347 - * @nr_pages: The number of pages at *@pages 341 + * mpage_readahead - start reads against pages 342 + * @rac: Describes which pages to read. 348 343 * @get_block: The filesystem's block mapper function. 349 344 * 350 345 * This function walks the pages and the blocks within each page, building and ··· 376 381 * 377 382 * This all causes the disk requests to be issued in the correct order. 378 383 */ 379 - int 380 - mpage_readpages(struct address_space *mapping, struct list_head *pages, 381 - unsigned nr_pages, get_block_t get_block) 384 + void mpage_readahead(struct readahead_control *rac, get_block_t get_block) 382 385 { 386 + struct page *page; 383 387 struct mpage_readpage_args args = { 384 388 .get_block = get_block, 385 389 .is_readahead = true, 386 390 }; 387 - unsigned page_idx; 388 391 389 - for (page_idx = 0; page_idx < nr_pages; page_idx++) { 390 - struct page *page = lru_to_page(pages); 391 - 392 + while ((page = readahead_page(rac))) { 392 393 prefetchw(&page->flags); 393 - list_del(&page->lru); 394 - if (!add_to_page_cache_lru(page, mapping, 395 - page->index, 396 - readahead_gfp_mask(mapping))) { 397 - args.page = page; 398 - args.nr_pages = nr_pages - page_idx; 399 - args.bio = do_mpage_readpage(&args); 400 - } 394 + args.page = page; 395 + args.nr_pages = readahead_count(rac); 396 + args.bio = do_mpage_readpage(&args); 401 397 put_page(page); 402 398 } 403 - BUG_ON(!list_empty(pages)); 404 399 if (args.bio) 405 400 mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio); 406 - return 0; 407 401 } 408 - EXPORT_SYMBOL(mpage_readpages); 402 + EXPORT_SYMBOL(mpage_readahead); 409 403 410 404 /* 411 405 * This isn't called much at all ··· 547 563 * Page has buffers, but they are all unmapped. The page was 548 564 * created by pagein or read over a hole which was handled by 549 565 * block_read_full_page(). If this address_space is also 550 - * using mpage_readpages then this can rarely happen. 566 + * using mpage_readahead then this can rarely happen. 551 567 */ 552 568 goto confused; 553 569 }
+3 -12
fs/nilfs2/inode.c
··· 145 145 return mpage_readpage(page, nilfs_get_block); 146 146 } 147 147 148 - /** 149 - * nilfs_readpages() - implement readpages() method of nilfs_aops {} 150 - * address_space_operations. 151 - * @file - file struct of the file to be read 152 - * @mapping - address_space struct used for reading multiple pages 153 - * @pages - the pages to be read 154 - * @nr_pages - number of pages to be read 155 - */ 156 - static int nilfs_readpages(struct file *file, struct address_space *mapping, 157 - struct list_head *pages, unsigned int nr_pages) 148 + static void nilfs_readahead(struct readahead_control *rac) 158 149 { 159 - return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); 150 + mpage_readahead(rac, nilfs_get_block); 160 151 } 161 152 162 153 static int nilfs_writepages(struct address_space *mapping, ··· 299 308 .readpage = nilfs_readpage, 300 309 .writepages = nilfs_writepages, 301 310 .set_page_dirty = nilfs_set_page_dirty, 302 - .readpages = nilfs_readpages, 311 + .readahead = nilfs_readahead, 303 312 .write_begin = nilfs_write_begin, 304 313 .write_end = nilfs_write_end, 305 314 /* .releasepage = nilfs_releasepage, */
+13 -21
fs/ocfs2/aops.c
··· 350 350 * grow out to a tree. If need be, detecting boundary extents could 351 351 * trivially be added in a future version of ocfs2_get_block(). 352 352 */ 353 - static int ocfs2_readpages(struct file *filp, struct address_space *mapping, 354 - struct list_head *pages, unsigned nr_pages) 353 + static void ocfs2_readahead(struct readahead_control *rac) 355 354 { 356 - int ret, err = -EIO; 357 - struct inode *inode = mapping->host; 355 + int ret; 356 + struct inode *inode = rac->mapping->host; 358 357 struct ocfs2_inode_info *oi = OCFS2_I(inode); 359 - loff_t start; 360 - struct page *last; 361 358 362 359 /* 363 360 * Use the nonblocking flag for the dlm code to avoid page ··· 362 365 */ 363 366 ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); 364 367 if (ret) 365 - return err; 368 + return; 366 369 367 - if (down_read_trylock(&oi->ip_alloc_sem) == 0) { 368 - ocfs2_inode_unlock(inode, 0); 369 - return err; 370 - } 370 + if (down_read_trylock(&oi->ip_alloc_sem) == 0) 371 + goto out_unlock; 371 372 372 373 /* 373 374 * Don't bother with inline-data. There isn't anything 374 375 * to read-ahead in that case anyway... 375 376 */ 376 377 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) 377 - goto out_unlock; 378 + goto out_up; 378 379 379 380 /* 380 381 * Check whether a remote node truncated this file - we just 381 382 * drop out in that case as it's not worth handling here. 382 383 */ 383 - last = lru_to_page(pages); 384 - start = (loff_t)last->index << PAGE_SHIFT; 385 - if (start >= i_size_read(inode)) 386 - goto out_unlock; 384 + if (readahead_pos(rac) >= i_size_read(inode)) 385 + goto out_up; 387 386 388 - err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); 387 + mpage_readahead(rac, ocfs2_get_block); 389 388 390 - out_unlock: 389 + out_up: 391 390 up_read(&oi->ip_alloc_sem); 391 + out_unlock: 392 392 ocfs2_inode_unlock(inode, 0); 393 - 394 - return err; 395 393 } 396 394 397 395 /* Note: Because we don't support holes, our allocation has ··· 2466 2474 2467 2475 const struct address_space_operations ocfs2_aops = { 2468 2476 .readpage = ocfs2_readpage, 2469 - .readpages = ocfs2_readpages, 2477 + .readahead = ocfs2_readahead, 2470 2478 .writepage = ocfs2_writepage, 2471 2479 .write_begin = ocfs2_write_begin, 2472 2480 .write_end = ocfs2_write_end,
+3 -4
fs/omfs/file.c
··· 289 289 return block_read_full_page(page, omfs_get_block); 290 290 } 291 291 292 - static int omfs_readpages(struct file *file, struct address_space *mapping, 293 - struct list_head *pages, unsigned nr_pages) 292 + static void omfs_readahead(struct readahead_control *rac) 294 293 { 295 - return mpage_readpages(mapping, pages, nr_pages, omfs_get_block); 294 + mpage_readahead(rac, omfs_get_block); 296 295 } 297 296 298 297 static int omfs_writepage(struct page *page, struct writeback_control *wbc) ··· 372 373 373 374 const struct address_space_operations omfs_aops = { 374 375 .readpage = omfs_readpage, 375 - .readpages = omfs_readpages, 376 + .readahead = omfs_readahead, 376 377 .writepage = omfs_writepage, 377 378 .writepages = omfs_writepages, 378 379 .write_begin = omfs_write_begin,
+3 -4
fs/qnx6/inode.c
··· 99 99 return mpage_readpage(page, qnx6_get_block); 100 100 } 101 101 102 - static int qnx6_readpages(struct file *file, struct address_space *mapping, 103 - struct list_head *pages, unsigned nr_pages) 102 + static void qnx6_readahead(struct readahead_control *rac) 104 103 { 105 - return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block); 104 + mpage_readahead(rac, qnx6_get_block); 106 105 } 107 106 108 107 /* ··· 498 499 } 499 500 static const struct address_space_operations qnx6_aops = { 500 501 .readpage = qnx6_readpage, 501 - .readpages = qnx6_readpages, 502 + .readahead = qnx6_readahead, 502 503 .bmap = qnx6_bmap 503 504 }; 504 505
+3 -5
fs/reiserfs/inode.c
··· 1160 1160 return retval; 1161 1161 } 1162 1162 1163 - static int 1164 - reiserfs_readpages(struct file *file, struct address_space *mapping, 1165 - struct list_head *pages, unsigned nr_pages) 1163 + static void reiserfs_readahead(struct readahead_control *rac) 1166 1164 { 1167 - return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); 1165 + mpage_readahead(rac, reiserfs_get_block); 1168 1166 } 1169 1167 1170 1168 /* ··· 3432 3434 const struct address_space_operations reiserfs_address_space_operations = { 3433 3435 .writepage = reiserfs_writepage, 3434 3436 .readpage = reiserfs_readpage, 3435 - .readpages = reiserfs_readpages, 3437 + .readahead = reiserfs_readahead, 3436 3438 .releasepage = reiserfs_releasepage, 3437 3439 .invalidatepage = reiserfs_invalidatepage, 3438 3440 .write_begin = reiserfs_write_begin,
+3 -4
fs/udf/inode.c
··· 195 195 return mpage_readpage(page, udf_get_block); 196 196 } 197 197 198 - static int udf_readpages(struct file *file, struct address_space *mapping, 199 - struct list_head *pages, unsigned nr_pages) 198 + static void udf_readahead(struct readahead_control *rac) 200 199 { 201 - return mpage_readpages(mapping, pages, nr_pages, udf_get_block); 200 + mpage_readahead(rac, udf_get_block); 202 201 } 203 202 204 203 static int udf_write_begin(struct file *file, struct address_space *mapping, ··· 233 234 234 235 const struct address_space_operations udf_aops = { 235 236 .readpage = udf_readpage, 236 - .readpages = udf_readpages, 237 + .readahead = udf_readahead, 237 238 .writepage = udf_writepage, 238 239 .writepages = udf_writepages, 239 240 .write_begin = udf_write_begin,
+2 -2
include/linux/mpage.h
··· 13 13 #ifdef CONFIG_BLOCK 14 14 15 15 struct writeback_control; 16 + struct readahead_control; 16 17 17 - int mpage_readpages(struct address_space *mapping, struct list_head *pages, 18 - unsigned nr_pages, get_block_t get_block); 18 + void mpage_readahead(struct readahead_control *, get_block_t get_block); 19 19 int mpage_readpage(struct page *page, get_block_t get_block); 20 20 int mpage_writepages(struct address_space *mapping, 21 21 struct writeback_control *wbc, get_block_t get_block);
+1 -1
mm/migrate.c
··· 1032 1032 * to the LRU. Later, when the IO completes the pages are 1033 1033 * marked uptodate and unlocked. However, the queueing 1034 1034 * could be merging multiple pages for one bio (e.g. 1035 - * mpage_readpages). If an allocation happens for the 1035 + * mpage_readahead). If an allocation happens for the 1036 1036 * second or third page, the process can end up locking 1037 1037 * the same page twice and deadlocking. Rather than 1038 1038 * trying to be clever about what pages can be locked,