Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm + fs: prepare for non-page entries in page cache radix trees

shmem mappings already contain exceptional entries where swap slot
information is remembered.

To be able to store eviction information for regular page cache, prepare
every site dealing with the radix trees directly to handle entries other
than pages.

The common lookup functions will filter out non-page entries and return
NULL for page cache holes, just as before. But provide a raw version of
the API which returns non-page entries as well, and switch shmem over to
use it.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan@kernel.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Bob Liu <bob.liu@oracle.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Metin Doslu <metin@citusdata.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ozgun Erdogan <ozgun@citusdata.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <klamm@yandex-team.ru>
Cc: Ryan Mallon <rmallon@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Johannes Weiner; committed by Linus Torvalds.
0cd6144a e7b563bb

+349 -130
+1 -1
fs/btrfs/compression.c
··· 472 472 rcu_read_lock(); 473 473 page = radix_tree_lookup(&mapping->page_tree, pg_index); 474 474 rcu_read_unlock(); 475 - if (page) { 475 + if (page && !radix_tree_exceptional_entry(page)) { 476 476 misses++; 477 477 if (misses > 4) 478 478 break;
+8
include/linux/mm.h
··· 1041 1041 extern bool skip_free_areas_node(unsigned int flags, int nid); 1042 1042 1043 1043 int shmem_zero_setup(struct vm_area_struct *); 1044 + #ifdef CONFIG_SHMEM 1045 + bool shmem_mapping(struct address_space *mapping); 1046 + #else 1047 + static inline bool shmem_mapping(struct address_space *mapping) 1048 + { 1049 + return false; 1050 + } 1051 + #endif 1044 1052 1045 1053 extern int can_do_mlock(void); 1046 1054 extern int user_shm_lock(size_t, struct user_struct *);
+9 -6
include/linux/pagemap.h
··· 248 248 pgoff_t page_cache_prev_hole(struct address_space *mapping, 249 249 pgoff_t index, unsigned long max_scan); 250 250 251 - extern struct page * find_get_page(struct address_space *mapping, 252 - pgoff_t index); 253 - extern struct page * find_lock_page(struct address_space *mapping, 254 - pgoff_t index); 255 - extern struct page * find_or_create_page(struct address_space *mapping, 256 - pgoff_t index, gfp_t gfp_mask); 251 + struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); 252 + struct page *find_get_page(struct address_space *mapping, pgoff_t offset); 253 + struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); 254 + struct page *find_lock_page(struct address_space *mapping, pgoff_t offset); 255 + struct page *find_or_create_page(struct address_space *mapping, pgoff_t index, 256 + gfp_t gfp_mask); 257 + unsigned find_get_entries(struct address_space *mapping, pgoff_t start, 258 + unsigned int nr_entries, struct page **entries, 259 + pgoff_t *indices); 257 260 unsigned find_get_pages(struct address_space *mapping, pgoff_t start, 258 261 unsigned int nr_pages, struct page **pages); 259 262 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
+5
include/linux/pagevec.h
··· 22 22 23 23 void __pagevec_release(struct pagevec *pvec); 24 24 void __pagevec_lru_add(struct pagevec *pvec); 25 + unsigned pagevec_lookup_entries(struct pagevec *pvec, 26 + struct address_space *mapping, 27 + pgoff_t start, unsigned nr_entries, 28 + pgoff_t *indices); 29 + void pagevec_remove_exceptionals(struct pagevec *pvec); 25 30 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, 26 31 pgoff_t start, unsigned nr_pages); 27 32 unsigned pagevec_lookup_tag(struct pagevec *pvec,
+1
include/linux/shmem_fs.h
··· 51 51 unsigned long flags); 52 52 extern int shmem_zero_setup(struct vm_area_struct *); 53 53 extern int shmem_lock(struct file *file, int lock, struct user_struct *user); 54 + extern bool shmem_mapping(struct address_space *mapping); 54 55 extern void shmem_unlock_mapping(struct address_space *mapping); 55 56 extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, 56 57 pgoff_t index, gfp_t gfp_mask);
+178 -24
mm/filemap.c
··· 446 446 } 447 447 EXPORT_SYMBOL_GPL(replace_page_cache_page); 448 448 449 + static int page_cache_tree_insert(struct address_space *mapping, 450 + struct page *page) 451 + { 452 + void **slot; 453 + int error; 454 + 455 + slot = radix_tree_lookup_slot(&mapping->page_tree, page->index); 456 + if (slot) { 457 + void *p; 458 + 459 + p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); 460 + if (!radix_tree_exceptional_entry(p)) 461 + return -EEXIST; 462 + radix_tree_replace_slot(slot, page); 463 + mapping->nrpages++; 464 + return 0; 465 + } 466 + error = radix_tree_insert(&mapping->page_tree, page->index, page); 467 + if (!error) 468 + mapping->nrpages++; 469 + return error; 470 + } 471 + 449 472 /** 450 473 * add_to_page_cache_locked - add a locked page to the pagecache 451 474 * @page: page to add ··· 503 480 page->index = offset; 504 481 505 482 spin_lock_irq(&mapping->tree_lock); 506 - error = radix_tree_insert(&mapping->page_tree, offset, page); 483 + error = page_cache_tree_insert(mapping, page); 507 484 radix_tree_preload_end(); 508 485 if (unlikely(error)) 509 486 goto err_insert; 510 - mapping->nrpages++; 511 487 __inc_zone_page_state(page, NR_FILE_PAGES); 512 488 spin_unlock_irq(&mapping->tree_lock); 513 489 trace_mm_filemap_add_to_page_cache(page); ··· 734 712 unsigned long i; 735 713 736 714 for (i = 0; i < max_scan; i++) { 737 - if (!radix_tree_lookup(&mapping->page_tree, index)) 715 + struct page *page; 716 + 717 + page = radix_tree_lookup(&mapping->page_tree, index); 718 + if (!page || radix_tree_exceptional_entry(page)) 738 719 break; 739 720 index++; 740 721 if (index == 0) ··· 775 750 unsigned long i; 776 751 777 752 for (i = 0; i < max_scan; i++) { 778 - if (!radix_tree_lookup(&mapping->page_tree, index)) 753 + struct page *page; 754 + 755 + page = radix_tree_lookup(&mapping->page_tree, index); 756 + if (!page || radix_tree_exceptional_entry(page)) 779 757 break; 780 758 index--; 781 759 if (index == ULONG_MAX) ··· 790 762 
EXPORT_SYMBOL(page_cache_prev_hole); 791 763 792 764 /** 793 - * find_get_page - find and get a page reference 765 + * find_get_entry - find and get a page cache entry 794 766 * @mapping: the address_space to search 795 - * @offset: the page index 767 + * @offset: the page cache index 796 768 * 797 - * Is there a pagecache struct page at the given (mapping, offset) tuple? 798 - * If yes, increment its refcount and return it; if no, return NULL. 769 + * Looks up the page cache slot at @mapping & @offset. If there is a 770 + * page cache page, it is returned with an increased refcount. 771 + * 772 + * If the slot holds a shadow entry of a previously evicted page, it 773 + * is returned. 774 + * 775 + * Otherwise, %NULL is returned. 799 776 */ 800 - struct page *find_get_page(struct address_space *mapping, pgoff_t offset) 777 + struct page *find_get_entry(struct address_space *mapping, pgoff_t offset) 801 778 { 802 779 void **pagep; 803 780 struct page *page; ··· 843 810 844 811 return page; 845 812 } 846 - EXPORT_SYMBOL(find_get_page); 813 + EXPORT_SYMBOL(find_get_entry); 847 814 848 815 /** 849 - * find_lock_page - locate, pin and lock a pagecache page 816 + * find_get_page - find and get a page reference 850 817 * @mapping: the address_space to search 851 818 * @offset: the page index 852 819 * 853 - * Locates the desired pagecache page, locks it, increments its reference 854 - * count and returns its address. 820 + * Looks up the page cache slot at @mapping & @offset. If there is a 821 + * page cache page, it is returned with an increased refcount. 855 822 * 856 - * Returns zero if the page was not present. find_lock_page() may sleep. 823 + * Otherwise, %NULL is returned. 
857 824 */ 858 - struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) 825 + struct page *find_get_page(struct address_space *mapping, pgoff_t offset) 826 + { 827 + struct page *page = find_get_entry(mapping, offset); 828 + 829 + if (radix_tree_exceptional_entry(page)) 830 + page = NULL; 831 + return page; 832 + } 833 + EXPORT_SYMBOL(find_get_page); 834 + 835 + /** 836 + * find_lock_entry - locate, pin and lock a page cache entry 837 + * @mapping: the address_space to search 838 + * @offset: the page cache index 839 + * 840 + * Looks up the page cache slot at @mapping & @offset. If there is a 841 + * page cache page, it is returned locked and with an increased 842 + * refcount. 843 + * 844 + * If the slot holds a shadow entry of a previously evicted page, it 845 + * is returned. 846 + * 847 + * Otherwise, %NULL is returned. 848 + * 849 + * find_lock_entry() may sleep. 850 + */ 851 + struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset) 859 852 { 860 853 struct page *page; 861 854 862 855 repeat: 863 - page = find_get_page(mapping, offset); 856 + page = find_get_entry(mapping, offset); 864 857 if (page && !radix_tree_exception(page)) { 865 858 lock_page(page); 866 859 /* Has the page been truncated? */ ··· 899 840 } 900 841 return page; 901 842 } 843 + EXPORT_SYMBOL(find_lock_entry); 844 + 845 + /** 846 + * find_lock_page - locate, pin and lock a pagecache page 847 + * @mapping: the address_space to search 848 + * @offset: the page index 849 + * 850 + * Looks up the page cache slot at @mapping & @offset. If there is a 851 + * page cache page, it is returned locked and with an increased 852 + * refcount. 853 + * 854 + * Otherwise, %NULL is returned. 855 + * 856 + * find_lock_page() may sleep. 
857 + */ 858 + struct page *find_lock_page(struct address_space *mapping, pgoff_t offset) 859 + { 860 + struct page *page = find_lock_entry(mapping, offset); 861 + 862 + if (radix_tree_exceptional_entry(page)) 863 + page = NULL; 864 + return page; 865 + } 902 866 EXPORT_SYMBOL(find_lock_page); 903 867 904 868 /** ··· 930 848 * @index: the page's index into the mapping 931 849 * @gfp_mask: page allocation mode 932 850 * 933 - * Locates a page in the pagecache. If the page is not present, a new page 934 - * is allocated using @gfp_mask and is added to the pagecache and to the VM's 935 - * LRU list. The returned page is locked and has its reference count 936 - * incremented. 851 + * Looks up the page cache slot at @mapping & @offset. If there is a 852 + * page cache page, it is returned locked and with an increased 853 + * refcount. 937 854 * 938 - * find_or_create_page() may sleep, even if @gfp_flags specifies an atomic 939 - * allocation! 855 + * If the page is not present, a new page is allocated using @gfp_mask 856 + * and added to the page cache and the VM's LRU list. The page is 857 + * returned locked and with an increased refcount. 940 858 * 941 - * find_or_create_page() returns the desired page's address, or zero on 942 - * memory exhaustion. 859 + * On memory exhaustion, %NULL is returned. 860 + * 861 + * find_or_create_page() may sleep, even if @gfp_flags specifies an 862 + * atomic allocation! 
943 863 */ 944 864 struct page *find_or_create_page(struct address_space *mapping, 945 865 pgoff_t index, gfp_t gfp_mask) ··· 972 888 return page; 973 889 } 974 890 EXPORT_SYMBOL(find_or_create_page); 891 + 892 + /** 893 + * find_get_entries - gang pagecache lookup 894 + * @mapping: The address_space to search 895 + * @start: The starting page cache index 896 + * @nr_entries: The maximum number of entries 897 + * @entries: Where the resulting entries are placed 898 + * @indices: The cache indices corresponding to the entries in @entries 899 + * 900 + * find_get_entries() will search for and return a group of up to 901 + * @nr_entries entries in the mapping. The entries are placed at 902 + * @entries. find_get_entries() takes a reference against any actual 903 + * pages it returns. 904 + * 905 + * The search returns a group of mapping-contiguous page cache entries 906 + * with ascending indexes. There may be holes in the indices due to 907 + * not-present pages. 908 + * 909 + * Any shadow entries of evicted pages are included in the returned 910 + * array. 911 + * 912 + * find_get_entries() returns the number of pages and shadow entries 913 + * which were found. 914 + */ 915 + unsigned find_get_entries(struct address_space *mapping, 916 + pgoff_t start, unsigned int nr_entries, 917 + struct page **entries, pgoff_t *indices) 918 + { 919 + void **slot; 920 + unsigned int ret = 0; 921 + struct radix_tree_iter iter; 922 + 923 + if (!nr_entries) 924 + return 0; 925 + 926 + rcu_read_lock(); 927 + restart: 928 + radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { 929 + struct page *page; 930 + repeat: 931 + page = radix_tree_deref_slot(slot); 932 + if (unlikely(!page)) 933 + continue; 934 + if (radix_tree_exception(page)) { 935 + if (radix_tree_deref_retry(page)) 936 + goto restart; 937 + /* 938 + * Otherwise, we must be storing a swap entry 939 + * here as an exceptional entry: so return it 940 + * without attempting to raise page count. 
941 + */ 942 + goto export; 943 + } 944 + if (!page_cache_get_speculative(page)) 945 + goto repeat; 946 + 947 + /* Has the page moved? */ 948 + if (unlikely(page != *slot)) { 949 + page_cache_release(page); 950 + goto repeat; 951 + } 952 + export: 953 + indices[ret] = iter.index; 954 + entries[ret] = page; 955 + if (++ret == nr_entries) 956 + break; 957 + } 958 + rcu_read_unlock(); 959 + return ret; 960 + } 975 961 976 962 /** 977 963 * find_get_pages - gang pagecache lookup
+14 -6
mm/mincore.c
··· 70 70 * any other file mapping (ie. marked !present and faulted in with 71 71 * tmpfs's .fault). So swapped out tmpfs mappings are tested here. 72 72 */ 73 - page = find_get_page(mapping, pgoff); 74 73 #ifdef CONFIG_SWAP 75 - /* shmem/tmpfs may return swap: account for swapcache page too. */ 76 - if (radix_tree_exceptional_entry(page)) { 77 - swp_entry_t swap = radix_to_swp_entry(page); 78 - page = find_get_page(swap_address_space(swap), swap.val); 79 - } 74 + if (shmem_mapping(mapping)) { 75 + page = find_get_entry(mapping, pgoff); 76 + /* 77 + * shmem/tmpfs may return swap: account for swapcache 78 + * page too. 79 + */ 80 + if (radix_tree_exceptional_entry(page)) { 81 + swp_entry_t swp = radix_to_swp_entry(page); 82 + page = find_get_page(swap_address_space(swp), swp.val); 83 + } 84 + } else 85 + page = find_get_page(mapping, pgoff); 86 + #else 87 + page = find_get_page(mapping, pgoff); 80 88 #endif 81 89 if (page) { 82 90 present = PageUptodate(page);
+1 -1
mm/readahead.c
··· 179 179 rcu_read_lock(); 180 180 page = radix_tree_lookup(&mapping->page_tree, page_offset); 181 181 rcu_read_unlock(); 182 - if (page) 182 + if (page && !radix_tree_exceptional_entry(page)) 183 183 continue; 184 184 185 185 page = page_cache_alloc_readahead(mapping);
+20 -79
mm/shmem.c
··· 329 329 } 330 330 331 331 /* 332 - * Like find_get_pages, but collecting swap entries as well as pages. 333 - */ 334 - static unsigned shmem_find_get_pages_and_swap(struct address_space *mapping, 335 - pgoff_t start, unsigned int nr_pages, 336 - struct page **pages, pgoff_t *indices) 337 - { 338 - void **slot; 339 - unsigned int ret = 0; 340 - struct radix_tree_iter iter; 341 - 342 - if (!nr_pages) 343 - return 0; 344 - 345 - rcu_read_lock(); 346 - restart: 347 - radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { 348 - struct page *page; 349 - repeat: 350 - page = radix_tree_deref_slot(slot); 351 - if (unlikely(!page)) 352 - continue; 353 - if (radix_tree_exception(page)) { 354 - if (radix_tree_deref_retry(page)) 355 - goto restart; 356 - /* 357 - * Otherwise, we must be storing a swap entry 358 - * here as an exceptional entry: so return it 359 - * without attempting to raise page count. 360 - */ 361 - goto export; 362 - } 363 - if (!page_cache_get_speculative(page)) 364 - goto repeat; 365 - 366 - /* Has the page moved? */ 367 - if (unlikely(page != *slot)) { 368 - page_cache_release(page); 369 - goto repeat; 370 - } 371 - export: 372 - indices[ret] = iter.index; 373 - pages[ret] = page; 374 - if (++ret == nr_pages) 375 - break; 376 - } 377 - rcu_read_unlock(); 378 - return ret; 379 - } 380 - 381 - /* 382 332 * Remove swap entry from radix tree, free the swap and its page cache. 383 333 */ 384 334 static int shmem_free_swap(struct address_space *mapping, ··· 343 393 return -ENOENT; 344 394 free_swap_and_cache(radix_to_swp_entry(radswap)); 345 395 return 0; 346 - } 347 - 348 - /* 349 - * Pagevec may contain swap entries, so shuffle up pages before releasing. 
350 - */ 351 - static void shmem_deswap_pagevec(struct pagevec *pvec) 352 - { 353 - int i, j; 354 - 355 - for (i = 0, j = 0; i < pagevec_count(pvec); i++) { 356 - struct page *page = pvec->pages[i]; 357 - if (!radix_tree_exceptional_entry(page)) 358 - pvec->pages[j++] = page; 359 - } 360 - pvec->nr = j; 361 396 } 362 397 363 398 /* ··· 363 428 * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it 364 429 * has finished, if it hits a row of PAGEVEC_SIZE swap entries. 365 430 */ 366 - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 367 - PAGEVEC_SIZE, pvec.pages, indices); 431 + pvec.nr = find_get_entries(mapping, index, 432 + PAGEVEC_SIZE, pvec.pages, indices); 368 433 if (!pvec.nr) 369 434 break; 370 435 index = indices[pvec.nr - 1] + 1; 371 - shmem_deswap_pagevec(&pvec); 436 + pagevec_remove_exceptionals(&pvec); 372 437 check_move_unevictable_pages(pvec.pages, pvec.nr); 373 438 pagevec_release(&pvec); 374 439 cond_resched(); ··· 400 465 pagevec_init(&pvec, 0); 401 466 index = start; 402 467 while (index < end) { 403 - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 404 - min(end - index, (pgoff_t)PAGEVEC_SIZE), 405 - pvec.pages, indices); 468 + pvec.nr = find_get_entries(mapping, index, 469 + min(end - index, (pgoff_t)PAGEVEC_SIZE), 470 + pvec.pages, indices); 406 471 if (!pvec.nr) 407 472 break; 408 473 mem_cgroup_uncharge_start(); ··· 431 496 } 432 497 unlock_page(page); 433 498 } 434 - shmem_deswap_pagevec(&pvec); 499 + pagevec_remove_exceptionals(&pvec); 435 500 pagevec_release(&pvec); 436 501 mem_cgroup_uncharge_end(); 437 502 cond_resched(); ··· 469 534 index = start; 470 535 for ( ; ; ) { 471 536 cond_resched(); 472 - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 537 + 538 + pvec.nr = find_get_entries(mapping, index, 473 539 min(end - index, (pgoff_t)PAGEVEC_SIZE), 474 - pvec.pages, indices); 540 + pvec.pages, indices); 475 541 if (!pvec.nr) { 476 542 if (index == start || unfalloc) 477 543 break; ··· 480 544 continue; 481 
545 } 482 546 if ((index == start || unfalloc) && indices[0] >= end) { 483 - shmem_deswap_pagevec(&pvec); 547 + pagevec_remove_exceptionals(&pvec); 484 548 pagevec_release(&pvec); 485 549 break; 486 550 } ··· 509 573 } 510 574 unlock_page(page); 511 575 } 512 - shmem_deswap_pagevec(&pvec); 576 + pagevec_remove_exceptionals(&pvec); 513 577 pagevec_release(&pvec); 514 578 mem_cgroup_uncharge_end(); 515 579 index++; ··· 1015 1079 return -EFBIG; 1016 1080 repeat: 1017 1081 swap.val = 0; 1018 - page = find_lock_page(mapping, index); 1082 + page = find_lock_entry(mapping, index); 1019 1083 if (radix_tree_exceptional_entry(page)) { 1020 1084 swap = radix_to_swp_entry(page); 1021 1085 page = NULL; ··· 1352 1416 return inode; 1353 1417 } 1354 1418 1419 + bool shmem_mapping(struct address_space *mapping) 1420 + { 1421 + return mapping->backing_dev_info == &shmem_backing_dev_info; 1422 + } 1423 + 1355 1424 #ifdef CONFIG_TMPFS 1356 1425 static const struct inode_operations shmem_symlink_inode_operations; 1357 1426 static const struct inode_operations shmem_short_symlink_operations; ··· 1669 1728 pagevec_init(&pvec, 0); 1670 1729 pvec.nr = 1; /* start small: we may be there already */ 1671 1730 while (!done) { 1672 - pvec.nr = shmem_find_get_pages_and_swap(mapping, index, 1731 + pvec.nr = find_get_entries(mapping, index, 1673 1732 pvec.nr, pvec.pages, indices); 1674 1733 if (!pvec.nr) { 1675 1734 if (whence == SEEK_DATA) ··· 1696 1755 break; 1697 1756 } 1698 1757 } 1699 - shmem_deswap_pagevec(&pvec); 1758 + pagevec_remove_exceptionals(&pvec); 1700 1759 pagevec_release(&pvec); 1701 1760 pvec.nr = PAGEVEC_SIZE; 1702 1761 cond_resched();
+51
mm/swap.c
··· 948 948 EXPORT_SYMBOL(__pagevec_lru_add); 949 949 950 950 /** 951 + * pagevec_lookup_entries - gang pagecache lookup 952 + * @pvec: Where the resulting entries are placed 953 + * @mapping: The address_space to search 954 + * @start: The starting entry index 955 + * @nr_entries: The maximum number of entries 956 + * @indices: The cache indices corresponding to the entries in @pvec 957 + * 958 + * pagevec_lookup_entries() will search for and return a group of up 959 + * to @nr_entries pages and shadow entries in the mapping. All 960 + * entries are placed in @pvec. pagevec_lookup_entries() takes a 961 + * reference against actual pages in @pvec. 962 + * 963 + * The search returns a group of mapping-contiguous entries with 964 + * ascending indexes. There may be holes in the indices due to 965 + * not-present entries. 966 + * 967 + * pagevec_lookup_entries() returns the number of entries which were 968 + * found. 969 + */ 970 + unsigned pagevec_lookup_entries(struct pagevec *pvec, 971 + struct address_space *mapping, 972 + pgoff_t start, unsigned nr_pages, 973 + pgoff_t *indices) 974 + { 975 + pvec->nr = find_get_entries(mapping, start, nr_pages, 976 + pvec->pages, indices); 977 + return pagevec_count(pvec); 978 + } 979 + 980 + /** 981 + * pagevec_remove_exceptionals - pagevec exceptionals pruning 982 + * @pvec: The pagevec to prune 983 + * 984 + * pagevec_lookup_entries() fills both pages and exceptional radix 985 + * tree entries into the pagevec. This function prunes all 986 + * exceptionals from @pvec without leaving holes, so that it can be 987 + * passed on to page-only pagevec operations. 
988 + */ 989 + void pagevec_remove_exceptionals(struct pagevec *pvec) 990 + { 991 + int i, j; 992 + 993 + for (i = 0, j = 0; i < pagevec_count(pvec); i++) { 994 + struct page *page = pvec->pages[i]; 995 + if (!radix_tree_exceptional_entry(page)) 996 + pvec->pages[j++] = page; 997 + } 998 + pvec->nr = j; 999 + } 1000 + 1001 + /** 951 1002 * pagevec_lookup - gang pagecache lookup 952 1003 * @pvec: Where the resulting pages are placed 953 1004 * @mapping: The address_space to search
+61 -13
mm/truncate.c
··· 22 22 #include <linux/cleancache.h> 23 23 #include "internal.h" 24 24 25 + static void clear_exceptional_entry(struct address_space *mapping, 26 + pgoff_t index, void *entry) 27 + { 28 + /* Handled by shmem itself */ 29 + if (shmem_mapping(mapping)) 30 + return; 31 + 32 + spin_lock_irq(&mapping->tree_lock); 33 + /* 34 + * Regular page slots are stabilized by the page lock even 35 + * without the tree itself locked. These unlocked entries 36 + * need verification under the tree lock. 37 + */ 38 + radix_tree_delete_item(&mapping->page_tree, index, entry); 39 + spin_unlock_irq(&mapping->tree_lock); 40 + } 25 41 26 42 /** 27 43 * do_invalidatepage - invalidate part or all of a page ··· 224 208 unsigned int partial_start; /* inclusive */ 225 209 unsigned int partial_end; /* exclusive */ 226 210 struct pagevec pvec; 211 + pgoff_t indices[PAGEVEC_SIZE]; 227 212 pgoff_t index; 228 213 int i; 229 214 ··· 255 238 256 239 pagevec_init(&pvec, 0); 257 240 index = start; 258 - while (index < end && pagevec_lookup(&pvec, mapping, index, 259 - min(end - index, (pgoff_t)PAGEVEC_SIZE))) { 241 + while (index < end && pagevec_lookup_entries(&pvec, mapping, index, 242 + min(end - index, (pgoff_t)PAGEVEC_SIZE), 243 + indices)) { 260 244 mem_cgroup_uncharge_start(); 261 245 for (i = 0; i < pagevec_count(&pvec); i++) { 262 246 struct page *page = pvec.pages[i]; 263 247 264 248 /* We rely upon deletion not changing page->index */ 265 - index = page->index; 249 + index = indices[i]; 266 250 if (index >= end) 267 251 break; 252 + 253 + if (radix_tree_exceptional_entry(page)) { 254 + clear_exceptional_entry(mapping, index, page); 255 + continue; 256 + } 268 257 269 258 if (!trylock_page(page)) 270 259 continue; ··· 282 259 truncate_inode_page(mapping, page); 283 260 unlock_page(page); 284 261 } 262 + pagevec_remove_exceptionals(&pvec); 285 263 pagevec_release(&pvec); 286 264 mem_cgroup_uncharge_end(); 287 265 cond_resched(); ··· 331 307 index = start; 332 308 for ( ; ; ) { 333 309 
cond_resched(); 334 - if (!pagevec_lookup(&pvec, mapping, index, 335 - min(end - index, (pgoff_t)PAGEVEC_SIZE))) { 310 + if (!pagevec_lookup_entries(&pvec, mapping, index, 311 + min(end - index, (pgoff_t)PAGEVEC_SIZE), 312 + indices)) { 336 313 if (index == start) 337 314 break; 338 315 index = start; 339 316 continue; 340 317 } 341 - if (index == start && pvec.pages[0]->index >= end) { 318 + if (index == start && indices[0] >= end) { 319 + pagevec_remove_exceptionals(&pvec); 342 320 pagevec_release(&pvec); 343 321 break; 344 322 } ··· 349 323 struct page *page = pvec.pages[i]; 350 324 351 325 /* We rely upon deletion not changing page->index */ 352 - index = page->index; 326 + index = indices[i]; 353 327 if (index >= end) 354 328 break; 329 + 330 + if (radix_tree_exceptional_entry(page)) { 331 + clear_exceptional_entry(mapping, index, page); 332 + continue; 333 + } 355 334 356 335 lock_page(page); 357 336 WARN_ON(page->index != index); ··· 364 333 truncate_inode_page(mapping, page); 365 334 unlock_page(page); 366 335 } 336 + pagevec_remove_exceptionals(&pvec); 367 337 pagevec_release(&pvec); 368 338 mem_cgroup_uncharge_end(); 369 339 index++; ··· 407 375 unsigned long invalidate_mapping_pages(struct address_space *mapping, 408 376 pgoff_t start, pgoff_t end) 409 377 { 378 + pgoff_t indices[PAGEVEC_SIZE]; 410 379 struct pagevec pvec; 411 380 pgoff_t index = start; 412 381 unsigned long ret; ··· 423 390 */ 424 391 425 392 pagevec_init(&pvec, 0); 426 - while (index <= end && pagevec_lookup(&pvec, mapping, index, 427 - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 393 + while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, 394 + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 395 + indices)) { 428 396 mem_cgroup_uncharge_start(); 429 397 for (i = 0; i < pagevec_count(&pvec); i++) { 430 398 struct page *page = pvec.pages[i]; 431 399 432 400 /* We rely upon deletion not changing page->index */ 433 - index = page->index; 401 + index = indices[i]; 
434 402 if (index > end) 435 403 break; 404 + 405 + if (radix_tree_exceptional_entry(page)) { 406 + clear_exceptional_entry(mapping, index, page); 407 + continue; 408 + } 436 409 437 410 if (!trylock_page(page)) 438 411 continue; ··· 453 414 deactivate_page(page); 454 415 count += ret; 455 416 } 417 + pagevec_remove_exceptionals(&pvec); 456 418 pagevec_release(&pvec); 457 419 mem_cgroup_uncharge_end(); 458 420 cond_resched(); ··· 521 481 int invalidate_inode_pages2_range(struct address_space *mapping, 522 482 pgoff_t start, pgoff_t end) 523 483 { 484 + pgoff_t indices[PAGEVEC_SIZE]; 524 485 struct pagevec pvec; 525 486 pgoff_t index; 526 487 int i; ··· 532 491 cleancache_invalidate_inode(mapping); 533 492 pagevec_init(&pvec, 0); 534 493 index = start; 535 - while (index <= end && pagevec_lookup(&pvec, mapping, index, 536 - min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) { 494 + while (index <= end && pagevec_lookup_entries(&pvec, mapping, index, 495 + min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1, 496 + indices)) { 537 497 mem_cgroup_uncharge_start(); 538 498 for (i = 0; i < pagevec_count(&pvec); i++) { 539 499 struct page *page = pvec.pages[i]; 540 500 541 501 /* We rely upon deletion not changing page->index */ 542 - index = page->index; 502 + index = indices[i]; 543 503 if (index > end) 544 504 break; 505 + 506 + if (radix_tree_exceptional_entry(page)) { 507 + clear_exceptional_entry(mapping, index, page); 508 + continue; 509 + } 545 510 546 511 lock_page(page); 547 512 WARN_ON(page->index != index); ··· 586 539 ret = ret2; 587 540 unlock_page(page); 588 541 } 542 + pagevec_remove_exceptionals(&pvec); 589 543 pagevec_release(&pvec); 590 544 mem_cgroup_uncharge_end(); 591 545 cond_resched();