Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

CacheFiles: Fix the marking of cached pages

Under some circumstances CacheFiles defers the marking of pages with PG_fscache
so that it can take advantage of pagevecs to reduce the number of calls to
fscache_mark_pages_cached() and the netfs's hook to keep track of this.

There are, however, two problems with this:

(1) It can lead to the PG_fscache mark being applied _after_ the page is set
PG_uptodate and unlocked (by the call to fscache_end_io()).

(2) CacheFiles's ref on the page is dropped immediately following
fscache_end_io() - and so may not still be held when the mark is applied.
This can lead to the page being passed back to the allocator before the
mark is applied.

Fix this by, where appropriate, marking the page before calling
fscache_end_io() and releasing the page. This means that we can't take
advantage of pagevecs and have to make a separate call for each page to the
marking routines.

The symptoms of this are Bad Page state errors cropping up under memory
pressure, for example:

BUG: Bad page state in process tar pfn:002da
page:ffffea0000009fb0 count:0 mapcount:0 mapping: (null) index:0x1447
page flags: 0x1000(private_2)
Pid: 4574, comm: tar Tainted: G W 3.1.0-rc4-fsdevel+ #1064
Call Trace:
[<ffffffff8109583c>] ? dump_page+0xb9/0xbe
[<ffffffff81095916>] bad_page+0xd5/0xea
[<ffffffff81095d82>] get_page_from_freelist+0x35b/0x46a
[<ffffffff810961f3>] __alloc_pages_nodemask+0x362/0x662
[<ffffffff810989da>] __do_page_cache_readahead+0x13a/0x267
[<ffffffff81098942>] ? __do_page_cache_readahead+0xa2/0x267
[<ffffffff81098d7b>] ra_submit+0x1c/0x20
[<ffffffff8109900a>] ondemand_readahead+0x28b/0x29a
[<ffffffff81098ee2>] ? ondemand_readahead+0x163/0x29a
[<ffffffff810990ce>] page_cache_sync_readahead+0x38/0x3a
[<ffffffff81091d8a>] generic_file_aio_read+0x2ab/0x67e
[<ffffffffa008cfbe>] nfs_file_read+0xa4/0xc9 [nfs]
[<ffffffff810c22c4>] do_sync_read+0xba/0xfa
[<ffffffff81177a47>] ? security_file_permission+0x7b/0x84
[<ffffffff810c25dd>] ? rw_verify_area+0xab/0xc8
[<ffffffff810c29a4>] vfs_read+0xaa/0x13a
[<ffffffff810c2a79>] sys_read+0x45/0x6c
[<ffffffff813ac37b>] system_call_fastpath+0x16/0x1b

As can be seen, PG_private_2 (== PG_fscache) is set in the page flags.

Instrumenting fscache_mark_pages_cached() to verify whether page->mapping was
set appropriately showed that sometimes it wasn't. This led to the discovery
that sometimes the page has apparently been reclaimed by the time the marker
got to see it.

Reported-by: M. Stevens <m@tippett.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Jeff Layton <jlayton@redhat.com>

+56 -52
+11 -23
fs/cachefiles/rdwr.c
··· 176 176 recheck: 177 177 if (PageUptodate(monitor->back_page)) { 178 178 copy_highpage(monitor->netfs_page, monitor->back_page); 179 - 180 - pagevec_add(&pagevec, monitor->netfs_page); 181 - fscache_mark_pages_cached(monitor->op, &pagevec); 179 + fscache_mark_page_cached(monitor->op, 180 + monitor->netfs_page); 182 181 error = 0; 183 182 } else if (!PageError(monitor->back_page)) { 184 183 /* the page has probably been truncated */ ··· 334 335 backing_page_already_uptodate: 335 336 _debug("- uptodate"); 336 337 337 - pagevec_add(pagevec, netpage); 338 - fscache_mark_pages_cached(op, pagevec); 338 + fscache_mark_page_cached(op, netpage); 339 339 340 340 copy_highpage(netpage, backpage); 341 341 fscache_end_io(op, netpage, 0); ··· 446 448 &pagevec); 447 449 } else if (cachefiles_has_space(cache, 0, 1) == 0) { 448 450 /* there's space in the cache we can use */ 449 - pagevec_add(&pagevec, page); 450 - fscache_mark_pages_cached(op, &pagevec); 451 + fscache_mark_page_cached(op, page); 451 452 ret = -ENODATA; 452 453 } else { 453 454 ret = -ENOBUFS; ··· 462 465 */ 463 466 static int cachefiles_read_backing_file(struct cachefiles_object *object, 464 467 struct fscache_retrieval *op, 465 - struct list_head *list, 466 - struct pagevec *mark_pvec) 468 + struct list_head *list) 467 469 { 468 470 struct cachefiles_one_read *monitor = NULL; 469 471 struct address_space *bmapping = object->backer->d_inode->i_mapping; ··· 622 626 page_cache_release(backpage); 623 627 backpage = NULL; 624 628 625 - if (!pagevec_add(mark_pvec, netpage)) 626 - fscache_mark_pages_cached(op, mark_pvec); 629 + fscache_mark_page_cached(op, netpage); 627 630 628 631 page_cache_get(netpage); 629 632 if (!pagevec_add(&lru_pvec, netpage)) 630 633 __pagevec_lru_add_file(&lru_pvec); 631 634 635 + /* the netpage is unlocked and marked up to date here */ 632 636 fscache_end_io(op, netpage, 0); 633 637 page_cache_release(netpage); 634 638 netpage = NULL; ··· 771 775 /* submit the apparently valid pages to the backing fs to be read from 772 776 * disk */ 773 777 if (nrbackpages > 0) { 774 - ret2 = cachefiles_read_backing_file(object, op, &backpages, 775 - &pagevec); 778 + ret2 = cachefiles_read_backing_file(object, op, &backpages); 776 779 if (ret2 == -ENOMEM || ret2 == -EINTR) 777 780 ret = ret2; 778 781 } 779 - 780 - if (pagevec_count(&pagevec) > 0) 781 - fscache_mark_pages_cached(op, &pagevec); 782 782 783 783 _leave(" = %d [nr=%u%s]", 784 784 ret, *nr_pages, list_empty(pages) ? " empty" : ""); ··· 798 806 { 799 807 struct cachefiles_object *object; 800 808 struct cachefiles_cache *cache; 801 - struct pagevec pagevec; 802 809 int ret; 803 810 804 811 object = container_of(op->op.object, ··· 808 817 _enter("%p,{%lx},", object, page->index); 809 818 810 819 ret = cachefiles_has_space(cache, 0, 1); 811 - if (ret == 0) { 812 - pagevec_init(&pagevec, 0); 813 - pagevec_add(&pagevec, page); 814 - fscache_mark_pages_cached(op, &pagevec); 815 - } else { 820 + if (ret == 0) 821 + fscache_mark_page_cached(op, page); 822 + else 816 823 ret = -ENOBUFS; 817 - } 818 824 819 825 _leave(" = %d", ret); 820 826 return ret;
+36 -23
fs/fscache/page.c
··· 915 915 EXPORT_SYMBOL(__fscache_uncache_page); 916 916 917 917 /** 918 + * fscache_mark_page_cached - Mark a page as being cached 919 + * @op: The retrieval op pages are being marked for 920 + * @page: The page to be marked 921 + * 922 + * Mark a netfs page as being cached. After this is called, the netfs 923 + * must call fscache_uncache_page() to remove the mark. 924 + */ 925 + void fscache_mark_page_cached(struct fscache_retrieval *op, struct page *page) 926 + { 927 + struct fscache_cookie *cookie = op->op.object->cookie; 928 + 929 + #ifdef CONFIG_FSCACHE_STATS 930 + atomic_inc(&fscache_n_marks); 931 + #endif 932 + 933 + _debug("- mark %p{%lx}", page, page->index); 934 + if (TestSetPageFsCache(page)) { 935 + static bool once_only; 936 + if (!once_only) { 937 + once_only = true; 938 + printk(KERN_WARNING "FS-Cache:" 939 + " Cookie type %s marked page %lx" 940 + " multiple times\n", 941 + cookie->def->name, page->index); 942 + } 943 + } 944 + 945 + if (cookie->def->mark_page_cached) 946 + cookie->def->mark_page_cached(cookie->netfs_data, 947 + op->mapping, page); 948 + } 949 + EXPORT_SYMBOL(fscache_mark_page_cached); 950 + 951 + /** 918 952 * fscache_mark_pages_cached - Mark pages as being cached 919 953 * @op: The retrieval op pages are being marked for 920 954 * @pagevec: The pages to be marked ··· 959 925 void fscache_mark_pages_cached(struct fscache_retrieval *op, 960 926 struct pagevec *pagevec) 961 927 { 962 - struct fscache_cookie *cookie = op->op.object->cookie; 963 928 unsigned long loop; 964 929 965 - #ifdef CONFIG_FSCACHE_STATS 966 - atomic_add(pagevec->nr, &fscache_n_marks); 967 - #endif 930 + for (loop = 0; loop < pagevec->nr; loop++) 931 + fscache_mark_page_cached(op, pagevec->pages[loop]); 968 932 969 - for (loop = 0; loop < pagevec->nr; loop++) { 970 - struct page *page = pagevec->pages[loop]; 971 - 972 - _debug("- mark %p{%lx}", page, page->index); 973 - if (TestSetPageFsCache(page)) { 974 - static bool once_only; 975 - if (!once_only) { 976 - once_only = true; 977 - printk(KERN_WARNING "FS-Cache:" 978 - " Cookie type %s marked page %lx" 979 - " multiple times\n", 980 - cookie->def->name, page->index); 981 - } 982 - } 983 - } 984 - 985 - if (cookie->def->mark_pages_cached) 986 - cookie->def->mark_pages_cached(cookie->netfs_data, 987 - op->mapping, pagevec); 988 933 pagevec_reinit(pagevec); 989 934 } 990 935 EXPORT_SYMBOL(fscache_mark_pages_cached);
+3
include/linux/fscache-cache.h
··· 504 504 505 505 extern void fscache_io_error(struct fscache_cache *cache); 506 506 507 + extern void fscache_mark_page_cached(struct fscache_retrieval *op, 508 + struct page *page); 509 + 507 510 extern void fscache_mark_pages_cached(struct fscache_retrieval *op, 508 511 struct pagevec *pagevec); 509 512
+6 -6
include/linux/fscache.h
··· 135 135 */ 136 136 void (*put_context)(void *cookie_netfs_data, void *context); 137 137 138 - /* indicate pages that now have cache metadata retained 139 - * - this function should mark the specified pages as now being cached 140 - * - the pages will have been marked with PG_fscache before this is 138 + /* indicate page that now have cache metadata retained 139 + * - this function should mark the specified page as now being cached 140 + * - the page will have been marked with PG_fscache before this is 141 141 * called, so this is optional 142 142 */ 143 - void (*mark_pages_cached)(void *cookie_netfs_data, 144 - struct address_space *mapping, 145 - struct pagevec *cached_pvec); 143 + void (*mark_page_cached)(void *cookie_netfs_data, 144 + struct address_space *mapping, 145 + struct page *page); 146 146 147 147 /* indicate the cookie is no longer cached 148 148 * - this function is called when the backing store currently caching