Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

readahead: fault retry breaks mmap file read random detection

.fault can now retry. The retry can break the state machine of .fault. In
filemap_fault, if a page miss occurs, ra->mmap_miss is increased. On the
second try, since the page is now in the page cache, ra->mmap_miss is
decreased. Both happen within a single fault, so we can't detect random
mmap file access.

Add a new flag to indicate that .fault has already been tried once. On the
second try, skip the ra->mmap_miss decrease. The filemap_fault state machine
is fine with this.

I only tested x86 and didn't test other archs, but the change for the other
archs looks obvious — though who knows :)

Signed-off-by: Shaohua Li <shaohua.li@fusionio.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Shaohua Li and committed by
Linus Torvalds
45cac65b e79bee24

+21 -2
+1
arch/arm/mm/fault.c
··· 336 336 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 337 337 * of starvation. */ 338 338 flags &= ~FAULT_FLAG_ALLOW_RETRY; 339 + flags |= FAULT_FLAG_TRIED; 339 340 goto retry; 340 341 } 341 342 }
+1
arch/avr32/mm/fault.c
··· 152 152 tsk->min_flt++; 153 153 if (fault & VM_FAULT_RETRY) { 154 154 flags &= ~FAULT_FLAG_ALLOW_RETRY; 155 + flags |= FAULT_FLAG_TRIED; 155 156 156 157 /* 157 158 * No need to up_read(&mm->mmap_sem) as we would have
+1
arch/cris/mm/fault.c
··· 186 186 tsk->min_flt++; 187 187 if (fault & VM_FAULT_RETRY) { 188 188 flags &= ~FAULT_FLAG_ALLOW_RETRY; 189 + flags |= FAULT_FLAG_TRIED; 189 190 190 191 /* 191 192 * No need to up_read(&mm->mmap_sem) as we would
+1
arch/hexagon/mm/vm_fault.c
··· 113 113 current->min_flt++; 114 114 if (fault & VM_FAULT_RETRY) { 115 115 flags &= ~FAULT_FLAG_ALLOW_RETRY; 116 + flags |= FAULT_FLAG_TRIED; 116 117 goto retry; 117 118 } 118 119 }
+1
arch/ia64/mm/fault.c
··· 184 184 current->min_flt++; 185 185 if (fault & VM_FAULT_RETRY) { 186 186 flags &= ~FAULT_FLAG_ALLOW_RETRY; 187 + flags |= FAULT_FLAG_TRIED; 187 188 188 189 /* No need to up_read(&mm->mmap_sem) as we would 189 190 * have already released it in __lock_page_or_retry
+1
arch/m68k/mm/fault.c
··· 170 170 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 171 171 * of starvation. */ 172 172 flags &= ~FAULT_FLAG_ALLOW_RETRY; 173 + flags |= FAULT_FLAG_TRIED; 173 174 174 175 /* 175 176 * No need to up_read(&mm->mmap_sem) as we would
+1
arch/microblaze/mm/fault.c
··· 233 233 current->min_flt++; 234 234 if (fault & VM_FAULT_RETRY) { 235 235 flags &= ~FAULT_FLAG_ALLOW_RETRY; 236 + flags |= FAULT_FLAG_TRIED; 236 237 237 238 /* 238 239 * No need to up_read(&mm->mmap_sem) as we would
+1
arch/mips/mm/fault.c
··· 171 171 } 172 172 if (fault & VM_FAULT_RETRY) { 173 173 flags &= ~FAULT_FLAG_ALLOW_RETRY; 174 + flags |= FAULT_FLAG_TRIED; 174 175 175 176 /* 176 177 * No need to up_read(&mm->mmap_sem) as we would
+1
arch/openrisc/mm/fault.c
··· 183 183 tsk->min_flt++; 184 184 if (fault & VM_FAULT_RETRY) { 185 185 flags &= ~FAULT_FLAG_ALLOW_RETRY; 186 + flags |= FAULT_FLAG_TRIED; 186 187 187 188 /* No need to up_read(&mm->mmap_sem) as we would 188 189 * have already released it in __lock_page_or_retry
+1
arch/powerpc/mm/fault.c
··· 451 451 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 452 452 * of starvation. */ 453 453 flags &= ~FAULT_FLAG_ALLOW_RETRY; 454 + flags |= FAULT_FLAG_TRIED; 454 455 goto retry; 455 456 } 456 457 }
+1
arch/s390/mm/fault.c
··· 367 367 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 368 368 * of starvation. */ 369 369 flags &= ~FAULT_FLAG_ALLOW_RETRY; 370 + flags |= FAULT_FLAG_TRIED; 370 371 down_read(&mm->mmap_sem); 371 372 goto retry; 372 373 }
+1
arch/sh/mm/fault.c
··· 504 504 } 505 505 if (fault & VM_FAULT_RETRY) { 506 506 flags &= ~FAULT_FLAG_ALLOW_RETRY; 507 + flags |= FAULT_FLAG_TRIED; 507 508 508 509 /* 509 510 * No need to up_read(&mm->mmap_sem) as we would
+1
arch/sparc/mm/fault_32.c
··· 265 265 } 266 266 if (fault & VM_FAULT_RETRY) { 267 267 flags &= ~FAULT_FLAG_ALLOW_RETRY; 268 + flags |= FAULT_FLAG_TRIED; 268 269 269 270 /* No need to up_read(&mm->mmap_sem) as we would 270 271 * have already released it in __lock_page_or_retry
+1
arch/sparc/mm/fault_64.c
··· 452 452 } 453 453 if (fault & VM_FAULT_RETRY) { 454 454 flags &= ~FAULT_FLAG_ALLOW_RETRY; 455 + flags |= FAULT_FLAG_TRIED; 455 456 456 457 /* No need to up_read(&mm->mmap_sem) as we would 457 458 * have already released it in __lock_page_or_retry
+1
arch/tile/mm/fault.c
··· 454 454 tsk->min_flt++; 455 455 if (fault & VM_FAULT_RETRY) { 456 456 flags &= ~FAULT_FLAG_ALLOW_RETRY; 457 + flags |= FAULT_FLAG_TRIED; 457 458 458 459 /* 459 460 * No need to up_read(&mm->mmap_sem) as we would
+1
arch/um/kernel/trap.c
··· 89 89 current->min_flt++; 90 90 if (fault & VM_FAULT_RETRY) { 91 91 flags &= ~FAULT_FLAG_ALLOW_RETRY; 92 + flags |= FAULT_FLAG_TRIED; 92 93 93 94 goto retry; 94 95 }
+1
arch/x86/mm/fault.c
··· 1220 1220 /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk 1221 1221 * of starvation. */ 1222 1222 flags &= ~FAULT_FLAG_ALLOW_RETRY; 1223 + flags |= FAULT_FLAG_TRIED; 1223 1224 goto retry; 1224 1225 } 1225 1226 }
+1
arch/xtensa/mm/fault.c
··· 126 126 current->min_flt++; 127 127 if (fault & VM_FAULT_RETRY) { 128 128 flags &= ~FAULT_FLAG_ALLOW_RETRY; 129 + flags |= FAULT_FLAG_TRIED; 129 130 130 131 /* No need to up_read(&mm->mmap_sem) as we would 131 132 * have already released it in __lock_page_or_retry
+1
include/linux/mm.h
··· 161 161 #define FAULT_FLAG_ALLOW_RETRY 0x08 /* Retry fault if blocking */ 162 162 #define FAULT_FLAG_RETRY_NOWAIT 0x10 /* Don't drop mmap_sem and wait when retrying */ 163 163 #define FAULT_FLAG_KILLABLE 0x20 /* The fault task is in SIGKILL killable region */ 164 + #define FAULT_FLAG_TRIED 0x40 /* second try */ 164 165 165 166 /* 166 167 * vm_fault is filled by the the pagefault handler and passed to the vma's
+2 -2
mm/filemap.c
··· 1607 1607 * Do we have something in the page cache already? 1608 1608 */ 1609 1609 page = find_get_page(mapping, offset); 1610 - if (likely(page)) { 1610 + if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { 1611 1611 /* 1612 1612 * We found the page, so try async readahead before 1613 1613 * waiting for the lock. 1614 1614 */ 1615 1615 do_async_mmap_readahead(vma, ra, file, page, offset); 1616 - } else { 1616 + } else if (!page) { 1617 1617 /* No page in the page cache at all */ 1618 1618 do_sync_mmap_readahead(vma, ra, file, offset); 1619 1619 count_vm_event(PGMAJFAULT);