Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork

Configure Feed

Select the types of activity you want to include in your feed.

mm: xip fix fault vs sparse page invalidate race

XIP has a race between sparse pages being inserted into page tables, and
sparse pages being zapped when it's time to put a non-sparse page in.

What can happen is that a process can be left with a dangling sparse page
in a MAP_SHARED mapping, while the rest of the world sees the non-sparse
version. I.e., data corruption.

Guard these operations with a seqlock, making fault-in-sparse-pages the
slowpath, and try-to-unmap-sparse-pages the fastpath.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Jared Hulbert <jaredeh@gmail.com>
Acked-by: Carsten Otte <cotte@freenet.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Nick Piggin and committed by
Linus Torvalds
538f8ea6 479db0bf

+46 -14
+46 -14
mm/filemap_xip.c
··· 15 15 #include <linux/rmap.h> 16 16 #include <linux/mmu_notifier.h> 17 17 #include <linux/sched.h> 18 + #include <linux/seqlock.h> 19 + #include <linux/mutex.h> 18 20 #include <asm/tlbflush.h> 19 21 #include <asm/io.h> 20 22 ··· 24 22 * We do use our own empty page to avoid interference with other users 25 23 * of ZERO_PAGE(), such as /dev/zero 26 24 */ 25 + static DEFINE_MUTEX(xip_sparse_mutex); 26 + static seqcount_t xip_sparse_seq = SEQCNT_ZERO; 27 27 static struct page *__xip_sparse_page; 28 28 29 + /* called under xip_sparse_mutex */ 29 30 static struct page *xip_sparse_page(void) 30 31 { 31 32 if (!__xip_sparse_page) { 32 33 struct page *page = alloc_page(GFP_HIGHUSER | __GFP_ZERO); 33 34 34 - if (page) { 35 - static DEFINE_SPINLOCK(xip_alloc_lock); 36 - spin_lock(&xip_alloc_lock); 37 - if (!__xip_sparse_page) 38 - __xip_sparse_page = page; 39 - else 40 - __free_page(page); 41 - spin_unlock(&xip_alloc_lock); 42 - } 35 + if (page) 36 + __xip_sparse_page = page; 43 37 } 44 38 return __xip_sparse_page; 45 39 } ··· 172 174 pte_t pteval; 173 175 spinlock_t *ptl; 174 176 struct page *page; 177 + unsigned count; 178 + int locked = 0; 179 + 180 + count = read_seqcount_begin(&xip_sparse_seq); 175 181 176 182 page = __xip_sparse_page; 177 183 if (!page) 178 184 return; 179 185 186 + retry: 180 187 spin_lock(&mapping->i_mmap_lock); 181 188 vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { 182 189 mm = vma->vm_mm; ··· 201 198 } 202 199 } 203 200 spin_unlock(&mapping->i_mmap_lock); 201 + 202 + if (locked) { 203 + mutex_unlock(&xip_sparse_mutex); 204 + } else if (read_seqcount_retry(&xip_sparse_seq, count)) { 205 + mutex_lock(&xip_sparse_mutex); 206 + locked = 1; 207 + goto retry; 208 + } 204 209 } 205 210 206 211 /* ··· 229 218 int error; 230 219 231 220 /* XXX: are VM_FAULT_ codes OK? 
*/ 232 - 221 + again: 233 222 size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 234 223 if (vmf->pgoff >= size) 235 224 return VM_FAULT_SIGBUS; ··· 256 245 __xip_unmap(mapping, vmf->pgoff); 257 246 258 247 found: 248 + printk("%s insert %lx@%lx\n", current->comm, (unsigned long)vmf->virtual_address, xip_pfn); 259 249 err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, 260 250 xip_pfn); 261 251 if (err == -ENOMEM) ··· 264 252 BUG_ON(err); 265 253 return VM_FAULT_NOPAGE; 266 254 } else { 255 + int err, ret = VM_FAULT_OOM; 256 + 257 + mutex_lock(&xip_sparse_mutex); 258 + write_seqcount_begin(&xip_sparse_seq); 259 + error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0, 260 + &xip_mem, &xip_pfn); 261 + if (unlikely(!error)) { 262 + write_seqcount_end(&xip_sparse_seq); 263 + mutex_unlock(&xip_sparse_mutex); 264 + goto again; 265 + } 266 + if (error != -ENODATA) 267 + goto out; 267 268 /* not shared and writable, use xip_sparse_page() */ 268 269 page = xip_sparse_page(); 269 270 if (!page) 270 - return VM_FAULT_OOM; 271 + goto out; 272 + err = vm_insert_page(vma, (unsigned long)vmf->virtual_address, 273 + page); 274 + if (err == -ENOMEM) 275 + goto out; 271 276 272 - page_cache_get(page); 273 - vmf->page = page; 274 - return 0; 277 + ret = VM_FAULT_NOPAGE; 278 + out: 279 + write_seqcount_end(&xip_sparse_seq); 280 + mutex_unlock(&xip_sparse_mutex); 281 + 282 + return ret; 275 283 } 276 284 } 277 285