Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

truncate: new helpers

Introduce new truncate helpers truncate_pagecache and inode_newsize_ok.
vmtruncate is also consolidated from mm/memory.c and mm/nommu.c
into mm/truncate.c.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

authored by

npiggin@suse.de and committed by
Al Viro
25d9e2d1 eca6f534

+120 -108
+1 -1
Documentation/vm/locking
··· 80 80 mm start up ... this is a loose form of stability on mm_users. For 81 81 example, it is used in copy_mm to protect against a racing tlb_gather_mmu 82 82 single address space optimization, so that the zap_page_range (from 83 - vmtruncate) does not lose sending ipi's to cloned threads that might 83 + truncate) does not lose sending ipi's to cloned threads that might 84 84 be spawned underneath it and go to user mode to drag in pte's into tlbs. 85 85 86 86 swap_lock
+44 -2
fs/attr.c
··· 18 18 /* Taken over from the old code... */ 19 19 20 20 /* POSIX UID/GID verification for setting inode attributes. */ 21 - int inode_change_ok(struct inode *inode, struct iattr *attr) 21 + int inode_change_ok(const struct inode *inode, struct iattr *attr) 22 22 { 23 23 int retval = -EPERM; 24 24 unsigned int ia_valid = attr->ia_valid; ··· 60 60 error: 61 61 return retval; 62 62 } 63 - 64 63 EXPORT_SYMBOL(inode_change_ok); 64 + 65 + /** 66 + * inode_newsize_ok - may this inode be truncated to a given size 67 + * @inode: the inode to be truncated 68 + * @offset: the new size to assign to the inode 69 + * @Returns: 0 on success, -ve errno on failure 70 + * 71 + * inode_newsize_ok will check filesystem limits and ulimits to check that the 72 + * new inode size is within limits. inode_newsize_ok will also send SIGXFSZ 73 + * when necessary. Caller must not proceed with inode size change if failure is 74 + * returned. @inode must be a file (not directory), with appropriate 75 + * permissions to allow truncate (inode_newsize_ok does NOT check these 76 + * conditions). 77 + * 78 + * inode_newsize_ok must be called with i_mutex held. 79 + */ 80 + int inode_newsize_ok(const struct inode *inode, loff_t offset) 81 + { 82 + if (inode->i_size < offset) { 83 + unsigned long limit; 84 + 85 + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 86 + if (limit != RLIM_INFINITY && offset > limit) 87 + goto out_sig; 88 + if (offset > inode->i_sb->s_maxbytes) 89 + goto out_big; 90 + } else { 91 + /* 92 + * truncation of in-use swapfiles is disallowed - it would 93 + * cause subsequent swapout to scribble on the now-freed 94 + * blocks. 95 + */ 96 + if (IS_SWAPFILE(inode)) 97 + return -ETXTBSY; 98 + } 99 + 100 + return 0; 101 + out_sig: 102 + send_sig(SIGXFSZ, current, 0); 103 + out_big: 104 + return -EFBIG; 105 + } 106 + EXPORT_SYMBOL(inode_newsize_ok); 65 107 66 108 int inode_setattr(struct inode * inode, struct iattr * attr) 67 109 {
+2 -1
include/linux/fs.h
··· 2382 2382 #define buffer_migrate_page NULL 2383 2383 #endif 2384 2384 2385 - extern int inode_change_ok(struct inode *, struct iattr *); 2385 + extern int inode_change_ok(const struct inode *, struct iattr *); 2386 + extern int inode_newsize_ok(const struct inode *, loff_t offset); 2386 2387 extern int __must_check inode_setattr(struct inode *, struct iattr *); 2387 2388 2388 2389 extern void file_update_time(struct file *file);
+3 -2
include/linux/mm.h
··· 791 791 unmap_mapping_range(mapping, holebegin, holelen, 0); 792 792 } 793 793 794 - extern int vmtruncate(struct inode * inode, loff_t offset); 795 - extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); 794 + extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new); 795 + extern int vmtruncate(struct inode *inode, loff_t offset); 796 + extern int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end); 796 797 797 798 #ifdef CONFIG_MMU 798 799 extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+1 -1
mm/filemap.c
··· 58 58 /* 59 59 * Lock ordering: 60 60 * 61 - * ->i_mmap_lock (vmtruncate) 61 + * ->i_mmap_lock (truncate_pagecache) 62 62 * ->private_lock (__free_pte->__set_page_dirty_buffers) 63 63 * ->swap_lock (exclusive_swap_page, others) 64 64 * ->mapping->tree_lock
+3 -59
mm/memory.c
··· 297 297 unsigned long addr = vma->vm_start; 298 298 299 299 /* 300 - * Hide vma from rmap and vmtruncate before freeing pgtables 300 + * Hide vma from rmap and truncate_pagecache before freeing 301 + * pgtables 301 302 */ 302 303 anon_vma_unlink(vma); 303 304 unlink_file_vma(vma); ··· 2408 2407 * @mapping: the address space containing mmaps to be unmapped. 2409 2408 * @holebegin: byte in first page to unmap, relative to the start of 2410 2409 * the underlying file. This will be rounded down to a PAGE_SIZE 2411 - * boundary. Note that this is different from vmtruncate(), which 2410 + * boundary. Note that this is different from truncate_pagecache(), which 2412 2411 * must keep the partial page. In contrast, we must get rid of 2413 2412 * partial pages. 2414 2413 * @holelen: size of prospective hole in bytes. This will be rounded ··· 2458 2457 spin_unlock(&mapping->i_mmap_lock); 2459 2458 } 2460 2459 EXPORT_SYMBOL(unmap_mapping_range); 2461 - 2462 - /** 2463 - * vmtruncate - unmap mappings "freed" by truncate() syscall 2464 - * @inode: inode of the file used 2465 - * @offset: file offset to start truncating 2466 - * 2467 - * NOTE! We have to be ready to update the memory sharing 2468 - * between the file and the memory map for a potential last 2469 - * incomplete page. Ugly, but necessary. 2470 - */ 2471 - int vmtruncate(struct inode * inode, loff_t offset) 2472 - { 2473 - if (inode->i_size < offset) { 2474 - unsigned long limit; 2475 - 2476 - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 2477 - if (limit != RLIM_INFINITY && offset > limit) 2478 - goto out_sig; 2479 - if (offset > inode->i_sb->s_maxbytes) 2480 - goto out_big; 2481 - i_size_write(inode, offset); 2482 - } else { 2483 - struct address_space *mapping = inode->i_mapping; 2484 - 2485 - /* 2486 - * truncation of in-use swapfiles is disallowed - it would 2487 - * cause subsequent swapout to scribble on the now-freed 2488 - * blocks. 
2489 - */ 2490 - if (IS_SWAPFILE(inode)) 2491 - return -ETXTBSY; 2492 - i_size_write(inode, offset); 2493 - 2494 - /* 2495 - * unmap_mapping_range is called twice, first simply for 2496 - * efficiency so that truncate_inode_pages does fewer 2497 - * single-page unmaps. However after this first call, and 2498 - * before truncate_inode_pages finishes, it is possible for 2499 - * private pages to be COWed, which remain after 2500 - * truncate_inode_pages finishes, hence the second 2501 - * unmap_mapping_range call must be made for correctness. 2502 - */ 2503 - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 2504 - truncate_inode_pages(mapping, offset); 2505 - unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1); 2506 - } 2507 - 2508 - if (inode->i_op->truncate) 2509 - inode->i_op->truncate(inode); 2510 - return 0; 2511 - 2512 - out_sig: 2513 - send_sig(SIGXFSZ, current, 0); 2514 - out_big: 2515 - return -EFBIG; 2516 - } 2517 - EXPORT_SYMBOL(vmtruncate); 2518 2460 2519 2461 int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end) 2520 2462 {
+2 -2
mm/mremap.c
··· 86 86 if (vma->vm_file) { 87 87 /* 88 88 * Subtle point from Rajesh Venkatasubramanian: before 89 - * moving file-based ptes, we must lock vmtruncate out, 90 - * since it might clean the dst vma before the src vma, 89 + * moving file-based ptes, we must lock truncate_pagecache 90 + * out, since it might clean the dst vma before the src vma, 91 91 * and we propagate stale pages into the dst afterward. 92 92 */ 93 93 mapping = vma->vm_file->f_mapping;
-40
mm/nommu.c
··· 83 83 }; 84 84 85 85 /* 86 - * Handle all mappings that got truncated by a "truncate()" 87 - * system call. 88 - * 89 - * NOTE! We have to be ready to update the memory sharing 90 - * between the file and the memory map for a potential last 91 - * incomplete page. Ugly, but necessary. 92 - */ 93 - int vmtruncate(struct inode *inode, loff_t offset) 94 - { 95 - struct address_space *mapping = inode->i_mapping; 96 - unsigned long limit; 97 - 98 - if (inode->i_size < offset) 99 - goto do_expand; 100 - i_size_write(inode, offset); 101 - 102 - truncate_inode_pages(mapping, offset); 103 - goto out_truncate; 104 - 105 - do_expand: 106 - limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; 107 - if (limit != RLIM_INFINITY && offset > limit) 108 - goto out_sig; 109 - if (offset > inode->i_sb->s_maxbytes) 110 - goto out; 111 - i_size_write(inode, offset); 112 - 113 - out_truncate: 114 - if (inode->i_op->truncate) 115 - inode->i_op->truncate(inode); 116 - return 0; 117 - out_sig: 118 - send_sig(SIGXFSZ, current, 0); 119 - out: 120 - return -EFBIG; 121 - } 122 - 123 - EXPORT_SYMBOL(vmtruncate); 124 - 125 - /* 126 86 * Return the total memory allocated for this pointer, not 127 87 * just what the caller asked for. 128 88 *
+64
mm/truncate.c
··· 465 465 return invalidate_inode_pages2_range(mapping, 0, -1); 466 466 } 467 467 EXPORT_SYMBOL_GPL(invalidate_inode_pages2); 468 + 469 + /** 470 + * truncate_pagecache - unmap and remove pagecache that has been truncated 471 + * @inode: inode 472 + * @old: old file offset 473 + * @new: new file offset 474 + * 475 + * inode's new i_size must already be written before truncate_pagecache 476 + * is called. 477 + * 478 + * This function should typically be called before the filesystem 479 + * releases resources associated with the freed range (eg. deallocates 480 + * blocks). This way, pagecache will always stay logically coherent 481 + * with on-disk format, and the filesystem would not have to deal with 482 + * situations such as writepage being called for a page that has already 483 + * had its underlying blocks deallocated. 484 + */ 485 + void truncate_pagecache(struct inode *inode, loff_t old, loff_t new) 486 + { 487 + if (new < old) { 488 + struct address_space *mapping = inode->i_mapping; 489 + 490 + /* 491 + * unmap_mapping_range is called twice, first simply for 492 + * efficiency so that truncate_inode_pages does fewer 493 + * single-page unmaps. However after this first call, and 494 + * before truncate_inode_pages finishes, it is possible for 495 + * private pages to be COWed, which remain after 496 + * truncate_inode_pages finishes, hence the second 497 + * unmap_mapping_range call must be made for correctness. 498 + */ 499 + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); 500 + truncate_inode_pages(mapping, new); 501 + unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1); 502 + } 503 + } 504 + EXPORT_SYMBOL(truncate_pagecache); 505 + 506 + /** 507 + * vmtruncate - unmap mappings "freed" by truncate() syscall 508 + * @inode: inode of the file used 509 + * @offset: file offset to start truncating 510 + * 511 + * NOTE! 
We have to be ready to update the memory sharing 512 + * between the file and the memory map for a potential last 513 + * incomplete page. Ugly, but necessary. 514 + */ 515 + int vmtruncate(struct inode *inode, loff_t offset) 516 + { 517 + loff_t oldsize; 518 + int error; 519 + 520 + error = inode_newsize_ok(inode, offset); 521 + if (error) 522 + return error; 523 + oldsize = inode->i_size; 524 + i_size_write(inode, offset); 525 + truncate_pagecache(inode, oldsize, offset); 526 + if (inode->i_op->truncate) 527 + inode->i_op->truncate(inode); 528 + 529 + return error; 530 + } 531 + EXPORT_SYMBOL(vmtruncate);