nommu: fix shared mmap after truncate shrinkage problems

Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen
over the end of a truncation. The problem is that
ramfs_nommu_check_mappings() checks the reduced file size against the
VMA tree, but not the vm_region tree.

The following sequence of events can cause the problem:

fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600);
ftruncate(fd, 32 * 1024);
a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
munmap(a, 32 * 1024);
ftruncate(fd, 16 * 1024);
c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'a' creates a vm_region covering 32KB of the file. Mapping 'b'
sees that the vm_region from 'a' covers the region it wants, and so shares
it, pinning it in memory.

Mapping 'a' then goes away and the file is truncated to the end of VMA
'b'. However, the region allocated by 'a' is still in effect, and has
_not_ been reduced.

Mapping 'c' is then created, and because there's a vm_region covering the
desired region, get_unmapped_area() is _not_ called to repeat the check,
and the mapping is granted, even though the pages from the latter half of
the mapping have been discarded.
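
To see why the stale region satisfies the request, consider a toy
userspace model of the bookkeeping (the struct, fields and lookup below
are simplified stand-ins for illustration, not the kernel's actual
vm_region code):

	#include <stdio.h>
	#include <stddef.h>

	/* toy stand-in for struct vm_region: file range + share count */
	struct toy_region {
		size_t pgoff_bytes;	/* file offset covered, in bytes */
		size_t size;		/* bytes covered from that offset */
		int usage;		/* number of VMAs sharing this region */
	};

	/* would-be do_mmap_pgoff() fast path: share a covering region if
	 * one exists - note there is no re-check against the file size */
	static struct toy_region *find_covering(struct toy_region *r,
						size_t off, size_t len)
	{
		if (r->usage > 0 && off >= r->pgoff_bytes &&
		    off + len <= r->pgoff_bytes + r->size)
			return r;
		return NULL;
	}

	int main(void)
	{
		struct toy_region region = { 0, 32 * 1024, 1 }; /* from 'a' */

		region.usage++;	/* 'b' shares it */
		region.usage--;	/* 'a' unmapped; region lives on via 'b' */

		/* file truncated to 16KB, but region.size is still 32KB,
		 * so the 32KB request for 'c' is wrongly satisfied */
		printf("c granted: %s\n",
		       find_covering(&region, 0, 32 * 1024) ? "yes (bug)" : "no");
		return 0;
	}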

However:

d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

Mapping 'd' should work, and should end up sharing the region allocated by
'a'.
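
A userspace program along the following lines should demonstrate both
cases on a NOMMU kernel once the fix below is applied ('c' refused, 'd'
granted). It is an illustrative sketch, not a test shipped with this
patch, and it is only meaningful on NOMMU since MMU kernels take a
different mmap path:

	#include <stdio.h>
	#include <stdlib.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/mman.h>

	int main(void)
	{
		void *a, *b, *c, *d;
		int fd;

		fd = open("/tmp/x", O_RDWR | O_TRUNC | O_CREAT, 0600);
		if (fd < 0 || ftruncate(fd, 32 * 1024) < 0) {
			perror("setup");
			exit(1);
		}

		a = mmap(NULL, 32 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		b = mmap(NULL, 16 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		if (a == MAP_FAILED || b == MAP_FAILED) {
			perror("mmap a/b");
			exit(1);
		}
		munmap(a, 32 * 1024);
		ftruncate(fd, 16 * 1024);

		/* with the fix, the oversized request must be refused */
		c = mmap(NULL, 32 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		printf("c: %s\n", c == MAP_FAILED ? "refused (good)" : "granted (bad)");

		/* a request within the new size must still share the region */
		d = mmap(NULL, 16 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		printf("d: %s\n", d == MAP_FAILED ? "refused (bad)" : "granted (good)");
		return 0;
	}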

To deal with this, we shrink the vm_region struct during the truncation,
lest do_mmap_pgoff() take it as licence to share the full region
automatically without calling the get_unmapped_area() file op again.
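
Applied to the example above, the trimming arithmetic in the patch works
out as follows (a standalone sketch assuming 4KB pages and an arbitrary
region base address):

	#include <stdio.h>

	#define PAGE_SHIFT 12	/* assumed 4KB pages */

	int main(void)
	{
		/* a region covering 32KB of the file from pgoff 0, with
		 * the file being truncated from 32KB down to 16KB */
		unsigned long vm_start = 0x100000;	/* arbitrary base */
		unsigned long vm_top = vm_start + 32 * 1024;
		unsigned long pgoff = 0, newsize = 16 * 1024;
		unsigned long r_size = vm_top - vm_start;	/* 32768 */
		unsigned long r_top = (pgoff << PAGE_SHIFT) + r_size;

		if (r_top > newsize)			/* 32768 > 16384 */
			vm_top -= r_top - newsize;	/* trim 16KB off the top */

		printf("region now covers %lu bytes\n", vm_top - vm_start);
		return 0;	/* prints 16384 */
	}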

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

 fs/ramfs/file-nommu.c |   31 +------------------------------
 include/linux/mm.h    |    1 +
 mm/nommu.c            |   62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 30 deletions(-)

diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
@@ -123,35 +123,6 @@
 
 /*****************************************************************************/
 /*
- * check that file shrinkage doesn't leave any VMAs dangling in midair
- */
-static int ramfs_nommu_check_mappings(struct inode *inode,
-				      size_t newsize, size_t size)
-{
-	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
-
-	down_write(&nommu_region_sem);
-
-	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      newsize >> PAGE_SHIFT,
-			      (size + PAGE_SIZE - 1) >> PAGE_SHIFT
-			      ) {
-		/* found one - only interested if it's shared out of the page
-		 * cache */
-		if (vma->vm_flags & VM_SHARED) {
-			up_write(&nommu_region_sem);
-			return -ETXTBSY; /* not quite true, but near enough */
-		}
-	}
-
-	up_write(&nommu_region_sem);
-	return 0;
-}
-
-/*****************************************************************************/
-/*
  *
  */
 static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -169,7 +140,7 @@
 
 	/* check that a decrease in size doesn't cut off any shared mappings */
 	if (newsize < size) {
-		ret = ramfs_nommu_check_mappings(inode, newsize, size);
+		ret = nommu_shrink_inode_mappings(inode, size, newsize);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
@@ -1089,6 +1089,7 @@
 
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
+extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
 
 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
diff --git a/mm/nommu.c b/mm/nommu.c
@@ -1914,3 +1914,65 @@
 	mmput(mm);
 	return len;
 }
+
+/**
+ * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode
+ * @inode: The inode to check
+ * @size: The current filesize of the inode
+ * @newsize: The proposed filesize of the inode
+ *
+ * Check the shared mappings on an inode on behalf of a shrinking truncate to
+ * make sure that any outstanding VMAs aren't broken, and then shrink any
+ * vm_regions that extend beyond the new size so that do_mmap_pgoff() doesn't
+ * automatically grant mappings that are too large.
+ */
+int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
+				size_t newsize)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+	struct vm_region *region;
+	pgoff_t low, high;
+	size_t r_size, r_top;
+
+	low = newsize >> PAGE_SHIFT;
+	high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	down_write(&nommu_region_sem);
+
+	/* search for VMAs that fall within the dead zone */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      low, high) {
+		/* found one - only interested if it's shared out of the page
+		 * cache */
+		if (vma->vm_flags & VM_SHARED) {
+			up_write(&nommu_region_sem);
+			return -ETXTBSY; /* not quite true, but near enough */
+		}
+	}
+
+	/* reduce any regions that overlap the dead zone - if in existence,
+	 * these will be pointed to by VMAs that don't overlap the dead zone
+	 *
+	 * we don't check for any regions that start beyond the EOF as there
+	 * shouldn't be any
+	 */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      0, ULONG_MAX) {
+		if (!(vma->vm_flags & VM_SHARED))
+			continue;
+
+		region = vma->vm_region;
+		r_size = region->vm_top - region->vm_start;
+		r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size;
+
+		if (r_top > newsize) {
+			region->vm_top -= r_top - newsize;
+			if (region->vm_end > region->vm_top)
+				region->vm_end = region->vm_top;
+		}
+	}
+
+	up_write(&nommu_region_sem);
+	return 0;
+}