[PATCH] xip: fs/mm: execute in place

- the generic_file* file operations no longer have an xip/non-xip split
- filemap_xip.c implements a new set of fops that require the get_xip_page
  aop to work properly.  All of the new fops are exported GPL-only (we don't
  want non-GPL code using them); a sketch of how a filesystem might wire
  them up follows this list
- __xip_unmap now uses page_check_address, which is no longer static in
  rmap.c and is now declared in linux/rmap.h
- mm/filemap.h is now much cleaner, containing just Linus' inline funcs
  moved over from filemap.c
- fix includes in filemap_xip.c so that it builds cleanly on i386
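
A rough illustration (not part of this patch) of how a filesystem that can
hand out directly addressable pages might wire the new pieces up; the
examplefs_* names and the examplefs_blk_to_kaddr() helper are made up:

	static struct page *
	examplefs_get_xip_page(struct address_space *mapping, sector_t sector,
			       int create)
	{
		/* examplefs_blk_to_kaddr() is a hypothetical helper returning
		 * the kernel address that backs the given 512-byte sector,
		 * allocating a block first when create is set. */
		void *kaddr = examplefs_blk_to_kaddr(mapping->host, sector,
						     create);

		if (!kaddr)
			return ERR_PTR(-ENODATA);	/* hole */
		return virt_to_page(kaddr);
	}

	static struct address_space_operations examplefs_xip_aops = {
		.get_xip_page	= examplefs_get_xip_page,
	};

	static struct file_operations examplefs_xip_file_operations = {
		.llseek		= generic_file_llseek,
		.aio_read	= xip_file_aio_read,
		.readv		= xip_file_readv,
		.aio_write	= xip_file_aio_write,
		.writev		= xip_file_writev,
		.mmap		= xip_file_mmap,
		.sendfile	= xip_file_sendfile,
	};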

Signed-off-by: Carsten Otte <cotte@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by Carsten Otte and committed by Linus Torvalds ceffc078 420edbcc

+707 -75
+3 -1
fs/open.c
···
 
 	/* NB: we're sure to have correct a_ops only after f_op->open */
 	if (f->f_flags & O_DIRECT) {
-		if (!f->f_mapping->a_ops || !f->f_mapping->a_ops->direct_IO) {
+		if (!f->f_mapping->a_ops ||
+		    ((!f->f_mapping->a_ops->direct_IO) &&
+		    (!f->f_mapping->a_ops->get_xip_page))) {
 			fput(f);
 			f = ERR_PTR(-EINVAL);
 		}
+18
include/linux/fs.h
···
 	int (*releasepage) (struct page *, int);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
+	struct page* (*get_xip_page)(struct address_space *, sector_t,
+			int);
 };
 
 struct backing_dev_info;
···
 extern loff_t remote_llseek(struct file *file, loff_t offset, int origin);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
+
+#ifdef CONFIG_FS_XIP
+extern ssize_t xip_file_aio_read(struct kiocb *iocb, char __user *buf,
+				 size_t count, loff_t pos);
+extern ssize_t xip_file_readv(struct file *filp, const struct iovec *iov,
+			      unsigned long nr_segs, loff_t *ppos);
+extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos,
+				 size_t count, read_actor_t actor,
+				 void *target);
+extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
+extern ssize_t xip_file_aio_write(struct kiocb *iocb, const char __user *buf,
+				  size_t count, loff_t pos);
+extern ssize_t xip_file_writev(struct file *file, const struct iovec *iov,
+			       unsigned long nr_segs, loff_t *ppos);
+extern int xip_truncate_page(struct address_space *mapping, loff_t from);
+#endif
 
 static inline void do_generic_file_read(struct file * filp, loff_t *ppos,
 					read_descriptor_t * desc,
+6
include/linux/rmap.h
···
 int try_to_unmap(struct page *);
 
 /*
+ * Called from mm/filemap_xip.c to unmap empty zero page
+ */
+pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long);
+
+
+/*
  * Used by swapoff to help locate where page is expected in vma.
  */
 unsigned long page_address_in_vma(struct page *, struct vm_area_struct *);
+1
mm/Makefile
···
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 
+obj-$(CONFIG_FS_XIP) += filemap_xip.o
+2 -72
mm/filemap.c
···
 #include <linux/blkdev.h>
 #include <linux/security.h>
 #include <linux/syscalls.h>
+#include "filemap.h"
 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
  */
···
 }
 EXPORT_SYMBOL(remove_suid);
 
-/*
- * Copy as much as we can into the page and return the number of bytes which
- * were sucessfully copied.  If a fault is encountered then clear the page
- * out to (offset+bytes) and return the number of bytes which were copied.
- */
-static inline size_t
-filemap_copy_from_user(struct page *page, unsigned long offset,
-			const char __user *buf, unsigned bytes)
-{
-	char *kaddr;
-	int left;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
-	kunmap_atomic(kaddr, KM_USER0);
-
-	if (left != 0) {
-		/* Do it the slow way */
-		kaddr = kmap(page);
-		left = __copy_from_user(kaddr + offset, buf, bytes);
-		kunmap(page);
-	}
-	return bytes - left;
-}
-
-static size_t
+size_t
 __filemap_copy_from_user_iovec(char *vaddr,
 			const struct iovec *iov, size_t base, size_t bytes)
 {
···
 		}
 	}
 	return copied - left;
-}
-
-/*
- * This has the same sideeffects and return value as filemap_copy_from_user().
- * The difference is that on a fault we need to memset the remainder of the
- * page (out to offset+bytes), to emulate filemap_copy_from_user()'s
- * single-segment behaviour.
- */
-static inline size_t
-filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
-			const struct iovec *iov, size_t base, size_t bytes)
-{
-	char *kaddr;
-	size_t copied;
-
-	kaddr = kmap_atomic(page, KM_USER0);
-	copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
-						base, bytes);
-	kunmap_atomic(kaddr, KM_USER0);
-	if (copied != bytes) {
-		kaddr = kmap(page);
-		copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
-							base, bytes);
-		kunmap(page);
-	}
-	return copied;
-}
-
-static inline void
-filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
-{
-	const struct iovec *iov = *iovp;
-	size_t base = *basep;
-
-	while (bytes) {
-		int copy = min(bytes, iov->iov_len - base);
-
-		bytes -= copy;
-		base += copy;
-		if (iov->iov_len == base) {
-			iov++;
-			base = 0;
-		}
-	}
-	*iovp = iov;
-	*basep = base;
 }
 
 /*
+94
mm/filemap.h
···
+/*
+ * linux/mm/filemap.h
+ *
+ * Copyright (C) 1994-1999  Linus Torvalds
+ */
+
+#ifndef __FILEMAP_H
+#define __FILEMAP_H
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/uio.h>
+#include <linux/config.h>
+#include <asm/uaccess.h>
+
+extern size_t
+__filemap_copy_from_user_iovec(char *vaddr,
+			       const struct iovec *iov,
+			       size_t base,
+			       size_t bytes);
+
+/*
+ * Copy as much as we can into the page and return the number of bytes which
+ * were sucessfully copied.  If a fault is encountered then clear the page
+ * out to (offset+bytes) and return the number of bytes which were copied.
+ */
+static inline size_t
+filemap_copy_from_user(struct page *page, unsigned long offset,
+			const char __user *buf, unsigned bytes)
+{
+	char *kaddr;
+	int left;
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	left = __copy_from_user_inatomic(kaddr + offset, buf, bytes);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	if (left != 0) {
+		/* Do it the slow way */
+		kaddr = kmap(page);
+		left = __copy_from_user(kaddr + offset, buf, bytes);
+		kunmap(page);
+	}
+	return bytes - left;
+}
+
+/*
+ * This has the same sideeffects and return value as filemap_copy_from_user().
+ * The difference is that on a fault we need to memset the remainder of the
+ * page (out to offset+bytes), to emulate filemap_copy_from_user()'s
+ * single-segment behaviour.
+ */
+static inline size_t
+filemap_copy_from_user_iovec(struct page *page, unsigned long offset,
+			const struct iovec *iov, size_t base, size_t bytes)
+{
+	char *kaddr;
+	size_t copied;
+
+	kaddr = kmap_atomic(page, KM_USER0);
+	copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
+						base, bytes);
+	kunmap_atomic(kaddr, KM_USER0);
+	if (copied != bytes) {
+		kaddr = kmap(page);
+		copied = __filemap_copy_from_user_iovec(kaddr + offset, iov,
+							base, bytes);
+		kunmap(page);
+	}
+	return copied;
+}
+
+static inline void
+filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
+{
+	const struct iovec *iov = *iovp;
+	size_t base = *basep;
+
+	while (bytes) {
+		int copy = min(bytes, iov->iov_len - base);
+
+		bytes -= copy;
+		base += copy;
+		if (iov->iov_len == base) {
+			iov++;
+			base = 0;
+		}
+	}
+	*iovp = iov;
+	*basep = base;
+}
+#endif
+581
mm/filemap_xip.c
···
+/*
+ * linux/mm/filemap_xip.c
+ *
+ * Copyright (C) 2005 IBM Corporation
+ * Author: Carsten Otte <cotte@de.ibm.com>
+ *
+ * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/module.h>
+#include <linux/uio.h>
+#include <linux/rmap.h>
+#include <asm/tlbflush.h>
+#include "filemap.h"
+
+/*
+ * This is a file read routine for execute in place files, and uses
+ * the mapping->a_ops->get_xip_page() function for the actual low-level
+ * stuff.
+ *
+ * Note the struct file* is not used at all.  It may be NULL.
+ */
+static void
+do_xip_mapping_read(struct address_space *mapping,
+		    struct file_ra_state *_ra,
+		    struct file *filp,
+		    loff_t *ppos,
+		    read_descriptor_t *desc,
+		    read_actor_t actor)
+{
+	struct inode *inode = mapping->host;
+	unsigned long index, end_index, offset;
+	loff_t isize;
+
+	BUG_ON(!mapping->a_ops->get_xip_page);
+
+	index = *ppos >> PAGE_CACHE_SHIFT;
+	offset = *ppos & ~PAGE_CACHE_MASK;
+
+	isize = i_size_read(inode);
+	if (!isize)
+		goto out;
+
+	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+	for (;;) {
+		struct page *page;
+		unsigned long nr, ret;
+
+		/* nr is the maximum number of bytes to copy from this page */
+		nr = PAGE_CACHE_SIZE;
+		if (index >= end_index) {
+			if (index > end_index)
+				goto out;
+			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+			if (nr <= offset) {
+				goto out;
+			}
+		}
+		nr = nr - offset;
+
+		page = mapping->a_ops->get_xip_page(mapping,
+			index*(PAGE_SIZE/512), 0);
+		if (!page)
+			goto no_xip_page;
+		if (unlikely(IS_ERR(page))) {
+			if (PTR_ERR(page) == -ENODATA) {
+				/* sparse */
+				page = virt_to_page(empty_zero_page);
+			} else {
+				desc->error = PTR_ERR(page);
+				goto out;
+			}
+		} else
+			BUG_ON(!PageUptodate(page));
+
+		/* If users can be writing to this page using arbitrary
+		 * virtual addresses, take care about potential aliasing
+		 * before reading the page on the kernel side.
+		 */
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_page(page);
+
+		/*
+		 * Ok, we have the page, and it's up-to-date, so
+		 * now we can copy it to user space...
+		 *
+		 * The actor routine returns how many bytes were actually used..
+		 * NOTE! This may not be the same as how much of a user buffer
+		 * we filled up (we may be padding etc), so we can only update
+		 * "pos" here (the actor routine has to update the user buffer
+		 * pointers and the remaining count).
+		 */
+		ret = actor(desc, page, offset, nr);
+		offset += ret;
+		index += offset >> PAGE_CACHE_SHIFT;
+		offset &= ~PAGE_CACHE_MASK;
+
+		if (ret == nr && desc->count)
+			continue;
+		goto out;
+
+no_xip_page:
+		/* Did not get the page. Report it */
+		desc->error = -EIO;
+		goto out;
+	}
+
+out:
+	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
+	if (filp)
+		file_accessed(filp);
+}
+
+/*
+ * This is the "read()" routine for all filesystems
+ * that uses the get_xip_page address space operation.
+ */
+static ssize_t
+__xip_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
+		    unsigned long nr_segs, loff_t *ppos)
+{
+	struct file *filp = iocb->ki_filp;
+	ssize_t retval;
+	unsigned long seg;
+	size_t count;
+
+	count = 0;
+	for (seg = 0; seg < nr_segs; seg++) {
+		const struct iovec *iv = &iov[seg];
+
+		/*
+		 * If any segment has a negative length, or the cumulative
+		 * length ever wraps negative then return -EINVAL.
+		 */
+		count += iv->iov_len;
+		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
+			return -EINVAL;
+		if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
+			continue;
+		if (seg == 0)
+			return -EFAULT;
+		nr_segs = seg;
+		count -= iv->iov_len;	/* This segment is no good */
+		break;
+	}
+
+	retval = 0;
+	if (count) {
+		for (seg = 0; seg < nr_segs; seg++) {
+			read_descriptor_t desc;
+
+			desc.written = 0;
+			desc.arg.buf = iov[seg].iov_base;
+			desc.count = iov[seg].iov_len;
+			if (desc.count == 0)
+				continue;
+			desc.error = 0;
+			do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
+					    ppos, &desc, file_read_actor);
+			retval += desc.written;
+			if (!retval) {
+				retval = desc.error;
+				break;
+			}
+		}
+	}
+	return retval;
+}
+
+ssize_t
+xip_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count,
+		  loff_t pos)
+{
+	struct iovec local_iov = { .iov_base = buf, .iov_len = count };
+
+	BUG_ON(iocb->ki_pos != pos);
+	return __xip_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
+}
+EXPORT_SYMBOL_GPL(xip_file_aio_read);
+
+ssize_t
+xip_file_readv(struct file *filp, const struct iovec *iov,
+	       unsigned long nr_segs, loff_t *ppos)
+{
+	struct kiocb kiocb;
+
+	init_sync_kiocb(&kiocb, filp);
+	return __xip_file_aio_read(&kiocb, iov, nr_segs, ppos);
+}
+EXPORT_SYMBOL_GPL(xip_file_readv);
+
+ssize_t
+xip_file_sendfile(struct file *in_file, loff_t *ppos,
+		  size_t count, read_actor_t actor, void *target)
+{
+	read_descriptor_t desc;
+
+	if (!count)
+		return 0;
+
+	desc.written = 0;
+	desc.count = count;
+	desc.arg.data = target;
+	desc.error = 0;
+
+	do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
+			    ppos, &desc, actor);
+	if (desc.written)
+		return desc.written;
+	return desc.error;
+}
+EXPORT_SYMBOL_GPL(xip_file_sendfile);
+
+/*
+ * __xip_unmap is invoked from xip_unmap and
+ * xip_write
+ *
+ * This function walks all vmas of the address_space and unmaps the
+ * empty_zero_page when found at pgoff. Should it go in rmap.c?
+ */
+static void
+__xip_unmap (struct address_space * mapping,
+		     unsigned long pgoff)
+{
+	struct vm_area_struct *vma;
+	struct mm_struct *mm;
+	struct prio_tree_iter iter;
+	unsigned long address;
+	pte_t *pte;
+	pte_t pteval;
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+		mm = vma->vm_mm;
+		address = vma->vm_start +
+			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
+		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+		/*
+		 * We need the page_table_lock to protect us from page faults,
+		 * munmap, fork, etc...
+		 */
+		pte = page_check_address(virt_to_page(empty_zero_page), mm,
+					 address);
+		if (!IS_ERR(pte)) {
+			/* Nuke the page table entry. */
+			flush_cache_page(vma, address, pte_pfn(pte));
+			pteval = ptep_clear_flush(vma, address, pte);
+			BUG_ON(pte_dirty(pteval));
+			pte_unmap(pte);
+			spin_unlock(&mm->page_table_lock);
+		}
+	}
+	spin_unlock(&mapping->i_mmap_lock);
+}
+
+/*
+ * xip_nopage() is invoked via the vma operations vector for a
+ * mapped memory region to read in file data during a page fault.
+ *
+ * This function is derived from filemap_nopage, but used for execute in place
+ */
+static struct page *
+xip_file_nopage(struct vm_area_struct * area,
+		   unsigned long address,
+		   int *type)
+{
+	struct file *file = area->vm_file;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	struct page *page;
+	unsigned long size, pgoff, endoff;
+
+	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
+		+ area->vm_pgoff;
+	endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
+		+ area->vm_pgoff;
+
+	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	if (pgoff >= size) {
+		return NULL;
+	}
+
+	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
+	if (!IS_ERR(page)) {
+		BUG_ON(!PageUptodate(page));
+		return page;
+	}
+	if (PTR_ERR(page) != -ENODATA)
+		return NULL;
+
+	/* sparse block */
+	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
+	    (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
+	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
+		/* maybe shared writable, allocate new block */
+		page = mapping->a_ops->get_xip_page (mapping,
+			pgoff*(PAGE_SIZE/512), 1);
+		if (IS_ERR(page))
+			return NULL;
+		BUG_ON(!PageUptodate(page));
+		/* unmap page at pgoff from all other vmas */
+		__xip_unmap(mapping, pgoff);
+	} else {
+		/* not shared and writable, use empty_zero_page */
+		page = virt_to_page(empty_zero_page);
+	}
+
+	return page;
+}
+
+static struct vm_operations_struct xip_file_vm_ops = {
+	.nopage		= xip_file_nopage,
+};
+
+int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
+{
+	BUG_ON(!file->f_mapping->a_ops->get_xip_page);
+
+	file_accessed(file);
+	vma->vm_ops = &xip_file_vm_ops;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(xip_file_mmap);
+
+static ssize_t
+do_xip_file_write(struct kiocb *iocb, const struct iovec *iov,
+		  unsigned long nr_segs, loff_t pos, loff_t *ppos,
+		  size_t count)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space * mapping = file->f_mapping;
+	struct address_space_operations *a_ops = mapping->a_ops;
+	struct inode *inode = mapping->host;
+	long status = 0;
+	struct page *page;
+	size_t bytes;
+	const struct iovec *cur_iov = iov;	/* current iovec */
+	size_t iov_base = 0;			/* offset in the current iovec */
+	char __user *buf;
+	ssize_t written = 0;
+
+	BUG_ON(!mapping->a_ops->get_xip_page);
+
+	buf = iov->iov_base;
+	do {
+		unsigned long index;
+		unsigned long offset;
+		size_t copied;
+
+		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
+		index = pos >> PAGE_CACHE_SHIFT;
+		bytes = PAGE_CACHE_SIZE - offset;
+		if (bytes > count)
+			bytes = count;
+
+		/*
+		 * Bring in the user page that we will copy from _first_.
+		 * Otherwise there's a nasty deadlock on copying from the
+		 * same page as we're writing to, without it being marked
+		 * up-to-date.
+		 */
+		fault_in_pages_readable(buf, bytes);
+
+		page = a_ops->get_xip_page(mapping,
+					   index*(PAGE_SIZE/512), 0);
+		if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
+			/* we allocate a new page unmap it */
+			page = a_ops->get_xip_page(mapping,
+						   index*(PAGE_SIZE/512), 1);
+			if (!IS_ERR(page))
+				/* unmap page at pgoff from all other vmas */
+				__xip_unmap(mapping, index);
+
+		}
+
+		if (IS_ERR(page)) {
+			status = PTR_ERR(page);
+			break;
+		}
+
+		BUG_ON(!PageUptodate(page));
+
+		if (likely(nr_segs == 1))
+			copied = filemap_copy_from_user(page, offset,
+							buf, bytes);
+		else
+			copied = filemap_copy_from_user_iovec(page, offset,
+						cur_iov, iov_base, bytes);
+		flush_dcache_page(page);
+		if (likely(copied > 0)) {
+			status = copied;
+
+			if (status >= 0) {
+				written += status;
+				count -= status;
+				pos += status;
+				buf += status;
+				if (unlikely(nr_segs > 1))
+					filemap_set_next_iovec(&cur_iov,
+							&iov_base, status);
+			}
+		}
+		if (unlikely(copied != bytes))
+			if (status >= 0)
+				status = -EFAULT;
+		if (status < 0)
+			break;
+	} while (count);
+	*ppos = pos;
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_sem.
+	 */
+	if (pos > inode->i_size) {
+		i_size_write(inode, pos);
+		mark_inode_dirty(inode);
+	}
+
+	return written ? written : status;
+}
+
+static ssize_t
+xip_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
+			  unsigned long nr_segs, loff_t *ppos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space * mapping = file->f_mapping;
+	size_t ocount;		/* original count */
+	size_t count;		/* after file limit checks */
+	struct inode *inode = mapping->host;
+	unsigned long seg;
+	loff_t pos;
+	ssize_t written;
+	ssize_t err;
+
+	ocount = 0;
+	for (seg = 0; seg < nr_segs; seg++) {
+		const struct iovec *iv = &iov[seg];
+
+		/*
+		 * If any segment has a negative length, or the cumulative
+		 * length ever wraps negative then return -EINVAL.
+		 */
+		ocount += iv->iov_len;
+		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
+			return -EINVAL;
+		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
+			continue;
+		if (seg == 0)
+			return -EFAULT;
+		nr_segs = seg;
+		ocount -= iv->iov_len;	/* This segment is no good */
+		break;
+	}
+
+	count = ocount;
+	pos = *ppos;
+
+	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+	written = 0;
+
+	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+	if (err)
+		goto out;
+
+	if (count == 0)
+		goto out;
+
+	err = remove_suid(file->f_dentry);
+	if (err)
+		goto out;
+
+	inode_update_time(inode, 1);
+
+	/* use execute in place to copy directly to disk */
+	written = do_xip_file_write (iocb, iov,
+				     nr_segs, pos, ppos, count);
+ out:
+	return written ? written : err;
+}
+
+static ssize_t
+__xip_file_write_nolock(struct file *file, const struct iovec *iov,
+			unsigned long nr_segs, loff_t *ppos)
+{
+	struct kiocb kiocb;
+
+	init_sync_kiocb(&kiocb, file);
+	return xip_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
+}
+
+ssize_t
+xip_file_aio_write(struct kiocb *iocb, const char __user *buf,
+		   size_t count, loff_t pos)
+{
+	struct file *file = iocb->ki_filp;
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+	struct iovec local_iov = { .iov_base = (void __user *)buf,
+				   .iov_len = count };
+
+	BUG_ON(iocb->ki_pos != pos);
+
+	down(&inode->i_sem);
+	ret = xip_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
+	up(&inode->i_sem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xip_file_aio_write);
+
+ssize_t xip_file_writev(struct file *file, const struct iovec *iov,
+			unsigned long nr_segs, loff_t *ppos)
+{
+	struct address_space *mapping = file->f_mapping;
+	struct inode *inode = mapping->host;
+	ssize_t ret;
+
+	down(&inode->i_sem);
+	ret = __xip_file_write_nolock(file, iov, nr_segs, ppos);
+	up(&inode->i_sem);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xip_file_writev);
+
+/*
+ * truncate a page used for execute in place
+ * functionality is analog to block_truncate_page but does use get_xip_page
+ * to get the page instead of page cache
+ */
+int
+xip_truncate_page(struct address_space *mapping, loff_t from)
+{
+	pgoff_t index = from >> PAGE_CACHE_SHIFT;
+	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned blocksize;
+	unsigned length;
+	struct page *page;
+	void *kaddr;
+	int err;
+
+	BUG_ON(!mapping->a_ops->get_xip_page);
+
+	blocksize = 1 << mapping->host->i_blkbits;
+	length = offset & (blocksize - 1);
+
+	/* Block boundary? Nothing to do */
+	if (!length)
+		return 0;
+
+	length = blocksize - length;
+
+	page = mapping->a_ops->get_xip_page(mapping,
+					    index*(PAGE_SIZE/512), 0);
+	err = -ENOMEM;
+	if (!page)
+		goto out;
+	if (unlikely(IS_ERR(page))) {
+		if (PTR_ERR(page) == -ENODATA) {
+			/* Hole? No need to truncate */
+			return 0;
+		} else {
+			err = PTR_ERR(page);
+			goto out;
+		}
+	} else
+		BUG_ON(!PageUptodate(page));
+	kaddr = kmap_atomic(page, KM_USER0);
+	memset(kaddr + offset, 0, length);
+	kunmap_atomic(kaddr, KM_USER0);
+
+	flush_dcache_page(page);
+	err = 0;
+out:
+	return err;
+}
+EXPORT_SYMBOL_GPL(xip_truncate_page);
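
For context (not part of the patch): a filesystem that adopts these helpers
would call xip_truncate_page() from its truncate path where it would
otherwise use block_truncate_page().  A hypothetical sketch, with
examplefs_inode_is_xip() as a made-up predicate:

	static int examplefs_truncate_tail(struct address_space *mapping,
					   loff_t from, get_block_t *get_block)
	{
		/* zero the tail of the last block, via the xip helper when
		 * the inode hands out direct pages, else via the page cache */
		if (examplefs_inode_is_xip(mapping->host))
			return xip_truncate_page(mapping, from);
		return block_truncate_page(mapping, from, get_block);
	}
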
+2 -2
mm/rmap.c
···
  *
  * On success returns with mapped pte and locked mm->page_table_lock.
  */
-static pte_t *page_check_address(struct page *page, struct mm_struct *mm,
-			unsigned long address)
+pte_t *page_check_address(struct page *page, struct mm_struct *mm,
+			  unsigned long address)
 {
 	pgd_t *pgd;
 	pud_t *pud;