Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xip: support non-struct page backed memory

Convert XIP to support non-struct page backed memory, using VM_MIXEDMAP for
the user mappings.

This requires the get_xip_page API to be changed to an address based one.
Improve the API layering a little bit too, while we're here.

This is required in order to support XIP filesystems on memory that isn't
backed with struct page (but memory with struct page is still supported too).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Acked-by: Carsten Otte <cotte@de.ibm.com>
Cc: Jared Hulbert <jaredeh@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Nick Piggin, committed by Linus Torvalds.
70688e4d 30afcb4b

+126 -132
+1 -1
fs/ext2/inode.c
··· 796 796 797 797 const struct address_space_operations ext2_aops_xip = { 798 798 .bmap = ext2_bmap, 799 - .get_xip_page = ext2_get_xip_page, 799 + .get_xip_mem = ext2_get_xip_mem, 800 800 }; 801 801 802 802 const struct address_space_operations ext2_nobh_aops = {
+15 -22
fs/ext2/xip.c
··· 15 15 #include "xip.h" 16 16 17 17 static inline int 18 - __inode_direct_access(struct inode *inode, sector_t sector, 18 + __inode_direct_access(struct inode *inode, sector_t block, 19 19 void **kaddr, unsigned long *pfn) 20 20 { 21 21 struct block_device *bdev = inode->i_sb->s_bdev; 22 22 struct block_device_operations *ops = bdev->bd_disk->fops; 23 + sector_t sector; 24 + 25 + sector = block * (PAGE_SIZE / 512); /* ext2 block to bdev sector */ 23 26 24 27 BUG_ON(!ops->direct_access); 25 28 return ops->direct_access(bdev, sector, kaddr, pfn); 26 29 } 27 30 28 31 static inline int 29 - __ext2_get_sector(struct inode *inode, sector_t offset, int create, 32 + __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create, 30 33 sector_t *result) 31 34 { 32 35 struct buffer_head tmp; 33 36 int rc; 34 37 35 38 memset(&tmp, 0, sizeof(struct buffer_head)); 36 - rc = ext2_get_block(inode, offset/ (PAGE_SIZE/512), &tmp, 37 - create); 39 + rc = ext2_get_block(inode, pgoff, &tmp, create); 38 40 *result = tmp.b_blocknr; 39 41 40 42 /* did we get a sparse block (hole in the file)? 
*/ ··· 49 47 } 50 48 51 49 int 52 - ext2_clear_xip_target(struct inode *inode, int block) 50 + ext2_clear_xip_target(struct inode *inode, sector_t block) 53 51 { 54 - sector_t sector = block * (PAGE_SIZE/512); 55 52 void *kaddr; 56 53 unsigned long pfn; 57 54 int rc; 58 55 59 - rc = __inode_direct_access(inode, sector, &kaddr, &pfn); 56 + rc = __inode_direct_access(inode, block, &kaddr, &pfn); 60 57 if (!rc) 61 58 clear_page(kaddr); 62 59 return rc; ··· 73 72 } 74 73 } 75 74 76 - struct page * 77 - ext2_get_xip_page(struct address_space *mapping, sector_t offset, 78 - int create) 75 + int ext2_get_xip_mem(struct address_space *mapping, pgoff_t pgoff, int create, 76 + void **kmem, unsigned long *pfn) 79 77 { 80 78 int rc; 81 - void *kaddr; 82 - unsigned long pfn; 83 - sector_t sector; 79 + sector_t block; 84 80 85 81 /* first, retrieve the sector number */ 86 - rc = __ext2_get_sector(mapping->host, offset, create, &sector); 82 + rc = __ext2_get_block(mapping->host, pgoff, create, &block); 87 83 if (rc) 88 - goto error; 84 + return rc; 89 85 90 86 /* retrieve address of the target data */ 91 - rc = __inode_direct_access 92 - (mapping->host, sector * (PAGE_SIZE/512), &kaddr, &pfn); 93 - if (!rc) 94 - return pfn_to_page(pfn); 95 - 96 - error: 97 - return ERR_PTR(rc); 87 + rc = __inode_direct_access(mapping->host, block, kmem, pfn); 88 + return rc; 98 89 }
+5 -4
fs/ext2/xip.h
··· 7 7 8 8 #ifdef CONFIG_EXT2_FS_XIP 9 9 extern void ext2_xip_verify_sb (struct super_block *); 10 - extern int ext2_clear_xip_target (struct inode *, int); 10 + extern int ext2_clear_xip_target (struct inode *, sector_t); 11 11 12 12 static inline int ext2_use_xip (struct super_block *sb) 13 13 { 14 14 struct ext2_sb_info *sbi = EXT2_SB(sb); 15 15 return (sbi->s_mount_opt & EXT2_MOUNT_XIP); 16 16 } 17 - struct page* ext2_get_xip_page (struct address_space *, sector_t, int); 18 - #define mapping_is_xip(map) unlikely(map->a_ops->get_xip_page) 17 + int ext2_get_xip_mem(struct address_space *, pgoff_t, int, 18 + void **, unsigned long *); 19 + #define mapping_is_xip(map) unlikely(map->a_ops->get_xip_mem) 19 20 #else 20 21 #define mapping_is_xip(map) 0 21 22 #define ext2_xip_verify_sb(sb) do { } while (0) 22 23 #define ext2_use_xip(sb) 0 23 24 #define ext2_clear_xip_target(inode, chain) 0 24 - #define ext2_get_xip_page NULL 25 + #define ext2_get_xip_mem NULL 25 26 #endif
+1 -1
fs/open.c
··· 837 837 if (f->f_flags & O_DIRECT) { 838 838 if (!f->f_mapping->a_ops || 839 839 ((!f->f_mapping->a_ops->direct_IO) && 840 - (!f->f_mapping->a_ops->get_xip_page))) { 840 + (!f->f_mapping->a_ops->get_xip_mem))) { 841 841 fput(f); 842 842 f = ERR_PTR(-EINVAL); 843 843 }
+2 -2
include/linux/fs.h
··· 474 474 int (*releasepage) (struct page *, gfp_t); 475 475 ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, 476 476 loff_t offset, unsigned long nr_segs); 477 - struct page* (*get_xip_page)(struct address_space *, sector_t, 478 - int); 477 + int (*get_xip_mem)(struct address_space *, pgoff_t, int, 478 + void **, unsigned long *); 479 479 /* migrate the contents of a page to the specified target */ 480 480 int (*migratepage) (struct address_space *, 481 481 struct page *, struct page *);
+1 -1
mm/fadvise.c
··· 49 49 goto out; 50 50 } 51 51 52 - if (mapping->a_ops->get_xip_page) { 52 + if (mapping->a_ops->get_xip_mem) { 53 53 switch (advice) { 54 54 case POSIX_FADV_NORMAL: 55 55 case POSIX_FADV_RANDOM:
+100 -100
mm/filemap_xip.c
··· 15 15 #include <linux/rmap.h> 16 16 #include <linux/sched.h> 17 17 #include <asm/tlbflush.h> 18 + #include <asm/io.h> 18 19 19 20 /* 20 21 * We do use our own empty page to avoid interference with other users ··· 43 42 44 43 /* 45 44 * This is a file read routine for execute in place files, and uses 46 - * the mapping->a_ops->get_xip_page() function for the actual low-level 45 + * the mapping->a_ops->get_xip_mem() function for the actual low-level 47 46 * stuff. 48 47 * 49 48 * Note the struct file* is not used at all. It may be NULL. 50 49 */ 51 - static void 50 + static ssize_t 52 51 do_xip_mapping_read(struct address_space *mapping, 53 52 struct file_ra_state *_ra, 54 53 struct file *filp, 55 - loff_t *ppos, 56 - read_descriptor_t *desc, 57 - read_actor_t actor) 54 + char __user *buf, 55 + size_t len, 56 + loff_t *ppos) 58 57 { 59 58 struct inode *inode = mapping->host; 60 59 pgoff_t index, end_index; 61 60 unsigned long offset; 62 - loff_t isize; 61 + loff_t isize, pos; 62 + size_t copied = 0, error = 0; 63 63 64 - BUG_ON(!mapping->a_ops->get_xip_page); 64 + BUG_ON(!mapping->a_ops->get_xip_mem); 65 65 66 - index = *ppos >> PAGE_CACHE_SHIFT; 67 - offset = *ppos & ~PAGE_CACHE_MASK; 66 + pos = *ppos; 67 + index = pos >> PAGE_CACHE_SHIFT; 68 + offset = pos & ~PAGE_CACHE_MASK; 68 69 69 70 isize = i_size_read(inode); 70 71 if (!isize) 71 72 goto out; 72 73 73 74 end_index = (isize - 1) >> PAGE_CACHE_SHIFT; 74 - for (;;) { 75 - struct page *page; 76 - unsigned long nr, ret; 75 + do { 76 + unsigned long nr, left; 77 + void *xip_mem; 78 + unsigned long xip_pfn; 79 + int zero = 0; 77 80 78 81 /* nr is the maximum number of bytes to copy from this page */ 79 82 nr = PAGE_CACHE_SIZE; ··· 90 85 } 91 86 } 92 87 nr = nr - offset; 88 + if (nr > len) 89 + nr = len; 93 90 94 - page = mapping->a_ops->get_xip_page(mapping, 95 - index*(PAGE_SIZE/512), 0); 96 - if (!page) 97 - goto no_xip_page; 98 - if (unlikely(IS_ERR(page))) { 99 - if (PTR_ERR(page) == -ENODATA) { 91 + error = 
mapping->a_ops->get_xip_mem(mapping, index, 0, 92 + &xip_mem, &xip_pfn); 93 + if (unlikely(error)) { 94 + if (error == -ENODATA) { 100 95 /* sparse */ 101 - page = ZERO_PAGE(0); 102 - } else { 103 - desc->error = PTR_ERR(page); 96 + zero = 1; 97 + } else 104 98 goto out; 105 - } 106 99 } 107 100 108 101 /* If users can be writing to this page using arbitrary ··· 108 105 * before reading the page on the kernel side. 109 106 */ 110 107 if (mapping_writably_mapped(mapping)) 111 - flush_dcache_page(page); 108 + /* address based flush */ ; 112 109 113 110 /* 114 - * Ok, we have the page, so now we can copy it to user space... 111 + * Ok, we have the mem, so now we can copy it to user space... 115 112 * 116 113 * The actor routine returns how many bytes were actually used.. 117 114 * NOTE! This may not be the same as how much of a user buffer ··· 119 116 * "pos" here (the actor routine has to update the user buffer 120 117 * pointers and the remaining count). 121 118 */ 122 - ret = actor(desc, page, offset, nr); 123 - offset += ret; 119 + if (!zero) 120 + left = __copy_to_user(buf+copied, xip_mem+offset, nr); 121 + else 122 + left = __clear_user(buf + copied, nr); 123 + 124 + if (left) { 125 + error = -EFAULT; 126 + goto out; 127 + } 128 + 129 + copied += (nr - left); 130 + offset += (nr - left); 124 131 index += offset >> PAGE_CACHE_SHIFT; 125 132 offset &= ~PAGE_CACHE_MASK; 126 - 127 - if (ret == nr && desc->count) 128 - continue; 129 - goto out; 130 - 131 - no_xip_page: 132 - /* Did not get the page. Report it */ 133 - desc->error = -EIO; 134 - goto out; 135 - } 133 + } while (copied < len); 136 134 137 135 out: 138 - *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset; 136 + *ppos = pos + copied; 139 137 if (filp) 140 138 file_accessed(filp); 139 + 140 + return (copied ? 
copied : error); 141 141 } 142 142 143 143 ssize_t 144 144 xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 145 145 { 146 - read_descriptor_t desc; 147 - 148 146 if (!access_ok(VERIFY_WRITE, buf, len)) 149 147 return -EFAULT; 150 148 151 - desc.written = 0; 152 - desc.arg.buf = buf; 153 - desc.count = len; 154 - desc.error = 0; 155 - 156 - do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp, 157 - ppos, &desc, file_read_actor); 158 - 159 - if (desc.written) 160 - return desc.written; 161 - else 162 - return desc.error; 149 + return do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp, 150 + buf, len, ppos); 163 151 } 164 152 EXPORT_SYMBOL_GPL(xip_file_read); 165 153 ··· 205 211 * 206 212 * This function is derived from filemap_fault, but used for execute in place 207 213 */ 208 - static int xip_file_fault(struct vm_area_struct *area, struct vm_fault *vmf) 214 + static int xip_file_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 209 215 { 210 - struct file *file = area->vm_file; 216 + struct file *file = vma->vm_file; 211 217 struct address_space *mapping = file->f_mapping; 212 218 struct inode *inode = mapping->host; 213 - struct page *page; 214 219 pgoff_t size; 220 + void *xip_mem; 221 + unsigned long xip_pfn; 222 + struct page *page; 223 + int error; 215 224 216 225 /* XXX: are VM_FAULT_ codes OK? 
*/ 217 226 ··· 222 225 if (vmf->pgoff >= size) 223 226 return VM_FAULT_SIGBUS; 224 227 225 - page = mapping->a_ops->get_xip_page(mapping, 226 - vmf->pgoff*(PAGE_SIZE/512), 0); 227 - if (!IS_ERR(page)) 228 - goto out; 229 - if (PTR_ERR(page) != -ENODATA) 228 + error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 0, 229 + &xip_mem, &xip_pfn); 230 + if (likely(!error)) 231 + goto found; 232 + if (error != -ENODATA) 230 233 return VM_FAULT_OOM; 231 234 232 235 /* sparse block */ 233 - if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) && 234 - (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) && 236 + if ((vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) && 237 + (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) && 235 238 (!(mapping->host->i_sb->s_flags & MS_RDONLY))) { 239 + int err; 240 + 236 241 /* maybe shared writable, allocate new block */ 237 - page = mapping->a_ops->get_xip_page(mapping, 238 - vmf->pgoff*(PAGE_SIZE/512), 1); 239 - if (IS_ERR(page)) 242 + error = mapping->a_ops->get_xip_mem(mapping, vmf->pgoff, 1, 243 + &xip_mem, &xip_pfn); 244 + if (error) 240 245 return VM_FAULT_SIGBUS; 241 - /* unmap page at pgoff from all other vmas */ 246 + /* unmap sparse mappings at pgoff from all other vmas */ 242 247 __xip_unmap(mapping, vmf->pgoff); 248 + 249 + found: 250 + err = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, 251 + xip_pfn); 252 + if (err == -ENOMEM) 253 + return VM_FAULT_OOM; 254 + BUG_ON(err); 255 + return VM_FAULT_NOPAGE; 243 256 } else { 244 257 /* not shared and writable, use xip_sparse_page() */ 245 258 page = xip_sparse_page(); 246 259 if (!page) 247 260 return VM_FAULT_OOM; 248 - } 249 261 250 - out: 251 - page_cache_get(page); 252 - vmf->page = page; 253 - return 0; 262 + page_cache_get(page); 263 + vmf->page = page; 264 + return 0; 265 + } 254 266 } 255 267 256 268 static struct vm_operations_struct xip_file_vm_ops = { ··· 268 262 269 263 int xip_file_mmap(struct file * file, struct vm_area_struct * vma) 270 264 { 271 - 
BUG_ON(!file->f_mapping->a_ops->get_xip_page); 265 + BUG_ON(!file->f_mapping->a_ops->get_xip_mem); 272 266 273 267 file_accessed(file); 274 268 vma->vm_ops = &xip_file_vm_ops; 275 - vma->vm_flags |= VM_CAN_NONLINEAR; 269 + vma->vm_flags |= VM_CAN_NONLINEAR | VM_MIXEDMAP; 276 270 return 0; 277 271 } 278 272 EXPORT_SYMBOL_GPL(xip_file_mmap); ··· 285 279 const struct address_space_operations *a_ops = mapping->a_ops; 286 280 struct inode *inode = mapping->host; 287 281 long status = 0; 288 - struct page *page; 289 282 size_t bytes; 290 283 ssize_t written = 0; 291 284 292 - BUG_ON(!mapping->a_ops->get_xip_page); 285 + BUG_ON(!mapping->a_ops->get_xip_mem); 293 286 294 287 do { 295 288 unsigned long index; 296 289 unsigned long offset; 297 290 size_t copied; 298 - char *kaddr; 291 + void *xip_mem; 292 + unsigned long xip_pfn; 299 293 300 294 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ 301 295 index = pos >> PAGE_CACHE_SHIFT; ··· 303 297 if (bytes > count) 304 298 bytes = count; 305 299 306 - page = a_ops->get_xip_page(mapping, 307 - index*(PAGE_SIZE/512), 0); 308 - if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) { 300 + status = a_ops->get_xip_mem(mapping, index, 0, 301 + &xip_mem, &xip_pfn); 302 + if (status == -ENODATA) { 309 303 /* we allocate a new page unmap it */ 310 - page = a_ops->get_xip_page(mapping, 311 - index*(PAGE_SIZE/512), 1); 312 - if (!IS_ERR(page)) 304 + status = a_ops->get_xip_mem(mapping, index, 1, 305 + &xip_mem, &xip_pfn); 306 + if (!status) 313 307 /* unmap page at pgoff from all other vmas */ 314 308 __xip_unmap(mapping, index); 315 309 } 316 310 317 - if (IS_ERR(page)) { 318 - status = PTR_ERR(page); 311 + if (status) 319 312 break; 320 - } 321 313 322 - fault_in_pages_readable(buf, bytes); 323 - kaddr = kmap_atomic(page, KM_USER0); 324 314 copied = bytes - 325 - __copy_from_user_inatomic_nocache(kaddr + offset, buf, bytes); 326 - kunmap_atomic(kaddr, KM_USER0); 327 - flush_dcache_page(page); 315 + __copy_from_user_nocache(xip_mem 
+ offset, buf, bytes); 328 316 329 317 if (likely(copied > 0)) { 330 318 status = copied; ··· 398 398 399 399 /* 400 400 * truncate a page used for execute in place 401 - * functionality is analog to block_truncate_page but does use get_xip_page 401 + * functionality is analog to block_truncate_page but does use get_xip_mem 402 402 * to get the page instead of page cache 403 403 */ 404 404 int ··· 408 408 unsigned offset = from & (PAGE_CACHE_SIZE-1); 409 409 unsigned blocksize; 410 410 unsigned length; 411 - struct page *page; 411 + void *xip_mem; 412 + unsigned long xip_pfn; 413 + int err; 412 414 413 - BUG_ON(!mapping->a_ops->get_xip_page); 415 + BUG_ON(!mapping->a_ops->get_xip_mem); 414 416 415 417 blocksize = 1 << mapping->host->i_blkbits; 416 418 length = offset & (blocksize - 1); ··· 423 421 424 422 length = blocksize - length; 425 423 426 - page = mapping->a_ops->get_xip_page(mapping, 427 - index*(PAGE_SIZE/512), 0); 428 - if (!page) 429 - return -ENOMEM; 430 - if (unlikely(IS_ERR(page))) { 431 - if (PTR_ERR(page) == -ENODATA) 424 + err = mapping->a_ops->get_xip_mem(mapping, index, 0, 425 + &xip_mem, &xip_pfn); 426 + if (unlikely(err)) { 427 + if (err == -ENODATA) 432 428 /* Hole? No need to truncate */ 433 429 return 0; 434 430 else 435 - return PTR_ERR(page); 431 + return err; 436 432 } 437 - zero_user(page, offset, length); 433 + memset(xip_mem + offset, 0, length); 438 434 return 0; 439 435 } 440 436 EXPORT_SYMBOL_GPL(xip_truncate_page);
+1 -1
mm/madvise.c
··· 112 112 if (!file) 113 113 return -EBADF; 114 114 115 - if (file->f_mapping->a_ops->get_xip_page) { 115 + if (file->f_mapping->a_ops->get_xip_mem) { 116 116 /* no bad return value, but ignore advice */ 117 117 return 0; 118 118 }