Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cramfs: add mmap support

When cramfs in physical memory is used, we have the opportunity
to map files directly from ROM into user space, saving on
RAM usage. This gives us Execute-In-Place (XIP) support.

For a file to be mmap()-able, the map area has to correspond to a range
of uncompressed and contiguous blocks, and in the MMU case it also has
to be page aligned. A version of mkcramfs with appropriate support is
necessary to create such a filesystem image.

In the MMU case it may happen for a vma structure to extend beyond the
actual file size. This is notably the case in binfmt_elf.c:elf_map().
It may also happen that the file's last block is shared with other files
and cannot be mapped as is. Rather than refusing to mmap it, we do a
"mixed" map and let the regular fault handler populate the unmapped area
with RAM-backed pages. In practice the unmapped area is seldom accessed,
so page faults might never occur before this area is discarded.

In the non-MMU case it is the get_unmapped_area method that is responsible
for providing the address where the actual data can be found. No mapping
is necessary, of course.

Signed-off-by: Nicolas Pitre <nico@linaro.org>
Tested-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

authored by

Nicolas Pitre and committed by
Al Viro
eddcd976 fd4f6f2a

+209
+209
fs/cramfs/inode.c
··· 15 15 16 16 #include <linux/module.h> 17 17 #include <linux/fs.h> 18 + #include <linux/file.h> 18 19 #include <linux/pagemap.h> 20 + #include <linux/pfn_t.h> 21 + #include <linux/ramfs.h> 19 22 #include <linux/init.h> 20 23 #include <linux/string.h> 21 24 #include <linux/blkdev.h> ··· 54 51 static const struct super_operations cramfs_ops; 55 52 static const struct inode_operations cramfs_dir_inode_operations; 56 53 static const struct file_operations cramfs_directory_operations; 54 + static const struct file_operations cramfs_physmem_fops; 57 55 static const struct address_space_operations cramfs_aops; 58 56 59 57 static DEFINE_MUTEX(read_mutex); ··· 102 98 case S_IFREG: 103 99 inode->i_fop = &generic_ro_fops; 104 100 inode->i_data.a_ops = &cramfs_aops; 101 + if (IS_ENABLED(CONFIG_CRAMFS_MTD) && 102 + CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS && 103 + CRAMFS_SB(sb)->linear_phys_addr) 104 + inode->i_fop = &cramfs_physmem_fops; 105 105 break; 106 106 case S_IFDIR: 107 107 inode->i_op = &cramfs_dir_inode_operations; ··· 286 278 else 287 279 return NULL; 288 280 } 281 + 282 + /* 283 + * For a mapping to be possible, we need a range of uncompressed and 284 + * contiguous blocks. Return the offset for the first block and number of 285 + * valid blocks for which that is true, or zero otherwise. 286 + */ 287 + static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages) 288 + { 289 + struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); 290 + int i; 291 + u32 *blockptrs, first_block_addr; 292 + 293 + /* 294 + * We can dereference memory directly here as this code may be 295 + * reached only when there is a direct filesystem image mapping 296 + * available in memory. 
297 + */ 298 + blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode) + pgoff * 4); 299 + first_block_addr = blockptrs[0] & ~CRAMFS_BLK_FLAGS; 300 + i = 0; 301 + do { 302 + u32 block_off = i * (PAGE_SIZE >> CRAMFS_BLK_DIRECT_PTR_SHIFT); 303 + u32 expect = (first_block_addr + block_off) | 304 + CRAMFS_BLK_FLAG_DIRECT_PTR | 305 + CRAMFS_BLK_FLAG_UNCOMPRESSED; 306 + if (blockptrs[i] != expect) { 307 + pr_debug("range: block %d/%d got %#x expects %#x\n", 308 + pgoff+i, pgoff + *pages - 1, 309 + blockptrs[i], expect); 310 + if (i == 0) 311 + return 0; 312 + break; 313 + } 314 + } while (++i < *pages); 315 + 316 + *pages = i; 317 + return first_block_addr << CRAMFS_BLK_DIRECT_PTR_SHIFT; 318 + } 319 + 320 + #ifdef CONFIG_MMU 321 + 322 + /* 323 + * Return true if the last page of a file in the filesystem image contains 324 + * some other data that doesn't belong to that file. It is assumed that the 325 + * last block is CRAMFS_BLK_FLAG_DIRECT_PTR | CRAMFS_BLK_FLAG_UNCOMPRESSED 326 + * (verified by cramfs_get_block_range() and directly accessible in memory. 327 + */ 328 + static bool cramfs_last_page_is_shared(struct inode *inode) 329 + { 330 + struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); 331 + u32 partial, last_page, blockaddr, *blockptrs; 332 + char *tail_data; 333 + 334 + partial = offset_in_page(inode->i_size); 335 + if (!partial) 336 + return false; 337 + last_page = inode->i_size >> PAGE_SHIFT; 338 + blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode)); 339 + blockaddr = blockptrs[last_page] & ~CRAMFS_BLK_FLAGS; 340 + blockaddr <<= CRAMFS_BLK_DIRECT_PTR_SHIFT; 341 + tail_data = sbi->linear_virt_addr + blockaddr + partial; 342 + return memchr_inv(tail_data, 0, PAGE_SIZE - partial) ? 
true : false; 343 + } 344 + 345 + static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) 346 + { 347 + struct inode *inode = file_inode(file); 348 + struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); 349 + unsigned int pages, max_pages, offset; 350 + unsigned long address, pgoff = vma->vm_pgoff; 351 + char *bailout_reason; 352 + int ret; 353 + 354 + ret = generic_file_readonly_mmap(file, vma); 355 + if (ret) 356 + return ret; 357 + 358 + /* 359 + * Now try to pre-populate ptes for this vma with a direct 360 + * mapping avoiding memory allocation when possible. 361 + */ 362 + 363 + /* Could COW work here? */ 364 + bailout_reason = "vma is writable"; 365 + if (vma->vm_flags & VM_WRITE) 366 + goto bailout; 367 + 368 + max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; 369 + bailout_reason = "beyond file limit"; 370 + if (pgoff >= max_pages) 371 + goto bailout; 372 + pages = min(vma_pages(vma), max_pages - pgoff); 373 + 374 + offset = cramfs_get_block_range(inode, pgoff, &pages); 375 + bailout_reason = "unsuitable block layout"; 376 + if (!offset) 377 + goto bailout; 378 + address = sbi->linear_phys_addr + offset; 379 + bailout_reason = "data is not page aligned"; 380 + if (!PAGE_ALIGNED(address)) 381 + goto bailout; 382 + 383 + /* Don't map the last page if it contains some other data */ 384 + if (pgoff + pages == max_pages && cramfs_last_page_is_shared(inode)) { 385 + pr_debug("mmap: %s: last page is shared\n", 386 + file_dentry(file)->d_name.name); 387 + pages--; 388 + } 389 + 390 + if (!pages) { 391 + bailout_reason = "no suitable block remaining"; 392 + goto bailout; 393 + } 394 + 395 + if (pages == vma_pages(vma)) { 396 + /* 397 + * The entire vma is mappable. remap_pfn_range() will 398 + * make it distinguishable from a non-direct mapping 399 + * in /proc/<pid>/maps by substituting the file offset 400 + * with the actual physical address. 
401 + */ 402 + ret = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, 403 + pages * PAGE_SIZE, vma->vm_page_prot); 404 + } else { 405 + /* 406 + * Let's create a mixed map if we can't map it all. 407 + * The normal paging machinery will take care of the 408 + * unpopulated ptes via cramfs_readpage(). 409 + */ 410 + int i; 411 + vma->vm_flags |= VM_MIXEDMAP; 412 + for (i = 0; i < pages && !ret; i++) { 413 + unsigned long off = i * PAGE_SIZE; 414 + pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV); 415 + ret = vm_insert_mixed(vma, vma->vm_start + off, pfn); 416 + } 417 + } 418 + 419 + if (!ret) 420 + pr_debug("mapped %s[%lu] at 0x%08lx (%u/%lu pages) " 421 + "to vma 0x%08lx, page_prot 0x%llx\n", 422 + file_dentry(file)->d_name.name, pgoff, 423 + address, pages, vma_pages(vma), vma->vm_start, 424 + (unsigned long long)pgprot_val(vma->vm_page_prot)); 425 + return ret; 426 + 427 + bailout: 428 + pr_debug("%s[%lu]: direct mmap impossible: %s\n", 429 + file_dentry(file)->d_name.name, pgoff, bailout_reason); 430 + /* Didn't manage any direct map, but normal paging is still possible */ 431 + return 0; 432 + } 433 + 434 + #else /* CONFIG_MMU */ 435 + 436 + static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) 437 + { 438 + return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 
0 : -ENOSYS; 439 + } 440 + 441 + static unsigned long cramfs_physmem_get_unmapped_area(struct file *file, 442 + unsigned long addr, unsigned long len, 443 + unsigned long pgoff, unsigned long flags) 444 + { 445 + struct inode *inode = file_inode(file); 446 + struct super_block *sb = inode->i_sb; 447 + struct cramfs_sb_info *sbi = CRAMFS_SB(sb); 448 + unsigned int pages, block_pages, max_pages, offset; 449 + 450 + pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 451 + max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; 452 + if (pgoff >= max_pages || pages > max_pages - pgoff) 453 + return -EINVAL; 454 + block_pages = pages; 455 + offset = cramfs_get_block_range(inode, pgoff, &block_pages); 456 + if (!offset || block_pages != pages) 457 + return -ENOSYS; 458 + addr = sbi->linear_phys_addr + offset; 459 + pr_debug("get_unmapped for %s ofs %#lx siz %lu at 0x%08lx\n", 460 + file_dentry(file)->d_name.name, pgoff*PAGE_SIZE, len, addr); 461 + return addr; 462 + } 463 + 464 + static unsigned int cramfs_physmem_mmap_capabilities(struct file *file) 465 + { 466 + return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | 467 + NOMMU_MAP_READ | NOMMU_MAP_EXEC; 468 + } 469 + 470 + #endif /* CONFIG_MMU */ 471 + 472 + static const struct file_operations cramfs_physmem_fops = { 473 + .llseek = generic_file_llseek, 474 + .read_iter = generic_file_read_iter, 475 + .splice_read = generic_file_splice_read, 476 + .mmap = cramfs_physmem_mmap, 477 + #ifndef CONFIG_MMU 478 + .get_unmapped_area = cramfs_physmem_get_unmapped_area, 479 + .mmap_capabilities = cramfs_physmem_mmap_capabilities, 480 + #endif 481 + }; 289 482 290 483 static void cramfs_kill_sb(struct super_block *sb) 291 484 {