Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: update mem char driver to use mmap_prepare

Update the mem char driver (backing /dev/mem and /dev/zero) to use
f_op->mmap_prepare hook rather than the deprecated f_op->mmap.

The /dev/zero implementation has a very unique and rather concerning
characteristic in that it converts MAP_PRIVATE mmap() mappings to
anonymous mappings when they are, in fact, not.

The new f_op->mmap_prepare() can support this, but rather than introducing
a helper function to perform this hack (and risk introducing other users),
utilise the success hook to do so.

We utilise the newly introduced shmem_zero_setup_desc() to allow for the
shared mapping case via an f_op->mmap_prepare() hook.

We also use the desc->action.error_hook to filter the remap error to
-EAGAIN to keep behaviour consistent.

Link: https://lkml.kernel.org/r/48f60764d7a6901819d1af778fa33b775d2e8c77.1760959442.git.lorenzo.stoakes@oracle.com
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Chatre, Reinette <reinette.chatre@intel.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dave Martin <dave.martin@arm.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kevin Tian <kevin.tian@intel.com>
Cc: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Nicolas Pitre <nico@fluxnic.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Pedro Falcato <pfalcato@suse.de>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Lorenzo Stoakes and committed by Andrew Morton.
ab04945f 89646d9c

+50 -34
+50 -34
drivers/char/mem.c
··· 304 304 } 305 305 306 306 /* can't do an in-place private mapping if there's no MMU */ 307 - static inline int private_mapping_ok(struct vm_area_struct *vma) 307 + static inline int private_mapping_ok(struct vm_area_desc *desc) 308 308 { 309 - return is_nommu_shared_mapping(vma->vm_flags); 309 + return is_nommu_shared_mapping(desc->vm_flags); 310 310 } 311 311 #else 312 312 313 - static inline int private_mapping_ok(struct vm_area_struct *vma) 313 + static inline int private_mapping_ok(struct vm_area_desc *desc) 314 314 { 315 315 return 1; 316 316 } ··· 322 322 #endif 323 323 }; 324 324 325 - static int mmap_mem(struct file *file, struct vm_area_struct *vma) 325 + static int mmap_filter_error(int err) 326 326 { 327 - size_t size = vma->vm_end - vma->vm_start; 328 - phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; 327 + return -EAGAIN; 328 + } 329 + 330 + static int mmap_mem_prepare(struct vm_area_desc *desc) 331 + { 332 + struct file *file = desc->file; 333 + const size_t size = vma_desc_size(desc); 334 + const phys_addr_t offset = (phys_addr_t)desc->pgoff << PAGE_SHIFT; 329 335 330 336 /* Does it even fit in phys_addr_t? */ 331 - if (offset >> PAGE_SHIFT != vma->vm_pgoff) 337 + if (offset >> PAGE_SHIFT != desc->pgoff) 332 338 return -EINVAL; 333 339 334 340 /* It's illegal to wrap around the end of the physical address space. 
*/ 335 341 if (offset + (phys_addr_t)size - 1 < offset) 336 342 return -EINVAL; 337 343 338 - if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) 344 + if (!valid_mmap_phys_addr_range(desc->pgoff, size)) 339 345 return -EINVAL; 340 346 341 - if (!private_mapping_ok(vma)) 347 + if (!private_mapping_ok(desc)) 342 348 return -ENOSYS; 343 349 344 - if (!range_is_allowed(vma->vm_pgoff, size)) 350 + if (!range_is_allowed(desc->pgoff, size)) 345 351 return -EPERM; 346 352 347 - if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, 348 - &vma->vm_page_prot)) 353 + if (!phys_mem_access_prot_allowed(file, desc->pgoff, size, 354 + &desc->page_prot)) 349 355 return -EINVAL; 350 356 351 - vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, 352 - size, 353 - vma->vm_page_prot); 357 + desc->page_prot = phys_mem_access_prot(file, desc->pgoff, 358 + size, 359 + desc->page_prot); 354 360 355 - vma->vm_ops = &mmap_mem_ops; 361 + desc->vm_ops = &mmap_mem_ops; 356 362 357 - /* Remap-pfn-range will mark the range VM_IO */ 358 - if (remap_pfn_range(vma, 359 - vma->vm_start, 360 - vma->vm_pgoff, 361 - size, 362 - vma->vm_page_prot)) { 363 - return -EAGAIN; 364 - } 363 + /* Remap-pfn-range will mark the range VM_IO. */ 364 + mmap_action_remap_full(desc, desc->pgoff); 365 + /* We filter remap errors to -EAGAIN. */ 366 + desc->action.error_hook = mmap_filter_error; 367 + 365 368 return 0; 366 369 } 367 370 ··· 504 501 return cleared; 505 502 } 506 503 507 - static int mmap_zero(struct file *file, struct vm_area_struct *vma) 504 + static int mmap_zero_private_success(const struct vm_area_struct *vma) 505 + { 506 + /* 507 + * This is a highly unique situation where we mark a MAP_PRIVATE mapping 508 + * of /dev/zero anonymous, despite it not being. 
509 + */ 510 + vma_set_anonymous((struct vm_area_struct *)vma); 511 + 512 + return 0; 513 + } 514 + 515 + static int mmap_zero_prepare(struct vm_area_desc *desc) 508 516 { 509 517 #ifndef CONFIG_MMU 510 518 return -ENOSYS; 511 519 #endif 512 - if (vma->vm_flags & VM_SHARED) 513 - return shmem_zero_setup(vma); 514 - vma_set_anonymous(vma); 520 + if (desc->vm_flags & VM_SHARED) 521 + return shmem_zero_setup_desc(desc); 522 + 523 + desc->action.success_hook = mmap_zero_private_success; 515 524 return 0; 516 525 } 517 526 ··· 541 526 { 542 527 if (flags & MAP_SHARED) { 543 528 /* 544 - * mmap_zero() will call shmem_zero_setup() to create a file, 545 - * so use shmem's get_unmapped_area in case it can be huge; 546 - * and pass NULL for file as in mmap.c's get_unmapped_area(), 547 - * so as not to confuse shmem with our handle on "/dev/zero". 529 + * mmap_zero_prepare() will call shmem_zero_setup() to create a 530 + * file, so use shmem's get_unmapped_area in case it can be 531 + * huge; and pass NULL for file as in mmap.c's 532 + * get_unmapped_area(), so as not to confuse shmem with our 533 + * handle on "/dev/zero". 548 534 */ 549 535 return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags); 550 536 } ··· 648 632 .llseek = memory_lseek, 649 633 .read = read_mem, 650 634 .write = write_mem, 651 - .mmap = mmap_mem, 635 + .mmap_prepare = mmap_mem_prepare, 652 636 .open = open_mem, 653 637 #ifndef CONFIG_MMU 654 638 .get_unmapped_area = get_unmapped_area_mem, ··· 684 668 .write_iter = write_iter_zero, 685 669 .splice_read = copy_splice_read, 686 670 .splice_write = splice_write_zero, 687 - .mmap = mmap_zero, 671 + .mmap_prepare = mmap_zero_prepare, 688 672 .get_unmapped_area = get_unmapped_area_zero, 689 673 #ifndef CONFIG_MMU 690 674 .mmap_capabilities = zero_mmap_capabilities,