Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: guest_memfd: Use guest mem inodes instead of anonymous inodes

guest_memfd's inode represents memory the guest_memfd is
providing. guest_memfd's file represents a struct kvm's view of that
memory.

Using a custom inode allows customization of the inode teardown
process via callbacks. For example, ->evict_inode() allows
customization of the truncation process on file close, and
->destroy_inode() and ->free_inode() allow customization of the inode
freeing process.

Customizing the truncation process allows flexibility in management of
guest_memfd memory, and customizing the inode freeing process allows
proper cleanup of memory metadata stored on the inode.

Memory metadata is more appropriately stored on the inode (as opposed
to the file), since the metadata describes the memory itself and is not
unique to a specific binding or struct kvm.

Acked-by: David Hildenbrand <david@redhat.com>
Co-developed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Shivank Garg <shivankg@amd.com>
Tested-by: Ashish Kalra <ashish.kalra@amd.com>
[sean: drop helpers, open code logic in __kvm_gmem_create()]
Link: https://lore.kernel.org/r/20251016172853.52451-4-seanjc@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>

Authored by Ackerley Tng and committed by Sean Christopherson
a63ca423 392dd9d9

+80 -19
+1
include/uapi/linux/magic.h
··· 103 103 #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ 104 104 #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ 105 105 #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ 106 + #define GUEST_MEMFD_MAGIC 0x474d454d /* "GMEM" */ 106 107 107 108 #endif /* __LINUX_MAGIC_H__ */
+68 -14
virt/kvm/guest_memfd.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/anon_inodes.h> 2 3 #include <linux/backing-dev.h> 3 4 #include <linux/falloc.h> 5 + #include <linux/fs.h> 4 6 #include <linux/kvm_host.h> 7 + #include <linux/pseudo_fs.h> 5 8 #include <linux/pagemap.h> 6 - #include <linux/anon_inodes.h> 7 9 8 10 #include "kvm_mm.h" 11 + 12 + static struct vfsmount *kvm_gmem_mnt; 9 13 10 14 /* 11 15 * A guest_memfd instance can be associated multiple VMs, each with its own ··· 428 424 .fallocate = kvm_gmem_fallocate, 429 425 }; 430 426 431 - void kvm_gmem_init(struct module *module) 432 - { 433 - kvm_gmem_fops.owner = module; 434 - } 435 - 436 427 static int kvm_gmem_migrate_folio(struct address_space *mapping, 437 428 struct folio *dst, struct folio *src, 438 429 enum migrate_mode mode) ··· 499 500 500 501 static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) 501 502 { 502 - const char *anon_name = "[kvm-gmem]"; 503 + static const char *name = "[kvm-gmem]"; 503 504 struct gmem_file *f; 504 505 struct inode *inode; 505 506 struct file *file; ··· 515 516 goto err_fd; 516 517 } 517 518 518 - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, f, O_RDWR, NULL); 519 - if (IS_ERR(file)) { 520 - err = PTR_ERR(file); 519 + /* __fput() will take care of fops_put(). */ 520 + if (!fops_get(&kvm_gmem_fops)) { 521 + err = -ENOENT; 521 522 goto err_gmem; 522 523 } 523 524 524 - file->f_flags |= O_LARGEFILE; 525 - 526 - inode = file->f_inode; 527 - WARN_ON(file->f_mapping != inode->i_mapping); 525 + inode = anon_inode_make_secure_inode(kvm_gmem_mnt->mnt_sb, name, NULL); 526 + if (IS_ERR(inode)) { 527 + err = PTR_ERR(inode); 528 + goto err_fops; 529 + } 528 530 529 531 inode->i_private = (void *)(unsigned long)flags; 530 532 inode->i_op = &kvm_gmem_iops; ··· 537 537 /* Unmovable mappings are supposed to be marked unevictable as well. 
*/ 538 538 WARN_ON_ONCE(!mapping_unevictable(inode->i_mapping)); 539 539 540 + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, &kvm_gmem_fops); 541 + if (IS_ERR(file)) { 542 + err = PTR_ERR(file); 543 + goto err_inode; 544 + } 545 + 546 + file->f_flags |= O_LARGEFILE; 547 + file->private_data = f; 548 + 540 549 kvm_get_kvm(kvm); 541 550 f->kvm = kvm; 542 551 xa_init(&f->bindings); ··· 554 545 fd_install(fd, file); 555 546 return fd; 556 547 548 + err_inode: 549 + iput(inode); 550 + err_fops: 551 + fops_put(&kvm_gmem_fops); 557 552 err_gmem: 558 553 kfree(f); 559 554 err_fd: ··· 829 816 } 830 817 EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_gmem_populate); 831 818 #endif 819 + 820 + static int kvm_gmem_init_fs_context(struct fs_context *fc) 821 + { 822 + if (!init_pseudo(fc, GUEST_MEMFD_MAGIC)) 823 + return -ENOMEM; 824 + 825 + fc->s_iflags |= SB_I_NOEXEC; 826 + fc->s_iflags |= SB_I_NODEV; 827 + 828 + return 0; 829 + } 830 + 831 + static struct file_system_type kvm_gmem_fs = { 832 + .name = "guest_memfd", 833 + .init_fs_context = kvm_gmem_init_fs_context, 834 + .kill_sb = kill_anon_super, 835 + }; 836 + 837 + static int kvm_gmem_init_mount(void) 838 + { 839 + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs); 840 + 841 + if (IS_ERR(kvm_gmem_mnt)) 842 + return PTR_ERR(kvm_gmem_mnt); 843 + 844 + kvm_gmem_mnt->mnt_flags |= MNT_NOEXEC; 845 + return 0; 846 + } 847 + 848 + int kvm_gmem_init(struct module *module) 849 + { 850 + kvm_gmem_fops.owner = module; 851 + 852 + return kvm_gmem_init_mount(); 853 + } 854 + 855 + void kvm_gmem_exit(void) 856 + { 857 + kern_unmount(kvm_gmem_mnt); 858 + kvm_gmem_mnt = NULL; 859 + }
+6 -1
virt/kvm/kvm_main.c
··· 6517 6517 if (WARN_ON_ONCE(r)) 6518 6518 goto err_vfio; 6519 6519 6520 - kvm_gmem_init(module); 6520 + r = kvm_gmem_init(module); 6521 + if (r) 6522 + goto err_gmem; 6521 6523 6522 6524 r = kvm_init_virtualization(); 6523 6525 if (r) ··· 6540 6538 err_register: 6541 6539 kvm_uninit_virtualization(); 6542 6540 err_virt: 6541 + kvm_gmem_exit(); 6542 + err_gmem: 6543 6543 kvm_vfio_ops_exit(); 6544 6544 err_vfio: 6545 6545 kvm_async_pf_deinit(); ··· 6573 6569 for_each_possible_cpu(cpu) 6574 6570 free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); 6575 6571 kmem_cache_destroy(kvm_vcpu_cache); 6572 + kvm_gmem_exit(); 6576 6573 kvm_vfio_ops_exit(); 6577 6574 kvm_async_pf_deinit(); 6578 6575 kvm_irqfd_exit();
+5 -4
virt/kvm/kvm_mm.h
··· 68 68 #endif /* HAVE_KVM_PFNCACHE */ 69 69 70 70 #ifdef CONFIG_KVM_GUEST_MEMFD 71 - void kvm_gmem_init(struct module *module); 71 + int kvm_gmem_init(struct module *module); 72 + void kvm_gmem_exit(void); 72 73 int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); 73 74 int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, 74 75 unsigned int fd, loff_t offset); 75 76 void kvm_gmem_unbind(struct kvm_memory_slot *slot); 76 77 #else 77 - static inline void kvm_gmem_init(struct module *module) 78 + static inline int kvm_gmem_init(struct module *module) 78 79 { 79 - 80 + return 0; 80 81 } 81 - 82 + static inline void kvm_gmem_exit(void) {}; 82 83 static inline int kvm_gmem_bind(struct kvm *kvm, 83 84 struct kvm_memory_slot *slot, 84 85 unsigned int fd, loff_t offset)