Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: Add generic support for dirty page logging

kvm_get_dirty_log() provides generic handling of the dirty bitmap, currently reused
by several architectures. Building on that we introduce
kvm_get_dirty_log_protect() adding write protection to mark these pages dirty
for future write access, before next KVM_GET_DIRTY_LOG ioctl call from user
space.

Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>

authored by

Mario Smarduch and committed by
Christoffer Dall
ba0513b5 a6d51016

+95
+9
include/linux/kvm_host.h
··· 611 611 612 612 int kvm_get_dirty_log(struct kvm *kvm, 613 613 struct kvm_dirty_log *log, int *is_dirty); 614 + 615 + int kvm_get_dirty_log_protect(struct kvm *kvm, 616 + struct kvm_dirty_log *log, bool *is_dirty); 617 + 618 + void kvm_arch_mmu_write_protect_pt_masked(struct kvm *kvm, 619 + struct kvm_memory_slot *slot, 620 + gfn_t gfn_offset, 621 + unsigned long mask); 622 + 614 623 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, 615 624 struct kvm_dirty_log *log); 616 625
+6
virt/kvm/Kconfig
··· 40 40 41 41 config HAVE_KVM_ARCH_TLB_FLUSH_ALL 42 42 bool 43 + 44 + config HAVE_KVM_ARCH_DIRTY_LOG_PROTECT 45 + bool 46 + 47 + config KVM_GENERIC_DIRTYLOG_READ_PROTECT 48 + bool
+80
virt/kvm/kvm_main.c
··· 995 995 } 996 996 EXPORT_SYMBOL_GPL(kvm_get_dirty_log); 997 997 998 + #ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT 999 + /** 1000 + * kvm_get_dirty_log_protect - get a snapshot of dirty pages, and if any pages 1001 + * are dirty write protect them for next write. 1002 + * @kvm: pointer to kvm instance 1003 + * @log: slot id and address to which we copy the log 1004 + * @is_dirty: flag set if any page is dirty 1005 + * 1006 + * We need to keep it in mind that VCPU threads can write to the bitmap 1007 + * concurrently. So, to avoid losing track of dirty pages we keep the 1008 + * following order: 1009 + * 1010 + * 1. Take a snapshot of the bit and clear it if needed. 1011 + * 2. Write protect the corresponding page. 1012 + * 3. Copy the snapshot to the userspace. 1013 + * 4. Upon return caller flushes TLB's if needed. 1014 + * 1015 + * Between 2 and 4, the guest may write to the page using the remaining TLB 1016 + * entry. This is not a problem because the page is reported dirty using 1017 + * the snapshot taken before and step 4 ensures that writes done after 1018 + * exiting to userspace will be logged for the next call. 
1019 + * 1020 + */ 1021 + int kvm_get_dirty_log_protect(struct kvm *kvm, 1022 + struct kvm_dirty_log *log, bool *is_dirty) 1023 + { 1024 + struct kvm_memory_slot *memslot; 1025 + int r, i; 1026 + unsigned long n; 1027 + unsigned long *dirty_bitmap; 1028 + unsigned long *dirty_bitmap_buffer; 1029 + 1030 + r = -EINVAL; 1031 + if (log->slot >= KVM_USER_MEM_SLOTS) 1032 + goto out; 1033 + 1034 + memslot = id_to_memslot(kvm->memslots, log->slot); 1035 + 1036 + dirty_bitmap = memslot->dirty_bitmap; 1037 + r = -ENOENT; 1038 + if (!dirty_bitmap) 1039 + goto out; 1040 + 1041 + n = kvm_dirty_bitmap_bytes(memslot); 1042 + 1043 + dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long); 1044 + memset(dirty_bitmap_buffer, 0, n); 1045 + 1046 + spin_lock(&kvm->mmu_lock); 1047 + *is_dirty = false; 1048 + for (i = 0; i < n / sizeof(long); i++) { 1049 + unsigned long mask; 1050 + gfn_t offset; 1051 + 1052 + if (!dirty_bitmap[i]) 1053 + continue; 1054 + 1055 + *is_dirty = true; 1056 + 1057 + mask = xchg(&dirty_bitmap[i], 0); 1058 + dirty_bitmap_buffer[i] = mask; 1059 + 1060 + offset = i * BITS_PER_LONG; 1061 + kvm_arch_mmu_write_protect_pt_masked(kvm, memslot, offset, 1062 + mask); 1063 + } 1064 + 1065 + spin_unlock(&kvm->mmu_lock); 1066 + 1067 + r = -EFAULT; 1068 + if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n)) 1069 + goto out; 1070 + 1071 + r = 0; 1072 + out: 1073 + return r; 1074 + } 1075 + EXPORT_SYMBOL_GPL(kvm_get_dirty_log_protect); 1076 + #endif 1077 + 998 1078 bool kvm_largepages_enabled(void) 999 1079 { 1000 1080 return largepages_enabled;