Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Protect kvm_vcpu_read_guest with srcu locks

The kvm_vcpu_read_guest()/kvm_vcpu_write_guest() helpers used for nested
guests eventually call srcu_dereference_check() to dereference a memslot.
Lockdep produces a warning because neither kvm->slots_lock nor the
kvm->srcu lock is held while kvm->users_count is above zero (>100 in fact).

This wraps the mentioned VCPU read/write helpers in srcu read lock/unlock,
as is done in other places. This uses vcpu->srcu_idx when possible.

These helpers are only used for nested KVM, which may explain why
we did not see these warnings before.

Here is an example of a warning:

=============================
WARNING: suspicious RCU usage
5.7.0-rc3-le_dma-bypass.3.2_a+fstn1 #897 Not tainted
-----------------------------
include/linux/kvm_host.h:633 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
1 lock held by qemu-system-ppc/2752:
#0: c000200359016be0 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x144/0xd80 [kvm]

stack backtrace:
CPU: 80 PID: 2752 Comm: qemu-system-ppc Not tainted 5.7.0-rc3-le_dma-bypass.3.2_a+fstn1 #897
Call Trace:
[c0002003591ab240] [c000000000b23ab4] dump_stack+0x190/0x25c (unreliable)
[c0002003591ab2b0] [c00000000023f954] lockdep_rcu_suspicious+0x140/0x164
[c0002003591ab330] [c008000004a445f8] kvm_vcpu_gfn_to_memslot+0x4c0/0x510 [kvm]
[c0002003591ab3a0] [c008000004a44c18] kvm_vcpu_read_guest+0xa0/0x180 [kvm]
[c0002003591ab410] [c008000004ff9bd8] kvmhv_enter_nested_guest+0x90/0xb80 [kvm_hv]
[c0002003591ab980] [c008000004fe07bc] kvmppc_pseries_do_hcall+0x7b4/0x1c30 [kvm_hv]
[c0002003591aba10] [c008000004fe5d30] kvmppc_vcpu_run_hv+0x10a8/0x1a30 [kvm_hv]
[c0002003591abae0] [c008000004a5d954] kvmppc_vcpu_run+0x4c/0x70 [kvm]
[c0002003591abb10] [c008000004a56e54] kvm_arch_vcpu_ioctl_run+0x56c/0x7c0 [kvm]
[c0002003591abba0] [c008000004a3ddc4] kvm_vcpu_ioctl+0x4ac/0xd80 [kvm]
[c0002003591abd20] [c0000000006ebb58] ksys_ioctl+0x188/0x210
[c0002003591abd70] [c0000000006ebc28] sys_ioctl+0x48/0xb0
[c0002003591abdb0] [c000000000042764] system_call_exception+0x1d4/0x2e0
[c0002003591abe20] [c00000000000cce8] system_call_common+0xe8/0x214

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

authored by

Alexey Kardashevskiy and committed by
Paul Mackerras
1508c22f e55f4d58

+29 -12
+4
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 160 160 return -EINVAL; 161 161 /* Read the entry from guest memory */ 162 162 addr = base + (index * sizeof(rpte)); 163 + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 163 164 ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte)); 165 + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 164 166 if (ret) { 165 167 if (pte_ret_p) 166 168 *pte_ret_p = addr; ··· 238 236 239 237 /* Read the table to find the root of the radix tree */ 240 238 ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry)); 239 + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); 241 240 ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry)); 241 + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); 242 242 if (ret) 243 243 return ret; 244 244
+19 -11
arch/powerpc/kvm/book3s_hv_nested.c
··· 233 233 234 234 /* copy parameters in */ 235 235 hv_ptr = kvmppc_get_gpr(vcpu, 4); 236 + regs_ptr = kvmppc_get_gpr(vcpu, 5); 237 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 236 238 err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv, 237 - sizeof(struct hv_guest_state)); 239 + sizeof(struct hv_guest_state)) || 240 + kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, 241 + sizeof(struct pt_regs)); 242 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 238 243 if (err) 239 244 return H_PARAMETER; 245 + 240 246 if (kvmppc_need_byteswap(vcpu)) 241 247 byteswap_hv_regs(&l2_hv); 242 248 if (l2_hv.version != HV_GUEST_STATE_VERSION) 243 249 return H_P2; 244 250 245 - regs_ptr = kvmppc_get_gpr(vcpu, 5); 246 - err = kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs, 247 - sizeof(struct pt_regs)); 248 - if (err) 249 - return H_PARAMETER; 250 251 if (kvmppc_need_byteswap(vcpu)) 251 252 byteswap_pt_regs(&l2_regs); 252 253 if (l2_hv.vcpu_token >= NR_CPUS) ··· 324 323 byteswap_hv_regs(&l2_hv); 325 324 byteswap_pt_regs(&l2_regs); 326 325 } 326 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 327 327 err = kvm_vcpu_write_guest(vcpu, hv_ptr, &l2_hv, 328 - sizeof(struct hv_guest_state)); 329 - if (err) 330 - return H_AUTHORITY; 331 - err = kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, 328 + sizeof(struct hv_guest_state)) || 329 + kvm_vcpu_write_guest(vcpu, regs_ptr, &l2_regs, 332 330 sizeof(struct pt_regs)); 331 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 333 332 if (err) 334 333 return H_AUTHORITY; 335 334 ··· 509 508 goto not_found; 510 509 511 510 /* Write what was loaded into our buffer back to the L1 guest */ 511 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 512 512 rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); 513 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 513 514 if (rc) 514 515 goto not_found; 515 516 } else { 516 517 /* Load the data to be stored from the L1 guest into our buf */ 518 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 517 
519 rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); 520 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 518 521 if (rc) 519 522 goto not_found; 520 523 ··· 553 548 554 549 ret = -EFAULT; 555 550 ptbl_addr = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4); 556 - if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) 551 + if (gp->l1_lpid < (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 8))) { 552 + int srcu_idx = srcu_read_lock(&kvm->srcu); 557 553 ret = kvm_read_guest(kvm, ptbl_addr, 558 554 &ptbl_entry, sizeof(ptbl_entry)); 555 + srcu_read_unlock(&kvm->srcu, srcu_idx); 556 + } 559 557 if (ret) { 560 558 gp->l1_gr_to_hr = 0; 561 559 gp->process_table = 0;
+2
arch/powerpc/kvm/book3s_rtas.c
··· 229 229 */ 230 230 args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM; 231 231 232 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 232 233 rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args)); 234 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 233 235 if (rc) 234 236 goto fail; 235 237
+4 -1
arch/powerpc/kvm/powerpc.c
··· 403 403 return EMULATE_DONE; 404 404 } 405 405 406 - if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) 406 + vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 407 + rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size); 408 + srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 409 + if (rc) 407 410 return EMULATE_DO_MMIO; 408 411 409 412 return EMULATE_DONE;