Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * RISC-V code
4 *
5 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
6 */
7
8#include <linux/compiler.h>
9#include <assert.h>
10
11#include "kvm_util.h"
12#include "processor.h"
13
14#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
15
16static vm_vaddr_t exception_handlers;
17
/*
 * Report whether the register identified by @ext can be read from @vcpu
 * and holds a non-zero value.  A failing register read yields false.
 */
bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
{
	unsigned long reg_val = 0;

	if (__vcpu_get_reg(vcpu, ext, &reg_val))
		return false;

	return reg_val != 0;
}
27
28static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
29{
30 return (v + vm->page_size) & ~(vm->page_size - 1);
31}
32
33static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
34{
35 return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
36 PGTBL_PAGE_SIZE_SHIFT;
37}
38
39static uint64_t ptrs_per_pte(struct kvm_vm *vm)
40{
41 return PGTBL_PAGE_SIZE / sizeof(uint64_t);
42}
43
44static uint64_t pte_index_mask[] = {
45 PGTBL_L0_INDEX_MASK,
46 PGTBL_L1_INDEX_MASK,
47 PGTBL_L2_INDEX_MASK,
48 PGTBL_L3_INDEX_MASK,
49};
50
51static uint32_t pte_index_shift[] = {
52 PGTBL_L0_INDEX_SHIFT,
53 PGTBL_L1_INDEX_SHIFT,
54 PGTBL_L2_INDEX_SHIFT,
55 PGTBL_L3_INDEX_SHIFT,
56};
57
58static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
59{
60 TEST_ASSERT(level > -1,
61 "Negative page table level (%d) not possible", level);
62 TEST_ASSERT(level < vm->pgtable_levels,
63 "Invalid page table level (%d)", level);
64
65 return (gva & pte_index_mask[level]) >> pte_index_shift[level];
66}
67
68void virt_arch_pgd_alloc(struct kvm_vm *vm)
69{
70 size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
71
72 if (vm->pgd_created)
73 return;
74
75 vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
76 KVM_GUEST_PAGE_TABLE_MIN_PADDR,
77 vm->memslots[MEM_REGION_PT]);
78 vm->pgd_created = true;
79}
80
81void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
82{
83 uint64_t *ptep, next_ppn;
84 int level = vm->pgtable_levels - 1;
85
86 TEST_ASSERT((vaddr % vm->page_size) == 0,
87 "Virtual address not on page boundary,\n"
88 " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
89 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
90 (vaddr >> vm->page_shift)),
91 "Invalid virtual address, vaddr: 0x%lx", vaddr);
92 TEST_ASSERT((paddr % vm->page_size) == 0,
93 "Physical address not on page boundary,\n"
94 " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
95 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
96 "Physical address beyond maximum supported,\n"
97 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
98 paddr, vm->max_gfn, vm->page_size);
99
100 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
101 if (!*ptep) {
102 next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
103 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
104 PGTBL_PTE_VALID_MASK;
105 }
106 level--;
107
108 while (level > -1) {
109 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
110 pte_index(vm, vaddr, level) * 8;
111 if (!*ptep && level > 0) {
112 next_ppn = vm_alloc_page_table(vm) >>
113 PGTBL_PAGE_SIZE_SHIFT;
114 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
115 PGTBL_PTE_VALID_MASK;
116 }
117 level--;
118 }
119
120 paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
121 *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
122 PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
123}
124
125vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
126{
127 uint64_t *ptep;
128 int level = vm->pgtable_levels - 1;
129
130 if (!vm->pgd_created)
131 goto unmapped_gva;
132
133 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
134 if (!ptep)
135 goto unmapped_gva;
136 level--;
137
138 while (level > -1) {
139 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
140 pte_index(vm, gva, level) * 8;
141 if (!ptep)
142 goto unmapped_gva;
143 level--;
144 }
145
146 return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
147
148unmapped_gva:
149 TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
150 gva, level);
151 exit(1);
152}
153
/*
 * Recursively print every non-zero entry of the page table located at
 * guest physical address @page, then descend into the table each entry
 * refers to with @level decremented.  Recursion stops once @level
 * drops below zero.
 *
 * The whole body is compiled only when DEBUG is defined; otherwise the
 * function does nothing.
 */
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
		     uint64_t page, int level)
{
#ifdef DEBUG
	static const char *const level_name[] = { "pte", "pmd", "pud", "p4d"};
	const uint64_t table_end = page + ptrs_per_pte(vm) * 8;
	uint64_t entry_gpa;

	if (level < 0)
		return;

	for (entry_gpa = page; entry_gpa < table_end; entry_gpa += 8) {
		uint64_t *ptep = addr_gpa2hva(vm, entry_gpa);

		if (*ptep) {
			fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
				level_name[level], entry_gpa, *ptep, ptep);
			pte_dump(stream, vm, indent + 1,
				 pte_addr(vm, *ptep), level - 1);
		}
	}
#endif
}
175
176void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
177{
178 int level = vm->pgtable_levels - 1;
179 uint64_t pgd, *ptep;
180
181 if (!vm->pgd_created)
182 return;
183
184 for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
185 ptep = addr_gpa2hva(vm, pgd);
186 if (!*ptep)
187 continue;
188 fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
189 pgd, *ptep, ptep);
190 pte_dump(stream, vm, indent + 1,
191 pte_addr(vm, *ptep), level - 1);
192 }
193}
194
195void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
196{
197 struct kvm_vm *vm = vcpu->vm;
198 unsigned long satp;
199
200 /*
201 * The RISC-V Sv48 MMU mode supports 56-bit physical address
202 * for 48-bit virtual address with 4KB last level page size.
203 */
204 switch (vm->mode) {
205 case VM_MODE_P52V48_4K:
206 case VM_MODE_P48V48_4K:
207 case VM_MODE_P40V48_4K:
208 break;
209 default:
210 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
211 }
212
213 satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
214 satp |= SATP_MODE_48;
215
216 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
217}
218
219void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
220{
221 struct kvm_riscv_core core;
222
223 vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode);
224 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc);
225 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra);
226 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp);
227 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp);
228 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp);
229 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0);
230 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1);
231 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2);
232 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0);
233 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1);
234 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0);
235 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1);
236 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2);
237 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3);
238 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4);
239 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5);
240 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6);
241 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7);
242 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2);
243 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3);
244 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4);
245 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5);
246 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6);
247 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7);
248 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8);
249 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9);
250 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10);
251 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11);
252 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3);
253 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4);
254 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5);
255 vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6);
256
257 fprintf(stream,
258 " MODE: 0x%lx\n", core.mode);
259 fprintf(stream,
260 " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
261 core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
262 fprintf(stream,
263 " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
264 core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
265 fprintf(stream,
266 " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
267 core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
268 fprintf(stream,
269 " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
270 core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
271 fprintf(stream,
272 " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
273 core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
274 fprintf(stream,
275 " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
276 core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
277 fprintf(stream,
278 " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
279 core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
280 fprintf(stream,
281 " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
282 core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
283}
284
285static void __aligned(16) guest_unexp_trap(void)
286{
287 sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
288 KVM_RISCV_SELFTESTS_SBI_UNEXP,
289 0, 0, 0, 0, 0, 0);
290}
291
292void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
293{
294 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
295}
296
297struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
298{
299 int r;
300 size_t stack_size;
301 unsigned long stack_vaddr;
302 unsigned long current_gp = 0;
303 struct kvm_mp_state mps;
304 struct kvm_vcpu *vcpu;
305
306 stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
307 vm->page_size;
308 stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
309 DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
310 MEM_REGION_DATA);
311
312 vcpu = __vm_vcpu_add(vm, vcpu_id);
313 riscv_vcpu_mmu_setup(vcpu);
314
315 /*
316 * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
317 * powered-off by default so we ensure that all secondary VCPUs
318 * are powered-on using KVM_SET_MP_STATE ioctl().
319 */
320 mps.mp_state = KVM_MP_STATE_RUNNABLE;
321 r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps);
322 TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
323
324 /* Setup global pointer of guest to be same as the host */
325 asm volatile (
326 "add %0, gp, zero" : "=r" (current_gp) : : "memory");
327 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
328
329 /* Setup stack pointer and program counter of guest */
330 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
331
332 /* Setup sscratch for guest_get_vcpuid() */
333 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
334
335 /* Setup default exception vector of guest */
336 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
337
338 return vcpu;
339}
340
341void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
342{
343 va_list ap;
344 uint64_t id = RISCV_CORE_REG(regs.a0);
345 int i;
346
347 TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
348 " num: %u", num);
349
350 va_start(ap, num);
351
352 for (i = 0; i < num; i++) {
353 switch (i) {
354 case 0:
355 id = RISCV_CORE_REG(regs.a0);
356 break;
357 case 1:
358 id = RISCV_CORE_REG(regs.a1);
359 break;
360 case 2:
361 id = RISCV_CORE_REG(regs.a2);
362 break;
363 case 3:
364 id = RISCV_CORE_REG(regs.a3);
365 break;
366 case 4:
367 id = RISCV_CORE_REG(regs.a4);
368 break;
369 case 5:
370 id = RISCV_CORE_REG(regs.a5);
371 break;
372 case 6:
373 id = RISCV_CORE_REG(regs.a6);
374 break;
375 case 7:
376 id = RISCV_CORE_REG(regs.a7);
377 break;
378 }
379 vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
380 }
381
382 va_end(ap);
383}
384
385void kvm_exit_unexpected_exception(int vector, int ec)
386{
387 ucall(UCALL_UNHANDLED, 2, vector, ec);
388}
389
390void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
391{
392 struct ucall uc;
393
394 if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
395 TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
396 uc.args[0], uc.args[1]);
397 }
398}
399
400struct handlers {
401 exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
402};
403
404void route_exception(struct ex_regs *regs)
405{
406 struct handlers *handlers = (struct handlers *)exception_handlers;
407 int vector = 0, ec;
408
409 ec = regs->cause & ~CAUSE_IRQ_FLAG;
410 if (ec >= NR_EXCEPTIONS)
411 goto unexpected_exception;
412
413 /* Use the same handler for all the interrupts */
414 if (regs->cause & CAUSE_IRQ_FLAG) {
415 vector = 1;
416 ec = 0;
417 }
418
419 if (handlers && handlers->exception_handlers[vector][ec])
420 return handlers->exception_handlers[vector][ec](regs);
421
422unexpected_exception:
423 return kvm_exit_unexpected_exception(vector, ec);
424}
425
426void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
427{
428 extern char exception_vectors;
429
430 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors);
431}
432
433void vm_init_vector_tables(struct kvm_vm *vm)
434{
435 vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
436 vm->page_size, MEM_REGION_DATA);
437
438 *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
439}
440
441void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
442{
443 struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
444
445 assert(vector < NR_EXCEPTIONS);
446 handlers->exception_handlers[0][vector] = handler;
447}
448
449void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler)
450{
451 struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
452
453 handlers->exception_handlers[1][0] = handler;
454}
455
456uint32_t guest_get_vcpuid(void)
457{
458 return csr_read(CSR_SSCRATCH);
459}
460
461struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
462 unsigned long arg1, unsigned long arg2,
463 unsigned long arg3, unsigned long arg4,
464 unsigned long arg5)
465{
466 register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
467 register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
468 register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
469 register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
470 register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
471 register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
472 register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
473 register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
474 struct sbiret ret;
475
476 asm volatile (
477 "ecall"
478 : "+r" (a0), "+r" (a1)
479 : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
480 : "memory");
481 ret.error = a0;
482 ret.value = a1;
483
484 return ret;
485}
486
487bool guest_sbi_probe_extension(int extid, long *out_val)
488{
489 struct sbiret ret;
490
491 ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
492 0, 0, 0, 0, 0);
493
494 __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED,
495 "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value);
496
497 if (ret.error == SBI_ERR_NOT_SUPPORTED)
498 return false;
499
500 if (out_val)
501 *out_val = ret.value;
502
503 return true;
504}