Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * RISC-V code
4 *
5 * Copyright (C) 2021 Western Digital Corporation or its affiliates.
6 */
7
8#include <linux/compiler.h>
9#include <assert.h>
10
11#include "kvm_util.h"
12#include "processor.h"
13#include "ucall_common.h"
14
/*
 * Passed to __vm_vaddr_alloc() when a vCPU stack is allocated in
 * vm_arch_vcpu_add(); presumably the lowest guest virtual address that is
 * considered for the stack -- confirm against __vm_vaddr_alloc().
 */
#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000

/*
 * Guest virtual address of the handler table (struct handlers).
 * vm_init_vector_tables() writes it through the guest mapping of this
 * variable and route_exception() reads it.
 */
static vm_vaddr_t exception_handlers;
18
/*
 * Return true when reading register @ext of @vcpu with __vcpu_get_reg()
 * succeeds and yields a non-zero value, false otherwise.
 */
bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
{
	unsigned long reg_val = 0;

	/* A failed read is treated the same as "not present". */
	if (__vcpu_get_reg(vcpu, ext, &reg_val))
		return false;

	return reg_val != 0;
}
28
29static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
30{
31 return (v + vm->page_size) & ~(vm->page_size - 1);
32}
33
34static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
35{
36 return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
37 PGTBL_PAGE_SIZE_SHIFT;
38}
39
40static uint64_t ptrs_per_pte(struct kvm_vm *vm)
41{
42 return PGTBL_PAGE_SIZE / sizeof(uint64_t);
43}
44
45static uint64_t pte_index_mask[] = {
46 PGTBL_L0_INDEX_MASK,
47 PGTBL_L1_INDEX_MASK,
48 PGTBL_L2_INDEX_MASK,
49 PGTBL_L3_INDEX_MASK,
50};
51
52static uint32_t pte_index_shift[] = {
53 PGTBL_L0_INDEX_SHIFT,
54 PGTBL_L1_INDEX_SHIFT,
55 PGTBL_L2_INDEX_SHIFT,
56 PGTBL_L3_INDEX_SHIFT,
57};
58
59static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
60{
61 TEST_ASSERT(level > -1,
62 "Negative page table level (%d) not possible", level);
63 TEST_ASSERT(level < vm->pgtable_levels,
64 "Invalid page table level (%d)", level);
65
66 return (gva & pte_index_mask[level]) >> pte_index_shift[level];
67}
68
69void virt_arch_pgd_alloc(struct kvm_vm *vm)
70{
71 size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
72
73 if (vm->pgd_created)
74 return;
75
76 vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
77 KVM_GUEST_PAGE_TABLE_MIN_PADDR,
78 vm->memslots[MEM_REGION_PT]);
79 vm->pgd_created = true;
80}
81
82void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
83{
84 uint64_t *ptep, next_ppn;
85 int level = vm->pgtable_levels - 1;
86
87 TEST_ASSERT((vaddr % vm->page_size) == 0,
88 "Virtual address not on page boundary,\n"
89 " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
90 TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
91 (vaddr >> vm->page_shift)),
92 "Invalid virtual address, vaddr: 0x%lx", vaddr);
93 TEST_ASSERT((paddr % vm->page_size) == 0,
94 "Physical address not on page boundary,\n"
95 " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
96 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
97 "Physical address beyond maximum supported,\n"
98 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
99 paddr, vm->max_gfn, vm->page_size);
100
101 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
102 if (!*ptep) {
103 next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
104 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
105 PGTBL_PTE_VALID_MASK;
106 }
107 level--;
108
109 while (level > -1) {
110 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
111 pte_index(vm, vaddr, level) * 8;
112 if (!*ptep && level > 0) {
113 next_ppn = vm_alloc_page_table(vm) >>
114 PGTBL_PAGE_SIZE_SHIFT;
115 *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
116 PGTBL_PTE_VALID_MASK;
117 }
118 level--;
119 }
120
121 paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
122 *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
123 PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
124}
125
126vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
127{
128 uint64_t *ptep;
129 int level = vm->pgtable_levels - 1;
130
131 if (!vm->pgd_created)
132 goto unmapped_gva;
133
134 ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
135 if (!ptep)
136 goto unmapped_gva;
137 level--;
138
139 while (level > -1) {
140 ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
141 pte_index(vm, gva, level) * 8;
142 if (!ptep)
143 goto unmapped_gva;
144 level--;
145 }
146
147 return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
148
149unmapped_gva:
150 TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
151 gva, level);
152 exit(1);
153}
154
/*
 * Print every non-zero entry of the page table at guest physical address
 * @page to @stream and recurse into the table each entry points at, one
 * level lower and one column further indented. Recursion stops when @level
 * drops below zero.
 *
 * The body is compiled only when DEBUG is defined; otherwise this is a no-op.
 */
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
		     uint64_t page, int level)
{
#ifdef DEBUG
	static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
	uint64_t slot, pte, *ptep;

	if (level < 0)
		return;

	for (slot = 0; slot < ptrs_per_pte(vm); slot++) {
		/* Guest physical address of this 8-byte entry. */
		pte = page + slot * 8;
		ptep = addr_gpa2hva(vm, pte);
		if (*ptep) {
			fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
				type[level], pte, *ptep, ptep);
			pte_dump(stream, vm, indent + 1,
				 pte_addr(vm, *ptep), level - 1);
		}
	}
#endif
}
176
177void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
178{
179 int level = vm->pgtable_levels - 1;
180 uint64_t pgd, *ptep;
181
182 if (!vm->pgd_created)
183 return;
184
185 for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
186 ptep = addr_gpa2hva(vm, pgd);
187 if (!*ptep)
188 continue;
189 fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
190 pgd, *ptep, ptep);
191 pte_dump(stream, vm, indent + 1,
192 pte_addr(vm, *ptep), level - 1);
193 }
194}
195
196void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
197{
198 struct kvm_vm *vm = vcpu->vm;
199 unsigned long satp;
200
201 /*
202 * The RISC-V Sv48 MMU mode supports 56-bit physical address
203 * for 48-bit virtual address with 4KB last level page size.
204 */
205 switch (vm->mode) {
206 case VM_MODE_P52V48_4K:
207 case VM_MODE_P48V48_4K:
208 case VM_MODE_P40V48_4K:
209 break;
210 default:
211 TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
212 }
213
214 satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
215 satp |= SATP_MODE_48;
216
217 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
218}
219
220void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
221{
222 struct kvm_riscv_core core;
223
224 core.mode = vcpu_get_reg(vcpu, RISCV_CORE_REG(mode));
225 core.regs.pc = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc));
226 core.regs.ra = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra));
227 core.regs.sp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp));
228 core.regs.gp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp));
229 core.regs.tp = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp));
230 core.regs.t0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0));
231 core.regs.t1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1));
232 core.regs.t2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2));
233 core.regs.s0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0));
234 core.regs.s1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1));
235 core.regs.a0 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0));
236 core.regs.a1 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1));
237 core.regs.a2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2));
238 core.regs.a3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3));
239 core.regs.a4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4));
240 core.regs.a5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5));
241 core.regs.a6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6));
242 core.regs.a7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7));
243 core.regs.s2 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2));
244 core.regs.s3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3));
245 core.regs.s4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4));
246 core.regs.s5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5));
247 core.regs.s6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6));
248 core.regs.s7 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7));
249 core.regs.s8 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8));
250 core.regs.s9 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9));
251 core.regs.s10 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10));
252 core.regs.s11 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11));
253 core.regs.t3 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3));
254 core.regs.t4 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4));
255 core.regs.t5 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5));
256 core.regs.t6 = vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6));
257
258 fprintf(stream,
259 " MODE: 0x%lx\n", core.mode);
260 fprintf(stream,
261 " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
262 core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
263 fprintf(stream,
264 " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
265 core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
266 fprintf(stream,
267 " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
268 core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
269 fprintf(stream,
270 " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
271 core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
272 fprintf(stream,
273 " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
274 core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
275 fprintf(stream,
276 " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
277 core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
278 fprintf(stream,
279 " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
280 core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
281 fprintf(stream,
282 " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
283 core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
284}
285
286static void __aligned(16) guest_unexp_trap(void)
287{
288 sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
289 KVM_RISCV_SELFTESTS_SBI_UNEXP,
290 0, 0, 0, 0, 0, 0);
291}
292
293void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
294{
295 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
296}
297
298struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
299{
300 int r;
301 size_t stack_size;
302 unsigned long stack_vaddr;
303 unsigned long current_gp = 0;
304 struct kvm_mp_state mps;
305 struct kvm_vcpu *vcpu;
306
307 stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
308 vm->page_size;
309 stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
310 DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
311 MEM_REGION_DATA);
312
313 vcpu = __vm_vcpu_add(vm, vcpu_id);
314 riscv_vcpu_mmu_setup(vcpu);
315
316 /*
317 * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
318 * powered-off by default so we ensure that all secondary VCPUs
319 * are powered-on using KVM_SET_MP_STATE ioctl().
320 */
321 mps.mp_state = KVM_MP_STATE_RUNNABLE;
322 r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps);
323 TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
324
325 /* Setup global pointer of guest to be same as the host */
326 asm volatile (
327 "add %0, gp, zero" : "=r" (current_gp) : : "memory");
328 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
329
330 /* Setup stack pointer and program counter of guest */
331 vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
332
333 /* Setup sscratch for guest_get_vcpuid() */
334 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
335
336 /* Setup default exception vector of guest */
337 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
338
339 return vcpu;
340}
341
342void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
343{
344 va_list ap;
345 uint64_t id = RISCV_CORE_REG(regs.a0);
346 int i;
347
348 TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
349 " num: %u", num);
350
351 va_start(ap, num);
352
353 for (i = 0; i < num; i++) {
354 switch (i) {
355 case 0:
356 id = RISCV_CORE_REG(regs.a0);
357 break;
358 case 1:
359 id = RISCV_CORE_REG(regs.a1);
360 break;
361 case 2:
362 id = RISCV_CORE_REG(regs.a2);
363 break;
364 case 3:
365 id = RISCV_CORE_REG(regs.a3);
366 break;
367 case 4:
368 id = RISCV_CORE_REG(regs.a4);
369 break;
370 case 5:
371 id = RISCV_CORE_REG(regs.a5);
372 break;
373 case 6:
374 id = RISCV_CORE_REG(regs.a6);
375 break;
376 case 7:
377 id = RISCV_CORE_REG(regs.a7);
378 break;
379 }
380 vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
381 }
382
383 va_end(ap);
384}
385
386void kvm_exit_unexpected_exception(int vector, int ec)
387{
388 ucall(UCALL_UNHANDLED, 2, vector, ec);
389}
390
391void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
392{
393 struct ucall uc;
394
395 if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
396 TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
397 uc.args[0], uc.args[1]);
398 }
399}
400
/*
 * Handler table indexed as exception_handlers[vector][ec]. In this file,
 * row 0 is filled by vm_install_exception_handler() and entry [1][0] by
 * vm_install_interrupt_handler(); route_exception() looks handlers up here.
 */
struct handlers {
	exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
};
404
405void route_exception(struct pt_regs *regs)
406{
407 struct handlers *handlers = (struct handlers *)exception_handlers;
408 int vector = 0, ec;
409
410 ec = regs->cause & ~CAUSE_IRQ_FLAG;
411 if (ec >= NR_EXCEPTIONS)
412 goto unexpected_exception;
413
414 /* Use the same handler for all the interrupts */
415 if (regs->cause & CAUSE_IRQ_FLAG) {
416 vector = 1;
417 ec = 0;
418 }
419
420 if (handlers && handlers->exception_handlers[vector][ec])
421 return handlers->exception_handlers[vector][ec](regs);
422
423unexpected_exception:
424 return kvm_exit_unexpected_exception(vector, ec);
425}
426
427void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
428{
429 extern char exception_vectors;
430
431 vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors);
432}
433
434void vm_init_vector_tables(struct kvm_vm *vm)
435{
436 vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
437 vm->page_size, MEM_REGION_DATA);
438
439 *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
440}
441
442void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
443{
444 struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
445
446 assert(vector < NR_EXCEPTIONS);
447 handlers->exception_handlers[0][vector] = handler;
448}
449
450void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler)
451{
452 struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
453
454 handlers->exception_handlers[1][0] = handler;
455}
456
457uint32_t guest_get_vcpuid(void)
458{
459 return csr_read(CSR_SSCRATCH);
460}
461
462struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
463 unsigned long arg1, unsigned long arg2,
464 unsigned long arg3, unsigned long arg4,
465 unsigned long arg5)
466{
467 register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
468 register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
469 register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
470 register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
471 register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
472 register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
473 register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
474 register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
475 struct sbiret ret;
476
477 asm volatile (
478 "ecall"
479 : "+r" (a0), "+r" (a1)
480 : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
481 : "memory");
482 ret.error = a0;
483 ret.value = a1;
484
485 return ret;
486}
487
488bool guest_sbi_probe_extension(int extid, long *out_val)
489{
490 struct sbiret ret;
491
492 ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
493 0, 0, 0, 0, 0);
494
495 __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED,
496 "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value);
497
498 if (ret.error == SBI_ERR_NOT_SUPPORTED)
499 return false;
500
501 if (out_val)
502 *out_val = ret.value;
503
504 return true;
505}
506
507unsigned long get_host_sbi_spec_version(void)
508{
509 struct sbiret ret;
510
511 ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0,
512 0, 0, 0, 0, 0);
513
514 GUEST_ASSERT(!ret.error);
515
516 return ret.value;
517}