Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: s390: Add configuration dump functionality

Sometimes dumping inside of a VM fails, is unavailable or doesn't
yield the required data. For these occasions we dump the VM from the
outside, writing memory and cpu data to a file.

Up to now PV guests only supported dumping from the inside of the
guest through dumpers like KDUMP. A PV guest can be dumped from the
hypervisor but the data will be stale and / or encrypted.

To get the actual state of the PV VM we need the help of the
Ultravisor who safeguards the VM state. New UV calls have been added
to initialize the dump, dump storage state data, dump cpu data and
complete the dump process. We expose these calls in this patch via a
new UV ioctl command.

The sensitive parts of the dump data are encrypted, the dump key is
derived from the Customer Communication Key (CCK). This ensures that
only the owner of the VM who has the CCK can decrypt the dump data.

The memory is dumped / read via a normal export call and a re-import
after the dump initialization is not needed (no re-encryption with a
dump key).

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20220517163629.3443-7-frankja@linux.ibm.com
Message-Id: <20220517163629.3443-7-frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>

Authored by Janosch Frank and committed by Christian Borntraeger
0460eb35 fe9a93e0

+295
+1
arch/s390/include/asm/kvm_host.h
··· 923 923 u64 guest_len; 924 924 unsigned long stor_base; 925 925 void *stor_var; 926 + bool dumping; 926 927 }; 927 928 928 929 struct kvm_arch{
+93
arch/s390/kvm/kvm-s390.c
··· 2271 2271 } 2272 2272 } 2273 2273 2274 + static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, 2275 + struct kvm_s390_pv_dmp dmp) 2276 + { 2277 + int r = -EINVAL; 2278 + void __user *result_buff = (void __user *)dmp.buff_addr; 2279 + 2280 + switch (dmp.subcmd) { 2281 + case KVM_PV_DUMP_INIT: { 2282 + if (kvm->arch.pv.dumping) 2283 + break; 2284 + 2285 + /* 2286 + * Block SIE entry as concurrent dump UVCs could lead 2287 + * to validities. 2288 + */ 2289 + kvm_s390_vcpu_block_all(kvm); 2290 + 2291 + r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), 2292 + UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc); 2293 + KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x", 2294 + cmd->rc, cmd->rrc); 2295 + if (!r) { 2296 + kvm->arch.pv.dumping = true; 2297 + } else { 2298 + kvm_s390_vcpu_unblock_all(kvm); 2299 + r = -EINVAL; 2300 + } 2301 + break; 2302 + } 2303 + case KVM_PV_DUMP_CONFIG_STOR_STATE: { 2304 + if (!kvm->arch.pv.dumping) 2305 + break; 2306 + 2307 + /* 2308 + * gaddr is an output parameter since we might stop 2309 + * early. As dmp will be copied back in our caller, we 2310 + * don't need to do it ourselves. 
2311 + */ 2312 + r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len, 2313 + &cmd->rc, &cmd->rrc); 2314 + break; 2315 + } 2316 + case KVM_PV_DUMP_COMPLETE: { 2317 + if (!kvm->arch.pv.dumping) 2318 + break; 2319 + 2320 + r = -EINVAL; 2321 + if (dmp.buff_len < uv_info.conf_dump_finalize_len) 2322 + break; 2323 + 2324 + r = kvm_s390_pv_dump_complete(kvm, result_buff, 2325 + &cmd->rc, &cmd->rrc); 2326 + break; 2327 + } 2328 + default: 2329 + r = -ENOTTY; 2330 + break; 2331 + } 2332 + 2333 + return r; 2334 + } 2335 + 2274 2336 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) 2275 2337 { 2276 2338 int r = 0; ··· 2507 2445 break; 2508 2446 2509 2447 r = 0; 2448 + break; 2449 + } 2450 + case KVM_PV_DUMP: { 2451 + struct kvm_s390_pv_dmp dmp; 2452 + 2453 + r = -EINVAL; 2454 + if (!kvm_s390_pv_is_protected(kvm)) 2455 + break; 2456 + 2457 + r = -EFAULT; 2458 + if (copy_from_user(&dmp, argp, sizeof(dmp))) 2459 + break; 2460 + 2461 + r = kvm_s390_pv_dmp(kvm, cmd, dmp); 2462 + if (r) 2463 + break; 2464 + 2465 + if (copy_to_user(argp, &dmp, sizeof(dmp))) { 2466 + r = -EFAULT; 2467 + break; 2468 + } 2469 + 2510 2470 break; 2511 2471 } 2512 2472 default: ··· 4647 4563 { 4648 4564 struct kvm_run *kvm_run = vcpu->run; 4649 4565 int rc; 4566 + 4567 + /* 4568 + * Running a VM while dumping always has the potential to 4569 + * produce inconsistent dump data. But for PV vcpus a SIE 4570 + * entry while dumping could also lead to a fatal validity 4571 + * intercept which we absolutely want to avoid. 4572 + */ 4573 + if (vcpu->kvm->arch.pv.dumping) 4574 + return -EINVAL; 4650 4575 4651 4576 if (kvm_run->immediate_exit) 4652 4577 return -EINTR;
+4
arch/s390/kvm/kvm-s390.h
··· 250 250 int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size, 251 251 unsigned long tweak, u16 *rc, u16 *rrc); 252 252 int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state); 253 + int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user, 254 + u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc); 255 + int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user, 256 + u16 *rc, u16 *rrc); 253 257 254 258 static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm) 255 259 {
+182
arch/s390/kvm/pv.c
··· 7 7 */ 8 8 #include <linux/kvm.h> 9 9 #include <linux/kvm_host.h> 10 + #include <linux/minmax.h> 10 11 #include <linux/pagemap.h> 11 12 #include <linux/sched/signal.h> 12 13 #include <asm/gmap.h> ··· 298 297 if (cc) 299 298 return -EINVAL; 300 299 return 0; 300 + } 301 + 302 + /* Size of the cache for the storage state dump data. 1MB for now */ 303 + #define DUMP_BUFF_LEN HPAGE_SIZE 304 + 305 + /** 306 + * kvm_s390_pv_dump_stor_state 307 + * 308 + * @kvm: pointer to the guest's KVM struct 309 + * @buff_user: Userspace pointer where we will write the results to 310 + * @gaddr: Starting absolute guest address for which the storage state 311 + * is requested. 312 + * @buff_user_len: Length of the buff_user buffer 313 + * @rc: Pointer to where the uvcb return code is stored 314 + * @rrc: Pointer to where the uvcb return reason code is stored 315 + * 316 + * Stores buff_len bytes of tweak component values to buff_user 317 + * starting with the 1MB block specified by the absolute guest address 318 + * (gaddr). The gaddr pointer will be updated with the last address 319 + * for which data was written when returning to userspace. buff_user 320 + * might be written to even if an error rc is returned. For instance 321 + * if we encounter a fault after writing the first page of data. 
322 + * 323 + * Context: kvm->lock needs to be held 324 + * 325 + * Return: 326 + * 0 on success 327 + * -ENOMEM if allocating the cache fails 328 + * -EINVAL if gaddr is not aligned to 1MB 329 + * -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len 330 + * -EINVAL if the UV call fails, rc and rrc will be set in this case 331 + * -EFAULT if copying the result to buff_user failed 332 + */ 333 + int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user, 334 + u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc) 335 + { 336 + struct uv_cb_dump_stor_state uvcb = { 337 + .header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE, 338 + .header.len = sizeof(uvcb), 339 + .config_handle = kvm->arch.pv.handle, 340 + .gaddr = *gaddr, 341 + .dump_area_origin = 0, 342 + }; 343 + const u64 increment_len = uv_info.conf_dump_storage_state_len; 344 + size_t buff_kvm_size; 345 + size_t size_done = 0; 346 + u8 *buff_kvm = NULL; 347 + int cc, ret; 348 + 349 + ret = -EINVAL; 350 + /* UV call processes 1MB guest storage chunks at a time */ 351 + if (!IS_ALIGNED(*gaddr, HPAGE_SIZE)) 352 + goto out; 353 + 354 + /* 355 + * We provide the storage state for 1MB chunks of guest 356 + * storage. The buffer will need to be aligned to 357 + * conf_dump_storage_state_len so we don't end on a partial 358 + * chunk. 359 + */ 360 + if (!buff_user_len || 361 + !IS_ALIGNED(buff_user_len, increment_len)) 362 + goto out; 363 + 364 + /* 365 + * Allocate a buffer from which we will later copy to the user 366 + * process. We don't want userspace to dictate our buffer size 367 + * so we limit it to DUMP_BUFF_LEN. 
368 + */ 369 + ret = -ENOMEM; 370 + buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN); 371 + buff_kvm = vzalloc(buff_kvm_size); 372 + if (!buff_kvm) 373 + goto out; 374 + 375 + ret = 0; 376 + uvcb.dump_area_origin = (u64)buff_kvm; 377 + /* We will loop until the user buffer is filled or an error occurs */ 378 + do { 379 + /* Get 1MB worth of guest storage state data */ 380 + cc = uv_call_sched(0, (u64)&uvcb); 381 + 382 + /* All or nothing */ 383 + if (cc) { 384 + ret = -EINVAL; 385 + break; 386 + } 387 + 388 + size_done += increment_len; 389 + uvcb.dump_area_origin += increment_len; 390 + buff_user_len -= increment_len; 391 + uvcb.gaddr += HPAGE_SIZE; 392 + 393 + /* KVM Buffer full, time to copy to the process */ 394 + if (!buff_user_len || size_done == DUMP_BUFF_LEN) { 395 + if (copy_to_user(buff_user, buff_kvm, size_done)) { 396 + ret = -EFAULT; 397 + break; 398 + } 399 + 400 + buff_user += size_done; 401 + size_done = 0; 402 + uvcb.dump_area_origin = (u64)buff_kvm; 403 + } 404 + } while (buff_user_len); 405 + 406 + /* Report back where we ended dumping */ 407 + *gaddr = uvcb.gaddr; 408 + 409 + /* Lets only log errors, we don't want to spam */ 410 + out: 411 + if (ret) 412 + KVM_UV_EVENT(kvm, 3, 413 + "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x", 414 + uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc); 415 + *rc = uvcb.header.rc; 416 + *rrc = uvcb.header.rrc; 417 + vfree(buff_kvm); 418 + 419 + return ret; 420 + } 421 + 422 + /** 423 + * kvm_s390_pv_dump_complete 424 + * 425 + * @kvm: pointer to the guest's KVM struct 426 + * @buff_user: Userspace pointer where we will write the results to 427 + * @rc: Pointer to where the uvcb return code is stored 428 + * @rrc: Pointer to where the uvcb return reason code is stored 429 + * 430 + * Completes the dumping operation and writes the completion data to 431 + * user space. 
432 + * 433 + * Context: kvm->lock needs to be held 434 + * 435 + * Return: 436 + * 0 on success 437 + * -ENOMEM if allocating the completion buffer fails 438 + * -EINVAL if the UV call fails, rc and rrc will be set in this case 439 + * -EFAULT if copying the result to buff_user failed 440 + */ 441 + int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user, 442 + u16 *rc, u16 *rrc) 443 + { 444 + struct uv_cb_dump_complete complete = { 445 + .header.len = sizeof(complete), 446 + .header.cmd = UVC_CMD_DUMP_COMPLETE, 447 + .config_handle = kvm_s390_pv_get_handle(kvm), 448 + }; 449 + u64 *compl_data; 450 + int ret; 451 + 452 + /* Allocate dump area */ 453 + compl_data = vzalloc(uv_info.conf_dump_finalize_len); 454 + if (!compl_data) 455 + return -ENOMEM; 456 + complete.dump_area_origin = (u64)compl_data; 457 + 458 + ret = uv_call_sched(0, (u64)&complete); 459 + *rc = complete.header.rc; 460 + *rrc = complete.header.rrc; 461 + KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x", 462 + complete.header.rc, complete.header.rrc); 463 + 464 + if (!ret) { 465 + /* 466 + * kvm_s390_pv_dealloc_vm() will also (mem)set 467 + * this to false on a reboot or other destroy 468 + * operation for this vm. 469 + */ 470 + kvm->arch.pv.dumping = false; 471 + kvm_s390_vcpu_unblock_all(kvm); 472 + ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len); 473 + if (ret) 474 + ret = -EFAULT; 475 + } 476 + vfree(compl_data); 477 + /* If the UVC returned an error, translate it to -EINVAL */ 478 + if (ret > 0) 479 + ret = -EINVAL; 480 + return ret; 301 481 }
+15
include/uapi/linux/kvm.h
··· 1660 1660 __u64 tweak; 1661 1661 }; 1662 1662 1663 + enum pv_cmd_dmp_id { 1664 + KVM_PV_DUMP_INIT, 1665 + KVM_PV_DUMP_CONFIG_STOR_STATE, 1666 + KVM_PV_DUMP_COMPLETE, 1667 + }; 1668 + 1669 + struct kvm_s390_pv_dmp { 1670 + __u64 subcmd; 1671 + __u64 buff_addr; 1672 + __u64 buff_len; 1673 + __u64 gaddr; /* For dump storage state */ 1674 + __u64 reserved[4]; 1675 + }; 1676 + 1663 1677 enum pv_cmd_info_id { 1664 1678 KVM_PV_INFO_VM, 1665 1679 KVM_PV_INFO_DUMP, ··· 1717 1703 KVM_PV_PREP_RESET, 1718 1704 KVM_PV_UNSHARE_ALL, 1719 1705 KVM_PV_INFO, 1706 + KVM_PV_DUMP, 1720 1707 }; 1721 1708 1722 1709 struct kvm_pv_cmd {