Merge tag 'x86_urgent_for_v5.13_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:
"A first set of urgent fixes to the FPU/XSTATE handling mess^W code.
(There's a lot more in the pipe):

- Prevent corruption of the XSTATE buffer in signal handling by
  validating what is being copied from userspace first (a sketch of
  the validation idea follows this message).

- Invalidate another task's preserved FPU registers on XRSTOR failure
  (#PF) because the failed XRSTOR can still modify some of them.

- Restore the proper PKRU value in case userspace modified it

- Reset FPU state when signal restoring fails

Other:

- Map EFI boot services data memory as encrypted in a SEV guest so
that the guest can access it and actually boot properly

- Two SGX correctness fixes: proper resources freeing and a NUMA fix"
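
A minimal sketch of the validation idea behind the first fix, assuming a
simplified xstate_header layout. The kernel's own check lives in
validate_user_xstate_header(), which copy_user_to_xstate() applies before
touching the task's buffer; the _sketch name below is made up:

        /*
         * Sketch only: reject a user-supplied XSTATE header that names
         * unknown or supervisor features, claims the compacted format,
         * or carries non-zero reserved bytes.
         */
        static int validate_user_xstate_header_sketch(const struct xstate_header *hdr)
        {
                /* Userspace may only set user-visible features */
                if (hdr->xfeatures & ~xfeatures_mask_user())
                        return -EINVAL;

                /* Userspace must use the uncompacted format */
                if (hdr->xcomp_bv)
                        return -EINVAL;

                /* Reserved fields must be zero */
                if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
                        return -EINVAL;

                return 0;
        }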

* tag 'x86_urgent_for_v5.13_rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mm: Avoid truncating memblocks for SGX memory
x86/sgx: Add missing xa_destroy() when virtual EPC is destroyed
x86/fpu: Reset state for all signal restore failures
x86/pkru: Write hardware init value to PKRU when xstate is init
x86/process: Check PF_KTHREAD and not current->mm for kernel threads
x86/fpu: Invalidate FPU state after a failed XRSTOR from a user buffer
x86/fpu: Prevent state corruption in __fpu__restore_sig()
x86/ioremap: Map EFI-reserved memory as encrypted for SEV

Changed files (+56 -24):

 arch/x86/include/asm/fpu/internal.h | +10  -3
 arch/x86/kernel/cpu/sgx/virt.c      |  +1
 arch/x86/kernel/fpu/signal.c        | +35 -19
 arch/x86/mm/ioremap.c               |  +3  -1
 arch/x86/mm/numa.c                  |  +7  -1

arch/x86/include/asm/fpu/internal.h (+10 -3)

···
         * PKRU state is switched eagerly because it needs to be valid before we
         * return to userland e.g. for a copy_to_user() operation.
         */
-       if (current->mm) {
+       if (!(current->flags & PF_KTHREAD)) {
+               /*
+                * If the PKRU bit in xsave.header.xfeatures is not set,
+                * then the PKRU component was in init state, which means
+                * XRSTOR will set PKRU to 0. If the bit is not set then
+                * get_xsave_addr() will return NULL because the PKRU value
+                * in memory is not valid. This means pkru_val has to be
+                * set to 0 and not to init_pkru_value.
+                */
                pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
-               if (pk)
-                       pkru_val = pk->pkru;
+               pkru_val = pk ? pk->pkru : 0;
        }
        __write_pkru(pkru_val);
 }
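
The current->mm test above was an unreliable way of asking "is this a user
task?": a kernel thread that temporarily adopts a user mm via
kthread_use_mm() also has current->mm set. A hedged illustration of the
difference, with hypothetical helper names:

        #include <linux/sched.h>

        /* Hypothetical helpers, illustration only. */
        static bool is_user_task_by_mm(struct task_struct *t)
        {
                /* Unreliable: also true for a kthread inside kthread_use_mm() */
                return t->mm != NULL;
        }

        static bool is_user_task_by_flag(struct task_struct *t)
        {
                /* Stable: PF_KTHREAD is a fixed property of the thread */
                return !(t->flags & PF_KTHREAD);
        }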

arch/x86/kernel/cpu/sgx/virt.c (+1)

···
        list_splice_tail(&secs_pages, &zombie_secs_pages);
        mutex_unlock(&zombie_secs_pages_lock);

+       xa_destroy(&vepc->page_array);
        kfree(vepc);

        return 0;
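
Note that xa_destroy() frees only the XArray's internal tree nodes, not the
entries, which the page-release loop above this hunk has already taken care
of. A minimal lifecycle sketch under that assumption (struct demo is a
hypothetical stand-in for sgx_vepc):

        #include <linux/xarray.h>
        #include <linux/slab.h>

        struct demo {                           /* hypothetical stand-in */
                struct xarray page_array;
        };

        static void demo_release(struct demo *d)
        {
                /* all entries must have been removed and freed already */
                xa_destroy(&d->page_array);     /* frees internal nodes only */
                kfree(d);                       /* no tree nodes are leaked */
        }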

arch/x86/kernel/fpu/signal.c (+35 -19)

···
                return 0;
        }

-       if (!access_ok(buf, size))
-               return -EACCES;
+       if (!access_ok(buf, size)) {
+               ret = -EACCES;
+               goto out;
+       }

-       if (!static_cpu_has(X86_FEATURE_FPU))
-               return fpregs_soft_set(current, NULL,
-                                      0, sizeof(struct user_i387_ia32_struct),
-                                      NULL, buf) != 0;
+       if (!static_cpu_has(X86_FEATURE_FPU)) {
+               ret = fpregs_soft_set(current, NULL, 0,
+                                     sizeof(struct user_i387_ia32_struct),
+                                     NULL, buf);
+               goto out;
+       }

        if (use_xsave()) {
                struct _fpx_sw_bytes fx_sw_user;
···
                        fpregs_unlock();
                        return 0;
                }
+
+               /*
+                * The above did an FPU restore operation, restricted to
+                * the user portion of the registers, and failed, but the
+                * microcode might have modified the FPU registers
+                * nevertheless.
+                *
+                * If the FPU registers do not belong to current, then
+                * invalidate the FPU register state otherwise the task might
+                * preempt current and return to user space with corrupted
+                * FPU registers.
+                *
+                * In case current owns the FPU registers then no further
+                * action is required. The fixup below will handle it
+                * correctly.
+                */
+               if (test_thread_flag(TIF_NEED_FPU_LOAD))
+                       __cpu_invalidate_fpregs_state();
+
                fpregs_unlock();
        } else {
                /*
···
                 */
                ret = __copy_from_user(&env, buf, sizeof(env));
                if (ret)
-                       goto err_out;
+                       goto out;
                envp = &env;
        }
···
        if (use_xsave() && !fx_only) {
                u64 init_bv = xfeatures_mask_user() & ~user_xfeatures;

-               if (using_compacted_format()) {
-                       ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
-               } else {
-                       ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
-
-                       if (!ret && state_size > offsetof(struct xregs_state, header))
-                               ret = validate_user_xstate_header(&fpu->state.xsave.header);
-               }
+               ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
                if (ret)
-                       goto err_out;
+                       goto out;

                sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
                                              fx_only);
···
                ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
                if (ret) {
                        ret = -EFAULT;
-                       goto err_out;
+                       goto out;
                }

                sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures,
···
        } else {
                ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
                if (ret)
-                       goto err_out;
+                       goto out;

                fpregs_lock();
                ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
···
        fpregs_deactivate(fpu);
        fpregs_unlock();

-err_out:
+out:
        if (ret)
                fpu__clear_user_states(fpu);
        return ret;
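
Two ideas combine in this diff: every failure path now funnels through a
single out: label that resets the user state, and a failed user-buffer
XRSTOR invalidates the per-CPU register copy when it belongs to another
task. A condensed, non-verbatim sketch of the resulting control flow, with
the locking elided and try_restore_from_user() as a hypothetical stand-in
for the restore attempt:

        static int restore_sig_sketch(struct fpu *fpu, void __user *buf, int size)
        {
                int ret;

                if (!access_ok(buf, size)) {
                        ret = -EACCES;
                        goto out;
                }

                ret = try_restore_from_user(fpu, buf);  /* hypothetical */
                if (ret && test_thread_flag(TIF_NEED_FPU_LOAD)) {
                        /*
                         * The CPU registers hold another task's state and
                         * the failed XRSTOR may have clobbered them: make
                         * that task reload from its memory image.
                         */
                        __cpu_invalidate_fpregs_state();
                }
        out:
                if (ret)
                        fpu__clear_user_states(fpu);    /* never leave junk */
                return ret;
        }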

arch/x86/mm/ioremap.c (+3 -1)

···
        if (!IS_ENABLED(CONFIG_EFI))
                return;

-       if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA)
+       if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA ||
+           (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA &&
+            efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME))
                desc->flags |= IORES_MAP_ENCRYPTED;
 }
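
The rule being encoded: firmware-allocated memory is encrypted in a SEV
guest, and that includes EFI_BOOT_SERVICES_DATA regions the kernel keeps
alive because firmware marked them EFI_MEMORY_RUNTIME. The same condition
pulled out as a hypothetical standalone predicate:

        #include <linux/efi.h>

        /* Hypothetical helper mirroring the condition added above. */
        static bool efi_needs_encrypted_map(unsigned long phys_addr)
        {
                int type = efi_mem_type(phys_addr);

                if (type == EFI_RUNTIME_SERVICES_DATA)
                        return true;

                /* Boot-services data kept alive for runtime use was
                 * allocated (and thus encrypted) by the firmware too. */
                return type == EFI_BOOT_SERVICES_DATA &&
                       (efi_mem_attributes(phys_addr) & EFI_MEMORY_RUNTIME);
        }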

arch/x86/mm/numa.c (+7 -1)

···

                /* make sure all non-reserved blocks are inside the limits */
                bi->start = max(bi->start, low);
-               bi->end = min(bi->end, high);
+
+               /* preserve info for non-RAM areas above 'max_pfn': */
+               if (bi->end > high) {
+                       numa_add_memblk_to(bi->nid, high, bi->end,
+                                          &numa_reserved_meminfo);
+                       bi->end = high;
+               }

                /* and there's no empty block */
                if (bi->start >= bi->end)
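
A worked example with made-up addresses: if RAM ends at high = 0x100000000
and node 1 owns an SGX EPC block [0x100000000, 0x140000000) that is not
enumerated as RAM, the old min() clamp emptied the block and the node
mapping for the EPC range was lost. The new pattern as an illustrative
standalone function (numa_add_memblk_to() and numa_reserved_meminfo are
file-local to arch/x86/mm/numa.c, so this is a sketch, not kernel API):

        /* Illustrative clamp-and-preserve, not the literal kernel code. */
        static void clamp_and_preserve(struct numa_memblk *bi, u64 low, u64 high)
        {
                bi->start = max(bi->start, low);

                if (bi->end > high) {
                        /* park the non-RAM tail so later node lookups
                         * (e.g. for hot-added EPC sections) still work */
                        numa_add_memblk_to(bi->nid, high, bi->end,
                                           &numa_reserved_meminfo);
                        bi->end = high;
                }
        }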