Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'x86_sgx_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 SGX updates from Dave Hansen:
"A set of patches to prevent crashes in SGX enclaves under heavy memory
pressure:

SGX uses normal RAM allocated from special shmem files as backing
storage when it runs out of SGX memory (EPC). The code was overly
aggressive when freeing shmem pages and was inadvertently freeing
perfectly good data. This resulted in failures in the SGX instructions
used to swap data back into SGX memory.

This turned out to be really hard to trigger in mainline. It was
originally encountered testing the out-of-tree "SGX2" patches, but
later reproduced on mainline.

Fix the data loss by being more careful about truncating pages out of
the backing storage and more judiciously setting pages dirty"

* tag 'x86_sgx_for_v5.19_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/sgx: Ensure no data in PCMD page after truncate
x86/sgx: Fix race between reclaimer and page fault handler
x86/sgx: Obtain backing storage page with enclave mutex held
x86/sgx: Mark PCMD page as dirty when modifying contents
x86/sgx: Disconnect backing page references from dirty status

+115 -15
+104 -9
arch/x86/kernel/cpu/sgx/encl.c
··· 12 12 #include "encls.h" 13 13 #include "sgx.h" 14 14 15 + #define PCMDS_PER_PAGE (PAGE_SIZE / sizeof(struct sgx_pcmd)) 16 + /* 17 + * 32 PCMD entries share a PCMD page. PCMD_FIRST_MASK is used to 18 + * determine the page index associated with the first PCMD entry 19 + * within a PCMD page. 20 + */ 21 + #define PCMD_FIRST_MASK GENMASK(4, 0) 22 + 23 + /** 24 + * reclaimer_writing_to_pcmd() - Query if any enclave page associated with 25 + * a PCMD page is in process of being reclaimed. 26 + * @encl: Enclave to which PCMD page belongs 27 + * @start_addr: Address of enclave page using first entry within the PCMD page 28 + * 29 + * When an enclave page is reclaimed some Paging Crypto MetaData (PCMD) is 30 + * stored. The PCMD data of a reclaimed enclave page contains enough 31 + * information for the processor to verify the page at the time 32 + * it is loaded back into the Enclave Page Cache (EPC). 33 + * 34 + * The backing storage to which enclave pages are reclaimed is laid out as 35 + * follows: 36 + * Encrypted enclave pages:SECS page:PCMD pages 37 + * 38 + * Each PCMD page contains the PCMD metadata of 39 + * PAGE_SIZE/sizeof(struct sgx_pcmd) enclave pages. 40 + * 41 + * A PCMD page can only be truncated if it is (a) empty, and (b) not in the 42 + * process of getting data (and thus soon being non-empty). (b) is tested with 43 + * a check if an enclave page sharing the PCMD page is in the process of being 44 + * reclaimed. 45 + * 46 + * The reclaimer sets the SGX_ENCL_PAGE_BEING_RECLAIMED flag when it 47 + * intends to reclaim that enclave page - it means that the PCMD page 48 + * associated with that enclave page is about to get some data and thus 49 + * even if the PCMD page is empty, it should not be truncated. 50 + * 51 + * Context: Enclave mutex (&sgx_encl->lock) must be held. 
52 + * Return: 1 if the reclaimer is about to write to the PCMD page 53 + * 0 if the reclaimer has no intention to write to the PCMD page 54 + */ 55 + static int reclaimer_writing_to_pcmd(struct sgx_encl *encl, 56 + unsigned long start_addr) 57 + { 58 + int reclaimed = 0; 59 + int i; 60 + 61 + /* 62 + * PCMD_FIRST_MASK is based on number of PCMD entries within 63 + * PCMD page being 32. 64 + */ 65 + BUILD_BUG_ON(PCMDS_PER_PAGE != 32); 66 + 67 + for (i = 0; i < PCMDS_PER_PAGE; i++) { 68 + struct sgx_encl_page *entry; 69 + unsigned long addr; 70 + 71 + addr = start_addr + i * PAGE_SIZE; 72 + 73 + /* 74 + * Stop when reaching the SECS page - it does not 75 + * have a page_array entry and its reclaim is 76 + * started and completed with enclave mutex held so 77 + * it does not use the SGX_ENCL_PAGE_BEING_RECLAIMED 78 + * flag. 79 + */ 80 + if (addr == encl->base + encl->size) 81 + break; 82 + 83 + entry = xa_load(&encl->page_array, PFN_DOWN(addr)); 84 + if (!entry) 85 + continue; 86 + 87 + /* 88 + * VA page slot ID uses same bit as the flag so it is important 89 + * to ensure that the page is not already in backing store. 90 + */ 91 + if (entry->epc_page && 92 + (entry->desc & SGX_ENCL_PAGE_BEING_RECLAIMED)) { 93 + reclaimed = 1; 94 + break; 95 + } 96 + } 97 + 98 + return reclaimed; 99 + } 100 + 15 101 /* 16 102 * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's 17 103 * follow right after the EPC data in the backing storage. 
In addition to the ··· 133 47 unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK; 134 48 struct sgx_encl *encl = encl_page->encl; 135 49 pgoff_t page_index, page_pcmd_off; 50 + unsigned long pcmd_first_page; 136 51 struct sgx_pageinfo pginfo; 137 52 struct sgx_backing b; 138 53 bool pcmd_page_empty; ··· 144 57 page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); 145 58 else 146 59 page_index = PFN_DOWN(encl->size); 60 + 61 + /* 62 + * Address of enclave page using the first entry within the PCMD page. 63 + */ 64 + pcmd_first_page = PFN_PHYS(page_index & ~PCMD_FIRST_MASK) + encl->base; 147 65 148 66 page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index); 149 67 ··· 176 84 } 177 85 178 86 memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd)); 87 + set_page_dirty(b.pcmd); 179 88 180 89 /* 181 90 * The area for the PCMD in the page was zeroed above. Check if the ··· 187 94 kunmap_atomic(pcmd_page); 188 95 kunmap_atomic((void *)(unsigned long)pginfo.contents); 189 96 190 - sgx_encl_put_backing(&b, false); 97 + get_page(b.pcmd); 98 + sgx_encl_put_backing(&b); 191 99 192 100 sgx_encl_truncate_backing_page(encl, page_index); 193 101 194 - if (pcmd_page_empty) 102 + if (pcmd_page_empty && !reclaimer_writing_to_pcmd(encl, pcmd_first_page)) { 195 103 sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off)); 104 + pcmd_page = kmap_atomic(b.pcmd); 105 + if (memchr_inv(pcmd_page, 0, PAGE_SIZE)) 106 + pr_warn("PCMD page not empty after truncate.\n"); 107 + kunmap_atomic(pcmd_page); 108 + } 109 + 110 + put_page(b.pcmd); 196 111 197 112 return ret; 198 113 } ··· 746 645 /** 747 646 * sgx_encl_put_backing() - Unpin the backing storage 748 647 * @backing: data for accessing backing storage for the page 749 - * @do_write: mark pages dirty 750 648 */ 751 - void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write) 649 + void sgx_encl_put_backing(struct sgx_backing *backing) 752 650 { 753 - if (do_write) { 754 - 
set_page_dirty(backing->pcmd); 755 - set_page_dirty(backing->contents); 756 - } 757 - 758 651 put_page(backing->pcmd); 759 652 put_page(backing->contents); 760 653 }
+1 -1
arch/x86/kernel/cpu/sgx/encl.h
··· 107 107 int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm); 108 108 int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index, 109 109 struct sgx_backing *backing); 110 - void sgx_encl_put_backing(struct sgx_backing *backing, bool do_write); 110 + void sgx_encl_put_backing(struct sgx_backing *backing); 111 111 int sgx_encl_test_and_clear_young(struct mm_struct *mm, 112 112 struct sgx_encl_page *page); 113 113
+10 -5
arch/x86/kernel/cpu/sgx/main.c
··· 191 191 backing->pcmd_offset; 192 192 193 193 ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot); 194 + set_page_dirty(backing->pcmd); 195 + set_page_dirty(backing->contents); 194 196 195 197 kunmap_atomic((void *)(unsigned long)(pginfo.metadata - 196 198 backing->pcmd_offset)); ··· 310 308 sgx_encl_ewb(epc_page, backing); 311 309 encl_page->epc_page = NULL; 312 310 encl->secs_child_cnt--; 311 + sgx_encl_put_backing(backing); 313 312 314 313 if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { 315 314 ret = sgx_encl_get_backing(encl, PFN_DOWN(encl->size), ··· 323 320 sgx_encl_free_epc_page(encl->secs.epc_page); 324 321 encl->secs.epc_page = NULL; 325 322 326 - sgx_encl_put_backing(&secs_backing, true); 323 + sgx_encl_put_backing(&secs_backing); 327 324 } 328 325 329 326 out: ··· 382 379 goto skip; 383 380 384 381 page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); 385 - ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); 386 - if (ret) 387 - goto skip; 388 382 389 383 mutex_lock(&encl_page->encl->lock); 384 + ret = sgx_encl_get_backing(encl_page->encl, page_index, &backing[i]); 385 + if (ret) { 386 + mutex_unlock(&encl_page->encl->lock); 387 + goto skip; 388 + } 389 + 390 390 encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED; 391 391 mutex_unlock(&encl_page->encl->lock); 392 392 continue; ··· 417 411 418 412 encl_page = epc_page->owner; 419 413 sgx_reclaimer_write(epc_page, &backing[i]); 420 - sgx_encl_put_backing(&backing[i], true); 421 414 422 415 kref_put(&encl_page->encl->refcount, sgx_encl_release); 423 416 epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;