x86/sgx: Free backing memory after faulting the enclave page

There is a limited amount of SGX memory (EPC) on each system. When that
memory is used up, SGX has its own swapping mechanism which is similar
in concept but totally separate from the core mm/* code. Instead of
swapping to disk, SGX swaps from EPC to normal RAM. That normal RAM
comes from a shared memory pseudo-file and can itself be swapped by the
core mm code. There is a hierarchy like this:

EPC <-> shmem <-> disk
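
As an illustration of that hierarchy, here is a minimal kernel-style sketch
(not part of this patch; the function name and flags chosen are made up,
though shmem_file_setup(), shmem_read_mapping_page() and
shmem_truncate_range() are the real shmem APIs involved) of using a shmem
pseudo-file as swappable backing storage:

	#include <linux/err.h>
	#include <linux/file.h>
	#include <linux/fs.h>
	#include <linux/mm.h>
	#include <linux/pagemap.h>
	#include <linux/shmem_fs.h>

	/* Hypothetical demo: a shmem pseudo-file as backing storage. */
	static int backing_demo(loff_t size)
	{
		struct file *backing;
		struct page *page;

		/*
		 * Pages of this file sit in normal RAM but can be swapped
		 * out to disk by the core mm, giving EPC <-> shmem <-> disk.
		 */
		backing = shmem_file_setup("demo-backing", size, VM_NORESERVE);
		if (IS_ERR(backing))
			return PTR_ERR(backing);

		/* Fault page 0 of the backing store (back) into RAM. */
		page = shmem_read_mapping_page(backing->f_mapping, 0);
		if (IS_ERR(page)) {
			fput(backing);
			return PTR_ERR(page);
		}
		put_page(page);

		/*
		 * Release page 0 so it stops consuming RAM or swap; this is
		 * the shmem_truncate_range() step the patch below adds.
		 */
		shmem_truncate_range(file_inode(backing), 0, PAGE_SIZE - 1);

		fput(backing);
		return 0;
	}

SGX sets up encl->backing in the same way and reads pages back from it when
a reclaimed EPC page is faulted in; the missing piece, addressed by this
patch, is the truncate step afterwards.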

After data is swapped back in from shmem to EPC, the shmem backing
storage needs to be freed. Currently, the backing shmem is not freed,
which effectively wastes that memory for as long as the enclave is
running; it is only recovered when the enclave is destroyed and its
backing storage is freed.

Sort this out by freeing the backing memory with shmem_truncate_range()
as soon as a page is faulted back into the EPC. In addition, free the
memory for PCMD pages as soon as all PCMDs in a page have been marked
as unused by zeroing their contents.
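
For concreteness, the offset arithmetic this change introduces can be checked
with a small standalone sketch (the 4096-byte page/SECS sizes and the
128-byte PCMD size match the hardware structures; the toy enclave size and
the helper name are hypothetical):

	#include <stdint.h>
	#include <stdio.h>

	#define DEMO_PAGE_SIZE	4096ULL	/* one backing page */
	#define SECS_SIZE	4096ULL	/* sizeof(struct sgx_secs): a full page */
	#define PCMD_SIZE	128ULL	/* sizeof(struct sgx_pcmd) */

	/*
	 * Mirrors sgx_encl_get_backing_page_pcmd_offset(): PCMDs start
	 * right after the enclave data plus the extra SECS page slot.
	 */
	static uint64_t pcmd_offset(uint64_t encl_size, uint64_t page_index)
	{
		return encl_size + SECS_SIZE + page_index * PCMD_SIZE;
	}

	int main(void)
	{
		uint64_t encl_size = 16 * DEMO_PAGE_SIZE; /* toy 64 KiB enclave */
		uint64_t off = pcmd_offset(encl_size, 5); /* PCMD of page 5 */

		/* 70272: byte offset of the PCMD in the backing file */
		printf("pcmd byte offset:  %llu\n", (unsigned long long)off);
		/* 17: backing page holding it; the old code computed this
		 * as PFN_DOWN(encl->size) + 1 + (page_index >> 5) */
		printf("pcmd backing page: %llu\n",
		       (unsigned long long)(off / DEMO_PAGE_SIZE));
		/* 640: the new backing->pcmd_offset = off & (PAGE_SIZE - 1) */
		printf("offset in page:    %llu\n",
		       (unsigned long long)(off % DEMO_PAGE_SIZE));
		return 0;
	}

With these sizes, 4096 / 128 = 32 PCMD entries share one backing page, which
is where the old code's "page_index >> 5" shortcut came from; the new helper
in the diff below makes the layout explicit instead.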

Cc: stable@vger.kernel.org
Fixes: 1728ab54b4be ("x86/sgx: Add a page reclaimer")
Reported-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lkml.kernel.org/r/20220303223859.273187-1-jarkko@kernel.org

Authored by Jarkko Sakkinen and committed by Dave Hansen (08999b24, a365a65f)

Changed files: arch/x86/kernel/cpu/sgx/encl.c (+48 -9)
···
 #include "sgx.h"
 
 /*
+ * Calculate byte offset of a PCMD struct associated with an enclave page. PCMD's
+ * follow right after the EPC data in the backing storage. In addition to the
+ * visible enclave pages, there's one extra page slot for SECS, before PCMD
+ * structs.
+ */
+static inline pgoff_t sgx_encl_get_backing_page_pcmd_offset(struct sgx_encl *encl,
+							     unsigned long page_index)
+{
+	pgoff_t epc_end_off = encl->size + sizeof(struct sgx_secs);
+
+	return epc_end_off + page_index * sizeof(struct sgx_pcmd);
+}
+
+/*
+ * Free a page from the backing storage in the given page index.
+ */
+static inline void sgx_encl_truncate_backing_page(struct sgx_encl *encl, unsigned long page_index)
+{
+	struct inode *inode = file_inode(encl->backing);
+
+	shmem_truncate_range(inode, PFN_PHYS(page_index), PFN_PHYS(page_index) + PAGE_SIZE - 1);
+}
+
+/*
  * ELDU: Load an EPC page as unblocked. For more info, see "OS Management of EPC
  * Pages" in the SDM.
  */
···
 {
 	unsigned long va_offset = encl_page->desc & SGX_ENCL_PAGE_VA_OFFSET_MASK;
 	struct sgx_encl *encl = encl_page->encl;
+	pgoff_t page_index, page_pcmd_off;
 	struct sgx_pageinfo pginfo;
 	struct sgx_backing b;
-	pgoff_t page_index;
+	bool pcmd_page_empty;
+	u8 *pcmd_page;
 	int ret;
 
 	if (secs_page)
···
 	else
 		page_index = PFN_DOWN(encl->size);
 
+	page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
+
 	ret = sgx_encl_get_backing(encl, page_index, &b);
 	if (ret)
 		return ret;
 
 	pginfo.addr = encl_page->desc & PAGE_MASK;
 	pginfo.contents = (unsigned long)kmap_atomic(b.contents);
-	pginfo.metadata = (unsigned long)kmap_atomic(b.pcmd) +
-			  b.pcmd_offset;
+	pcmd_page = kmap_atomic(b.pcmd);
+	pginfo.metadata = (unsigned long)pcmd_page + b.pcmd_offset;
 
 	if (secs_page)
 		pginfo.secs = (u64)sgx_get_epc_virt_addr(secs_page);
···
 		ret = -EFAULT;
 	}
 
-	kunmap_atomic((void *)(unsigned long)(pginfo.metadata - b.pcmd_offset));
+	memset(pcmd_page + b.pcmd_offset, 0, sizeof(struct sgx_pcmd));
+
+	/*
+	 * The area for the PCMD in the page was zeroed above.  Check if the
+	 * whole page is now empty meaning that all PCMD's have been zeroed:
+	 */
+	pcmd_page_empty = !memchr_inv(pcmd_page, 0, PAGE_SIZE);
+
+	kunmap_atomic(pcmd_page);
 	kunmap_atomic((void *)(unsigned long)pginfo.contents);
 
 	sgx_encl_put_backing(&b, false);
+
+	sgx_encl_truncate_backing_page(encl, page_index);
+
+	if (pcmd_page_empty)
+		sgx_encl_truncate_backing_page(encl, PFN_DOWN(page_pcmd_off));
 
 	return ret;
 }
···
 int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
 			 struct sgx_backing *backing)
 {
-	pgoff_t pcmd_index = PFN_DOWN(encl->size) + 1 + (page_index >> 5);
+	pgoff_t page_pcmd_off = sgx_encl_get_backing_page_pcmd_offset(encl, page_index);
 	struct page *contents;
 	struct page *pcmd;
···
 	if (IS_ERR(contents))
 		return PTR_ERR(contents);
 
-	pcmd = sgx_encl_get_backing_page(encl, pcmd_index);
+	pcmd = sgx_encl_get_backing_page(encl, PFN_DOWN(page_pcmd_off));
 	if (IS_ERR(pcmd)) {
 		put_page(contents);
 		return PTR_ERR(pcmd);
 	}
 
 	backing->page_index = page_index;
 	backing->contents = contents;
 	backing->pcmd = pcmd;
-	backing->pcmd_offset =
-		(page_index & (PAGE_SIZE / sizeof(struct sgx_pcmd) - 1)) *
-		sizeof(struct sgx_pcmd);
+	backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
 
 	return 0;
 }
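
For orientation, the backing-file layout that
sgx_encl_get_backing_page_pcmd_offset() encodes can be pictured like this
(an illustrative sketch, not part of the patch):

	byte 0                     encl->size    encl->size + sizeof(struct sgx_secs)
	|                          |             |
	v                          v             v
	+--------------------------+-------------+-------------------------------+
	| enclave page data        | SECS slot   | PCMD[0] PCMD[1] ... (128 B ea) |
	+--------------------------+-------------+-------------------------------+

One 4096-byte backing page therefore holds 4096 / 128 = 32 PCMD entries; once
the memset() in the hunk above has zeroed the last in-use PCMD in such a page,
memchr_inv() reports the page empty and sgx_encl_truncate_backing_page() can
release it.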