Merge tag 'x86_sgx_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 SGX updates from Borislav Petkov:
"Add a SGX_IOC_VEPC_REMOVE ioctl to the /dev/sgx_vepc virt interface
with which EPC pages can be put back into their uninitialized state
without having to reopen /dev/sgx_vepc, which might no longer be possible
after startup due to security policies"

* tag 'x86_sgx_for_v5.16_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/sgx/virt: implement SGX_IOC_VEPC_REMOVE ioctl
x86/sgx/virt: extract sgx_vepc_remove_page

+97 -5
+35
Documentation/x86/sgx.rst
··· 250 on the same machine, the user should reserve enough EPC (by taking out 251 total virtual EPC size of all SGX VMs from the physical EPC size) for 252 host SGX applications so they can run with acceptable performance.
··· 250 on the same machine, the user should reserve enough EPC (by taking out 251 total virtual EPC size of all SGX VMs from the physical EPC size) for 252 host SGX applications so they can run with acceptable performance. 253 + 254 + Architectural behavior is to restore all EPC pages to an uninitialized 255 + state also after a guest reboot. Because this state can be reached only 256 + through the privileged ``ENCLS[EREMOVE]`` instruction, ``/dev/sgx_vepc`` 257 + provides the ``SGX_IOC_VEPC_REMOVE_ALL`` ioctl to execute the instruction 258 + on all pages in the virtual EPC. 259 + 260 + ``EREMOVE`` can fail for three reasons. Userspace must pay attention 261 + to expected failures and handle them as follows: 262 + 263 + 1. Page removal will always fail when any thread is running in the 264 + enclave to which the page belongs. In this case the ioctl will 265 + return ``EBUSY`` independent of whether it has successfully removed 266 + some pages; userspace can avoid these failures by preventing execution 267 + of any vcpu which maps the virtual EPC. 268 + 269 + 2. Page removal will cause a general protection fault if two calls to 270 + ``EREMOVE`` happen concurrently for pages that refer to the same 271 + "SECS" metadata pages. This can happen if there are concurrent 272 + invocations to ``SGX_IOC_VEPC_REMOVE_ALL``, or if a ``/dev/sgx_vepc`` 273 + file descriptor in the guest is closed at the same time as 274 + ``SGX_IOC_VEPC_REMOVE_ALL``; it will also be reported as ``EBUSY``. 275 + This can be avoided in userspace by serializing calls to the ioctl() 276 + and to close(), but in general it should not be a problem. 277 + 278 + 3. Finally, page removal will fail for SECS metadata pages which still 279 + have child pages. Child pages can be removed by executing 280 + ``SGX_IOC_VEPC_REMOVE_ALL`` on all ``/dev/sgx_vepc`` file descriptors 281 + mapped into the guest. 
This means that the ioctl() must be called 282 + twice: an initial set of calls to remove child pages and a subsequent 283 + set of calls to remove SECS pages. The second set of calls is only 284 + required for those mappings that returned a nonzero value from the 285 + first call. It indicates a bug in the kernel or the userspace client 286 + if any of the second round of ``SGX_IOC_VEPC_REMOVE_ALL`` calls has 287 + a return code other than 0.
+2
arch/x86/include/uapi/asm/sgx.h
··· 27 _IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init) 28 #define SGX_IOC_ENCLAVE_PROVISION \ 29 _IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision) 30 31 /** 32 * struct sgx_enclave_create - parameter structure for the
··· 27 _IOW(SGX_MAGIC, 0x02, struct sgx_enclave_init) 28 #define SGX_IOC_ENCLAVE_PROVISION \ 29 _IOW(SGX_MAGIC, 0x03, struct sgx_enclave_provision) 30 + #define SGX_IOC_VEPC_REMOVE_ALL \ 31 + _IO(SGX_MAGIC, 0x04) 32 33 /** 34 * struct sgx_enclave_create - parameter structure for the
+60 -5
arch/x86/kernel/cpu/sgx/virt.c
··· 111 return 0; 112 } 113 114 - static int sgx_vepc_free_page(struct sgx_epc_page *epc_page) 115 { 116 - int ret; 117 - 118 /* 119 * Take a previously guest-owned EPC page and return it to the 120 * general EPC page pool. ··· 122 * case that a guest properly EREMOVE'd this page, a superfluous 123 * EREMOVE is harmless. 124 */ 125 - ret = __eremove(sgx_get_epc_virt_addr(epc_page)); 126 if (ret) { 127 /* 128 * Only SGX_CHILD_PRESENT is expected, which is because of ··· 147 } 148 149 sgx_free_epc_page(epc_page); 150 - 151 return 0; 152 } 153 154 static int sgx_vepc_release(struct inode *inode, struct file *file) ··· 270 return 0; 271 } 272 273 static const struct file_operations sgx_vepc_fops = { 274 .owner = THIS_MODULE, 275 .open = sgx_vepc_open, 276 .release = sgx_vepc_release, 277 .mmap = sgx_vepc_mmap, 278 };
··· 111 return 0; 112 } 113 114 + static int sgx_vepc_remove_page(struct sgx_epc_page *epc_page) 115 { 116 /* 117 * Take a previously guest-owned EPC page and return it to the 118 * general EPC page pool. ··· 124 * case that a guest properly EREMOVE'd this page, a superfluous 125 * EREMOVE is harmless. 126 */ 127 + return __eremove(sgx_get_epc_virt_addr(epc_page)); 128 + } 129 + 130 + static int sgx_vepc_free_page(struct sgx_epc_page *epc_page) 131 + { 132 + int ret = sgx_vepc_remove_page(epc_page); 133 if (ret) { 134 /* 135 * Only SGX_CHILD_PRESENT is expected, which is because of ··· 144 } 145 146 sgx_free_epc_page(epc_page); 147 return 0; 148 + } 149 + 150 + static long sgx_vepc_remove_all(struct sgx_vepc *vepc) 151 + { 152 + struct sgx_epc_page *entry; 153 + unsigned long index; 154 + long failures = 0; 155 + 156 + xa_for_each(&vepc->page_array, index, entry) { 157 + int ret = sgx_vepc_remove_page(entry); 158 + if (ret) { 159 + if (ret == SGX_CHILD_PRESENT) { 160 + /* The page is a SECS, userspace will retry. */ 161 + failures++; 162 + } else { 163 + /* 164 + * Report errors due to #GP or SGX_ENCLAVE_ACT; do not 165 + * WARN, as userspace can induce said failures by 166 + * calling the ioctl concurrently on multiple vEPCs or 167 + * while one or more CPUs is running the enclave. Only 168 + * a #PF on EREMOVE indicates a kernel/hardware issue. 169 + */ 170 + WARN_ON_ONCE(encls_faulted(ret) && 171 + ENCLS_TRAPNR(ret) != X86_TRAP_GP); 172 + return -EBUSY; 173 + } 174 + } 175 + cond_resched(); 176 + } 177 + 178 + /* 179 + * Return the number of SECS pages that failed to be removed, so 180 + * userspace knows that it has to retry. 
181 + */ 182 + return failures; 183 } 184 185 static int sgx_vepc_release(struct inode *inode, struct file *file) ··· 233 return 0; 234 } 235 236 + static long sgx_vepc_ioctl(struct file *file, 237 + unsigned int cmd, unsigned long arg) 238 + { 239 + struct sgx_vepc *vepc = file->private_data; 240 + 241 + switch (cmd) { 242 + case SGX_IOC_VEPC_REMOVE_ALL: 243 + if (arg) 244 + return -EINVAL; 245 + return sgx_vepc_remove_all(vepc); 246 + 247 + default: 248 + return -ENOTTY; 249 + } 250 + } 251 + 252 static const struct file_operations sgx_vepc_fops = { 253 .owner = THIS_MODULE, 254 .open = sgx_vepc_open, 255 + .unlocked_ioctl = sgx_vepc_ioctl, 256 + .compat_ioctl = sgx_vepc_ioctl, 257 .release = sgx_vepc_release, 258 .mmap = sgx_vepc_mmap, 259 };