···33#include <asm/facility.h>44#include <asm/sections.h>5566+/* will be used in arch/s390/kernel/uv.c */77+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST68int __bootdata_preserved(prot_virt_guest);99+#endif1010+#if IS_ENABLED(CONFIG_KVM)1111+struct uv_info __bootdata_preserved(uv_info);1212+#endif713814void uv_query_info(void)915{···2519 if (uv_call(0, (uint64_t)&uvcb) && uvcb.header.rc != 0x100)2620 return;27212222+ if (IS_ENABLED(CONFIG_KVM)) {2323+ memcpy(uv_info.inst_calls_list, uvcb.inst_calls_list, sizeof(uv_info.inst_calls_list));2424+ uv_info.uv_base_stor_len = uvcb.uv_base_stor_len;2525+ uv_info.guest_base_stor_len = uvcb.conf_base_phys_stor_len;2626+ uv_info.guest_virt_base_stor_len = uvcb.conf_base_virt_stor_len;2727+ uv_info.guest_virt_var_stor_len = uvcb.conf_virt_var_stor_len;2828+ uv_info.guest_cpu_stor_len = uvcb.cpu_stor_len;2929+ uv_info.max_sec_stor_addr = ALIGN(uvcb.max_guest_stor_addr, PAGE_SIZE);3030+ uv_info.max_num_sec_conf = uvcb.max_num_sec_conf;3131+ uv_info.max_guest_cpus = uvcb.max_guest_cpus;3232+ }3333+3434+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST2835 if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&2936 test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))3037 prot_virt_guest = 1;3838+#endif3139}
+4
arch/s390/include/asm/gmap.h
···99#ifndef _ASM_S390_GMAP_H1010#define _ASM_S390_GMAP_H11111212+#include <linux/radix-tree.h>1213#include <linux/refcount.h>13141415/* Generic bits for GMAP notification on DAT table entry changes. */···3231 * @table: pointer to the page directory3332 * @asce: address space control element for gmap page table3433 * @pfault_enabled: defines if pfaults are applicable for the guest3434+ * @guest_handle: protected virtual machine handle for the ultravisor3535 * @host_to_rmap: radix tree with gmap_rmap lists3636 * @children: list of shadow gmap structures3737 * @pt_list: list of all page tables used in the shadow guest address space···5654 unsigned long asce_end;5755 void *private;5856 bool pfault_enabled;5757+ /* only set for protected virtual machines */5858+ unsigned long guest_handle;5959 /* Additional data for shadow guest address spaces */6060 struct radix_tree_root host_to_rmap;6161 struct list_head children;
+2
arch/s390/include/asm/mmu.h
···1616 unsigned long asce;1717 unsigned long asce_limit;1818 unsigned long vdso_base;1919+ /* The mmu context belongs to a secure guest. */2020+ atomic_t is_protected;1921 /*2022 * The following bitfields need a down_write on the mm2123 * semaphore when they are written to. As they are only
···7878PGM_CHECK(do_dat_exception) /* 3a */7979PGM_CHECK(do_dat_exception) /* 3b */8080PGM_CHECK_DEFAULT /* 3c */8181-PGM_CHECK_DEFAULT /* 3d */8282-PGM_CHECK_DEFAULT /* 3e */8181+PGM_CHECK(do_secure_storage_access) /* 3d */8282+PGM_CHECK(do_non_secure_storage_access) /* 3e */8383PGM_CHECK_DEFAULT /* 3f */8484PGM_CHECK(monitor_event_exception) /* 40 */8585PGM_CHECK_DEFAULT /* 41 */
+5-4
arch/s390/kernel/setup.c
···92929393unsigned long int_hwcap = 0;94949595-#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST9696-int __bootdata_preserved(prot_virt_guest);9797-#endif9898-9995int __bootdata(noexec_disabled);10096int __bootdata(memory_end_set);10197unsigned long __bootdata(memory_end);···559563 else560564 vmax = _REGION1_SIZE; /* 4-level kernel page table */561565 }566566+567567+ if (is_prot_virt_host())568568+ adjust_to_uv_max(&vmax);562569563570 /* module area is at the end of the kernel address space. */564571 MODULES_END = vmax;···11371138 */11381139 memblock_trim_memory(1UL << (MAX_ORDER - 1 + PAGE_SHIFT));1139114011411141+ if (is_prot_virt_host())11421142+ setup_uv();11401143 setup_memory_end();11411144 setup_memory();11421145 dma_contiguous_reserve(memory_end);
+414
arch/s390/kernel/uv.c
···11+// SPDX-License-Identifier: GPL-2.022+/*33+ * Common Ultravisor functions and initialization44+ *55+ * Copyright IBM Corp. 2019, 202066+ */77+#define KMSG_COMPONENT "prot_virt"88+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt99+1010+#include <linux/kernel.h>1111+#include <linux/types.h>1212+#include <linux/sizes.h>1313+#include <linux/bitmap.h>1414+#include <linux/memblock.h>1515+#include <linux/pagemap.h>1616+#include <linux/swap.h>1717+#include <asm/facility.h>1818+#include <asm/sections.h>1919+#include <asm/uv.h>2020+2121+/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */2222+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST2323+int __bootdata_preserved(prot_virt_guest);2424+#endif2525+2626+#if IS_ENABLED(CONFIG_KVM)2727+int prot_virt_host;2828+EXPORT_SYMBOL(prot_virt_host);2929+struct uv_info __bootdata_preserved(uv_info);3030+EXPORT_SYMBOL(uv_info);3131+3232+static int __init prot_virt_setup(char *val)3333+{3434+ bool enabled;3535+ int rc;3636+3737+ rc = kstrtobool(val, &enabled);3838+ if (!rc && enabled)3939+ prot_virt_host = 1;4040+4141+ if (is_prot_virt_guest() && prot_virt_host) {4242+ prot_virt_host = 0;4343+ pr_warn("Protected virtualization not available in protected guests.");4444+ }4545+4646+ if (prot_virt_host && !test_facility(158)) {4747+ prot_virt_host = 0;4848+ pr_warn("Protected virtualization not supported by the hardware.");4949+ }5050+5151+ return rc;5252+}5353+early_param("prot_virt", prot_virt_setup);5454+5555+static int __init uv_init(unsigned long stor_base, unsigned long stor_len)5656+{5757+ struct uv_cb_init uvcb = {5858+ .header.cmd = UVC_CMD_INIT_UV,5959+ .header.len = sizeof(uvcb),6060+ .stor_origin = stor_base,6161+ .stor_len = stor_len,6262+ };6363+6464+ if (uv_call(0, (uint64_t)&uvcb)) {6565+ pr_err("Ultravisor init failed with rc: 0x%x rrc: 0%x\n",6666+ uvcb.header.rc, uvcb.header.rrc);6767+ return -1;6868+ }6969+ return 0;7070+}7171+7272+void __init setup_uv(void)7373+{7474+ unsigned long uv_stor_base;7575+7676+ uv_stor_base = (unsigned long)memblock_alloc_try_nid(7777+ uv_info.uv_base_stor_len, SZ_1M, SZ_2G,7878+ MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);7979+ if (!uv_stor_base) {8080+ pr_warn("Failed to reserve %lu bytes for ultravisor base storage\n",8181+ uv_info.uv_base_stor_len);8282+ goto fail;8383+ }8484+8585+ if (uv_init(uv_stor_base, uv_info.uv_base_stor_len)) {8686+ memblock_free(uv_stor_base, uv_info.uv_base_stor_len);8787+ goto fail;8888+ }8989+9090+ pr_info("Reserving %luMB as ultravisor base storage\n",9191+ uv_info.uv_base_stor_len >> 20);9292+ return;9393+fail:9494+ pr_info("Disabling support for protected virtualization");9595+ prot_virt_host = 0;9696+}9797+9898+void adjust_to_uv_max(unsigned long *vmax)9999+{100100+ *vmax = min_t(unsigned long, *vmax, uv_info.max_sec_stor_addr);101101+}102102+103103+/*104104+ * Requests the Ultravisor to pin the page in the shared state. This will105105+ * cause an intercept when the guest attempts to unshare the pinned page.106106+ */107107+static int uv_pin_shared(unsigned long paddr)108108+{109109+ struct uv_cb_cfs uvcb = {110110+ .header.cmd = UVC_CMD_PIN_PAGE_SHARED,111111+ .header.len = sizeof(uvcb),112112+ .paddr = paddr,113113+ };114114+115115+ if (uv_call(0, (u64)&uvcb))116116+ return -EINVAL;117117+ return 0;118118+}119119+120120+/*121121+ * Requests the Ultravisor to encrypt a guest page and make it122122+ * accessible to the host for paging (export).123123+ *124124+ * @paddr: Absolute host address of page to be exported125125+ */126126+int uv_convert_from_secure(unsigned long paddr)127127+{128128+ struct uv_cb_cfs uvcb = {129129+ .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,130130+ .header.len = sizeof(uvcb),131131+ .paddr = paddr132132+ };133133+134134+ if (uv_call(0, (u64)&uvcb))135135+ return -EINVAL;136136+ return 0;137137+}138138+139139+/*140140+ * Calculate the expected ref_count for a page that would otherwise have no141141+ * further pins. This was cribbed from similar functions in other places in142142+ * the kernel, but with some slight modifications. We know that a secure143143+ * page can not be a huge page for example.144144+ */145145+static int expected_page_refs(struct page *page)146146+{147147+ int res;148148+149149+ res = page_mapcount(page);150150+ if (PageSwapCache(page)) {151151+ res++;152152+ } else if (page_mapping(page)) {153153+ res++;154154+ if (page_has_private(page))155155+ res++;156156+ }157157+ return res;158158+}159159+160160+static int make_secure_pte(pte_t *ptep, unsigned long addr,161161+ struct page *exp_page, struct uv_cb_header *uvcb)162162+{163163+ pte_t entry = READ_ONCE(*ptep);164164+ struct page *page;165165+ int expected, rc = 0;166166+167167+ if (!pte_present(entry))168168+ return -ENXIO;169169+ if (pte_val(entry) & _PAGE_INVALID)170170+ return -ENXIO;171171+172172+ page = pte_page(entry);173173+ if (page != exp_page)174174+ return -ENXIO;175175+ if (PageWriteback(page))176176+ return -EAGAIN;177177+ expected = expected_page_refs(page);178178+ if (!page_ref_freeze(page, expected))179179+ return -EBUSY;180180+ set_bit(PG_arch_1, &page->flags);181181+ rc = uv_call(0, (u64)uvcb);182182+ page_ref_unfreeze(page, expected);183183+ /* Return -ENXIO if the page was not mapped, -EINVAL otherwise */184184+ if (rc)185185+ rc = uvcb->rc == 0x10a ? -ENXIO : -EINVAL;186186+ return rc;187187+}188188+189189+/*190190+ * Requests the Ultravisor to make a page accessible to a guest.191191+ * If it's brought in the first time, it will be cleared. If192192+ * it has been exported before, it will be decrypted and integrity193193+ * checked.194194+ */195195+int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)196196+{197197+ struct vm_area_struct *vma;198198+ bool local_drain = false;199199+ spinlock_t *ptelock;200200+ unsigned long uaddr;201201+ struct page *page;202202+ pte_t *ptep;203203+ int rc;204204+205205+again:206206+ rc = -EFAULT;207207+ down_read(&gmap->mm->mmap_sem);208208+209209+ uaddr = __gmap_translate(gmap, gaddr);210210+ if (IS_ERR_VALUE(uaddr))211211+ goto out;212212+ vma = find_vma(gmap->mm, uaddr);213213+ if (!vma)214214+ goto out;215215+ /*216216+ * Secure pages cannot be huge and userspace should not combine both.217217+ * In case userspace does it anyway this will result in an -EFAULT for218218+ * the unpack. The guest is thus never reaching secure mode. If219219+ * userspace is playing dirty tricky with mapping huge pages later220220+ * on this will result in a segmentation fault.221221+ */222222+ if (is_vm_hugetlb_page(vma))223223+ goto out;224224+225225+ rc = -ENXIO;226226+ page = follow_page(vma, uaddr, FOLL_WRITE);227227+ if (IS_ERR_OR_NULL(page))228228+ goto out;229229+230230+ lock_page(page);231231+ ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);232232+ rc = make_secure_pte(ptep, uaddr, page, uvcb);233233+ pte_unmap_unlock(ptep, ptelock);234234+ unlock_page(page);235235+out:236236+ up_read(&gmap->mm->mmap_sem);237237+238238+ if (rc == -EAGAIN) {239239+ wait_on_page_writeback(page);240240+ } else if (rc == -EBUSY) {241241+ /*242242+ * If we have tried a local drain and the page refcount243243+ * still does not match our expected safe value, try with a244244+ * system wide drain. This is needed if the pagevecs holding245245+ * the page are on a different CPU.246246+ */247247+ if (local_drain) {248248+ lru_add_drain_all();249249+ /* We give up here, and let the caller try again */250250+ return -EAGAIN;251251+ }252252+ /*253253+ * We are here if the page refcount does not match the254254+ * expected safe value. The main culprits are usually255255+ * pagevecs. With lru_add_drain() we drain the pagevecs256256+ * on the local CPU so that hopefully the refcount will257257+ * reach the expected safe value.258258+ */259259+ lru_add_drain();260260+ local_drain = true;261261+ /* And now we try again immediately after draining */262262+ goto again;263263+ } else if (rc == -ENXIO) {264264+ if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))265265+ return -EFAULT;266266+ return -EAGAIN;267267+ }268268+ return rc;269269+}270270+EXPORT_SYMBOL_GPL(gmap_make_secure);271271+272272+int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)273273+{274274+ struct uv_cb_cts uvcb = {275275+ .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,276276+ .header.len = sizeof(uvcb),277277+ .guest_handle = gmap->guest_handle,278278+ .gaddr = gaddr,279279+ };280280+281281+ return gmap_make_secure(gmap, gaddr, &uvcb);282282+}283283+EXPORT_SYMBOL_GPL(gmap_convert_to_secure);284284+285285+/*286286+ * To be called with the page locked or with an extra reference! This will287287+ * prevent gmap_make_secure from touching the page concurrently. Having 2288288+ * parallel make_page_accessible is fine, as the UV calls will become a289289+ * no-op if the page is already exported.290290+ */291291+int arch_make_page_accessible(struct page *page)292292+{293293+ int rc = 0;294294+295295+ /* Hugepage cannot be protected, so nothing to do */296296+ if (PageHuge(page))297297+ return 0;298298+299299+ /*300300+ * PG_arch_1 is used in 3 places:301301+ * 1. for kernel page tables during early boot302302+ * 2. for storage keys of huge pages and KVM303303+ * 3. As an indication that this page might be secure. This can304304+ * overindicate, e.g. we set the bit before calling305305+ * convert_to_secure.306306+ * As secure pages are never huge, all 3 variants can co-exists.307307+ */308308+ if (!test_bit(PG_arch_1, &page->flags))309309+ return 0;310310+311311+ rc = uv_pin_shared(page_to_phys(page));312312+ if (!rc) {313313+ clear_bit(PG_arch_1, &page->flags);314314+ return 0;315315+ }316316+317317+ rc = uv_convert_from_secure(page_to_phys(page));318318+ if (!rc) {319319+ clear_bit(PG_arch_1, &page->flags);320320+ return 0;321321+ }322322+323323+ return rc;324324+}325325+EXPORT_SYMBOL_GPL(arch_make_page_accessible);326326+327327+#endif328328+329329+#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)330330+static ssize_t uv_query_facilities(struct kobject *kobj,331331+ struct kobj_attribute *attr, char *page)332332+{333333+ return snprintf(page, PAGE_SIZE, "%lx\n%lx\n%lx\n%lx\n",334334+ uv_info.inst_calls_list[0],335335+ uv_info.inst_calls_list[1],336336+ uv_info.inst_calls_list[2],337337+ uv_info.inst_calls_list[3]);338338+}339339+340340+static struct kobj_attribute uv_query_facilities_attr =341341+ __ATTR(facilities, 0444, uv_query_facilities, NULL);342342+343343+static ssize_t uv_query_max_guest_cpus(struct kobject *kobj,344344+ struct kobj_attribute *attr, char *page)345345+{346346+ return snprintf(page, PAGE_SIZE, "%d\n",347347+ uv_info.max_guest_cpus);348348+}349349+350350+static struct kobj_attribute uv_query_max_guest_cpus_attr =351351+ __ATTR(max_cpus, 0444, uv_query_max_guest_cpus, NULL);352352+353353+static ssize_t uv_query_max_guest_vms(struct kobject *kobj,354354+ struct kobj_attribute *attr, char *page)355355+{356356+ return snprintf(page, PAGE_SIZE, "%d\n",357357+ uv_info.max_num_sec_conf);358358+}359359+360360+static struct kobj_attribute uv_query_max_guest_vms_attr =361361+ __ATTR(max_guests, 0444, uv_query_max_guest_vms, NULL);362362+363363+static ssize_t uv_query_max_guest_addr(struct kobject *kobj,364364+ struct kobj_attribute *attr, char *page)365365+{366366+ return snprintf(page, PAGE_SIZE, "%lx\n",367367+ uv_info.max_sec_stor_addr);368368+}369369+370370+static struct kobj_attribute uv_query_max_guest_addr_attr =371371+ __ATTR(max_address, 0444, uv_query_max_guest_addr, NULL);372372+373373+static struct attribute *uv_query_attrs[] = {374374+ &uv_query_facilities_attr.attr,375375+ &uv_query_max_guest_cpus_attr.attr,376376+ &uv_query_max_guest_vms_attr.attr,377377+ &uv_query_max_guest_addr_attr.attr,378378+ NULL,379379+};380380+381381+static struct attribute_group uv_query_attr_group = {382382+ .attrs = uv_query_attrs,383383+};384384+385385+static struct kset *uv_query_kset;386386+static struct kobject *uv_kobj;387387+388388+static int __init uv_info_init(void)389389+{390390+ int rc = -ENOMEM;391391+392392+ if (!test_facility(158))393393+ return 0;394394+395395+ uv_kobj = kobject_create_and_add("uv", firmware_kobj);396396+ if (!uv_kobj)397397+ return -ENOMEM;398398+399399+ uv_query_kset = kset_create_and_add("query", NULL, uv_kobj);400400+ if (!uv_query_kset)401401+ goto out_kobj;402402+403403+ rc = sysfs_create_group(&uv_query_kset->kobj, &uv_query_attr_group);404404+ if (!rc)405405+ return 0;406406+407407+ kset_unregister(uv_query_kset);408408+out_kobj:409409+ kobject_del(uv_kobj);410410+ kobject_put(uv_kobj);411411+ return rc;412412+}413413+device_initcall(uv_info_init);414414+#endif