Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/efi: Add efi_fake_mem support for EFI_MEMORY_SP

Given that EFI_MEMORY_SP is a platform BIOS policy decision for marking
memory ranges as "reserved for a specific purpose", there will inevitably
be scenarios where the BIOS omits the attribute in situations where it
is desired. Unlike other attributes, if the OS wants to reserve this
memory from the kernel, the reservation needs to happen early in init. So
early, in fact, that it needs to happen before e820__memblock_setup(),
which is a pre-requisite for efi_fake_memmap() that wants to allocate
memory for the updated table.

Introduce an x86 specific efi_fake_memmap_early() that can search for
attempts to set EFI_MEMORY_SP via efi_fake_mem and update the e820 table
accordingly.

The KASLR code that scans the command line looking for user-directed
memory reservations also needs to be updated to consider
"efi_fake_mem=nn@ss:0x40000" requests.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Dan Williams and committed by
Rafael J. Wysocki
199c8471 16993c0f

+147 -23
+8 -2
Documentation/admin-guide/kernel-parameters.txt
··· 1196 1196 updating original EFI memory map. 1197 1197 Region of memory which aa attribute is added to is 1198 1198 from ss to ss+nn. 1199 + 1199 1200 If efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000 1200 1201 is specified, EFI_MEMORY_MORE_RELIABLE(0x10000) 1201 1202 attribute is added to range 0x100000000-0x180000000 and 1202 1203 0x10a0000000-0x1120000000. 1203 1204 1205 + If efi_fake_mem=8G@9G:0x40000 is specified, the 1206 + EFI_MEMORY_SP(0x40000) attribute is added to 1207 + range 0x240000000-0x43fffffff. 1208 + 1204 1209 Using this parameter you can do debugging of EFI memmap 1205 - related feature. For example, you can do debugging of 1210 + related features. For example, you can do debugging of 1206 1211 Address Range Mirroring feature even if your box 1207 - doesn't support it. 1212 + doesn't support it, or mark specific memory as 1213 + "soft reserved". 1208 1214 1209 1215 efivar_ssdt= [EFI; X86] Name of an EFI variable that contains an SSDT 1210 1216 that is to be dynamically loaded by Linux. If there are
+35 -7
arch/x86/boot/compressed/kaslr.c
··· 132 132 #include "../../../../lib/ctype.c" 133 133 #include "../../../../lib/cmdline.c" 134 134 135 + enum parse_mode { 136 + PARSE_MEMMAP, 137 + PARSE_EFI, 138 + }; 139 + 135 140 static int 136 - parse_memmap(char *p, unsigned long long *start, unsigned long long *size) 141 + parse_memmap(char *p, unsigned long long *start, unsigned long long *size, 142 + enum parse_mode mode) 137 143 { 138 144 char *oldp; 139 145 ··· 162 156 *start = memparse(p + 1, &p); 163 157 return 0; 164 158 case '@': 165 - /* memmap=nn@ss specifies usable region, should be skipped */ 166 - *size = 0; 159 + if (mode == PARSE_MEMMAP) { 160 + /* 161 + * memmap=nn@ss specifies usable region, should 162 + * be skipped 163 + */ 164 + *size = 0; 165 + } else { 166 + unsigned long long flags; 167 + 168 + /* 169 + * efi_fake_mem=nn@ss:attr the attr specifies 170 + * flags that might imply a soft-reservation. 171 + */ 172 + *start = memparse(p + 1, &p); 173 + if (p && *p == ':') { 174 + p++; 175 + if (kstrtoull(p, 0, &flags) < 0) 176 + *size = 0; 177 + else if (flags & EFI_MEMORY_SP) 178 + return 0; 179 + } 180 + *size = 0; 181 + } 167 182 /* Fall through */ 168 183 default: 169 184 /* ··· 199 172 return -EINVAL; 200 173 } 201 174 202 - static void mem_avoid_memmap(char *str) 175 + static void mem_avoid_memmap(enum parse_mode mode, char *str) 203 176 { 204 177 static int i; 205 178 ··· 214 187 if (k) 215 188 *k++ = 0; 216 189 217 - rc = parse_memmap(str, &start, &size); 190 + rc = parse_memmap(str, &start, &size, mode); 218 191 if (rc < 0) 219 192 break; 220 193 str = k; ··· 265 238 } 266 239 } 267 240 268 - 269 241 static void handle_mem_options(void) 270 242 { 271 243 char *args = (char *)get_cmd_line_ptr(); ··· 297 271 } 298 272 299 273 if (!strcmp(param, "memmap")) { 300 - mem_avoid_memmap(val); 274 + mem_avoid_memmap(PARSE_MEMMAP, val); 301 275 } else if (strstr(param, "hugepages")) { 302 276 parse_gb_huge_pages(param, val); 303 277 } else if (!strcmp(param, "mem")) { ··· 310 284 goto out; 
311 285 312 286 mem_limit = mem_size; 287 + } else if (!strcmp(param, "efi_fake_mem")) { 288 + mem_avoid_memmap(PARSE_EFI, val); 313 289 } 314 290 } 315 291
+8
arch/x86/include/asm/efi.h
··· 263 263 } 264 264 #endif /* CONFIG_EFI */ 265 265 266 + #ifdef CONFIG_EFI_FAKE_MEMMAP 267 + extern void __init efi_fake_memmap_early(void); 268 + #else 269 + static inline void efi_fake_memmap_early(void) 270 + { 271 + } 272 + #endif 273 + 266 274 #endif /* _ASM_X86_EFI_H */
+2
arch/x86/platform/efi/efi.c
··· 262 262 if (add_efi_memmap || do_efi_soft_reserve()) 263 263 do_add_efi_memmap(); 264 264 265 + efi_fake_memmap_early(); 266 + 265 267 WARN(efi.memmap.desc_version != 1, 266 268 "Unexpected EFI_MEMORY_DESCRIPTOR version %ld", 267 269 efi.memmap.desc_version);
+4 -1
drivers/firmware/efi/Makefile
··· 20 20 obj-$(CONFIG_EFI_RUNTIME_MAP) += runtime-map.o 21 21 obj-$(CONFIG_EFI_RUNTIME_WRAPPERS) += runtime-wrappers.o 22 22 obj-$(CONFIG_EFI_STUB) += libstub/ 23 - obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_mem.o 23 + obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_map.o 24 24 obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o 25 25 obj-$(CONFIG_EFI_TEST) += test/ 26 26 obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o 27 27 obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o 28 28 obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o 29 + 30 + fake_map-y += fake_mem.o 31 + fake_map-$(CONFIG_X86) += x86_fake_mem.o 29 32 30 33 arm-obj-$(CONFIG_EFI) := arm-init.o arm-runtime.o 31 34 obj-$(CONFIG_ARM) += $(arm-obj-y)
+11 -13
drivers/firmware/efi/fake_mem.c
··· 17 17 #include <linux/memblock.h> 18 18 #include <linux/types.h> 19 19 #include <linux/sort.h> 20 - #include <asm/efi.h> 20 + #include "fake_mem.h" 21 21 22 - #define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM 23 - 24 - static struct efi_mem_range fake_mems[EFI_MAX_FAKEMEM]; 25 - static int nr_fake_mem; 22 + struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM]; 23 + int nr_fake_mem; 26 24 27 25 static int __init cmp_fake_mem(const void *x1, const void *x2) 28 26 { ··· 48 50 /* count up the number of EFI memory descriptor */ 49 51 for (i = 0; i < nr_fake_mem; i++) { 50 52 for_each_efi_memory_desc(md) { 51 - struct range *r = &fake_mems[i].range; 53 + struct range *r = &efi_fake_mems[i].range; 52 54 53 55 new_nr_map += efi_memmap_split_count(md, r); 54 56 } ··· 68 70 } 69 71 70 72 for (i = 0; i < nr_fake_mem; i++) 71 - efi_memmap_insert(&efi.memmap, new_memmap, &fake_mems[i]); 73 + efi_memmap_insert(&efi.memmap, new_memmap, &efi_fake_mems[i]); 72 74 73 75 /* swap into new EFI memmap */ 74 76 early_memunmap(new_memmap, efi.memmap.desc_size * new_nr_map); ··· 102 104 if (nr_fake_mem >= EFI_MAX_FAKEMEM) 103 105 break; 104 106 105 - fake_mems[nr_fake_mem].range.start = start; 106 - fake_mems[nr_fake_mem].range.end = start + mem_size - 1; 107 - fake_mems[nr_fake_mem].attribute = attribute; 107 + efi_fake_mems[nr_fake_mem].range.start = start; 108 + efi_fake_mems[nr_fake_mem].range.end = start + mem_size - 1; 109 + efi_fake_mems[nr_fake_mem].attribute = attribute; 108 110 nr_fake_mem++; 109 111 110 112 if (*p == ',') 111 113 p++; 112 114 } 113 115 114 - sort(fake_mems, nr_fake_mem, sizeof(struct efi_mem_range), 116 + sort(efi_fake_mems, nr_fake_mem, sizeof(struct efi_mem_range), 115 117 cmp_fake_mem, NULL); 116 118 117 119 for (i = 0; i < nr_fake_mem; i++) 118 120 pr_info("efi_fake_mem: add attr=0x%016llx to [mem 0x%016llx-0x%016llx]", 119 - fake_mems[i].attribute, fake_mems[i].range.start, 120 - fake_mems[i].range.end); 121 + efi_fake_mems[i].attribute, 
efi_fake_mems[i].range.start, 122 + efi_fake_mems[i].range.end); 121 123 122 124 return *p == '\0' ? 0 : -EINVAL; 123 125 }
+10
drivers/firmware/efi/fake_mem.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __EFI_FAKE_MEM_H__ 3 + #define __EFI_FAKE_MEM_H__ 4 + #include <asm/efi.h> 5 + 6 + #define EFI_MAX_FAKEMEM CONFIG_EFI_MAX_FAKE_MEM 7 + 8 + extern struct efi_mem_range efi_fake_mems[EFI_MAX_FAKEMEM]; 9 + extern int nr_fake_mem; 10 + #endif /* __EFI_FAKE_MEM_H__ */
+69
drivers/firmware/efi/x86_fake_mem.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright(c) 2019 Intel Corporation. All rights reserved. */ 3 + #include <linux/efi.h> 4 + #include <asm/e820/api.h> 5 + #include "fake_mem.h" 6 + 7 + void __init efi_fake_memmap_early(void) 8 + { 9 + int i; 10 + 11 + /* 12 + * The late efi_fake_mem() call can handle all requests if 13 + * EFI_MEMORY_SP support is disabled. 14 + */ 15 + if (!efi_soft_reserve_enabled()) 16 + return; 17 + 18 + if (!efi_enabled(EFI_MEMMAP) || !nr_fake_mem) 19 + return; 20 + 21 + /* 22 + * Given that efi_fake_memmap() needs to perform memblock 23 + * allocations it needs to run after e820__memblock_setup(). 24 + * However, if efi_fake_mem specifies EFI_MEMORY_SP for a given 25 + * address range that potentially needs to mark the memory as 26 + * reserved prior to e820__memblock_setup(). Update e820 27 + * directly if EFI_MEMORY_SP is specified for an 28 + * EFI_CONVENTIONAL_MEMORY descriptor. 29 + */ 30 + for (i = 0; i < nr_fake_mem; i++) { 31 + struct efi_mem_range *mem = &efi_fake_mems[i]; 32 + efi_memory_desc_t *md; 33 + u64 m_start, m_end; 34 + 35 + if ((mem->attribute & EFI_MEMORY_SP) == 0) 36 + continue; 37 + 38 + m_start = mem->range.start; 39 + m_end = mem->range.end; 40 + for_each_efi_memory_desc(md) { 41 + u64 start, end; 42 + 43 + if (md->type != EFI_CONVENTIONAL_MEMORY) 44 + continue; 45 + 46 + start = md->phys_addr; 47 + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; 48 + 49 + if (m_start <= end && m_end >= start) 50 + /* fake range overlaps descriptor */; 51 + else 52 + continue; 53 + 54 + /* 55 + * Trim the boundary of the e820 update to the 56 + * descriptor in case the fake range overlaps 57 + * !EFI_CONVENTIONAL_MEMORY 58 + */ 59 + start = max(start, m_start); 60 + end = min(end, m_end); 61 + 62 + if (end <= start) 63 + continue; 64 + e820__range_update(start, end - start + 1, E820_TYPE_RAM, 65 + E820_TYPE_SOFT_RESERVED); 66 + e820__update_table(e820_table); 67 + } 68 + } 69 + }