Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/efi: EFI soft reservation to E820 enumeration

UEFI 2.8 defines an EFI_MEMORY_SP attribute bit to augment the
interpretation of the EFI Memory Types as "reserved for a specific
purpose".

The proposed Linux behavior for specific purpose memory is that it is
reserved for direct-access (device-dax) by default and not available for
any kernel usage, not even as an OOM fallback. Later, through udev
scripts or another init mechanism, these device-dax claimed ranges can
be reconfigured and hot-added to the available System-RAM with a unique
node identifier. This device-dax management scheme implements "soft" in
the "soft reserved" designation by allowing some or all of the
reservation to be recovered as typical memory. This policy can be
disabled at compile time with CONFIG_EFI_SOFT_RESERVE=n, or at runtime
with efi=nosoftreserve.

This patch introduces two new concepts at once, because early boot
enumeration is entangled with the policy of optionally reserving this
memory from the kernel page allocator by default. The new concepts
are:

- E820_TYPE_SOFT_RESERVED: Upon detecting the EFI_MEMORY_SP
attribute on EFI_CONVENTIONAL memory, update the E820 map with this
new type. Only perform this classification if the
CONFIG_EFI_SOFT_RESERVE=y policy is enabled; otherwise, treat the
range as typical RAM.

- IORES_DESC_SOFT_RESERVED: Add a new I/O resource descriptor that
lets a device driver search iomem resources for application-specific
memory. Teach the iomem code to identify such ranges as "Soft Reserved".

Note that the comment for do_add_efi_memmap() needed refreshing since it
seemed to imply that the EFI map might overflow the e820 table, but that
is not an issue as of commit 7b6e4ba3cb1f ("x86/boot/e820: Clean up the
E820_X_MAX definition"), which removed the 128-entry limit for
e820__range_add().

A follow-on change integrates parsing of the ACPI HMAT to identify the
node and sub-range boundaries of EFI_MEMORY_SP designated memory. For
now, just identify and reserve memory of this type.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Reported-by: kbuild test robot <lkp@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Dan Williams and committed by
Rafael J. Wysocki
262b45ae b617c526

+73 -7
+5 -1
arch/x86/boot/compressed/eboot.c
··· 554 554 case EFI_BOOT_SERVICES_CODE: 555 555 case EFI_BOOT_SERVICES_DATA: 556 556 case EFI_CONVENTIONAL_MEMORY: 557 - e820_type = E820_TYPE_RAM; 557 + if (efi_soft_reserve_enabled() && 558 + (d->attribute & EFI_MEMORY_SP)) 559 + e820_type = E820_TYPE_SOFT_RESERVED; 560 + else 561 + e820_type = E820_TYPE_RAM; 558 562 break; 559 563 560 564 case EFI_ACPI_MEMORY_NVS:
+4
arch/x86/boot/compressed/kaslr.c
··· 760 760 if (md->type != EFI_CONVENTIONAL_MEMORY) 761 761 continue; 762 762 763 + if (efi_soft_reserve_enabled() && 764 + (md->attribute & EFI_MEMORY_SP)) 765 + continue; 766 + 763 767 if (efi_mirror_found && 764 768 !(md->attribute & EFI_MEMORY_MORE_RELIABLE)) 765 769 continue;
+8
arch/x86/include/asm/e820/types.h
··· 29 29 E820_TYPE_PRAM = 12, 30 30 31 31 /* 32 + * Special-purpose memory is indicated to the system via the 33 + * EFI_MEMORY_SP attribute. Define an e820 translation of this 34 + * memory type for the purpose of reserving this range and 35 + * marking it with the IORES_DESC_SOFT_RESERVED designation. 36 + */ 37 + E820_TYPE_SOFT_RESERVED = 0xefffffff, 38 + 39 + /* 32 40 * Reserved RAM used by the kernel itself if 33 41 * CONFIG_INTEL_TXT=y is enabled, memory of this type 34 42 * will be included in the S3 integrity calculation
+10 -2
arch/x86/kernel/e820.c
··· 190 190 case E820_TYPE_RAM: /* Fall through: */ 191 191 case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break; 192 192 case E820_TYPE_RESERVED: pr_cont("reserved"); break; 193 + case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break; 193 194 case E820_TYPE_ACPI: pr_cont("ACPI data"); break; 194 195 case E820_TYPE_NVS: pr_cont("ACPI NVS"); break; 195 196 case E820_TYPE_UNUSABLE: pr_cont("unusable"); break; ··· 1038 1037 case E820_TYPE_PRAM: return "Persistent Memory (legacy)"; 1039 1038 case E820_TYPE_PMEM: return "Persistent Memory"; 1040 1039 case E820_TYPE_RESERVED: return "Reserved"; 1040 + case E820_TYPE_SOFT_RESERVED: return "Soft Reserved"; 1041 1041 default: return "Unknown E820 type"; 1042 1042 } 1043 1043 } ··· 1054 1052 case E820_TYPE_PRAM: /* Fall-through: */ 1055 1053 case E820_TYPE_PMEM: /* Fall-through: */ 1056 1054 case E820_TYPE_RESERVED: /* Fall-through: */ 1055 + case E820_TYPE_SOFT_RESERVED: /* Fall-through: */ 1057 1056 default: return IORESOURCE_MEM; 1058 1057 } 1059 1058 } ··· 1067 1064 case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; 1068 1065 case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; 1069 1066 case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; 1067 + case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED; 1070 1068 case E820_TYPE_RESERVED_KERN: /* Fall-through: */ 1071 1069 case E820_TYPE_RAM: /* Fall-through: */ 1072 1070 case E820_TYPE_UNUSABLE: /* Fall-through: */ ··· 1082 1078 return true; 1083 1079 1084 1080 /* 1085 - * Treat persistent memory like device memory, i.e. reserve it 1086 - * for exclusive use of a driver 1081 + * Treat persistent memory and other special memory ranges like 1082 + * device memory, i.e. 
reserve it for exclusive use of a driver 1087 1083 */ 1088 1084 switch (type) { 1089 1085 case E820_TYPE_RESERVED: 1086 + case E820_TYPE_SOFT_RESERVED: 1090 1087 case E820_TYPE_PRAM: 1091 1088 case E820_TYPE_PMEM: 1092 1089 return false; ··· 1289 1284 end = entry->addr + entry->size; 1290 1285 if (end != (resource_size_t)end) 1291 1286 continue; 1287 + 1288 + if (entry->type == E820_TYPE_SOFT_RESERVED) 1289 + memblock_reserve(entry->addr, entry->size); 1292 1290 1293 1291 if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) 1294 1292 continue;
+45 -4
arch/x86/platform/efi/efi.c
··· 148 148 149 149 /* 150 150 * Tell the kernel about the EFI memory map. This might include 151 - * more than the max 128 entries that can fit in the e820 legacy 152 - * (zeropage) memory map. 151 + * more than the max 128 entries that can fit in the passed in e820 152 + * legacy (zeropage) memory map, but the kernel's e820 table can hold 153 + * E820_MAX_ENTRIES. 153 154 */ 154 155 155 156 static void __init do_add_efi_memmap(void) 156 157 { 157 158 efi_memory_desc_t *md; 159 + 160 + if (!efi_enabled(EFI_MEMMAP)) 161 + return; 158 162 159 163 for_each_efi_memory_desc(md) { 160 164 unsigned long long start = md->phys_addr; ··· 171 167 case EFI_BOOT_SERVICES_CODE: 172 168 case EFI_BOOT_SERVICES_DATA: 173 169 case EFI_CONVENTIONAL_MEMORY: 174 - if (md->attribute & EFI_MEMORY_WB) 170 + if (efi_soft_reserve_enabled() 171 + && (md->attribute & EFI_MEMORY_SP)) 172 + e820_type = E820_TYPE_SOFT_RESERVED; 173 + else if (md->attribute & EFI_MEMORY_WB) 175 174 e820_type = E820_TYPE_RAM; 176 175 else 177 176 e820_type = E820_TYPE_RESERVED; ··· 200 193 e820_type = E820_TYPE_RESERVED; 201 194 break; 202 195 } 196 + 203 197 e820__range_add(start, size, e820_type); 204 198 } 205 199 e820__update_table(e820_table); 200 + } 201 + 202 + /* 203 + * Given add_efi_memmap defaults to 0 and there there is no alternative 204 + * e820 mechanism for soft-reserved memory, import the full EFI memory 205 + * map if soft reservations are present and enabled. Otherwise, the 206 + * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is 207 + * the efi=nosoftreserve option. 
208 + */ 209 + static bool do_efi_soft_reserve(void) 210 + { 211 + efi_memory_desc_t *md; 212 + 213 + if (!efi_enabled(EFI_MEMMAP)) 214 + return false; 215 + 216 + if (!efi_soft_reserve_enabled()) 217 + return false; 218 + 219 + for_each_efi_memory_desc(md) 220 + if (md->type == EFI_CONVENTIONAL_MEMORY && 221 + (md->attribute & EFI_MEMORY_SP)) 222 + return true; 223 + return false; 206 224 } 207 225 208 226 int __init efi_memblock_x86_reserve_range(void) ··· 259 227 if (rv) 260 228 return rv; 261 229 262 - if (add_efi_memmap) 230 + if (add_efi_memmap || do_efi_soft_reserve()) 263 231 do_add_efi_memmap(); 264 232 265 233 WARN(efi.memmap.desc_version != 1, ··· 811 779 * doesn't exist for 32-bit kernels. 812 780 */ 813 781 if (IS_ENABLED(CONFIG_X86_32)) 782 + return false; 783 + 784 + /* 785 + * EFI specific purpose memory may be reserved by default 786 + * depending on kernel config and boot options. 787 + */ 788 + if (md->type == EFI_CONVENTIONAL_MEMORY && 789 + efi_soft_reserve_enabled() && 790 + (md->attribute & EFI_MEMORY_SP)) 814 791 return false; 815 792 816 793 /*
+1
include/linux/ioport.h
··· 134 134 IORES_DESC_PERSISTENT_MEMORY_LEGACY = 5, 135 135 IORES_DESC_DEVICE_PRIVATE_MEMORY = 6, 136 136 IORES_DESC_RESERVED = 7, 137 + IORES_DESC_SOFT_RESERVED = 8, 137 138 }; 138 139 139 140 /*