Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/fpu/amx: Define AMX state components and have it used for boot-time checks

The XSTATE initialization uses check_xstate_against_struct() to sanity-check
the size of XSTATE-enabled features. AMX is an XSAVE-enabled feature,
and its size is not hard-coded but discoverable at run-time via CPUID.

The AMX state is composed of state components 17 and 18, both of which are
user state components. State component 17, called XTILECFG, is the state of
a 64-byte tile-related control register. State component 18, called
XTILEDATA, contains the actual tile data, and its size varies between
implementations. The architectural maximum, as defined in CPUID(0x1d,
1): EAX[15:0], is one byte less than 64KB. The first implementation supports
8KB.

Check the XTILEDATA state size dynamically. The feature introduces a new
tile register, TMM. Define a struct for a single register only and read the
number of registers from CPUID. Cross-check the resulting overall size
against the size that CPUID reports.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lore.kernel.org/r/20211021225527.10184-21-chang.seok.bae@intel.com

authored by

Chang S. Bae and committed by
Borislav Petkov
eec2113e 70c3f167

+114 -1
+1
arch/x86/include/asm/cpufeatures.h
··· 299 299 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ 300 300 #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ 301 301 #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ 302 + #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ 302 303 303 304 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ 304 305 #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
+32
arch/x86/include/asm/fpu/types.h
··· 120 120 XFEATURE_RSRVD_COMP_13, 121 121 XFEATURE_RSRVD_COMP_14, 122 122 XFEATURE_LBR, 123 + XFEATURE_RSRVD_COMP_16, 124 + XFEATURE_XTILE_CFG, 125 + XFEATURE_XTILE_DATA, 123 126 124 127 XFEATURE_MAX, 125 128 }; ··· 139 136 #define XFEATURE_MASK_PKRU (1 << XFEATURE_PKRU) 140 137 #define XFEATURE_MASK_PASID (1 << XFEATURE_PASID) 141 138 #define XFEATURE_MASK_LBR (1 << XFEATURE_LBR) 139 + #define XFEATURE_MASK_XTILE_CFG (1 << XFEATURE_XTILE_CFG) 140 + #define XFEATURE_MASK_XTILE_DATA (1 << XFEATURE_XTILE_DATA) 142 141 143 142 #define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE) 144 143 #define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \ 145 144 | XFEATURE_MASK_ZMM_Hi256 \ 146 145 | XFEATURE_MASK_Hi16_ZMM) 146 + 147 + #ifdef CONFIG_X86_64 148 + # define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA \ 149 + | XFEATURE_MASK_XTILE_CFG) 150 + #else 151 + # define XFEATURE_MASK_XTILE (0) 152 + #endif 147 153 148 154 #define FIRST_EXTENDED_XFEATURE XFEATURE_YMM 149 155 ··· 164 152 }; 165 153 struct reg_512_bit { 166 154 u8 regbytes[512/8]; 155 + }; 156 + struct reg_1024_byte { 157 + u8 regbytes[1024]; 167 158 }; 168 159 169 160 /* ··· 270 255 u64 ler_to; 271 256 u64 ler_info; 272 257 struct lbr_entry entries[]; 258 + }; 259 + 260 + /* 261 + * State component 17: 64-byte tile configuration register. 262 + */ 263 + struct xtile_cfg { 264 + u64 tcfg[8]; 265 + } __packed; 266 + 267 + /* 268 + * State component 18: 1KB tile data register. 269 + * Each register represents 16 64-byte rows of the matrix 270 + * data. But the number of registers depends on the actual 271 + * implementation. 272 + */ 273 + struct xtile_data { 274 + struct reg_1024_byte tmm; 273 275 } __packed; 274 276 275 277 /*
+2
arch/x86/include/asm/fpu/xstate.h
··· 14 14 15 15 #define XSTATE_CPUID 0x0000000d 16 16 17 + #define TILE_CPUID 0x0000001d 18 + 17 19 #define FXSAVE_SIZE 512 18 20 19 21 #define XSAVE_HDR_SIZE 64
+79 -1
arch/x86/kernel/fpu/xstate.c
··· 51 51 "Protection Keys User registers", 52 52 "PASID state", 53 53 "unknown xstate feature" , 54 + "unknown xstate feature" , 55 + "unknown xstate feature" , 56 + "unknown xstate feature" , 57 + "unknown xstate feature" , 58 + "unknown xstate feature" , 59 + "AMX Tile config" , 60 + "AMX Tile data" , 61 + "unknown xstate feature" , 54 62 }; 55 63 56 64 static unsigned short xsave_cpuid_features[] __initdata = { ··· 73 65 [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, 74 66 [XFEATURE_PKRU] = X86_FEATURE_PKU, 75 67 [XFEATURE_PASID] = X86_FEATURE_ENQCMD, 68 + [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, 69 + [XFEATURE_XTILE_DATA] = X86_FEATURE_AMX_TILE, 76 70 }; 77 71 78 72 static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init = ··· 250 240 print_xstate_feature(XFEATURE_MASK_Hi16_ZMM); 251 241 print_xstate_feature(XFEATURE_MASK_PKRU); 252 242 print_xstate_feature(XFEATURE_MASK_PASID); 243 + print_xstate_feature(XFEATURE_MASK_XTILE_CFG); 244 + print_xstate_feature(XFEATURE_MASK_XTILE_DATA); 253 245 } 254 246 255 247 /* ··· 535 523 } \ 536 524 } while (0) 537 525 526 + /** 527 + * check_xtile_data_against_struct - Check tile data state size. 528 + * 529 + * Calculate the state size by multiplying the single tile size which is 530 + * recorded in a C struct, and the number of tiles that the CPU informs. 531 + * Compare the provided size with the calculation. 532 + * 533 + * @size: The tile data state size 534 + * 535 + * Returns: 0 on success, -EINVAL on mismatch. 536 + */ 537 + static int __init check_xtile_data_against_struct(int size) 538 + { 539 + u32 max_palid, palid, state_size; 540 + u32 eax, ebx, ecx, edx; 541 + u16 max_tile; 542 + 543 + /* 544 + * Check the maximum palette id: 545 + * eax: the highest numbered palette subleaf. 546 + */ 547 + cpuid_count(TILE_CPUID, 0, &max_palid, &ebx, &ecx, &edx); 548 + 549 + /* 550 + * Cross-check each tile size and find the maximum number of 551 + * supported tiles. 
552 + */ 553 + for (palid = 1, max_tile = 0; palid <= max_palid; palid++) { 554 + u16 tile_size, max; 555 + 556 + /* 557 + * Check the tile size info: 558 + * eax[31:16]: bytes per title 559 + * ebx[31:16]: the max names (or max number of tiles) 560 + */ 561 + cpuid_count(TILE_CPUID, palid, &eax, &ebx, &edx, &edx); 562 + tile_size = eax >> 16; 563 + max = ebx >> 16; 564 + 565 + if (tile_size != sizeof(struct xtile_data)) { 566 + pr_err("%s: struct is %zu bytes, cpu xtile %d bytes\n", 567 + __stringify(XFEATURE_XTILE_DATA), 568 + sizeof(struct xtile_data), tile_size); 569 + __xstate_dump_leaves(); 570 + return -EINVAL; 571 + } 572 + 573 + if (max > max_tile) 574 + max_tile = max; 575 + } 576 + 577 + state_size = sizeof(struct xtile_data) * max_tile; 578 + if (size != state_size) { 579 + pr_err("%s: calculated size is %u bytes, cpu state %d bytes\n", 580 + __stringify(XFEATURE_XTILE_DATA), state_size, size); 581 + __xstate_dump_leaves(); 582 + return -EINVAL; 583 + } 584 + return 0; 585 + } 586 + 538 587 /* 539 588 * We have a C struct for each 'xstate'. We need to ensure 540 589 * that our software representation matches what the CPU ··· 619 546 XCHECK_SZ(sz, nr, XFEATURE_Hi16_ZMM, struct avx_512_hi16_state); 620 547 XCHECK_SZ(sz, nr, XFEATURE_PKRU, struct pkru_state); 621 548 XCHECK_SZ(sz, nr, XFEATURE_PASID, struct ia32_pasid_state); 549 + XCHECK_SZ(sz, nr, XFEATURE_XTILE_CFG, struct xtile_cfg); 550 + 551 + /* The tile data size varies between implementations. 
*/ 552 + if (nr == XFEATURE_XTILE_DATA) 553 + check_xtile_data_against_struct(sz); 622 554 623 555 /* 624 556 * Make *SURE* to add any feature numbers in below if ··· 633 555 if ((nr < XFEATURE_YMM) || 634 556 (nr >= XFEATURE_MAX) || 635 557 (nr == XFEATURE_PT_UNIMPLEMENTED_SO_FAR) || 636 - ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_LBR))) { 558 + ((nr >= XFEATURE_RSRVD_COMP_11) && (nr <= XFEATURE_RSRVD_COMP_16))) { 637 559 WARN_ONCE(1, "no structure for xstate: %d\n", nr); 638 560 XSTATE_WARN_ON(1); 639 561 return false;