x86/mm: Fix _pgd_alloc() for Xen PV mode

Recently, _pgd_alloc() was switched from using __get_free_pages() to
pagetable_alloc_noprof(), which returns a compound page whenever the
allocation order is larger than 0.
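
For reference, the generic helper ORs in __GFP_COMP unconditionally, so
any order > 0 allocation comes back as a compound page, unlike
__get_free_pages(). Roughly (a simplified sketch of the helper in
include/linux/mm.h; the exact form may differ by kernel version):

        static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp,
                                                            unsigned int order)
        {
                struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);

                return page_ptdesc(page);
        }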

On x86 this will be the case if CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
is set, even if PTI has been disabled at runtime.

When running as a Xen PV guest (which always disables PTI), using a
compound page for a PGD results in VM_BUG_ON_PGFLAGS() triggering when
the Xen code tries to pin the PGD.
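
The underlying reason is the page flag policy: PG_pinned, which the Xen
PV code uses to mark pinned page tables, is declared with the
PF_NO_COMPOUND policy, so setting it on a compound page trips the flag
sanity check. Roughly (simplified from include/linux/page-flags.h;
details vary by kernel version):

        /* Modifying PG_pinned on a compound page is a bug by policy: */
        #define PF_NO_COMPOUND(page, enforce) ({                        \
                VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \
                PF_POISONED_CHECK(page); })

        PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND)   /* Xen PV pinning */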

Fix the Xen issue, and at the same time get rid of the now unneeded 8k
allocation for a PGD with PTI disabled, by replacing
PGD_ALLOCATION_ORDER with an inline helper returning the required order
for PGD allocations.
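
Note that cpu_feature_enabled(X86_FEATURE_PTI) is cheap at runtime (it
folds to a constant at compile time where possible, otherwise it is a
statically patched branch), and Xen PV guests never set
X86_FEATURE_PTI, so with this change their PGD allocations drop to
order 0, as in the _pgd_alloc() hunk below:

        return __pgd_alloc(mm, pgd_allocation_order());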

Fixes: a9b3c355c2e6 ("asm-generic: pgalloc: provide generic __pgd_{alloc,free}")
Reported-by: Petr Vaněk <arkamar@atlas.cz>
Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Petr Vaněk <arkamar@atlas.cz>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20250422131717.25724-1-jgross%40suse.com

---
 arch/x86/include/asm/pgalloc.h     | 19 +++++++++++--------
 arch/x86/kernel/machine_kexec_32.c |  4 ++--
 arch/x86/mm/pgtable.c              |  4 ++--
 arch/x86/platform/efi/efi_64.c     |  4 ++--
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -6,6 +6,8 @@
 #include <linux/mm.h>   /* for struct page */
 #include <linux/pagemap.h>
 
+#include <asm/cpufeature.h>
+
 #define __HAVE_ARCH_PTE_ALLOC_ONE
 #define __HAVE_ARCH_PGD_FREE
 #include <asm-generic/pgalloc.h>
@@ -29,16 +31,17 @@
 static inline void paravirt_release_p4d(unsigned long pfn) {}
 #endif
 
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
 /*
- * Instead of one PGD, we acquire two PGDs. Being order-1, it is
- * both 8k in size and 8k-aligned. That lets us just flip bit 12
- * in a pointer to swap between the two 4k halves.
+ * In case of Page Table Isolation active, we acquire two PGDs instead of one.
+ * Being order-1, it is both 8k in size and 8k-aligned. That lets us just
+ * flip bit 12 in a pointer to swap between the two 4k halves.
  */
-#define PGD_ALLOCATION_ORDER 1
-#else
-#define PGD_ALLOCATION_ORDER 0
-#endif
+static inline unsigned int pgd_allocation_order(void)
+{
+        if (cpu_feature_enabled(X86_FEATURE_PTI))
+                return 1;
+        return 0;
+}
 
 /*
  * Allocate and free page tables.
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -42,7 +42,7 @@
 
 static void machine_kexec_free_page_tables(struct kimage *image)
 {
-        free_pages((unsigned long)image->arch.pgd, PGD_ALLOCATION_ORDER);
+        free_pages((unsigned long)image->arch.pgd, pgd_allocation_order());
         image->arch.pgd = NULL;
 #ifdef CONFIG_X86_PAE
         free_page((unsigned long)image->arch.pmd0);
@@ -59,7 +59,7 @@
 static int machine_kexec_alloc_page_tables(struct kimage *image)
 {
         image->arch.pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                                    PGD_ALLOCATION_ORDER);
+                                                    pgd_allocation_order());
 #ifdef CONFIG_X86_PAE
         image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
         image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -360,7 +360,7 @@
          * We allocate one page for pgd.
          */
         if (!SHARED_KERNEL_PMD)
-                return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
+                return __pgd_alloc(mm, pgd_allocation_order());
 
         /*
          * Now PAE kernel is not running as a Xen domain. We can allocate
@@ -380,7 +380,7 @@
 
 static inline pgd_t *_pgd_alloc(struct mm_struct *mm)
 {
-        return __pgd_alloc(mm, PGD_ALLOCATION_ORDER);
+        return __pgd_alloc(mm, pgd_allocation_order());
 }
 
 static inline void _pgd_free(struct mm_struct *mm, pgd_t *pgd)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -73,7 +73,7 @@
         gfp_t gfp_mask;
 
         gfp_mask = GFP_KERNEL | __GFP_ZERO;
-        efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
+        efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, pgd_allocation_order());
         if (!efi_pgd)
                 goto fail;
 
@@ -96,7 +96,7 @@
         if (pgtable_l5_enabled())
                 free_page((unsigned long)pgd_page_vaddr(*pgd));
 free_pgd:
-        free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
+        free_pages((unsigned long)efi_pgd, pgd_allocation_order());
 fail:
         return -ENOMEM;
 }