x86/mm: Prepare sme_encrypt_kernel() for PAGE aligned encryption

In preparation for encrypting more than just the kernel, the encryption
support in sme_encrypt_kernel() needs to support 4KB page aligned
encryption instead of just 2MB large page aligned encryption.

Update the routines that populate the PGD to support non-2MB aligned
addresses. This is done by creating PTE page tables for the start
and end portions of the address range that fall outside of the 2MB
alignment. This results in, at most, two extra pages to hold the
PTE entries for each range that is mapped.
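
As a rough illustration (not the kernel code itself), the sketch below
shows how a non-2MB-aligned range decomposes into a PTE-mapped head, a
PMD-mapped middle and a PTE-mapped tail. PMD_SIZE_2M, ALIGN_UP and
ALIGN_DOWN are local stand-ins for the kernel's PMD_PAGE_SIZE and
alignment macros:

  #include <stdio.h>

  #define PMD_SIZE_2M          0x200000UL
  #define ALIGN_UP(x, a)       (((x) + (a) - 1) & ~((a) - 1))
  #define ALIGN_DOWN(x, a)     ((x) & ~((a) - 1))

  int main(void)
  {
          /* Example range that is not 2MB aligned at either end */
          unsigned long vaddr = 0x1ff000, vaddr_end = 0x601000;
          unsigned long head_end = ALIGN_UP(vaddr, PMD_SIZE_2M);
          unsigned long tail_start = ALIGN_DOWN(vaddr_end, PMD_SIZE_2M);

          printf("PTE head:   %#lx - %#lx\n", vaddr, head_end);
          printf("PMD middle: %#lx - %#lx\n", head_end, tail_start);
          printf("PTE tail:   %#lx - %#lx\n", tail_start, vaddr_end);
          return 0;
  }

Each PTE-mapped piece fits within a single 2MB region, so each needs at
most one page of PTE entries, which is where the "two extra pages" above
come from.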

Tested-by: Gabriel Craciunescu <nix.or.die@gmail.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brijesh Singh <brijesh.singh@amd.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180110192626.6026.75387.stgit@tlendack-t1.amdoffice.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Tom Lendacky and committed by Ingo Molnar · cc5f01e2 · 2b5d00b6

2 files changed: +121 -22

arch/x86/mm/mem_encrypt.c (+107 -16)

···
         pgd_t *pgd;
 
         pmdval_t pmd_flags;
+        pteval_t pte_flags;
         unsigned long paddr;
 
         unsigned long vaddr;
···
 #define PGD_FLAGS               _KERNPG_TABLE_NOENC
 #define P4D_FLAGS               _KERNPG_TABLE_NOENC
 #define PUD_FLAGS               _KERNPG_TABLE_NOENC
+#define PMD_FLAGS               _KERNPG_TABLE_NOENC
 
 #define PMD_FLAGS_LARGE         (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
 
···
 
 #define PMD_FLAGS_ENC           (PMD_FLAGS_LARGE | _PAGE_ENC)
 
-static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+#define PTE_FLAGS               (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL)
+
+#define PTE_FLAGS_DEC           PTE_FLAGS
+#define PTE_FLAGS_DEC_WP        ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
+                                 (_PAGE_PAT | _PAGE_PWT))
+
+#define PTE_FLAGS_ENC           (PTE_FLAGS | _PAGE_ENC)
+
+static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
 {
         pgd_t *pgd_p;
         p4d_t *p4d_p;
···
         pud_p += pud_index(ppd->vaddr);
         if (native_pud_val(*pud_p)) {
                 if (native_pud_val(*pud_p) & _PAGE_PSE)
-                        return;
+                        return NULL;
 
                 pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
         } else {
···
                 native_set_pud(pud_p, pud);
         }
 
+        return pmd_p;
+}
+
+static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
+{
+        pmd_t *pmd_p;
+
+        pmd_p = sme_prepare_pgd(ppd);
+        if (!pmd_p)
+                return;
+
         pmd_p += pmd_index(ppd->vaddr);
         if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
                 native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags));
 }
 
-static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
-                                   pmdval_t pmd_flags)
+static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
 {
-        ppd->pmd_flags = pmd_flags;
+        pmd_t *pmd_p;
+        pte_t *pte_p;
 
+        pmd_p = sme_prepare_pgd(ppd);
+        if (!pmd_p)
+                return;
+
+        pmd_p += pmd_index(ppd->vaddr);
+        if (native_pmd_val(*pmd_p)) {
+                if (native_pmd_val(*pmd_p) & _PAGE_PSE)
+                        return;
+
+                pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK);
+        } else {
+                pmd_t pmd;
+
+                pte_p = ppd->pgtable_area;
+                memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE);
+                ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE;
+
+                pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS);
+                native_set_pmd(pmd_p, pmd);
+        }
+
+        pte_p += pte_index(ppd->vaddr);
+        if (!native_pte_val(*pte_p))
+                native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags));
+}
+
+static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
+{
         while (ppd->vaddr < ppd->vaddr_end) {
                 sme_populate_pgd_large(ppd);
 
···
         }
 }
 
+static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
+{
+        while (ppd->vaddr < ppd->vaddr_end) {
+                sme_populate_pgd(ppd);
+
+                ppd->vaddr += PAGE_SIZE;
+                ppd->paddr += PAGE_SIZE;
+        }
+}
+
+static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
+                                   pmdval_t pmd_flags, pteval_t pte_flags)
+{
+        unsigned long vaddr_end;
+
+        ppd->pmd_flags = pmd_flags;
+        ppd->pte_flags = pte_flags;
+
+        /* Save original end value since we modify the struct value */
+        vaddr_end = ppd->vaddr_end;
+
+        /* If start is not 2MB aligned, create PTE entries */
+        ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+        __sme_map_range_pte(ppd);
+
+        /* Create PMD entries */
+        ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+        __sme_map_range_pmd(ppd);
+
+        /* If end is not 2MB aligned, create PTE entries */
+        ppd->vaddr_end = vaddr_end;
+        __sme_map_range_pte(ppd);
+}
+
 static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
 {
-        __sme_map_range(ppd, PMD_FLAGS_ENC);
+        __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
 }
 
 static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
 {
-        __sme_map_range(ppd, PMD_FLAGS_DEC);
+        __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
 }
 
 static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
 {
-        __sme_map_range(ppd, PMD_FLAGS_DEC_WP);
+        __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
 }
 
 static unsigned long __init sme_pgtable_calc(unsigned long len)
 {
-        unsigned long p4d_size, pud_size, pmd_size;
+        unsigned long p4d_size, pud_size, pmd_size, pte_size;
         unsigned long total;
 
         /*
          * Perform a relatively simplistic calculation of the pagetable
-         * entries that are needed. That mappings will be covered by 2MB
-         * PMD entries so we can conservatively calculate the required
+         * entries that are needed. Those mappings will be covered mostly
+         * by 2MB PMD entries so we can conservatively calculate the required
          * number of P4D, PUD and PMD structures needed to perform the
-         * mappings. Incrementing the count for each covers the case where
-         * the addresses cross entries.
+         * mappings. For mappings that are not 2MB aligned, PTE mappings
+         * would be needed for the start and end portion of the address range
+         * that fall outside of the 2MB alignment. This results in, at most,
+         * two extra pages to hold PTE entries for each range that is mapped.
+         * Incrementing the count for each covers the case where the addresses
+         * cross entries.
          */
         if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
                 p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1;
···
         }
         pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1;
         pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD;
+        pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE;
 
-        total = p4d_size + pud_size + pmd_size;
+        total = p4d_size + pud_size + pmd_size + pte_size;
 
         /*
          * Now calculate the added pagetable structures needed to populate
···
 
         /*
          * The total workarea includes the executable encryption area and
-         * the pagetable area.
+         * the pagetable area. The start of the workarea is already 2MB
+         * aligned, align the end of the workarea on a 2MB boundary so that
+         * we don't try to create/allocate PTE entries from the workarea
+         * before it is mapped.
          */
         workarea_len = execute_len + pgtable_area_len;
-        workarea_end = workarea_start + workarea_len;
+        workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
 
         /*
          * Set the address to the start of where newly created pagetable
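
A quick sanity check of the pte_size term added to sme_pgtable_calc()
above, using the usual x86-64 values (8-byte PTEs, 512 entries per page
table); this is illustrative arithmetic only, not kernel code:

  #include <stdio.h>

  int main(void)
  {
          unsigned long pte_entry_size = 8;   /* sizeof(pte_t) on x86-64 */
          unsigned long ptrs_per_pte = 512;   /* PTRS_PER_PTE */
          unsigned long pte_size = 2 * pte_entry_size * ptrs_per_pte;

          /* 8192 bytes, i.e. the "two extra pages" of PTE entries */
          printf("pte_size = %lu bytes (%lu pages)\n",
                 pte_size, pte_size / 4096);
          return 0;
  }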

arch/x86/mm/mem_encrypt_boot.S (+14 -6)

···
         mov     %rdx, %cr4
 
         push    %r15
+        push    %r12
 
         movq    %rcx, %r9               /* Save kernel length */
         movq    %rdi, %r10              /* Save encrypted kernel address */
···
 
         wbinvd                          /* Invalidate any cache entries */
 
-        /* Copy/encrypt 2MB at a time */
+        /* Copy/encrypt up to 2MB at a time */
+        movq    $PMD_PAGE_SIZE, %r12
 1:
+        cmpq    %r12, %r9
+        jnb     2f
+        movq    %r9, %r12
+
+2:
         movq    %r11, %rsi              /* Source - decrypted kernel */
         movq    %r8, %rdi               /* Dest - intermediate copy buffer */
-        movq    $PMD_PAGE_SIZE, %rcx    /* 2MB length */
+        movq    %r12, %rcx
         rep     movsb
 
         movq    %r8, %rsi               /* Source - intermediate copy buffer */
         movq    %r10, %rdi              /* Dest - encrypted kernel */
-        movq    $PMD_PAGE_SIZE, %rcx    /* 2MB length */
+        movq    %r12, %rcx
         rep     movsb
 
-        addq    $PMD_PAGE_SIZE, %r11
-        addq    $PMD_PAGE_SIZE, %r10
-        subq    $PMD_PAGE_SIZE, %r9     /* Kernel length decrement */
+        addq    %r12, %r11
+        addq    %r12, %r10
+        subq    %r12, %r9               /* Kernel length decrement */
         jnz     1b                      /* Kernel length not zero? */
 
         /* Restore PAT register */
···
         mov     %r15, %rdx              /* Restore original PAT value */
         wrmsr
 
+        pop     %r12
         pop     %r15
 
         ret
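
For readers less comfortable with the assembly, here is a C-level sketch
(again, not the kernel's code) of what the modified __enc_copy loop now
does: copy through the intermediate buffer in chunks of up to 2MB,
clamping the last chunk to the remaining length the way %r12 is clamped
above:

  #include <stdio.h>
  #include <string.h>

  #define PMD_SIZE_2M  0x200000UL

  /* chunk = min(remaining length, 2MB), which is what %r12 holds */
  static void enc_copy_sketch(unsigned char *dst, const unsigned char *src,
                              unsigned long len, unsigned char *workarea)
  {
          while (len) {
                  unsigned long chunk = len < PMD_SIZE_2M ? len : PMD_SIZE_2M;

                  /* Stage through the workarea, as the assembly does via %r8 */
                  memcpy(workarea, src, chunk);
                  memcpy(dst, workarea, chunk);

                  src += chunk;
                  dst += chunk;
                  len -= chunk;
          }
  }

  int main(void)
  {
          static unsigned char src[5 * 1024 * 1024], dst[sizeof(src)];
          static unsigned char workarea[PMD_SIZE_2M];

          memset(src, 0xaa, sizeof(src));
          enc_copy_sketch(dst, src, sizeof(src), workarea);
          printf("copy matches: %d\n", memcmp(dst, src, sizeof(src)) == 0);
          return 0;
  }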