Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s390/cmma: rework no-dat handling

Rework the way physical pages are set no-dat / dat:

The old way is:

- Rely on all pages being initially marked "dat"
- Allocate page tables for the kernel mapping
- Enable dat
- Walk the whole kernel mapping and set PG_arch_1 bit in all struct pages
that belong to pages of kernel page tables
- Walk all struct pages and test and clear the PG_arch_1 bit. If the bit is
not set, set the page state to no-dat
- For all subsequent page table allocations, set the page state to dat
(remove the no-dat state) on allocation time

Change this rather complex logic to a simpler approach:

- Set the whole physical memory (all pages) to "no-dat"
- Explicitly set those page table pages to "dat" which are part of the
kernel image (e.g. swapper_pg_dir)
- For all subsequent page table allocations, set the page state to dat
(remove the no-dat state) on allocation time

As a result, the code is simpler, and this also allows getting rid of one
odd usage of the PG_arch_1 bit.

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>

authored by

Heiko Carstens and committed by
Vasily Gorbik
a51324c4 65d37f16

+21 -131
+17
arch/s390/boot/vmem.c
··· 2 2 #include <linux/sched/task.h> 3 3 #include <linux/pgtable.h> 4 4 #include <linux/kasan.h> 5 + #include <asm/page-states.h> 5 6 #include <asm/pgalloc.h> 6 7 #include <asm/facility.h> 7 8 #include <asm/sections.h> ··· 71 70 crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z)); 72 71 crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); 73 72 memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); 73 + __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER); 74 + __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER); 75 + __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER); 76 + __arch_set_page_dat(kasan_early_shadow_pte, 1); 74 77 75 78 /* 76 79 * Current memory layout: ··· 228 223 229 224 table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size); 230 225 crst_table_init(table, val); 226 + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); 231 227 return table; 232 228 } 233 229 ··· 244 238 if (!pte_leftover) { 245 239 pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE); 246 240 pte = pte_leftover + _PAGE_TABLE_SIZE; 241 + __arch_set_page_dat(pte, 1); 247 242 } else { 248 243 pte = pte_leftover; 249 244 pte_leftover = NULL; ··· 425 418 unsigned long asce_bits; 426 419 int i; 427 420 421 + /* 422 + * Mark whole memory as no-dat. This must be done before any 423 + * page tables are allocated, or kernel image builtin pages 424 + * are marked as dat tables. 
425 + */ 426 + for_each_physmem_online_range(i, &start, &end) 427 + __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT); 428 + 428 429 if (asce_limit == _REGION1_SIZE) { 429 430 asce_type = _REGION2_ENTRY_EMPTY; 430 431 asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH; ··· 444 429 445 430 crst_table_init((unsigned long *)swapper_pg_dir, asce_type); 446 431 crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY); 432 + __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); 433 + __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); 447 434 448 435 /* 449 436 * To allow prefixing the lowcore must be mapped with 4KB pages.
-2
arch/s390/include/asm/setup.h
··· 125 125 126 126 void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); 127 127 128 - void cmma_init_nodat(void); 129 - 130 128 extern void (*_machine_restart)(char *command); 131 129 extern void (*_machine_halt)(void); 132 130 extern void (*_machine_power_off)(void);
-2
arch/s390/mm/init.c
··· 168 168 /* this will put all low memory onto the freelists */ 169 169 memblock_free_all(); 170 170 setup_zero_pages(); /* Setup zeroed pages. */ 171 - 172 - cmma_init_nodat(); 173 171 } 174 172 175 173 void free_initmem(void)
+2 -125
arch/s390/mm/page-states.c
··· 7 7 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> 8 8 */ 9 9 10 - #include <linux/kernel.h> 11 - #include <linux/errno.h> 12 - #include <linux/types.h> 13 10 #include <linux/mm.h> 14 - #include <linux/memblock.h> 15 - #include <linux/gfp.h> 16 - #include <linux/init.h> 17 - #include <asm/asm-extable.h> 18 - #include <asm/facility.h> 19 11 #include <asm/page-states.h> 12 + #include <asm/sections.h> 13 + #include <asm/page.h> 20 14 21 15 int __bootdata_preserved(cmma_flag); 22 - 23 - static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end) 24 - { 25 - unsigned long next; 26 - struct page *page; 27 - pmd_t *pmd; 28 - 29 - pmd = pmd_offset(pud, addr); 30 - do { 31 - next = pmd_addr_end(addr, end); 32 - if (pmd_none(*pmd) || pmd_large(*pmd)) 33 - continue; 34 - page = phys_to_page(pmd_val(*pmd)); 35 - set_bit(PG_arch_1, &page->flags); 36 - } while (pmd++, addr = next, addr != end); 37 - } 38 - 39 - static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end) 40 - { 41 - unsigned long next; 42 - struct page *page; 43 - pud_t *pud; 44 - int i; 45 - 46 - pud = pud_offset(p4d, addr); 47 - do { 48 - next = pud_addr_end(addr, end); 49 - if (pud_none(*pud) || pud_large(*pud)) 50 - continue; 51 - if (!pud_folded(*pud)) { 52 - page = phys_to_page(pud_val(*pud)); 53 - for (i = 0; i < 4; i++) 54 - set_bit(PG_arch_1, &page[i].flags); 55 - } 56 - mark_kernel_pmd(pud, addr, next); 57 - } while (pud++, addr = next, addr != end); 58 - } 59 - 60 - static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end) 61 - { 62 - unsigned long next; 63 - struct page *page; 64 - p4d_t *p4d; 65 - int i; 66 - 67 - p4d = p4d_offset(pgd, addr); 68 - do { 69 - next = p4d_addr_end(addr, end); 70 - if (p4d_none(*p4d)) 71 - continue; 72 - if (!p4d_folded(*p4d)) { 73 - page = phys_to_page(p4d_val(*p4d)); 74 - for (i = 0; i < 4; i++) 75 - set_bit(PG_arch_1, &page[i].flags); 76 - } 77 - mark_kernel_pud(p4d, addr, next); 78 - } while 
(p4d++, addr = next, addr != end); 79 - } 80 - 81 - static void mark_kernel_pgd(void) 82 - { 83 - unsigned long addr, next, max_addr; 84 - struct page *page; 85 - pgd_t *pgd; 86 - int i; 87 - 88 - addr = 0; 89 - /* 90 - * Figure out maximum virtual address accessible with the 91 - * kernel ASCE. This is required to keep the page table walker 92 - * from accessing non-existent entries. 93 - */ 94 - max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2; 95 - max_addr = 1UL << (max_addr * 11 + 31); 96 - pgd = pgd_offset_k(addr); 97 - do { 98 - next = pgd_addr_end(addr, max_addr); 99 - if (pgd_none(*pgd)) 100 - continue; 101 - if (!pgd_folded(*pgd)) { 102 - page = phys_to_page(pgd_val(*pgd)); 103 - for (i = 0; i < 4; i++) 104 - set_bit(PG_arch_1, &page[i].flags); 105 - } 106 - mark_kernel_p4d(pgd, addr, next); 107 - } while (pgd++, addr = next, addr != max_addr); 108 - } 109 - 110 - void __init cmma_init_nodat(void) 111 - { 112 - struct page *page; 113 - unsigned long start, end, ix; 114 - int i; 115 - 116 - if (cmma_flag < 2) 117 - return; 118 - /* Mark pages used in kernel page tables */ 119 - mark_kernel_pgd(); 120 - page = virt_to_page(&swapper_pg_dir); 121 - for (i = 0; i < 4; i++) 122 - set_bit(PG_arch_1, &page[i].flags); 123 - page = virt_to_page(&invalid_pg_dir); 124 - for (i = 0; i < 4; i++) 125 - set_bit(PG_arch_1, &page[i].flags); 126 - 127 - /* Set all kernel pages not used for page tables to stable/no-dat */ 128 - for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) { 129 - page = pfn_to_page(start); 130 - for (ix = start; ix < end; ix++, page++) { 131 - if (__test_and_clear_bit(PG_arch_1, &page->flags)) 132 - continue; /* skip page table pages */ 133 - if (!list_empty(&page->lru)) 134 - continue; /* skip free pages */ 135 - __set_page_stable_nodat(page_to_virt(page), 1); 136 - } 137 - } 138 - } 139 16 140 17 void arch_free_page(struct page *page, int order) 141 18 {
+2 -2
arch/s390/mm/vmem.c
··· 50 50 if (!table) 51 51 return NULL; 52 52 crst_table_init(table, val); 53 - if (slab_is_available()) 54 - __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); 53 + __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER); 55 54 return table; 56 55 } 57 56 ··· 66 67 if (!pte) 67 68 return NULL; 68 69 memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); 70 + __arch_set_page_dat(pte, 1); 69 71 return pte; 70 72 } 71 73