x86/cpu_entry_area: Sync cpu_entry_area to initial_page_table

The separation of the cpu_entry_area from the fixmap missed the fact that
on 32bit non-PAE kernels the cpu_entry_area mapping might not be covered in
initial_page_table by the previous synchronizations.

This results in suspend/resume failures because 32bit utilizes the initial
page table for resume. The absence of the cpu_entry_area mapping results in
a triple fault, aka. insta reboot.

With PAE enabled this works by chance because the PGD entry which covers
the fixmap and other parts incidentally provides the cpu_entry_area
mapping as well.

Synchronize the initial page table after setting up the cpu entry
area. Instead of adding yet another copy of the same code, move it to a
function and invoke it from the various places.

It needs to be investigated if the existing calls in setup_arch() and
setup_per_cpu_areas() can be replaced by the later invocation from
setup_cpu_entry_areas(), but that's beyond the scope of this fix.

Fixes: 92a0f81d8957 ("x86/cpu_entry_area: Move it out of the fixmap")
Reported-by: Woody Suwalski <terraluna977@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Woody Suwalski <terraluna977@gmail.com>
Cc: William Grant <william.grant@canonical.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1802282137290.1392@nanos.tec.linutronix.de

Changed files
+32 -25
arch
+1
arch/x86/include/asm/pgtable_32.h
··· 32 32 static inline void pgtable_cache_init(void) { } 33 33 static inline void check_pgt_cache(void) { } 34 34 void paging_init(void); 35 + void sync_initial_page_table(void); 35 36 36 37 /* 37 38 * Define this if things work differently on an i386 and an i486:
+1
arch/x86/include/asm/pgtable_64.h
··· 28 28 #define swapper_pg_dir init_top_pgt 29 29 30 30 extern void paging_init(void); 31 + static inline void sync_initial_page_table(void) { } 31 32 32 33 #define pte_ERROR(e) \ 33 34 pr_err("%s:%d: bad pte %p(%016lx)\n", \
+5 -12
arch/x86/kernel/setup.c
··· 1204 1204 1205 1205 kasan_init(); 1206 1206 1207 - #ifdef CONFIG_X86_32 1208 - /* sync back kernel address range */ 1209 - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, 1210 - swapper_pg_dir + KERNEL_PGD_BOUNDARY, 1211 - KERNEL_PGD_PTRS); 1212 - 1213 1207 /* 1214 - * sync back low identity map too. It is used for example 1215 - * in the 32-bit EFI stub. 1208 + * Sync back kernel address range. 1209 + * 1210 + * FIXME: Can the later sync in setup_cpu_entry_areas() replace 1211 + * this call? 1216 1212 */ 1217 - clone_pgd_range(initial_page_table, 1218 - swapper_pg_dir + KERNEL_PGD_BOUNDARY, 1219 - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); 1220 - #endif 1213 + sync_initial_page_table(); 1221 1214 1222 1215 tboot_probe(); 1223 1216
+4 -13
arch/x86/kernel/setup_percpu.c
··· 287 287 /* Setup cpu initialized, callin, callout masks */ 288 288 setup_cpu_local_masks(); 289 289 290 - #ifdef CONFIG_X86_32 291 290 /* 292 291 * Sync back kernel address range again. We already did this in 293 292 * setup_arch(), but percpu data also needs to be available in 294 293 * the smpboot asm. We can't reliably pick up percpu mappings 295 294 * using vmalloc_fault(), because exception dispatch needs 296 295 * percpu data. 296 + * 297 + * FIXME: Can the later sync in setup_cpu_entry_areas() replace 298 + * this call? 297 299 */ 298 - clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, 299 - swapper_pg_dir + KERNEL_PGD_BOUNDARY, 300 - KERNEL_PGD_PTRS); 301 - 302 - /* 303 - * sync back low identity map too. It is used for example 304 - * in the 32-bit EFI stub. 305 - */ 306 - clone_pgd_range(initial_page_table, 307 - swapper_pg_dir + KERNEL_PGD_BOUNDARY, 308 - min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); 309 - #endif 300 + sync_initial_page_table(); 310 301 }
+6
arch/x86/mm/cpu_entry_area.c
··· 163 163 164 164 for_each_possible_cpu(cpu) 165 165 setup_cpu_entry_area(cpu); 166 + 167 + /* 168 + * This is the last essential update to swapper_pgdir which needs 169 + * to be synchronized to initial_page_table on 32bit. 170 + */ 171 + sync_initial_page_table(); 166 172 }
+15
arch/x86/mm/init_32.c
··· 453 453 } 454 454 #endif /* CONFIG_HIGHMEM */ 455 455 456 + void __init sync_initial_page_table(void) 457 + { 458 + clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY, 459 + swapper_pg_dir + KERNEL_PGD_BOUNDARY, 460 + KERNEL_PGD_PTRS); 461 + 462 + /* 463 + * sync back low identity map too. It is used for example 464 + * in the 32-bit EFI stub. 465 + */ 466 + clone_pgd_range(initial_page_table, 467 + swapper_pg_dir + KERNEL_PGD_BOUNDARY, 468 + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); 469 + } 470 + 456 471 void __init native_pagetable_init(void) 457 472 { 458 473 unsigned long pfn, va;