Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/thp: Fix crash on mremap

This patch fixes the crash below:

NIP [c00000000004cee4] .__hash_page_thp+0x2a4/0x440
LR [c0000000000439ac] .hash_page+0x18c/0x5e0
...
Call Trace:
[c000000736103c40] [00001ffffb000000] 0x1ffffb000000(unreliable)
[437908.479693] [c000000736103d50] [c0000000000439ac] .hash_page+0x18c/0x5e0
[437908.479699] [c000000736103e30] [c00000000000924c] .do_hash_page+0x4c/0x58

On ppc64 we use the pgtable for storing the hpte slot information and
store the address of the pgtable at a constant offset (PTRS_PER_PMD) from
the pmd. On mremap, when we switch the pmd, we need to withdraw and deposit
the pgtable again, so that we can find the pgtable at the PTRS_PER_PMD
offset from the new pmd.

We also want to move the withdraw and deposit before the set_pmd_at() so
that, when a page fault finds the pmd to be trans huge, we can be sure that
the pgtable can be located at that offset.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

authored by

Aneesh Kumar K.V and committed by
Benjamin Herrenschmidt
b3084f4d a6da83f9

+31 -9
+14
arch/powerpc/include/asm/pgtable-ppc64.h
··· 558 558 #define __HAVE_ARCH_PMDP_INVALIDATE 559 559 extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, 560 560 pmd_t *pmdp); 561 + 562 + #define pmd_move_must_withdraw pmd_move_must_withdraw 563 + typedef struct spinlock spinlock_t; 564 + static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, 565 + spinlock_t *old_pmd_ptl) 566 + { 567 + /* 568 + * Archs like ppc64 use pgtable to store per pmd 569 + * specific information. So when we switch the pmd, 570 + * we should also withdraw and deposit the pgtable 571 + */ 572 + return true; 573 + } 574 + 561 575 #endif /* __ASSEMBLY__ */ 562 576 #endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
+12
include/asm-generic/pgtable.h
··· 558 558 } 559 559 #endif 560 560 561 + #ifndef pmd_move_must_withdraw 562 + static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, 563 + spinlock_t *old_pmd_ptl) 564 + { 565 + /* 566 + * With split pmd lock we also need to move preallocated 567 + * PTE page table if new_pmd is on different PMD page table. 568 + */ 569 + return new_pmd_ptl != old_pmd_ptl; 570 + } 571 + #endif 572 + 561 573 /* 562 574 * This function is meant to be used by sites walking pagetables with 563 575 * the mmap_sem hold in read mode to protect against MADV_DONTNEED and
+5 -9
mm/huge_memory.c
··· 1502 1502 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); 1503 1503 pmd = pmdp_get_and_clear(mm, old_addr, old_pmd); 1504 1504 VM_BUG_ON(!pmd_none(*new_pmd)); 1505 - set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); 1506 - if (new_ptl != old_ptl) { 1507 - pgtable_t pgtable; 1508 1505 1509 - /* 1510 - * Move preallocated PTE page table if new_pmd is on 1511 - * different PMD page table. 1512 - */ 1506 + if (pmd_move_must_withdraw(new_ptl, old_ptl)) { 1507 + pgtable_t pgtable; 1513 1508 pgtable = pgtable_trans_huge_withdraw(mm, old_pmd); 1514 1509 pgtable_trans_huge_deposit(mm, new_pmd, pgtable); 1515 - 1516 - spin_unlock(new_ptl); 1517 1510 } 1511 + set_pmd_at(mm, new_addr, new_pmd, pmd_mksoft_dirty(pmd)); 1512 + if (new_ptl != old_ptl) 1513 + spin_unlock(new_ptl); 1518 1514 spin_unlock(old_ptl); 1519 1515 } 1520 1516 out: