Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: change huge_ptep_clear_flush() to return the original pte

Patch series "Fix CONT-PTE/PMD size hugetlb issue when unmapping or migrating", v4.

Presently, when migrating a hugetlb page or unmapping a poisoned hugetlb
page, we use ptep_clear_flush() and set_pte_at() to nuke the page table
entry and remap it. This is incorrect for CONT-PTE or CONT-PMD size
hugetlb pages and can cause data consistency issues. This patch set
changes these paths to use the hugetlb-related APIs to fix this issue.

Note: Mike pointed out that huge_ptep_get() only returns the one specific
value, and does not take into account the dirty or young bits of
CONT-PTE/PMDs the way huge_ptep_get_and_clear() does [1]. This
inconsistency is not introduced by this patch set, and it will be
addressed in another thread [2]. Meanwhile, the uffd for hugetlb case [3]
pointed out by Gerald also needs another patch to address it.

[1] https://lore.kernel.org/linux-mm/85bd80b4-b4fd-0d3f-a2e5-149559f2f387@oracle.com/
[2] https://lore.kernel.org/all/cover.1651998586.git.baolin.wang@linux.alibaba.com/
[3] https://lore.kernel.org/linux-mm/20220503120343.6264e126@thinkpad/


This patch (of 3):

It is incorrect to use ptep_clear_flush() to nuke a hugetlb page table
when unmapping or migrating a hugetlb page; the following patches will
change these paths to use huge_ptep_clear_flush() instead.

So this is a preparation patch, which changes huge_ptep_clear_flush() to
return the original pte, to help nuke a hugetlb page table.

[baolin.wang@linux.alibaba.com: fix build in several more architectures]
Link: https://lkml.kernel.org/r/0009a4cd-2826-e8be-e671-f050d4f18d5d@linux.alibaba.com
[sfr@canb.auug.org.au: fixup]
Link: https://lkml.kernel.org/r/20220511181531.7f27a5c1@canb.auug.org.au
Link: https://lkml.kernel.org/r/cover.1652270205.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/20f77ddab90baa249bd24504c413189b82acde69.1652270205.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/cover.1652147571.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/dcf065868cce35bceaf138613ad27f17bb7c0c19.1652147571.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Yoshinori Sato <ysato@users.osdn.me>
Cc: Rich Felker <dalias@libc.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Baolin Wang and committed by
Andrew Morton
ae075629 110bf7a5

+36 -28
+2 -2
arch/arm64/include/asm/hugetlb.h
··· 39 39 extern void huge_ptep_set_wrprotect(struct mm_struct *mm, 40 40 unsigned long addr, pte_t *ptep); 41 41 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 42 - extern void huge_ptep_clear_flush(struct vm_area_struct *vma, 43 - unsigned long addr, pte_t *ptep); 42 + extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 43 + unsigned long addr, pte_t *ptep); 44 44 #define __HAVE_ARCH_HUGE_PTE_CLEAR 45 45 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr, 46 46 pte_t *ptep, unsigned long sz);
+5 -7
arch/arm64/mm/hugetlbpage.c
··· 486 486 set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot)); 487 487 } 488 488 489 - void huge_ptep_clear_flush(struct vm_area_struct *vma, 490 - unsigned long addr, pte_t *ptep) 489 + pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 490 + unsigned long addr, pte_t *ptep) 491 491 { 492 492 size_t pgsize; 493 493 int ncontig; 494 494 495 - if (!pte_cont(READ_ONCE(*ptep))) { 496 - ptep_clear_flush(vma, addr, ptep); 497 - return; 498 - } 495 + if (!pte_cont(READ_ONCE(*ptep))) 496 + return ptep_clear_flush(vma, addr, ptep); 499 497 500 498 ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize); 501 - clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); 499 + return get_clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig); 502 500 } 503 501 504 502 static int __init hugetlbpage_init(void)
+3 -2
arch/ia64/include/asm/hugetlb.h
··· 23 23 #define is_hugepage_only_range is_hugepage_only_range 24 24 25 25 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 26 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 27 - unsigned long addr, pte_t *ptep) 26 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 27 + unsigned long addr, pte_t *ptep) 28 28 { 29 + return *ptep; 29 30 } 30 31 31 32 #include <asm-generic/hugetlb.h>
+6 -3
arch/mips/include/asm/hugetlb.h
··· 43 43 } 44 44 45 45 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 46 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 47 - unsigned long addr, pte_t *ptep) 46 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 47 + unsigned long addr, pte_t *ptep) 48 48 { 49 + pte_t pte; 50 + 49 51 /* 50 52 * clear the huge pte entry firstly, so that the other smp threads will 51 53 * not get old pte entry after finishing flush_tlb_page and before 52 54 * setting new huge pte entry 53 55 */ 54 - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); 56 + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); 55 57 flush_tlb_page(vma, addr); 58 + return pte; 56 59 } 57 60 58 61 #define __HAVE_ARCH_HUGE_PTE_NONE
+3 -2
arch/parisc/include/asm/hugetlb.h
··· 28 28 } 29 29 30 30 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 31 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 32 - unsigned long addr, pte_t *ptep) 31 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 32 + unsigned long addr, pte_t *ptep) 33 33 { 34 + return *ptep; 34 35 } 35 36 36 37 #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+6 -3
arch/powerpc/include/asm/hugetlb.h
··· 43 43 } 44 44 45 45 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 46 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 47 - unsigned long addr, pte_t *ptep) 46 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 47 + unsigned long addr, pte_t *ptep) 48 48 { 49 - huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); 49 + pte_t pte; 50 + 51 + pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); 50 52 flush_hugetlb_page(vma, addr); 53 + return pte; 51 54 } 52 55 53 56 #define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+3 -3
arch/s390/include/asm/hugetlb.h
··· 50 50 set_pte(ptep, __pte(_SEGMENT_ENTRY_EMPTY)); 51 51 } 52 52 53 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 54 - unsigned long address, pte_t *ptep) 53 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 54 + unsigned long address, pte_t *ptep) 55 55 { 56 - huge_ptep_get_and_clear(vma->vm_mm, address, ptep); 56 + return huge_ptep_get_and_clear(vma->vm_mm, address, ptep); 57 57 } 58 58 59 59 static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+3 -2
arch/sh/include/asm/hugetlb.h
··· 21 21 } 22 22 23 23 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 24 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 25 - unsigned long addr, pte_t *ptep) 24 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 25 + unsigned long addr, pte_t *ptep) 26 26 { 27 + return *ptep; 27 28 } 28 29 29 30 static inline void arch_clear_hugepage_flags(struct page *page)
+3 -2
arch/sparc/include/asm/hugetlb.h
··· 21 21 pte_t *ptep); 22 22 23 23 #define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 24 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 25 - unsigned long addr, pte_t *ptep) 24 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 25 + unsigned long addr, pte_t *ptep) 26 26 { 27 + return *ptep; 27 28 } 28 29 29 30 #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+2 -2
include/asm-generic/hugetlb.h
··· 84 84 #endif 85 85 86 86 #ifndef __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH 87 - static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, 87 + static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 88 88 unsigned long addr, pte_t *ptep) 89 89 { 90 - ptep_clear_flush(vma, addr, ptep); 90 + return ptep_clear_flush(vma, addr, ptep); 91 91 } 92 92 #endif 93 93