Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: convert p[te|md]_numa users to p[te|md]_protnone_numa

Convert existing users of pte_numa and friends to the new helper. Note
that the kernel is broken after this patch is applied until the other page
table modifiers are also altered. This patch layout is to make review
easier.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Aneesh Kumar <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Dave Jones <davej@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Kirill Shutemov <kirill.shutemov@linux.intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by Mel Gorman and committed by Linus Torvalds
8a0516ed e7bb4b6d

+39 -56
+1 -1
arch/powerpc/kvm/book3s_hv_rm_mmu.c
··· 212 212 /* Look up the Linux PTE for the backing page */ 213 213 pte_size = psize; 214 214 pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size); 215 - if (pte_present(pte) && !pte_numa(pte)) { 215 + if (pte_present(pte) && !pte_protnone(pte)) { 216 216 if (writing && !pte_write(pte)) 217 217 /* make the actual HPTE be read-only */ 218 218 ptel = hpte_make_readonly(ptel);
-5
arch/powerpc/mm/fault.c
··· 398 398 * processors use the same I/D cache coherency mechanism 399 399 * as embedded. 400 400 */ 401 - if (error_code & DSISR_PROTFAULT) 402 - goto bad_area; 403 401 #endif /* CONFIG_PPC_STD_MMU */ 404 402 405 403 /* ··· 421 423 flags |= FAULT_FLAG_WRITE; 422 424 /* a read */ 423 425 } else { 424 - /* protection fault */ 425 - if (error_code & 0x08000000) 426 - goto bad_area; 427 426 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) 428 427 goto bad_area; 429 428 }
+8 -3
arch/powerpc/mm/pgtable.c
··· 172 172 void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, 173 173 pte_t pte) 174 174 { 175 - #ifdef CONFIG_DEBUG_VM 176 - WARN_ON(pte_val(*ptep) & _PAGE_PRESENT); 177 - #endif 175 + /* 176 + * When handling numa faults, we already have the pte marked 177 + * _PAGE_PRESENT, but we can be sure that it is not in hpte. 178 + * Hence we can use set_pte_at for them. 179 + */ 180 + VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) == 181 + (_PAGE_PRESENT | _PAGE_USER)); 182 + 178 183 /* Note: mm->context.id might not yet have been assigned as 179 184 * this context might not have been activated yet when this 180 185 * is called.
+2 -1
arch/powerpc/mm/pgtable_64.c
··· 718 718 pmd_t *pmdp, pmd_t pmd) 719 719 { 720 720 #ifdef CONFIG_DEBUG_VM 721 - WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT); 721 + WARN_ON((pmd_val(*pmdp) & (_PAGE_PRESENT | _PAGE_USER)) == 722 + (_PAGE_PRESENT | _PAGE_USER)); 722 723 assert_spin_locked(&mm->page_table_lock); 723 724 WARN_ON(!pmd_trans_huge(pmd)); 724 725 #endif
+2 -2
arch/x86/mm/gup.c
··· 84 84 struct page *page; 85 85 86 86 /* Similar to the PMD case, NUMA hinting must take slow path */ 87 - if (pte_numa(pte)) { 87 + if (pte_protnone(pte)) { 88 88 pte_unmap(ptep); 89 89 return 0; 90 90 } ··· 178 178 * slowpath for accounting purposes and so that they 179 179 * can be serialised against THP migration. 180 180 */ 181 - if (pmd_numa(pmd)) 181 + if (pmd_protnone(pmd)) 182 182 return 0; 183 183 if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) 184 184 return 0;
+1 -1
include/uapi/linux/mempolicy.h
··· 67 67 #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ 68 68 #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ 69 69 #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ 70 - #define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */ 70 + #define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */ 71 71 72 72 73 73 #endif /* _UAPI_LINUX_MEMPOLICY_H */
+5 -5
mm/gup.c
··· 64 64 migration_entry_wait(mm, pmd, address); 65 65 goto retry; 66 66 } 67 - if ((flags & FOLL_NUMA) && pte_numa(pte)) 67 + if ((flags & FOLL_NUMA) && pte_protnone(pte)) 68 68 goto no_page; 69 69 if ((flags & FOLL_WRITE) && !pte_write(pte)) { 70 70 pte_unmap_unlock(ptep, ptl); ··· 184 184 return page; 185 185 return no_page_table(vma, flags); 186 186 } 187 - if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) 187 + if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) 188 188 return no_page_table(vma, flags); 189 189 if (pmd_trans_huge(*pmd)) { 190 190 if (flags & FOLL_SPLIT) { ··· 906 906 907 907 /* 908 908 * Similar to the PMD case below, NUMA hinting must take slow 909 - * path 909 + * path using the pte_protnone check. 910 910 */ 911 911 if (!pte_present(pte) || pte_special(pte) || 912 - pte_numa(pte) || (write && !pte_write(pte))) 912 + pte_protnone(pte) || (write && !pte_write(pte))) 913 913 goto pte_unmap; 914 914 915 915 VM_BUG_ON(!pfn_valid(pte_pfn(pte))); ··· 1104 1104 * slowpath for accounting purposes and so that they 1105 1105 * can be serialised against THP migration. 1106 1106 */ 1107 - if (pmd_numa(pmd)) 1107 + if (pmd_protnone(pmd)) 1108 1108 return 0; 1109 1109 1110 1110 if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
+8 -8
mm/huge_memory.c
··· 1211 1211 return ERR_PTR(-EFAULT); 1212 1212 1213 1213 /* Full NUMA hinting faults to serialise migration in fault paths */ 1214 - if ((flags & FOLL_NUMA) && pmd_numa(*pmd)) 1214 + if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) 1215 1215 goto out; 1216 1216 1217 1217 page = pmd_page(*pmd); ··· 1342 1342 1343 1343 /* 1344 1344 * Migrate the THP to the requested node, returns with page unlocked 1345 - * and pmd_numa cleared. 1345 + * and access rights restored. 1346 1346 */ 1347 1347 spin_unlock(ptl); 1348 1348 migrated = migrate_misplaced_transhuge_page(mm, vma, ··· 1357 1357 BUG_ON(!PageLocked(page)); 1358 1358 pmd = pmd_mknonnuma(pmd); 1359 1359 set_pmd_at(mm, haddr, pmdp, pmd); 1360 - VM_BUG_ON(pmd_numa(*pmdp)); 1360 + VM_BUG_ON(pmd_protnone(*pmdp)); 1361 1361 update_mmu_cache_pmd(vma, addr, pmdp); 1362 1362 unlock_page(page); 1363 1363 out_unlock: ··· 1483 1483 ret = 1; 1484 1484 if (!prot_numa) { 1485 1485 entry = pmdp_get_and_clear_notify(mm, addr, pmd); 1486 - if (pmd_numa(entry)) 1486 + if (pmd_protnone(entry)) 1487 1487 entry = pmd_mknonnuma(entry); 1488 1488 entry = pmd_modify(entry, newprot); 1489 1489 ret = HPAGE_PMD_NR; ··· 1499 1499 * local vs remote hits on the zero page. 1500 1500 */ 1501 1501 if (!is_huge_zero_page(page) && 1502 - !pmd_numa(*pmd)) { 1502 + !pmd_protnone(*pmd)) { 1503 1503 pmdp_set_numa(mm, addr, pmd); 1504 1504 ret = HPAGE_PMD_NR; 1505 1505 } ··· 1767 1767 pte_t *pte, entry; 1768 1768 BUG_ON(PageCompound(page+i)); 1769 1769 /* 1770 - * Note that pmd_numa is not transferred deliberately 1771 - * to avoid any possibility that pte_numa leaks to 1772 - * a PROT_NONE VMA by accident. 1770 + * Note that NUMA hinting access restrictions are not 1771 + * transferred to avoid any possibility of altering 1772 + * permissions across VMAs. 1773 1773 */ 1774 1774 entry = mk_pte(page + i, vma->vm_page_prot); 1775 1775 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+2 -2
mm/memory.c
··· 3124 3124 pte, pmd, flags, entry); 3125 3125 } 3126 3126 3127 - if (pte_numa(entry)) 3127 + if (pte_protnone(entry)) 3128 3128 return do_numa_page(mm, vma, address, entry, pte, pmd); 3129 3129 3130 3130 ptl = pte_lockptr(mm, pmd); ··· 3202 3202 if (pmd_trans_splitting(orig_pmd)) 3203 3203 return 0; 3204 3204 3205 - if (pmd_numa(orig_pmd)) 3205 + if (pmd_protnone(orig_pmd)) 3206 3206 return do_huge_pmd_numa_page(mm, vma, address, 3207 3207 orig_pmd, pmd); 3208 3208
+9 -27
mm/mprotect.c
··· 75 75 oldpte = *pte; 76 76 if (pte_present(oldpte)) { 77 77 pte_t ptent; 78 - bool updated = false; 79 78 80 - if (!prot_numa) { 81 - ptent = ptep_modify_prot_start(mm, addr, pte); 82 - if (pte_numa(ptent)) 83 - ptent = pte_mknonnuma(ptent); 84 - ptent = pte_modify(ptent, newprot); 85 - /* 86 - * Avoid taking write faults for pages we 87 - * know to be dirty. 88 - */ 89 - if (dirty_accountable && pte_dirty(ptent) && 90 - (pte_soft_dirty(ptent) || 91 - !(vma->vm_flags & VM_SOFTDIRTY))) 92 - ptent = pte_mkwrite(ptent); 93 - ptep_modify_prot_commit(mm, addr, pte, ptent); 94 - updated = true; 95 - } else { 96 - struct page *page; 79 + ptent = ptep_modify_prot_start(mm, addr, pte); 80 + ptent = pte_modify(ptent, newprot); 97 81 98 - page = vm_normal_page(vma, addr, oldpte); 99 - if (page && !PageKsm(page)) { 100 - if (!pte_numa(oldpte)) { 101 - ptep_set_numa(mm, addr, pte); 102 - updated = true; 103 - } 104 - } 82 + /* Avoid taking write faults for known dirty pages */ 83 + if (dirty_accountable && pte_dirty(ptent) && 84 + (pte_soft_dirty(ptent) || 85 + !(vma->vm_flags & VM_SOFTDIRTY))) { 86 + ptent = pte_mkwrite(ptent); 105 87 } 106 - if (updated) 107 - pages++; 88 + ptep_modify_prot_commit(mm, addr, pte, ptent); 89 + pages++; 108 90 } else if (IS_ENABLED(CONFIG_MIGRATION)) { 109 91 swp_entry_t entry = pte_to_swp_entry(oldpte); 110 92
+1 -1
mm/pgtable-generic.c
··· 193 193 pmd_t *pmdp) 194 194 { 195 195 pmd_t entry = *pmdp; 196 - if (pmd_numa(entry)) 196 + if (pmd_protnone(entry)) 197 197 entry = pmd_mknonnuma(entry); 198 198 set_pmd_at(vma->vm_mm, address, pmdp, pmd_mknotpresent(entry)); 199 199 flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);