Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] mm: ptd_alloc inline and out

It seems odd to me that, whereas pud_alloc and pmd_alloc test inline, only
calling out-of-line __pud_alloc __pmd_alloc if allocation needed,
pte_alloc_map and pte_alloc_kernel are entirely out-of-line. Though it does
add a little to kernel size, change them to macros testing inline, calling
__pte_alloc or __pte_alloc_kernel to allocate out-of-line. Mark none of them
as fastcalls, leave that to CONFIG_REGPARM or not.

It also seems more natural for the out-of-line functions to leave the offset
calculation and map to the inline, which has to do it anyway for the common
case. At least mremap move wants __pte_alloc without _map.

Macros rather than inline functions, certainly to avoid the header file issues
which arise from CONFIG_HIGHPTE needing kmap_types.h, but also in case any
architectures I haven't built would have other such problems.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Hugh Dickins; committed by Linus Torvalds.
1bb3630e 872fec16

+61 -88
+3 -8
include/asm-generic/4level-fixup.h
··· 10 10 11 11 #define pud_t pgd_t 12 12 13 - #define pmd_alloc(mm, pud, address) \ 14 - ({ pmd_t *ret; \ 15 - if (pgd_none(*pud)) \ 16 - ret = __pmd_alloc(mm, pud, address); \ 17 - else \ 18 - ret = pmd_offset(pud, address); \ 19 - ret; \ 20 - }) 13 + #define pmd_alloc(mm, pud, address) \ 14 + ((unlikely(pgd_none(*(pud))) && __pmd_alloc(mm, pud, address))? \ 15 + NULL: pmd_offset(pud, address)) 21 16 22 17 #define pud_alloc(mm, pgd, address) (pgd) 23 18 #define pud_offset(pgd, start) (pgd)
+19 -19
include/linux/mm.h
··· 704 704 } 705 705 706 706 extern int vmtruncate(struct inode * inode, loff_t offset); 707 - extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)); 708 - extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)); 709 - extern pte_t *FASTCALL(pte_alloc_kernel(pmd_t *pmd, unsigned long address)); 710 - extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address)); 711 707 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot); 712 708 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot); 713 709 extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access); ··· 756 760 extern struct shrinker *set_shrinker(int, shrinker_t); 757 761 extern void remove_shrinker(struct shrinker *shrinker); 758 762 759 - /* 760 - * On a two-level or three-level page table, this ends up being trivial. Thus 761 - * the inlining and the symmetry break with pte_alloc_map() that does all 762 - * of this out-of-line. 763 - */ 763 + int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); 764 + int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); 765 + int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); 766 + int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); 767 + 764 768 /* 765 769 * The following ifdef needed to get the 4level-fixup.h header to work. 766 770 * Remove it when 4level-fixup.h has been removed. 
767 771 */ 768 - #ifdef CONFIG_MMU 769 - #ifndef __ARCH_HAS_4LEVEL_HACK 772 + #if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK) 770 773 static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) 771 774 { 772 - if (pgd_none(*pgd)) 773 - return __pud_alloc(mm, pgd, address); 774 - return pud_offset(pgd, address); 775 + return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))? 776 + NULL: pud_offset(pgd, address); 775 777 } 776 778 777 779 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) 778 780 { 779 - if (pud_none(*pud)) 780 - return __pmd_alloc(mm, pud, address); 781 - return pmd_offset(pud, address); 781 + return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? 782 + NULL: pmd_offset(pud, address); 782 783 } 783 - #endif 784 - #endif /* CONFIG_MMU */ 784 + #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */ 785 + 786 + #define pte_alloc_map(mm, pmd, address) \ 787 + ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ 788 + NULL: pte_offset_map(pmd, address)) 789 + 790 + #define pte_alloc_kernel(pmd, address) \ 791 + ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ 792 + NULL: pte_offset_kernel(pmd, address)) 785 793 786 794 extern void free_area_init(unsigned long * zones_size); 787 795 extern void free_area_init_node(int nid, pg_data_t *pgdat,
+38 -55
mm/memory.c
··· 280 280 } 281 281 } 282 282 283 - pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, 284 - unsigned long address) 283 + int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) 285 284 { 286 - if (!pmd_present(*pmd)) { 287 - struct page *new; 285 + struct page *new; 288 286 289 - spin_unlock(&mm->page_table_lock); 290 - new = pte_alloc_one(mm, address); 291 - spin_lock(&mm->page_table_lock); 292 - if (!new) 293 - return NULL; 294 - /* 295 - * Because we dropped the lock, we should re-check the 296 - * entry, as somebody else could have populated it.. 297 - */ 298 - if (pmd_present(*pmd)) { 299 - pte_free(new); 300 - goto out; 301 - } 287 + spin_unlock(&mm->page_table_lock); 288 + new = pte_alloc_one(mm, address); 289 + spin_lock(&mm->page_table_lock); 290 + if (!new) 291 + return -ENOMEM; 292 + 293 + if (pmd_present(*pmd)) /* Another has populated it */ 294 + pte_free(new); 295 + else { 302 296 mm->nr_ptes++; 303 297 inc_page_state(nr_page_table_pages); 304 298 pmd_populate(mm, pmd, new); 305 299 } 306 - out: 307 - return pte_offset_map(pmd, address); 300 + return 0; 308 301 } 309 302 310 - pte_t fastcall * pte_alloc_kernel(pmd_t *pmd, unsigned long address) 303 + int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) 311 304 { 312 - if (!pmd_present(*pmd)) { 313 - pte_t *new; 305 + pte_t *new = pte_alloc_one_kernel(&init_mm, address); 306 + if (!new) 307 + return -ENOMEM; 314 308 315 - new = pte_alloc_one_kernel(&init_mm, address); 316 - if (!new) 317 - return NULL; 318 - 319 - spin_lock(&init_mm.page_table_lock); 320 - if (pmd_present(*pmd)) 321 - pte_free_kernel(new); 322 - else 323 - pmd_populate_kernel(&init_mm, pmd, new); 324 - spin_unlock(&init_mm.page_table_lock); 325 - } 326 - return pte_offset_kernel(pmd, address); 309 + spin_lock(&init_mm.page_table_lock); 310 + if (pmd_present(*pmd)) /* Another has populated it */ 311 + pte_free_kernel(new); 312 + else 313 + pmd_populate_kernel(&init_mm, pmd, new); 314 + 
spin_unlock(&init_mm.page_table_lock); 315 + return 0; 327 316 } 328 317 329 318 static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss) ··· 2082 2093 * Allocate page upper directory. 2083 2094 * We've already handled the fast-path in-line. 2084 2095 */ 2085 - pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) 2096 + int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) 2086 2097 { 2087 2098 pud_t *new; 2088 2099 ··· 2092 2103 if (!new) { 2093 2104 if (mm != &init_mm) /* Temporary bridging hack */ 2094 2105 spin_lock(&mm->page_table_lock); 2095 - return NULL; 2106 + return -ENOMEM; 2096 2107 } 2097 2108 2098 2109 spin_lock(&mm->page_table_lock); 2099 - if (pgd_present(*pgd)) { 2110 + if (pgd_present(*pgd)) /* Another has populated it */ 2100 2111 pud_free(new); 2101 - goto out; 2102 - } 2103 - pgd_populate(mm, pgd, new); 2104 - out: 2112 + else 2113 + pgd_populate(mm, pgd, new); 2105 2114 if (mm == &init_mm) /* Temporary bridging hack */ 2106 2115 spin_unlock(&mm->page_table_lock); 2107 - return pud_offset(pgd, address); 2116 + return 0; 2108 2117 } 2109 2118 #endif /* __PAGETABLE_PUD_FOLDED */ 2110 2119 ··· 2111 2124 * Allocate page middle directory. 2112 2125 * We've already handled the fast-path in-line. 
2113 2126 */ 2114 - pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) 2127 + int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) 2115 2128 { 2116 2129 pmd_t *new; 2117 2130 ··· 2121 2134 if (!new) { 2122 2135 if (mm != &init_mm) /* Temporary bridging hack */ 2123 2136 spin_lock(&mm->page_table_lock); 2124 - return NULL; 2137 + return -ENOMEM; 2125 2138 } 2126 2139 2127 2140 spin_lock(&mm->page_table_lock); 2128 2141 #ifndef __ARCH_HAS_4LEVEL_HACK 2129 - if (pud_present(*pud)) { 2142 + if (pud_present(*pud)) /* Another has populated it */ 2130 2143 pmd_free(new); 2131 - goto out; 2132 - } 2133 - pud_populate(mm, pud, new); 2144 + else 2145 + pud_populate(mm, pud, new); 2134 2146 #else 2135 - if (pgd_present(*pud)) { 2147 + if (pgd_present(*pud)) /* Another has populated it */ 2136 2148 pmd_free(new); 2137 - goto out; 2138 - } 2139 - pgd_populate(mm, pud, new); 2149 + else 2150 + pgd_populate(mm, pud, new); 2140 2151 #endif /* __ARCH_HAS_4LEVEL_HACK */ 2141 - 2142 - out: 2143 2152 if (mm == &init_mm) /* Temporary bridging hack */ 2144 2153 spin_unlock(&mm->page_table_lock); 2145 - return pmd_offset(pud, address); 2154 + return 0; 2146 2155 } 2147 2156 #endif /* __PAGETABLE_PMD_FOLDED */ 2148 2157
+1 -6
mm/mremap.c
··· 51 51 pgd_t *pgd; 52 52 pud_t *pud; 53 53 pmd_t *pmd = NULL; 54 - pte_t *pte; 55 54 56 55 /* 57 56 * We do need page_table_lock: because allocators expect that. ··· 65 66 if (!pmd) 66 67 goto out; 67 68 68 - pte = pte_alloc_map(mm, pmd, addr); 69 - if (!pte) { 69 + if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr)) 70 70 pmd = NULL; 71 - goto out; 72 - } 73 - pte_unmap(pte); 74 71 out: 75 72 spin_unlock(&mm->page_table_lock); 76 73 return pmd;