Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

module: Fix performance regression on modules with large symbol tables

Looking at /proc/kallsyms, one starts to ponder whether all of the extra
strtab-related complexity in module.c is worth the memory savings.

Instead of making the add_kallsyms() loop even more complex, I tried the
other route of deleting the strmap logic and naively copying each string
into core_strtab with no consideration for consolidating duplicates.

Performance on an "already exists" insmod of nvidia.ko (runs
add_kallsyms() but does not actually initialize the module):

Original scheme: 1.230s
With naive copying: 0.058s

Extra space used: 35k (of a 408k module).

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <73defb5e4bca04a6431392cc341112b1@localhost>

Authored by Kevin Cernekee and committed by Rusty Russell
48fd1188 70b1e916

+21 -44
+21 -44
kernel/module.c
··· 138 138 unsigned long len; 139 139 Elf_Shdr *sechdrs; 140 140 char *secstrings, *strtab; 141 - unsigned long *strmap; 142 141 unsigned long symoffs, stroffs; 143 142 struct _ddebug *debug; 144 143 unsigned int num_debug; ··· 2177 2178 return true; 2178 2179 } 2179 2180 2181 + /* 2182 + * We only allocate and copy the strings needed by the parts of symtab 2183 + * we keep. This is simple, but has the effect of making multiple 2184 + * copies of duplicates. We could be more sophisticated, see 2185 + * linux-kernel thread starting with 2186 + * <73defb5e4bca04a6431392cc341112b1@localhost>. 2187 + */ 2180 2188 static void layout_symtab(struct module *mod, struct load_info *info) 2181 2189 { 2182 2190 Elf_Shdr *symsect = info->sechdrs + info->index.sym; 2183 2191 Elf_Shdr *strsect = info->sechdrs + info->index.str; 2184 2192 const Elf_Sym *src; 2185 - unsigned int i, nsrc, ndst; 2193 + unsigned int i, nsrc, ndst, strtab_size; 2186 2194 2187 2195 /* Put symbol section at end of init part of module. */ 2188 2196 symsect->sh_flags |= SHF_ALLOC; ··· 2200 2194 src = (void *)info->hdr + symsect->sh_offset; 2201 2195 nsrc = symsect->sh_size / sizeof(*src); 2202 2196 2203 - /* 2204 - * info->strmap has a '1' bit for each byte of .strtab we want to 2205 - * keep resident in mod->core_strtab. Everything else in .strtab 2206 - * is unreferenced by the symbols in mod->core_symtab, and will be 2207 - * discarded when add_kallsyms() compacts the string table. 2208 - */ 2209 - for (ndst = i = 1; i < nsrc; ++i, ++src) 2197 + /* Compute total space required for the core symbols' strtab. 
*/ 2198 + for (ndst = i = strtab_size = 1; i < nsrc; ++i, ++src) 2210 2199 if (is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) { 2211 - unsigned int j = src->st_name; 2212 - 2213 - while (!__test_and_set_bit(j, info->strmap) 2214 - && info->strtab[j]) 2215 - ++j; 2216 - ++ndst; 2200 + strtab_size += strlen(&info->strtab[src->st_name]) + 1; 2201 + ndst++; 2217 2202 } 2218 2203 2219 2204 /* Append room for core symbols at end of core part. */ 2220 2205 info->symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); 2221 - mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); 2206 + info->stroffs = mod->core_size = info->symoffs + ndst * sizeof(Elf_Sym); 2207 + mod->core_size += strtab_size; 2222 2208 2223 2209 /* Put string table section at end of init part of module. */ 2224 2210 strsect->sh_flags |= SHF_ALLOC; 2225 2211 strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, 2226 2212 info->index.str) | INIT_OFFSET_MASK; 2227 2213 DEBUGP("\t%s\n", info->secstrings + strsect->sh_name); 2228 - 2229 - /* Append room for core symbols' strings at end of core part. */ 2230 - info->stroffs = mod->core_size; 2231 - 2232 - /* First strtab byte (and first symtab entry) are zeroes. 
*/ 2233 - __set_bit(0, info->strmap); 2234 - mod->core_size += bitmap_weight(info->strmap, strsect->sh_size); 2235 2214 } 2236 2215 2237 2216 static void add_kallsyms(struct module *mod, const struct load_info *info) ··· 2237 2246 mod->symtab[i].st_info = elf_type(&mod->symtab[i], info); 2238 2247 2239 2248 mod->core_symtab = dst = mod->module_core + info->symoffs; 2249 + mod->core_strtab = s = mod->module_core + info->stroffs; 2240 2250 src = mod->symtab; 2241 2251 *dst = *src; 2252 + *s++ = 0; 2242 2253 for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { 2243 2254 if (!is_core_symbol(src, info->sechdrs, info->hdr->e_shnum)) 2244 2255 continue; 2256 + 2245 2257 dst[ndst] = *src; 2246 - dst[ndst].st_name = bitmap_weight(info->strmap, 2247 - dst[ndst].st_name); 2248 - ++ndst; 2258 + dst[ndst++].st_name = s - mod->core_strtab; 2259 + s += strlcpy(s, &mod->strtab[src->st_name], KSYM_NAME_LEN) + 1; 2249 2260 } 2250 2261 mod->core_num_syms = ndst; 2251 - 2252 - mod->core_strtab = s = mod->module_core + info->stroffs; 2253 - for (*s = 0, i = 1; i < info->sechdrs[info->index.str].sh_size; ++i) 2254 - if (test_bit(i, info->strmap)) 2255 - *++s = mod->strtab[i]; 2256 2262 } 2257 2263 #else 2258 2264 static inline void layout_symtab(struct module *mod, struct load_info *info) ··· 2739 2751 this is done generically; there doesn't appear to be any 2740 2752 special cases for the architectures. */ 2741 2753 layout_sections(mod, info); 2742 - 2743 - info->strmap = kzalloc(BITS_TO_LONGS(info->sechdrs[info->index.str].sh_size) 2744 - * sizeof(long), GFP_KERNEL); 2745 - if (!info->strmap) { 2746 - err = -ENOMEM; 2747 - goto free_percpu; 2748 - } 2749 2754 layout_symtab(mod, info); 2750 2755 2751 2756 /* Allocate and move to the final place */ 2752 2757 err = move_module(mod, info); 2753 2758 if (err) 2754 - goto free_strmap; 2759 + goto free_percpu; 2755 2760 2756 2761 /* Module has been copied to its final place now: return it. 
*/ 2757 2762 mod = (void *)info->sechdrs[info->index.mod].sh_addr; 2758 2763 kmemleak_load_module(mod, info); 2759 2764 return mod; 2760 2765 2761 - free_strmap: 2762 - kfree(info->strmap); 2763 2766 free_percpu: 2764 2767 percpu_modfree(mod); 2765 2768 out: ··· 2760 2781 /* mod is no longer valid after this! */ 2761 2782 static void module_deallocate(struct module *mod, struct load_info *info) 2762 2783 { 2763 - kfree(info->strmap); 2764 2784 percpu_modfree(mod); 2765 2785 module_free(mod, mod->module_init); 2766 2786 module_free(mod, mod->module_core); ··· 2889 2911 if (err < 0) 2890 2912 goto unlink; 2891 2913 2892 - /* Get rid of temporary copy and strmap. */ 2893 - kfree(info.strmap); 2914 + /* Get rid of temporary copy. */ 2894 2915 free_copy(&info); 2895 2916 2896 2917 /* Done! */