Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs, file table: reinit files_stat.max_files after deferred memory initialisation

Dave Hansen reported the following:

My laptop has been behaving strangely with 4.2-rc2. Once I log
in to my X session, I start getting all kinds of strange errors
from applications and see this in my dmesg:

VFS: file-max limit 8192 reached

The problem is that file-max is calculated before memory is fully
initialised, so the calculation misjudges how much memory the kernel is
using. This patch recalculates file-max after deferred memory
initialisation. Note that using the memory hotplug infrastructure would not
have avoided this problem, as the value is not recalculated after memory
hot-add.

4.1: files_stat.max_files = 6582781
4.2-rc2: files_stat.max_files = 8192
4.2-rc2 patched: files_stat.max_files = 6562467

There are small differences between the patched kernel and 4.1, but not enough to matter.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Reported-by: Dave Hansen <dave.hansen@intel.com>
Cc: Nicolai Stange <nicstange@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Alex Ng <alexng@microsoft.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Mel Gorman and committed by
Linus Torvalds
4248b0da d3cd131d

+27 -24
+3 -10
fs/dcache.c
··· 3442 3442 inode_init_early(); 3443 3443 } 3444 3444 3445 - void __init vfs_caches_init(unsigned long mempages) 3445 + void __init vfs_caches_init(void) 3446 3446 { 3447 - unsigned long reserve; 3448 - 3449 - /* Base hash sizes on available memory, with a reserve equal to 3450 - 150% of current kernel size */ 3451 - 3452 - reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1); 3453 - mempages -= reserve; 3454 - 3455 3447 names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0, 3456 3448 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 3457 3449 3458 3450 dcache_init(); 3459 3451 inode_init(); 3460 - files_init(mempages); 3452 + files_init(); 3453 + files_maxfiles_init(); 3461 3454 mnt_init(); 3462 3455 bdev_cache_init(); 3463 3456 chrdev_init();
+17 -11
fs/file_table.c
··· 25 25 #include <linux/hardirq.h> 26 26 #include <linux/task_work.h> 27 27 #include <linux/ima.h> 28 + #include <linux/swap.h> 28 29 29 30 #include <linux/atomic.h> 30 31 ··· 309 308 } 310 309 } 311 310 312 - void __init files_init(unsigned long mempages) 311 + void __init files_init(void) 313 312 { 314 - unsigned long n; 315 - 316 313 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, 317 314 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 318 - 319 - /* 320 - * One file with associated inode and dcache is very roughly 1K. 321 - * Per default don't use more than 10% of our memory for files. 322 - */ 323 - 324 - n = (mempages * (PAGE_SIZE / 1024)) / 10; 325 - files_stat.max_files = max_t(unsigned long, n, NR_FILE); 326 315 percpu_counter_init(&nr_files, 0, GFP_KERNEL); 316 + } 317 + 318 + /* 319 + * One file with associated inode and dcache is very roughly 1K. Per default 320 + * do not use more than 10% of our memory for files. 321 + */ 322 + void __init files_maxfiles_init(void) 323 + { 324 + unsigned long n; 325 + unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2; 326 + 327 + memreserve = min(memreserve, totalram_pages - 1); 328 + n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10; 329 + 330 + files_stat.max_files = max_t(unsigned long, n, NR_FILE); 327 331 }
+3 -2
include/linux/fs.h
··· 55 55 56 56 extern void __init inode_init(void); 57 57 extern void __init inode_init_early(void); 58 - extern void __init files_init(unsigned long); 58 + extern void __init files_init(void); 59 + extern void __init files_maxfiles_init(void); 59 60 60 61 extern struct files_stat_struct files_stat; 61 62 extern unsigned long get_max_files(void); ··· 2246 2245 2247 2246 /* fs/dcache.c */ 2248 2247 extern void __init vfs_caches_init_early(void); 2249 - extern void __init vfs_caches_init(unsigned long); 2248 + extern void __init vfs_caches_init(void); 2250 2249 2251 2250 extern struct kmem_cache *names_cachep; 2252 2251
+1 -1
init/main.c
··· 656 656 key_init(); 657 657 security_init(); 658 658 dbg_late_init(); 659 - vfs_caches_init(totalram_pages); 659 + vfs_caches_init(); 660 660 signals_init(); 661 661 /* rootfs populating might need page-writeback */ 662 662 page_writeback_init();
+3
mm/page_alloc.c
··· 1201 1201 1202 1202 /* Block until all are initialised */ 1203 1203 wait_for_completion(&pgdat_init_all_done_comp); 1204 + 1205 + /* Reinit limits that are based on free pages after the kernel is up */ 1206 + files_maxfiles_init(); 1204 1207 } 1205 1208 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ 1206 1209