Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: add overcommit_kbytes sysctl variable

Some applications that run on HPC clusters are designed around the
availability of RAM and the overcommit ratio is fine tuned to get the
maximum usage of memory without swapping. With growing memory, the
1%-of-all-RAM grain provided by overcommit_ratio has become too coarse
for these workloads (on a 2TB machine it represents no less than 20GB).

This patch adds the new overcommit_kbytes sysctl variable that allows a
much finer grain.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix nommu build]
Signed-off-by: Jerome Marchand <jmarchan@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

authored by

Jerome Marchand and committed by
Linus Torvalds
49f0ce5f aec6a888

+70 -8
+12
Documentation/sysctl/vm.txt
··· 47 47 - numa_zonelist_order 48 48 - oom_dump_tasks 49 49 - oom_kill_allocating_task 50 + - overcommit_kbytes 50 51 - overcommit_memory 51 52 - overcommit_ratio 52 53 - page-cluster ··· 572 571 is used in oom_kill_allocating_task. 573 572 574 573 The default value is 0. 574 + 575 + ============================================================== 576 + 577 + overcommit_kbytes: 578 + 579 + When overcommit_memory is set to 2, the committed address space is not 580 + permitted to exceed swap plus this amount of physical RAM. See below. 581 + 582 + Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one 583 + of them may be specified at a time. Setting one disables the other (which 584 + then appears as 0 when read). 575 585 576 586 ============================================================== 577 587
+4 -3
Documentation/vm/overcommit-accounting
··· 14 14 15 15 2 - Don't overcommit. The total address space commit 16 16 for the system is not permitted to exceed swap + a 17 - configurable percentage (default is 50) of physical RAM. 18 - Depending on the percentage you use, in most situations 17 + configurable amount (default is 50%) of physical RAM. 18 + Depending on the amount you use, in most situations 19 19 this means a process will not be killed while accessing 20 20 pages but will receive errors on memory allocation as 21 21 appropriate. ··· 26 26 27 27 The overcommit policy is set via the sysctl `vm.overcommit_memory'. 28 28 29 - The overcommit percentage is set via `vm.overcommit_ratio'. 29 + The overcommit amount can be set via `vm.overcommit_ratio' (percentage) 30 + or `vm.overcommit_kbytes' (absolute value). 30 31 31 32 The current overcommit limit and amount committed are viewable in 32 33 /proc/meminfo as CommitLimit and Committed_AS respectively.
+9
include/linux/mm.h
··· 57 57 extern unsigned long sysctl_user_reserve_kbytes; 58 58 extern unsigned long sysctl_admin_reserve_kbytes; 59 59 60 + extern int sysctl_overcommit_memory; 61 + extern int sysctl_overcommit_ratio; 62 + extern unsigned long sysctl_overcommit_kbytes; 63 + 64 + extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *, 65 + size_t *, loff_t *); 66 + extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, 67 + size_t *, loff_t *); 68 + 60 69 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) 61 70 62 71 /* to align the pointer to the (next) page boundary */
+1
include/linux/mman.h
··· 9 9 10 10 extern int sysctl_overcommit_memory; 11 11 extern int sysctl_overcommit_ratio; 12 + extern unsigned long sysctl_overcommit_kbytes; 12 13 extern struct percpu_counter vm_committed_as; 13 14 14 15 #ifdef CONFIG_SMP
+8 -3
kernel/sysctl.c
··· 95 95 #if defined(CONFIG_SYSCTL) 96 96 97 97 /* External variables not in a header file. */ 98 - extern int sysctl_overcommit_memory; 99 - extern int sysctl_overcommit_ratio; 100 98 extern int max_threads; 101 99 extern int suid_dumpable; 102 100 #ifdef CONFIG_COREDUMP ··· 1119 1121 .data = &sysctl_overcommit_ratio, 1120 1122 .maxlen = sizeof(sysctl_overcommit_ratio), 1121 1123 .mode = 0644, 1122 - .proc_handler = proc_dointvec, 1124 + .proc_handler = overcommit_ratio_handler, 1125 + }, 1126 + { 1127 + .procname = "overcommit_kbytes", 1128 + .data = &sysctl_overcommit_kbytes, 1129 + .maxlen = sizeof(sysctl_overcommit_kbytes), 1130 + .mode = 0644, 1131 + .proc_handler = overcommit_kbytes_handler, 1123 1132 }, 1124 1133 { 1125 1134 .procname = "page-cluster",
+1
mm/mmap.c
··· 86 86 87 87 int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ 88 88 int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ 89 + unsigned long sysctl_overcommit_kbytes __read_mostly; 89 90 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; 90 91 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ 91 92 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
+1
mm/nommu.c
··· 60 60 struct percpu_counter vm_committed_as; 61 61 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ 62 62 int sysctl_overcommit_ratio = 50; /* default is 50% */ 63 + unsigned long sysctl_overcommit_kbytes __read_mostly; 63 64 int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 64 65 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; 65 66 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
+34 -2
mm/util.c
··· 404 404 return mapping; 405 405 } 406 406 407 + int overcommit_ratio_handler(struct ctl_table *table, int write, 408 + void __user *buffer, size_t *lenp, 409 + loff_t *ppos) 410 + { 411 + int ret; 412 + 413 + ret = proc_dointvec(table, write, buffer, lenp, ppos); 414 + if (ret == 0 && write) 415 + sysctl_overcommit_kbytes = 0; 416 + return ret; 417 + } 418 + 419 + int overcommit_kbytes_handler(struct ctl_table *table, int write, 420 + void __user *buffer, size_t *lenp, 421 + loff_t *ppos) 422 + { 423 + int ret; 424 + 425 + ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos); 426 + if (ret == 0 && write) 427 + sysctl_overcommit_ratio = 0; 428 + return ret; 429 + } 430 + 407 431 /* 408 432 * Committed memory limit enforced when OVERCOMMIT_NEVER policy is used 409 433 */ 410 434 unsigned long vm_commit_limit(void) 411 435 { 412 - return ((totalram_pages - hugetlb_total_pages()) 413 - * sysctl_overcommit_ratio / 100) + total_swap_pages; 436 + unsigned long allowed; 437 + 438 + if (sysctl_overcommit_kbytes) 439 + allowed = sysctl_overcommit_kbytes >> (PAGE_SHIFT - 10); 440 + else 441 + allowed = ((totalram_pages - hugetlb_total_pages()) 442 + * sysctl_overcommit_ratio / 100); 443 + allowed += total_swap_pages; 444 + 445 + return allowed; 414 446 } 415 447 416 448