Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: deduplicate memory overcommitment code

Currently we have two copies of the same code implementing the memory
overcommitment logic: one in mm/mmap.c and one in mm/nommu.c. Let's move
it into mm/util.c and hence avoid the duplication. No functional changes here.

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Andrey Ryabinin and committed by Linus Torvalds
39a1aa8e ea606cf5

+124 -240
-124
mm/mmap.c
··· 122 122 } 123 123 } 124 124 125 - 126 - int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; /* heuristic overcommit */ 127 - int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */ 128 - unsigned long sysctl_overcommit_kbytes __read_mostly; 129 - int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; 130 - unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ 131 - unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ 132 - /* 133 - * Make sure vm_committed_as in one cacheline and not cacheline shared with 134 - * other variables. It can be updated by several CPUs frequently. 135 - */ 136 - struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp; 137 - 138 - /* 139 - * The global memory commitment made in the system can be a metric 140 - * that can be used to drive ballooning decisions when Linux is hosted 141 - * as a guest. On Hyper-V, the host implements a policy engine for dynamically 142 - * balancing memory across competing virtual machines that are hosted. 143 - * Several metrics drive this policy engine including the guest reported 144 - * memory commitment. 145 - */ 146 - unsigned long vm_memory_committed(void) 147 - { 148 - return percpu_counter_read_positive(&vm_committed_as); 149 - } 150 - EXPORT_SYMBOL_GPL(vm_memory_committed); 151 - 152 - /* 153 - * Check that a process has enough memory to allocate a new virtual 154 - * mapping. 0 means there is enough memory for the allocation to 155 - * succeed and -ENOMEM implies there is not. 156 - * 157 - * We currently support three overcommit policies, which are set via the 158 - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting 159 - * 160 - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. 161 - * Additional code 2002 Jul 20 by Robert Love. 162 - * 163 - * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. 
164 - * 165 - * Note this is a helper function intended to be used by LSMs which 166 - * wish to use this logic. 167 - */ 168 - int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) 169 - { 170 - long free, allowed, reserve; 171 - 172 - VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) < 173 - -(s64)vm_committed_as_batch * num_online_cpus(), 174 - "memory commitment underflow"); 175 - 176 - vm_acct_memory(pages); 177 - 178 - /* 179 - * Sometimes we want to use more memory than we have 180 - */ 181 - if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) 182 - return 0; 183 - 184 - if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { 185 - free = global_page_state(NR_FREE_PAGES); 186 - free += global_page_state(NR_FILE_PAGES); 187 - 188 - /* 189 - * shmem pages shouldn't be counted as free in this 190 - * case, they can't be purged, only swapped out, and 191 - * that won't affect the overall amount of available 192 - * memory in the system. 193 - */ 194 - free -= global_page_state(NR_SHMEM); 195 - 196 - free += get_nr_swap_pages(); 197 - 198 - /* 199 - * Any slabs which are created with the 200 - * SLAB_RECLAIM_ACCOUNT flag claim to have contents 201 - * which are reclaimable, under pressure. The dentry 202 - * cache and most inode caches should fall into this 203 - */ 204 - free += global_page_state(NR_SLAB_RECLAIMABLE); 205 - 206 - /* 207 - * Leave reserved pages. The pages are not for anonymous pages. 
208 - */ 209 - if (free <= totalreserve_pages) 210 - goto error; 211 - else 212 - free -= totalreserve_pages; 213 - 214 - /* 215 - * Reserve some for root 216 - */ 217 - if (!cap_sys_admin) 218 - free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); 219 - 220 - if (free > pages) 221 - return 0; 222 - 223 - goto error; 224 - } 225 - 226 - allowed = vm_commit_limit(); 227 - /* 228 - * Reserve some for root 229 - */ 230 - if (!cap_sys_admin) 231 - allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); 232 - 233 - /* 234 - * Don't let a single process grow so big a user can't recover 235 - */ 236 - if (mm) { 237 - reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); 238 - allowed -= min_t(long, mm->total_vm / 32, reserve); 239 - } 240 - 241 - if (percpu_counter_read_positive(&vm_committed_as) < allowed) 242 - return 0; 243 - error: 244 - vm_unacct_memory(pages); 245 - 246 - return -ENOMEM; 247 - } 248 - 249 125 /* 250 126 * Requires inode->i_mapping->i_mmap_rwsem 251 127 */
-116
mm/nommu.c
··· 47 47 unsigned long max_mapnr; 48 48 EXPORT_SYMBOL(max_mapnr); 49 49 unsigned long highest_memmap_pfn; 50 - struct percpu_counter vm_committed_as; 51 - int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */ 52 - int sysctl_overcommit_ratio = 50; /* default is 50% */ 53 - unsigned long sysctl_overcommit_kbytes __read_mostly; 54 - int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; 55 50 int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; 56 - unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ 57 - unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ 58 51 int heap_stack_gap = 0; 59 52 60 53 atomic_long_t mmap_pages_allocated; 61 - 62 - /* 63 - * The global memory commitment made in the system can be a metric 64 - * that can be used to drive ballooning decisions when Linux is hosted 65 - * as a guest. On Hyper-V, the host implements a policy engine for dynamically 66 - * balancing memory across competing virtual machines that are hosted. 67 - * Several metrics drive this policy engine including the guest reported 68 - * memory commitment. 69 - */ 70 - unsigned long vm_memory_committed(void) 71 - { 72 - return percpu_counter_read_positive(&vm_committed_as); 73 - } 74 - 75 - EXPORT_SYMBOL_GPL(vm_memory_committed); 76 54 77 55 EXPORT_SYMBOL(mem_map); 78 56 ··· 1805 1827 { 1806 1828 } 1807 1829 EXPORT_SYMBOL(unmap_mapping_range); 1808 - 1809 - /* 1810 - * Check that a process has enough memory to allocate a new virtual 1811 - * mapping. 0 means there is enough memory for the allocation to 1812 - * succeed and -ENOMEM implies there is not. 1813 - * 1814 - * We currently support three overcommit policies, which are set via the 1815 - * vm.overcommit_memory sysctl. See Documentation/vm/overcommit-accounting 1816 - * 1817 - * Strict overcommit modes added 2002 Feb 26 by Alan Cox. 1818 - * Additional code 2002 Jul 20 by Robert Love. 
1819 - * 1820 - * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. 1821 - * 1822 - * Note this is a helper function intended to be used by LSMs which 1823 - * wish to use this logic. 1824 - */ 1825 - int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) 1826 - { 1827 - long free, allowed, reserve; 1828 - 1829 - vm_acct_memory(pages); 1830 - 1831 - /* 1832 - * Sometimes we want to use more memory than we have 1833 - */ 1834 - if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) 1835 - return 0; 1836 - 1837 - if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { 1838 - free = global_page_state(NR_FREE_PAGES); 1839 - free += global_page_state(NR_FILE_PAGES); 1840 - 1841 - /* 1842 - * shmem pages shouldn't be counted as free in this 1843 - * case, they can't be purged, only swapped out, and 1844 - * that won't affect the overall amount of available 1845 - * memory in the system. 1846 - */ 1847 - free -= global_page_state(NR_SHMEM); 1848 - 1849 - free += get_nr_swap_pages(); 1850 - 1851 - /* 1852 - * Any slabs which are created with the 1853 - * SLAB_RECLAIM_ACCOUNT flag claim to have contents 1854 - * which are reclaimable, under pressure. The dentry 1855 - * cache and most inode caches should fall into this 1856 - */ 1857 - free += global_page_state(NR_SLAB_RECLAIMABLE); 1858 - 1859 - /* 1860 - * Leave reserved pages. The pages are not for anonymous pages. 
1861 - */ 1862 - if (free <= totalreserve_pages) 1863 - goto error; 1864 - else 1865 - free -= totalreserve_pages; 1866 - 1867 - /* 1868 - * Reserve some for root 1869 - */ 1870 - if (!cap_sys_admin) 1871 - free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); 1872 - 1873 - if (free > pages) 1874 - return 0; 1875 - 1876 - goto error; 1877 - } 1878 - 1879 - allowed = vm_commit_limit(); 1880 - /* 1881 - * Reserve some 3% for root 1882 - */ 1883 - if (!cap_sys_admin) 1884 - allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); 1885 - 1886 - /* 1887 - * Don't let a single process grow so big a user can't recover 1888 - */ 1889 - if (mm) { 1890 - reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); 1891 - allowed -= min_t(long, mm->total_vm / 32, reserve); 1892 - } 1893 - 1894 - if (percpu_counter_read_positive(&vm_committed_as) < allowed) 1895 - return 0; 1896 - 1897 - error: 1898 - vm_unacct_memory(pages); 1899 - 1900 - return -ENOMEM; 1901 - } 1902 1830 1903 1831 int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 1904 1832 {
+124
mm/util.c
··· 396 396 } 397 397 EXPORT_SYMBOL_GPL(__page_mapcount); 398 398 399 + int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS; 400 + int sysctl_overcommit_ratio __read_mostly = 50; 401 + unsigned long sysctl_overcommit_kbytes __read_mostly; 402 + int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; 403 + unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ 404 + unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ 405 + 399 406 int overcommit_ratio_handler(struct ctl_table *table, int write, 400 407 void __user *buffer, size_t *lenp, 401 408 loff_t *ppos) ··· 442 435 allowed += total_swap_pages; 443 436 444 437 return allowed; 438 + } 439 + 440 + /* 441 + * Make sure vm_committed_as in one cacheline and not cacheline shared with 442 + * other variables. It can be updated by several CPUs frequently. 443 + */ 444 + struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp; 445 + 446 + /* 447 + * The global memory commitment made in the system can be a metric 448 + * that can be used to drive ballooning decisions when Linux is hosted 449 + * as a guest. On Hyper-V, the host implements a policy engine for dynamically 450 + * balancing memory across competing virtual machines that are hosted. 451 + * Several metrics drive this policy engine including the guest reported 452 + * memory commitment. 453 + */ 454 + unsigned long vm_memory_committed(void) 455 + { 456 + return percpu_counter_read_positive(&vm_committed_as); 457 + } 458 + EXPORT_SYMBOL_GPL(vm_memory_committed); 459 + 460 + /* 461 + * Check that a process has enough memory to allocate a new virtual 462 + * mapping. 0 means there is enough memory for the allocation to 463 + * succeed and -ENOMEM implies there is not. 464 + * 465 + * We currently support three overcommit policies, which are set via the 466 + * vm.overcommit_memory sysctl. 
See Documentation/vm/overcommit-accounting 467 + * 468 + * Strict overcommit modes added 2002 Feb 26 by Alan Cox. 469 + * Additional code 2002 Jul 20 by Robert Love. 470 + * 471 + * cap_sys_admin is 1 if the process has admin privileges, 0 otherwise. 472 + * 473 + * Note this is a helper function intended to be used by LSMs which 474 + * wish to use this logic. 475 + */ 476 + int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) 477 + { 478 + long free, allowed, reserve; 479 + 480 + VM_WARN_ONCE(percpu_counter_read(&vm_committed_as) < 481 + -(s64)vm_committed_as_batch * num_online_cpus(), 482 + "memory commitment underflow"); 483 + 484 + vm_acct_memory(pages); 485 + 486 + /* 487 + * Sometimes we want to use more memory than we have 488 + */ 489 + if (sysctl_overcommit_memory == OVERCOMMIT_ALWAYS) 490 + return 0; 491 + 492 + if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) { 493 + free = global_page_state(NR_FREE_PAGES); 494 + free += global_page_state(NR_FILE_PAGES); 495 + 496 + /* 497 + * shmem pages shouldn't be counted as free in this 498 + * case, they can't be purged, only swapped out, and 499 + * that won't affect the overall amount of available 500 + * memory in the system. 501 + */ 502 + free -= global_page_state(NR_SHMEM); 503 + 504 + free += get_nr_swap_pages(); 505 + 506 + /* 507 + * Any slabs which are created with the 508 + * SLAB_RECLAIM_ACCOUNT flag claim to have contents 509 + * which are reclaimable, under pressure. The dentry 510 + * cache and most inode caches should fall into this 511 + */ 512 + free += global_page_state(NR_SLAB_RECLAIMABLE); 513 + 514 + /* 515 + * Leave reserved pages. The pages are not for anonymous pages. 
516 + */ 517 + if (free <= totalreserve_pages) 518 + goto error; 519 + else 520 + free -= totalreserve_pages; 521 + 522 + /* 523 + * Reserve some for root 524 + */ 525 + if (!cap_sys_admin) 526 + free -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); 527 + 528 + if (free > pages) 529 + return 0; 530 + 531 + goto error; 532 + } 533 + 534 + allowed = vm_commit_limit(); 535 + /* 536 + * Reserve some for root 537 + */ 538 + if (!cap_sys_admin) 539 + allowed -= sysctl_admin_reserve_kbytes >> (PAGE_SHIFT - 10); 540 + 541 + /* 542 + * Don't let a single process grow so big a user can't recover 543 + */ 544 + if (mm) { 545 + reserve = sysctl_user_reserve_kbytes >> (PAGE_SHIFT - 10); 546 + allowed -= min_t(long, mm->total_vm / 32, reserve); 547 + } 548 + 549 + if (percpu_counter_read_positive(&vm_committed_as) < allowed) 550 + return 0; 551 + error: 552 + vm_unacct_memory(pages); 553 + 554 + return -ENOMEM; 445 555 } 446 556 447 557 /**