Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mm-hotfixes-stable-2026-01-15-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:

- kerneldoc fixes from Bagas Sanjaya

- DAMON fixes from SeongJae

- mremap VMA-related fixes from Lorenzo

- various singletons - please see the changelogs for details

* tag 'mm-hotfixes-stable-2026-01-15-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (30 commits)
drivers/dax: add some missing kerneldoc comment fields for struct dev_dax
mm: numa,memblock: include <asm/numa.h> for 'numa_nodes_parsed'
mailmap: add entry for Daniel Thompson
tools/testing/selftests: fix gup_longterm for unknown fs
mm/page_alloc: prevent pcp corruption with SMP=n
iommu/sva: include mmu_notifier.h header
mm: kmsan: fix poisoning of high-order non-compound pages
tools/testing/selftests: add forked (un)/faulted VMA merge tests
mm/vma: enforce VMA fork limit on unfaulted,faulted mremap merge too
tools/testing/selftests: add tests for !tgt, src mremap() merges
mm/vma: fix anon_vma UAF on mremap() faulted, unfaulted merge
mm/zswap: fix error pointer free in zswap_cpu_comp_prepare()
mm/damon/sysfs-scheme: cleanup access_pattern subdirs on scheme dir setup failure
mm/damon/sysfs-scheme: cleanup quotas subdirs on scheme dir setup failure
mm/damon/sysfs: cleanup attrs subdirs on context dir setup failure
mm/damon/sysfs: cleanup intervals subdirs on attrs dir setup failure
mm/damon/core: remove call_control in inactive contexts
powerpc/watchdog: add support for hardlockup_sys_info sysctl
mips: fix HIGHMEM initialization
mm/hugetlb: ignore hugepage kernel args if hugepages are unsupported
...

+674 -128
+2
.mailmap
··· 207 207 Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com> 208 208 Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com> 209 209 Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com> 210 + Daniel Thompson <danielt@kernel.org> <daniel.thompson@linaro.org> 210 211 Danilo Krummrich <dakr@kernel.org> <dakr@redhat.com> 211 212 David Brownell <david-b@pacbell.net> 212 213 David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org> ··· 795 794 Sven Eckelmann <sven@narfation.org> <sven.eckelmann@openmesh.com> 796 795 Sven Eckelmann <sven@narfation.org> <sven@open-mesh.com> 797 796 Sven Peter <sven@kernel.org> <sven@svenpeter.dev> 797 + Szymon Wilczek <swilczek.lx@gmail.com> <szymonwilczek@gmx.com> 798 798 Takashi YOSHII <takashi.yoshii.zj@renesas.com> 799 799 Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org> 800 800 Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org>
+35
Documentation/admin-guide/kernel-parameters.txt
··· 2917 2917 for Movable pages. "nn[KMGTPE]", "nn%", and "mirror" 2918 2918 are exclusive, so you cannot specify multiple forms. 2919 2919 2920 + kfence.burst= [MM,KFENCE] The number of additional successive 2921 + allocations to be attempted through KFENCE for each 2922 + sample interval. 2923 + Format: <unsigned integer> 2924 + Default: 0 2925 + 2926 + kfence.check_on_panic= 2927 + [MM,KFENCE] Whether to check all KFENCE-managed objects' 2928 + canaries on panic. 2929 + Format: <bool> 2930 + Default: false 2931 + 2932 + kfence.deferrable= 2933 + [MM,KFENCE] Whether to use a deferrable timer to trigger 2934 + allocations. This avoids forcing CPU wake-ups if the 2935 + system is idle, at the risk of a less predictable 2936 + sample interval. 2937 + Format: <bool> 2938 + Default: CONFIG_KFENCE_DEFERRABLE 2939 + 2940 + kfence.sample_interval= 2941 + [MM,KFENCE] KFENCE's sample interval in milliseconds. 2942 + Format: <unsigned integer> 2943 + 0 - Disable KFENCE. 2944 + >0 - Enabled KFENCE with given sample interval. 2945 + Default: CONFIG_KFENCE_SAMPLE_INTERVAL 2946 + 2947 + kfence.skip_covered_thresh= 2948 + [MM,KFENCE] If pool utilization reaches this threshold 2949 + (pool usage%), KFENCE limits currently covered 2950 + allocations of the same source from further filling 2951 + up the pool. 2952 + Format: <unsigned integer> 2953 + Default: 75 2954 + 2920 2955 kgdbdbgp= [KGDB,HW,EARLY] kgdb over EHCI usb debug port. 2921 2956 Format: <Controller#>[,poll interval] 2922 2957 The controller # is the number of the ehci usb debug
+23
arch/mips/mm/init.c
··· 425 425 static struct kcore_list kcore_kseg0; 426 426 #endif 427 427 428 + static inline void __init highmem_init(void) 429 + { 430 + #ifdef CONFIG_HIGHMEM 431 + unsigned long tmp; 432 + 433 + /* 434 + * If CPU cannot support HIGHMEM discard the memory above highstart_pfn 435 + */ 436 + if (cpu_has_dc_aliases) { 437 + memblock_remove(PFN_PHYS(highstart_pfn), -1); 438 + return; 439 + } 440 + 441 + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { 442 + struct page *page = pfn_to_page(tmp); 443 + 444 + if (!memblock_is_memory(PFN_PHYS(tmp))) 445 + SetPageReserved(page); 446 + } 447 + #endif 448 + } 449 + 428 450 void __init arch_mm_preinit(void) 429 451 { 430 452 /* ··· 457 435 458 436 maar_init(); 459 437 setup_zero_pages(); /* Setup zeroed pages. */ 438 + highmem_init(); 460 439 461 440 #ifdef CONFIG_64BIT 462 441 if ((unsigned long) &_text > (unsigned long) CKSEG0)
+10 -5
arch/powerpc/kernel/watchdog.c
··· 26 26 #include <linux/delay.h> 27 27 #include <linux/processor.h> 28 28 #include <linux/smp.h> 29 + #include <linux/sys_info.h> 29 30 30 31 #include <asm/interrupt.h> 31 32 #include <asm/paca.h> ··· 236 235 pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n", 237 236 cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000); 238 237 239 - if (!sysctl_hardlockup_all_cpu_backtrace) { 238 + if (sysctl_hardlockup_all_cpu_backtrace || 239 + (hardlockup_si_mask & SYS_INFO_ALL_BT)) { 240 + trigger_allbutcpu_cpu_backtrace(cpu); 241 + cpumask_clear(&wd_smp_cpus_ipi); 242 + } else { 240 243 /* 241 244 * Try to trigger the stuck CPUs, unless we are going to 242 245 * get a backtrace on all of them anyway. ··· 249 244 smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); 250 245 __cpumask_clear_cpu(c, &wd_smp_cpus_ipi); 251 246 } 252 - } else { 253 - trigger_allbutcpu_cpu_backtrace(cpu); 254 - cpumask_clear(&wd_smp_cpus_ipi); 255 247 } 256 248 249 + sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT); 257 250 if (hardlockup_panic) 258 251 nmi_panic(NULL, "Hard LOCKUP"); 259 252 ··· 418 415 419 416 xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi 420 417 421 - if (sysctl_hardlockup_all_cpu_backtrace) 418 + if (sysctl_hardlockup_all_cpu_backtrace || 419 + (hardlockup_si_mask & SYS_INFO_ALL_BT)) 422 420 trigger_allbutcpu_cpu_backtrace(cpu); 423 421 422 + sys_info(hardlockup_si_mask & ~SYS_INFO_ALL_BT); 424 423 if (hardlockup_panic) 425 424 nmi_panic(regs, "Hard LOCKUP"); 426 425
+6 -4
drivers/dax/dax-private.h
··· 67 67 /** 68 68 * struct dev_dax - instance data for a subdivision of a dax region, and 69 69 * data while the device is activated in the driver. 70 - * @region - parent region 71 - * @dax_dev - core dax functionality 70 + * @region: parent region 71 + * @dax_dev: core dax functionality 72 + * @align: alignment of this instance 72 73 * @target_node: effective numa node if dev_dax memory range is onlined 73 74 * @dyn_id: is this a dynamic or statically created instance 74 75 * @id: ida allocated id when the dax_region is not static 75 76 * @ida: mapping id allocator 76 - * @dev - device core 77 - * @pgmap - pgmap for memmap setup / lifetime (driver owned) 77 + * @dev: device core 78 + * @pgmap: pgmap for memmap setup / lifetime (driver owned) 79 + * @memmap_on_memory: allow kmem to put the memmap in the memory 78 80 * @nr_range: size of @ranges 79 81 * @ranges: range tuples of memory used 80 82 */
+1
drivers/iommu/iommu-sva.c
··· 3 3 * Helpers for IOMMU drivers implementing SVA 4 4 */ 5 5 #include <linux/mmu_context.h> 6 + #include <linux/mmu_notifier.h> 6 7 #include <linux/mutex.h> 7 8 #include <linux/sched/mm.h> 8 9 #include <linux/iommu.h>
+1
include/linux/kfence.h
··· 211 211 * __kfence_obj_info() - fill kmem_obj_info struct 212 212 * @kpp: kmem_obj_info to be filled 213 213 * @object: the object 214 + * @slab: the slab 214 215 * 215 216 * Return: 216 217 * * false - not a KFENCE object
+1
include/linux/nmi.h
··· 83 83 #if defined(CONFIG_HARDLOCKUP_DETECTOR) 84 84 extern void hardlockup_detector_disable(void); 85 85 extern unsigned int hardlockup_panic; 86 + extern unsigned long hardlockup_si_mask; 86 87 #else 87 88 static inline void hardlockup_detector_disable(void) {} 88 89 #endif
+1
include/linux/sched/mm.h
··· 325 325 326 326 /** 327 327 * memalloc_flags_save - Add a PF_* flag to current->flags, save old value 328 + * @flags: Flags to add. 328 329 * 329 330 * This allows PF_* flags to be conveniently added, irrespective of current 330 331 * value, and then the old version restored with memalloc_flags_restore().
+1
include/linux/textsearch.h
··· 35 35 * @get_pattern: return head of pattern 36 36 * @get_pattern_len: return length of pattern 37 37 * @owner: module reference to algorithm 38 + * @list: list to search 38 39 */ 39 40 struct ts_ops 40 41 {
+19 -18
kernel/liveupdate/kexec_handover.c
··· 460 460 } 461 461 } 462 462 463 - /* Return true if memory was deserizlied */ 464 - static bool __init kho_mem_deserialize(const void *fdt) 463 + /* Returns physical address of the preserved memory map from FDT */ 464 + static phys_addr_t __init kho_get_mem_map_phys(const void *fdt) 465 465 { 466 - struct khoser_mem_chunk *chunk; 467 466 const void *mem_ptr; 468 - u64 mem; 469 467 int len; 470 468 471 469 mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len); 472 470 if (!mem_ptr || len != sizeof(u64)) { 473 471 pr_err("failed to get preserved memory bitmaps\n"); 474 - return false; 472 + return 0; 475 473 } 476 474 477 - mem = get_unaligned((const u64 *)mem_ptr); 478 - chunk = mem ? phys_to_virt(mem) : NULL; 475 + return get_unaligned((const u64 *)mem_ptr); 476 + } 479 477 480 - /* No preserved physical pages were passed, no deserialization */ 481 - if (!chunk) 482 - return false; 483 - 478 + static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk) 479 + { 484 480 while (chunk) { 485 481 unsigned int i; 486 482 ··· 485 489 &chunk->bitmaps[i]); 486 490 chunk = KHOSER_LOAD_PTR(chunk->hdr.next); 487 491 } 488 - 489 - return true; 490 492 } 491 493 492 494 /* ··· 1247 1253 struct kho_in { 1248 1254 phys_addr_t fdt_phys; 1249 1255 phys_addr_t scratch_phys; 1256 + phys_addr_t mem_map_phys; 1250 1257 struct kho_debugfs dbg; 1251 1258 }; 1252 1259 ··· 1429 1434 1430 1435 void __init kho_memory_init(void) 1431 1436 { 1432 - if (kho_in.scratch_phys) { 1437 + if (kho_in.mem_map_phys) { 1433 1438 kho_scratch = phys_to_virt(kho_in.scratch_phys); 1434 1439 kho_release_scratch(); 1435 - 1436 - if (!kho_mem_deserialize(kho_get_fdt())) 1437 - kho_in.fdt_phys = 0; 1440 + kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys)); 1438 1441 } else { 1439 1442 kho_reserve_scratch(); 1440 1443 } ··· 1441 1448 void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len, 1442 1449 phys_addr_t scratch_phys, u64 scratch_len) 1443 1450 { 1444 - void *fdt = NULL; 1445 1451 struct kho_scratch *scratch = NULL; 1452 + phys_addr_t mem_map_phys; 1453 + void *fdt = NULL; 1446 1454 int err = 0; 1447 1455 unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch); 1448 1456 ··· 1466 1472 pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n", 1467 1473 fdt_phys, KHO_FDT_COMPATIBLE, err); 1468 1474 err = -EINVAL; 1475 + goto out; 1476 + } 1477 + 1478 + mem_map_phys = kho_get_mem_map_phys(fdt); 1479 + if (!mem_map_phys) { 1480 + err = -ENOENT; 1469 1481 goto out; 1470 1482 } 1471 1483 ··· 1515 1515 1516 1516 kho_in.fdt_phys = fdt_phys; 1517 1517 kho_in.scratch_phys = scratch_phys; 1518 + kho_in.mem_map_phys = mem_map_phys; 1518 1519 kho_scratch_cnt = scratch_cnt; 1519 1520 pr_info("found kexec handover data.\n"); 1520 1521
+1 -1
kernel/watchdog.c
··· 71 71 * hard lockup is detected, it could be task, memory, lock etc. 72 72 * Refer include/linux/sys_info.h for detailed bit definition. 73 73 */ 74 - static unsigned long hardlockup_si_mask; 74 + unsigned long hardlockup_si_mask; 75 75 76 76 #ifdef CONFIG_SYSFS 77 77
+20 -12
lib/buildid.c
··· 5 5 #include <linux/elf.h> 6 6 #include <linux/kernel.h> 7 7 #include <linux/pagemap.h> 8 + #include <linux/fs.h> 8 9 #include <linux/secretmem.h> 9 10 10 11 #define BUILD_ID 3 ··· 47 46 48 47 freader_put_folio(r); 49 48 50 - /* reject secretmem folios created with memfd_secret() */ 51 - if (secretmem_mapping(r->file->f_mapping)) 52 - return -EFAULT; 53 - 49 + /* only use page cache lookup - fail if not already cached */ 54 50 r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT); 55 - 56 - /* if sleeping is allowed, wait for the page, if necessary */ 57 - if (r->may_fault && (IS_ERR(r->folio) || !folio_test_uptodate(r->folio))) { 58 - filemap_invalidate_lock_shared(r->file->f_mapping); 59 - r->folio = read_cache_folio(r->file->f_mapping, file_off >> PAGE_SHIFT, 60 - NULL, r->file); 61 - filemap_invalidate_unlock_shared(r->file->f_mapping); 62 - } 63 51 64 52 if (IS_ERR(r->folio) || !folio_test_uptodate(r->folio)) { 65 53 if (!IS_ERR(r->folio)) ··· 85 95 return NULL; 86 96 } 87 97 return r->data + file_off; 98 + } 99 + 100 + /* reject secretmem folios created with memfd_secret() */ 101 + if (secretmem_mapping(r->file->f_mapping)) { 102 + r->err = -EFAULT; 103 + return NULL; 104 + } 105 + 106 + /* use __kernel_read() for sleepable context */ 107 + if (r->may_fault) { 108 + ssize_t ret; 109 + 110 + ret = __kernel_read(r->file, r->buf, sz, &file_off); 111 + if (ret != sz) { 112 + r->err = (ret < 0) ? ret : -EIO; 113 + return NULL; 114 + } 115 + return r->buf; 88 116 } 89 117 90 118 /* fetch or reuse folio for given file offset */
+37 -4
mm/damon/core.c
··· 1431 1431 return running; 1432 1432 } 1433 1433 1434 + /* 1435 + * damon_call_handle_inactive_ctx() - handle DAMON call request that added to 1436 + * an inactive context. 1437 + * @ctx: The inactive DAMON context. 1438 + * @control: Control variable of the call request. 1439 + * 1440 + * This function is called in a case that @control is added to @ctx but @ctx is 1441 + * not running (inactive). See if @ctx handled @control or not, and cleanup 1442 + * @control if it was not handled. 1443 + * 1444 + * Returns 0 if @control was handled by @ctx, negative error code otherwise. 1445 + */ 1446 + static int damon_call_handle_inactive_ctx( 1447 + struct damon_ctx *ctx, struct damon_call_control *control) 1448 + { 1449 + struct damon_call_control *c; 1450 + 1451 + mutex_lock(&ctx->call_controls_lock); 1452 + list_for_each_entry(c, &ctx->call_controls, list) { 1453 + if (c == control) { 1454 + list_del(&control->list); 1455 + mutex_unlock(&ctx->call_controls_lock); 1456 + return -EINVAL; 1457 + } 1458 + } 1459 + mutex_unlock(&ctx->call_controls_lock); 1460 + return 0; 1461 + } 1462 + 1434 1463 /** 1435 1464 * damon_call() - Invoke a given function on DAMON worker thread (kdamond). 1436 1465 * @ctx: DAMON context to call the function for. ··· 1490 1461 list_add_tail(&control->list, &ctx->call_controls); 1491 1462 mutex_unlock(&ctx->call_controls_lock); 1492 1463 if (!damon_is_running(ctx)) 1493 - return -EINVAL; 1464 + return damon_call_handle_inactive_ctx(ctx, control); 1494 1465 if (control->repeat) 1495 1466 return 0; 1496 1467 wait_for_completion(&control->completion); ··· 2080 2051 2081 2052 rcu_read_lock(); 2082 2053 memcg = mem_cgroup_from_id(goal->memcg_id); 2083 - rcu_read_unlock(); 2084 - if (!memcg) { 2054 + if (!memcg || !mem_cgroup_tryget(memcg)) { 2055 + rcu_read_unlock(); 2085 2056 if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) 2086 2057 return 0; 2087 2058 else /* DAMOS_QUOTA_NODE_MEMCG_FREE_BP */ 2088 2059 return 10000; 2089 2060 } 2061 + rcu_read_unlock(); 2062 + 2090 2063 mem_cgroup_flush_stats(memcg); 2091 2064 lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(goal->nid)); 2092 2065 used_pages = lruvec_page_state(lruvec, NR_ACTIVE_ANON); 2093 2066 used_pages += lruvec_page_state(lruvec, NR_INACTIVE_ANON); 2094 2067 used_pages += lruvec_page_state(lruvec, NR_ACTIVE_FILE); 2095 2068 used_pages += lruvec_page_state(lruvec, NR_INACTIVE_FILE); 2069 + 2070 + mem_cgroup_put(memcg); 2096 2071 2097 2072 si_meminfo_node(&i, goal->nid); 2098 2073 if (goal->metric == DAMOS_QUOTA_NODE_MEMCG_USED_BP) ··· 2784 2751 if (ctx->ops.cleanup) 2785 2752 ctx->ops.cleanup(ctx); 2786 2753 kfree(ctx->regions_score_histogram); 2754 + kdamond_call(ctx, true); 2787 2755 2788 2756 pr_debug("kdamond (%d) finishes\n", current->pid); 2789 2757 mutex_lock(&ctx->kdamond_lock); 2790 2758 ctx->kdamond = NULL; 2791 2759 mutex_unlock(&ctx->kdamond_lock); 2792 2760 2793 - kdamond_call(ctx, true); 2794 2761 damos_walk_cancel(ctx); 2795 2762 2796 2763 mutex_lock(&damon_lock);
+6 -4
mm/damon/sysfs-schemes.c
··· 2152 2152 return err; 2153 2153 err = damos_sysfs_set_dests(scheme); 2154 2154 if (err) 2155 - goto put_access_pattern_out; 2155 + goto rmdir_put_access_pattern_out; 2156 2156 err = damon_sysfs_scheme_set_quotas(scheme); 2157 2157 if (err) 2158 2158 goto put_dests_out; 2159 2159 err = damon_sysfs_scheme_set_watermarks(scheme); 2160 2160 if (err) 2161 - goto put_quotas_access_pattern_out; 2161 + goto rmdir_put_quotas_access_pattern_out; 2162 2162 err = damos_sysfs_set_filter_dirs(scheme); 2163 2163 if (err) 2164 2164 goto put_watermarks_quotas_access_pattern_out; ··· 2183 2183 put_watermarks_quotas_access_pattern_out: 2184 2184 kobject_put(&scheme->watermarks->kobj); 2185 2185 scheme->watermarks = NULL; 2186 - put_quotas_access_pattern_out: 2186 + rmdir_put_quotas_access_pattern_out: 2187 + damon_sysfs_quotas_rm_dirs(scheme->quotas); 2187 2188 kobject_put(&scheme->quotas->kobj); 2188 2189 scheme->quotas = NULL; 2189 2190 put_dests_out: 2190 2191 kobject_put(&scheme->dests->kobj); 2191 2192 scheme->dests = NULL; 2192 - put_access_pattern_out: 2193 + rmdir_put_access_pattern_out: 2194 + damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern); 2193 2195 kobject_put(&scheme->access_pattern->kobj); 2194 2196 scheme->access_pattern = NULL; 2195 2197 return err;
+6 -3
mm/damon/sysfs.c
··· 792 792 nr_regions_range = damon_sysfs_ul_range_alloc(10, 1000); 793 793 if (!nr_regions_range) { 794 794 err = -ENOMEM; 795 - goto put_intervals_out; 795 + goto rmdir_put_intervals_out; 796 796 } 797 797 798 798 err = kobject_init_and_add(&nr_regions_range->kobj, ··· 806 806 put_nr_regions_intervals_out: 807 807 kobject_put(&nr_regions_range->kobj); 808 808 attrs->nr_regions_range = NULL; 809 + rmdir_put_intervals_out: 810 + damon_sysfs_intervals_rm_dirs(intervals); 809 811 put_intervals_out: 810 812 kobject_put(&intervals->kobj); 811 813 attrs->intervals = NULL; ··· 950 948 951 949 err = damon_sysfs_context_set_targets(context); 952 950 if (err) 953 - goto put_attrs_out; 951 + goto rmdir_put_attrs_out; 954 952 955 953 err = damon_sysfs_context_set_schemes(context); 956 954 if (err) ··· 960 958 put_targets_attrs_out: 961 959 kobject_put(&context->targets->kobj); 962 960 context->targets = NULL; 963 - put_attrs_out: 961 + rmdir_put_attrs_out: 962 + damon_sysfs_attrs_rm_dirs(context->attrs); 964 963 kobject_put(&context->attrs->kobj); 965 964 context->attrs = NULL; 966 965 return err;
+16
mm/hugetlb.c
··· 4286 4286 unsigned long tmp; 4287 4287 char *p = s; 4288 4288 4289 + if (!hugepages_supported()) { 4290 + pr_warn("HugeTLB: hugepages unsupported, ignoring hugepages=%s cmdline\n", s); 4291 + return 0; 4292 + } 4293 + 4289 4294 if (!parsed_valid_hugepagesz) { 4290 4295 pr_warn("HugeTLB: hugepages=%s does not follow a valid hugepagesz, ignoring\n", s); 4291 4296 parsed_valid_hugepagesz = true; ··· 4371 4366 unsigned long size; 4372 4367 struct hstate *h; 4373 4368 4369 + if (!hugepages_supported()) { 4370 + pr_warn("HugeTLB: hugepages unsupported, ignoring hugepagesz=%s cmdline\n", s); 4371 + return 0; 4372 + } 4373 + 4374 4374 parsed_valid_hugepagesz = false; 4375 4375 size = (unsigned long)memparse(s, NULL); 4376 4376 ··· 4423 4413 { 4424 4414 unsigned long size; 4425 4415 int i; 4416 + 4417 + if (!hugepages_supported()) { 4418 + pr_warn("HugeTLB: hugepages unsupported, ignoring default_hugepagesz=%s cmdline\n", 4419 + s); 4420 + return 0; 4421 + } 4426 4422 4427 4423 parsed_valid_hugepagesz = false; 4428 4424 if (parsed_default_hugepagesz) {
+1 -1
mm/kmsan/shadow.c
··· 207 207 if (!kmsan_enabled || kmsan_in_runtime()) 208 208 return; 209 209 kmsan_enter_runtime(); 210 - kmsan_internal_poison_memory(page_address(page), page_size(page), 210 + kmsan_internal_poison_memory(page_address(page), PAGE_SIZE << order, 211 211 GFP_KERNEL & ~(__GFP_RECLAIM), 212 212 KMSAN_POISON_CHECK | KMSAN_POISON_FREE); 213 213 kmsan_leave_runtime();
+2
mm/numa_memblks.c
··· 7 7 #include <linux/numa.h> 8 8 #include <linux/numa_memblks.h> 9 9 10 + #include <asm/numa.h> 11 + 10 12 int numa_distance_cnt; 11 13 static u8 *numa_distance; 12 14
+48 -9
mm/page_alloc.c
··· 167 167 pcp_trylock_finish(UP_flags); \ 168 168 }) 169 169 170 + /* 171 + * With the UP spinlock implementation, when we spin_lock(&pcp->lock) (for i.e. 172 + * a potentially remote cpu drain) and get interrupted by an operation that 173 + * attempts pcp_spin_trylock(), we can't rely on the trylock failure due to UP 174 + * spinlock assumptions making the trylock a no-op. So we have to turn that 175 + * spin_lock() to a spin_lock_irqsave(). This works because on UP there are no 176 + * remote cpu's so we can only be locking the only existing local one. 177 + */ 178 + #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) 179 + static inline void __flags_noop(unsigned long *flags) { } 180 + #define pcp_spin_lock_maybe_irqsave(ptr, flags) \ 181 + ({ \ 182 + __flags_noop(&(flags)); \ 183 + spin_lock(&(ptr)->lock); \ 184 + }) 185 + #define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ 186 + ({ \ 187 + spin_unlock(&(ptr)->lock); \ 188 + __flags_noop(&(flags)); \ 189 + }) 190 + #else 191 + #define pcp_spin_lock_maybe_irqsave(ptr, flags) \ 192 + spin_lock_irqsave(&(ptr)->lock, flags) 193 + #define pcp_spin_unlock_maybe_irqrestore(ptr, flags) \ 194 + spin_unlock_irqrestore(&(ptr)->lock, flags) 195 + #endif 196 + 170 197 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID 171 198 DEFINE_PER_CPU(int, numa_node); 172 199 EXPORT_PER_CPU_SYMBOL(numa_node); ··· 2583 2556 bool decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp) 2584 2557 { 2585 2558 int high_min, to_drain, to_drain_batched, batch; 2559 + unsigned long UP_flags; 2586 2560 bool todo = false; 2587 2561 2588 2562 high_min = READ_ONCE(pcp->high_min); ··· 2603 2575 to_drain = pcp->count - pcp->high; 2604 2576 while (to_drain > 0) { 2605 2577 to_drain_batched = min(to_drain, batch); 2606 - spin_lock(&pcp->lock); 2578 + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); 2607 2579 free_pcppages_bulk(zone, to_drain_batched, pcp, 0); 2608 - spin_unlock(&pcp->lock); 2580 + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); 2609 2581 todo = true; 2610 2582 2611 2583 to_drain -= to_drain_batched; ··· 2622 2594 */ 2623 2595 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) 2624 2596 { 2597 + unsigned long UP_flags; 2625 2598 int to_drain, batch; 2626 2599 2627 2600 batch = READ_ONCE(pcp->batch); 2628 2601 to_drain = min(pcp->count, batch); 2629 2602 if (to_drain > 0) { 2630 - spin_lock(&pcp->lock); 2603 + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); 2631 2604 free_pcppages_bulk(zone, to_drain, pcp, 0); 2632 - spin_unlock(&pcp->lock); 2605 + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); 2633 2606 } 2634 2607 } 2635 2608 #endif ··· 2641 2612 static void drain_pages_zone(unsigned int cpu, struct zone *zone) 2642 2613 { 2643 2614 struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); 2615 + unsigned long UP_flags; 2644 2616 int count; 2645 2617 2646 2618 do { 2647 - spin_lock(&pcp->lock); 2619 + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); 2648 2620 count = pcp->count; 2649 2621 if (count) { 2650 2622 int to_drain = min(count, ··· 2654 2624 free_pcppages_bulk(zone, to_drain, pcp, 0); 2655 2625 count -= to_drain; 2656 2626 } 2657 - spin_unlock(&pcp->lock); 2627 + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); 2658 2628 } while (count); 2659 2629 } 2660 2630 ··· 6139 6109 { 6140 6110 struct per_cpu_pages *pcp; 6141 6111 struct cpu_cacheinfo *cci; 6112 + unsigned long UP_flags; 6142 6113 6143 6114 pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); 6144 6115 cci = get_cpu_cacheinfo(cpu); ··· 6150 6119 * This can reduce zone lock contention without hurting 6151 6120 * cache-hot pages sharing. 6152 6121 */ 6153 - spin_lock(&pcp->lock); 6122 + pcp_spin_lock_maybe_irqsave(pcp, UP_flags); 6154 6123 if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch) 6155 6124 pcp->flags |= PCPF_FREE_HIGH_BATCH; 6156 6125 else 6157 6126 pcp->flags &= ~PCPF_FREE_HIGH_BATCH; 6158 - spin_unlock(&pcp->lock); 6127 + pcp_spin_unlock_maybe_irqrestore(pcp, UP_flags); 6159 6128 } 6160 6129 6161 6130 void setup_pcp_cacheinfo(unsigned int cpu) ··· 6698 6667 int old_percpu_pagelist_high_fraction; 6699 6668 int ret; 6700 6669 6670 + /* 6671 + * Avoid using pcp_batch_high_lock for reads as the value is read 6672 + * atomically and a race with offlining is harmless. 6673 + */ 6674 + 6675 + if (!write) 6676 + return proc_dointvec_minmax(table, write, buffer, length, ppos); 6677 + 6701 6678 mutex_lock(&pcp_batch_high_lock); 6702 6679 old_percpu_pagelist_high_fraction = percpu_pagelist_high_fraction; 6703 6680 6704 6681 ret = proc_dointvec_minmax(table, write, buffer, length, ppos); 6705 - if (!write || ret < 0) 6682 + if (ret < 0) 6706 6683 goto out; 6707 6684 6708 6685 /* Sanity checking to avoid pcp imbalance */
+74 -37
mm/vma.c
··· 67 67 .state = VMA_MERGE_START, \ 68 68 } 69 69 70 - /* 71 - * If, at any point, the VMA had unCoW'd mappings from parents, it will maintain 72 - * more than one anon_vma_chain connecting it to more than one anon_vma. A merge 73 - * would mean a wider range of folios sharing the root anon_vma lock, and thus 74 - * potential lock contention, we do not wish to encourage merging such that this 75 - * scales to a problem. 76 - */ 77 - static bool vma_had_uncowed_parents(struct vm_area_struct *vma) 70 + /* Was this VMA ever forked from a parent, i.e. maybe contains CoW mappings? */ 71 + static bool vma_is_fork_child(struct vm_area_struct *vma) 78 72 { 79 73 /* 80 74 * The list_is_singular() test is to avoid merging VMA cloned from 81 - * parents. This can improve scalability caused by anon_vma lock. 75 + * parents. This can improve scalability caused by the anon_vma root 76 + * lock. 82 77 */ 83 78 return vma && vma->anon_vma && !list_is_singular(&vma->anon_vma_chain); 84 79 } ··· 110 115 VM_WARN_ON(src && src_anon != src->anon_vma); 111 116 112 117 /* Case 1 - we will dup_anon_vma() from src into tgt. */ 113 - if (!tgt_anon && src_anon) 114 - return !vma_had_uncowed_parents(src); 118 + if (!tgt_anon && src_anon) { 119 + struct vm_area_struct *copied_from = vmg->copied_from; 120 + 121 + if (vma_is_fork_child(src)) 122 + return false; 123 + if (vma_is_fork_child(copied_from)) 124 + return false; 125 + 126 + return true; 127 + } 115 128 /* Case 2 - we will simply use tgt's anon_vma. */ 116 129 if (tgt_anon && !src_anon) 117 - return !vma_had_uncowed_parents(tgt); 130 + return !vma_is_fork_child(tgt); 118 131 /* Case 3 - the anon_vma's are already shared. */ 119 132 return src_anon == tgt_anon; 120 133 } ··· 832 829 VM_WARN_ON_VMG(middle && 833 830 !(vma_iter_addr(vmg->vmi) >= middle->vm_start && 834 831 vma_iter_addr(vmg->vmi) < middle->vm_end), vmg); 832 + /* An existing merge can never be used by the mremap() logic. */ 833 + VM_WARN_ON_VMG(vmg->copied_from, vmg); 835 834 836 835 vmg->state = VMA_MERGE_NOMERGE; 837 836 ··· 1104 1099 } 1105 1100 1106 1101 /* 1102 + * vma_merge_copied_range - Attempt to merge a VMA that is being copied by 1103 + * mremap() 1104 + * 1105 + * @vmg: Describes the VMA we are adding, in the copied-to range @vmg->start to 1106 + * @vmg->end (exclusive), which we try to merge with any adjacent VMAs if 1107 + * possible. 1108 + * 1109 + * vmg->prev, next, start, end, pgoff should all be relative to the COPIED TO 1110 + * range, i.e. the target range for the VMA. 1111 + * 1112 + * Returns: In instances where no merge was possible, NULL. Otherwise, a pointer 1113 + * to the VMA we expanded. 1114 + * 1115 + * ASSUMPTIONS: Same as vma_merge_new_range(), except vmg->middle must contain 1116 + * the copied-from VMA. 1117 + */ 1118 + static struct vm_area_struct *vma_merge_copied_range(struct vma_merge_struct *vmg) 1119 + { 1120 + /* We must have a copied-from VMA. */ 1121 + VM_WARN_ON_VMG(!vmg->middle, vmg); 1122 + 1123 + vmg->copied_from = vmg->middle; 1124 + vmg->middle = NULL; 1125 + return vma_merge_new_range(vmg); 1126 + } 1127 + 1128 + /* 1107 1129 * vma_expand - Expand an existing VMA 1108 1130 * 1109 1131 * @vmg: Describes a VMA expansion operation. ··· 1149 1117 int vma_expand(struct vma_merge_struct *vmg) 1150 1118 { 1151 1119 struct vm_area_struct *anon_dup = NULL; 1152 - bool remove_next = false; 1153 1120 struct vm_area_struct *target = vmg->target; 1154 1121 struct vm_area_struct *next = vmg->next; 1122 + bool remove_next = false; 1155 1123 vm_flags_t sticky_flags; 1156 - 1157 - sticky_flags = vmg->vm_flags & VM_STICKY; 1158 - sticky_flags |= target->vm_flags & VM_STICKY; 1159 - 1160 - VM_WARN_ON_VMG(!target, vmg); 1124 + int ret = 0; 1161 1125 1162 1126 mmap_assert_write_locked(vmg->mm); 1163 - 1164 1127 vma_start_write(target); 1165 - if (next && (target != next) && (vmg->end == next->vm_end)) { 1166 - int ret; 1167 1128 1168 - sticky_flags |= next->vm_flags & VM_STICKY; 1129 + if (next && target != next && vmg->end == next->vm_end) 1169 1130 remove_next = true; 1170 - /* This should already have been checked by this point. */ 1171 - VM_WARN_ON_VMG(!can_merge_remove_vma(next), vmg); 1172 - vma_start_write(next); 1173 - /* 1174 - * In this case we don't report OOM, so vmg->give_up_on_mm is 1175 - * safe. 1176 - */ 1177 - ret = dup_anon_vma(target, next, &anon_dup); 1178 - if (ret) 1179 - return ret; 1180 - } 1181 1131 1132 + /* We must have a target. */ 1133 + VM_WARN_ON_VMG(!target, vmg); 1134 + /* This should have already been checked by this point. */ 1135 + VM_WARN_ON_VMG(remove_next && !can_merge_remove_vma(next), vmg); 1182 1136 /* Not merging but overwriting any part of next is not handled. */ 1183 1137 VM_WARN_ON_VMG(next && !remove_next && 1184 1138 next != target && vmg->end > next->vm_start, vmg); 1185 - /* Only handles expanding */ 1139 + /* Only handles expanding. */ 1186 1140 VM_WARN_ON_VMG(target->vm_start < vmg->start || 1187 1141 target->vm_end > vmg->end, vmg); 1188 1142 1143 + sticky_flags = vmg->vm_flags & VM_STICKY; 1144 + sticky_flags |= target->vm_flags & VM_STICKY; 1189 1145 if (remove_next) 1190 - vmg->__remove_next = true; 1146 + sticky_flags |= next->vm_flags & VM_STICKY; 1191 1147 1148 + /* 1149 + * If we are removing the next VMA or copying from a VMA 1150 + * (e.g. mremap()'ing), we must propagate anon_vma state. 1151 + * 1152 + * Note that, by convention, callers ignore OOM for this case, so 1153 + * we don't need to account for vmg->give_up_on_mm here. 1154 + */ 1155 + if (remove_next) 1156 + ret = dup_anon_vma(target, next, &anon_dup); 1157 + if (!ret && vmg->copied_from) 1158 + ret = dup_anon_vma(target, vmg->copied_from, &anon_dup); 1159 + if (ret) 1160 + return ret; 1161 + 1162 + if (remove_next) { 1163 + vma_start_write(next); 1164 + vmg->__remove_next = true; 1165 + } 1192 1166 if (commit_merge(vmg)) 1193 1167 goto nomem; 1194 1168 ··· 1866 1828 if (new_vma && new_vma->vm_start < addr + len) 1867 1829 return NULL; /* should never get here */ 1868 1830 1869 - vmg.middle = NULL; /* New VMA range. */ 1870 1831 vmg.pgoff = pgoff; 1871 1832 vmg.next = vma_iter_next_rewind(&vmi, NULL); 1872 - new_vma = vma_merge_new_range(&vmg); 1833 + new_vma = vma_merge_copied_range(&vmg); 1873 1834 1874 1835 if (new_vma) { 1875 1836 /*
+3
mm/vma.h
··· 106 106 struct anon_vma_name *anon_name; 107 107 enum vma_merge_state state; 108 108 109 + /* If copied from (i.e. mremap()'d) the VMA from which we are copying. */ 110 + struct vm_area_struct *copied_from; 111 + 109 112 /* Flags which callers can use to modify merge behaviour: */ 110 113 111 114 /*
+1 -1
mm/vmalloc.c
··· 4248 4248 EXPORT_SYMBOL(vzalloc_node_noprof); 4249 4249 4250 4250 /** 4251 - * vrealloc_node_align_noprof - reallocate virtually contiguous memory; contents 4251 + * vrealloc_node_align - reallocate virtually contiguous memory; contents 4252 4252 * remain unchanged 4253 4253 * @p: object to reallocate memory for 4254 4254 * @size: the size to reallocate
+1 -1
mm/zswap.c
··· 787 787 return 0; 788 788 789 789 fail: 790 - if (acomp) 790 + if (!IS_ERR_OR_NULL(acomp)) 791 791 crypto_free_acomp(acomp); 792 792 kfree(buffer); 793 793 return ret;
+1 -1
tools/testing/selftests/mm/gup_longterm.c
··· 179 179 if (rw && shared && fs_is_unknown(fs_type)) { 180 180 ksft_print_msg("Unknown filesystem\n"); 181 181 result = KSFT_SKIP; 182 - return; 182 + break; 183 183 } 184 184 /* 185 185 * R/O pinning or pinning in a private mapping is always
+357 -27
tools/testing/selftests/mm/merge.c
··· 22 22 struct procmap_fd procmap; 23 23 }; 24 24 25 + static char *map_carveout(unsigned int page_size) 26 + { 27 + return mmap(NULL, 30 * page_size, PROT_NONE, 28 + MAP_ANON | MAP_PRIVATE, -1, 0); 29 + } 30 + 31 + static pid_t do_fork(struct procmap_fd *procmap) 32 + { 33 + pid_t pid = fork(); 34 + 35 + if (pid == -1) 36 + return -1; 37 + if (pid != 0) { 38 + wait(NULL); 39 + return pid; 40 + } 41 + 42 + /* Reopen for child. */ 43 + if (close_procmap(procmap)) 44 + return -1; 45 + if (open_self_procmap(procmap)) 46 + return -1; 47 + 48 + return 0; 49 + } 50 + 25 51 FIXTURE_SETUP(merge) 26 52 { 27 53 self->page_size = psize(); 28 54 /* Carve out PROT_NONE region to map over. */ 29 - self->carveout = mmap(NULL, 30 * self->page_size, PROT_NONE, 30 - MAP_ANON | MAP_PRIVATE, -1, 0); 55 + self->carveout = map_carveout(self->page_size); 31 56 ASSERT_NE(self->carveout, MAP_FAILED); 32 57 /* Setup PROCMAP_QUERY interface. */ 33 58 ASSERT_EQ(open_self_procmap(&self->procmap), 0); ··· 61 36 FIXTURE_TEARDOWN(merge) 62 37 { 63 38 ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0); 64 - ASSERT_EQ(close_procmap(&self->procmap), 0); 39 + /* May fail for parent of forked process. */ 40 + close_procmap(&self->procmap); 65 41 /* 66 42 * Clear unconditionally, as some tests set this. It is no issue if this 67 43 * fails (KSM may be disabled for instance). 68 44 */ 45 + prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0); 46 + } 47 + 48 + FIXTURE(merge_with_fork) 49 + { 50 + unsigned int page_size; 51 + char *carveout; 52 + struct procmap_fd procmap; 53 + }; 54 + 55 + FIXTURE_VARIANT(merge_with_fork) 56 + { 57 + bool forked; 58 + }; 59 + 60 + FIXTURE_VARIANT_ADD(merge_with_fork, forked) 61 + { 62 + .forked = true, 63 + }; 64 + 65 + FIXTURE_VARIANT_ADD(merge_with_fork, unforked) 66 + { 67 + .forked = false, 68 + }; 69 + 70 + FIXTURE_SETUP(merge_with_fork) 71 + { 72 + self->page_size = psize(); 73 + self->carveout = map_carveout(self->page_size); 74 + ASSERT_NE(self->carveout, MAP_FAILED); 75 + ASSERT_EQ(open_self_procmap(&self->procmap), 0); 76 + } 77 + 78 + FIXTURE_TEARDOWN(merge_with_fork) 79 + { 80 + ASSERT_EQ(munmap(self->carveout, 30 * self->page_size), 0); 81 + ASSERT_EQ(close_procmap(&self->procmap), 0); 82 + /* See above. */ 69 83 prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0); 70 84 } 71 85 ··· 386 322 unsigned int page_size = self->page_size; 387 323 char *carveout = self->carveout; 388 324 struct procmap_fd *procmap = &self->procmap; 389 - pid_t pid; 390 325 char *ptr, *ptr2; 326 + pid_t pid; 391 327 int i; 392 328 393 329 /* ··· 408 344 */ 409 345 ptr[0] = 'x'; 410 346 411 - pid = fork(); 347 + pid = do_fork(&self->procmap); 412 348 ASSERT_NE(pid, -1); 413 - 414 - if (pid != 0) { 415 - wait(NULL); 349 + if (pid != 0) 416 350 return; 417 - } 418 - 419 - /* Child process below: */ 420 - 421 - /* Reopen for child. */ 422 - ASSERT_EQ(close_procmap(&self->procmap), 0); 423 - ASSERT_EQ(open_self_procmap(&self->procmap), 0); 424 351 425 352 /* unCOWing everything does not cause the AVC to go away. */ 426 353 for (i = 0; i < 5 * page_size; i += page_size) ··· 441 386 unsigned int page_size = self->page_size; 442 387 char *carveout = self->carveout; 443 388 struct procmap_fd *procmap = &self->procmap; 444 - pid_t pid; 445 389 char *ptr, *ptr2; 390 + pid_t pid; 446 391 int i; 447 392 448 393 /* ··· 463 408 */ 464 409 ptr[0] = 'x'; 465 410 466 - pid = fork(); 411 + pid = do_fork(&self->procmap); 467 412 ASSERT_NE(pid, -1); 468 - 469 - if (pid != 0) { 470 - wait(NULL); 413 + if (pid != 0) 471 414 return; 472 - } 473 - 474 - /* Child process below: */ 475 - 476 - /* Reopen for child. */ 477 - ASSERT_EQ(close_procmap(&self->procmap), 0); 478 - ASSERT_EQ(open_self_procmap(&self->procmap), 0); 479 415 480 416 /* unCOWing everything does not cause the AVC to go away. */ 481 417 for (i = 0; i < 5 * page_size; i += page_size) ··· 1215 1169 ASSERT_TRUE(find_vma_procmap(procmap, ptr)); 1216 1170 ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr); 1217 1171 ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr + 15 * page_size); 1172 + } 1173 + 1174 + TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev) 1175 + { 1176 + struct procmap_fd *procmap = &self->procmap; 1177 + unsigned int page_size = self->page_size; 1178 + unsigned long offset; 1179 + char *ptr_a, *ptr_b; 1180 + 1181 + /* 1182 + * mremap() such that A and B merge: 1183 + * 1184 + * |------------| 1185 + * | \ | 1186 + * |-----------| | / |---------| 1187 + * | unfaulted | v \ | faulted | 1188 + * |-----------| / |---------| 1189 + * B \ A 1190 + */ 1191 + 1192 + /* Map VMA A into place. */ 1193 + ptr_a = mmap(&self->carveout[page_size + 3 * page_size], 1194 + 3 * page_size, 1195 + PROT_READ | PROT_WRITE, 1196 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1197 + ASSERT_NE(ptr_a, MAP_FAILED); 1198 + /* Fault it in. */ 1199 + ptr_a[0] = 'x'; 1200 + 1201 + if (variant->forked) { 1202 + pid_t pid = do_fork(&self->procmap); 1203 + 1204 + ASSERT_NE(pid, -1); 1205 + if (pid != 0) 1206 + return; 1207 + } 1208 + 1209 + /* 1210 + * Now move it out of the way so we can place VMA B in position, 1211 + * unfaulted. 1212 + */ 1213 + ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size, 1214 + MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]); 1215 + ASSERT_NE(ptr_a, MAP_FAILED); 1216 + 1217 + /* Map VMA B into place. */ 1218 + ptr_b = mmap(&self->carveout[page_size], 3 * page_size, 1219 + PROT_READ | PROT_WRITE, 1220 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1221 + ASSERT_NE(ptr_b, MAP_FAILED); 1222 + 1223 + /* 1224 + * Now move VMA A into position with MREMAP_DONTUNMAP to catch incorrect 1225 + * anon_vma propagation. 1226 + */ 1227 + ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size, 1228 + MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 1229 + &self->carveout[page_size + 3 * page_size]); 1230 + ASSERT_NE(ptr_a, MAP_FAILED); 1231 + 1232 + /* The VMAs should have merged, if not forked. */ 1233 + ASSERT_TRUE(find_vma_procmap(procmap, ptr_b)); 1234 + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b); 1235 + 1236 + offset = variant->forked ? 3 * page_size : 6 * page_size; 1237 + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + offset); 1238 + } 1239 + 1240 + TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_next) 1241 + { 1242 + struct procmap_fd *procmap = &self->procmap; 1243 + unsigned int page_size = self->page_size; 1244 + unsigned long offset; 1245 + char *ptr_a, *ptr_b; 1246 + 1247 + /* 1248 + * mremap() such that A and B merge: 1249 + * 1250 + * |---------------------------| 1251 + * | \ | 1252 + * | |-----------| / |---------| 1253 + * v | unfaulted | \ | faulted | 1254 + * |-----------| / |---------| 1255 + * B \ A 1256 + * 1257 + * Then unmap VMA A to trigger the bug. 1258 + */ 1259 + 1260 + /* Map VMA A into place. */ 1261 + ptr_a = mmap(&self->carveout[page_size], 3 * page_size, 1262 + PROT_READ | PROT_WRITE, 1263 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1264 + ASSERT_NE(ptr_a, MAP_FAILED); 1265 + /* Fault it in. */ 1266 + ptr_a[0] = 'x'; 1267 + 1268 + if (variant->forked) { 1269 + pid_t pid = do_fork(&self->procmap); 1270 + 1271 + ASSERT_NE(pid, -1); 1272 + if (pid != 0) 1273 + return; 1274 + } 1275 + 1276 + /* 1277 + * Now move it out of the way so we can place VMA B in position, 1278 + * unfaulted. 1279 + */ 1280 + ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size, 1281 + MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]); 1282 + ASSERT_NE(ptr_a, MAP_FAILED); 1283 + 1284 + /* Map VMA B into place. */ 1285 + ptr_b = mmap(&self->carveout[page_size + 3 * page_size], 3 * page_size, 1286 + PROT_READ | PROT_WRITE, 1287 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1288 + ASSERT_NE(ptr_b, MAP_FAILED); 1289 + 1290 + /* 1291 + * Now move VMA A into position with MREMAP_DONTUNMAP to catch incorrect 1292 + * anon_vma propagation. 1293 + */ 1294 + ptr_a = mremap(ptr_a, 3 * page_size, 3 * page_size, 1295 + MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 1296 + &self->carveout[page_size]); 1297 + ASSERT_NE(ptr_a, MAP_FAILED); 1298 + 1299 + /* The VMAs should have merged, if not forked. */ 1300 + ASSERT_TRUE(find_vma_procmap(procmap, ptr_a)); 1301 + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a); 1302 + offset = variant->forked ? 3 * page_size : 6 * page_size; 1303 + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + offset); 1304 + } 1305 + 1306 + TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev_unfaulted_next) 1307 + { 1308 + struct procmap_fd *procmap = &self->procmap; 1309 + unsigned int page_size = self->page_size; 1310 + unsigned long offset; 1311 + char *ptr_a, *ptr_b, *ptr_c; 1312 + 1313 + /* 1314 + * mremap() with MREMAP_DONTUNMAP such that A, B and C merge: 1315 + * 1316 + * |---------------------------| 1317 + * | \ | 1318 + * |-----------| | |-----------| / |---------| 1319 + * | unfaulted | v | unfaulted | \ | faulted | 1320 + * |-----------| |-----------| / |---------| 1321 + * A C \ B 1322 + */ 1323 + 1324 + /* Map VMA B into place. */ 1325 + ptr_b = mmap(&self->carveout[page_size + 3 * page_size], 3 * page_size, 1326 + PROT_READ | PROT_WRITE, 1327 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1328 + ASSERT_NE(ptr_b, MAP_FAILED); 1329 + /* Fault it in. */ 1330 + ptr_b[0] = 'x'; 1331 + 1332 + if (variant->forked) { 1333 + pid_t pid = do_fork(&self->procmap); 1334 + 1335 + ASSERT_NE(pid, -1); 1336 + if (pid != 0) 1337 + return; 1338 + } 1339 + 1340 + /* 1341 + * Now move it out of the way so we can place VMAs A, C in position, 1342 + * unfaulted. 1343 + */ 1344 + ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size, 1345 + MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]); 1346 + ASSERT_NE(ptr_b, MAP_FAILED); 1347 + 1348 + /* Map VMA A into place. */ 1349 + 1350 + ptr_a = mmap(&self->carveout[page_size], 3 * page_size, 1351 + PROT_READ | PROT_WRITE, 1352 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1353 + ASSERT_NE(ptr_a, MAP_FAILED); 1354 + 1355 + /* Map VMA C into place. */ 1356 + ptr_c = mmap(&self->carveout[page_size + 3 * page_size + 3 * page_size], 1357 + 3 * page_size, PROT_READ | PROT_WRITE, 1358 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1359 + ASSERT_NE(ptr_c, MAP_FAILED); 1360 + 1361 + /* 1362 + * Now move VMA B into position with MREMAP_DONTUNMAP to catch incorrect 1363 + * anon_vma propagation. 1364 + */ 1365 + ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size, 1366 + MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 1367 + &self->carveout[page_size + 3 * page_size]); 1368 + ASSERT_NE(ptr_b, MAP_FAILED); 1369 + 1370 + /* The VMAs should have merged, if not forked. */ 1371 + ASSERT_TRUE(find_vma_procmap(procmap, ptr_a)); 1372 + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a); 1373 + offset = variant->forked ? 3 * page_size : 9 * page_size; 1374 + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + offset); 1375 + 1376 + /* If forked, B and C should also not have merged. */ 1377 + if (variant->forked) { 1378 + ASSERT_TRUE(find_vma_procmap(procmap, ptr_b)); 1379 + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b); 1380 + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + 3 * page_size); 1381 + } 1382 + } 1383 + 1384 + TEST_F(merge_with_fork, mremap_faulted_to_unfaulted_prev_faulted_next) 1385 + { 1386 + struct procmap_fd *procmap = &self->procmap; 1387 + unsigned int page_size = self->page_size; 1388 + char *ptr_a, *ptr_b, *ptr_bc; 1389 + 1390 + /* 1391 + * mremap() with MREMAP_DONTUNMAP such that A, B and C merge: 1392 + * 1393 + * |---------------------------| 1394 + * | \ | 1395 + * |-----------| | |-----------| / |---------| 1396 + * | unfaulted | v | faulted | \ | faulted | 1397 + * |-----------| |-----------| / |---------| 1398 + * A C \ B 1399 + */ 1400 + 1401 + /* 1402 + * Map VMA B and C into place. We have to map them together so their 1403 + * anon_vma is the same and the vma->vm_pgoff's are correctly aligned. 1404 + */ 1405 + ptr_bc = mmap(&self->carveout[page_size + 3 * page_size], 1406 + 3 * page_size + 3 * page_size, 1407 + PROT_READ | PROT_WRITE, 1408 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1409 + ASSERT_NE(ptr_bc, MAP_FAILED); 1410 + 1411 + /* Fault it in. */ 1412 + ptr_bc[0] = 'x'; 1413 + 1414 + if (variant->forked) { 1415 + pid_t pid = do_fork(&self->procmap); 1416 + 1417 + ASSERT_NE(pid, -1); 1418 + if (pid != 0) 1419 + return; 1420 + } 1421 + 1422 + /* 1423 + * Now move VMA B out the way (splitting VMA BC) so we can place VMA A 1424 + * in position, unfaulted, and leave the remainder of the VMA we just 1425 + * moved in place, faulted, as VMA C. 1426 + */ 1427 + ptr_b = mremap(ptr_bc, 3 * page_size, 3 * page_size, 1428 + MREMAP_FIXED | MREMAP_MAYMOVE, &self->carveout[20 * page_size]); 1429 + ASSERT_NE(ptr_b, MAP_FAILED); 1430 + 1431 + /* Map VMA A into place. */ 1432 + ptr_a = mmap(&self->carveout[page_size], 3 * page_size, 1433 + PROT_READ | PROT_WRITE, 1434 + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); 1435 + ASSERT_NE(ptr_a, MAP_FAILED); 1436 + 1437 + /* 1438 + * Now move VMA B into position with MREMAP_DONTUNMAP to catch incorrect 1439 + * anon_vma propagation. 1440 + */ 1441 + ptr_b = mremap(ptr_b, 3 * page_size, 3 * page_size, 1442 + MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 1443 + &self->carveout[page_size + 3 * page_size]); 1444 + ASSERT_NE(ptr_b, MAP_FAILED); 1445 + 1446 + /* The VMAs should have merged. A,B,C if unforked, B, C if forked. */ 1447 + if (variant->forked) { 1448 + ASSERT_TRUE(find_vma_procmap(procmap, ptr_b)); 1449 + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_b); 1450 + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_b + 6 * page_size); 1451 + } else { 1452 + ASSERT_TRUE(find_vma_procmap(procmap, ptr_a)); 1453 + ASSERT_EQ(procmap->query.vma_start, (unsigned long)ptr_a); 1454 + ASSERT_EQ(procmap->query.vma_end, (unsigned long)ptr_a + 9 * page_size); 1455 + } 1218 1456 } 1219 1457 1220 1458 TEST_HARNESS_MAIN