Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mm-hotfixes-stable-2023-12-15-07-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"17 hotfixes. 8 are cc:stable and the other 9 pertain to post-6.6
issues"

* tag 'mm-hotfixes-stable-2023-12-15-07-11' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mm/mglru: reclaim offlined memcgs harder
mm/mglru: respect min_ttl_ms with memcgs
mm/mglru: try to stop at high watermarks
mm/mglru: fix underprotected page cache
mm/shmem: fix race in shmem_undo_range w/THP
Revert "selftests: error out if kernel header files are not yet built"
crash_core: fix the check for whether crashkernel is from high memory
x86, kexec: fix the wrong ifdeffery CONFIG_KEXEC
sh, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC
mips, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC
m68k, kexec: fix the incorrect ifdeffery and build dependency of CONFIG_KEXEC
loongarch, kexec: change dependency of object files
mm/damon/core: make damon_start() waits until kdamond_fn() starts
selftests/mm: cow: print ksft header before printing anything else
mm: fix VMA heap bounds checking
riscv: fix VMALLOC_START definition
kexec: drop dependency on ARCH_SUPPORTS_KEXEC from CRASH_DUMP

+171 -158
+1 -1
arch/loongarch/kernel/Makefile
··· 57 57 58 58 obj-$(CONFIG_RELOCATABLE) += relocate.o 59 59 60 - obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 60 + obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o 61 61 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 62 62 63 63 obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
+2 -2
arch/m68k/include/asm/kexec.h
··· 2 2 #ifndef _ASM_M68K_KEXEC_H 3 3 #define _ASM_M68K_KEXEC_H 4 4 5 - #ifdef CONFIG_KEXEC 5 + #ifdef CONFIG_KEXEC_CORE 6 6 7 7 /* Maximum physical address we can use pages from */ 8 8 #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) ··· 25 25 26 26 #endif /* __ASSEMBLY__ */ 27 27 28 - #endif /* CONFIG_KEXEC */ 28 + #endif /* CONFIG_KEXEC_CORE */ 29 29 30 30 #endif /* _ASM_M68K_KEXEC_H */
+1 -1
arch/m68k/kernel/Makefile
··· 25 25 26 26 obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o 27 27 28 - obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 28 + obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o 29 29 obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o 30 30 obj-$(CONFIG_UBOOT) += uboot.o 31 31
+2 -2
arch/mips/cavium-octeon/smp.c
··· 422 422 .cpu_disable = octeon_cpu_disable, 423 423 .cpu_die = octeon_cpu_die, 424 424 #endif 425 - #ifdef CONFIG_KEXEC 425 + #ifdef CONFIG_KEXEC_CORE 426 426 .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, 427 427 #endif 428 428 }; ··· 502 502 .cpu_disable = octeon_cpu_disable, 503 503 .cpu_die = octeon_cpu_die, 504 504 #endif 505 - #ifdef CONFIG_KEXEC 505 + #ifdef CONFIG_KEXEC_CORE 506 506 .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, 507 507 #endif 508 508 };
+1 -1
arch/mips/include/asm/kexec.h
··· 31 31 prepare_frametrace(newregs); 32 32 } 33 33 34 - #ifdef CONFIG_KEXEC 34 + #ifdef CONFIG_KEXEC_CORE 35 35 struct kimage; 36 36 extern unsigned long kexec_args[4]; 37 37 extern int (*_machine_kexec_prepare)(struct kimage *);
+1 -1
arch/mips/include/asm/smp-ops.h
··· 35 35 void (*cpu_die)(unsigned int cpu); 36 36 void (*cleanup_dead_cpu)(unsigned cpu); 37 37 #endif 38 - #ifdef CONFIG_KEXEC 38 + #ifdef CONFIG_KEXEC_CORE 39 39 void (*kexec_nonboot_cpu)(void); 40 40 #endif 41 41 };
+1 -1
arch/mips/include/asm/smp.h
··· 93 93 extern void __noreturn play_dead(void); 94 94 #endif 95 95 96 - #ifdef CONFIG_KEXEC 96 + #ifdef CONFIG_KEXEC_CORE 97 97 static inline void kexec_nonboot_cpu(void) 98 98 { 99 99 extern const struct plat_smp_ops *mp_ops; /* private */
+1 -1
arch/mips/kernel/Makefile
··· 90 90 91 91 obj-$(CONFIG_RELOCATABLE) += relocate.o 92 92 93 - obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o 93 + obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o 94 94 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 95 95 obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 96 96 obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o
+2 -2
arch/mips/kernel/smp-bmips.c
··· 434 434 .cpu_disable = bmips_cpu_disable, 435 435 .cpu_die = bmips_cpu_die, 436 436 #endif 437 - #ifdef CONFIG_KEXEC 437 + #ifdef CONFIG_KEXEC_CORE 438 438 .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, 439 439 #endif 440 440 }; ··· 451 451 .cpu_disable = bmips_cpu_disable, 452 452 .cpu_die = bmips_cpu_die, 453 453 #endif 454 - #ifdef CONFIG_KEXEC 454 + #ifdef CONFIG_KEXEC_CORE 455 455 .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, 456 456 #endif 457 457 };
+5 -5
arch/mips/kernel/smp-cps.c
··· 392 392 local_irq_enable(); 393 393 } 394 394 395 - #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) 395 + #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) 396 396 397 397 enum cpu_death { 398 398 CPU_DEATH_HALT, ··· 429 429 } 430 430 } 431 431 432 - #ifdef CONFIG_KEXEC 432 + #ifdef CONFIG_KEXEC_CORE 433 433 434 434 static void cps_kexec_nonboot_cpu(void) 435 435 { ··· 439 439 cps_shutdown_this_cpu(CPU_DEATH_POWER); 440 440 } 441 441 442 - #endif /* CONFIG_KEXEC */ 442 + #endif /* CONFIG_KEXEC_CORE */ 443 443 444 - #endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */ 444 + #endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */ 445 445 446 446 #ifdef CONFIG_HOTPLUG_CPU 447 447 ··· 610 610 .cpu_die = cps_cpu_die, 611 611 .cleanup_dead_cpu = cps_cleanup_dead_cpu, 612 612 #endif 613 - #ifdef CONFIG_KEXEC 613 + #ifdef CONFIG_KEXEC_CORE 614 614 .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, 615 615 #endif 616 616 };
+2 -2
arch/mips/loongson64/reset.c
··· 53 53 } 54 54 } 55 55 56 - #ifdef CONFIG_KEXEC 56 + #ifdef CONFIG_KEXEC_CORE 57 57 58 58 /* 0X80000000~0X80200000 is safe */ 59 59 #define MAX_ARGS 64 ··· 158 158 _machine_halt = loongson_halt; 159 159 pm_power_off = loongson_poweroff; 160 160 161 - #ifdef CONFIG_KEXEC 161 + #ifdef CONFIG_KEXEC_CORE 162 162 kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); 163 163 if (WARN_ON(!kexec_argv)) 164 164 return -ENOMEM;
+1 -1
arch/mips/loongson64/smp.c
··· 864 864 .cpu_disable = loongson3_cpu_disable, 865 865 .cpu_die = loongson3_cpu_die, 866 866 #endif 867 - #ifdef CONFIG_KEXEC 867 + #ifdef CONFIG_KEXEC_CORE 868 868 .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, 869 869 #endif 870 870 };
+2 -2
arch/riscv/Kconfig
··· 685 685 If unsure what to do here, say N. 686 686 687 687 config ARCH_SUPPORTS_KEXEC 688 - def_bool MMU 688 + def_bool y 689 689 690 690 config ARCH_SELECTS_KEXEC 691 691 def_bool y ··· 693 693 select HOTPLUG_CPU if SMP 694 694 695 695 config ARCH_SUPPORTS_KEXEC_FILE 696 - def_bool 64BIT && MMU 696 + def_bool 64BIT 697 697 698 698 config ARCH_SELECTS_KEXEC_FILE 699 699 def_bool y
+1 -1
arch/riscv/include/asm/pgtable.h
··· 899 899 #define PAGE_KERNEL __pgprot(0) 900 900 #define swapper_pg_dir NULL 901 901 #define TASK_SIZE 0xffffffffUL 902 - #define VMALLOC_START 0 902 + #define VMALLOC_START _AC(0, UL) 903 903 #define VMALLOC_END TASK_SIZE 904 904 905 905 #endif /* !CONFIG_MMU */
+3 -1
arch/riscv/kernel/crash_core.c
··· 5 5 6 6 void arch_crash_save_vmcoreinfo(void) 7 7 { 8 - VMCOREINFO_NUMBER(VA_BITS); 9 8 VMCOREINFO_NUMBER(phys_ram_base); 10 9 11 10 vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); 12 11 vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); 13 12 vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); 13 + #ifdef CONFIG_MMU 14 + VMCOREINFO_NUMBER(VA_BITS); 14 15 vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); 15 16 vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); 16 17 #ifdef CONFIG_64BIT 17 18 vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); 18 19 vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); 20 + #endif 19 21 #endif 20 22 vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); 21 23 vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n",
+2 -2
arch/sh/include/asm/kexec.h
··· 28 28 /* The native architecture */ 29 29 #define KEXEC_ARCH KEXEC_ARCH_SH 30 30 31 - #ifdef CONFIG_KEXEC 31 + #ifdef CONFIG_KEXEC_CORE 32 32 /* arch/sh/kernel/machine_kexec.c */ 33 33 void reserve_crashkernel(void); 34 34 ··· 67 67 } 68 68 #else 69 69 static inline void reserve_crashkernel(void) { } 70 - #endif /* CONFIG_KEXEC */ 70 + #endif /* CONFIG_KEXEC_CORE */ 71 71 72 72 #endif /* __ASM_SH_KEXEC_H */
+1 -1
arch/sh/kernel/Makefile
··· 33 33 obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o 34 34 obj-$(CONFIG_KGDB) += kgdb.o 35 35 obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o 36 - obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o 36 + obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o 37 37 obj-$(CONFIG_CRASH_DUMP) += crash_dump.o 38 38 obj-$(CONFIG_STACKTRACE) += stacktrace.o 39 39 obj-$(CONFIG_IO_TRAPPED) += io_trapped.o
+2 -2
arch/sh/kernel/reboot.c
··· 63 63 .shutdown = native_machine_shutdown, 64 64 .restart = native_machine_restart, 65 65 .halt = native_machine_halt, 66 - #ifdef CONFIG_KEXEC 66 + #ifdef CONFIG_KEXEC_CORE 67 67 .crash_shutdown = native_machine_crash_shutdown, 68 68 #endif 69 69 }; ··· 88 88 machine_ops.halt(); 89 89 } 90 90 91 - #ifdef CONFIG_KEXEC 91 + #ifdef CONFIG_KEXEC_CORE 92 92 void machine_crash_shutdown(struct pt_regs *regs) 93 93 { 94 94 machine_ops.crash_shutdown(regs);
+1 -1
arch/sh/kernel/setup.c
··· 220 220 request_resource(res, &code_resource); 221 221 request_resource(res, &data_resource); 222 222 request_resource(res, &bss_resource); 223 - #ifdef CONFIG_KEXEC 223 + #ifdef CONFIG_KEXEC_CORE 224 224 request_resource(res, &crashk_res); 225 225 #endif 226 226
+1 -1
arch/x86/boot/compressed/acpi.c
··· 178 178 { 179 179 unsigned long addr = 0; 180 180 181 - #ifdef CONFIG_KEXEC 181 + #ifdef CONFIG_KEXEC_CORE 182 182 char val[MAX_ADDR_LEN] = { }; 183 183 int ret; 184 184
+2
include/linux/damon.h
··· 559 559 * update 560 560 */ 561 561 unsigned long next_ops_update_sis; 562 + /* for waiting until the execution of the kdamond_fn is started */ 563 + struct completion kdamond_started; 562 564 563 565 /* public: */ 564 566 struct task_struct *kdamond;
+4 -4
include/linux/mm.h
··· 886 886 */ 887 887 static inline bool vma_is_initial_heap(const struct vm_area_struct *vma) 888 888 { 889 - return vma->vm_start <= vma->vm_mm->brk && 890 - vma->vm_end >= vma->vm_mm->start_brk; 889 + return vma->vm_start < vma->vm_mm->brk && 890 + vma->vm_end > vma->vm_mm->start_brk; 891 891 } 892 892 893 893 /* ··· 901 901 * its "stack". It's not even well-defined for programs written 902 902 * languages like Go. 903 903 */ 904 - return vma->vm_start <= vma->vm_mm->start_stack && 905 - vma->vm_end >= vma->vm_mm->start_stack; 904 + return vma->vm_start <= vma->vm_mm->start_stack && 905 + vma->vm_end >= vma->vm_mm->start_stack; 906 906 } 907 907 908 908 static inline bool vma_is_temporary_stack(struct vm_area_struct *vma)
+14 -9
include/linux/mm_inline.h
··· 232 232 if (folio_test_unevictable(folio) || !lrugen->enabled) 233 233 return false; 234 234 /* 235 - * There are three common cases for this page: 236 - * 1. If it's hot, e.g., freshly faulted in or previously hot and 237 - * migrated, add it to the youngest generation. 238 - * 2. If it's cold but can't be evicted immediately, i.e., an anon page 239 - * not in swapcache or a dirty page pending writeback, add it to the 240 - * second oldest generation. 241 - * 3. Everything else (clean, cold) is added to the oldest generation. 235 + * There are four common cases for this page: 236 + * 1. If it's hot, i.e., freshly faulted in, add it to the youngest 237 + * generation, and it's protected over the rest below. 238 + * 2. If it can't be evicted immediately, i.e., a dirty page pending 239 + * writeback, add it to the second youngest generation. 240 + * 3. If it should be evicted first, e.g., cold and clean from 241 + * folio_rotate_reclaimable(), add it to the oldest generation. 242 + * 4. Everything else falls between 2 & 3 above and is added to the 243 + * second oldest generation if it's considered inactive, or the 244 + * oldest generation otherwise. See lru_gen_is_active(). 242 245 */ 243 246 if (folio_test_active(folio)) 244 247 seq = lrugen->max_seq; 245 248 else if ((type == LRU_GEN_ANON && !folio_test_swapcache(folio)) || 246 249 (folio_test_reclaim(folio) && 247 250 (folio_test_dirty(folio) || folio_test_writeback(folio)))) 248 - seq = lrugen->min_seq[type] + 1; 249 - else 251 + seq = lrugen->max_seq - 1; 252 + else if (reclaiming || lrugen->min_seq[type] + MIN_NR_GENS >= lrugen->max_seq) 250 253 seq = lrugen->min_seq[type]; 254 + else 255 + seq = lrugen->min_seq[type] + 1; 251 256 252 257 gen = lru_gen_from_seq(seq); 253 258 flags = (gen + 1UL) << LRU_GEN_PGOFF;
+19 -15
include/linux/mmzone.h
··· 505 505 * the old generation, is incremented when all its bins become empty. 506 506 * 507 507 * There are four operations: 508 - * 1. MEMCG_LRU_HEAD, which moves an memcg to the head of a random bin in its 508 + * 1. MEMCG_LRU_HEAD, which moves a memcg to the head of a random bin in its 509 509 * current generation (old or young) and updates its "seg" to "head"; 510 - * 2. MEMCG_LRU_TAIL, which moves an memcg to the tail of a random bin in its 510 + * 2. MEMCG_LRU_TAIL, which moves a memcg to the tail of a random bin in its 511 511 * current generation (old or young) and updates its "seg" to "tail"; 512 - * 3. MEMCG_LRU_OLD, which moves an memcg to the head of a random bin in the old 512 + * 3. MEMCG_LRU_OLD, which moves a memcg to the head of a random bin in the old 513 513 * generation, updates its "gen" to "old" and resets its "seg" to "default"; 514 - * 4. MEMCG_LRU_YOUNG, which moves an memcg to the tail of a random bin in the 514 + * 4. MEMCG_LRU_YOUNG, which moves a memcg to the tail of a random bin in the 515 515 * young generation, updates its "gen" to "young" and resets its "seg" to 516 516 * "default". 517 517 * 518 518 * The events that trigger the above operations are: 519 519 * 1. Exceeding the soft limit, which triggers MEMCG_LRU_HEAD; 520 - * 2. The first attempt to reclaim an memcg below low, which triggers 520 + * 2. The first attempt to reclaim a memcg below low, which triggers 521 521 * MEMCG_LRU_TAIL; 522 - * 3. The first attempt to reclaim an memcg below reclaimable size threshold, 523 - * which triggers MEMCG_LRU_TAIL; 524 - * 4. The second attempt to reclaim an memcg below reclaimable size threshold, 525 - * which triggers MEMCG_LRU_YOUNG; 526 - * 5. Attempting to reclaim an memcg below min, which triggers MEMCG_LRU_YOUNG; 522 + * 3. The first attempt to reclaim a memcg offlined or below reclaimable size 523 + * threshold, which triggers MEMCG_LRU_TAIL; 524 + * 4. The second attempt to reclaim a memcg offlined or below reclaimable size 525 + * threshold, which triggers MEMCG_LRU_YOUNG; 526 + * 5. Attempting to reclaim a memcg below min, which triggers MEMCG_LRU_YOUNG; 527 527 * 6. Finishing the aging on the eviction path, which triggers MEMCG_LRU_YOUNG; 528 - * 7. Offlining an memcg, which triggers MEMCG_LRU_OLD. 528 + * 7. Offlining a memcg, which triggers MEMCG_LRU_OLD. 529 529 * 530 - * Note that memcg LRU only applies to global reclaim, and the round-robin 531 - * incrementing of their max_seq counters ensures the eventual fairness to all 532 - * eligible memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). 530 + * Notes: 531 + * 1. Memcg LRU only applies to global reclaim, and the round-robin incrementing 532 + * of their max_seq counters ensures the eventual fairness to all eligible 533 + * memcgs. For memcg reclaim, it still relies on mem_cgroup_iter(). 534 + * 2. There are only two valid generations: old (seq) and young (seq+1). 535 + * MEMCG_NR_GENS is set to three so that when reading the generation counter 536 + * locklessly, a stale value (seq-1) does not wraparound to young. 533 537 */ 534 - #define MEMCG_NR_GENS 2 538 + #define MEMCG_NR_GENS 3 535 539 #define MEMCG_NR_BINS 8 536 540 537 541 struct lru_gen_memcg {
-1
kernel/Kconfig.kexec
··· 94 94 config CRASH_DUMP 95 95 bool "kernel crash dumps" 96 96 depends on ARCH_SUPPORTS_CRASH_DUMP 97 - depends on ARCH_SUPPORTS_KEXEC 98 97 select CRASH_CORE 99 98 select KEXEC_CORE 100 99 help
+5 -5
kernel/crash_core.c
··· 199 199 * It returns 0 on success and -EINVAL on failure. 200 200 */ 201 201 static int __init parse_crashkernel_suffix(char *cmdline, 202 - unsigned long long *crash_size, 202 + unsigned long long *crash_size, 203 203 const char *suffix) 204 204 { 205 205 char *cur = cmdline; ··· 268 268 unsigned long long *crash_base, 269 269 const char *suffix) 270 270 { 271 - char *first_colon, *first_space; 272 - char *ck_cmdline; 273 - char *name = "crashkernel="; 271 + char *first_colon, *first_space; 272 + char *ck_cmdline; 273 + char *name = "crashkernel="; 274 274 275 275 BUG_ON(!crash_size || !crash_base); 276 276 *crash_size = 0; ··· 440 440 return; 441 441 } 442 442 443 - if ((crash_base > CRASH_ADDR_LOW_MAX) && 443 + if ((crash_base >= CRASH_ADDR_LOW_MAX) && 444 444 crash_low_size && reserve_crashkernel_low(crash_low_size)) { 445 445 memblock_phys_free(crash_base, crash_size); 446 446 return;
+6
mm/damon/core.c
··· 445 445 if (!ctx) 446 446 return NULL; 447 447 448 + init_completion(&ctx->kdamond_started); 449 + 448 450 ctx->attrs.sample_interval = 5 * 1000; 449 451 ctx->attrs.aggr_interval = 100 * 1000; 450 452 ctx->attrs.ops_update_interval = 60 * 1000 * 1000; ··· 670 668 mutex_lock(&ctx->kdamond_lock); 671 669 if (!ctx->kdamond) { 672 670 err = 0; 671 + reinit_completion(&ctx->kdamond_started); 673 672 ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", 674 673 nr_running_ctxs); 675 674 if (IS_ERR(ctx->kdamond)) { 676 675 err = PTR_ERR(ctx->kdamond); 677 676 ctx->kdamond = NULL; 677 + } else { 678 + wait_for_completion(&ctx->kdamond_started); 678 679 } 679 680 } 680 681 mutex_unlock(&ctx->kdamond_lock); ··· 1438 1433 1439 1434 pr_debug("kdamond (%d) starts\n", current->pid); 1440 1435 1436 + complete(&ctx->kdamond_started); 1441 1437 kdamond_init_intervals_sis(ctx); 1442 1438 1443 1439 if (ctx->ops.init)
+18 -1
mm/shmem.c
··· 1080 1080 } 1081 1081 VM_BUG_ON_FOLIO(folio_test_writeback(folio), 1082 1082 folio); 1083 - truncate_inode_folio(mapping, folio); 1083 + 1084 + if (!folio_test_large(folio)) { 1085 + truncate_inode_folio(mapping, folio); 1086 + } else if (truncate_inode_partial_folio(folio, lstart, lend)) { 1087 + /* 1088 + * If we split a page, reset the loop so 1089 + * that we pick up the new sub pages. 1090 + * Otherwise the THP was entirely 1091 + * dropped or the target range was 1092 + * zeroed, so just continue the loop as 1093 + * is. 1094 + */ 1095 + if (!folio_test_large(folio)) { 1096 + folio_unlock(folio); 1097 + index = start; 1098 + break; 1099 + } 1100 + } 1084 1101 } 1085 1102 folio_unlock(folio); 1086 1103 }
+61 -31
mm/vmscan.c
··· 4089 4089 else 4090 4090 VM_WARN_ON_ONCE(true); 4091 4091 4092 + WRITE_ONCE(lruvec->lrugen.seg, seg); 4093 + WRITE_ONCE(lruvec->lrugen.gen, new); 4094 + 4092 4095 hlist_nulls_del_rcu(&lruvec->lrugen.list); 4093 4096 4094 4097 if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD) ··· 4101 4098 4102 4099 pgdat->memcg_lru.nr_memcgs[old]--; 4103 4100 pgdat->memcg_lru.nr_memcgs[new]++; 4104 - 4105 - lruvec->lrugen.gen = new; 4106 - WRITE_ONCE(lruvec->lrugen.seg, seg); 4107 4101 4108 4102 if (!pgdat->memcg_lru.nr_memcgs[old] && old == get_memcg_gen(pgdat->memcg_lru.seq)) 4109 4103 WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1); ··· 4124 4124 4125 4125 gen = get_memcg_gen(pgdat->memcg_lru.seq); 4126 4126 4127 + lruvec->lrugen.gen = gen; 4128 + 4127 4129 hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]); 4128 4130 pgdat->memcg_lru.nr_memcgs[gen]++; 4129 - 4130 - lruvec->lrugen.gen = gen; 4131 4131 4132 4132 spin_unlock_irq(&pgdat->memcg_lru.lock); 4133 4133 } ··· 4232 4232 } 4233 4233 4234 4234 /* protected */ 4235 - if (tier > tier_idx) { 4235 + if (tier > tier_idx || refs == BIT(LRU_REFS_WIDTH)) { 4236 4236 int hist = lru_hist_from_seq(lrugen->min_seq[type]); 4237 4237 4238 4238 gen = folio_inc_gen(lruvec, folio, false); ··· 4598 4598 } 4599 4599 4600 4600 /* try to scrape all its memory if this memcg was deleted */ 4601 - *nr_to_scan = mem_cgroup_online(memcg) ? (total >> sc->priority) : total; 4601 + if (!mem_cgroup_online(memcg)) { 4602 + *nr_to_scan = total; 4603 + return false; 4604 + } 4605 + 4606 + *nr_to_scan = total >> sc->priority; 4602 4607 4603 4608 /* 4604 4609 * The aging tries to be lazy to reduce the overhead, while the eviction ··· 4640 4635 DEFINE_MAX_SEQ(lruvec); 4641 4636 4642 4637 if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) 4643 - return 0; 4638 + return -1; 4644 4639 4645 4640 if (!should_run_aging(lruvec, max_seq, sc, can_swap, &nr_to_scan)) 4646 4641 return nr_to_scan; ··· 4653 4648 return try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false) ? -1 : 0; 4654 4649 } 4655 4650 4656 - static unsigned long get_nr_to_reclaim(struct scan_control *sc) 4651 + static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc) 4657 4652 { 4653 + int i; 4654 + enum zone_watermarks mark; 4655 + 4658 4656 /* don't abort memcg reclaim to ensure fairness */ 4659 4657 if (!root_reclaim(sc)) 4660 - return -1; 4658 + return false; 4661 4659 4662 - return max(sc->nr_to_reclaim, compact_gap(sc->order)); 4660 + if (sc->nr_reclaimed >= max(sc->nr_to_reclaim, compact_gap(sc->order))) 4661 + return true; 4662 + 4663 + /* check the order to exclude compaction-induced reclaim */ 4664 + if (!current_is_kswapd() || sc->order) 4665 + return false; 4666 + 4667 + mark = sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING ? 4668 + WMARK_PROMO : WMARK_HIGH; 4669 + 4670 + for (i = 0; i <= sc->reclaim_idx; i++) { 4671 + struct zone *zone = lruvec_pgdat(lruvec)->node_zones + i; 4672 + unsigned long size = wmark_pages(zone, mark) + MIN_LRU_BATCH; 4673 + 4674 + if (managed_zone(zone) && !zone_watermark_ok(zone, 0, size, sc->reclaim_idx, 0)) 4675 + return false; 4676 + } 4677 + 4678 + /* kswapd should abort if all eligible zones are safe */ 4679 + return true; 4663 4680 } 4664 4681 4665 4682 static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) 4666 4683 { 4667 4684 long nr_to_scan; 4668 4685 unsigned long scanned = 0; 4669 - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); 4670 4686 int swappiness = get_swappiness(lruvec, sc); 4671 4687 4672 4688 /* clean file folios are more likely to exist */ ··· 4709 4683 if (scanned >= nr_to_scan) 4710 4684 break; 4711 4685 4712 - if (sc->nr_reclaimed >= nr_to_reclaim) 4686 + if (should_abort_scan(lruvec, sc)) 4713 4687 break; 4714 4688 4715 4689 cond_resched(); 4716 4690 } 4717 4691 4718 - /* whether try_to_inc_max_seq() was successful */ 4692 + /* whether this lruvec should be rotated */ 4719 4693 return nr_to_scan < 0; 4720 4694 } 4721 4695 ··· 4724 4698 bool success; 4725 4699 unsigned long scanned = sc->nr_scanned; 4726 4700 unsigned long reclaimed = sc->nr_reclaimed; 4727 - int seg = lru_gen_memcg_seg(lruvec); 4728 4701 struct mem_cgroup *memcg = lruvec_memcg(lruvec); 4729 4702 struct pglist_data *pgdat = lruvec_pgdat(lruvec); 4730 - 4731 - /* see the comment on MEMCG_NR_GENS */ 4732 - if (!lruvec_is_sizable(lruvec, sc)) 4733 - return seg != MEMCG_LRU_TAIL ? MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; 4734 4703 4735 4704 mem_cgroup_calculate_protection(NULL, memcg); 4736 4705 ··· 4734 4713 4735 4714 if (mem_cgroup_below_low(NULL, memcg)) { 4736 4715 /* see the comment on MEMCG_NR_GENS */ 4737 - if (seg != MEMCG_LRU_TAIL) 4716 + if (lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL) 4738 4717 return MEMCG_LRU_TAIL; 4739 4718 4740 4719 memcg_memory_event(memcg, MEMCG_LOW); ··· 4750 4729 4751 4730 flush_reclaim_state(sc); 4752 4731 4753 - return success ? MEMCG_LRU_YOUNG : 0; 4732 + if (success && mem_cgroup_online(memcg)) 4733 + return MEMCG_LRU_YOUNG; 4734 + 4735 + if (!success && lruvec_is_sizable(lruvec, sc)) 4736 + return 0; 4737 + 4738 + /* one retry if offlined or too small */ 4739 + return lru_gen_memcg_seg(lruvec) != MEMCG_LRU_TAIL ? 4740 + MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG; 4754 4741 } 4755 4742 4756 4743 #ifdef CONFIG_MEMCG ··· 4772 4743 struct lruvec *lruvec; 4773 4744 struct lru_gen_folio *lrugen; 4774 4745 struct mem_cgroup *memcg; 4775 - const struct hlist_nulls_node *pos; 4776 - unsigned long nr_to_reclaim = get_nr_to_reclaim(sc); 4746 + struct hlist_nulls_node *pos; 4777 4747 4748 + gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); 4778 4749 bin = first_bin = get_random_u32_below(MEMCG_NR_BINS); 4779 4750 restart: 4780 4751 op = 0; 4781 4752 memcg = NULL; 4782 - gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq)); 4783 4753 4784 4754 rcu_read_lock(); 4785 4755 ··· 4789 4761 } 4790 4762 4791 4763 mem_cgroup_put(memcg); 4764 + memcg = NULL; 4765 + 4766 + if (gen != READ_ONCE(lrugen->gen)) 4767 + continue; 4792 4768 4793 4769 lruvec = container_of(lrugen, struct lruvec, lrugen); 4794 4770 memcg = lruvec_memcg(lruvec); ··· 4809 4777 4810 4778 rcu_read_lock(); 4811 4779 4812 - if (sc->nr_reclaimed >= nr_to_reclaim) 4780 + if (should_abort_scan(lruvec, sc)) 4813 4781 break; 4814 4782 } 4815 4783 ··· 4820 4788 4821 4789 mem_cgroup_put(memcg); 4822 4790 4823 - if (sc->nr_reclaimed >= nr_to_reclaim) 4791 + if (!is_a_nulls(pos)) 4824 4792 return; 4825 4793 4826 4794 /* restart if raced with lru_gen_rotate_memcg() */ ··· 4877 4845 if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH) 4878 4846 return; 4879 4847 /* 4880 - * Determine the initial priority based on ((total / MEMCG_NR_GENS) >> 4881 - * priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, where the 4882 - * estimated reclaimed_to_scanned_ratio = inactive / total. 4848 + * Determine the initial priority based on 4849 + * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim, 4850 + * where reclaimed_to_scanned_ratio = inactive / total. 4883 4851 */ 4884 4852 reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE); 4885 4853 if (get_swappiness(lruvec, sc)) 4886 4854 reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON); 4887 - 4888 - reclaimable /= MEMCG_NR_GENS; 4889 4855 4890 4856 /* round down reclaimable and round up sc->nr_to_reclaim */ 4891 4857 priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
+3 -3
mm/workingset.c
··· 313 313 * 1. For pages accessed through page tables, hotter pages pushed out 314 314 * hot pages which refaulted immediately. 315 315 * 2. For pages accessed multiple times through file descriptors, 316 - * numbers of accesses might have been out of the range. 316 + * they would have been protected by sort_folio(). 317 317 */ 318 - if (lru_gen_in_fault() || refs == BIT(LRU_REFS_WIDTH)) { 319 - folio_set_workingset(folio); 318 + if (lru_gen_in_fault() || refs >= BIT(LRU_REFS_WIDTH) - 1) { 319 + set_mask_bits(&folio->flags, 0, LRU_REFS_MASK | BIT(PG_workingset)); 320 320 mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); 321 321 } 322 322 unlock:
+1 -20
tools/testing/selftests/Makefile
··· 155 155 abs_objtree := $(realpath $(abs_objtree)) 156 156 BUILD := $(abs_objtree)/kselftest 157 157 KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include 158 - KHDR_DIR := ${abs_objtree}/usr/include 159 158 else 160 159 BUILD := $(CURDIR) 161 160 abs_srctree := $(shell cd $(top_srcdir) && pwd) 162 161 KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include 163 - KHDR_DIR := ${abs_srctree}/usr/include 164 162 DEFAULT_INSTALL_HDR_PATH := 1 165 163 endif 166 164 ··· 172 174 # all isn't the first target in the file. 173 175 .DEFAULT_GOAL := all 174 176 175 - all: kernel_header_files 177 + all: 176 178 @ret=1; \ 177 179 for TARGET in $(TARGETS); do \ 178 180 BUILD_TARGET=$$BUILD/$$TARGET; \ ··· 182 184 $(if $(FORCE_TARGETS),|| exit); \ 183 185 ret=$$((ret * $$?)); \ 184 186 done; exit $$ret; 185 - 186 - kernel_header_files: 187 - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ 188 - if [ $$? -ne 0 ]; then \ 189 - RED='\033[1;31m'; \ 190 - NOCOLOR='\033[0m'; \ 191 - echo; \ 192 - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ 193 - echo "Please run this and try again:"; \ 194 - echo; \ 195 - echo " cd $(top_srcdir)"; \ 196 - echo " make headers"; \ 197 - echo; \ 198 - exit 1; \ 199 - fi 200 - 201 - .PHONY: kernel_header_files 202 187 203 188 run_tests: all 204 189 @for TARGET in $(TARGETS); do \
+3 -37
tools/testing/selftests/lib.mk
··· 44 44 selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) 45 45 top_srcdir = $(selfdir)/../../.. 46 46 47 - ifeq ("$(origin O)", "command line") 48 - KBUILD_OUTPUT := $(O) 47 + ifeq ($(KHDR_INCLUDES),) 48 + KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include 49 49 endif 50 - 51 - ifneq ($(KBUILD_OUTPUT),) 52 - # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot 53 - # expand a shell special character '~'. We use a somewhat tedious way here. 54 - abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) 55 - $(if $(abs_objtree),, \ 56 - $(error failed to create output directory "$(KBUILD_OUTPUT)")) 57 - # $(realpath ...) resolves symlinks 58 - abs_objtree := $(realpath $(abs_objtree)) 59 - KHDR_DIR := ${abs_objtree}/usr/include 60 - else 61 - abs_srctree := $(shell cd $(top_srcdir) && pwd) 62 - KHDR_DIR := ${abs_srctree}/usr/include 63 - endif 64 - 65 - KHDR_INCLUDES := -isystem $(KHDR_DIR) 66 50 67 51 # The following are built by lib.mk common compile rules. 68 52 # TEST_CUSTOM_PROGS should be used by tests that require ··· 58 74 TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) 59 75 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) 60 76 61 - all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \ 62 - $(TEST_GEN_FILES) 63 - 64 - kernel_header_files: 65 - @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ 66 - if [ $$? -ne 0 ]; then \ 67 - RED='\033[1;31m'; \ 68 - NOCOLOR='\033[0m'; \ 69 - echo; \ 70 - echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ 71 - echo "Please run this and try again:"; \ 72 - echo; \ 73 - echo " cd $(top_srcdir)"; \ 74 - echo " make headers"; \ 75 - echo; \ 76 - exit 1; \ 77 - fi 78 - 79 - .PHONY: kernel_header_files 77 + all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) 80 78 81 79 define RUN_TESTS 82 80 BASE_DIR="$(selfdir)"; \
+2 -1
tools/testing/selftests/mm/cow.c
··· 1680 1680 { 1681 1681 int err; 1682 1682 1683 + ksft_print_header(); 1684 + 1683 1685 pagesize = getpagesize(); 1684 1686 thpsize = read_pmd_pagesize(); 1685 1687 if (thpsize) ··· 1691 1689 ARRAY_SIZE(hugetlbsizes)); 1692 1690 detect_huge_zeropage(); 1693 1691 1694 - ksft_print_header(); 1695 1692 ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() + 1696 1693 ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() + 1697 1694 ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());