Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

+2 -1

.mailmap

··· 605 605 Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de> 606 606 Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de> 607 607 Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net> 608 - Oliver Upton <oliver.upton@linux.dev> <oupton@google.com> 608 + Oliver Upton <oupton@kernel.org> <oupton@google.com> 609 + Oliver Upton <oupton@kernel.org> <oliver.upton@linux.dev> 609 610 Ondřej Jirman <megi@xff.cz> <megous@megous.com> 610 611 Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org> 611 612 Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>

+1 -1

Documentation/devicetree/bindings/gpio/ti,twl4030-gpio.yaml

··· 1 1 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 2 %YAML 1.2 3 3 --- 4 - $id: http://devicetree.org/schemas/ti,twl4030-gpio.yaml# 4 + $id: http://devicetree.org/schemas/gpio/ti,twl4030-gpio.yaml# 5 5 $schema: http://devicetree.org/meta-schemas/core.yaml# 6 6 7 7 title: TI TWL4030 GPIO controller

+4 -4

Documentation/firmware-guide/acpi/i2c-muxes.rst

··· 37 37 Name (_HID, ...) 38 38 Name (_CRS, ResourceTemplate () { 39 39 I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED, 40 - AddressingMode7Bit, "\\_SB.SMB1.CH00", 0x00, 41 - ResourceConsumer,,) 40 + AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH00", 41 + 0x00, ResourceConsumer,,) 42 42 } 43 43 } 44 44 } ··· 52 52 Name (_HID, ...) 53 53 Name (_CRS, ResourceTemplate () { 54 54 I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED, 55 - AddressingMode7Bit, "\\_SB.SMB1.CH01", 0x00, 56 - ResourceConsumer,,) 55 + AddressingMode7Bit, "\\_SB.SMB1.MUX0.CH01", 56 + 0x00, ResourceConsumer,,) 57 57 } 58 58 } 59 59 }

+2 -2

Documentation/userspace-api/netlink/intro-specs.rst

··· 13 13 Kernel comes with a simple CLI tool which should be useful when 14 14 developing Netlink related code. The tool is implemented in Python 15 15 and can use a YAML specification to issue Netlink requests 16 - to the kernel. Only Generic Netlink is supported. 16 + to the kernel. 17 17 18 18 The tool is located at ``tools/net/ynl/pyynl/cli.py``. It accepts 19 - a handul of arguments, the most important ones are: 19 + a handful of arguments, the most important ones are: 20 20 21 21 - ``--spec`` - point to the spec file 22 22 - ``--do $name`` / ``--dump $name`` - issue request ``$name``

+6 -3

MAINTAINERS

··· 915 915 ALPHA PORT 916 916 M: Richard Henderson <richard.henderson@linaro.org> 917 917 M: Matt Turner <mattst88@gmail.com> 918 + M: Magnus Lindholm <linmag7@gmail.com> 918 919 L: linux-alpha@vger.kernel.org 919 920 S: Odd Fixes 920 921 F: arch/alpha/ ··· 9209 9208 R: Jeffle Xu <jefflexu@linux.alibaba.com> 9210 9209 R: Sandeep Dhavale <dhavale@google.com> 9211 9210 R: Hongbo Li <lihongbo22@huawei.com> 9211 + R: Chunhai Guo <guochunhai@vivo.com> 9212 9212 L: linux-erofs@lists.ozlabs.org 9213 9213 S: Maintained 9214 9214 W: https://erofs.docs.kernel.org ··· 13661 13659 13662 13660 KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64) 13663 13661 M: Marc Zyngier <maz@kernel.org> 13664 - M: Oliver Upton <oliver.upton@linux.dev> 13662 + M: Oliver Upton <oupton@kernel.org> 13665 13663 R: Joey Gouly <joey.gouly@arm.com> 13666 13664 R: Suzuki K Poulose <suzuki.poulose@arm.com> 13667 13665 R: Zenghui Yu <yuzenghui@huawei.com> ··· 16501 16499 16502 16500 MEMORY MANAGEMENT - SWAP 16503 16501 M: Andrew Morton <akpm@linux-foundation.org> 16502 + M: Chris Li <chrisl@kernel.org> 16503 + M: Kairui Song <kasong@tencent.com> 16504 16504 R: Kemeng Shi <shikemeng@huaweicloud.com> 16505 - R: Kairui Song <kasong@tencent.com> 16506 16505 R: Nhat Pham <nphamcs@gmail.com> 16507 16506 R: Baoquan He <bhe@redhat.com> 16508 16507 R: Barry Song <baohua@kernel.org> 16509 - R: Chris Li <chrisl@kernel.org> 16510 16508 L: linux-mm@kvack.org 16511 16509 S: Maintained 16512 16510 F: Documentation/mm/swap-table.rst ··· 20182 20180 R: Jiri Olsa <jolsa@kernel.org> 20183 20181 R: Ian Rogers <irogers@google.com> 20184 20182 R: Adrian Hunter <adrian.hunter@intel.com> 20183 + R: James Clark <james.clark@linaro.org> 20185 20184 L: linux-perf-users@vger.kernel.org 20186 20185 L: linux-kernel@vger.kernel.org 20187 20186 S: Supported

+1 -1

Makefile

··· 2 2 VERSION = 6 3 3 PATCHLEVEL = 18 4 4 SUBLEVEL = 0 5 - EXTRAVERSION = -rc4 5 + EXTRAVERSION = -rc5 6 6 NAME = Baby Opossum Posse 7 7 8 8 # *DOCUMENTATION*

+7

arch/Kconfig

··· 917 917 An architecture should select this option if it requires the 918 918 .kcfi_traps section for KCFI trap handling. 919 919 920 + config ARCH_USES_CFI_GENERIC_LLVM_PASS 921 + bool 922 + help 923 + An architecture should select this option if it uses the generic 924 + KCFIPass in LLVM to expand kCFI bundles instead of architecture-specific 925 + lowering. 926 + 920 927 config CFI 921 928 bool "Use Kernel Control Flow Integrity (kCFI)" 922 929 default CFI_CLANG

+2

arch/arm/Kconfig

··· 44 44 select ARCH_USE_BUILTIN_BSWAP 45 45 select ARCH_USE_CMPXCHG_LOCKREF 46 46 select ARCH_USE_MEMTEST 47 + # https://github.com/llvm/llvm-project/commit/d130f402642fba3d065aacb506cb061c899558de 48 + select ARCH_USES_CFI_GENERIC_LLVM_PASS if CLANG_VERSION < 220000 47 49 select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU 48 50 select ARCH_WANT_GENERAL_HUGETLB 49 51 select ARCH_WANT_IPC_PARSE_VERSION

+5 -2

arch/arm64/include/asm/alternative.h

··· 26 26 bool alternative_is_applied(u16 cpucap); 27 27 28 28 #ifdef CONFIG_MODULES 29 - void apply_alternatives_module(void *start, size_t length); 29 + int apply_alternatives_module(void *start, size_t length); 30 30 #else 31 - static inline void apply_alternatives_module(void *start, size_t length) { } 31 + static inline int apply_alternatives_module(void *start, size_t length) 32 + { 33 + return 0; 34 + } 32 35 #endif 33 36 34 37 void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,

+1 -2

arch/arm64/include/asm/kfence.h

··· 10 10 11 11 #include <asm/set_memory.h> 12 12 13 - static inline bool arch_kfence_init_pool(void) { return true; } 14 - 15 13 static inline bool kfence_protect_page(unsigned long addr, bool protect) 16 14 { 17 15 set_memory_valid(addr, 1, !protect); ··· 23 25 { 24 26 return !kfence_early_init; 25 27 } 28 + bool arch_kfence_init_pool(void); 26 29 #else /* CONFIG_KFENCE */ 27 30 static inline bool arm64_kfence_can_set_direct_map(void) { return false; } 28 31 #endif /* CONFIG_KFENCE */

+11 -4

arch/arm64/include/asm/percpu.h

··· 77 77 " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \ 78 78 " cbnz %w[loop], 1b", \ 79 79 /* LSE atomics */ \ 80 - #op_lse "\t%" #w "[val], %[ptr]\n" \ 80 + #op_lse "\t%" #w "[val], %" #w "[tmp], %[ptr]\n" \ 81 81 __nops(3)) \ 82 82 : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \ 83 83 [ptr] "+Q"(*(u##sz *)ptr) \ ··· 124 124 PERCPU_RW_OPS(16) 125 125 PERCPU_RW_OPS(32) 126 126 PERCPU_RW_OPS(64) 127 - PERCPU_OP(add, add, stadd) 128 - PERCPU_OP(andnot, bic, stclr) 129 - PERCPU_OP(or, orr, stset) 127 + 128 + /* 129 + * Use value-returning atomics for CPU-local ops as they are more likely 130 + * to execute "near" to the CPU (e.g. in L1$). 131 + * 132 + * https://lore.kernel.org/r/e7d539ed-ced0-4b96-8ecd-048a5b803b85@paulmck-laptop 133 + */ 134 + PERCPU_OP(add, add, ldadd) 135 + PERCPU_OP(andnot, bic, ldclr) 136 + PERCPU_OP(or, orr, ldset) 130 137 PERCPU_RET_OP(add, add, ldadd) 131 138 132 139 #undef PERCPU_RW_OPS

+1 -1

arch/arm64/include/asm/scs.h

··· 53 53 EDYNSCS_INVALID_CFA_OPCODE = 4, 54 54 }; 55 55 56 - int __pi_scs_patch(const u8 eh_frame[], int size); 56 + int __pi_scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); 57 57 58 58 #endif /* __ASSEMBLY __ */ 59 59

+1

arch/arm64/include/asm/spectre.h

··· 117 117 __le32 *origptr, __le32 *updptr, int nr_inst); 118 118 void spectre_bhb_patch_clearbhb(struct alt_instr *alt, 119 119 __le32 *origptr, __le32 *updptr, int nr_inst); 120 + void spectre_print_disabled_mitigations(void); 120 121 121 122 #endif /* __ASSEMBLY__ */ 122 123 #endif /* __ASM_SPECTRE_H */

+1 -7

arch/arm64/kernel/acpi.c

··· 197 197 */ 198 198 void __init acpi_boot_table_init(void) 199 199 { 200 - int ret; 201 - 202 200 /* 203 201 * Enable ACPI instead of device tree unless 204 202 * - ACPI has been disabled explicitly (acpi=off), or ··· 250 252 * behaviour, use acpi=nospcr to disable console in ACPI SPCR 251 253 * table as default serial console. 252 254 */ 253 - ret = acpi_parse_spcr(earlycon_acpi_spcr_enable, 255 + acpi_parse_spcr(earlycon_acpi_spcr_enable, 254 256 !param_acpi_nospcr); 255 - if (!ret || param_acpi_nospcr || !IS_ENABLED(CONFIG_ACPI_SPCR_TABLE)) 256 - pr_info("Use ACPI SPCR as default console: No\n"); 257 - else 258 - pr_info("Use ACPI SPCR as default console: Yes\n"); 259 257 260 258 if (IS_ENABLED(CONFIG_ACPI_BGRT)) 261 259 acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt);

+12 -7

arch/arm64/kernel/alternative.c

··· 139 139 } while (cur += d_size, cur < end); 140 140 } 141 141 142 - static void __apply_alternatives(const struct alt_region *region, 143 - bool is_module, 144 - unsigned long *cpucap_mask) 142 + static int __apply_alternatives(const struct alt_region *region, 143 + bool is_module, 144 + unsigned long *cpucap_mask) 145 145 { 146 146 struct alt_instr *alt; 147 147 __le32 *origptr, *updptr; ··· 166 166 updptr = is_module ? origptr : lm_alias(origptr); 167 167 nr_inst = alt->orig_len / AARCH64_INSN_SIZE; 168 168 169 - if (ALT_HAS_CB(alt)) 169 + if (ALT_HAS_CB(alt)) { 170 170 alt_cb = ALT_REPL_PTR(alt); 171 - else 171 + if (is_module && !core_kernel_text((unsigned long)alt_cb)) 172 + return -ENOEXEC; 173 + } else { 172 174 alt_cb = patch_alternative; 175 + } 173 176 174 177 alt_cb(alt, origptr, updptr, nr_inst); 175 178 ··· 196 193 bitmap_and(applied_alternatives, applied_alternatives, 197 194 system_cpucaps, ARM64_NCAPS); 198 195 } 196 + 197 + return 0; 199 198 } 200 199 201 200 static void __init apply_alternatives_vdso(void) ··· 282 277 } 283 278 284 279 #ifdef CONFIG_MODULES 285 - void apply_alternatives_module(void *start, size_t length) 280 + int apply_alternatives_module(void *start, size_t length) 286 281 { 287 282 struct alt_region region = { 288 283 .begin = start, ··· 292 287 293 288 bitmap_fill(all_capabilities, ARM64_NCAPS); 294 289 295 - __apply_alternatives(&region, true, &all_capabilities[0]); 290 + return __apply_alternatives(&region, true, &all_capabilities[0]); 296 291 } 297 292 #endif 298 293

+6

arch/arm64/kernel/cpufeature.c

··· 95 95 #include <asm/vectors.h> 96 96 #include <asm/virt.h> 97 97 98 + #include <asm/spectre.h> 98 99 /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ 99 100 static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; 100 101 ··· 3876 3875 */ 3877 3876 if (system_uses_ttbr0_pan()) 3878 3877 pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); 3878 + 3879 + /* 3880 + * Report Spectre mitigations status. 3881 + */ 3882 + spectre_print_disabled_mitigations(); 3879 3883 } 3880 3884 3881 3885 void __init setup_system_features(void)

+17 -4

arch/arm64/kernel/module.c

··· 489 489 int ret; 490 490 491 491 s = find_section(hdr, sechdrs, ".altinstructions"); 492 - if (s) 493 - apply_alternatives_module((void *)s->sh_addr, s->sh_size); 492 + if (s) { 493 + ret = apply_alternatives_module((void *)s->sh_addr, s->sh_size); 494 + if (ret < 0) { 495 + pr_err("module %s: error occurred when applying alternatives\n", me->name); 496 + return ret; 497 + } 498 + } 494 499 495 500 if (scs_is_dynamic()) { 496 501 s = find_section(hdr, sechdrs, ".init.eh_frame"); 497 502 if (s) { 498 - ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size); 499 - if (ret) 503 + /* 504 + * Because we can reject modules that are malformed 505 + * so SCS patching fails, skip dry run and try to patch 506 + * it in place. If patching fails, the module would not 507 + * be loaded anyway. 508 + */ 509 + ret = __pi_scs_patch((void *)s->sh_addr, s->sh_size, true); 510 + if (ret) { 500 511 pr_err("module %s: error occurred during dynamic SCS patching (%d)\n", 501 512 me->name, ret); 513 + return -ENOEXEC; 514 + } 502 515 } 503 516 } 504 517

+2 -1

arch/arm64/kernel/mte.c

··· 476 476 477 477 folio = page_folio(page); 478 478 if (folio_test_hugetlb(folio)) 479 - WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio)); 479 + WARN_ON_ONCE(!folio_test_hugetlb_mte_tagged(folio) && 480 + !is_huge_zero_folio(folio)); 480 481 else 481 482 WARN_ON_ONCE(!page_mte_tagged(page) && !is_zero_page(page)); 482 483

+1 -1

arch/arm64/kernel/pi/map_kernel.c

··· 104 104 105 105 if (enable_scs) { 106 106 scs_patch(__eh_frame_start + va_offset, 107 - __eh_frame_end - __eh_frame_start); 107 + __eh_frame_end - __eh_frame_start, false); 108 108 asm("ic ialluis"); 109 109 110 110 dynamic_scs_is_enabled = true;

+6 -4

arch/arm64/kernel/pi/patch-scs.c

··· 225 225 return 0; 226 226 } 227 227 228 - int scs_patch(const u8 eh_frame[], int size) 228 + int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run) 229 229 { 230 230 int code_alignment_factor = 1; 231 231 bool fde_use_sdata8 = false; ··· 277 277 } 278 278 } else { 279 279 ret = scs_handle_fde_frame(frame, code_alignment_factor, 280 - fde_use_sdata8, true); 280 + fde_use_sdata8, !skip_dry_run); 281 281 if (ret) 282 282 return ret; 283 - scs_handle_fde_frame(frame, code_alignment_factor, 284 - fde_use_sdata8, false); 283 + 284 + if (!skip_dry_run) 285 + scs_handle_fde_frame(frame, code_alignment_factor, 286 + fde_use_sdata8, false); 285 287 } 286 288 287 289 p += sizeof(frame->size) + frame->size;

+1 -1

arch/arm64/kernel/pi/pi.h

··· 27 27 void init_feature_override(u64 boot_status, const void *fdt, int chosen); 28 28 u64 kaslr_early_init(void *fdt, int chosen); 29 29 void relocate_kernel(u64 offset); 30 - int scs_patch(const u8 eh_frame[], int size); 30 + int scs_patch(const u8 eh_frame[], int size, bool skip_dry_run); 31 31 32 32 void map_range(phys_addr_t *pte, u64 start, u64 end, phys_addr_t pa, 33 33 pgprot_t prot, int level, pte_t *tbl, bool may_use_cont,

+4 -1

arch/arm64/kernel/probes/kprobes.c

··· 49 49 addr = execmem_alloc(EXECMEM_KPROBES, PAGE_SIZE); 50 50 if (!addr) 51 51 return NULL; 52 - set_memory_rox((unsigned long)addr, 1); 52 + if (set_memory_rox((unsigned long)addr, 1)) { 53 + execmem_free(addr); 54 + return NULL; 55 + } 53 56 return addr; 54 57 } 55 58

+18 -17

arch/arm64/kernel/proton-pack.c

··· 91 91 92 92 static bool spectre_v2_mitigations_off(void) 93 93 { 94 - bool ret = __nospectre_v2 || cpu_mitigations_off(); 95 - 96 - if (ret) 97 - pr_info_once("spectre-v2 mitigation disabled by command line option\n"); 98 - 99 - return ret; 94 + return __nospectre_v2 || cpu_mitigations_off(); 100 95 } 101 96 102 97 static const char *get_bhb_affected_string(enum mitigation_state bhb_state) ··· 416 421 */ 417 422 static bool spectre_v4_mitigations_off(void) 418 423 { 419 - bool ret = cpu_mitigations_off() || 420 - __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; 421 - 422 - if (ret) 423 - pr_info_once("spectre-v4 mitigation disabled by command-line option\n"); 424 - 425 - return ret; 424 + return cpu_mitigations_off() || 425 + __spectre_v4_policy == SPECTRE_V4_POLICY_MITIGATION_DISABLED; 426 426 } 427 427 428 428 /* Do we need to toggle the mitigation state on entry to/exit from the kernel? */ ··· 1032 1042 1033 1043 if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { 1034 1044 /* No point mitigating Spectre-BHB alone. */ 1035 - } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { 1036 - pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); 1037 - } else if (cpu_mitigations_off() || __nospectre_bhb) { 1038 - pr_info_once("spectre-bhb mitigation disabled by command line option\n"); 1039 1045 } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { 1040 1046 state = SPECTRE_MITIGATED; 1041 1047 set_bit(BHB_HW, &system_bhb_mitigations); ··· 1185 1199 pr_err("WARNING: %s", EBPF_WARN); 1186 1200 } 1187 1201 #endif 1202 + 1203 + void spectre_print_disabled_mitigations(void) 1204 + { 1205 + /* Keep a single copy of the common message suffix to avoid duplication. 
*/ 1206 + const char *spectre_disabled_suffix = "mitigation disabled by command-line option\n"; 1207 + 1208 + if (spectre_v2_mitigations_off()) 1209 + pr_info("spectre-v2 %s", spectre_disabled_suffix); 1210 + 1211 + if (spectre_v4_mitigations_off()) 1212 + pr_info("spectre-v4 %s", spectre_disabled_suffix); 1213 + 1214 + if (__nospectre_bhb || cpu_mitigations_off()) 1215 + pr_info("spectre-bhb %s", spectre_disabled_suffix); 1216 + }

+7 -2

arch/arm64/kvm/hyp/nvhe/ffa.c

··· 479 479 struct ffa_mem_region_attributes *ep_mem_access; 480 480 struct ffa_composite_mem_region *reg; 481 481 struct ffa_mem_region *buf; 482 - u32 offset, nr_ranges; 482 + u32 offset, nr_ranges, checked_offset; 483 483 int ret = 0; 484 484 485 485 if (addr_mbz || npages_mbz || fraglen > len || ··· 516 516 goto out_unlock; 517 517 } 518 518 519 - if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) { 519 + if (check_add_overflow(offset, sizeof(struct ffa_composite_mem_region), &checked_offset)) { 520 + ret = FFA_RET_INVALID_PARAMETERS; 521 + goto out_unlock; 522 + } 523 + 524 + if (fraglen < checked_offset) { 520 525 ret = FFA_RET_INVALID_PARAMETERS; 521 526 goto out_unlock; 522 527 }

+28

arch/arm64/kvm/hyp/nvhe/mem_protect.c

··· 367 367 return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); 368 368 } 369 369 370 + /* 371 + * Ensure the PFN range is contained within PA-range. 372 + * 373 + * This check is also robust to overflows and is therefore a requirement before 374 + * using a pfn/nr_pages pair from an untrusted source. 375 + */ 376 + static bool pfn_range_is_valid(u64 pfn, u64 nr_pages) 377 + { 378 + u64 limit = BIT(kvm_phys_shift(&host_mmu.arch.mmu) - PAGE_SHIFT); 379 + 380 + return pfn < limit && ((limit - pfn) >= nr_pages); 381 + } 382 + 370 383 struct kvm_mem_range { 371 384 u64 start; 372 385 u64 end; ··· 789 776 void *virt = __hyp_va(phys); 790 777 int ret; 791 778 779 + if (!pfn_range_is_valid(pfn, nr_pages)) 780 + return -EINVAL; 781 + 792 782 host_lock_component(); 793 783 hyp_lock_component(); 794 784 ··· 819 803 u64 size = PAGE_SIZE * nr_pages; 820 804 u64 virt = (u64)__hyp_va(phys); 821 805 int ret; 806 + 807 + if (!pfn_range_is_valid(pfn, nr_pages)) 808 + return -EINVAL; 822 809 823 810 host_lock_component(); 824 811 hyp_lock_component(); ··· 906 887 u64 size = PAGE_SIZE * nr_pages; 907 888 int ret; 908 889 890 + if (!pfn_range_is_valid(pfn, nr_pages)) 891 + return -EINVAL; 892 + 909 893 host_lock_component(); 910 894 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_OWNED); 911 895 if (!ret) ··· 923 901 u64 phys = hyp_pfn_to_phys(pfn); 924 902 u64 size = PAGE_SIZE * nr_pages; 925 903 int ret; 904 + 905 + if (!pfn_range_is_valid(pfn, nr_pages)) 906 + return -EINVAL; 926 907 927 908 host_lock_component(); 928 909 ret = __host_check_page_state_range(phys, size, PKVM_PAGE_SHARED_OWNED); ··· 968 943 int ret; 969 944 970 945 if (prot & ~KVM_PGTABLE_PROT_RWX) 946 + return -EINVAL; 947 + 948 + if (!pfn_range_is_valid(pfn, nr_pages)) 971 949 return -EINVAL; 972 950 973 951 ret = __guest_check_transition_size(phys, ipa, nr_pages, &size);

+38 -33

arch/arm64/kvm/sys_regs.c

··· 2595 2595 .val = 0, \ 2596 2596 } 2597 2597 2598 - /* sys_reg_desc initialiser for known cpufeature ID registers */ 2599 - #define AA32_ID_SANITISED(name) { \ 2600 - ID_DESC(name), \ 2601 - .visibility = aa32_id_visibility, \ 2602 - .val = 0, \ 2603 - } 2604 - 2605 2598 /* sys_reg_desc initialiser for writable ID registers */ 2606 2599 #define ID_WRITABLE(name, mask) { \ 2607 2600 ID_DESC(name), \ 2608 2601 .val = mask, \ 2602 + } 2603 + 2604 + /* 2605 + * 32bit ID regs are fully writable when the guest is 32bit 2606 + * capable. Nothing in the KVM code should rely on 32bit features 2607 + * anyway, only 64bit, so let the VMM do its worst. 2608 + */ 2609 + #define AA32_ID_WRITABLE(name) { \ 2610 + ID_DESC(name), \ 2611 + .visibility = aa32_id_visibility, \ 2612 + .val = GENMASK(31, 0), \ 2609 2613 } 2610 2614 2611 2615 /* sys_reg_desc initialiser for cpufeature ID registers that need filtering */ ··· 3132 3128 3133 3129 /* AArch64 mappings of the AArch32 ID registers */ 3134 3130 /* CRm=1 */ 3135 - AA32_ID_SANITISED(ID_PFR0_EL1), 3136 - AA32_ID_SANITISED(ID_PFR1_EL1), 3131 + AA32_ID_WRITABLE(ID_PFR0_EL1), 3132 + AA32_ID_WRITABLE(ID_PFR1_EL1), 3137 3133 { SYS_DESC(SYS_ID_DFR0_EL1), 3138 3134 .access = access_id_reg, 3139 3135 .get_user = get_id_reg, 3140 3136 .set_user = set_id_dfr0_el1, 3141 3137 .visibility = aa32_id_visibility, 3142 3138 .reset = read_sanitised_id_dfr0_el1, 3143 - .val = ID_DFR0_EL1_PerfMon_MASK | 3144 - ID_DFR0_EL1_CopDbg_MASK, }, 3139 + .val = GENMASK(31, 0) }, 3145 3140 ID_HIDDEN(ID_AFR0_EL1), 3146 - AA32_ID_SANITISED(ID_MMFR0_EL1), 3147 - AA32_ID_SANITISED(ID_MMFR1_EL1), 3148 - AA32_ID_SANITISED(ID_MMFR2_EL1), 3149 - AA32_ID_SANITISED(ID_MMFR3_EL1), 3141 + AA32_ID_WRITABLE(ID_MMFR0_EL1), 3142 + AA32_ID_WRITABLE(ID_MMFR1_EL1), 3143 + AA32_ID_WRITABLE(ID_MMFR2_EL1), 3144 + AA32_ID_WRITABLE(ID_MMFR3_EL1), 3150 3145 3151 3146 /* CRm=2 */ 3152 - AA32_ID_SANITISED(ID_ISAR0_EL1), 3153 - AA32_ID_SANITISED(ID_ISAR1_EL1), 3154 - 
AA32_ID_SANITISED(ID_ISAR2_EL1), 3155 - AA32_ID_SANITISED(ID_ISAR3_EL1), 3156 - AA32_ID_SANITISED(ID_ISAR4_EL1), 3157 - AA32_ID_SANITISED(ID_ISAR5_EL1), 3158 - AA32_ID_SANITISED(ID_MMFR4_EL1), 3159 - AA32_ID_SANITISED(ID_ISAR6_EL1), 3147 + AA32_ID_WRITABLE(ID_ISAR0_EL1), 3148 + AA32_ID_WRITABLE(ID_ISAR1_EL1), 3149 + AA32_ID_WRITABLE(ID_ISAR2_EL1), 3150 + AA32_ID_WRITABLE(ID_ISAR3_EL1), 3151 + AA32_ID_WRITABLE(ID_ISAR4_EL1), 3152 + AA32_ID_WRITABLE(ID_ISAR5_EL1), 3153 + AA32_ID_WRITABLE(ID_MMFR4_EL1), 3154 + AA32_ID_WRITABLE(ID_ISAR6_EL1), 3160 3155 3161 3156 /* CRm=3 */ 3162 - AA32_ID_SANITISED(MVFR0_EL1), 3163 - AA32_ID_SANITISED(MVFR1_EL1), 3164 - AA32_ID_SANITISED(MVFR2_EL1), 3157 + AA32_ID_WRITABLE(MVFR0_EL1), 3158 + AA32_ID_WRITABLE(MVFR1_EL1), 3159 + AA32_ID_WRITABLE(MVFR2_EL1), 3165 3160 ID_UNALLOCATED(3,3), 3166 - AA32_ID_SANITISED(ID_PFR2_EL1), 3161 + AA32_ID_WRITABLE(ID_PFR2_EL1), 3167 3162 ID_HIDDEN(ID_DFR1_EL1), 3168 - AA32_ID_SANITISED(ID_MMFR5_EL1), 3163 + AA32_ID_WRITABLE(ID_MMFR5_EL1), 3169 3164 ID_UNALLOCATED(3,7), 3170 3165 3171 3166 /* AArch64 ID registers */ ··· 5609 5606 5610 5607 guard(mutex)(&kvm->arch.config_lock); 5611 5608 5612 - if (!(static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && 5613 - irqchip_in_kernel(kvm) && 5614 - kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)) { 5615 - kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] &= ~ID_AA64PFR0_EL1_GIC_MASK; 5616 - kvm->arch.id_regs[IDREG_IDX(SYS_ID_PFR1_EL1)] &= ~ID_PFR1_EL1_GIC_MASK; 5609 + if (!irqchip_in_kernel(kvm)) { 5610 + u64 val; 5611 + 5612 + val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; 5613 + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val); 5614 + val = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; 5615 + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, val); 5617 5616 } 5618 5617 5619 5618 if (vcpu_has_nv(vcpu)) {

+12 -4

arch/arm64/kvm/vgic/vgic-debug.c

··· 64 64 static int iter_mark_lpis(struct kvm *kvm) 65 65 { 66 66 struct vgic_dist *dist = &kvm->arch.vgic; 67 + unsigned long intid, flags; 67 68 struct vgic_irq *irq; 68 - unsigned long intid; 69 69 int nr_lpis = 0; 70 + 71 + xa_lock_irqsave(&dist->lpi_xa, flags); 70 72 71 73 xa_for_each(&dist->lpi_xa, intid, irq) { 72 74 if (!vgic_try_get_irq_ref(irq)) 73 75 continue; 74 76 75 - xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); 77 + __xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); 76 78 nr_lpis++; 77 79 } 80 + 81 + xa_unlock_irqrestore(&dist->lpi_xa, flags); 78 82 79 83 return nr_lpis; 80 84 } ··· 86 82 static void iter_unmark_lpis(struct kvm *kvm) 87 83 { 88 84 struct vgic_dist *dist = &kvm->arch.vgic; 85 + unsigned long intid, flags; 89 86 struct vgic_irq *irq; 90 - unsigned long intid; 91 87 92 88 xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { 93 - xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); 89 + xa_lock_irqsave(&dist->lpi_xa, flags); 90 + __xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); 91 + xa_unlock_irqrestore(&dist->lpi_xa, flags); 92 + 93 + /* vgic_put_irq() expects to be called outside of the xa_lock */ 94 94 vgic_put_irq(kvm, irq); 95 95 } 96 96 }

+13 -3

arch/arm64/kvm/vgic/vgic-init.c

··· 53 53 { 54 54 struct vgic_dist *dist = &kvm->arch.vgic; 55 55 56 - xa_init(&dist->lpi_xa); 56 + xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ); 57 57 } 58 58 59 59 /* CREATION */ ··· 71 71 int kvm_vgic_create(struct kvm *kvm, u32 type) 72 72 { 73 73 struct kvm_vcpu *vcpu; 74 + u64 aa64pfr0, pfr1; 74 75 unsigned long i; 75 76 int ret; 76 77 ··· 162 161 163 162 kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; 164 163 165 - if (type == KVM_DEV_TYPE_ARM_VGIC_V2) 164 + aa64pfr0 = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC; 165 + pfr1 = kvm_read_vm_id_reg(kvm, SYS_ID_PFR1_EL1) & ~ID_PFR1_EL1_GIC; 166 + 167 + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) { 166 168 kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; 167 - else 169 + } else { 168 170 INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); 171 + aa64pfr0 |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, GIC, IMP); 172 + pfr1 |= SYS_FIELD_PREP_ENUM(ID_PFR1_EL1, GIC, GICv3); 173 + } 174 + 175 + kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, aa64pfr0); 176 + kvm_set_vm_id_reg(kvm, SYS_ID_PFR1_EL1, pfr1); 169 177 170 178 if (type == KVM_DEV_TYPE_ARM_VGIC_V3) 171 179 kvm->arch.vgic.nassgicap = system_supports_direct_sgis();

+8 -10

arch/arm64/kvm/vgic/vgic-its.c

··· 78 78 { 79 79 struct vgic_dist *dist = &kvm->arch.vgic; 80 80 struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; 81 + unsigned long flags; 81 82 int ret; 82 83 83 84 /* In this case there is no put, since we keep the reference. */ ··· 89 88 if (!irq) 90 89 return ERR_PTR(-ENOMEM); 91 90 92 - ret = xa_reserve(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); 91 + ret = xa_reserve_irq(&dist->lpi_xa, intid, GFP_KERNEL_ACCOUNT); 93 92 if (ret) { 94 93 kfree(irq); 95 94 return ERR_PTR(ret); ··· 104 103 irq->target_vcpu = vcpu; 105 104 irq->group = 1; 106 105 107 - xa_lock(&dist->lpi_xa); 106 + xa_lock_irqsave(&dist->lpi_xa, flags); 108 107 109 108 /* 110 109 * There could be a race with another vgic_add_lpi(), so we need to ··· 115 114 /* Someone was faster with adding this LPI, lets use that. */ 116 115 kfree(irq); 117 116 irq = oldirq; 118 - 119 - goto out_unlock; 117 + } else { 118 + ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); 120 119 } 121 120 122 - ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0)); 121 + xa_unlock_irqrestore(&dist->lpi_xa, flags); 122 + 123 123 if (ret) { 124 124 xa_release(&dist->lpi_xa, intid); 125 125 kfree(irq); 126 - } 127 126 128 - out_unlock: 129 - xa_unlock(&dist->lpi_xa); 130 - 131 - if (ret) 132 127 return ERR_PTR(ret); 128 + } 133 129 134 130 /* 135 131 * We "cache" the configuration table entries in our struct vgic_irq's.

+2 -1

arch/arm64/kvm/vgic/vgic-v3.c

··· 301 301 return; 302 302 303 303 /* Hide GICv3 sysreg if necessary */ 304 - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) { 304 + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2 || 305 + !irqchip_in_kernel(vcpu->kvm)) { 305 306 vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | 306 307 ICH_HCR_EL2_TC); 307 308 return;

+15 -8

arch/arm64/kvm/vgic/vgic.c

··· 28 28 * kvm->arch.config_lock (mutex) 29 29 * its->cmd_lock (mutex) 30 30 * its->its_lock (mutex) 31 - * vgic_dist->lpi_xa.xa_lock 31 + * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled 32 32 * vgic_cpu->ap_list_lock must be taken with IRQs disabled 33 33 * vgic_irq->irq_lock must be taken with IRQs disabled 34 34 * ··· 141 141 void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) 142 142 { 143 143 struct vgic_dist *dist = &kvm->arch.vgic; 144 + unsigned long flags; 144 145 145 - if (irq->intid >= VGIC_MIN_LPI) 146 - might_lock(&dist->lpi_xa.xa_lock); 146 + /* 147 + * Normally the lock is only taken when the refcount drops to 0. 148 + * Acquire/release it early on lockdep kernels to make locking issues 149 + * in rare release paths a bit more obvious. 150 + */ 151 + if (IS_ENABLED(CONFIG_LOCKDEP) && irq->intid >= VGIC_MIN_LPI) { 152 + guard(spinlock_irqsave)(&dist->lpi_xa.xa_lock); 153 + } 147 154 148 155 if (!__vgic_put_irq(kvm, irq)) 149 156 return; 150 157 151 - xa_lock(&dist->lpi_xa); 158 + xa_lock_irqsave(&dist->lpi_xa, flags); 152 159 vgic_release_lpi_locked(dist, irq); 153 - xa_unlock(&dist->lpi_xa); 160 + xa_unlock_irqrestore(&dist->lpi_xa, flags); 154 161 } 155 162 156 163 static void vgic_release_deleted_lpis(struct kvm *kvm) 157 164 { 158 165 struct vgic_dist *dist = &kvm->arch.vgic; 159 - unsigned long intid; 166 + unsigned long flags, intid; 160 167 struct vgic_irq *irq; 161 168 162 - xa_lock(&dist->lpi_xa); 169 + xa_lock_irqsave(&dist->lpi_xa, flags); 163 170 164 171 xa_for_each(&dist->lpi_xa, intid, irq) { 165 172 if (irq->pending_release) 166 173 vgic_release_lpi_locked(dist, irq); 167 174 } 168 175 169 - xa_unlock(&dist->lpi_xa); 176 + xa_unlock_irqrestore(&dist->lpi_xa, flags); 170 177 } 171 178 172 179 void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu)

+10

arch/arm64/mm/fault.c

··· 969 969 970 970 void tag_clear_highpage(struct page *page) 971 971 { 972 + /* 973 + * Check if MTE is supported and fall back to clear_highpage(). 974 + * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and 975 + * post_alloc_hook() will invoke tag_clear_highpage(). 976 + */ 977 + if (!system_supports_mte()) { 978 + clear_highpage(page); 979 + return; 980 + } 981 + 972 982 /* Newly allocated page, shouldn't have been tagged yet */ 973 983 WARN_ON_ONCE(!try_page_mte_tagging(page)); 974 984 mte_zero_clear_page_tags(page_address(page));

+80 -31

arch/arm64/mm/mmu.c

··· 708 708 return ret; 709 709 } 710 710 711 + static inline bool force_pte_mapping(void) 712 + { 713 + const bool bbml2 = system_capabilities_finalized() ? 714 + system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); 715 + 716 + if (debug_pagealloc_enabled()) 717 + return true; 718 + if (bbml2) 719 + return false; 720 + return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world(); 721 + } 722 + 723 + static inline bool split_leaf_mapping_possible(void) 724 + { 725 + /* 726 + * !BBML2_NOABORT systems should never run into scenarios where we would 727 + * have to split. So exit early and let calling code detect it and raise 728 + * a warning. 729 + */ 730 + if (!system_supports_bbml2_noabort()) 731 + return false; 732 + return !force_pte_mapping(); 733 + } 734 + 711 735 static DEFINE_MUTEX(pgtable_split_lock); 712 736 713 737 int split_kernel_leaf_mapping(unsigned long start, unsigned long end) ··· 739 715 int ret; 740 716 741 717 /* 742 - * !BBML2_NOABORT systems should not be trying to change permissions on 743 - * anything that is not pte-mapped in the first place. Just return early 744 - * and let the permission change code raise a warning if not already 745 - * pte-mapped. 718 + * Exit early if the region is within a pte-mapped area or if we can't 719 + * split. For the latter case, the permission change code will raise a 720 + * warning if not already pte-mapped. 
746 721 */ 747 - if (!system_supports_bbml2_noabort()) 722 + if (!split_leaf_mapping_possible() || is_kfence_address((void *)start)) 748 723 return 0; 749 724 750 725 /* ··· 781 758 return ret; 782 759 } 783 760 784 - static int __init split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, 785 - unsigned long next, 786 - struct mm_walk *walk) 761 + static int split_to_ptes_pud_entry(pud_t *pudp, unsigned long addr, 762 + unsigned long next, struct mm_walk *walk) 787 763 { 764 + gfp_t gfp = *(gfp_t *)walk->private; 788 765 pud_t pud = pudp_get(pudp); 789 766 int ret = 0; 790 767 791 768 if (pud_leaf(pud)) 792 - ret = split_pud(pudp, pud, GFP_ATOMIC, false); 769 + ret = split_pud(pudp, pud, gfp, false); 793 770 794 771 return ret; 795 772 } 796 773 797 - static int __init split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, 798 - unsigned long next, 799 - struct mm_walk *walk) 774 + static int split_to_ptes_pmd_entry(pmd_t *pmdp, unsigned long addr, 775 + unsigned long next, struct mm_walk *walk) 800 776 { 777 + gfp_t gfp = *(gfp_t *)walk->private; 801 778 pmd_t pmd = pmdp_get(pmdp); 802 779 int ret = 0; 803 780 804 781 if (pmd_leaf(pmd)) { 805 782 if (pmd_cont(pmd)) 806 783 split_contpmd(pmdp); 807 - ret = split_pmd(pmdp, pmd, GFP_ATOMIC, false); 784 + ret = split_pmd(pmdp, pmd, gfp, false); 808 785 809 786 /* 810 787 * We have split the pmd directly to ptes so there is no need to ··· 816 793 return ret; 817 794 } 818 795 819 - static int __init split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, 820 - unsigned long next, 821 - struct mm_walk *walk) 796 + static int split_to_ptes_pte_entry(pte_t *ptep, unsigned long addr, 797 + unsigned long next, struct mm_walk *walk) 822 798 { 823 799 pte_t pte = __ptep_get(ptep); 824 800 ··· 827 805 return 0; 828 806 } 829 807 830 - static const struct mm_walk_ops split_to_ptes_ops __initconst = { 808 + static const struct mm_walk_ops split_to_ptes_ops = { 831 809 .pud_entry = split_to_ptes_pud_entry, 832 810 
.pmd_entry = split_to_ptes_pmd_entry, 833 811 .pte_entry = split_to_ptes_pte_entry, 834 812 }; 813 + 814 + static int range_split_to_ptes(unsigned long start, unsigned long end, gfp_t gfp) 815 + { 816 + int ret; 817 + 818 + arch_enter_lazy_mmu_mode(); 819 + ret = walk_kernel_page_table_range_lockless(start, end, 820 + &split_to_ptes_ops, NULL, &gfp); 821 + arch_leave_lazy_mmu_mode(); 822 + 823 + return ret; 824 + } 835 825 836 826 static bool linear_map_requires_bbml2 __initdata; 837 827 ··· 881 847 * PTE. The kernel alias remains static throughout runtime so 882 848 * can continue to be safely mapped with large mappings. 883 849 */ 884 - ret = walk_kernel_page_table_range_lockless(lstart, kstart, 885 - &split_to_ptes_ops, NULL, NULL); 850 + ret = range_split_to_ptes(lstart, kstart, GFP_ATOMIC); 886 851 if (!ret) 887 - ret = walk_kernel_page_table_range_lockless(kend, lend, 888 - &split_to_ptes_ops, NULL, NULL); 852 + ret = range_split_to_ptes(kend, lend, GFP_ATOMIC); 889 853 if (ret) 890 854 panic("Failed to split linear map\n"); 891 855 flush_tlb_kernel_range(lstart, lend); ··· 1034 1002 memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE); 1035 1003 __kfence_pool = phys_to_virt(kfence_pool); 1036 1004 } 1005 + 1006 + bool arch_kfence_init_pool(void) 1007 + { 1008 + unsigned long start = (unsigned long)__kfence_pool; 1009 + unsigned long end = start + KFENCE_POOL_SIZE; 1010 + int ret; 1011 + 1012 + /* Exit early if we know the linear map is already pte-mapped. */ 1013 + if (!split_leaf_mapping_possible()) 1014 + return true; 1015 + 1016 + /* Kfence pool is already pte-mapped for the early init case. 
*/ 1017 + if (kfence_early_init) 1018 + return true; 1019 + 1020 + mutex_lock(&pgtable_split_lock); 1021 + ret = range_split_to_ptes(start, end, GFP_PGTABLE_KERNEL); 1022 + mutex_unlock(&pgtable_split_lock); 1023 + 1024 + /* 1025 + * Since the system supports bbml2_noabort, tlb invalidation is not 1026 + * required here; the pgtable mappings have been split to pte but larger 1027 + * entries may safely linger in the TLB. 1028 + */ 1029 + 1030 + return !ret; 1031 + } 1037 1032 #else /* CONFIG_KFENCE */ 1038 1033 1039 1034 static inline phys_addr_t arm64_kfence_alloc_pool(void) { return 0; } 1040 1035 static inline void arm64_kfence_map_pool(phys_addr_t kfence_pool, pgd_t *pgdp) { } 1041 1036 1042 1037 #endif /* CONFIG_KFENCE */ 1043 - 1044 - static inline bool force_pte_mapping(void) 1045 - { 1046 - bool bbml2 = system_capabilities_finalized() ? 1047 - system_supports_bbml2_noabort() : cpu_supports_bbml2_noabort(); 1048 - 1049 - return (!bbml2 && (rodata_full || arm64_kfence_can_set_direct_map() || 1050 - is_realm_world())) || 1051 - debug_pagealloc_enabled(); 1052 - } 1053 1038 1054 1039 static void __init map_mem(pgd_t *pgdp) 1055 1040 {

+2

arch/loongarch/include/asm/cpu-features.h

··· 67 67 #define cpu_has_hypervisor cpu_opt(LOONGARCH_CPU_HYPERVISOR) 68 68 #define cpu_has_ptw cpu_opt(LOONGARCH_CPU_PTW) 69 69 #define cpu_has_lspw cpu_opt(LOONGARCH_CPU_LSPW) 70 + #define cpu_has_msgint cpu_opt(LOONGARCH_CPU_MSGINT) 70 71 #define cpu_has_avecint cpu_opt(LOONGARCH_CPU_AVECINT) 72 + #define cpu_has_redirectint cpu_opt(LOONGARCH_CPU_REDIRECTINT) 71 73 72 74 #endif /* __ASM_CPU_FEATURES_H */

+5 -1

arch/loongarch/include/asm/cpu.h

··· 101 101 #define CPU_FEATURE_HYPERVISOR 26 /* CPU has hypervisor (running in VM) */ 102 102 #define CPU_FEATURE_PTW 27 /* CPU has hardware page table walker */ 103 103 #define CPU_FEATURE_LSPW 28 /* CPU has LSPW (lddir/ldpte instructions) */ 104 - #define CPU_FEATURE_AVECINT 29 /* CPU has AVEC interrupt */ 104 + #define CPU_FEATURE_MSGINT 29 /* CPU has MSG interrupt */ 105 + #define CPU_FEATURE_AVECINT 30 /* CPU has AVEC interrupt */ 106 + #define CPU_FEATURE_REDIRECTINT 31 /* CPU has interrupt remapping */ 105 107 106 108 #define LOONGARCH_CPU_CPUCFG BIT_ULL(CPU_FEATURE_CPUCFG) 107 109 #define LOONGARCH_CPU_LAM BIT_ULL(CPU_FEATURE_LAM) ··· 134 132 #define LOONGARCH_CPU_HYPERVISOR BIT_ULL(CPU_FEATURE_HYPERVISOR) 135 133 #define LOONGARCH_CPU_PTW BIT_ULL(CPU_FEATURE_PTW) 136 134 #define LOONGARCH_CPU_LSPW BIT_ULL(CPU_FEATURE_LSPW) 135 + #define LOONGARCH_CPU_MSGINT BIT_ULL(CPU_FEATURE_MSGINT) 137 136 #define LOONGARCH_CPU_AVECINT BIT_ULL(CPU_FEATURE_AVECINT) 137 + #define LOONGARCH_CPU_REDIRECTINT BIT_ULL(CPU_FEATURE_REDIRECTINT) 138 138 139 139 #endif /* _ASM_CPU_H */

+2 -2

arch/loongarch/include/asm/hw_breakpoint.h

··· 134 134 /* Determine number of BRP registers available. */ 135 135 static inline int get_num_brps(void) 136 136 { 137 - return csr_read64(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; 137 + return csr_read32(LOONGARCH_CSR_FWPC) & CSR_FWPC_NUM; 138 138 } 139 139 140 140 /* Determine number of WRP registers available. */ 141 141 static inline int get_num_wrps(void) 142 142 { 143 - return csr_read64(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; 143 + return csr_read32(LOONGARCH_CSR_MWPC) & CSR_MWPC_NUM; 144 144 } 145 145 146 146 #endif /* __KERNEL__ */

+4 -1

arch/loongarch/include/asm/io.h

··· 14 14 #include <asm/pgtable-bits.h> 15 15 #include <asm/string.h> 16 16 17 - extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); 17 + extern void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size); 18 18 extern void __init early_iounmap(void __iomem *addr, unsigned long size); 19 19 20 20 #define early_memremap early_ioremap ··· 25 25 static inline void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, 26 26 pgprot_t prot) 27 27 { 28 + if (offset > TO_PHYS_MASK) 29 + return NULL; 30 + 28 31 switch (pgprot_val(prot) & _CACHE_MASK) { 29 32 case _CACHE_CC: 30 33 return (void __iomem *)(unsigned long)(CACHE_BASE + offset);

+2

arch/loongarch/include/asm/loongarch.h

··· 128 128 #define CPUCFG6_PMNUM GENMASK(7, 4) 129 129 #define CPUCFG6_PMNUM_SHIFT 4 130 130 #define CPUCFG6_PMBITS GENMASK(13, 8) 131 + #define CPUCFG6_PMBITS_SHIFT 8 131 132 #define CPUCFG6_UPM BIT(14) 132 133 133 134 #define LOONGARCH_CPUCFG16 0x10 ··· 1138 1137 #define IOCSRF_FLATMODE BIT_ULL(10) 1139 1138 #define IOCSRF_VM BIT_ULL(11) 1140 1139 #define IOCSRF_AVEC BIT_ULL(15) 1140 + #define IOCSRF_REDIRECT BIT_ULL(16) 1141 1141 1142 1142 #define LOONGARCH_IOCSR_VENDOR 0x10 1143 1143

+1 -1

arch/loongarch/include/asm/pgalloc.h

··· 88 88 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) 89 89 { 90 90 pud_t *pud; 91 - struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL & ~__GFP_HIGHMEM, 0); 91 + struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, 0); 92 92 93 93 if (!ptdesc) 94 94 return NULL;

+8 -3

arch/loongarch/include/asm/pgtable.h

··· 424 424 425 425 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) 426 426 { 427 + if (pte_val(pte) & _PAGE_DIRTY) 428 + pte_val(pte) |= _PAGE_MODIFIED; 429 + 427 430 return __pte((pte_val(pte) & _PAGE_CHG_MASK) | 428 431 (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); 429 432 } ··· 550 547 551 548 static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) 552 549 { 553 - pmd_val(pmd) = (pmd_val(pmd) & _HPAGE_CHG_MASK) | 554 - (pgprot_val(newprot) & ~_HPAGE_CHG_MASK); 555 - return pmd; 550 + if (pmd_val(pmd) & _PAGE_DIRTY) 551 + pmd_val(pmd) |= _PAGE_MODIFIED; 552 + 553 + return __pmd((pmd_val(pmd) & _HPAGE_CHG_MASK) | 554 + (pgprot_val(newprot) & ~_HPAGE_CHG_MASK)); 556 555 } 557 556 558 557 static inline pmd_t pmd_mkinvalid(pmd_t pmd)

+4

arch/loongarch/kernel/cpu-probe.c

··· 157 157 c->options |= LOONGARCH_CPU_TLB; 158 158 if (config & CPUCFG1_IOCSR) 159 159 c->options |= LOONGARCH_CPU_IOCSR; 160 + if (config & CPUCFG1_MSGINT) 161 + c->options |= LOONGARCH_CPU_MSGINT; 160 162 if (config & CPUCFG1_UAL) { 161 163 c->options |= LOONGARCH_CPU_UAL; 162 164 elf_hwcap |= HWCAP_LOONGARCH_UAL; ··· 333 331 c->options |= LOONGARCH_CPU_EIODECODE; 334 332 if (config & IOCSRF_AVEC) 335 333 c->options |= LOONGARCH_CPU_AVECINT; 334 + if (config & IOCSRF_REDIRECT) 335 + c->options |= LOONGARCH_CPU_REDIRECTINT; 336 336 if (config & IOCSRF_VM) 337 337 c->options |= LOONGARCH_CPU_HYPERVISOR; 338 338 }

+1 -1

arch/loongarch/kernel/kexec_efi.c

··· 42 42 { 43 43 int ret; 44 44 unsigned long text_offset, kernel_segment_number; 45 - struct kexec_buf kbuf; 45 + struct kexec_buf kbuf = {}; 46 46 struct kexec_segment *kernel_segment; 47 47 struct loongarch_image_header *h; 48 48

+1 -1

arch/loongarch/kernel/kexec_elf.c

··· 59 59 int ret; 60 60 unsigned long text_offset, kernel_segment_number; 61 61 struct elfhdr ehdr; 62 - struct kexec_buf kbuf; 62 + struct kexec_buf kbuf = {}; 63 63 struct kexec_elf_info elf_info; 64 64 struct kexec_segment *kernel_segment; 65 65

-22

arch/loongarch/kernel/machine_kexec.c

··· 39 39 static unsigned long start_addr; 40 40 static unsigned long first_ind_entry; 41 41 42 - static void kexec_image_info(const struct kimage *kimage) 43 - { 44 - unsigned long i; 45 - 46 - pr_debug("kexec kimage info:\n"); 47 - pr_debug("\ttype: %d\n", kimage->type); 48 - pr_debug("\tstart: %lx\n", kimage->start); 49 - pr_debug("\thead: %lx\n", kimage->head); 50 - pr_debug("\tnr_segments: %lu\n", kimage->nr_segments); 51 - 52 - for (i = 0; i < kimage->nr_segments; i++) { 53 - pr_debug("\t segment[%lu]: %016lx - %016lx", i, 54 - kimage->segment[i].mem, 55 - kimage->segment[i].mem + kimage->segment[i].memsz); 56 - pr_debug("\t\t0x%lx bytes, %lu pages\n", 57 - (unsigned long)kimage->segment[i].memsz, 58 - (unsigned long)kimage->segment[i].memsz / PAGE_SIZE); 59 - } 60 - } 61 - 62 42 int machine_kexec_prepare(struct kimage *kimage) 63 43 { 64 44 int i; 65 45 char *bootloader = "kexec"; 66 46 void *cmdline_ptr = (void *)KEXEC_CMDLINE_ADDR; 67 - 68 - kexec_image_info(kimage); 69 47 70 48 kimage->arch.efi_boot = fw_arg0; 71 49 kimage->arch.systable_ptr = fw_arg2;

+1 -1

arch/loongarch/kernel/machine_kexec_file.c

··· 143 143 unsigned long initrd_load_addr = 0; 144 144 unsigned long orig_segments = image->nr_segments; 145 145 char *modified_cmdline = NULL; 146 - struct kexec_buf kbuf; 146 + struct kexec_buf kbuf = {}; 147 147 148 148 kbuf.image = image; 149 149 /* Don't allocate anything below the kernel */

+3 -4

arch/loongarch/kernel/mem.c

··· 13 13 void __init memblock_init(void) 14 14 { 15 15 u32 mem_type; 16 - u64 mem_start, mem_end, mem_size; 16 + u64 mem_start, mem_size; 17 17 efi_memory_desc_t *md; 18 18 19 19 /* Parse memory information */ ··· 21 21 mem_type = md->type; 22 22 mem_start = md->phys_addr; 23 23 mem_size = md->num_pages << EFI_PAGE_SHIFT; 24 - mem_end = mem_start + mem_size; 25 24 26 25 switch (mem_type) { 27 26 case EFI_LOADER_CODE: ··· 30 31 case EFI_PERSISTENT_MEMORY: 31 32 case EFI_CONVENTIONAL_MEMORY: 32 33 memblock_add(mem_start, mem_size); 33 - if (max_low_pfn < (mem_end >> PAGE_SHIFT)) 34 - max_low_pfn = mem_end >> PAGE_SHIFT; 35 34 break; 36 35 case EFI_PAL_CODE: 37 36 case EFI_UNUSABLE_MEMORY: ··· 46 49 } 47 50 } 48 51 52 + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); 53 + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); 49 54 memblock_set_current_limit(PFN_PHYS(max_low_pfn)); 50 55 51 56 /* Reserve the first 2MB */

+2 -21

arch/loongarch/kernel/numa.c

··· 272 272 node_mem_init(node); 273 273 node_set_online(node); 274 274 } 275 - max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); 275 + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); 276 + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); 276 277 277 278 setup_nr_node_ids(); 278 279 loongson_sysconf.nr_nodes = nr_node_ids; ··· 283 282 } 284 283 285 284 #endif 286 - 287 - void __init paging_init(void) 288 - { 289 - unsigned int node; 290 - unsigned long zones_size[MAX_NR_ZONES] = {0, }; 291 - 292 - for_each_online_node(node) { 293 - unsigned long start_pfn, end_pfn; 294 - 295 - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); 296 - 297 - if (end_pfn > max_low_pfn) 298 - max_low_pfn = end_pfn; 299 - } 300 - #ifdef CONFIG_ZONE_DMA32 301 - zones_size[ZONE_DMA32] = MAX_DMA32_PFN; 302 - #endif 303 - zones_size[ZONE_NORMAL] = max_low_pfn; 304 - free_area_init(zones_size); 305 - } 306 285 307 286 int pcibus_to_node(struct pci_bus *bus) 308 287 {

+4 -3

arch/loongarch/kernel/perf_event.c

··· 845 845 846 846 static int __init init_hw_perf_events(void) 847 847 { 848 - int counters; 848 + int bits, counters; 849 849 850 850 if (!cpu_has_pmp) 851 851 return -ENODEV; 852 852 853 853 pr_info("Performance counters: "); 854 - counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> 4) + 1; 854 + bits = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMBITS) >> CPUCFG6_PMBITS_SHIFT) + 1; 855 + counters = ((read_cpucfg(LOONGARCH_CPUCFG6) & CPUCFG6_PMNUM) >> CPUCFG6_PMNUM_SHIFT) + 1; 855 856 856 857 loongarch_pmu.num_counters = counters; 857 858 loongarch_pmu.max_period = (1ULL << 63) - 1; ··· 868 867 on_each_cpu(reset_counters, NULL, 1); 869 868 870 869 pr_cont("%s PMU enabled, %d %d-bit counters available to each CPU.\n", 871 - loongarch_pmu.name, counters, 64); 870 + loongarch_pmu.name, counters, bits); 872 871 873 872 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW); 874 873

+2 -3

arch/loongarch/kernel/setup.c

··· 294 294 295 295 early_init_dt_scan(fdt_pointer, __pa(fdt_pointer)); 296 296 early_init_fdt_reserve_self(); 297 - 298 - max_low_pfn = PFN_PHYS(memblock_end_of_DRAM()); 299 297 #endif 300 298 } 301 299 ··· 388 390 static void __init arch_mem_init(char **cmdline_p) 389 391 { 390 392 /* Recalculate max_low_pfn for "mem=xxx" */ 391 - max_pfn = max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); 393 + max_pfn = PFN_DOWN(memblock_end_of_DRAM()); 394 + max_low_pfn = min(PFN_DOWN(HIGHMEM_START), max_pfn); 392 395 393 396 if (usermem) 394 397 pr_info("User-defined physical RAM map overwrite\n");

+2 -2

arch/loongarch/kernel/traps.c

··· 1131 1131 tlbrentry = (unsigned long)exception_handlers + 80*VECSIZE; 1132 1132 1133 1133 csr_write64(eentry, LOONGARCH_CSR_EENTRY); 1134 - csr_write64(eentry, LOONGARCH_CSR_MERRENTRY); 1135 - csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY); 1134 + csr_write64(__pa(eentry), LOONGARCH_CSR_MERRENTRY); 1135 + csr_write64(__pa(tlbrentry), LOONGARCH_CSR_TLBRENTRY); 1136 1136 } 1137 1137 1138 1138 void per_cpu_trap_init(int cpu)

+1 -1

arch/loongarch/kvm/intc/eiointc.c

··· 439 439 spin_lock_irqsave(&s->lock, flags); 440 440 switch (type) { 441 441 case KVM_DEV_LOONGARCH_EXTIOI_CTRL_INIT_NUM_CPU: 442 - if (val >= EIOINTC_ROUTE_MAX_VCPUS) 442 + if (val > EIOINTC_ROUTE_MAX_VCPUS) 443 443 ret = -EINVAL; 444 444 else 445 445 s->num_cpu = val;

+1 -1

arch/loongarch/kvm/mmu.c

··· 857 857 858 858 if (writeable) { 859 859 prot_bits = kvm_pte_mkwriteable(prot_bits); 860 - if (write) 860 + if (write || !kvm_slot_dirty_track_enabled(memslot)) 861 861 prot_bits = kvm_pte_mkdirty(prot_bits); 862 862 } 863 863

+2

arch/loongarch/kvm/timer.c

··· 4 4 */ 5 5 6 6 #include <linux/kvm_host.h> 7 + #include <asm/delay.h> 7 8 #include <asm/kvm_csr.h> 8 9 #include <asm/kvm_vcpu.h> 9 10 ··· 96 95 * and set CSR TVAL with -1 97 96 */ 98 97 write_gcsr_timertick(0); 98 + __delay(2); /* Wait cycles until timer interrupt injected */ 99 99 100 100 /* 101 101 * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear

+9 -10

arch/loongarch/kvm/vcpu.c

··· 132 132 * Clear KVM_LARCH_PMU if the guest is not using PMU CSRs when 133 133 * exiting the guest, so that the next time trap into the guest. 134 134 * We don't need to deal with PMU CSRs contexts. 135 + * 136 + * Otherwise set the request bit KVM_REQ_PMU to restore guest PMU 137 + * before entering guest VM 135 138 */ 136 139 val = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL0); 137 140 val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL1); ··· 142 139 val |= kvm_read_sw_gcsr(csr, LOONGARCH_CSR_PERFCTRL3); 143 140 if (!(val & KVM_PMU_EVENT_ENABLED)) 144 141 vcpu->arch.aux_inuse &= ~KVM_LARCH_PMU; 142 + else 143 + kvm_make_request(KVM_REQ_PMU, vcpu); 145 144 146 145 kvm_restore_host_pmu(vcpu); 147 - } 148 - 149 - static void kvm_restore_pmu(struct kvm_vcpu *vcpu) 150 - { 151 - if ((vcpu->arch.aux_inuse & KVM_LARCH_PMU)) 152 - kvm_make_request(KVM_REQ_PMU, vcpu); 153 146 } 154 147 155 148 static void kvm_check_pmu(struct kvm_vcpu *vcpu) ··· 298 299 vcpu->arch.aux_inuse &= ~KVM_LARCH_SWCSR_LATEST; 299 300 300 301 if (kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending()) { 301 - kvm_lose_pmu(vcpu); 302 + if (vcpu->arch.aux_inuse & KVM_LARCH_PMU) { 303 + kvm_lose_pmu(vcpu); 304 + kvm_make_request(KVM_REQ_PMU, vcpu); 305 + } 302 306 /* make sure the vcpu mode has been written */ 303 307 smp_store_mb(vcpu->mode, OUTSIDE_GUEST_MODE); 304 308 local_irq_enable(); ··· 1605 1603 /* Restore timer state regardless */ 1606 1604 kvm_restore_timer(vcpu); 1607 1605 kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu); 1608 - 1609 - /* Restore hardware PMU CSRs */ 1610 - kvm_restore_pmu(vcpu); 1611 1606 1612 1607 /* Don't bother restoring registers multiple times unless necessary */ 1613 1608 if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)

-2

arch/loongarch/mm/init.c

··· 60 60 return memblock_is_memory(addr) && !memblock_is_reserved(addr); 61 61 } 62 62 63 - #ifndef CONFIG_NUMA 64 63 void __init paging_init(void) 65 64 { 66 65 unsigned long max_zone_pfns[MAX_NR_ZONES]; ··· 71 72 72 73 free_area_init(max_zone_pfns); 73 74 } 74 - #endif /* !CONFIG_NUMA */ 75 75 76 76 void __ref free_initmem(void) 77 77 {

+1 -1

arch/loongarch/mm/ioremap.c

··· 6 6 #include <asm/io.h> 7 7 #include <asm-generic/early_ioremap.h> 8 8 9 - void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size) 9 + void __init __iomem *early_ioremap(phys_addr_t phys_addr, unsigned long size) 10 10 { 11 11 return ((void __iomem *)TO_CACHE(phys_addr)); 12 12 }

+10 -3

arch/parisc/kernel/unwind.c

··· 35 35 36 36 #define KERNEL_START (KERNEL_BINARY_TEXT_START) 37 37 38 + #define ALIGNMENT_OK(ptr, type) (((ptr) & (sizeof(type) - 1)) == 0) 39 + 38 40 extern struct unwind_table_entry __start___unwind[]; 39 41 extern struct unwind_table_entry __stop___unwind[]; 40 42 ··· 259 257 if (pc_is_kernel_fn(pc, _switch_to) || 260 258 pc == (unsigned long)&_switch_to_ret) { 261 259 info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; 262 - info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); 260 + if (ALIGNMENT_OK(info->prev_sp, long)) 261 + info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); 262 + else 263 + info->prev_ip = info->prev_sp = 0; 263 264 return 1; 264 265 } 265 266 266 267 #ifdef CONFIG_IRQSTACKS 267 - if (pc == (unsigned long)&_call_on_stack) { 268 + if (pc == (unsigned long)&_call_on_stack && ALIGNMENT_OK(info->sp, long)) { 268 269 info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); 269 270 info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); 270 271 return 1; ··· 375 370 info->prev_sp = info->sp - frame_size; 376 371 if (e->Millicode) 377 372 info->rp = info->r31; 378 - else if (rpoffset) 373 + else if (rpoffset && ALIGNMENT_OK(info->prev_sp, long)) 379 374 info->rp = *(unsigned long *)(info->prev_sp - rpoffset); 375 + else 376 + info->rp = 0; 380 377 info->prev_ip = info->rp; 381 378 info->rp = 0; 382 379 }

+1 -1

arch/riscv/Kconfig

··· 367 367 systems to handle cache management. 368 368 369 369 config AS_HAS_INSN 370 - def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero) 370 + def_bool $(as-instr,.insn 0x100000f) 371 371 372 372 config AS_HAS_OPTION_ARCH 373 373 # https://github.com/llvm/llvm-project/commit/9e8ed3403c191ab9c4903e8eeb8f732ff8a43cb4

+1 -16

arch/riscv/Makefile

··· 134 134 CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) 135 135 136 136 # Default target when executing plain make 137 - boot := arch/riscv/boot 138 - ifeq ($(CONFIG_XIP_KERNEL),y) 139 - KBUILD_IMAGE := $(boot)/xipImage 140 - else 141 - ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy) 142 - KBUILD_IMAGE := $(boot)/loader.bin 143 - else 144 - ifeq ($(CONFIG_EFI_ZBOOT),) 145 - KBUILD_IMAGE := $(boot)/Image.gz 146 - else 147 - KBUILD_IMAGE := $(boot)/vmlinuz.efi 148 - endif 149 - endif 150 - endif 151 - 152 137 boot := arch/riscv/boot 153 138 boot-image-y := Image 154 139 boot-image-$(CONFIG_KERNEL_BZIP2) := Image.bz2 ··· 144 159 boot-image-$(CONFIG_KERNEL_ZSTD) := Image.zst 145 160 boot-image-$(CONFIG_KERNEL_XZ) := Image.xz 146 161 ifdef CONFIG_RISCV_M_MODE 147 - boot-image-$(CONFIG_ARCH_CANAAN) := loader.bin 162 + boot-image-$(CONFIG_SOC_CANAAN_K210) := loader.bin 148 163 endif 149 164 boot-image-$(CONFIG_EFI_ZBOOT) := vmlinuz.efi 150 165 boot-image-$(CONFIG_XIP_KERNEL) := xipImage

+6

arch/riscv/include/asm/asm.h

··· 12 12 #define __ASM_STR(x) #x 13 13 #endif 14 14 15 + #ifdef CONFIG_AS_HAS_INSN 16 + #define ASM_INSN_I(__x) ".insn " __x 17 + #else 18 + #define ASM_INSN_I(__x) ".4byte " __x 19 + #endif 20 + 15 21 #if __riscv_xlen == 64 16 22 #define __REG_SEL(a, b) __ASM_STR(a) 17 23 #elif __riscv_xlen == 32

+4 -4

arch/riscv/include/asm/insn-def.h

··· 256 256 INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3), \ 257 257 SIMM12((offset) & 0xfe0), RS1(base)) 258 258 259 - #define RISCV_PAUSE ".4byte 0x100000f" 260 - #define ZAWRS_WRS_NTO ".4byte 0x00d00073" 261 - #define ZAWRS_WRS_STO ".4byte 0x01d00073" 262 - #define RISCV_NOP4 ".4byte 0x00000013" 259 + #define RISCV_PAUSE ASM_INSN_I("0x100000f") 260 + #define ZAWRS_WRS_NTO ASM_INSN_I("0x00d00073") 261 + #define ZAWRS_WRS_STO ASM_INSN_I("0x01d00073") 262 + #define RISCV_NOP4 ASM_INSN_I("0x00000013") 263 263 264 264 #define RISCV_INSN_NOP4 _AC(0x00000013, U) 265 265

+3 -3

arch/riscv/include/asm/vendor_extensions/mips.h

··· 30 30 * allowing any subsequent instructions to fetch. 31 31 */ 32 32 33 - #define MIPS_PAUSE ".4byte 0x00501013\n\t" 34 - #define MIPS_EHB ".4byte 0x00301013\n\t" 35 - #define MIPS_IHB ".4byte 0x00101013\n\t" 33 + #define MIPS_PAUSE ASM_INSN_I("0x00501013\n\t") 34 + #define MIPS_EHB ASM_INSN_I("0x00301013\n\t") 35 + #define MIPS_IHB ASM_INSN_I("0x00101013\n\t") 36 36 37 37 #endif // _ASM_RISCV_VENDOR_EXTENSIONS_MIPS_H

+2 -2

arch/riscv/kernel/kgdb.c

··· 265 265 { 266 266 if (!strncmp(remcom_in_buffer, gdb_xfer_read_target, 267 267 sizeof(gdb_xfer_read_target))) 268 - strcpy(remcom_out_buffer, riscv_gdb_stub_target_desc); 268 + strscpy(remcom_out_buffer, riscv_gdb_stub_target_desc, BUFMAX); 269 269 else if (!strncmp(remcom_in_buffer, gdb_xfer_read_cpuxml, 270 270 sizeof(gdb_xfer_read_cpuxml))) 271 - strcpy(remcom_out_buffer, riscv_gdb_stub_cpuxml); 271 + strscpy(remcom_out_buffer, riscv_gdb_stub_cpuxml, BUFMAX); 272 272 } 273 273 274 274 static inline void kgdb_arch_update_addr(struct pt_regs *regs,

+6 -2

arch/riscv/kernel/module-sections.c

··· 119 119 unsigned int num_plts = 0; 120 120 unsigned int num_gots = 0; 121 121 Elf_Rela *scratch = NULL; 122 + Elf_Rela *new_scratch; 122 123 size_t scratch_size = 0; 123 124 int i; 124 125 ··· 169 168 scratch_size_needed = (num_scratch_relas + num_relas) * sizeof(*scratch); 170 169 if (scratch_size_needed > scratch_size) { 171 170 scratch_size = scratch_size_needed; 172 - scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); 173 - if (!scratch) 171 + new_scratch = kvrealloc(scratch, scratch_size, GFP_KERNEL); 172 + if (!new_scratch) { 173 + kvfree(scratch); 174 174 return -ENOMEM; 175 + } 176 + scratch = new_scratch; 175 177 } 176 178 177 179 for (size_t j = 0; j < num_relas; j++)

+19 -2

arch/riscv/kernel/stacktrace.c

··· 16 16 17 17 #ifdef CONFIG_FRAME_POINTER 18 18 19 + /* 20 + * This disables KASAN checking when reading a value from another task's stack, 21 + * since the other task could be running on another CPU and could have poisoned 22 + * the stack in the meantime. 23 + */ 24 + #define READ_ONCE_TASK_STACK(task, x) \ 25 + ({ \ 26 + unsigned long val; \ 27 + unsigned long addr = x; \ 28 + if ((task) == current) \ 29 + val = READ_ONCE(addr); \ 30 + else \ 31 + val = READ_ONCE_NOCHECK(addr); \ 32 + val; \ 33 + }) 34 + 19 35 extern asmlinkage void handle_exception(void); 20 36 extern unsigned long ret_from_exception_end; 21 37 ··· 85 69 fp = frame->ra; 86 70 pc = regs->ra; 87 71 } else { 88 - fp = frame->fp; 89 - pc = ftrace_graph_ret_addr(current, &graph_idx, frame->ra, 72 + fp = READ_ONCE_TASK_STACK(task, frame->fp); 73 + pc = READ_ONCE_TASK_STACK(task, frame->ra); 74 + pc = ftrace_graph_ret_addr(current, &graph_idx, pc, 90 75 &frame->ra); 91 76 if (pc >= (unsigned long)handle_exception && 92 77 pc < (unsigned long)&ret_from_exception_end) {

+1 -1

arch/riscv/kernel/tests/Kconfig.debug

··· 31 31 If unsure, say N. 32 32 33 33 config RISCV_KPROBES_KUNIT 34 - bool "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS 34 + tristate "KUnit test for riscv kprobes" if !KUNIT_ALL_TESTS 35 35 depends on KUNIT 36 36 depends on KPROBES 37 37 default KUNIT_ALL_TESTS

+3 -1

arch/riscv/kernel/tests/kprobes/Makefile

··· 1 - obj-y += test-kprobes.o test-kprobes-asm.o 1 + obj-$(CONFIG_RISCV_KPROBES_KUNIT) += kprobes_riscv_kunit.o 2 + 3 + kprobes_riscv_kunit-objs := test-kprobes.o test-kprobes-asm.o

+4 -1

arch/riscv/kernel/tests/kprobes/test-kprobes.c

··· 49 49 }; 50 50 51 51 static struct kunit_suite kprobes_test_suite = { 52 - .name = "kprobes_test_riscv", 52 + .name = "kprobes_riscv", 53 53 .test_cases = kprobes_testcases, 54 54 }; 55 55 56 56 kunit_test_suites(&kprobes_test_suite); 57 + 58 + MODULE_LICENSE("GPL"); 59 + MODULE_DESCRIPTION("KUnit test for riscv kprobes");

+14 -2

arch/riscv/kvm/aia_imsic.c

··· 689 689 */ 690 690 691 691 read_lock_irqsave(&imsic->vsfile_lock, flags); 692 - if (imsic->vsfile_cpu > -1) 693 - ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); 692 + if (imsic->vsfile_cpu > -1) { 693 + /* 694 + * This function is typically called from kvm_vcpu_block() via 695 + * kvm_arch_vcpu_runnable() upon WFI trap. The kvm_vcpu_block() 696 + * can be preempted and the blocking VCPU might resume on a 697 + * different CPU. This means it is possible that current CPU 698 + * does not match the imsic->vsfile_cpu hence this function 699 + * must check imsic->vsfile_cpu before accessing HGEIP CSR. 700 + */ 701 + if (imsic->vsfile_cpu != vcpu->cpu) 702 + ret = true; 703 + else 704 + ret = !!(csr_read(CSR_HGEIP) & BIT(imsic->vsfile_hgei)); 705 + } 694 706 read_unlock_irqrestore(&imsic->vsfile_lock, flags); 695 707 696 708 return ret;

+2 -23

arch/riscv/kvm/mmu.c

··· 171 171 enum kvm_mr_change change) 172 172 { 173 173 hva_t hva, reg_end, size; 174 - gpa_t base_gpa; 175 174 bool writable; 176 175 int ret = 0; 177 176 ··· 189 190 hva = new->userspace_addr; 190 191 size = new->npages << PAGE_SHIFT; 191 192 reg_end = hva + size; 192 - base_gpa = new->base_gfn << PAGE_SHIFT; 193 193 writable = !(new->flags & KVM_MEM_READONLY); 194 194 195 195 mmap_read_lock(current->mm); 196 196 197 197 /* 198 198 * A memory region could potentially cover multiple VMAs, and 199 - * any holes between them, so iterate over all of them to find 200 - * out if we can map any of them right now. 199 + * any holes between them, so iterate over all of them. 201 200 * 202 201 * +--------------------------------------------+ 203 202 * +---------------+----------------+ +----------------+ ··· 206 209 */ 207 210 do { 208 211 struct vm_area_struct *vma; 209 - hva_t vm_start, vm_end; 212 + hva_t vm_end; 210 213 211 214 vma = find_vma_intersection(current->mm, hva, reg_end); 212 215 if (!vma) ··· 222 225 } 223 226 224 227 /* Take the intersection of this VMA with the memory region */ 225 - vm_start = max(hva, vma->vm_start); 226 228 vm_end = min(reg_end, vma->vm_end); 227 229 228 230 if (vma->vm_flags & VM_PFNMAP) { 229 - gpa_t gpa = base_gpa + (vm_start - hva); 230 - phys_addr_t pa; 231 - 232 - pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; 233 - pa += vm_start - vma->vm_start; 234 - 235 231 /* IO region dirty page logging not allowed */ 236 232 if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) { 237 233 ret = -EINVAL; 238 234 goto out; 239 235 } 240 - 241 - ret = kvm_riscv_mmu_ioremap(kvm, gpa, pa, vm_end - vm_start, 242 - writable, false); 243 - if (ret) 244 - break; 245 236 } 246 237 hva = vm_end; 247 238 } while (hva < reg_end); 248 - 249 - if (change == KVM_MR_FLAGS_ONLY) 250 - goto out; 251 - 252 - if (ret) 253 - kvm_riscv_mmu_iounmap(kvm, base_gpa, size); 254 239 255 240 out: 256 241 mmap_read_unlock(current->mm);

+1 -1

arch/riscv/kvm/vcpu.c

··· 212 212 213 213 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) 214 214 { 215 - return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && 215 + return (kvm_riscv_vcpu_has_interrupts(vcpu, -1ULL) && 216 216 !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); 217 217 } 218 218

+1 -1

arch/riscv/mm/ptdump.c

··· 21 21 #define pt_dump_seq_puts(m, fmt) \ 22 22 ({ \ 23 23 if (m) \ 24 - seq_printf(m, fmt); \ 24 + seq_puts(m, fmt); \ 25 25 }) 26 26 27 27 /*

-1

arch/x86/include/asm/amd/node.h

··· 23 23 #define AMD_NODE0_PCI_SLOT 0x18 24 24 25 25 struct pci_dev *amd_node_get_func(u16 node, u8 func); 26 - struct pci_dev *amd_node_get_root(u16 node); 27 26 28 27 static inline u16 amd_num_nodes(void) 29 28 {

+1

arch/x86/include/uapi/asm/vmx.h

··· 93 93 #define EXIT_REASON_TPAUSE 68 94 94 #define EXIT_REASON_BUS_LOCK 74 95 95 #define EXIT_REASON_NOTIFY 75 96 + #define EXIT_REASON_SEAMCALL 76 96 97 #define EXIT_REASON_TDCALL 77 97 98 #define EXIT_REASON_MSR_READ_IMM 84 98 99 #define EXIT_REASON_MSR_WRITE_IMM 85

+51 -99

arch/x86/kernel/amd_node.c

··· 34 34 return pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(AMD_NODE0_PCI_SLOT + node, func)); 35 35 } 36 36 37 - #define DF_BLK_INST_CNT 0x040 38 - #define DF_CFG_ADDR_CNTL_LEGACY 0x084 39 - #define DF_CFG_ADDR_CNTL_DF4 0xC04 40 - 41 - #define DF_MAJOR_REVISION GENMASK(27, 24) 42 - 43 - static u16 get_cfg_addr_cntl_offset(struct pci_dev *df_f0) 44 - { 45 - u32 reg; 46 - 47 - /* 48 - * Revision fields added for DF4 and later. 49 - * 50 - * Major revision of '0' is found pre-DF4. Field is Read-as-Zero. 51 - */ 52 - if (pci_read_config_dword(df_f0, DF_BLK_INST_CNT, &reg)) 53 - return 0; 54 - 55 - if (reg & DF_MAJOR_REVISION) 56 - return DF_CFG_ADDR_CNTL_DF4; 57 - 58 - return DF_CFG_ADDR_CNTL_LEGACY; 59 - } 60 - 61 - struct pci_dev *amd_node_get_root(u16 node) 62 - { 63 - struct pci_dev *root; 64 - u16 cntl_off; 65 - u8 bus; 66 - 67 - if (!cpu_feature_enabled(X86_FEATURE_ZEN)) 68 - return NULL; 69 - 70 - /* 71 - * D18F0xXXX [Config Address Control] (DF::CfgAddressCntl) 72 - * Bits [7:0] (SecBusNum) holds the bus number of the root device for 73 - * this Data Fabric instance. The segment, device, and function will be 0. 74 - */ 75 - struct pci_dev *df_f0 __free(pci_dev_put) = amd_node_get_func(node, 0); 76 - if (!df_f0) 77 - return NULL; 78 - 79 - cntl_off = get_cfg_addr_cntl_offset(df_f0); 80 - if (!cntl_off) 81 - return NULL; 82 - 83 - if (pci_read_config_byte(df_f0, cntl_off, &bus)) 84 - return NULL; 85 - 86 - /* Grab the pointer for the actual root device instance. */ 87 - root = pci_get_domain_bus_and_slot(0, bus, 0); 88 - 89 - pci_dbg(root, "is root for AMD node %u\n", node); 90 - return root; 91 - } 92 - 93 37 static struct pci_dev **amd_roots; 94 38 95 39 /* Protect the PCI config register pairs used for SMN. 
*/ ··· 218 274 DEFINE_SHOW_STORE_ATTRIBUTE(smn_address); 219 275 DEFINE_SHOW_STORE_ATTRIBUTE(smn_value); 220 276 221 - static int amd_cache_roots(void) 277 + static struct pci_dev *get_next_root(struct pci_dev *root) 222 278 { 223 - u16 node, num_nodes = amd_num_nodes(); 224 - 225 - amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); 226 - if (!amd_roots) 227 - return -ENOMEM; 228 - 229 - for (node = 0; node < num_nodes; node++) 230 - amd_roots[node] = amd_node_get_root(node); 231 - 232 - return 0; 233 - } 234 - 235 - static int reserve_root_config_spaces(void) 236 - { 237 - struct pci_dev *root = NULL; 238 - struct pci_bus *bus = NULL; 239 - 240 - while ((bus = pci_find_next_bus(bus))) { 241 - /* Root device is Device 0 Function 0 on each Primary Bus. */ 242 - root = pci_get_slot(bus, 0); 243 - if (!root) 279 + while ((root = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, root))) { 280 + /* Root device is Device 0 Function 0. */ 281 + if (root->devfn) 244 282 continue; 245 283 246 284 if (root->vendor != PCI_VENDOR_ID_AMD && 247 285 root->vendor != PCI_VENDOR_ID_HYGON) 248 286 continue; 249 287 250 - pci_dbg(root, "Reserving PCI config space\n"); 251 - 252 - /* 253 - * There are a few SMN index/data pairs and other registers 254 - * that shouldn't be accessed by user space. 255 - * So reserve the entire PCI config space for simplicity rather 256 - * than covering specific registers piecemeal. 
257 - */ 258 - if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { 259 - pci_err(root, "Failed to reserve config space\n"); 260 - return -EEXIST; 261 - } 288 + break; 262 289 } 263 290 264 - smn_exclusive = true; 265 - return 0; 291 + return root; 266 292 } 267 293 268 294 static bool enable_dfs; ··· 246 332 247 333 static int __init amd_smn_init(void) 248 334 { 249 - int err; 335 + u16 count, num_roots, roots_per_node, node, num_nodes; 336 + struct pci_dev *root; 250 337 251 338 if (!cpu_feature_enabled(X86_FEATURE_ZEN)) 252 339 return 0; ··· 257 342 if (amd_roots) 258 343 return 0; 259 344 260 - err = amd_cache_roots(); 261 - if (err) 262 - return err; 345 + num_roots = 0; 346 + root = NULL; 347 + while ((root = get_next_root(root))) { 348 + pci_dbg(root, "Reserving PCI config space\n"); 263 349 264 - err = reserve_root_config_spaces(); 265 - if (err) 266 - return err; 350 + /* 351 + * There are a few SMN index/data pairs and other registers 352 + * that shouldn't be accessed by user space. So reserve the 353 + * entire PCI config space for simplicity rather than covering 354 + * specific registers piecemeal. 355 + */ 356 + if (!pci_request_config_region_exclusive(root, 0, PCI_CFG_SPACE_SIZE, NULL)) { 357 + pci_err(root, "Failed to reserve config space\n"); 358 + return -EEXIST; 359 + } 360 + 361 + num_roots++; 362 + } 363 + 364 + pr_debug("Found %d AMD root devices\n", num_roots); 365 + 366 + if (!num_roots) 367 + return -ENODEV; 368 + 369 + num_nodes = amd_num_nodes(); 370 + amd_roots = kcalloc(num_nodes, sizeof(*amd_roots), GFP_KERNEL); 371 + if (!amd_roots) 372 + return -ENOMEM; 373 + 374 + roots_per_node = num_roots / num_nodes; 375 + 376 + count = 0; 377 + node = 0; 378 + root = NULL; 379 + while (node < num_nodes && (root = get_next_root(root))) { 380 + /* Use one root for each node and skip the rest. 
*/ 381 + if (count++ % roots_per_node) 382 + continue; 383 + 384 + pci_dbg(root, "is root for AMD node %u\n", node); 385 + amd_roots[node++] = root; 386 + } 267 387 268 388 if (enable_dfs) { 269 389 debugfs_dir = debugfs_create_dir("amd_smn", arch_debugfs_dir); ··· 307 357 debugfs_create_file("address", 0600, debugfs_dir, NULL, &smn_address_fops); 308 358 debugfs_create_file("value", 0600, debugfs_dir, NULL, &smn_value_fops); 309 359 } 360 + 361 + smn_exclusive = true; 310 362 311 363 return 0; 312 364 }

+1

arch/x86/kernel/cpu/amd.c

··· 1038 1038 static const struct x86_cpu_id zen5_rdseed_microcode[] = { 1039 1039 ZEN_MODEL_STEP_UCODE(0x1a, 0x02, 0x1, 0x0b00215a), 1040 1040 ZEN_MODEL_STEP_UCODE(0x1a, 0x11, 0x0, 0x0b101054), 1041 + {}, 1041 1042 }; 1042 1043 1043 1044 static void init_amd_zen5(struct cpuinfo_x86 *c)

+2

arch/x86/kernel/cpu/microcode/amd.c

··· 220 220 case 0xaa001: return cur_rev <= 0xaa00116; break; 221 221 case 0xaa002: return cur_rev <= 0xaa00218; break; 222 222 case 0xb0021: return cur_rev <= 0xb002146; break; 223 + case 0xb0081: return cur_rev <= 0xb008111; break; 223 224 case 0xb1010: return cur_rev <= 0xb101046; break; 224 225 case 0xb2040: return cur_rev <= 0xb204031; break; 225 226 case 0xb4040: return cur_rev <= 0xb404031; break; 226 227 case 0xb6000: return cur_rev <= 0xb600031; break; 228 + case 0xb6080: return cur_rev <= 0xb608031; break; 227 229 case 0xb7000: return cur_rev <= 0xb700031; break; 228 230 default: break; 229 231 }

+15 -9

arch/x86/kvm/svm/avic.c

··· 216 216 * This function is called from IOMMU driver to notify 217 217 * SVM to schedule in a particular vCPU of a particular VM. 218 218 */ 219 - int avic_ga_log_notifier(u32 ga_tag) 219 + static int avic_ga_log_notifier(u32 ga_tag) 220 220 { 221 221 unsigned long flags; 222 222 struct kvm_svm *kvm_svm; ··· 788 788 struct kvm_vcpu *vcpu = &svm->vcpu; 789 789 790 790 INIT_LIST_HEAD(&svm->ir_list); 791 - spin_lock_init(&svm->ir_list_lock); 791 + raw_spin_lock_init(&svm->ir_list_lock); 792 792 793 793 if (!enable_apicv || !irqchip_in_kernel(vcpu->kvm)) 794 794 return 0; ··· 816 816 if (!vcpu) 817 817 return; 818 818 819 - spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); 819 + raw_spin_lock_irqsave(&to_svm(vcpu)->ir_list_lock, flags); 820 820 list_del(&irqfd->vcpu_list); 821 - spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); 821 + raw_spin_unlock_irqrestore(&to_svm(vcpu)->ir_list_lock, flags); 822 822 } 823 823 824 824 int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm, ··· 855 855 * list of IRQs being posted to the vCPU, to ensure the IRTE 856 856 * isn't programmed with stale pCPU/IsRunning information. 857 857 */ 858 - guard(spinlock_irqsave)(&svm->ir_list_lock); 858 + guard(raw_spinlock_irqsave)(&svm->ir_list_lock); 859 859 860 860 /* 861 861 * Update the target pCPU for IOMMU doorbells if the vCPU is ··· 972 972 * up-to-date entry information, or that this task will wait until 973 973 * svm_ir_list_add() completes to set the new target pCPU. 
974 974 */ 975 - spin_lock_irqsave(&svm->ir_list_lock, flags); 975 + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); 976 976 977 977 entry = svm->avic_physical_id_entry; 978 978 WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); ··· 997 997 998 998 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, action); 999 999 1000 - spin_unlock_irqrestore(&svm->ir_list_lock, flags); 1000 + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); 1001 1001 } 1002 1002 1003 1003 void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ··· 1035 1035 * or that this task will wait until svm_ir_list_add() completes to 1036 1036 * mark the vCPU as not running. 1037 1037 */ 1038 - spin_lock_irqsave(&svm->ir_list_lock, flags); 1038 + raw_spin_lock_irqsave(&svm->ir_list_lock, flags); 1039 1039 1040 1040 avic_update_iommu_vcpu_affinity(vcpu, -1, action); 1041 1041 ··· 1059 1059 1060 1060 svm->avic_physical_id_entry = entry; 1061 1061 1062 - spin_unlock_irqrestore(&svm->ir_list_lock, flags); 1062 + raw_spin_unlock_irqrestore(&svm->ir_list_lock, flags); 1063 1063 } 1064 1064 1065 1065 void avic_vcpu_put(struct kvm_vcpu *vcpu) ··· 1242 1242 amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); 1243 1243 1244 1244 return true; 1245 + } 1246 + 1247 + void avic_hardware_unsetup(void) 1248 + { 1249 + if (avic) 1250 + amd_iommu_register_ga_log_notifier(NULL); 1245 1251 }

+7 -13

arch/x86/kvm/svm/nested.c

··· 677 677 */ 678 678 svm_copy_lbrs(vmcb02, vmcb12); 679 679 vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS; 680 - svm_update_lbrv(&svm->vcpu); 681 - 682 - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { 680 + } else { 683 681 svm_copy_lbrs(vmcb02, vmcb01); 684 682 } 683 + svm_update_lbrv(&svm->vcpu); 685 684 } 686 685 687 686 static inline bool is_evtinj_soft(u32 evtinj) ··· 832 833 svm->soft_int_next_rip = vmcb12_rip; 833 834 } 834 835 835 - vmcb02->control.virt_ext = vmcb01->control.virt_ext & 836 - LBR_CTL_ENABLE_MASK; 837 - if (guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV)) 838 - vmcb02->control.virt_ext |= 839 - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK); 836 + /* LBR_CTL_ENABLE_MASK is controlled by svm_update_lbrv() */ 840 837 841 838 if (!nested_vmcb_needs_vls_intercept(svm)) 842 839 vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; ··· 1184 1189 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); 1185 1190 1186 1191 if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && 1187 - (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) { 1192 + (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) 1188 1193 svm_copy_lbrs(vmcb12, vmcb02); 1189 - svm_update_lbrv(vcpu); 1190 - } else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) { 1194 + else 1191 1195 svm_copy_lbrs(vmcb01, vmcb02); 1192 - svm_update_lbrv(vcpu); 1193 - } 1196 + 1197 + svm_update_lbrv(vcpu); 1194 1198 1195 1199 if (vnmi) { 1196 1200 if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)

+39 -49

arch/x86/kvm/svm/svm.c

··· 806 806 vmcb_mark_dirty(to_vmcb, VMCB_LBR); 807 807 } 808 808 809 + static void __svm_enable_lbrv(struct kvm_vcpu *vcpu) 810 + { 811 + to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; 812 + } 813 + 809 814 void svm_enable_lbrv(struct kvm_vcpu *vcpu) 810 815 { 811 - struct vcpu_svm *svm = to_svm(vcpu); 812 - 813 - svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; 816 + __svm_enable_lbrv(vcpu); 814 817 svm_recalc_lbr_msr_intercepts(vcpu); 815 - 816 - /* Move the LBR msrs to the vmcb02 so that the guest can see them. */ 817 - if (is_guest_mode(vcpu)) 818 - svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); 819 818 } 820 819 821 - static void svm_disable_lbrv(struct kvm_vcpu *vcpu) 820 + static void __svm_disable_lbrv(struct kvm_vcpu *vcpu) 822 821 { 823 - struct vcpu_svm *svm = to_svm(vcpu); 824 - 825 822 KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); 826 - svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; 827 - svm_recalc_lbr_msr_intercepts(vcpu); 828 - 829 - /* 830 - * Move the LBR msrs back to the vmcb01 to avoid copying them 831 - * on nested guest entries. 832 - */ 833 - if (is_guest_mode(vcpu)) 834 - svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb); 835 - } 836 - 837 - static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm) 838 - { 839 - /* 840 - * If LBR virtualization is disabled, the LBR MSRs are always kept in 841 - * vmcb01. If LBR virtualization is enabled and L1 is running VMs of 842 - * its own, the MSRs are moved between vmcb01 and vmcb02 as needed. 843 - */ 844 - return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? 
svm->vmcb : 845 - svm->vmcb01.ptr; 823 + to_svm(vcpu)->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; 846 824 } 847 825 848 826 void svm_update_lbrv(struct kvm_vcpu *vcpu) 849 827 { 850 828 struct vcpu_svm *svm = to_svm(vcpu); 851 829 bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK; 852 - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) || 830 + bool enable_lbrv = (svm->vmcb->save.dbgctl & DEBUGCTLMSR_LBR) || 853 831 (is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) && 854 832 (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)); 855 833 856 - if (enable_lbrv == current_enable_lbrv) 857 - return; 834 + if (enable_lbrv && !current_enable_lbrv) 835 + __svm_enable_lbrv(vcpu); 836 + else if (!enable_lbrv && current_enable_lbrv) 837 + __svm_disable_lbrv(vcpu); 858 838 859 - if (enable_lbrv) 860 - svm_enable_lbrv(vcpu); 861 - else 862 - svm_disable_lbrv(vcpu); 839 + /* 840 + * During nested transitions, it is possible that the current VMCB has 841 + * LBR_CTL set, but the previous LBR_CTL had it cleared (or vice versa). 842 + * In this case, even though LBR_CTL does not need an update, intercepts 843 + * do, so always recalculate the intercepts here. 
844 + */ 845 + svm_recalc_lbr_msr_intercepts(vcpu); 863 846 } 864 847 865 848 void disable_nmi_singlestep(struct vcpu_svm *svm) ··· 903 920 static void svm_hardware_unsetup(void) 904 921 { 905 922 int cpu; 923 + 924 + avic_hardware_unsetup(); 906 925 907 926 sev_hardware_unsetup(); 908 927 ··· 2707 2722 msr_info->data = svm->tsc_aux; 2708 2723 break; 2709 2724 case MSR_IA32_DEBUGCTLMSR: 2710 - msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl; 2725 + msr_info->data = svm->vmcb->save.dbgctl; 2711 2726 break; 2712 2727 case MSR_IA32_LASTBRANCHFROMIP: 2713 - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from; 2728 + msr_info->data = svm->vmcb->save.br_from; 2714 2729 break; 2715 2730 case MSR_IA32_LASTBRANCHTOIP: 2716 - msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to; 2731 + msr_info->data = svm->vmcb->save.br_to; 2717 2732 break; 2718 2733 case MSR_IA32_LASTINTFROMIP: 2719 - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from; 2734 + msr_info->data = svm->vmcb->save.last_excp_from; 2720 2735 break; 2721 2736 case MSR_IA32_LASTINTTOIP: 2722 - msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to; 2737 + msr_info->data = svm->vmcb->save.last_excp_to; 2723 2738 break; 2724 2739 case MSR_VM_HSAVE_PA: 2725 2740 msr_info->data = svm->nested.hsave_msr; ··· 2987 3002 if (data & DEBUGCTL_RESERVED_BITS) 2988 3003 return 1; 2989 3004 2990 - svm_get_lbr_vmcb(svm)->save.dbgctl = data; 3005 + if (svm->vmcb->save.dbgctl == data) 3006 + break; 3007 + 3008 + svm->vmcb->save.dbgctl = data; 3009 + vmcb_mark_dirty(svm->vmcb, VMCB_LBR); 2991 3010 svm_update_lbrv(vcpu); 2992 3011 break; 2993 3012 case MSR_VM_HSAVE_PA: ··· 5375 5386 5376 5387 svm_hv_hardware_setup(); 5377 5388 5378 - for_each_possible_cpu(cpu) { 5379 - r = svm_cpu_init(cpu); 5380 - if (r) 5381 - goto err; 5382 - } 5383 - 5384 5389 enable_apicv = avic_hardware_setup(); 5385 5390 if (!enable_apicv) { 5386 5391 enable_ipiv = false; ··· 5418 5435 svm_set_cpu_caps(); 5419 5436 5420 5437 
kvm_caps.inapplicable_quirks &= ~KVM_X86_QUIRK_CD_NW_CLEARED; 5438 + 5439 + for_each_possible_cpu(cpu) { 5440 + r = svm_cpu_init(cpu); 5441 + if (r) 5442 + goto err; 5443 + } 5444 + 5421 5445 return 0; 5422 5446 5423 5447 err:

+2 -2

arch/x86/kvm/svm/svm.h

··· 329 329 * back into remapped mode). 330 330 */ 331 331 struct list_head ir_list; 332 - spinlock_t ir_list_lock; 332 + raw_spinlock_t ir_list_lock; 333 333 334 334 struct vcpu_sev_es_state sev_es; 335 335 ··· 805 805 ) 806 806 807 807 bool __init avic_hardware_setup(void); 808 - int avic_ga_log_notifier(u32 ga_tag); 808 + void avic_hardware_unsetup(void); 809 809 void avic_vm_destroy(struct kvm *kvm); 810 810 int avic_vm_init(struct kvm *kvm); 811 811 void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);

+1 -1

arch/x86/kvm/vmx/common.h

··· 98 98 error_code |= (exit_qualification & EPT_VIOLATION_PROT_MASK) 99 99 ? PFERR_PRESENT_MASK : 0; 100 100 101 - if (error_code & EPT_VIOLATION_GVA_IS_VALID) 101 + if (exit_qualification & EPT_VIOLATION_GVA_IS_VALID) 102 102 error_code |= (exit_qualification & EPT_VIOLATION_GVA_TRANSLATED) ? 103 103 PFERR_GUEST_FINAL_MASK : PFERR_GUEST_PAGE_MASK; 104 104

+8

arch/x86/kvm/vmx/nested.c

··· 6728 6728 case EXIT_REASON_NOTIFY: 6729 6729 /* Notify VM exit is not exposed to L1 */ 6730 6730 return false; 6731 + case EXIT_REASON_SEAMCALL: 6732 + case EXIT_REASON_TDCALL: 6733 + /* 6734 + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't 6735 + * virtualized by KVM for L1 hypervisors, i.e. L1 should 6736 + * never want or expect such an exit. 6737 + */ 6738 + return false; 6731 6739 default: 6732 6740 return true; 6733 6741 }

+8

arch/x86/kvm/vmx/vmx.c

··· 6032 6032 return 1; 6033 6033 } 6034 6034 6035 + static int handle_tdx_instruction(struct kvm_vcpu *vcpu) 6036 + { 6037 + kvm_queue_exception(vcpu, UD_VECTOR); 6038 + return 1; 6039 + } 6040 + 6035 6041 #ifndef CONFIG_X86_SGX_KVM 6036 6042 static int handle_encls(struct kvm_vcpu *vcpu) 6037 6043 { ··· 6163 6157 [EXIT_REASON_ENCLS] = handle_encls, 6164 6158 [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit, 6165 6159 [EXIT_REASON_NOTIFY] = handle_notify, 6160 + [EXIT_REASON_SEAMCALL] = handle_tdx_instruction, 6161 + [EXIT_REASON_TDCALL] = handle_tdx_instruction, 6166 6162 [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm, 6167 6163 [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm, 6168 6164 };

+29 -19

arch/x86/kvm/x86.c

··· 3874 3874 3875 3875 /* 3876 3876 * Returns true if the MSR in question is managed via XSTATE, i.e. is context 3877 - * switched with the rest of guest FPU state. Note! S_CET is _not_ context 3878 - * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS. 3879 - * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields, 3880 - * the value saved/restored via XSTATE is always the host's value. That detail 3881 - * is _extremely_ important, as the guest's S_CET must _never_ be resident in 3882 - * hardware while executing in the host. Loading guest values for U_CET and 3883 - * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to 3884 - * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower 3885 - * privilege levels, i.e. are effectively only consumed by userspace as well. 3877 + * switched with the rest of guest FPU state. 3878 + * 3879 + * Note, S_CET is _not_ saved/restored via XSAVES/XRSTORS. 3886 3880 */ 3887 3881 static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr) 3888 3882 { ··· 3899 3905 * MSR that is managed via XSTATE. Note, the caller is responsible for doing 3900 3906 * the initial FPU load, this helper only ensures that guest state is resident 3901 3907 * in hardware (the kernel can load its FPU state in IRQ context). 3908 + * 3909 + * Note, loading guest values for U_CET and PL[0-3]_SSP while executing in the 3910 + * kernel is safe, as U_CET is specific to userspace, and PL[0-3]_SSP are only 3911 + * consumed when transitioning to lower privilege levels, i.e. are effectively 3912 + * only consumed by userspace as well. 3902 3913 */ 3903 3914 static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu, 3904 3915 struct msr_data *msr_info, ··· 11806 11807 /* Swap (qemu) user FPU context for the guest FPU context. 
*/ 11807 11808 static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) 11808 11809 { 11810 + if (KVM_BUG_ON(vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) 11811 + return; 11812 + 11809 11813 /* Exclude PKRU, it's restored separately immediately after VM-Exit. */ 11810 11814 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, true); 11811 11815 trace_kvm_fpu(1); ··· 11817 11815 /* When vcpu_run ends, restore user space FPU context. */ 11818 11816 static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) 11819 11817 { 11818 + if (KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm)) 11819 + return; 11820 + 11820 11821 fpu_swap_kvm_fpstate(&vcpu->arch.guest_fpu, false); 11821 11822 ++vcpu->stat.fpu_reload; 11822 11823 trace_kvm_fpu(0); ··· 12142 12137 int r; 12143 12138 12144 12139 vcpu_load(vcpu); 12145 - if (kvm_mpx_supported()) 12146 - kvm_load_guest_fpu(vcpu); 12147 - 12148 12140 kvm_vcpu_srcu_read_lock(vcpu); 12149 12141 12150 12142 r = kvm_apic_accept_events(vcpu); ··· 12158 12156 12159 12157 out: 12160 12158 kvm_vcpu_srcu_read_unlock(vcpu); 12161 - 12162 - if (kvm_mpx_supported()) 12163 - kvm_put_guest_fpu(vcpu); 12164 12159 vcpu_put(vcpu); 12165 12160 return r; 12166 12161 } ··· 12787 12788 { 12788 12789 struct fpstate *fpstate = vcpu->arch.guest_fpu.fpstate; 12789 12790 u64 xfeatures_mask; 12791 + bool fpu_in_use; 12790 12792 int i; 12791 12793 12792 12794 /* ··· 12811 12811 BUILD_BUG_ON(sizeof(xfeatures_mask) * BITS_PER_BYTE <= XFEATURE_MAX); 12812 12812 12813 12813 /* 12814 - * All paths that lead to INIT are required to load the guest's FPU 12815 - * state (because most paths are buried in KVM_RUN). 12814 + * Unload guest FPU state (if necessary) before zeroing XSTATE fields 12815 + * as the kernel can only modify the state when its resident in memory, 12816 + * i.e. when it's not loaded into hardware. 12817 + * 12818 + * WARN if the vCPU's desire to run, i.e. 
whether or not its in KVM_RUN, 12819 + * doesn't match the loaded/in-use state of the FPU, as KVM_RUN is the 12820 + * only path that can trigger INIT emulation _and_ loads FPU state, and 12821 + * KVM_RUN should _always_ load FPU state. 12816 12822 */ 12817 - kvm_put_guest_fpu(vcpu); 12823 + WARN_ON_ONCE(vcpu->wants_to_run != fpstate->in_use); 12824 + fpu_in_use = fpstate->in_use; 12825 + if (fpu_in_use) 12826 + kvm_put_guest_fpu(vcpu); 12818 12827 for_each_set_bit(i, (unsigned long *)&xfeatures_mask, XFEATURE_MAX) 12819 12828 fpstate_clear_xstate_component(fpstate, i); 12820 - kvm_load_guest_fpu(vcpu); 12829 + if (fpu_in_use) 12830 + kvm_load_guest_fpu(vcpu); 12821 12831 } 12822 12832 12823 12833 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)

+1 -1

drivers/acpi/cppc_acpi.c

··· 750 750 } 751 751 752 752 /* 753 - * Disregard _CPC if the number of entries in the return pachage is not 753 + * Disregard _CPC if the number of entries in the return package is not 754 754 * as expected, but support future revisions being proper supersets of 755 755 * the v3 and only causing more entries to be returned by _CPC. 756 756 */

+1 -1

drivers/acpi/sbs.c

··· 487 487 if (result) 488 488 return result; 489 489 490 - battery->present = state & (1 << battery->id); 490 + battery->present = !!(state & (1 << battery->id)); 491 491 if (!battery->present) 492 492 return 0; 493 493

+13 -11

drivers/bluetooth/btrtl.c

··· 50 50 51 51 #define RTL_CHIP_SUBVER (&(struct rtl_vendor_cmd) {{0x10, 0x38, 0x04, 0x28, 0x80}}) 52 52 #define RTL_CHIP_REV (&(struct rtl_vendor_cmd) {{0x10, 0x3A, 0x04, 0x28, 0x80}}) 53 - #define RTL_SEC_PROJ (&(struct rtl_vendor_cmd) {{0x10, 0xA4, 0x0D, 0x00, 0xb0}}) 53 + #define RTL_SEC_PROJ (&(struct rtl_vendor_cmd) {{0x10, 0xA4, 0xAD, 0x00, 0xb0}}) 54 54 55 55 #define RTL_PATCH_SNIPPETS 0x01 56 56 #define RTL_PATCH_DUMMY_HEADER 0x02 ··· 534 534 { 535 535 struct rtl_epatch_header_v2 *hdr; 536 536 int rc; 537 - u8 reg_val[2]; 538 537 u8 key_id; 539 538 u32 num_sections; 540 539 struct rtl_section *section; ··· 548 549 .len = btrtl_dev->fw_len - 7, /* Cut the tail */ 549 550 }; 550 551 551 - rc = btrtl_vendor_read_reg16(hdev, RTL_SEC_PROJ, reg_val); 552 - if (rc < 0) 553 - return -EIO; 554 - key_id = reg_val[0]; 555 - 556 - rtl_dev_dbg(hdev, "%s: key id %u", __func__, key_id); 557 - 558 - btrtl_dev->key_id = key_id; 552 + key_id = btrtl_dev->key_id; 559 553 560 554 hdr = rtl_iov_pull_data(&iov, sizeof(*hdr)); 561 555 if (!hdr) ··· 1062 1070 u16 hci_rev, lmp_subver; 1063 1071 u8 hci_ver, lmp_ver, chip_type = 0; 1064 1072 int ret; 1073 + int rc; 1074 + u8 key_id; 1065 1075 u8 reg_val[2]; 1066 1076 1067 1077 btrtl_dev = kzalloc(sizeof(*btrtl_dev), GFP_KERNEL); ··· 1174 1180 goto err_free; 1175 1181 } 1176 1182 1183 + rc = btrtl_vendor_read_reg16(hdev, RTL_SEC_PROJ, reg_val); 1184 + if (rc < 0) 1185 + goto err_free; 1186 + 1187 + key_id = reg_val[0]; 1188 + btrtl_dev->key_id = key_id; 1189 + rtl_dev_info(hdev, "%s: key id %u", __func__, key_id); 1190 + 1177 1191 btrtl_dev->fw_len = -EIO; 1178 1192 if (lmp_subver == RTL_ROM_LMP_8852A && hci_rev == 0x000c) { 1179 1193 snprintf(fw_name, sizeof(fw_name), "%s_v2.bin", ··· 1204 1202 goto err_free; 1205 1203 } 1206 1204 1207 - if (btrtl_dev->ic_info->cfg_name) { 1205 + if (btrtl_dev->ic_info->cfg_name && !btrtl_dev->key_id) { 1208 1206 if (postfix) { 1209 1207 snprintf(cfg_name, sizeof(cfg_name), "%s-%s.bin", 1210 1208 
btrtl_dev->ic_info->cfg_name, postfix);

+6 -7

drivers/bluetooth/btusb.c

··· 4361 4361 4362 4362 hci_unregister_dev(hdev); 4363 4363 4364 + if (data->oob_wake_irq) 4365 + device_init_wakeup(&data->udev->dev, false); 4366 + if (data->reset_gpio) 4367 + gpiod_put(data->reset_gpio); 4368 + 4364 4369 if (intf == data->intf) { 4365 4370 if (data->isoc) 4366 4371 usb_driver_release_interface(&btusb_driver, data->isoc); ··· 4376 4371 usb_driver_release_interface(&btusb_driver, data->diag); 4377 4372 usb_driver_release_interface(&btusb_driver, data->intf); 4378 4373 } else if (intf == data->diag) { 4379 - usb_driver_release_interface(&btusb_driver, data->intf); 4380 4374 if (data->isoc) 4381 4375 usb_driver_release_interface(&btusb_driver, data->isoc); 4376 + usb_driver_release_interface(&btusb_driver, data->intf); 4382 4377 } 4383 - 4384 - if (data->oob_wake_irq) 4385 - device_init_wakeup(&data->udev->dev, false); 4386 - 4387 - if (data->reset_gpio) 4388 - gpiod_put(data->reset_gpio); 4389 4378 4390 4379 hci_free_dev(hdev); 4391 4380 }

+3 -2

drivers/cpuidle/cpuidle-riscv-sbi.c

··· 18 18 #include <linux/module.h> 19 19 #include <linux/of.h> 20 20 #include <linux/slab.h> 21 + #include <linux/string.h> 21 22 #include <linux/platform_device.h> 22 23 #include <linux/pm_domain.h> 23 24 #include <linux/pm_runtime.h> ··· 304 303 drv->states[0].exit_latency = 1; 305 304 drv->states[0].target_residency = 1; 306 305 drv->states[0].power_usage = UINT_MAX; 307 - strcpy(drv->states[0].name, "WFI"); 308 - strcpy(drv->states[0].desc, "RISC-V WFI"); 306 + strscpy(drv->states[0].name, "WFI"); 307 + strscpy(drv->states[0].desc, "RISC-V WFI"); 309 308 310 309 /* 311 310 * If no DT idle states are detected (ret == 0) let the driver

+1

drivers/gpio/gpio-aggregator.c

··· 723 723 chip->get_multiple = gpio_fwd_get_multiple_locked; 724 724 chip->set = gpio_fwd_set; 725 725 chip->set_multiple = gpio_fwd_set_multiple_locked; 726 + chip->set_config = gpio_fwd_set_config; 726 727 chip->to_irq = gpio_fwd_to_irq; 727 728 chip->base = -1; 728 729 chip->ngpio = ngpios;

-19

drivers/gpio/gpio-tb10x.c

··· 50 50 return ioread32(gpio->base + offs); 51 51 } 52 52 53 - static inline void tb10x_reg_write(struct tb10x_gpio *gpio, unsigned int offs, 54 - u32 val) 55 - { 56 - iowrite32(val, gpio->base + offs); 57 - } 58 - 59 - static inline void tb10x_set_bits(struct tb10x_gpio *gpio, unsigned int offs, 60 - u32 mask, u32 val) 61 - { 62 - u32 r; 63 - 64 - guard(gpio_generic_lock_irqsave)(&gpio->chip); 65 - 66 - r = tb10x_reg_read(gpio, offs); 67 - r = (r & ~mask) | (val & mask); 68 - 69 - tb10x_reg_write(gpio, offs, r); 70 - } 71 - 72 53 static int tb10x_gpio_to_irq(struct gpio_chip *chip, unsigned offset) 73 54 { 74 55 struct tb10x_gpio *tb10x_gpio = gpiochip_get_data(chip);

+1 -1

drivers/gpio/gpiolib-swnode.c

··· 41 41 !strcmp(gdev_node->name, GPIOLIB_SWNODE_UNDEFINED_NAME)) 42 42 return ERR_PTR(-ENOENT); 43 43 44 - gdev = gpio_device_find_by_label(gdev_node->name); 44 + gdev = gpio_device_find_by_fwnode(fwnode); 45 45 return gdev ?: ERR_PTR(-EPROBE_DEFER); 46 46 } 47 47

+7 -1

drivers/gpio/gpiolib.c

··· 5296 5296 struct gpio_device *gdev; 5297 5297 loff_t index = *pos; 5298 5298 5299 + s->private = NULL; 5300 + 5299 5301 priv = kzalloc(sizeof(*priv), GFP_KERNEL); 5300 5302 if (!priv) 5301 5303 return NULL; ··· 5331 5329 5332 5330 static void gpiolib_seq_stop(struct seq_file *s, void *v) 5333 5331 { 5334 - struct gpiolib_seq_priv *priv = s->private; 5332 + struct gpiolib_seq_priv *priv; 5333 + 5334 + priv = s->private; 5335 + if (!priv) 5336 + return; 5335 5337 5336 5338 srcu_read_unlock(&gpio_devices_srcu, priv->idx); 5337 5339 kfree(priv);

+1 -1

drivers/gpu/drm/Makefile

··· 245 245 quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) 246 246 cmd_hdrtest = \ 247 247 $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ 248 - PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ 248 + PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ 249 249 touch $@ 250 250 251 251 $(obj)/%.hdrtest: $(src)/%.h FORCE

+4

drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c

··· 1267 1267 1268 1268 (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va); 1269 1269 1270 + /* VM entity stopped if process killed, don't clear freed pt bo */ 1271 + if (!amdgpu_vm_ready(vm)) 1272 + return 0; 1273 + 1270 1274 (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); 1271 1275 1272 1276 (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL);

-4

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

··· 5243 5243 if (amdgpu_sriov_vf(adev)) 5244 5244 amdgpu_virt_release_full_gpu(adev, false); 5245 5245 5246 - r = amdgpu_dpm_notify_rlc_state(adev, false); 5247 - if (r) 5248 - return r; 5249 - 5250 5246 return 0; 5251 5247 } 5252 5248

+7 -2

drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c

··· 2632 2632 { 2633 2633 struct drm_device *drm_dev = dev_get_drvdata(dev); 2634 2634 struct amdgpu_device *adev = drm_to_adev(drm_dev); 2635 + int r; 2635 2636 2636 - if (amdgpu_acpi_should_gpu_reset(adev)) 2637 - return amdgpu_asic_reset(adev); 2637 + if (amdgpu_acpi_should_gpu_reset(adev)) { 2638 + amdgpu_device_lock_reset_domain(adev->reset_domain); 2639 + r = amdgpu_asic_reset(adev); 2640 + amdgpu_device_unlock_reset_domain(adev->reset_domain); 2641 + return r; 2642 + } 2638 2643 2639 2644 return 0; 2640 2645 }

+4 -1

drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c

··· 2355 2355 if (!ret && !psp->securedisplay_context.context.resp_status) { 2356 2356 psp->securedisplay_context.context.initialized = true; 2357 2357 mutex_init(&psp->securedisplay_context.mutex); 2358 - } else 2358 + } else { 2359 + /* don't try again */ 2360 + psp->securedisplay_context.context.bin_desc.size_bytes = 0; 2359 2361 return ret; 2362 + } 2360 2363 2361 2364 mutex_lock(&psp->securedisplay_context.mutex); 2362 2365

+2 -1

drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c

··· 407 407 return -EINVAL; 408 408 } 409 409 410 - if (adev->kfd.init_complete && !amdgpu_in_reset(adev)) 410 + if (adev->kfd.init_complete && !amdgpu_in_reset(adev) && 411 + !adev->in_suspend) 411 412 flags |= AMDGPU_XCP_OPS_KFD; 412 413 413 414 if (flags & AMDGPU_XCP_OPS_KFD) {

+5

drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

··· 3102 3102 return r; 3103 3103 } 3104 3104 3105 + adev->gfx.gfx_supported_reset = 3106 + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 3107 + adev->gfx.compute_supported_reset = 3108 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 3109 + 3105 3110 return r; 3106 3111 } 3107 3112

+5

drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c

··· 4399 4399 4400 4400 gfx_v7_0_gpu_early_init(adev); 4401 4401 4402 + adev->gfx.gfx_supported_reset = 4403 + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 4404 + adev->gfx.compute_supported_reset = 4405 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 4406 + 4402 4407 return r; 4403 4408 } 4404 4409

+5

drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c

··· 2023 2023 if (r) 2024 2024 return r; 2025 2025 2026 + adev->gfx.gfx_supported_reset = 2027 + amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); 2028 + adev->gfx.compute_supported_reset = 2029 + amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); 2030 + 2026 2031 return 0; 2027 2032 } 2028 2033

+3 -1

drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c

··· 2292 2292 r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode); 2293 2293 2294 2294 } else { 2295 - if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, 2295 + if (adev->in_suspend) 2296 + amdgpu_xcp_restore_partition_mode(adev->xcp_mgr); 2297 + else if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, 2296 2298 AMDGPU_XCP_FL_NONE) == 2297 2299 AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE) 2298 2300 r = amdgpu_xcp_switch_partition_mode(

+25 -1

drivers/gpu/drm/amd/amdgpu/psp_v11_0.c

··· 142 142 return err; 143 143 } 144 144 145 + static int psp_v11_wait_for_tos_unload(struct psp_context *psp) 146 + { 147 + struct amdgpu_device *adev = psp->adev; 148 + uint32_t sol_reg1, sol_reg2; 149 + int retry_loop; 150 + 151 + /* Wait for the TOS to be unloaded */ 152 + for (retry_loop = 0; retry_loop < 20; retry_loop++) { 153 + sol_reg1 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); 154 + usleep_range(1000, 2000); 155 + sol_reg2 = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); 156 + if (sol_reg1 == sol_reg2) 157 + return 0; 158 + } 159 + dev_err(adev->dev, "TOS unload failed, C2PMSG_33: %x C2PMSG_81: %x", 160 + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_33), 161 + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81)); 162 + 163 + return -ETIME; 164 + } 165 + 145 166 static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) 146 167 { 147 168 struct amdgpu_device *adev = psp->adev; 148 - 149 169 int ret; 150 170 int retry_loop; 171 + 172 + /* For a reset done at the end of S3, only wait for TOS to be unloaded */ 173 + if (adev->in_s3 && !(adev->flags & AMD_IS_APU) && amdgpu_in_reset(adev)) 174 + return psp_v11_wait_for_tos_unload(psp); 151 175 152 176 for (retry_loop = 0; retry_loop < 20; retry_loop++) { 153 177 /* Wait for bootloader to signify that is

+10 -2

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

··· 3563 3563 /* Do mst topology probing after resuming cached state*/ 3564 3564 drm_connector_list_iter_begin(ddev, &iter); 3565 3565 drm_for_each_connector_iter(connector, &iter) { 3566 + bool init = false; 3566 3567 3567 3568 if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) 3568 3569 continue; ··· 3573 3572 aconnector->mst_root) 3574 3573 continue; 3575 3574 3576 - drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); 3575 + scoped_guard(mutex, &aconnector->mst_mgr.lock) { 3576 + init = !aconnector->mst_mgr.mst_primary; 3577 + } 3578 + if (init) 3579 + dm_helpers_dp_mst_start_top_mgr(aconnector->dc_link->ctx, 3580 + aconnector->dc_link, false); 3581 + else 3582 + drm_dp_mst_topology_queue_probe(&aconnector->mst_mgr); 3577 3583 } 3578 3584 drm_connector_list_iter_end(&iter); 3579 3585 ··· 8038 8030 "mode %dx%d@%dHz is not native, enabling scaling\n", 8039 8031 adjusted_mode->hdisplay, adjusted_mode->vdisplay, 8040 8032 drm_mode_vrefresh(adjusted_mode)); 8041 - dm_new_connector_state->scaling = RMX_FULL; 8033 + dm_new_connector_state->scaling = RMX_ASPECT; 8042 8034 } 8043 8035 return 0; 8044 8036 }

+2 -1

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c

··· 1302 1302 if (connector->status != connector_status_connected) 1303 1303 return -ENODEV; 1304 1304 1305 - if (pipe_ctx != NULL && pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments) 1305 + if (pipe_ctx && pipe_ctx->stream_res.tg && 1306 + pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments) 1306 1307 pipe_ctx->stream_res.tg->funcs->get_odm_combine_segments(pipe_ctx->stream_res.tg, &segments); 1307 1308 1308 1309 seq_printf(m, "%d\n", segments);

+1

drivers/gpu/drm/amd/display/dc/link/link_detection.c

··· 1141 1141 !sink->edid_caps.edid_hdmi) 1142 1142 sink->sink_signal = SIGNAL_TYPE_DVI_SINGLE_LINK; 1143 1143 else if (dc_is_dvi_signal(sink->sink_signal) && 1144 + dc_is_dvi_signal(link->connector_signal) && 1144 1145 aud_support->hdmi_audio_native && 1145 1146 sink->edid_caps.edid_hdmi) 1146 1147 sink->sink_signal = SIGNAL_TYPE_HDMI_TYPE_A;

-18

drivers/gpu/drm/amd/pm/amdgpu_dpm.c

··· 195 195 return ret; 196 196 } 197 197 198 - int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en) 199 - { 200 - int ret = 0; 201 - const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; 202 - 203 - if (pp_funcs && pp_funcs->notify_rlc_state) { 204 - mutex_lock(&adev->pm.mutex); 205 - 206 - ret = pp_funcs->notify_rlc_state( 207 - adev->powerplay.pp_handle, 208 - en); 209 - 210 - mutex_unlock(&adev->pm.mutex); 211 - } 212 - 213 - return ret; 214 - } 215 - 216 198 int amdgpu_dpm_is_baco_supported(struct amdgpu_device *adev) 217 199 { 218 200 const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;

+2 -2

drivers/gpu/drm/amd/pm/amdgpu_pm.c

··· 4724 4724 ret = devm_device_add_group(adev->dev, 4725 4725 &amdgpu_pm_policy_attr_group); 4726 4726 if (ret) 4727 - goto err_out0; 4727 + goto err_out1; 4728 4728 } 4729 4729 4730 4730 if (amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) { 4731 4731 ret = devm_device_add_group(adev->dev, 4732 4732 &amdgpu_board_attr_group); 4733 4733 if (ret) 4734 - goto err_out0; 4734 + goto err_out1; 4735 4735 if (amdgpu_pm_get_sensor_generic(adev, AMDGPU_PP_SENSOR_MAXNODEPOWERLIMIT, 4736 4736 (void *)&tmp) != -EOPNOTSUPP) { 4737 4737 sysfs_add_file_to_group(&adev->dev->kobj,

-2

drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h

··· 424 424 int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, 425 425 enum pp_mp1_state mp1_state); 426 426 427 - int amdgpu_dpm_notify_rlc_state(struct amdgpu_device *adev, bool en); 428 - 429 427 int amdgpu_dpm_set_gfx_power_up_by_imu(struct amdgpu_device *adev); 430 428 431 429 int amdgpu_dpm_baco_exit(struct amdgpu_device *adev);

+6

drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c

··· 2040 2040 smu->is_apu && (amdgpu_in_reset(adev) || adev->in_s0ix)) 2041 2041 return 0; 2042 2042 2043 + /* vangogh s0ix */ 2044 + if ((amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 5, 0) || 2045 + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(11, 5, 2)) && 2046 + adev->in_s0ix) 2047 + return 0; 2048 + 2043 2049 /* 2044 2050 * For gpu reset, runpm and hibernation through BACO, 2045 2051 * BACO feature has to be kept enabled.

+3

drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c

··· 2217 2217 uint32_t total_cu = adev->gfx.config.max_cu_per_sh * 2218 2218 adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines; 2219 2219 2220 + if (adev->in_s0ix) 2221 + return 0; 2222 + 2220 2223 /* allow message will be sent after enable message on Vangogh*/ 2221 2224 if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_GFXCLK_BIT) && 2222 2225 (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {

+1 -1

drivers/gpu/drm/i915/Makefile

··· 413 413 # 414 414 # Enable locally for CONFIG_DRM_I915_WERROR=y. See also scripts/Makefile.build 415 415 ifdef CONFIG_DRM_I915_WERROR 416 - cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none -Werror $< 416 + cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none -Werror $< 417 417 endif 418 418 419 419 # header test

+2 -2

drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c

··· 205 205 206 206 u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count) 207 207 { 208 - return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency); 208 + return mul_u64_u32_div(count, NSEC_PER_SEC, gt->clock_frequency); 209 209 } 210 210 211 211 u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) ··· 215 215 216 216 u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns) 217 217 { 218 - return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC); 218 + return mul_u64_u32_div(ns, gt->clock_frequency, NSEC_PER_SEC); 219 219 } 220 220 221 221 u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns)

+14 -2

drivers/gpu/drm/i915/i915_vma.c

··· 1595 1595 err_vma_res: 1596 1596 i915_vma_resource_free(vma_res); 1597 1597 err_fence: 1598 - if (work) 1599 - dma_fence_work_commit_imm(&work->base); 1598 + if (work) { 1599 + /* 1600 + * When pinning VMA to GGTT on CHV or BXT with VTD enabled, 1601 + * commit VMA binding asynchronously to avoid risk of lock 1602 + * inversion among reservation_ww locks held here and 1603 + * cpu_hotplug_lock acquired from stop_machine(), which we 1604 + * wrap around GGTT updates when running in those environments. 1605 + */ 1606 + if (i915_vma_is_ggtt(vma) && 1607 + intel_vm_no_concurrent_access_wa(vma->vm->i915)) 1608 + dma_fence_work_commit(&work->base); 1609 + else 1610 + dma_fence_work_commit_imm(&work->base); 1611 + } 1600 1612 err_rpm: 1601 1613 intel_runtime_pm_put(&vma->vm->i915->runtime_pm, wakeref); 1602 1614

+1

drivers/gpu/drm/imagination/Kconfig

··· 7 7 depends on DRM 8 8 depends on MMU 9 9 depends on PM 10 + depends on POWER_SEQUENCING || !POWER_SEQUENCING 10 11 select DRM_EXEC 11 12 select DRM_GEM_SHMEM_HELPER 12 13 select DRM_SCHED

+7

drivers/gpu/drm/mediatek/mtk_crtc.c

··· 283 283 unsigned int i; 284 284 unsigned long flags; 285 285 286 + /* release GCE HW usage and start autosuspend */ 287 + pm_runtime_mark_last_busy(cmdq_cl->chan->mbox->dev); 288 + pm_runtime_put_autosuspend(cmdq_cl->chan->mbox->dev); 289 + 286 290 if (data->sta < 0) 287 291 return; 288 292 ··· 621 617 spin_lock_irqsave(&mtk_crtc->config_lock, flags); 622 618 mtk_crtc->config_updating = false; 623 619 spin_unlock_irqrestore(&mtk_crtc->config_lock, flags); 620 + 621 + if (pm_runtime_resume_and_get(mtk_crtc->cmdq_client.chan->mbox->dev) < 0) 622 + goto update_config_out; 624 623 625 624 mbox_send_message(mtk_crtc->cmdq_client.chan, cmdq_handle); 626 625 mbox_client_txdone(mtk_crtc->cmdq_client.chan, 0);

+1 -23

drivers/gpu/drm/mediatek/mtk_plane.c

··· 21 21 22 22 static const u64 modifiers[] = { 23 23 DRM_FORMAT_MOD_LINEAR, 24 - DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | 25 - AFBC_FORMAT_MOD_SPLIT | 26 - AFBC_FORMAT_MOD_SPARSE), 27 24 DRM_FORMAT_MOD_INVALID, 28 25 }; 29 26 ··· 68 71 uint32_t format, 69 72 uint64_t modifier) 70 73 { 71 - if (modifier == DRM_FORMAT_MOD_LINEAR) 72 - return true; 73 - 74 - if (modifier != DRM_FORMAT_MOD_ARM_AFBC( 75 - AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 | 76 - AFBC_FORMAT_MOD_SPLIT | 77 - AFBC_FORMAT_MOD_SPARSE)) 78 - return false; 79 - 80 - if (format != DRM_FORMAT_XRGB8888 && 81 - format != DRM_FORMAT_ARGB8888 && 82 - format != DRM_FORMAT_BGRX8888 && 83 - format != DRM_FORMAT_BGRA8888 && 84 - format != DRM_FORMAT_ABGR8888 && 85 - format != DRM_FORMAT_XBGR8888 && 86 - format != DRM_FORMAT_RGB888 && 87 - format != DRM_FORMAT_BGR888) 88 - return false; 89 - 90 - return true; 74 + return modifier == DRM_FORMAT_MOD_LINEAR; 91 75 } 92 76 93 77 static void mtk_plane_destroy_state(struct drm_plane *plane,

+3 -1

drivers/gpu/drm/nouveau/dispnv50/disp.c

··· 2867 2867 } 2868 2868 2869 2869 /* Assign the correct format modifiers */ 2870 - if (disp->disp->object.oclass >= TU102_DISP) 2870 + if (disp->disp->object.oclass >= GB202_DISP) 2871 + nouveau_display(dev)->format_modifiers = wndwca7e_modifiers; 2872 + else if (disp->disp->object.oclass >= TU102_DISP) 2871 2873 nouveau_display(dev)->format_modifiers = wndwc57e_modifiers; 2872 2874 else 2873 2875 if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)

+1

drivers/gpu/drm/nouveau/dispnv50/disp.h

··· 104 104 extern const u64 disp50xx_modifiers[]; 105 105 extern const u64 disp90xx_modifiers[]; 106 106 extern const u64 wndwc57e_modifiers[]; 107 + extern const u64 wndwca7e_modifiers[]; 107 108 #endif

+22 -2

drivers/gpu/drm/nouveau/dispnv50/wndw.c

··· 786 786 } 787 787 788 788 /* This function assumes the format has already been validated against the plane 789 - * and the modifier was validated against the device-wides modifier list at FB 789 + * and the modifier was validated against the device-wide modifier list at FB 790 790 * creation time. 791 791 */ 792 792 static bool nv50_plane_format_mod_supported(struct drm_plane *plane, 793 793 u32 format, u64 modifier) 794 794 { 795 795 struct nouveau_drm *drm = nouveau_drm(plane->dev); 796 + const struct drm_format_info *info = drm_format_info(format); 796 797 uint8_t i; 797 798 798 799 /* All chipsets can display all formats in linear layout */ ··· 801 800 return true; 802 801 803 802 if (drm->client.device.info.chipset < 0xc0) { 804 - const struct drm_format_info *info = drm_format_info(format); 805 803 const uint8_t kind = (modifier >> 12) & 0xff; 806 804 807 805 if (!format) return false; 808 806 809 807 for (i = 0; i < info->num_planes; i++) 810 808 if ((info->cpp[i] != 4) && kind != 0x70) return false; 809 + } else if (drm->client.device.info.chipset >= 0x1b2) { 810 + const uint8_t slayout = ((modifier >> 22) & 0x1) | 811 + ((modifier >> 25) & 0x6); 812 + 813 + if (!format) 814 + return false; 815 + 816 + /* 817 + * Note in practice this implies only formats where cpp is equal 818 + * for each plane, or >= 4 for all planes, are supported. 819 + */ 820 + for (i = 0; i < info->num_planes; i++) { 821 + if (((info->cpp[i] == 2) && slayout != 3) || 822 + ((info->cpp[i] == 1) && slayout != 2) || 823 + ((info->cpp[i] >= 4) && slayout != 1)) 824 + return false; 825 + 826 + /* 24-bit not supported. It has yet another layout */ 827 + WARN_ON(info->cpp[i] == 3); 828 + } 811 829 } 812 830 813 831 return true;

+33

drivers/gpu/drm/nouveau/dispnv50/wndwca7e.c

··· 179 179 return 0; 180 180 } 181 181 182 + /**************************************************************** 183 + * Log2(block height) ----------------------------+ * 184 + * Page Kind ----------------------------------+ | * 185 + * Gob Height/Page Kind Generation ------+ | | * 186 + * Sector layout -------+ | | | * 187 + * Compression ------+ | | | | */ 188 + const u64 wndwca7e_modifiers[] = { /* | | | | | */ 189 + /* 4cpp+ modifiers */ 190 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 0), 191 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 1), 192 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 2), 193 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 3), 194 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 4), 195 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 1, 2, 0x06, 5), 196 + /* 1cpp/8bpp modifiers */ 197 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 0), 198 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 1), 199 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 2), 200 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 3), 201 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 4), 202 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 2, 2, 0x06, 5), 203 + /* 2cpp/16bpp modifiers */ 204 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 0), 205 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 1), 206 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 2), 207 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 3), 208 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 4), 209 + DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(0, 3, 2, 0x06, 5), 210 + /* All formats support linear */ 211 + DRM_FORMAT_MOD_LINEAR, 212 + DRM_FORMAT_MOD_INVALID 213 + }; 214 + 182 215 static const struct nv50_wndw_func 183 216 wndwca7e = { 184 217 .acquire = wndwc37e_acquire,

+19 -15

drivers/gpu/drm/scheduler/sched_entity.c

··· 173 173 } 174 174 EXPORT_SYMBOL(drm_sched_entity_error); 175 175 176 + static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, 177 + struct dma_fence_cb *cb); 178 + 176 179 static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk) 177 180 { 178 181 struct drm_sched_job *job = container_of(wrk, typeof(*job), work); 179 - 180 - drm_sched_fence_scheduled(job->s_fence, NULL); 181 - drm_sched_fence_finished(job->s_fence, -ESRCH); 182 - WARN_ON(job->s_fence->parent); 183 - job->sched->ops->free_job(job); 184 - } 185 - 186 - /* Signal the scheduler finished fence when the entity in question is killed. */ 187 - static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, 188 - struct dma_fence_cb *cb) 189 - { 190 - struct drm_sched_job *job = container_of(cb, struct drm_sched_job, 191 - finish_cb); 182 + struct dma_fence *f; 192 183 unsigned long index; 193 - 194 - dma_fence_put(f); 195 184 196 185 /* Wait for all dependencies to avoid data corruptions */ 197 186 xa_for_each(&job->dependencies, index, f) { ··· 208 219 209 220 dma_fence_put(f); 210 221 } 222 + 223 + drm_sched_fence_scheduled(job->s_fence, NULL); 224 + drm_sched_fence_finished(job->s_fence, -ESRCH); 225 + WARN_ON(job->s_fence->parent); 226 + job->sched->ops->free_job(job); 227 + } 228 + 229 + /* Signal the scheduler finished fence when the entity in question is killed. */ 230 + static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f, 231 + struct dma_fence_cb *cb) 232 + { 233 + struct drm_sched_job *job = container_of(cb, struct drm_sched_job, 234 + finish_cb); 235 + 236 + dma_fence_put(f); 211 237 212 238 INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work); 213 239 schedule_work(&job->work);

+1

drivers/gpu/drm/tiny/Kconfig

··· 85 85 config DRM_PIXPAPER 86 86 tristate "DRM support for PIXPAPER display panels" 87 87 depends on DRM && SPI 88 + depends on MMU 88 89 select DRM_CLIENT_SELECTION 89 90 select DRM_GEM_SHMEM_HELPER 90 91 select DRM_KMS_HELPER

+7 -7

drivers/gpu/drm/xe/xe_device.c

··· 988 988 989 989 drm_dbg(&xe->drm, "Shutting down device\n"); 990 990 991 - if (xe_driver_flr_disabled(xe)) { 992 - xe_display_pm_shutdown(xe); 991 + xe_display_pm_shutdown(xe); 993 992 994 - xe_irq_suspend(xe); 993 + xe_irq_suspend(xe); 995 994 996 - for_each_gt(gt, xe, id) 997 - xe_gt_shutdown(gt); 995 + for_each_gt(gt, xe, id) 996 + xe_gt_shutdown(gt); 998 997 999 - xe_display_pm_shutdown_late(xe); 1000 - } else { 998 + xe_display_pm_shutdown_late(xe); 999 + 1000 + if (!xe_driver_flr_disabled(xe)) { 1001 1001 /* BOOM! */ 1002 1002 __xe_driver_flr(xe); 1003 1003 }

+2 -1

drivers/gpu/drm/xe/xe_exec.c

··· 165 165 166 166 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 167 167 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 168 - &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC | 168 + &syncs_user[num_syncs], NULL, 0, 169 + SYNC_PARSE_FLAG_EXEC | 169 170 (xe_vm_in_lr_mode(vm) ? 170 171 SYNC_PARSE_FLAG_LR_MODE : 0)); 171 172 if (err)

+14

drivers/gpu/drm/xe/xe_exec_queue.c

··· 10 10 #include <drm/drm_device.h> 11 11 #include <drm/drm_drv.h> 12 12 #include <drm/drm_file.h> 13 + #include <drm/drm_syncobj.h> 13 14 #include <uapi/drm/xe_drm.h> 14 15 15 16 #include "xe_dep_scheduler.h" ··· 325 324 } 326 325 xe_vm_put(migrate_vm); 327 326 327 + if (!IS_ERR(q)) { 328 + int err = drm_syncobj_create(&q->ufence_syncobj, 329 + DRM_SYNCOBJ_CREATE_SIGNALED, 330 + NULL); 331 + if (err) { 332 + xe_exec_queue_put(q); 333 + return ERR_PTR(err); 334 + } 335 + } 336 + 328 337 return q; 329 338 } 330 339 ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); ··· 343 332 { 344 333 struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount); 345 334 struct xe_exec_queue *eq, *next; 335 + 336 + if (q->ufence_syncobj) 337 + drm_syncobj_put(q->ufence_syncobj); 346 338 347 339 if (xe_exec_queue_uses_pxp(q)) 348 340 xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);

+7

drivers/gpu/drm/xe/xe_exec_queue_types.h

··· 15 15 #include "xe_hw_fence_types.h" 16 16 #include "xe_lrc_types.h" 17 17 18 + struct drm_syncobj; 18 19 struct xe_execlist_exec_queue; 19 20 struct xe_gt; 20 21 struct xe_guc_exec_queue; ··· 155 154 /** @pxp.link: link into the list of PXP exec queues */ 156 155 struct list_head link; 157 156 } pxp; 157 + 158 + /** @ufence_syncobj: User fence syncobj */ 159 + struct drm_syncobj *ufence_syncobj; 160 + 161 + /** @ufence_timeline_value: User fence timeline value */ 162 + u64 ufence_timeline_value; 158 163 159 164 /** @ops: submission backend exec queue operations */ 160 165 const struct xe_exec_queue_ops *ops;

+3

drivers/gpu/drm/xe/xe_guc_ct.c

··· 200 200 { 201 201 struct xe_guc_ct *ct = arg; 202 202 203 + #if IS_ENABLED(CONFIG_DRM_XE_DEBUG) 204 + cancel_work_sync(&ct->dead.worker); 205 + #endif 203 206 ct_exit_safe_mode(ct); 204 207 destroy_workqueue(ct->g2h_wq); 205 208 xa_destroy(&ct->fence_lookup);

+30 -15

drivers/gpu/drm/xe/xe_oa.c

··· 10 10 11 11 #include <drm/drm_drv.h> 12 12 #include <drm/drm_managed.h> 13 + #include <drm/drm_syncobj.h> 13 14 #include <uapi/drm/xe_drm.h> 14 15 15 16 #include <generated/xe_wa_oob.h> ··· 1390 1389 return 0; 1391 1390 } 1392 1391 1393 - static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) 1392 + static int xe_oa_parse_syncs(struct xe_oa *oa, 1393 + struct xe_oa_stream *stream, 1394 + struct xe_oa_open_param *param) 1394 1395 { 1395 1396 int ret, num_syncs, num_ufence = 0; 1396 1397 ··· 1412 1409 1413 1410 for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { 1414 1411 ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs], 1415 - &param->syncs_user[num_syncs], 0); 1412 + &param->syncs_user[num_syncs], 1413 + stream->ufence_syncobj, 1414 + ++stream->ufence_timeline_value, 0); 1416 1415 if (ret) 1417 1416 goto err_syncs; 1418 1417 ··· 1544 1539 return -ENODEV; 1545 1540 1546 1541 param.xef = stream->xef; 1547 - err = xe_oa_parse_syncs(stream->oa, &param); 1542 + err = xe_oa_parse_syncs(stream->oa, stream, &param); 1548 1543 if (err) 1549 1544 goto err_config_put; 1550 1545 ··· 1640 1635 if (stream->exec_q) 1641 1636 xe_exec_queue_put(stream->exec_q); 1642 1637 1638 + drm_syncobj_put(stream->ufence_syncobj); 1643 1639 kfree(stream); 1644 1640 } 1645 1641 ··· 1832 1826 struct xe_oa_open_param *param) 1833 1827 { 1834 1828 struct xe_oa_stream *stream; 1829 + struct drm_syncobj *ufence_syncobj; 1835 1830 int stream_fd; 1836 1831 int ret; 1837 1832 ··· 1843 1836 goto exit; 1844 1837 } 1845 1838 1839 + ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, 1840 + NULL); 1841 + if (ret) 1842 + goto exit; 1843 + 1846 1844 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 1847 1845 if (!stream) { 1848 1846 ret = -ENOMEM; 1849 - goto exit; 1847 + goto err_syncobj; 1850 1848 } 1851 - 1849 + stream->ufence_syncobj = ufence_syncobj; 1852 1850 stream->oa = oa; 1853 - ret = xe_oa_stream_init(stream, param); 
1851 + 1852 + ret = xe_oa_parse_syncs(oa, stream, param); 1854 1853 if (ret) 1855 1854 goto err_free; 1855 + 1856 + ret = xe_oa_stream_init(stream, param); 1857 + if (ret) { 1858 + while (param->num_syncs--) 1859 + xe_sync_entry_cleanup(&param->syncs[param->num_syncs]); 1860 + kfree(param->syncs); 1861 + goto err_free; 1862 + } 1856 1863 1857 1864 if (!param->disabled) { 1858 1865 ret = xe_oa_enable_locked(stream); ··· 1891 1870 xe_oa_stream_destroy(stream); 1892 1871 err_free: 1893 1872 kfree(stream); 1873 + err_syncobj: 1874 + drm_syncobj_put(ufence_syncobj); 1894 1875 exit: 1895 1876 return ret; 1896 1877 } ··· 2106 2083 goto err_exec_q; 2107 2084 } 2108 2085 2109 - ret = xe_oa_parse_syncs(oa, &param); 2110 - if (ret) 2111 - goto err_exec_q; 2112 - 2113 2086 mutex_lock(&param.hwe->gt->oa.gt_lock); 2114 2087 ret = xe_oa_stream_open_ioctl_locked(oa, &param); 2115 2088 mutex_unlock(&param.hwe->gt->oa.gt_lock); 2116 2089 if (ret < 0) 2117 - goto err_sync_cleanup; 2090 + goto err_exec_q; 2118 2091 2119 2092 return ret; 2120 2093 2121 - err_sync_cleanup: 2122 - while (param.num_syncs--) 2123 - xe_sync_entry_cleanup(&param.syncs[param.num_syncs]); 2124 - kfree(param.syncs); 2125 2094 err_exec_q: 2126 2095 if (param.exec_q) 2127 2096 xe_exec_queue_put(param.exec_q);

+8

drivers/gpu/drm/xe/xe_oa_types.h

··· 15 15 #include "regs/xe_reg_defs.h" 16 16 #include "xe_hw_engine_types.h" 17 17 18 + struct drm_syncobj; 19 + 18 20 #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M 19 21 20 22 enum xe_oa_report_header { ··· 249 247 250 248 /** @xef: xe_file with which the stream was opened */ 251 249 struct xe_file *xef; 250 + 251 + /** @ufence_syncobj: User fence syncobj */ 252 + struct drm_syncobj *ufence_syncobj; 253 + 254 + /** @ufence_timeline_value: User fence timeline value */ 255 + u64 ufence_timeline_value; 252 256 253 257 /** @last_fence: fence to use in stream destroy when needed */ 254 258 struct dma_fence *last_fence;

+15 -2

drivers/gpu/drm/xe/xe_sync.c

··· 113 113 int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, 114 114 struct xe_sync_entry *sync, 115 115 struct drm_xe_sync __user *sync_user, 116 + struct drm_syncobj *ufence_syncobj, 117 + u64 ufence_timeline_value, 116 118 unsigned int flags) 117 119 { 118 120 struct drm_xe_sync sync_in; ··· 194 192 if (exec) { 195 193 sync->addr = sync_in.addr; 196 194 } else { 195 + sync->ufence_timeline_value = ufence_timeline_value; 197 196 sync->ufence = user_fence_create(xe, sync_in.addr, 198 197 sync_in.timeline_value); 199 198 if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) 200 199 return PTR_ERR(sync->ufence); 200 + sync->ufence_chain_fence = dma_fence_chain_alloc(); 201 + if (!sync->ufence_chain_fence) 202 + return -ENOMEM; 203 + sync->ufence_syncobj = ufence_syncobj; 201 204 } 202 205 203 206 break; ··· 246 239 } else if (sync->ufence) { 247 240 int err; 248 241 249 - dma_fence_get(fence); 242 + drm_syncobj_add_point(sync->ufence_syncobj, 243 + sync->ufence_chain_fence, 244 + fence, sync->ufence_timeline_value); 245 + sync->ufence_chain_fence = NULL; 246 + 247 + fence = drm_syncobj_fence_get(sync->ufence_syncobj); 250 248 user_fence_get(sync->ufence); 251 249 err = dma_fence_add_callback(fence, &sync->ufence->cb, 252 250 user_fence_cb); ··· 271 259 drm_syncobj_put(sync->syncobj); 272 260 dma_fence_put(sync->fence); 273 261 dma_fence_chain_free(sync->chain_fence); 274 - if (sync->ufence) 262 + dma_fence_chain_free(sync->ufence_chain_fence); 263 + if (!IS_ERR_OR_NULL(sync->ufence)) 275 264 user_fence_put(sync->ufence); 276 265 } 277 266

+3

drivers/gpu/drm/xe/xe_sync.h

··· 8 8 9 9 #include "xe_sync_types.h" 10 10 11 + struct drm_syncobj; 11 12 struct xe_device; 12 13 struct xe_exec_queue; 13 14 struct xe_file; ··· 22 21 int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, 23 22 struct xe_sync_entry *sync, 24 23 struct drm_xe_sync __user *sync_user, 24 + struct drm_syncobj *ufence_syncobj, 25 + u64 ufence_timeline_value, 25 26 unsigned int flags); 26 27 int xe_sync_entry_add_deps(struct xe_sync_entry *sync, 27 28 struct xe_sched_job *job);

+3

drivers/gpu/drm/xe/xe_sync_types.h

··· 18 18 struct drm_syncobj *syncobj; 19 19 struct dma_fence *fence; 20 20 struct dma_fence_chain *chain_fence; 21 + struct dma_fence_chain *ufence_chain_fence; 22 + struct drm_syncobj *ufence_syncobj; 21 23 struct xe_user_fence *ufence; 22 24 u64 addr; 23 25 u64 timeline_value; 26 + u64 ufence_timeline_value; 24 27 u32 type; 25 28 u32 flags; 26 29 };

+4

drivers/gpu/drm/xe/xe_vm.c

··· 3606 3606 3607 3607 syncs_user = u64_to_user_ptr(args->syncs); 3608 3608 for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { 3609 + struct xe_exec_queue *__q = q ?: vm->q[0]; 3610 + 3609 3611 err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs], 3610 3612 &syncs_user[num_syncs], 3613 + __q->ufence_syncobj, 3614 + ++__q->ufence_timeline_value, 3611 3615 (xe_vm_in_lr_mode(vm) ? 3612 3616 SYNC_PARSE_FLAG_LR_MODE : 0) | 3613 3617 (!args->num_binds ?

+23 -27

drivers/i2c/muxes/i2c-mux-pca954x.c

··· 118 118 raw_spinlock_t lock; 119 119 struct regulator *supply; 120 120 121 + struct gpio_desc *reset_gpio; 121 122 struct reset_control *reset_cont; 122 123 }; 123 124 ··· 316 315 return 1 << chan; 317 316 } 318 317 319 - static void pca954x_reset_assert(struct pca954x *data) 320 - { 321 - if (data->reset_cont) 322 - reset_control_assert(data->reset_cont); 323 - } 324 - 325 - static void pca954x_reset_deassert(struct pca954x *data) 326 - { 327 - if (data->reset_cont) 328 - reset_control_deassert(data->reset_cont); 329 - } 330 - 331 - static void pca954x_reset_mux(struct pca954x *data) 332 - { 333 - pca954x_reset_assert(data); 334 - udelay(1); 335 - pca954x_reset_deassert(data); 336 - } 337 - 338 318 static int pca954x_select_chan(struct i2c_mux_core *muxc, u32 chan) 339 319 { 340 320 struct pca954x *data = i2c_mux_priv(muxc); ··· 329 347 ret = pca954x_reg_write(muxc->parent, client, regval); 330 348 data->last_chan = ret < 0 ? 0 : regval; 331 349 } 332 - if (ret == -ETIMEDOUT && data->reset_cont) 333 - pca954x_reset_mux(data); 334 350 335 351 return ret; 336 352 } ··· 338 358 struct pca954x *data = i2c_mux_priv(muxc); 339 359 struct i2c_client *client = data->client; 340 360 s32 idle_state; 341 - int ret = 0; 342 361 343 362 idle_state = READ_ONCE(data->idle_state); 344 363 if (idle_state >= 0) ··· 347 368 if (idle_state == MUX_IDLE_DISCONNECT) { 348 369 /* Deselect active channel */ 349 370 data->last_chan = 0; 350 - ret = pca954x_reg_write(muxc->parent, client, 351 - data->last_chan); 352 - if (ret == -ETIMEDOUT && data->reset_cont) 353 - pca954x_reset_mux(data); 371 + return pca954x_reg_write(muxc->parent, client, 372 + data->last_chan); 354 373 } 355 374 356 375 /* otherwise leave as-is */ ··· 527 550 if (IS_ERR(data->reset_cont)) 528 551 return dev_err_probe(dev, PTR_ERR(data->reset_cont), 529 552 "Failed to get reset\n"); 553 + else if (data->reset_cont) 554 + return 0; 555 + 556 + /* 557 + * fallback to legacy reset-gpios 558 + */ 559 + data->reset_gpio 
= devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH); 560 + if (IS_ERR(data->reset_gpio)) { 561 + return dev_err_probe(dev, PTR_ERR(data->reset_gpio), 562 + "Failed to get reset gpio"); 563 + } 530 564 531 565 return 0; 566 + } 567 + 568 + static void pca954x_reset_deassert(struct pca954x *data) 569 + { 570 + if (data->reset_cont) 571 + reset_control_deassert(data->reset_cont); 572 + else 573 + gpiod_set_value_cansleep(data->reset_gpio, 0); 532 574 } 533 575 534 576 /* ··· 589 593 if (ret) 590 594 goto fail_cleanup; 591 595 592 - if (data->reset_cont) { 596 + if (data->reset_cont || data->reset_gpio) { 593 597 udelay(1); 594 598 pca954x_reset_deassert(data); 595 599 /* Give the chip some time to recover. */

+7 -4

drivers/infiniband/hw/mlx5/cq.c

··· 1020 1020 if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) 1021 1021 MLX5_SET(cqc, cqc, oi, 1); 1022 1022 1023 + if (udata) { 1024 + cq->mcq.comp = mlx5_add_cq_to_tasklet; 1025 + cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; 1026 + } else { 1027 + cq->mcq.comp = mlx5_ib_cq_comp; 1028 + } 1029 + 1023 1030 err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out)); 1024 1031 if (err) 1025 1032 goto err_cqb; 1026 1033 1027 1034 mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn); 1028 - if (udata) 1029 - cq->mcq.tasklet_ctx.comp = mlx5_ib_cq_comp; 1030 - else 1031 - cq->mcq.comp = mlx5_ib_cq_comp; 1032 1035 cq->mcq.event = mlx5_ib_cq_event; 1033 1036 1034 1037 INIT_LIST_HEAD(&cq->wc_list);

+3 -9

drivers/iommu/iommufd/io_pagetable.c

··· 707 707 struct iopt_area *area; 708 708 unsigned long unmapped_bytes = 0; 709 709 unsigned int tries = 0; 710 - int rc = -ENOENT; 710 + /* If there are no mapped entries then success */ 711 + int rc = 0; 711 712 712 713 /* 713 714 * The domains_rwsem must be held in read mode any time any area->pages ··· 778 777 779 778 down_write(&iopt->iova_rwsem); 780 779 } 781 - if (unmapped_bytes) 782 - rc = 0; 783 780 784 781 out_unlock_iova: 785 782 up_write(&iopt->iova_rwsem); ··· 814 815 815 816 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) 816 817 { 817 - int rc; 818 - 819 - rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); 820 818 /* If the IOVAs are empty then unmap all succeeds */ 821 - if (rc == -ENOENT) 822 - return 0; 823 - return rc; 819 + return iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); 824 820 } 825 821 826 822 /* The caller must always free all the nodes in the allowed_iova rb_root. */

+4

drivers/iommu/iommufd/ioas.c

··· 367 367 &unmapped); 368 368 if (rc) 369 369 goto out_put; 370 + if (!unmapped) { 371 + rc = -ENOENT; 372 + goto out_put; 373 + } 370 374 } 371 375 372 376 cmd->length = unmapped;

+2 -3

drivers/iommu/iommufd/iova_bitmap.c

··· 130 130 static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap, 131 131 unsigned long iova) 132 132 { 133 - unsigned long pgsize = 1UL << bitmap->mapped.pgshift; 134 - 135 - return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize); 133 + return (iova >> bitmap->mapped.pgshift) / 134 + BITS_PER_TYPE(*bitmap->bitmap); 136 135 } 137 136 138 137 /*

+3 -2

drivers/net/bonding/bond_main.c

··· 2029 2029 /* check for initial state */ 2030 2030 new_slave->link = BOND_LINK_NOCHANGE; 2031 2031 if (bond->params.miimon) { 2032 - if (netif_carrier_ok(slave_dev)) { 2032 + if (netif_running(slave_dev) && netif_carrier_ok(slave_dev)) { 2033 2033 if (bond->params.updelay) { 2034 2034 bond_set_slave_link_state(new_slave, 2035 2035 BOND_LINK_BACK, ··· 2574 2574 bond_for_each_slave_rcu(bond, slave, iter) { 2575 2575 bond_propose_link_state(slave, BOND_LINK_NOCHANGE); 2576 2576 2577 - link_state = netif_carrier_ok(slave->dev); 2577 + link_state = netif_running(slave->dev) && 2578 + netif_carrier_ok(slave->dev); 2578 2579 2579 2580 switch (slave->link) { 2580 2581 case BOND_LINK_UP:

+2

drivers/net/ethernet/freescale/fec_main.c

··· 1836 1836 ndev->stats.rx_packets++; 1837 1837 pkt_len = fec16_to_cpu(bdp->cbd_datlen); 1838 1838 ndev->stats.rx_bytes += pkt_len; 1839 + if (fep->quirks & FEC_QUIRK_HAS_RACC) 1840 + ndev->stats.rx_bytes -= 2; 1839 1841 1840 1842 index = fec_enet_get_bd_index(bdp, &rxq->bd); 1841 1843 page = rxq->rx_skb_info[index].page;

+20 -3

drivers/net/ethernet/mellanox/mlx5/core/cq.c

··· 66 66 tasklet_schedule(&ctx->task); 67 67 } 68 68 69 - static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, 70 - struct mlx5_eqe *eqe) 69 + void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, 70 + struct mlx5_eqe *eqe) 71 71 { 72 72 unsigned long flags; 73 73 struct mlx5_eq_tasklet *tasklet_ctx = cq->tasklet_ctx.priv; ··· 95 95 if (schedule_tasklet) 96 96 tasklet_schedule(&tasklet_ctx->task); 97 97 } 98 + EXPORT_SYMBOL(mlx5_add_cq_to_tasklet); 98 99 100 + static void mlx5_core_cq_dummy_cb(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe) 101 + { 102 + mlx5_core_err(cq->eq->core.dev, 103 + "CQ default completion callback, CQ #%u\n", cq->cqn); 104 + } 105 + 106 + #define MLX5_CQ_INIT_CMD_SN cpu_to_be32(2 << 28) 99 107 /* Callers must verify outbox status in case of err */ 100 108 int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 101 109 u32 *in, int inlen, u32 *out, int outlen) ··· 129 121 cq->arm_sn = 0; 130 122 cq->eq = eq; 131 123 cq->uid = MLX5_GET(create_cq_in, in, uid); 124 + 125 + /* Kernel CQs must set the arm_db address prior to calling 126 + * this function, allowing for the proper value to be 127 + * initialized. User CQs are responsible for their own 128 + * initialization since they do not use the arm_db field. 129 + */ 130 + if (cq->arm_db) 131 + *cq->arm_db = MLX5_CQ_INIT_CMD_SN; 132 + 132 133 refcount_set(&cq->refcount, 1); 133 134 init_completion(&cq->free); 134 135 if (!cq->comp) 135 - cq->comp = mlx5_add_cq_to_tasklet; 136 + cq->comp = mlx5_core_cq_dummy_cb; 136 137 /* assuming CQ will be deleted before the EQ */ 137 138 cq->tasklet_ctx.priv = &eq->tasklet_ctx; 138 139 INIT_LIST_HEAD(&cq->tasklet_ctx.list);

+1 -1

drivers/net/ethernet/mellanox/mlx5/core/devlink.c

··· 541 541 max_num_channels = mlx5e_get_max_num_channels(mdev); 542 542 if (val32 > max_num_channels) { 543 543 NL_SET_ERR_MSG_FMT_MOD(extack, 544 - "Requested num_doorbells (%u) exceeds maximum number of channels (%u)", 544 + "Requested num_doorbells (%u) exceeds max number of channels (%u)", 545 545 val32, max_num_channels); 546 546 return -EINVAL; 547 547 }

+2 -1

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c

··· 804 804 goto err_xfrm; 805 805 } 806 806 807 - if (mlx5_eswitch_block_mode(priv->mdev)) 807 + err = mlx5_eswitch_block_mode(priv->mdev); 808 + if (err) 808 809 goto unblock_ipsec; 809 810 810 811 if (x->props.mode == XFRM_MODE_TUNNEL &&

+28 -5

drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c

··· 595 595 struct mlx5_core_dev *mdev = priv->mdev; 596 596 u8 max_bw_value[IEEE_8021QAZ_MAX_TCS]; 597 597 u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS]; 598 - __u64 upper_limit_mbps = roundup(255 * MLX5E_100MB, MLX5E_1GB); 598 + __u64 upper_limit_mbps; 599 + __u64 upper_limit_gbps; 599 600 int i; 601 + struct { 602 + int scale; 603 + const char *units_str; 604 + } units[] = { 605 + [MLX5_100_MBPS_UNIT] = { 606 + .scale = 100, 607 + .units_str = "Mbps", 608 + }, 609 + [MLX5_GBPS_UNIT] = { 610 + .scale = 1, 611 + .units_str = "Gbps", 612 + }, 613 + }; 600 614 601 615 memset(max_bw_value, 0, sizeof(max_bw_value)); 602 616 memset(max_bw_unit, 0, sizeof(max_bw_unit)); 617 + upper_limit_mbps = 255 * MLX5E_100MB; 618 + upper_limit_gbps = 255 * MLX5E_1GB; 603 619 604 620 for (i = 0; i <= mlx5_max_tc(mdev); i++) { 605 621 if (!maxrate->tc_maxrate[i]) { 606 622 max_bw_unit[i] = MLX5_BW_NO_LIMIT; 607 623 continue; 608 624 } 609 - if (maxrate->tc_maxrate[i] < upper_limit_mbps) { 625 + if (maxrate->tc_maxrate[i] <= upper_limit_mbps) { 610 626 max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], 611 627 MLX5E_100MB); 612 628 max_bw_value[i] = max_bw_value[i] ? 
max_bw_value[i] : 1; 613 629 max_bw_unit[i] = MLX5_100_MBPS_UNIT; 614 - } else { 630 + } else if (max_bw_value[i] <= upper_limit_gbps) { 615 631 max_bw_value[i] = div_u64(maxrate->tc_maxrate[i], 616 632 MLX5E_1GB); 617 633 max_bw_unit[i] = MLX5_GBPS_UNIT; 634 + } else { 635 + netdev_err(netdev, 636 + "tc_%d maxrate %llu Kbps exceeds limit %llu\n", 637 + i, maxrate->tc_maxrate[i], 638 + upper_limit_gbps); 639 + return -EINVAL; 618 640 } 619 641 } 620 642 621 643 for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { 622 - netdev_dbg(netdev, "%s: tc_%d <=> max_bw %d Gbps\n", 623 - __func__, i, max_bw_value[i]); 644 + netdev_dbg(netdev, "%s: tc_%d <=> max_bw %u %s\n", __func__, i, 645 + max_bw_value[i] * units[max_bw_unit[i]].scale, 646 + units[max_bw_unit[i]].units_str); 624 647 } 625 648 626 649 return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);

-1

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

··· 2219 2219 mcq->set_ci_db = cq->wq_ctrl.db.db; 2220 2220 mcq->arm_db = cq->wq_ctrl.db.db + 1; 2221 2221 *mcq->set_ci_db = 0; 2222 - *mcq->arm_db = 0; 2223 2222 mcq->vector = param->eq_ix; 2224 2223 mcq->comp = mlx5e_completion_event; 2225 2224 mcq->event = mlx5e_cq_error_event;

+7 -8

drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c

··· 421 421 __be64 *pas; 422 422 u32 i; 423 423 424 + conn->cq.mcq.cqe_sz = 64; 425 + conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; 426 + conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; 427 + *conn->cq.mcq.set_ci_db = 0; 428 + conn->cq.mcq.vector = 0; 429 + conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; 430 + 424 431 cq_size = roundup_pow_of_two(cq_size); 425 432 MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(cq_size)); 426 433 ··· 475 468 if (err) 476 469 goto err_cqwq; 477 470 478 - conn->cq.mcq.cqe_sz = 64; 479 - conn->cq.mcq.set_ci_db = conn->cq.wq_ctrl.db.db; 480 - conn->cq.mcq.arm_db = conn->cq.wq_ctrl.db.db + 1; 481 - *conn->cq.mcq.set_ci_db = 0; 482 - *conn->cq.mcq.arm_db = 0; 483 - conn->cq.mcq.vector = 0; 484 - conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; 485 471 tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); 486 - 487 472 mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); 488 473 489 474 goto out;

-7

drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c

··· 873 873 return err; 874 874 } 875 875 876 - static void hws_cq_complete(struct mlx5_core_cq *mcq, 877 - struct mlx5_eqe *eqe) 878 - { 879 - pr_err("CQ completion CQ: #%u\n", mcq->cqn); 880 - } 881 - 882 876 static int hws_send_ring_alloc_cq(struct mlx5_core_dev *mdev, 883 877 int numa_node, 884 878 struct mlx5hws_send_engine *queue, ··· 895 901 mcq->cqe_sz = 64; 896 902 mcq->set_ci_db = cq->wq_ctrl.db.db; 897 903 mcq->arm_db = cq->wq_ctrl.db.db + 1; 898 - mcq->comp = hws_cq_complete; 899 904 900 905 for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { 901 906 cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

+7 -21

drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c

··· 1049 1049 return 0; 1050 1050 } 1051 1051 1052 - static void dr_cq_complete(struct mlx5_core_cq *mcq, 1053 - struct mlx5_eqe *eqe) 1054 - { 1055 - pr_err("CQ completion CQ: #%u\n", mcq->cqn); 1056 - } 1057 - 1058 1052 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, 1059 1053 struct mlx5_uars_page *uar, 1060 1054 size_t ncqe) ··· 1083 1089 cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK; 1084 1090 } 1085 1091 1092 + cq->mcq.cqe_sz = 64; 1093 + cq->mcq.set_ci_db = cq->wq_ctrl.db.db; 1094 + cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; 1095 + *cq->mcq.set_ci_db = 0; 1096 + cq->mcq.vector = 0; 1097 + cq->mdev = mdev; 1098 + 1086 1099 inlen = MLX5_ST_SZ_BYTES(create_cq_in) + 1087 1100 sizeof(u64) * cq->wq_ctrl.buf.npages; 1088 1101 in = kvzalloc(inlen, GFP_KERNEL); ··· 1113 1112 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas); 1114 1113 mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas); 1115 1114 1116 - cq->mcq.comp = dr_cq_complete; 1117 - 1118 1115 err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out)); 1119 1116 kvfree(in); 1120 1117 1121 1118 if (err) 1122 1119 goto err_cqwq; 1123 - 1124 - cq->mcq.cqe_sz = 64; 1125 - cq->mcq.set_ci_db = cq->wq_ctrl.db.db; 1126 - cq->mcq.arm_db = cq->wq_ctrl.db.db + 1; 1127 - *cq->mcq.set_ci_db = 0; 1128 - 1129 - /* set no-zero value, in order to avoid the HW to run db-recovery on 1130 - * CQ that used in polling mode. 1131 - */ 1132 - *cq->mcq.arm_db = cpu_to_be32(2 << 28); 1133 - 1134 - cq->mcq.vector = 0; 1135 - cq->mdev = mdev; 1136 1120 1137 1121 return cq; 1138 1122

+38 -15

drivers/net/ethernet/ti/am65-cpsw-qos.c

··· 276 276 /* The number of wireside clocks contained in the verify 277 277 * timeout counter. The default is 0x1312d0 278 278 * (10ms at 125Mhz in 1G mode). 279 + * The frequency of the clock depends on the link speed 280 + * and the PHY interface. 279 281 */ 280 - val = 125 * HZ_PER_MHZ; /* assuming 125MHz wireside clock */ 282 + switch (port->slave.phy_if) { 283 + case PHY_INTERFACE_MODE_RGMII: 284 + case PHY_INTERFACE_MODE_RGMII_ID: 285 + case PHY_INTERFACE_MODE_RGMII_RXID: 286 + case PHY_INTERFACE_MODE_RGMII_TXID: 287 + if (port->qos.link_speed == SPEED_1000) 288 + val = 125 * HZ_PER_MHZ; /* 125 MHz at 1000Mbps*/ 289 + else if (port->qos.link_speed == SPEED_100) 290 + val = 25 * HZ_PER_MHZ; /* 25 MHz at 100Mbps*/ 291 + else 292 + val = (25 * HZ_PER_MHZ) / 10; /* 2.5 MHz at 10Mbps*/ 293 + break; 281 294 295 + case PHY_INTERFACE_MODE_QSGMII: 296 + case PHY_INTERFACE_MODE_SGMII: 297 + val = 125 * HZ_PER_MHZ; /* 125 MHz */ 298 + break; 299 + 300 + default: 301 + netdev_err(port->ndev, "selected mode does not supported IET\n"); 302 + return -EOPNOTSUPP; 303 + } 282 304 val /= MILLIHZ_PER_HZ; /* count per ms timeout */ 283 305 val *= verify_time_ms; /* count for timeout ms */ 284 306 ··· 317 295 u32 ctrl, status; 318 296 int try; 319 297 320 - try = 20; 298 + try = 3; 299 + 300 + /* Reset the verify state machine by writing 1 301 + * to LINKFAIL 302 + */ 303 + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 304 + ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; 305 + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 306 + 307 + /* Clear MAC_LINKFAIL bit to start Verify. 
*/ 308 + ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 309 + ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; 310 + writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 311 + 321 312 do { 322 - /* Reset the verify state machine by writing 1 323 - * to LINKFAIL 324 - */ 325 - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 326 - ctrl |= AM65_CPSW_PN_IET_MAC_LINKFAIL; 327 - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 328 - 329 - /* Clear MAC_LINKFAIL bit to start Verify. */ 330 - ctrl = readl(port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 331 - ctrl &= ~AM65_CPSW_PN_IET_MAC_LINKFAIL; 332 - writel(ctrl, port->port_base + AM65_CPSW_PN_REG_IET_CTRL); 333 - 334 313 msleep(port->qos.iet.verify_time_ms); 335 314 336 315 status = readl(port->port_base + AM65_CPSW_PN_REG_IET_STATUS); ··· 353 330 netdev_dbg(port->ndev, "MAC Merge verify error\n"); 354 331 return -ENODEV; 355 332 } 356 - } while (try-- > 0); 333 + } while (--try > 0); 357 334 358 335 netdev_dbg(port->ndev, "MAC Merge verify timeout\n"); 359 336 return -ETIMEDOUT;

+4 -1

drivers/net/phy/mdio_bus.c

··· 73 73 return err; 74 74 75 75 err = mdiobus_register_reset(mdiodev); 76 - if (err) 76 + if (err) { 77 + gpiod_put(mdiodev->reset_gpio); 78 + mdiodev->reset_gpio = NULL; 77 79 return err; 80 + } 78 81 79 82 /* Assert the reset signal */ 80 83 mdio_device_reset(mdiodev, 1);

+6 -6

drivers/net/phy/micrel.c

··· 4502 4502 { 4503 4503 struct kszphy_priv *lan8814 = phydev->priv; 4504 4504 4505 - /* Reset the PHY */ 4506 - lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, 4507 - LAN8814_QSGMII_SOFT_RESET, 4508 - LAN8814_QSGMII_SOFT_RESET_BIT, 4509 - LAN8814_QSGMII_SOFT_RESET_BIT); 4510 - 4511 4505 /* Disable ANEG with QSGMII PCS Host side */ 4512 4506 lanphy_modify_page_reg(phydev, LAN8814_PAGE_PORT_REGS, 4513 4507 LAN8814_QSGMII_PCS1G_ANEG_CONFIG, ··· 4599 4605 err == LAN8814_REV_LAN8818; 4600 4606 4601 4607 if (phy_package_init_once(phydev)) { 4608 + /* Reset the PHY */ 4609 + lanphy_modify_page_reg(phydev, LAN8814_PAGE_COMMON_REGS, 4610 + LAN8814_QSGMII_SOFT_RESET, 4611 + LAN8814_QSGMII_SOFT_RESET_BIT, 4612 + LAN8814_QSGMII_SOFT_RESET_BIT); 4613 + 4602 4614 err = lan8814_release_coma_mode(phydev); 4603 4615 if (err) 4604 4616 return err;

+11 -5

drivers/net/virtio_net.c

··· 2631 2631 return; 2632 2632 } 2633 2633 2634 - /* 1. Save the flags early, as the XDP program might overwrite them. 2634 + /* About the flags below: 2635 + * 1. Save the flags early, as the XDP program might overwrite them. 2635 2636 * These flags ensure packets marked as VIRTIO_NET_HDR_F_DATA_VALID 2636 2637 * stay valid after XDP processing. 2637 2638 * 2. XDP doesn't work with partially checksummed packets (refer to 2638 2639 * virtnet_xdp_set()), so packets marked as 2639 2640 * VIRTIO_NET_HDR_F_NEEDS_CSUM get dropped during XDP processing. 2640 2641 */ 2641 - flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2642 2642 2643 - if (vi->mergeable_rx_bufs) 2643 + if (vi->mergeable_rx_bufs) { 2644 + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2644 2645 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 2645 2646 stats); 2646 - else if (vi->big_packets) 2647 + } else if (vi->big_packets) { 2648 + void *p = page_address((struct page *)buf); 2649 + 2650 + flags = ((struct virtio_net_common_hdr *)p)->hdr.flags; 2647 2651 skb = receive_big(dev, vi, rq, buf, len, stats); 2648 - else 2652 + } else { 2653 + flags = ((struct virtio_net_common_hdr *)buf)->hdr.flags; 2649 2654 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 2655 + } 2650 2656 2651 2657 if (unlikely(!skb)) 2652 2658 return;

+3

drivers/net/wireless/ath/ath11k/wmi.c

··· 5967 5967 dma_unmap_single(ar->ab->dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE); 5968 5968 5969 5969 info = IEEE80211_SKB_CB(msdu); 5970 + memset(&info->status, 0, sizeof(info->status)); 5971 + info->status.rates[0].idx = -1; 5972 + 5970 5973 if ((!(info->flags & IEEE80211_TX_CTL_NO_ACK)) && 5971 5974 !tx_compl_param->status) { 5972 5975 info->flags |= IEEE80211_TX_STAT_ACK;

+1 -6

drivers/net/wireless/intel/iwlwifi/mld/link.c

··· 716 716 iwl_mld_get_chan_load_from_element(struct iwl_mld *mld, 717 717 struct ieee80211_bss_conf *link_conf) 718 718 { 719 - struct ieee80211_vif *vif = link_conf->vif; 720 719 const struct cfg80211_bss_ies *ies; 721 720 const struct element *bss_load_elem = NULL; 722 721 const struct ieee80211_bss_load_elem *bss_load; 723 722 724 723 guard(rcu)(); 725 724 726 - if (ieee80211_vif_link_active(vif, link_conf->link_id)) 727 - ies = rcu_dereference(link_conf->bss->beacon_ies); 728 - else 729 - ies = rcu_dereference(link_conf->bss->ies); 730 - 725 + ies = rcu_dereference(link_conf->bss->beacon_ies); 731 726 if (ies) 732 727 bss_load_elem = cfg80211_find_elem(WLAN_EID_QBSS_LOAD, 733 728 ies->data, ies->len);

+3 -10

drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c

··· 938 938 939 939 u16 iwl_mvm_mac_ctxt_get_beacon_flags(const struct iwl_fw *fw, u8 rate_idx) 940 940 { 941 + u16 flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); 941 942 bool is_new_rate = iwl_fw_lookup_cmd_ver(fw, BEACON_TEMPLATE_CMD, 0) > 10; 942 - u16 flags, cck_flag; 943 - 944 - if (is_new_rate) { 945 - flags = iwl_mvm_mac80211_idx_to_hwrate(fw, rate_idx); 946 - cck_flag = IWL_MAC_BEACON_CCK; 947 - } else { 948 - cck_flag = IWL_MAC_BEACON_CCK_V1; 949 - flags = iwl_fw_rate_idx_to_plcp(rate_idx); 950 - } 951 943 952 944 if (rate_idx <= IWL_LAST_CCK_RATE) 953 - flags |= cck_flag; 945 + flags |= is_new_rate ? IWL_MAC_BEACON_CCK 946 + : IWL_MAC_BEACON_CCK_V1; 954 947 955 948 return flags; 956 949 }

+7 -7

drivers/net/wireless/intel/iwlwifi/mvm/time-event.c

··· 463 463 if (!aux_roc_te) /* Not an Aux ROC time event */ 464 464 return -EINVAL; 465 465 466 - iwl_mvm_te_check_trigger(mvm, notif, te_data); 466 + iwl_mvm_te_check_trigger(mvm, notif, aux_roc_te); 467 467 468 468 IWL_DEBUG_TE(mvm, 469 469 "Aux ROC time event notification - UID = 0x%x action %d (error = %d)\n", ··· 475 475 /* End TE, notify mac80211 */ 476 476 ieee80211_remain_on_channel_expired(mvm->hw); 477 477 iwl_mvm_roc_finished(mvm); /* flush aux queue */ 478 - list_del(&te_data->list); /* remove from list */ 479 - te_data->running = false; 480 - te_data->vif = NULL; 481 - te_data->uid = 0; 482 - te_data->id = TE_MAX; 478 + list_del(&aux_roc_te->list); /* remove from list */ 479 + aux_roc_te->running = false; 480 + aux_roc_te->vif = NULL; 481 + aux_roc_te->uid = 0; 482 + aux_roc_te->id = TE_MAX; 483 483 } else if (le32_to_cpu(notif->action) == TE_V2_NOTIF_HOST_EVENT_START) { 484 484 set_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status); 485 - te_data->running = true; 485 + aux_roc_te->running = true; 486 486 ieee80211_ready_on_channel(mvm->hw); /* Start TE */ 487 487 } else { 488 488 IWL_DEBUG_TE(mvm,

+9 -3

drivers/net/wireless/intel/iwlwifi/mvm/utils.c

··· 159 159 160 160 u8 iwl_mvm_mac80211_idx_to_hwrate(const struct iwl_fw *fw, int rate_idx) 161 161 { 162 - return (rate_idx >= IWL_FIRST_OFDM_RATE ? 163 - rate_idx - IWL_FIRST_OFDM_RATE : 164 - rate_idx); 162 + if (iwl_fw_lookup_cmd_ver(fw, TX_CMD, 0) > 8) 163 + /* In the new rate legacy rates are indexed: 164 + * 0 - 3 for CCK and 0 - 7 for OFDM. 165 + */ 166 + return (rate_idx >= IWL_FIRST_OFDM_RATE ? 167 + rate_idx - IWL_FIRST_OFDM_RATE : 168 + rate_idx); 169 + 170 + return iwl_fw_rate_idx_to_plcp(rate_idx); 165 171 } 166 172 167 173 u8 iwl_mvm_mac80211_ac_to_ucode_ac(enum ieee80211_ac_numbers ac)

+66 -5

drivers/net/wireless/marvell/mwl8k.c

··· 2966 2966 /* 2967 2967 * CMD_SET_BEACON. 2968 2968 */ 2969 + 2970 + static bool mwl8k_beacon_has_ds_params(const u8 *buf, int len) 2971 + { 2972 + const struct ieee80211_mgmt *mgmt = (const void *)buf; 2973 + int ies_len; 2974 + 2975 + if (len <= offsetof(struct ieee80211_mgmt, u.beacon.variable)) 2976 + return false; 2977 + 2978 + ies_len = len - offsetof(struct ieee80211_mgmt, u.beacon.variable); 2979 + 2980 + return cfg80211_find_ie(WLAN_EID_DS_PARAMS, mgmt->u.beacon.variable, 2981 + ies_len) != NULL; 2982 + } 2983 + 2984 + static void mwl8k_beacon_copy_inject_ds_params(struct ieee80211_hw *hw, 2985 + u8 *buf_dst, const u8 *buf_src, 2986 + int src_len) 2987 + { 2988 + const struct ieee80211_mgmt *mgmt = (const void *)buf_src; 2989 + static const u8 before_ds_params[] = { 2990 + WLAN_EID_SSID, 2991 + WLAN_EID_SUPP_RATES, 2992 + }; 2993 + const u8 *ies; 2994 + int hdr_len, left, offs, pos; 2995 + 2996 + ies = mgmt->u.beacon.variable; 2997 + hdr_len = offsetof(struct ieee80211_mgmt, u.beacon.variable); 2998 + 2999 + offs = ieee80211_ie_split(ies, src_len - hdr_len, before_ds_params, 3000 + ARRAY_SIZE(before_ds_params), 0); 3001 + 3002 + pos = hdr_len + offs; 3003 + left = src_len - pos; 3004 + 3005 + memcpy(buf_dst, buf_src, pos); 3006 + 3007 + /* Inject a DSSS Parameter Set after SSID + Supp Rates */ 3008 + buf_dst[pos + 0] = WLAN_EID_DS_PARAMS; 3009 + buf_dst[pos + 1] = 1; 3010 + buf_dst[pos + 2] = hw->conf.chandef.chan->hw_value; 3011 + 3012 + memcpy(buf_dst + pos + 3, buf_src + pos, left); 3013 + } 2969 3014 struct mwl8k_cmd_set_beacon { 2970 3015 struct mwl8k_cmd_pkt_hdr header; 2971 3016 __le16 beacon_len; ··· 3020 2975 static int mwl8k_cmd_set_beacon(struct ieee80211_hw *hw, 3021 2976 struct ieee80211_vif *vif, u8 *beacon, int len) 3022 2977 { 2978 + bool ds_params_present = mwl8k_beacon_has_ds_params(beacon, len); 3023 2979 struct mwl8k_cmd_set_beacon *cmd; 3024 - int rc; 2980 + int rc, final_len = len; 3025 2981 3026 - cmd = kzalloc(sizeof(*cmd) + len, 
GFP_KERNEL); 2982 + if (!ds_params_present) { 2983 + /* 2984 + * mwl8k firmware requires a DS Params IE with the current 2985 + * channel in AP beacons. If mac80211/hostapd does not 2986 + * include it, inject one here. IE ID + length + channel 2987 + * number = 3 bytes. 2988 + */ 2989 + final_len += 3; 2990 + } 2991 + 2992 + cmd = kzalloc(sizeof(*cmd) + final_len, GFP_KERNEL); 3027 2993 if (cmd == NULL) 3028 2994 return -ENOMEM; 3029 2995 3030 2996 cmd->header.code = cpu_to_le16(MWL8K_CMD_SET_BEACON); 3031 - cmd->header.length = cpu_to_le16(sizeof(*cmd) + len); 3032 - cmd->beacon_len = cpu_to_le16(len); 3033 - memcpy(cmd->beacon, beacon, len); 2997 + cmd->header.length = cpu_to_le16(sizeof(*cmd) + final_len); 2998 + cmd->beacon_len = cpu_to_le16(final_len); 2999 + 3000 + if (ds_params_present) 3001 + memcpy(cmd->beacon, beacon, len); 3002 + else 3003 + mwl8k_beacon_copy_inject_ds_params(hw, cmd->beacon, beacon, 3004 + len); 3034 3005 3035 3006 rc = mwl8k_post_pervif_cmd(hw, vif, &cmd->header); 3036 3007 kfree(cmd);

+10 -4

drivers/net/wireless/virtual/mac80211_hwsim.c

··· 2003 2003 struct ieee80211_sta *sta = control->sta; 2004 2004 struct ieee80211_bss_conf *bss_conf; 2005 2005 2006 + /* This can happen in case of monitor injection */ 2007 + if (!vif) { 2008 + ieee80211_free_txskb(hw, skb); 2009 + return; 2010 + } 2011 + 2006 2012 if (link != IEEE80211_LINK_UNSPECIFIED) { 2007 - bss_conf = rcu_dereference(txi->control.vif->link_conf[link]); 2013 + bss_conf = rcu_dereference(vif->link_conf[link]); 2008 2014 if (sta) 2009 2015 link_sta = rcu_dereference(sta->link[link]); 2010 2016 } else { ··· 2071 2065 return; 2072 2066 } 2073 2067 2074 - if (txi->control.vif) 2075 - hwsim_check_magic(txi->control.vif); 2068 + if (vif) 2069 + hwsim_check_magic(vif); 2076 2070 if (control->sta) 2077 2071 hwsim_check_sta_magic(control->sta); 2078 2072 2079 2073 if (ieee80211_hw_check(hw, SUPPORTS_RC_TABLE)) 2080 - ieee80211_get_tx_rates(txi->control.vif, control->sta, skb, 2074 + ieee80211_get_tx_rates(vif, control->sta, skb, 2081 2075 txi->control.rates, 2082 2076 ARRAY_SIZE(txi->control.rates)); 2083 2077

-1

drivers/rtc/rtc-cpcap.c

··· 268 268 return err; 269 269 270 270 rtc->alarm_irq = platform_get_irq(pdev, 0); 271 - rtc->alarm_enabled = true; 272 271 err = devm_request_threaded_irq(dev, rtc->alarm_irq, NULL, 273 272 cpcap_rtc_alarm_irq, 274 273 IRQF_TRIGGER_NONE | IRQF_ONESHOT,

+1 -1

drivers/rtc/rtc-rx8025.c

··· 316 316 return hour_reg; 317 317 rx8025->is_24 = (hour_reg & RX8035_BIT_HOUR_1224); 318 318 } else { 319 - rx8025->is_24 = (ctrl[1] & RX8025_BIT_CTRL1_1224); 319 + rx8025->is_24 = (ctrl[0] & RX8025_BIT_CTRL1_1224); 320 320 } 321 321 out: 322 322 return err;

-1

drivers/rtc/rtc-tps6586x.c

··· 258 258 259 259 irq_set_status_flags(rtc->irq, IRQ_NOAUTOEN); 260 260 261 - rtc->irq_en = true; 262 261 ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL, 263 262 tps6586x_rtc_irq, 264 263 IRQF_ONESHOT,

+1 -1

drivers/ufs/core/ufs-sysfs.c

··· 1949 1949 return hba->dev_info.hid_sup ? attr->mode : 0; 1950 1950 } 1951 1951 1952 - const struct attribute_group ufs_sysfs_hid_group = { 1952 + static const struct attribute_group ufs_sysfs_hid_group = { 1953 1953 .name = "hid", 1954 1954 .attrs = ufs_sysfs_hid, 1955 1955 .is_visible = ufs_sysfs_hid_is_visible,

-1

drivers/ufs/core/ufs-sysfs.h

··· 14 14 15 15 extern const struct attribute_group ufs_sysfs_unit_descriptor_group; 16 16 extern const struct attribute_group ufs_sysfs_lun_attributes_group; 17 - extern const struct attribute_group ufs_sysfs_hid_group; 18 17 19 18 #endif

+6 -11

drivers/ufs/core/ufshcd.c

··· 5066 5066 * If UFS device isn't active then we will have to issue link startup 5067 5067 * 2 times to make sure the device state moves to active. 5068 5068 */ 5069 - if (!ufshcd_is_ufs_dev_active(hba)) 5069 + if (!(hba->quirks & UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE) && 5070 + !ufshcd_is_ufs_dev_active(hba)) 5070 5071 link_startup_again = true; 5071 5072 5072 5073 link_startup: ··· 5132 5131 ufshcd_readl(hba, REG_UIC_ERROR_CODE_PHY_ADAPTER_LAYER); 5133 5132 ret = ufshcd_make_hba_operational(hba); 5134 5133 out: 5135 - if (ret) { 5134 + if (ret) 5136 5135 dev_err(hba->dev, "link startup failed %d\n", ret); 5137 - ufshcd_print_host_state(hba); 5138 - ufshcd_print_pwr_info(hba); 5139 - ufshcd_print_evt_hist(hba); 5140 - } 5141 5136 return ret; 5142 5137 } 5143 5138 ··· 8500 8503 DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP) & 8501 8504 UFS_DEV_HID_SUPPORT; 8502 8505 8503 - sysfs_update_group(&hba->dev->kobj, &ufs_sysfs_hid_group); 8504 - 8505 8506 model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; 8506 8507 8507 8508 err = ufshcd_read_string_desc(hba, model_index, ··· 10656 10661 * @mmio_base: base register address 10657 10662 * @irq: Interrupt line of device 10658 10663 * 10659 - * Return: 0 on success, non-zero value on failure. 10664 + * Return: 0 on success; < 0 on failure. 10660 10665 */ 10661 10666 int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) 10662 10667 { ··· 10886 10891 if (err) 10887 10892 goto out_disable; 10888 10893 10889 - async_schedule(ufshcd_async_scan, hba); 10890 10894 ufs_sysfs_add_nodes(hba->dev); 10895 + async_schedule(ufshcd_async_scan, hba); 10891 10896 10892 10897 device_enable_async_suspend(dev); 10893 10898 ufshcd_pm_qos_init(hba); ··· 10897 10902 hba->is_irq_enabled = false; 10898 10903 ufshcd_hba_exit(hba); 10899 10904 out_error: 10900 - return err; 10905 + return err > 0 ? -EIO : err; 10901 10906 } 10902 10907 EXPORT_SYMBOL_GPL(ufshcd_init); 10903 10908

+14 -1

drivers/ufs/host/ufs-qcom.c

··· 740 740 741 741 742 742 /* reset the connected UFS device during power down */ 743 - if (ufs_qcom_is_link_off(hba) && host->device_reset) 743 + if (ufs_qcom_is_link_off(hba) && host->device_reset) { 744 744 ufs_qcom_device_reset_ctrl(hba, true); 745 + /* 746 + * After sending the SSU command, asserting the rst_n 747 + * line causes the device firmware to wake up and 748 + * execute its reset routine. 749 + * 750 + * During this process, the device may draw current 751 + * beyond the permissible limit for low-power mode (LPM). 752 + * A 10ms delay, based on experimental observations, 753 + * allows the UFS device to complete its hardware reset 754 + * before transitioning the power rail to LPM. 755 + */ 756 + usleep_range(10000, 11000); 757 + } 745 758 746 759 return ufs_qcom_ice_suspend(host); 747 760 }

+67 -3

drivers/ufs/host/ufshcd-pci.c

··· 15 15 #include <linux/pci.h> 16 16 #include <linux/pm_runtime.h> 17 17 #include <linux/pm_qos.h> 18 + #include <linux/suspend.h> 18 19 #include <linux/debugfs.h> 19 20 #include <linux/uuid.h> 20 21 #include <linux/acpi.h> ··· 32 31 u32 dsm_fns; 33 32 u32 active_ltr; 34 33 u32 idle_ltr; 34 + int saved_spm_lvl; 35 35 struct dentry *debugfs_root; 36 36 struct gpio_desc *reset_gpio; 37 37 }; ··· 349 347 host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL); 350 348 if (!host) 351 349 return -ENOMEM; 350 + host->saved_spm_lvl = -1; 352 351 ufshcd_set_variant(hba, host); 353 352 intel_dsm_init(host, hba->dev); 354 353 if (INTEL_DSM_SUPPORTED(host, RESET)) { ··· 428 425 static int ufs_intel_adl_init(struct ufs_hba *hba) 429 426 { 430 427 hba->nop_out_timeout = 200; 431 - hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; 428 + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8 | 429 + UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE; 432 430 hba->caps |= UFSHCD_CAP_WB_EN; 433 431 return ufs_intel_common_init(hba); 434 432 } ··· 542 538 543 539 return ufshcd_system_resume(dev); 544 540 } 541 + 542 + static int ufs_intel_suspend_prepare(struct device *dev) 543 + { 544 + struct ufs_hba *hba = dev_get_drvdata(dev); 545 + struct intel_host *host = ufshcd_get_variant(hba); 546 + int err; 547 + 548 + /* 549 + * Only s2idle (S0ix) retains link state. Force power-off 550 + * (UFS_PM_LVL_5) for any other case. 
551 + */ 552 + if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE && hba->spm_lvl < UFS_PM_LVL_5) { 553 + host->saved_spm_lvl = hba->spm_lvl; 554 + hba->spm_lvl = UFS_PM_LVL_5; 555 + } 556 + 557 + err = ufshcd_suspend_prepare(dev); 558 + 559 + if (err < 0 && host->saved_spm_lvl != -1) { 560 + hba->spm_lvl = host->saved_spm_lvl; 561 + host->saved_spm_lvl = -1; 562 + } 563 + 564 + return err; 565 + } 566 + 567 + static void ufs_intel_resume_complete(struct device *dev) 568 + { 569 + struct ufs_hba *hba = dev_get_drvdata(dev); 570 + struct intel_host *host = ufshcd_get_variant(hba); 571 + 572 + ufshcd_resume_complete(dev); 573 + 574 + if (host->saved_spm_lvl != -1) { 575 + hba->spm_lvl = host->saved_spm_lvl; 576 + host->saved_spm_lvl = -1; 577 + } 578 + } 579 + 580 + static int ufshcd_pci_suspend_prepare(struct device *dev) 581 + { 582 + struct ufs_hba *hba = dev_get_drvdata(dev); 583 + 584 + if (!strcmp(hba->vops->name, "intel-pci")) 585 + return ufs_intel_suspend_prepare(dev); 586 + 587 + return ufshcd_suspend_prepare(dev); 588 + } 589 + 590 + static void ufshcd_pci_resume_complete(struct device *dev) 591 + { 592 + struct ufs_hba *hba = dev_get_drvdata(dev); 593 + 594 + if (!strcmp(hba->vops->name, "intel-pci")) { 595 + ufs_intel_resume_complete(dev); 596 + return; 597 + } 598 + 599 + ufshcd_resume_complete(dev); 600 + } 545 601 #endif 546 602 547 603 /** ··· 675 611 .thaw = ufshcd_system_resume, 676 612 .poweroff = ufshcd_system_suspend, 677 613 .restore = ufshcd_pci_restore, 678 - .prepare = ufshcd_suspend_prepare, 679 - .complete = ufshcd_resume_complete, 614 + .prepare = ufshcd_pci_suspend_prepare, 615 + .complete = ufshcd_pci_resume_complete, 680 616 #endif 681 617 }; 682 618

+2 -4

drivers/vdpa/mlx5/net/mlx5_vnet.c

··· 573 573 vcq->mcq.set_ci_db = vcq->db.db; 574 574 vcq->mcq.arm_db = vcq->db.db + 1; 575 575 vcq->mcq.cqe_sz = 64; 576 + vcq->mcq.comp = mlx5_vdpa_cq_comp; 577 + vcq->cqe = num_ent; 576 578 577 579 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent); 578 580 if (err) ··· 614 612 if (err) 615 613 goto err_vec; 616 614 617 - vcq->mcq.comp = mlx5_vdpa_cq_comp; 618 - vcq->cqe = num_ent; 619 - vcq->mcq.set_ci_db = vcq->db.db; 620 - vcq->mcq.arm_db = vcq->db.db + 1; 621 615 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index); 622 616 kfree(in); 623 617 return 0;

+3 -1

fs/btrfs/inode.c

··· 177 177 return ret; 178 178 } 179 179 ret = paths_from_inode(inum, ipath); 180 - if (ret < 0) 180 + if (ret < 0) { 181 + btrfs_put_root(local_root); 181 182 goto err; 183 + } 182 184 183 185 /* 184 186 * We deliberately ignore the bit ipath might have been too small to

+2

fs/btrfs/scrub.c

··· 2203 2203 ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start, 2204 2204 &length, &bioc, NULL, NULL); 2205 2205 if (ret < 0) { 2206 + bio_put(bio); 2206 2207 btrfs_put_bioc(bioc); 2207 2208 btrfs_bio_counter_dec(fs_info); 2208 2209 goto out; ··· 2213 2212 btrfs_put_bioc(bioc); 2214 2213 if (!rbio) { 2215 2214 ret = -ENOMEM; 2215 + bio_put(bio); 2216 2216 btrfs_bio_counter_dec(fs_info); 2217 2217 goto out; 2218 2218 }

+1 -1

fs/btrfs/tree-log.c

··· 7122 7122 * a power failure unless the log was synced as part of an fsync 7123 7123 * against any other unrelated inode. 7124 7124 */ 7125 - if (inode_only != LOG_INODE_EXISTS) 7125 + if (!ctx->logging_new_name && inode_only != LOG_INODE_EXISTS) 7126 7126 inode->last_log_commit = inode->last_sub_trans; 7127 7127 spin_unlock(&inode->lock); 7128 7128

+28 -32

fs/btrfs/zoned.c

··· 1317 1317 if (!btrfs_dev_is_sequential(device, info->physical)) { 1318 1318 up_read(&dev_replace->rwsem); 1319 1319 info->alloc_offset = WP_CONVENTIONAL; 1320 + info->capacity = device->zone_info->zone_size; 1320 1321 return 0; 1321 1322 } 1322 1323 ··· 1523 1522 u64 last_alloc) 1524 1523 { 1525 1524 struct btrfs_fs_info *fs_info = bg->fs_info; 1525 + u64 stripe_nr = 0, stripe_offset = 0; 1526 + u32 stripe_index = 0; 1526 1527 1527 1528 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { 1528 1529 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", ··· 1532 1529 return -EINVAL; 1533 1530 } 1534 1531 1532 + if (last_alloc) { 1533 + u32 factor = map->num_stripes; 1534 + 1535 + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; 1536 + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; 1537 + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); 1538 + } 1539 + 1535 1540 for (int i = 0; i < map->num_stripes; i++) { 1536 1541 if (zone_info[i].alloc_offset == WP_MISSING_DEV) 1537 1542 continue; 1538 1543 1539 1544 if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { 1540 - u64 stripe_nr, full_stripe_nr; 1541 - u64 stripe_offset; 1542 - int stripe_index; 1543 1545 1544 - stripe_nr = div64_u64(last_alloc, map->stripe_size); 1545 - stripe_offset = stripe_nr * map->stripe_size; 1546 - full_stripe_nr = div_u64(stripe_nr, map->num_stripes); 1547 - div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); 1548 - 1549 - zone_info[i].alloc_offset = 1550 - full_stripe_nr * map->stripe_size; 1546 + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); 1551 1547 1552 1548 if (stripe_index > i) 1553 - zone_info[i].alloc_offset += map->stripe_size; 1549 + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; 1554 1550 else if (stripe_index == i) 1555 - zone_info[i].alloc_offset += 1556 - (last_alloc - stripe_offset); 1551 + zone_info[i].alloc_offset += stripe_offset; 1557 1552 } 1558 1553 1559 1554 if (test_bit(0, active) != test_bit(i, active)) { 
··· 1575 1574 u64 last_alloc) 1576 1575 { 1577 1576 struct btrfs_fs_info *fs_info = bg->fs_info; 1577 + u64 stripe_nr = 0, stripe_offset = 0; 1578 + u32 stripe_index = 0; 1578 1579 1579 1580 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { 1580 1581 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", 1581 1582 btrfs_bg_type_to_raid_name(map->type)); 1582 1583 return -EINVAL; 1584 + } 1585 + 1586 + if (last_alloc) { 1587 + u32 factor = map->num_stripes / map->sub_stripes; 1588 + 1589 + stripe_nr = last_alloc >> BTRFS_STRIPE_LEN_SHIFT; 1590 + stripe_offset = last_alloc & BTRFS_STRIPE_LEN_MASK; 1591 + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); 1583 1592 } 1584 1593 1585 1594 for (int i = 0; i < map->num_stripes; i++) { ··· 1605 1594 } 1606 1595 1607 1596 if (zone_info[i].alloc_offset == WP_CONVENTIONAL) { 1608 - u64 stripe_nr, full_stripe_nr; 1609 - u64 stripe_offset; 1610 - int stripe_index; 1611 - 1612 - stripe_nr = div64_u64(last_alloc, map->stripe_size); 1613 - stripe_offset = stripe_nr * map->stripe_size; 1614 - full_stripe_nr = div_u64(stripe_nr, 1615 - map->num_stripes / map->sub_stripes); 1616 - div_u64_rem(stripe_nr, 1617 - (map->num_stripes / map->sub_stripes), 1618 - &stripe_index); 1619 - 1620 - zone_info[i].alloc_offset = 1621 - full_stripe_nr * map->stripe_size; 1597 + zone_info[i].alloc_offset = btrfs_stripe_nr_to_offset(stripe_nr); 1622 1598 1623 1599 if (stripe_index > (i / map->sub_stripes)) 1624 - zone_info[i].alloc_offset += map->stripe_size; 1600 + zone_info[i].alloc_offset += BTRFS_STRIPE_LEN; 1625 1601 else if (stripe_index == (i / map->sub_stripes)) 1626 - zone_info[i].alloc_offset += 1627 - (last_alloc - stripe_offset); 1602 + zone_info[i].alloc_offset += stripe_offset; 1628 1603 } 1629 1604 1630 1605 if ((i % map->sub_stripes) == 0) { ··· 1680 1683 set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); 1681 1684 1682 1685 if (num_conventional > 0) { 1683 - /* Zone capacity is always zone 
size in emulation */ 1684 - cache->zone_capacity = cache->length; 1685 1686 ret = calculate_alloc_pointer(cache, &last_alloc, new); 1686 1687 if (ret) { 1687 1688 btrfs_err(fs_info, ··· 1688 1693 goto out; 1689 1694 } else if (map->num_stripes == num_conventional) { 1690 1695 cache->alloc_offset = last_alloc; 1696 + cache->zone_capacity = cache->length; 1691 1697 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); 1692 1698 goto out; 1693 1699 }

+1 -2

fs/crypto/inline_crypt.c

··· 333 333 inode = mapping->host; 334 334 335 335 *inode_ret = inode; 336 - *lblk_num_ret = ((u64)folio->index << (PAGE_SHIFT - inode->i_blkbits)) + 337 - (bh_offset(bh) >> inode->i_blkbits); 336 + *lblk_num_ret = (folio_pos(folio) + bh_offset(bh)) >> inode->i_blkbits; 338 337 return true; 339 338 } 340 339

+7 -4

fs/erofs/decompressor_zstd.c

··· 172 172 dctx.bounce = strm->bounce; 173 173 174 174 do { 175 - dctx.avail_out = out_buf.size - out_buf.pos; 176 175 dctx.inbuf_sz = in_buf.size; 177 176 dctx.inbuf_pos = in_buf.pos; 178 177 err = z_erofs_stream_switch_bufs(&dctx, &out_buf.dst, ··· 187 188 in_buf.pos = dctx.inbuf_pos; 188 189 189 190 zerr = zstd_decompress_stream(stream, &out_buf, &in_buf); 190 - if (zstd_is_error(zerr) || (!zerr && rq->outputsize)) { 191 + dctx.avail_out = out_buf.size - out_buf.pos; 192 + if (zstd_is_error(zerr) || 193 + ((rq->outputsize + dctx.avail_out) && (!zerr || (zerr > 0 && 194 + !(rq->inputsize + in_buf.size - in_buf.pos))))) { 191 195 erofs_err(sb, "failed to decompress in[%u] out[%u]: %s", 192 196 rq->inputsize, rq->outputsize, 193 - zerr ? zstd_get_error_name(zerr) : "unexpected end of stream"); 197 + zstd_is_error(zerr) ? zstd_get_error_name(zerr) : 198 + "unexpected end of stream"); 194 199 err = -EFSCORRUPTED; 195 200 break; 196 201 } 197 - } while (rq->outputsize || out_buf.pos < out_buf.size); 202 + } while (rq->outputsize + dctx.avail_out); 198 203 199 204 if (dctx.kout) 200 205 kunmap_local(dctx.kout);

+49 -19

fs/nfsd/nfs4state.c

··· 1542 1542 release_all_access(stp); 1543 1543 if (stp->st_stateowner) 1544 1544 nfs4_put_stateowner(stp->st_stateowner); 1545 - WARN_ON(!list_empty(&stid->sc_cp_list)); 1545 + if (!list_empty(&stid->sc_cp_list)) 1546 + nfs4_free_cpntf_statelist(stid->sc_client->net, stid); 1546 1547 kmem_cache_free(stateid_slab, stid); 1547 1548 } 1548 1549 ··· 3487 3486 struct nfsd4_slot *slot = resp->cstate.slot; 3488 3487 unsigned int base; 3489 3488 3490 - dprintk("--> %s slot %p\n", __func__, slot); 3489 + /* 3490 + * RFC 5661 Section 2.10.6.1.2: 3491 + * 3492 + * Any time SEQUENCE ... returns an error ... [t]he replier MUST NOT 3493 + * modify the reply cache entry for the slot whenever an error is 3494 + * returned from SEQUENCE ... 3495 + * 3496 + * Because nfsd4_store_cache_entry is called only by 3497 + * nfsd4_sequence_done(), nfsd4_store_cache_entry() is called only 3498 + * when a SEQUENCE operation was part of the COMPOUND. 3499 + * nfs41_check_op_ordering() ensures SEQUENCE is the first op. 3500 + */ 3501 + if (resp->opcnt == 1 && resp->cstate.status != nfs_ok) 3502 + return; 3491 3503 3492 3504 slot->sl_flags |= NFSD4_SLOT_INITIALIZED; 3493 3505 slot->sl_opcnt = resp->opcnt; ··· 4363 4349 return true; 4364 4350 } 4365 4351 4352 + /* 4353 + * Note that the response is constructed here both for the case 4354 + * of a new SEQUENCE request and for a replayed SEQUENCE request. 4355 + * We do not cache SEQUENCE responses as SEQUENCE is idempotent. 
4356 + */ 4357 + static void nfsd4_construct_sequence_response(struct nfsd4_session *session, 4358 + struct nfsd4_sequence *seq) 4359 + { 4360 + struct nfs4_client *clp = session->se_client; 4361 + 4362 + seq->maxslots_response = max(session->se_target_maxslots, 4363 + seq->maxslots); 4364 + seq->target_maxslots = session->se_target_maxslots; 4365 + 4366 + switch (clp->cl_cb_state) { 4367 + case NFSD4_CB_DOWN: 4368 + seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; 4369 + break; 4370 + case NFSD4_CB_FAULT: 4371 + seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; 4372 + break; 4373 + default: 4374 + seq->status_flags = 0; 4375 + } 4376 + if (!list_empty(&clp->cl_revoked)) 4377 + seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; 4378 + if (atomic_read(&clp->cl_admin_revoked)) 4379 + seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; 4380 + } 4381 + 4366 4382 __be32 4367 4383 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, 4368 4384 union nfsd4_op_u *u) ··· 4442 4398 dprintk("%s: slotid %d\n", __func__, seq->slotid); 4443 4399 4444 4400 trace_nfsd_slot_seqid_sequence(clp, seq, slot); 4401 + 4402 + nfsd4_construct_sequence_response(session, seq); 4403 + 4445 4404 status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags); 4446 4405 if (status == nfserr_replay_cache) { 4447 4406 status = nfserr_seq_misordered; ··· 4542 4495 } 4543 4496 4544 4497 out: 4545 - seq->maxslots = max(session->se_target_maxslots, seq->maxslots); 4546 - seq->target_maxslots = session->se_target_maxslots; 4547 - 4548 - switch (clp->cl_cb_state) { 4549 - case NFSD4_CB_DOWN: 4550 - seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN; 4551 - break; 4552 - case NFSD4_CB_FAULT: 4553 - seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT; 4554 - break; 4555 - default: 4556 - seq->status_flags = 0; 4557 - } 4558 - if (!list_empty(&clp->cl_revoked)) 4559 - seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED; 4560 - if (atomic_read(&clp->cl_admin_revoked)) 4561 
- seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED; 4562 4498 trace_nfsd_seq4_status(rqstp, seq); 4563 4499 out_no_session: 4564 4500 if (conn)

+2 -3

fs/nfsd/nfs4xdr.c

··· 5073 5073 return nfserr; 5074 5074 /* Note slotid's are numbered from zero: */ 5075 5075 /* sr_highest_slotid */ 5076 - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); 5076 + nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots_response - 1); 5077 5077 if (nfserr != nfs_ok) 5078 5078 return nfserr; 5079 5079 /* sr_target_highest_slotid */ ··· 5925 5925 */ 5926 5926 warn_on_nonidempotent_op(op); 5927 5927 xdr_truncate_encode(xdr, op_status_offset + XDR_UNIT); 5928 - } 5929 - if (so) { 5928 + } else if (so) { 5930 5929 int len = xdr->buf->len - (op_status_offset + XDR_UNIT); 5931 5930 5932 5931 so->so_replay.rp_status = op->status;

+1

fs/nfsd/nfsd.h

+3 -3

fs/nfsd/nfsfh.c

··· 269 269 dentry); 270 270 } 271 271 272 - fhp->fh_dentry = dentry; 273 - fhp->fh_export = exp; 274 - 275 272 switch (fhp->fh_maxsize) { 276 273 case NFS4_FHSIZE: 277 274 if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR) ··· 289 292 if (exp->ex_flags & NFSEXP_V4ROOT) 290 293 goto out; 291 294 } 295 + 296 + fhp->fh_dentry = dentry; 297 + fhp->fh_export = exp; 292 298 293 299 return 0; 294 300 out:

+2 -1

fs/nfsd/xdr4.h

··· 574 574 struct nfs4_sessionid sessionid; /* request/response */ 575 575 u32 seqid; /* request/response */ 576 576 u32 slotid; /* request/response */ 577 - u32 maxslots; /* request/response */ 577 + u32 maxslots; /* request */ 578 578 u32 cachethis; /* request */ 579 + u32 maxslots_response; /* response */ 579 580 u32 target_maxslots; /* response */ 580 581 u32 status_flags; /* response */ 581 582 };

+6 -1

fs/nilfs2/segment.c

··· 2768 2768 2769 2769 if (sci->sc_task) { 2770 2770 wake_up(&sci->sc_wait_daemon); 2771 - kthread_stop(sci->sc_task); 2771 + if (kthread_stop(sci->sc_task)) { 2772 + spin_lock(&sci->sc_state_lock); 2773 + sci->sc_task = NULL; 2774 + timer_shutdown_sync(&sci->sc_timer); 2775 + spin_unlock(&sci->sc_state_lock); 2776 + } 2772 2777 } 2773 2778 2774 2779 spin_lock(&sci->sc_state_lock);

+9 -3

fs/proc/generic.c

··· 698 698 } 699 699 } 700 700 701 + static void pde_erase(struct proc_dir_entry *pde, struct proc_dir_entry *parent) 702 + { 703 + rb_erase(&pde->subdir_node, &parent->subdir); 704 + RB_CLEAR_NODE(&pde->subdir_node); 705 + } 706 + 701 707 /* 702 708 * Remove a /proc entry and free it if it's not currently in use. 703 709 */ ··· 726 720 WARN(1, "removing permanent /proc entry '%s'", de->name); 727 721 de = NULL; 728 722 } else { 729 - rb_erase(&de->subdir_node, &parent->subdir); 723 + pde_erase(de, parent); 730 724 if (S_ISDIR(de->mode)) 731 725 parent->nlink--; 732 726 } ··· 770 764 root->parent->name, root->name); 771 765 return -EINVAL; 772 766 } 773 - rb_erase(&root->subdir_node, &parent->subdir); 767 + pde_erase(root, parent); 774 768 775 769 de = root; 776 770 while (1) { ··· 782 776 next->parent->name, next->name); 783 777 return -EINVAL; 784 778 } 785 - rb_erase(&next->subdir_node, &de->subdir); 779 + pde_erase(next, de); 786 780 de = next; 787 781 continue; 788 782 }

+9 -7

fs/smb/client/cached_dir.c

··· 388 388 * lease. Release one here, and the second below. 389 389 */ 390 390 cfid->has_lease = false; 391 - kref_put(&cfid->refcount, smb2_close_cached_fid); 391 + close_cached_dir(cfid); 392 392 } 393 393 spin_unlock(&cfids->cfid_list_lock); 394 394 395 - kref_put(&cfid->refcount, smb2_close_cached_fid); 395 + close_cached_dir(cfid); 396 396 } else { 397 397 *ret_cfid = cfid; 398 398 atomic_inc(&tcon->num_remote_opens); ··· 438 438 439 439 static void 440 440 smb2_close_cached_fid(struct kref *ref) 441 + __releases(&cfid->cfids->cfid_list_lock) 441 442 { 442 443 struct cached_fid *cfid = container_of(ref, struct cached_fid, 443 444 refcount); 444 445 int rc; 445 446 446 - spin_lock(&cfid->cfids->cfid_list_lock); 447 + lockdep_assert_held(&cfid->cfids->cfid_list_lock); 448 + 447 449 if (cfid->on_list) { 448 450 list_del(&cfid->entry); 449 451 cfid->on_list = false; ··· 480 478 spin_lock(&cfid->cfids->cfid_list_lock); 481 479 if (cfid->has_lease) { 482 480 cfid->has_lease = false; 483 - kref_put(&cfid->refcount, smb2_close_cached_fid); 481 + close_cached_dir(cfid); 484 482 } 485 483 spin_unlock(&cfid->cfids->cfid_list_lock); 486 484 close_cached_dir(cfid); ··· 489 487 490 488 void close_cached_dir(struct cached_fid *cfid) 491 489 { 492 - kref_put(&cfid->refcount, smb2_close_cached_fid); 490 + kref_put_lock(&cfid->refcount, smb2_close_cached_fid, &cfid->cfids->cfid_list_lock); 493 491 } 494 492 495 493 /* ··· 598 596 599 597 WARN_ON(cfid->on_list); 600 598 601 - kref_put(&cfid->refcount, smb2_close_cached_fid); 599 + close_cached_dir(cfid); 602 600 cifs_put_tcon(tcon, netfs_trace_tcon_ref_put_cached_close); 603 601 } 604 602 ··· 764 762 * Drop the ref-count from above, either the lease-ref (if there 765 763 * was one) or the extra one acquired. 766 764 */ 767 - kref_put(&cfid->refcount, smb2_close_cached_fid); 765 + close_cached_dir(cfid); 768 766 } 769 767 queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, 770 768 dir_cache_timeout * HZ);

+2

fs/smb/client/smb2inode.c

··· 1294 1294 smb2_to_name = cifs_convert_path_to_utf16(to_name, cifs_sb); 1295 1295 if (smb2_to_name == NULL) { 1296 1296 rc = -ENOMEM; 1297 + if (cfile) 1298 + cifsFileInfo_put(cfile); 1297 1299 goto smb2_rename_path; 1298 1300 } 1299 1301 in_iov.iov_base = smb2_to_name;

+5 -2

fs/smb/client/smb2pdu.c

··· 4054 4054 4055 4055 smb_rsp = (struct smb2_change_notify_rsp *)rsp_iov.iov_base; 4056 4056 4057 - smb2_validate_iov(le16_to_cpu(smb_rsp->OutputBufferOffset), 4058 - le32_to_cpu(smb_rsp->OutputBufferLength), &rsp_iov, 4057 + rc = smb2_validate_iov(le16_to_cpu(smb_rsp->OutputBufferOffset), 4058 + le32_to_cpu(smb_rsp->OutputBufferLength), 4059 + &rsp_iov, 4059 4060 sizeof(struct file_notify_information)); 4061 + if (rc) 4062 + goto cnotify_exit; 4060 4063 4061 4064 *out_data = kmemdup((char *)smb_rsp + le16_to_cpu(smb_rsp->OutputBufferOffset), 4062 4065 le32_to_cpu(smb_rsp->OutputBufferLength), GFP_KERNEL);

+36 -2

fs/smb/server/transport_rdma.c

··· 334 334 break; 335 335 336 336 case SMBDIRECT_SOCKET_CREATED: 337 + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; 338 + break; 339 + 337 340 case SMBDIRECT_SOCKET_CONNECTED: 338 341 sc->status = SMBDIRECT_SOCKET_ERROR; 339 342 break; ··· 1886 1883 static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc) 1887 1884 { 1888 1885 struct smbdirect_recv_io *recvmsg; 1886 + bool recv_posted = false; 1889 1887 int ret; 1890 1888 1891 1889 WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); ··· 1903 1899 pr_err("Can't post recv: %d\n", ret); 1904 1900 goto out_err; 1905 1901 } 1902 + recv_posted = true; 1906 1903 1907 1904 ret = smb_direct_accept_client(sc); 1908 1905 if (ret) { ··· 1913 1908 1914 1909 return 0; 1915 1910 out_err: 1916 - put_recvmsg(sc, recvmsg); 1911 + /* 1912 + * If the recv was never posted, return it to the free list. 1913 + * If it was posted, leave it alone so disconnect teardown can 1914 + * drain the QP and complete it (flush) and the completion path 1915 + * will unmap it exactly once. 
1916 + */ 1917 + if (!recv_posted) 1918 + put_recvmsg(sc, recvmsg); 1917 1919 return ret; 1918 1920 } 1919 1921 ··· 2618 2606 } 2619 2607 } 2620 2608 2621 - bool ksmbd_rdma_capable_netdev(struct net_device *netdev) 2609 + static bool ksmbd_find_rdma_capable_netdev(struct net_device *netdev) 2622 2610 { 2623 2611 struct smb_direct_device *smb_dev; 2624 2612 int i; ··· 2658 2646 netdev->name, str_true_false(rdma_capable)); 2659 2647 2660 2648 return rdma_capable; 2649 + } 2650 + 2651 + bool ksmbd_rdma_capable_netdev(struct net_device *netdev) 2652 + { 2653 + struct net_device *lower_dev; 2654 + struct list_head *iter; 2655 + 2656 + if (ksmbd_find_rdma_capable_netdev(netdev)) 2657 + return true; 2658 + 2659 + /* check if netdev is bridge or VLAN */ 2660 + if (netif_is_bridge_master(netdev) || 2661 + netdev->priv_flags & IFF_802_1Q_VLAN) 2662 + netdev_for_each_lower_dev(netdev, lower_dev, iter) 2663 + if (ksmbd_find_rdma_capable_netdev(lower_dev)) 2664 + return true; 2665 + 2666 + /* check if netdev is IPoIB safely without layer violation */ 2667 + if (netdev->type == ARPHRD_INFINIBAND) 2668 + return true; 2669 + 2670 + return false; 2661 2671 } 2662 2672 2663 2673 static const struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {

+4 -1

fs/smb/server/transport_tcp.c

··· 290 290 } 291 291 } 292 292 up_read(&conn_list_lock); 293 - if (ret == -EAGAIN) 293 + if (ret == -EAGAIN) { 294 + /* Per-IP limit hit: release the just-accepted socket. */ 295 + sock_release(client_sk); 294 296 continue; 297 + } 295 298 296 299 skip_max_ip_conns_limit: 297 300 if (server_conf.max_connections &&

+3 -1

fs/xfs/xfs_discard.c

··· 726 726 break; 727 727 } 728 728 729 - if (!tr.queued) 729 + if (!tr.queued) { 730 + kfree(tr.extents); 730 731 break; 732 + } 731 733 732 734 /* 733 735 * We hand the extent list to the discard function here so the

+69 -13

fs/xfs/xfs_iomap.c

··· 1091 1091 }; 1092 1092 #endif /* CONFIG_XFS_RT */ 1093 1093 1094 + #ifdef DEBUG 1095 + static void 1096 + xfs_check_atomic_cow_conversion( 1097 + struct xfs_inode *ip, 1098 + xfs_fileoff_t offset_fsb, 1099 + xfs_filblks_t count_fsb, 1100 + const struct xfs_bmbt_irec *cmap) 1101 + { 1102 + struct xfs_iext_cursor icur; 1103 + struct xfs_bmbt_irec cmap2 = { }; 1104 + 1105 + if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap2)) 1106 + xfs_trim_extent(&cmap2, offset_fsb, count_fsb); 1107 + 1108 + ASSERT(cmap2.br_startoff == cmap->br_startoff); 1109 + ASSERT(cmap2.br_blockcount == cmap->br_blockcount); 1110 + ASSERT(cmap2.br_startblock == cmap->br_startblock); 1111 + ASSERT(cmap2.br_state == cmap->br_state); 1112 + } 1113 + #else 1114 + # define xfs_check_atomic_cow_conversion(...) ((void)0) 1115 + #endif 1116 + 1094 1117 static int 1095 1118 xfs_atomic_write_cow_iomap_begin( 1096 1119 struct inode *inode, ··· 1125 1102 { 1126 1103 struct xfs_inode *ip = XFS_I(inode); 1127 1104 struct xfs_mount *mp = ip->i_mount; 1128 - const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 1129 - xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length); 1130 - xfs_filblks_t count_fsb = end_fsb - offset_fsb; 1105 + const xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 1106 + const xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); 1107 + const xfs_filblks_t count_fsb = end_fsb - offset_fsb; 1108 + xfs_filblks_t hole_count_fsb; 1131 1109 int nmaps = 1; 1132 1110 xfs_filblks_t resaligned; 1133 1111 struct xfs_bmbt_irec cmap; ··· 1154 1130 return -EAGAIN; 1155 1131 1156 1132 trace_xfs_iomap_atomic_write_cow(ip, offset, length); 1157 - 1133 + retry: 1158 1134 xfs_ilock(ip, XFS_ILOCK_EXCL); 1159 1135 1160 1136 if (!ip->i_cowfp) { ··· 1165 1141 if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap)) 1166 1142 cmap.br_startoff = end_fsb; 1167 1143 if (cmap.br_startoff <= offset_fsb) { 1144 + if (isnullstartblock(cmap.br_startblock)) 
1145 + goto convert_delay; 1146 + 1147 + /* 1148 + * cmap could extend outside the write range due to previous 1149 + * speculative preallocations. We must trim cmap to the write 1150 + * range because the cow fork treats written mappings to mean 1151 + * "write in progress". 1152 + */ 1168 1153 xfs_trim_extent(&cmap, offset_fsb, count_fsb); 1169 1154 goto found; 1170 1155 } 1171 1156 1172 - end_fsb = cmap.br_startoff; 1173 - count_fsb = end_fsb - offset_fsb; 1157 + hole_count_fsb = cmap.br_startoff - offset_fsb; 1174 1158 1175 - resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, 1159 + resaligned = xfs_aligned_fsb_count(offset_fsb, hole_count_fsb, 1176 1160 xfs_get_cowextsz_hint(ip)); 1177 1161 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1178 1162 ··· 1201 1169 if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap)) 1202 1170 cmap.br_startoff = end_fsb; 1203 1171 if (cmap.br_startoff <= offset_fsb) { 1204 - xfs_trim_extent(&cmap, offset_fsb, count_fsb); 1205 1172 xfs_trans_cancel(tp); 1173 + if (isnullstartblock(cmap.br_startblock)) 1174 + goto convert_delay; 1175 + xfs_trim_extent(&cmap, offset_fsb, count_fsb); 1206 1176 goto found; 1207 1177 } 1208 1178 ··· 1216 1182 * atomic writes to that same range will be aligned (and don't require 1217 1183 * this COW-based method). 1218 1184 */ 1219 - error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 1185 + error = xfs_bmapi_write(tp, ip, offset_fsb, hole_count_fsb, 1220 1186 XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC | 1221 1187 XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps); 1222 1188 if (error) { ··· 1229 1195 if (error) 1230 1196 goto out_unlock; 1231 1197 1198 + /* 1199 + * cmap could map more blocks than the range we passed into bmapi_write 1200 + * because of EXTSZALIGN or adjacent pre-existing unwritten mappings 1201 + * that were merged. Trim cmap to the original write range so that we 1202 + * don't convert more than we were asked to do for this write. 
1203 + */ 1204 + xfs_trim_extent(&cmap, offset_fsb, count_fsb); 1205 + 1232 1206 found: 1233 1207 if (cmap.br_state != XFS_EXT_NORM) { 1234 - error = xfs_reflink_convert_cow_locked(ip, offset_fsb, 1235 - count_fsb); 1208 + error = xfs_reflink_convert_cow_locked(ip, cmap.br_startoff, 1209 + cmap.br_blockcount); 1236 1210 if (error) 1237 1211 goto out_unlock; 1238 1212 cmap.br_state = XFS_EXT_NORM; 1213 + xfs_check_atomic_cow_conversion(ip, offset_fsb, count_fsb, 1214 + &cmap); 1239 1215 } 1240 1216 1241 - length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); 1242 - trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); 1217 + trace_xfs_iomap_found(ip, offset, length, XFS_COW_FORK, &cmap); 1243 1218 seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); 1244 1219 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1245 1220 return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); 1246 1221 1222 + convert_delay: 1223 + xfs_iunlock(ip, XFS_ILOCK_EXCL); 1224 + error = xfs_bmapi_convert_delalloc(ip, XFS_COW_FORK, offset, iomap, 1225 + NULL); 1226 + if (error) 1227 + return error; 1228 + 1229 + /* 1230 + * Try the lookup again, because the delalloc conversion might have 1231 + * turned the COW mapping into unwritten, but we need it to be in 1232 + * written state. 1233 + */ 1234 + goto retry; 1247 1235 out_unlock: 1248 1236 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1249 1237 return error;

+4 -2

fs/xfs/xfs_zone_alloc.c

··· 615 615 lockdep_assert_held(&zi->zi_open_zones_lock); 616 616 617 617 list_for_each_entry_reverse(oz, &zi->zi_open_zones, oz_entry) 618 - if (xfs_try_use_zone(zi, file_hint, oz, false)) 618 + if (xfs_try_use_zone(zi, file_hint, oz, XFS_ZONE_ALLOC_OK)) 619 619 return oz; 620 620 621 621 cond_resched_lock(&zi->zi_open_zones_lock); ··· 1249 1249 1250 1250 while ((rtg = xfs_rtgroup_next(mp, rtg))) { 1251 1251 error = xfs_init_zone(&iz, rtg, NULL); 1252 - if (error) 1252 + if (error) { 1253 + xfs_rtgroup_rele(rtg); 1253 1254 goto out_free_zone_info; 1255 + } 1254 1256 } 1255 1257 } 1256 1258

+1 -1

include/drm/Makefile

··· 11 11 quiet_cmd_hdrtest = HDRTEST $(patsubst %.hdrtest,%.h,$@) 12 12 cmd_hdrtest = \ 13 13 $(CC) $(c_flags) -fsyntax-only -x c /dev/null -include $< -include $<; \ 14 - PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ 14 + PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(if $(CONFIG_WERROR)$(CONFIG_DRM_WERROR),-Werror) $<; \ 15 15 touch $@ 16 16 17 17 $(obj)/%.hdrtest: $(src)/%.h FORCE

+8 -3

include/linux/compiler_types.h

··· 250 250 /* 251 251 * GCC does not warn about unused static inline functions for -Wunused-function. 252 252 * Suppress the warning in clang as well by using __maybe_unused, but enable it 253 - * for W=1 build. This will allow clang to find unused functions. Remove the 254 - * __inline_maybe_unused entirely after fixing most of -Wunused-function warnings. 253 + * for W=2 build. This will allow clang to find unused functions. 255 254 */ 256 - #ifdef KBUILD_EXTRA_WARN1 255 + #ifdef KBUILD_EXTRA_WARN2 257 256 #define __inline_maybe_unused 258 257 #else 259 258 #define __inline_maybe_unused __maybe_unused ··· 458 459 # define __nocfi __attribute__((__no_sanitize__("kcfi"))) 459 460 #else 460 461 # define __nocfi 462 + #endif 463 + 464 + #if defined(CONFIG_ARCH_USES_CFI_GENERIC_LLVM_PASS) 465 + # define __nocfi_generic __nocfi 466 + #else 467 + # define __nocfi_generic 461 468 #endif 462 469 463 470 /*

+1 -1

include/linux/dma-mapping.h

··· 90 90 */ 91 91 #define DMA_MAPPING_ERROR (~(dma_addr_t)0) 92 92 93 - #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) 93 + #define DMA_BIT_MASK(n) GENMASK_ULL(n - 1, 0) 94 94 95 95 struct dma_iova_state { 96 96 dma_addr_t addr;

+1 -1

include/linux/ethtool.h

··· 492 492 }; 493 493 494 494 #define ETHTOOL_MAX_LANES 8 495 - /** 495 + /* 496 496 * IEEE 802.3ck/df defines 16 bins for FEC histogram plus one more for 497 497 * the end-of-list marker, total 17 items 498 498 */

+3

include/linux/gfp.h

··· 7 7 #include <linux/mmzone.h> 8 8 #include <linux/topology.h> 9 9 #include <linux/alloc_tag.h> 10 + #include <linux/cleanup.h> 10 11 #include <linux/sched.h> 11 12 12 13 struct vm_area_struct; ··· 463 462 #endif 464 463 /* This should be paired with folio_put() rather than free_contig_range(). */ 465 464 #define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__)) 465 + 466 + DEFINE_FREE(free_page, void *, free_page((unsigned long)_T)) 466 467 467 468 #endif /* __LINUX_GFP_H */

+23 -32

include/linux/huge_mm.h

··· 376 376 int folio_split(struct folio *folio, unsigned int new_order, struct page *page, 377 377 struct list_head *list); 378 378 /* 379 - * try_folio_split - try to split a @folio at @page using non uniform split. 379 + * try_folio_split_to_order - try to split a @folio at @page to @new_order using 380 + * non uniform split. 380 381 * @folio: folio to be split 381 - * @page: split to order-0 at the given page 382 - * @list: store the after-split folios 382 + * @page: split to @new_order at the given page 383 + * @new_order: the target split order 383 384 * 384 - * Try to split a @folio at @page using non uniform split to order-0, if 385 - * non uniform split is not supported, fall back to uniform split. 385 + * Try to split a @folio at @page using non uniform split to @new_order, if 386 + * non uniform split is not supported, fall back to uniform split. After-split 387 + * folios are put back to LRU list. Use min_order_for_split() to get the lower 388 + * bound of @new_order. 386 389 * 387 390 * Return: 0: split is successful, otherwise split failed. 
388 391 */ 389 - static inline int try_folio_split(struct folio *folio, struct page *page, 390 - struct list_head *list) 392 + static inline int try_folio_split_to_order(struct folio *folio, 393 + struct page *page, unsigned int new_order) 391 394 { 392 - int ret = min_order_for_split(folio); 393 - 394 - if (ret < 0) 395 - return ret; 396 - 397 - if (!non_uniform_split_supported(folio, 0, false)) 398 - return split_huge_page_to_list_to_order(&folio->page, list, 399 - ret); 400 - return folio_split(folio, ret, page, list); 395 + if (!non_uniform_split_supported(folio, new_order, /* warns= */ false)) 396 + return split_huge_page_to_list_to_order(&folio->page, NULL, 397 + new_order); 398 + return folio_split(folio, new_order, page, NULL); 401 399 } 402 400 static inline int split_huge_page(struct page *page) 403 401 { 404 - struct folio *folio = page_folio(page); 405 - int ret = min_order_for_split(folio); 406 - 407 - if (ret < 0) 408 - return ret; 409 - 410 - /* 411 - * split_huge_page() locks the page before splitting and 412 - * expects the same page that has been split to be locked when 413 - * returned. split_folio(page_folio(page)) cannot be used here 414 - * because it converts the page to folio and passes the head 415 - * page to be split. 
416 - */ 417 - return split_huge_page_to_list_to_order(page, NULL, ret); 402 + return split_huge_page_to_list_to_order(page, NULL, 0); 418 403 } 419 404 void deferred_split_folio(struct folio *folio, bool partially_mapped); 420 405 ··· 582 597 return -EINVAL; 583 598 } 584 599 600 + static inline int min_order_for_split(struct folio *folio) 601 + { 602 + VM_WARN_ON_ONCE_FOLIO(1, folio); 603 + return -EINVAL; 604 + } 605 + 585 606 static inline int split_folio_to_list(struct folio *folio, struct list_head *list) 586 607 { 587 608 VM_WARN_ON_ONCE_FOLIO(1, folio); 588 609 return -EINVAL; 589 610 } 590 611 591 - static inline int try_folio_split(struct folio *folio, struct page *page, 592 - struct list_head *list) 612 + static inline int try_folio_split_to_order(struct folio *folio, 613 + struct page *page, unsigned int new_order) 593 614 { 594 615 VM_WARN_ON_ONCE_FOLIO(1, folio); 595 616 return -EINVAL;

+1

include/linux/map_benchmark.h

··· 27 27 __u32 dma_dir; /* DMA data direction */ 28 28 __u32 dma_trans_ns; /* time for DMA transmission in ns */ 29 29 __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ 30 + __u8 expansion[76]; /* For future use */ 30 31 }; 31 32 #endif /* _KERNEL_DMA_BENCHMARK_H */

+1

include/linux/mlx5/cq.h

··· 183 183 complete(&cq->free); 184 184 } 185 185 186 + void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe); 186 187 int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, 187 188 u32 *in, int inlen, u32 *out, int outlen); 188 189 int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,

+5

include/net/bluetooth/hci.h

··· 2783 2783 __u8 data[]; 2784 2784 } __packed; 2785 2785 2786 + #define HCI_EV_LE_PA_SYNC_LOST 0x10 2787 + struct hci_ev_le_pa_sync_lost { 2788 + __le16 handle; 2789 + } __packed; 2790 + 2786 2791 #define LE_PA_DATA_COMPLETE 0x00 2787 2792 #define LE_PA_DATA_MORE_TO_COME 0x01 2788 2793 #define LE_PA_DATA_TRUNCATED 0x02

+1 -1

include/net/libeth/xdp.h

··· 513 513 * can't fail, but can send less frames if there's no enough free descriptors 514 514 * available. The actual free space is returned by @prep from the driver. 515 515 */ 516 - static __always_inline u32 516 + static __always_inline __nocfi_generic u32 517 517 libeth_xdp_tx_xmit_bulk(const struct libeth_xdp_tx_frame *bulk, void *xdpsq, 518 518 u32 n, bool unroll, u64 priv, 519 519 u32 (*prep)(void *xdpsq, struct libeth_xdpsq *sq),

+15 -8

include/uapi/drm/drm_fourcc.h

··· 979 979 * 2 = Gob Height 8, Turing+ Page Kind mapping 980 980 * 3 = Reserved for future use. 981 981 * 982 - * 22:22 s Sector layout. On Tegra GPUs prior to Xavier, there is a further 983 - * bit remapping step that occurs at an even lower level than the 984 - * page kind and block linear swizzles. This causes the layout of 985 - * surfaces mapped in those SOC's GPUs to be incompatible with the 986 - * equivalent mapping on other GPUs in the same system. 982 + * 22:22 s Sector layout. There is a further bit remapping step that occurs 983 + * 26:27 at an even lower level than the page kind and block linear 984 + * swizzles. This causes the bit arrangement of surfaces in memory 985 + * to differ subtly, and prevents direct sharing of surfaces between 986 + * GPUs with different layouts. 987 987 * 988 - * 0 = Tegra K1 - Tegra Parker/TX2 Layout. 989 - * 1 = Desktop GPU and Tegra Xavier+ Layout 988 + * 0 = Tegra K1 - Tegra Parker/TX2 Layout 989 + * 1 = Pre-GB20x, GB20x 32+ bpp, GB10, Tegra Xavier-Orin Layout 990 + * 2 = GB20x(Blackwell 2)+ 8 bpp surface layout 991 + * 3 = GB20x(Blackwell 2)+ 16 bpp surface layout 992 + * 4 = Reserved for future use. 993 + * 5 = Reserved for future use. 994 + * 6 = Reserved for future use. 995 + * 7 = Reserved for future use. 990 996 * 991 997 * 25:23 c Lossless Framebuffer Compression type. 992 998 * ··· 1007 1001 * 6 = Reserved for future use 1008 1002 * 7 = Reserved for future use 1009 1003 * 1010 - * 55:25 - Reserved for future use. Must be zero. 1004 + * 55:28 - Reserved for future use. Must be zero. 1011 1005 */ 1012 1006 #define DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D(c, s, g, k, h) \ 1013 1007 fourcc_mod_code(NVIDIA, (0x10 | \ ··· 1015 1009 (((k) & 0xff) << 12) | \ 1016 1010 (((g) & 0x3) << 20) | \ 1017 1011 (((s) & 0x1) << 22) | \ 1012 + (((s) & 0x6) << 25) | \ 1018 1013 (((c) & 0x7) << 23))) 1019 1014 1020 1015 /* To grandfather in prior block linear format modifiers to the above layout,

-12

include/uapi/linux/io_uring.h

··· 689 689 /* query various aspects of io_uring, see linux/io_uring/query.h */ 690 690 IORING_REGISTER_QUERY = 35, 691 691 692 - /* return zcrx buffers back into circulation */ 693 - IORING_REGISTER_ZCRX_REFILL = 36, 694 - 695 692 /* this goes last */ 696 693 IORING_REGISTER_LAST, 697 694 ··· 1068 1071 __u32 zcrx_id; 1069 1072 __u32 __resv2; 1070 1073 __u64 __resv[3]; 1071 - }; 1072 - 1073 - struct io_uring_zcrx_sync_refill { 1074 - __u32 zcrx_id; 1075 - /* the number of entries to return */ 1076 - __u32 nr_entries; 1077 - /* pointer to an array of struct io_uring_zcrx_rqe */ 1078 - __u64 rqes; 1079 - __u64 __resv[2]; 1080 1074 }; 1081 1075 1082 1076 #ifdef __cplusplus

+7

include/ufs/ufshcd.h

··· 688 688 * single doorbell mode. 689 689 */ 690 690 UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, 691 + 692 + /* 693 + * This quirk indicates that DME_LINKSTARTUP should not be issued a 2nd 694 + * time (refer link_startup_again) after the 1st time was successful, 695 + * because it causes link startup to become unreliable. 696 + */ 697 + UFSHCD_QUIRK_PERFORM_LINK_STARTUP_ONCE = 1 << 26, 691 698 }; 692 699 693 700 enum ufshcd_caps {

+1 -1

io_uring/memmap.c

··· 135 135 struct io_mapped_region *mr, 136 136 struct io_uring_region_desc *reg) 137 137 { 138 - unsigned long size = mr->nr_pages << PAGE_SHIFT; 138 + unsigned long size = (size_t) mr->nr_pages << PAGE_SHIFT; 139 139 struct page **pages; 140 140 int nr_pages; 141 141

-3

io_uring/register.c

··· 827 827 case IORING_REGISTER_QUERY: 828 828 ret = io_query(ctx, arg, nr_args); 829 829 break; 830 - case IORING_REGISTER_ZCRX_REFILL: 831 - ret = io_zcrx_return_bufs(ctx, arg, nr_args); 832 - break; 833 830 default: 834 831 ret = -EINVAL; 835 832 break;

+9 -2

io_uring/rsrc.c

··· 1403 1403 size_t max_segs = 0; 1404 1404 unsigned i; 1405 1405 1406 - for (i = 0; i < nr_iovs; i++) 1406 + for (i = 0; i < nr_iovs; i++) { 1407 1407 max_segs += (iov[i].iov_len >> shift) + 2; 1408 + if (max_segs > INT_MAX) 1409 + return -EOVERFLOW; 1410 + } 1408 1411 return max_segs; 1409 1412 } 1410 1413 ··· 1513 1510 if (unlikely(ret)) 1514 1511 return ret; 1515 1512 } else { 1516 - nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); 1513 + int ret = io_estimate_bvec_size(iov, nr_iovs, imu); 1514 + 1515 + if (ret < 0) 1516 + return ret; 1517 + nr_segs = ret; 1517 1518 } 1518 1519 1519 1520 if (sizeof(struct bio_vec) > sizeof(struct iovec)) {

-68

io_uring/zcrx.c

··· 928 928 .uninstall = io_pp_uninstall, 929 929 }; 930 930 931 - #define IO_ZCRX_MAX_SYS_REFILL_BUFS (1 << 16) 932 - #define IO_ZCRX_SYS_REFILL_BATCH 32 933 - 934 - static void io_return_buffers(struct io_zcrx_ifq *ifq, 935 - struct io_uring_zcrx_rqe *rqes, unsigned nr) 936 - { 937 - int i; 938 - 939 - for (i = 0; i < nr; i++) { 940 - struct net_iov *niov; 941 - netmem_ref netmem; 942 - 943 - if (!io_parse_rqe(&rqes[i], ifq, &niov)) 944 - continue; 945 - 946 - scoped_guard(spinlock_bh, &ifq->rq_lock) { 947 - if (!io_zcrx_put_niov_uref(niov)) 948 - continue; 949 - } 950 - 951 - netmem = net_iov_to_netmem(niov); 952 - if (!page_pool_unref_and_test(netmem)) 953 - continue; 954 - io_zcrx_return_niov(niov); 955 - } 956 - } 957 - 958 - int io_zcrx_return_bufs(struct io_ring_ctx *ctx, 959 - void __user *arg, unsigned nr_arg) 960 - { 961 - struct io_uring_zcrx_rqe rqes[IO_ZCRX_SYS_REFILL_BATCH]; 962 - struct io_uring_zcrx_rqe __user *user_rqes; 963 - struct io_uring_zcrx_sync_refill zr; 964 - struct io_zcrx_ifq *ifq; 965 - unsigned nr, i; 966 - 967 - if (nr_arg) 968 - return -EINVAL; 969 - if (copy_from_user(&zr, arg, sizeof(zr))) 970 - return -EFAULT; 971 - if (!zr.nr_entries || zr.nr_entries > IO_ZCRX_MAX_SYS_REFILL_BUFS) 972 - return -EINVAL; 973 - if (!mem_is_zero(&zr.__resv, sizeof(zr.__resv))) 974 - return -EINVAL; 975 - 976 - ifq = xa_load(&ctx->zcrx_ctxs, zr.zcrx_id); 977 - if (!ifq) 978 - return -EINVAL; 979 - nr = zr.nr_entries; 980 - user_rqes = u64_to_user_ptr(zr.rqes); 981 - 982 - for (i = 0; i < nr;) { 983 - unsigned batch = min(nr - i, IO_ZCRX_SYS_REFILL_BATCH); 984 - size_t size = batch * sizeof(rqes[0]); 985 - 986 - if (copy_from_user(rqes, user_rqes + i, size)) 987 - return i ? 
i : -EFAULT; 988 - io_return_buffers(ifq, rqes, batch); 989 - 990 - i += batch; 991 - 992 - if (fatal_signal_pending(current)) 993 - return i; 994 - cond_resched(); 995 - } 996 - return nr; 997 - } 998 - 999 931 static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov, 1000 932 struct io_zcrx_ifq *ifq, int off, int len) 1001 933 {

-7

io_uring/zcrx.h

··· 63 63 }; 64 64 65 65 #if defined(CONFIG_IO_URING_ZCRX) 66 - int io_zcrx_return_bufs(struct io_ring_ctx *ctx, 67 - void __user *arg, unsigned nr_arg); 68 66 int io_register_zcrx_ifq(struct io_ring_ctx *ctx, 69 67 struct io_uring_zcrx_ifq_reg __user *arg); 70 68 void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); ··· 94 96 unsigned int id) 95 97 { 96 98 return NULL; 97 - } 98 - static inline int io_zcrx_return_bufs(struct io_ring_ctx *ctx, 99 - void __user *arg, unsigned nr_arg) 100 - { 101 - return -EOPNOTSUPP; 102 99 } 103 100 #endif 104 101

+9

kernel/Kconfig.kexec

··· 109 109 to keep data or state alive across the kexec. For this to work, 110 110 both source and target kernels need to have this option enabled. 111 111 112 + config KEXEC_HANDOVER_DEBUG 113 + bool "Enable Kexec Handover debug checks" 114 + depends on KEXEC_HANDOVER 115 + help 116 + This option enables extra sanity checks for the Kexec Handover 117 + subsystem. Since KHO performance is crucial in live update 118 + scenarios and the extra code might add overhead, it is 119 + only optionally enabled. 120 + 112 121 config CRASH_DUMP 113 122 bool "kernel crash dumps" 114 123 default ARCH_DEFAULT_CRASH_DUMP

+1

kernel/Makefile

··· 83 83 obj-$(CONFIG_KEXEC_FILE) += kexec_file.o 84 84 obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o 85 85 obj-$(CONFIG_KEXEC_HANDOVER) += kexec_handover.o 86 + obj-$(CONFIG_KEXEC_HANDOVER_DEBUG) += kexec_handover_debug.o 86 87 obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o 87 88 obj-$(CONFIG_COMPAT) += compat.o 88 89 obj-$(CONFIG_CGROUPS) += cgroup/

+15 -5

kernel/events/core.c

··· 11773 11773 11774 11774 event = container_of(hrtimer, struct perf_event, hw.hrtimer); 11775 11775 11776 - if (event->state != PERF_EVENT_STATE_ACTIVE) 11776 + if (event->state != PERF_EVENT_STATE_ACTIVE || 11777 + event->hw.state & PERF_HES_STOPPED) 11777 11778 return HRTIMER_NORESTART; 11778 11779 11779 11780 event->pmu->read(event); ··· 11820 11819 struct hw_perf_event *hwc = &event->hw; 11821 11820 11822 11821 /* 11823 - * The throttle can be triggered in the hrtimer handler. 11824 - * The HRTIMER_NORESTART should be used to stop the timer, 11825 - * rather than hrtimer_cancel(). See perf_swevent_hrtimer() 11822 + * Careful: this function can be triggered in the hrtimer handler, 11823 + * for cpu-clock events, so hrtimer_cancel() would cause a 11824 + * deadlock. 11825 + * 11826 + * So use hrtimer_try_to_cancel() to try to stop the hrtimer, 11827 + * and the cpu-clock handler also sets the PERF_HES_STOPPED flag, 11828 + * which guarantees that perf_swevent_hrtimer() will stop the 11829 + * hrtimer once it sees the PERF_HES_STOPPED flag. 
11826 11830 */ 11827 11831 if (is_sampling_event(event) && (hwc->interrupts != MAX_INTERRUPTS)) { 11828 11832 ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); 11829 11833 local64_set(&hwc->period_left, ktime_to_ns(remaining)); 11830 11834 11831 - hrtimer_cancel(&hwc->hrtimer); 11835 + hrtimer_try_to_cancel(&hwc->hrtimer); 11832 11836 } 11833 11837 } 11834 11838 ··· 11877 11871 11878 11872 static void cpu_clock_event_start(struct perf_event *event, int flags) 11879 11873 { 11874 + event->hw.state = 0; 11880 11875 local64_set(&event->hw.prev_count, local_clock()); 11881 11876 perf_swevent_start_hrtimer(event); 11882 11877 } 11883 11878 11884 11879 static void cpu_clock_event_stop(struct perf_event *event, int flags) 11885 11880 { 11881 + event->hw.state = PERF_HES_STOPPED; 11886 11882 perf_swevent_cancel_hrtimer(event); 11887 11883 if (flags & PERF_EF_UPDATE) 11888 11884 cpu_clock_event_update(event); ··· 11958 11950 11959 11951 static void task_clock_event_start(struct perf_event *event, int flags) 11960 11952 { 11953 + event->hw.state = 0; 11961 11954 local64_set(&event->hw.prev_count, event->ctx->time); 11962 11955 perf_swevent_start_hrtimer(event); 11963 11956 } 11964 11957 11965 11958 static void task_clock_event_stop(struct perf_event *event, int flags) 11966 11959 { 11960 + event->hw.state = PERF_HES_STOPPED; 11967 11961 perf_swevent_cancel_hrtimer(event); 11968 11962 if (flags & PERF_EF_UPDATE) 11969 11963 task_clock_event_update(event, event->ctx->time);

+6 -6

kernel/futex/core.c

··· 1680 1680 { 1681 1681 struct mm_struct *mm = fph->mm; 1682 1682 1683 - guard(rcu)(); 1683 + guard(preempt)(); 1684 1684 1685 - if (smp_load_acquire(&fph->state) == FR_PERCPU) { 1686 - this_cpu_inc(*mm->futex_ref); 1685 + if (READ_ONCE(fph->state) == FR_PERCPU) { 1686 + __this_cpu_inc(*mm->futex_ref); 1687 1687 return true; 1688 1688 } 1689 1689 ··· 1694 1694 { 1695 1695 struct mm_struct *mm = fph->mm; 1696 1696 1697 - guard(rcu)(); 1697 + guard(preempt)(); 1698 1698 1699 - if (smp_load_acquire(&fph->state) == FR_PERCPU) { 1700 - this_cpu_dec(*mm->futex_ref); 1699 + if (READ_ONCE(fph->state) == FR_PERCPU) { 1700 + __this_cpu_dec(*mm->futex_ref); 1701 1701 return false; 1702 1702 } 1703 1703

+3 -1

kernel/gcov/gcc_4_7.c

··· 18 18 #include <linux/mm.h> 19 19 #include "gcov.h" 20 20 21 - #if (__GNUC__ >= 14) 21 + #if (__GNUC__ >= 15) 22 + #define GCOV_COUNTERS 10 23 + #elif (__GNUC__ >= 14) 22 24 #define GCOV_COUNTERS 9 23 25 #elif (__GNUC__ >= 10) 24 26 #define GCOV_COUNTERS 8

+58 -37

kernel/kexec_handover.c

··· 8 8 9 9 #define pr_fmt(fmt) "KHO: " fmt 10 10 11 + #include <linux/cleanup.h> 11 12 #include <linux/cma.h> 12 13 #include <linux/count_zeros.h> 13 14 #include <linux/debugfs.h> ··· 23 22 24 23 #include <asm/early_ioremap.h> 25 24 25 + #include "kexec_handover_internal.h" 26 26 /* 27 27 * KHO is tightly coupled with mm init and needs access to some of mm 28 28 * internal APIs. ··· 69 67 * Keep track of memory that is to be preserved across KHO. 70 68 * 71 69 * The serializing side uses two levels of xarrays to manage chunks of per-order 72 - * 512 byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order of a 73 - * 1TB system would fit inside a single 512 byte bitmap. For order 0 allocations 74 - * each bitmap will cover 16M of address space. Thus, for 16G of memory at most 75 - * 512K of bitmap memory will be needed for order 0. 70 + * PAGE_SIZE byte bitmaps. For instance if PAGE_SIZE = 4096, the entire 1G order 71 + * of an 8TB system would fit inside a single 4096 byte bitmap. For order 0 72 + * allocations each bitmap will cover 128M of address space. Thus, for 16G of 73 + * memory at most 512K of bitmap memory will be needed for order 0. 76 74 * 77 75 * This approach is fully incremental, as the serialization progresses folios 78 76 * can continue be aggregated to the tracker. The final step, immediately prior ··· 80 78 * successor kernel to parse. 
81 79 */ 82 80 83 - #define PRESERVE_BITS (512 * 8) 81 + #define PRESERVE_BITS (PAGE_SIZE * 8) 84 82 85 83 struct kho_mem_phys_bits { 86 84 DECLARE_BITMAP(preserve, PRESERVE_BITS); 87 85 }; 86 + 87 + static_assert(sizeof(struct kho_mem_phys_bits) == PAGE_SIZE); 88 88 89 89 struct kho_mem_phys { 90 90 /* ··· 135 131 .finalized = false, 136 132 }; 137 133 138 - static void *xa_load_or_alloc(struct xarray *xa, unsigned long index, size_t sz) 134 + static void *xa_load_or_alloc(struct xarray *xa, unsigned long index) 139 135 { 140 - void *elm, *res; 136 + void *res = xa_load(xa, index); 141 137 142 - elm = xa_load(xa, index); 143 - if (elm) 144 - return elm; 138 + if (res) 139 + return res; 145 140 146 - elm = kzalloc(sz, GFP_KERNEL); 141 + void *elm __free(free_page) = (void *)get_zeroed_page(GFP_KERNEL); 142 + 147 143 if (!elm) 148 144 return ERR_PTR(-ENOMEM); 149 145 146 + if (WARN_ON(kho_scratch_overlap(virt_to_phys(elm), PAGE_SIZE))) 147 + return ERR_PTR(-EINVAL); 148 + 150 149 res = xa_cmpxchg(xa, index, NULL, elm, GFP_KERNEL); 151 150 if (xa_is_err(res)) 152 - res = ERR_PTR(xa_err(res)); 153 - 154 - if (res) { 155 - kfree(elm); 151 + return ERR_PTR(xa_err(res)); 152 + else if (res) 156 153 return res; 157 - } 158 154 159 - return elm; 155 + return no_free_ptr(elm); 160 156 } 161 157 162 158 static void __kho_unpreserve(struct kho_mem_track *track, unsigned long pfn, ··· 171 167 const unsigned long pfn_high = pfn >> order; 172 168 173 169 physxa = xa_load(&track->orders, order); 174 - if (!physxa) 175 - continue; 170 + if (WARN_ON_ONCE(!physxa)) 171 + return; 176 172 177 173 bits = xa_load(&physxa->phys_bits, pfn_high / PRESERVE_BITS); 178 - if (!bits) 179 - continue; 174 + if (WARN_ON_ONCE(!bits)) 175 + return; 180 176 181 177 clear_bit(pfn_high % PRESERVE_BITS, bits->preserve); 182 178 ··· 220 216 } 221 217 } 222 218 223 - bits = xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS, 224 - sizeof(*bits)); 219 + bits = 
xa_load_or_alloc(&physxa->phys_bits, pfn_high / PRESERVE_BITS); 225 220 if (IS_ERR(bits)) 226 221 return PTR_ERR(bits); 227 222 ··· 348 345 static struct khoser_mem_chunk *new_chunk(struct khoser_mem_chunk *cur_chunk, 349 346 unsigned long order) 350 347 { 351 - struct khoser_mem_chunk *chunk; 348 + struct khoser_mem_chunk *chunk __free(free_page) = NULL; 352 349 353 - chunk = kzalloc(PAGE_SIZE, GFP_KERNEL); 350 + chunk = (void *)get_zeroed_page(GFP_KERNEL); 354 351 if (!chunk) 355 - return NULL; 352 + return ERR_PTR(-ENOMEM); 353 + 354 + if (WARN_ON(kho_scratch_overlap(virt_to_phys(chunk), PAGE_SIZE))) 355 + return ERR_PTR(-EINVAL); 356 + 356 357 chunk->hdr.order = order; 357 358 if (cur_chunk) 358 359 KHOSER_STORE_PTR(cur_chunk->hdr.next, chunk); 359 - return chunk; 360 + return no_free_ptr(chunk); 360 361 } 361 362 362 363 static void kho_mem_ser_free(struct khoser_mem_chunk *first_chunk) ··· 381 374 struct khoser_mem_chunk *chunk = NULL; 382 375 struct kho_mem_phys *physxa; 383 376 unsigned long order; 377 + int err = -ENOMEM; 384 378 385 379 xa_for_each(&ser->track.orders, order, physxa) { 386 380 struct kho_mem_phys_bits *bits; 387 381 unsigned long phys; 388 382 389 383 chunk = new_chunk(chunk, order); 390 - if (!chunk) 384 + if (IS_ERR(chunk)) { 385 + err = PTR_ERR(chunk); 391 386 goto err_free; 387 + } 392 388 393 389 if (!first_chunk) 394 390 first_chunk = chunk; ··· 401 391 402 392 if (chunk->hdr.num_elms == ARRAY_SIZE(chunk->bitmaps)) { 403 393 chunk = new_chunk(chunk, order); 404 - if (!chunk) 394 + if (IS_ERR(chunk)) { 395 + err = PTR_ERR(chunk); 405 396 goto err_free; 397 + } 406 398 } 407 399 408 400 elm = &chunk->bitmaps[chunk->hdr.num_elms]; ··· 421 409 422 410 err_free: 423 411 kho_mem_ser_free(first_chunk); 424 - return -ENOMEM; 412 + return err; 425 413 } 426 414 427 415 static void __init deserialize_bitmap(unsigned int order, ··· 477 465 * area for early allocations that happen before page allocator is 478 466 * initialized. 
479 467 */ 480 - static struct kho_scratch *kho_scratch; 481 - static unsigned int kho_scratch_cnt; 468 + struct kho_scratch *kho_scratch; 469 + unsigned int kho_scratch_cnt; 482 470 483 471 /* 484 472 * The scratch areas are scaled by default as percent of memory allocated from ··· 764 752 const unsigned int order = folio_order(folio); 765 753 struct kho_mem_track *track = &kho_out.ser.track; 766 754 755 + if (WARN_ON(kho_scratch_overlap(pfn << PAGE_SHIFT, PAGE_SIZE << order))) 756 + return -EINVAL; 757 + 767 758 return __kho_preserve_order(track, pfn, order); 768 759 } 769 760 EXPORT_SYMBOL_GPL(kho_preserve_folio); ··· 789 774 unsigned long pfn = start_pfn; 790 775 unsigned long failed_pfn = 0; 791 776 int err = 0; 777 + 778 + if (WARN_ON(kho_scratch_overlap(start_pfn << PAGE_SHIFT, 779 + nr_pages << PAGE_SHIFT))) { 780 + return -EINVAL; 781 + } 792 782 793 783 while (pfn < end_pfn) { 794 784 const unsigned int order = ··· 882 862 return NULL; 883 863 } 884 864 885 - static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk) 865 + static void kho_vmalloc_unpreserve_chunk(struct kho_vmalloc_chunk *chunk, 866 + unsigned short order) 886 867 { 887 868 struct kho_mem_track *track = &kho_out.ser.track; 888 869 unsigned long pfn = PHYS_PFN(virt_to_phys(chunk)); 889 870 890 871 __kho_unpreserve(track, pfn, pfn + 1); 891 872 892 - for (int i = 0; chunk->phys[i]; i++) { 873 + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { 893 874 pfn = PHYS_PFN(chunk->phys[i]); 894 - __kho_unpreserve(track, pfn, pfn + 1); 875 + __kho_unpreserve(track, pfn, pfn + (1 << order)); 895 876 } 896 877 } 897 878 ··· 903 882 while (chunk) { 904 883 struct kho_vmalloc_chunk *tmp = chunk; 905 884 906 - kho_vmalloc_unpreserve_chunk(chunk); 885 + kho_vmalloc_unpreserve_chunk(chunk, kho_vmalloc->order); 907 886 908 887 chunk = KHOSER_LOAD_PTR(chunk->hdr.next); 909 888 free_page((unsigned long)tmp); ··· 1013 992 while (chunk) { 1014 993 struct page *page; 1015 994 
1016 - for (int i = 0; chunk->phys[i]; i++) { 995 + for (int i = 0; i < ARRAY_SIZE(chunk->phys) && chunk->phys[i]; i++) { 1017 996 phys_addr_t phys = chunk->phys[i]; 1018 997 1019 998 if (idx + contig_pages > total_pages)

+25

kernel/kexec_handover_debug.c

··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * kexec_handover_debug.c - kexec handover optional debug functionality 4 + * Copyright (C) 2025 Google LLC, Pasha Tatashin <pasha.tatashin@soleen.com> 5 + */ 6 + 7 + #define pr_fmt(fmt) "KHO: " fmt 8 + 9 + #include "kexec_handover_internal.h" 10 + 11 + bool kho_scratch_overlap(phys_addr_t phys, size_t size) 12 + { 13 + phys_addr_t scratch_start, scratch_end; 14 + unsigned int i; 15 + 16 + for (i = 0; i < kho_scratch_cnt; i++) { 17 + scratch_start = kho_scratch[i].addr; 18 + scratch_end = kho_scratch[i].addr + kho_scratch[i].size; 19 + 20 + if (phys < scratch_end && (phys + size) > scratch_start) 21 + return true; 22 + } 23 + 24 + return false; 25 + }

+20

kernel/kexec_handover_internal.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef LINUX_KEXEC_HANDOVER_INTERNAL_H 3 + #define LINUX_KEXEC_HANDOVER_INTERNAL_H 4 + 5 + #include <linux/kexec_handover.h> 6 + #include <linux/types.h> 7 + 8 + extern struct kho_scratch *kho_scratch; 9 + extern unsigned int kho_scratch_cnt; 10 + 11 + #ifdef CONFIG_KEXEC_HANDOVER_DEBUG 12 + bool kho_scratch_overlap(phys_addr_t phys, size_t size); 13 + #else 14 + static inline bool kho_scratch_overlap(phys_addr_t phys, size_t size) 15 + { 16 + return false; 17 + } 18 + #endif /* CONFIG_KEXEC_HANDOVER_DEBUG */ 19 + 20 + #endif /* LINUX_KEXEC_HANDOVER_INTERNAL_H */

+1 -1

kernel/sched/core.c

··· 9606 9606 9607 9607 guard(rq_lock_irq)(rq); 9608 9608 cfs_rq->runtime_enabled = runtime_enabled; 9609 - cfs_rq->runtime_remaining = 0; 9609 + cfs_rq->runtime_remaining = 1; 9610 9610 9611 9611 if (cfs_rq->throttled) 9612 9612 unthrottle_cfs_rq(cfs_rq);

+6 -9

kernel/sched/fair.c

··· 6024 6024 struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)]; 6025 6025 6026 6026 /* 6027 - * It's possible we are called with !runtime_remaining due to things 6028 - * like user changed quota setting(see tg_set_cfs_bandwidth()) or async 6029 - * unthrottled us with a positive runtime_remaining but other still 6030 - * running entities consumed those runtime before we reached here. 6027 + * It's possible we are called with runtime_remaining < 0 due to things 6028 + * like an async unthrottle giving us a positive runtime_remaining, but other 6029 + * still running entities consumed that runtime before we reached here. 6031 6030 * 6032 - * Anyway, we can't unthrottle this cfs_rq without any runtime remaining 6033 - * because any enqueue in tg_unthrottle_up() will immediately trigger a 6034 - * throttle, which is not supposed to happen on unthrottle path. 6031 + * We can't unthrottle this cfs_rq without any runtime remaining because 6032 + * any enqueue in tg_unthrottle_up() will immediately trigger a throttle, 6033 + * which is not supposed to happen on the unthrottle path. 6035 6034 */ 6036 6035 if (cfs_rq->runtime_enabled && cfs_rq->runtime_remaining <= 0) 6037 6036 return; 6038 - 6039 - se = cfs_rq->tg->se[cpu_of(rq)]; 6040 6037 6041 6038 cfs_rq->throttled = 0; 6042 6039

+4

kernel/trace/ring_buffer.c

··· 7344 7344 goto out; 7345 7345 } 7346 7346 7347 + /* Did the reader catch up with the writer? */ 7348 + if (cpu_buffer->reader_page == cpu_buffer->commit_page) 7349 + goto out; 7350 + 7347 7351 reader = rb_get_reader_page(cpu_buffer); 7348 7352 if (WARN_ON(!reader)) 7349 7353 goto out;

+4 -2

kernel/trace/trace_events_hist.c

··· 3272 3272 var = create_var(hist_data, file, field_name, val->size, val->type); 3273 3273 if (IS_ERR(var)) { 3274 3274 hist_err(tr, HIST_ERR_VAR_CREATE_FIND_FAIL, errpos(field_name)); 3275 - kfree(val); 3275 + destroy_hist_field(val, 0); 3276 3276 ret = PTR_ERR(var); 3277 3277 goto err; 3278 3278 } 3279 3279 3280 3280 field_var = kzalloc(sizeof(struct field_var), GFP_KERNEL); 3281 3281 if (!field_var) { 3282 - kfree(val); 3282 + destroy_hist_field(val, 0); 3283 + kfree_const(var->type); 3284 + kfree(var->var.name); 3283 3285 kfree(var); 3284 3286 ret = -ENOMEM; 3285 3287 goto err;

+6 -1

kernel/trace/trace_fprobe.c

··· 106 106 if (!tuser->name) 107 107 return NULL; 108 108 109 + /* Register tracepoint if it is loaded. */ 109 110 if (tpoint) { 111 + tuser->tpoint = tpoint; 110 112 ret = tracepoint_user_register(tuser); 111 113 if (ret) 112 114 return ERR_PTR(ret); 113 115 } 114 116 115 - tuser->tpoint = tpoint; 116 117 tuser->refcount = 1; 117 118 INIT_LIST_HEAD(&tuser->list); 118 119 list_add(&tuser->list, &tracepoint_user_list); ··· 1514 1513 if (!trace_probe_is_enabled(tp)) { 1515 1514 list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { 1516 1515 unregister_fprobe(&tf->fp); 1516 + if (tf->tuser) { 1517 + tracepoint_user_put(tf->tuser); 1518 + tf->tuser = NULL; 1519 + } 1517 1520 } 1518 1521 } 1519 1522

+1 -1

lib/crypto/Kconfig

··· 64 64 config CRYPTO_LIB_CURVE25519_ARCH 65 65 bool 66 66 depends on CRYPTO_LIB_CURVE25519 && !UML && !KMSAN 67 - default y if ARM && KERNEL_MODE_NEON 67 + default y if ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN 68 68 default y if PPC64 && CPU_LITTLE_ENDIAN 69 69 default y if X86_64 70 70

+1 -1

lib/crypto/Makefile

··· 90 90 libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-fiat32.o 91 91 endif 92 92 # clang versions prior to 18 may blow out the stack with KASAN 93 - ifeq ($(call clang-min-version, 180000),) 93 + ifeq ($(CONFIG_CC_IS_CLANG)_$(call clang-min-version, 180000),y_) 94 94 KASAN_SANITIZE_curve25519-hacl64.o := n 95 95 endif 96 96

+16 -14

lib/maple_tree.c

··· 64 64 #define CREATE_TRACE_POINTS 65 65 #include <trace/events/maple_tree.h> 66 66 67 + #define TP_FCT tracepoint_string(__func__) 68 + 67 69 /* 68 70 * Kernel pointer hashing renders much of the maple tree dump useless as tagged 69 71 * pointers get hashed to arbitrary values. ··· 2758 2756 MA_STATE(l_mas, mas->tree, mas->index, mas->last); 2759 2757 MA_STATE(r_mas, mas->tree, mas->index, mas->last); 2760 2758 2761 - trace_ma_op(__func__, mas); 2759 + trace_ma_op(TP_FCT, mas); 2762 2760 2763 2761 /* 2764 2762 * Rebalancing occurs if a node is insufficient. Data is rebalanced ··· 2999 2997 MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last); 3000 2998 MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last); 3001 2999 3002 - trace_ma_op(__func__, mas); 3000 + trace_ma_op(TP_FCT, mas); 3003 3001 3004 3002 mast.l = &l_mas; 3005 3003 mast.r = &r_mas; ··· 3174 3172 return false; 3175 3173 } 3176 3174 3177 - trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); 3175 + trace_ma_write(TP_FCT, wr_mas->mas, wr_mas->r_max, entry); 3178 3176 return true; 3179 3177 } 3180 3178 ··· 3418 3416 * of data may happen. 3419 3417 */ 3420 3418 mas = wr_mas->mas; 3421 - trace_ma_op(__func__, mas); 3419 + trace_ma_op(TP_FCT, mas); 3422 3420 3423 3421 if (unlikely(!mas->index && mas->last == ULONG_MAX)) 3424 3422 return mas_new_root(mas, wr_mas->entry); ··· 3554 3552 } else { 3555 3553 memcpy(wr_mas->node, newnode, sizeof(struct maple_node)); 3556 3554 } 3557 - trace_ma_write(__func__, mas, 0, wr_mas->entry); 3555 + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); 3558 3556 mas_update_gap(mas); 3559 3557 mas->end = new_end; 3560 3558 return; ··· 3598 3596 mas->offset++; /* Keep mas accurate. */ 3599 3597 } 3600 3598 3601 - trace_ma_write(__func__, mas, 0, wr_mas->entry); 3599 + trace_ma_write(TP_FCT, mas, 0, wr_mas->entry); 3602 3600 /* 3603 3601 * Only update gap when the new entry is empty or there is an empty 3604 3602 * entry in the original two ranges. 
··· 3719 3717 mas_update_gap(mas); 3720 3718 3721 3719 mas->end = new_end; 3722 - trace_ma_write(__func__, mas, new_end, wr_mas->entry); 3720 + trace_ma_write(TP_FCT, mas, new_end, wr_mas->entry); 3723 3721 return; 3724 3722 } 3725 3723 ··· 3733 3731 { 3734 3732 struct maple_big_node b_node; 3735 3733 3736 - trace_ma_write(__func__, wr_mas->mas, 0, wr_mas->entry); 3734 + trace_ma_write(TP_FCT, wr_mas->mas, 0, wr_mas->entry); 3737 3735 memset(&b_node, 0, sizeof(struct maple_big_node)); 3738 3736 mas_store_b_node(wr_mas, &b_node, wr_mas->offset_end); 3739 3737 mas_commit_b_node(wr_mas, &b_node); ··· 5064 5062 { 5065 5063 MA_WR_STATE(wr_mas, mas, entry); 5066 5064 5067 - trace_ma_write(__func__, mas, 0, entry); 5065 + trace_ma_write(TP_FCT, mas, 0, entry); 5068 5066 #ifdef CONFIG_DEBUG_MAPLE_TREE 5069 5067 if (MAS_WARN_ON(mas, mas->index > mas->last)) 5070 5068 pr_err("Error %lX > %lX " PTR_FMT "\n", mas->index, mas->last, ··· 5165 5163 } 5166 5164 5167 5165 store: 5168 - trace_ma_write(__func__, mas, 0, entry); 5166 + trace_ma_write(TP_FCT, mas, 0, entry); 5169 5167 mas_wr_store_entry(&wr_mas); 5170 5168 MAS_WR_BUG_ON(&wr_mas, mas_is_err(mas)); 5171 5169 mas_destroy(mas); ··· 5884 5882 MA_STATE(mas, mt, index, index); 5885 5883 void *entry; 5886 5884 5887 - trace_ma_read(__func__, &mas); 5885 + trace_ma_read(TP_FCT, &mas); 5888 5886 rcu_read_lock(); 5889 5887 retry: 5890 5888 entry = mas_start(&mas); ··· 5927 5925 MA_STATE(mas, mt, index, last); 5928 5926 int ret = 0; 5929 5927 5930 - trace_ma_write(__func__, &mas, 0, entry); 5928 + trace_ma_write(TP_FCT, &mas, 0, entry); 5931 5929 if (WARN_ON_ONCE(xa_is_advanced(entry))) 5932 5930 return -EINVAL; 5933 5931 ··· 6150 6148 void *entry = NULL; 6151 6149 6152 6150 MA_STATE(mas, mt, index, index); 6153 - trace_ma_op(__func__, &mas); 6151 + trace_ma_op(TP_FCT, &mas); 6154 6152 6155 6153 mtree_lock(mt); 6156 6154 entry = mas_erase(&mas); ··· 6487 6485 unsigned long copy = *index; 6488 6486 #endif 6489 6487 6490 - 
trace_ma_read(__func__, &mas); 6488 + trace_ma_read(TP_FCT, &mas); 6491 6489 6492 6490 if ((*index) > max) 6493 6491 return NULL;

+6 -3

mm/damon/stat.c

··· 46 46 47 47 static struct damon_ctx *damon_stat_context; 48 48 49 + static unsigned long damon_stat_last_refresh_jiffies; 50 + 49 51 static void damon_stat_set_estimated_memory_bandwidth(struct damon_ctx *c) 50 52 { 51 53 struct damon_target *t; ··· 132 130 static int damon_stat_damon_call_fn(void *data) 133 131 { 134 132 struct damon_ctx *c = data; 135 - static unsigned long last_refresh_jiffies; 136 133 137 134 /* avoid unnecessarily frequent stat update */ 138 - if (time_before_eq(jiffies, last_refresh_jiffies + 135 + if (time_before_eq(jiffies, damon_stat_last_refresh_jiffies + 139 136 msecs_to_jiffies(5 * MSEC_PER_SEC))) 140 137 return 0; 141 - last_refresh_jiffies = jiffies; 138 + damon_stat_last_refresh_jiffies = jiffies; 142 139 143 140 aggr_interval_us = c->attrs.aggr_interval; 144 141 damon_stat_set_estimated_memory_bandwidth(c); ··· 211 210 err = damon_start(&damon_stat_context, 1, true); 212 211 if (err) 213 212 return err; 213 + 214 + damon_stat_last_refresh_jiffies = jiffies; 214 215 call_control.data = damon_stat_context; 215 216 return damon_call(damon_stat_context, &call_control); 216 217 }

+7 -3

mm/damon/sysfs.c

··· 1552 1552 return ctx; 1553 1553 } 1554 1554 1555 + static unsigned long damon_sysfs_next_update_jiffies; 1556 + 1555 1557 static int damon_sysfs_repeat_call_fn(void *data) 1556 1558 { 1557 1559 struct damon_sysfs_kdamond *sysfs_kdamond = data; 1558 - static unsigned long next_update_jiffies; 1559 1560 1560 1561 if (!sysfs_kdamond->refresh_ms) 1561 1562 return 0; 1562 - if (time_before(jiffies, next_update_jiffies)) 1563 + if (time_before(jiffies, damon_sysfs_next_update_jiffies)) 1563 1564 return 0; 1564 - next_update_jiffies = jiffies + 1565 + damon_sysfs_next_update_jiffies = jiffies + 1565 1566 msecs_to_jiffies(sysfs_kdamond->refresh_ms); 1566 1567 1567 1568 if (!mutex_trylock(&damon_sysfs_lock)) ··· 1607 1606 return err; 1608 1607 } 1609 1608 kdamond->damon_ctx = ctx; 1609 + 1610 + damon_sysfs_next_update_jiffies = 1611 + jiffies + msecs_to_jiffies(kdamond->refresh_ms); 1610 1612 1611 1613 repeat_call_control->fn = damon_sysfs_repeat_call_fn; 1612 1614 repeat_call_control->data = kdamond;

+20 -8

mm/filemap.c

··· 3681 3681 static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, 3682 3682 struct folio *folio, unsigned long start, 3683 3683 unsigned long addr, unsigned int nr_pages, 3684 - unsigned long *rss, unsigned short *mmap_miss) 3684 + unsigned long *rss, unsigned short *mmap_miss, 3685 + bool can_map_large) 3685 3686 { 3686 3687 unsigned int ref_from_caller = 1; 3687 3688 vm_fault_t ret = 0; ··· 3697 3696 * The folio must not cross VMA or page table boundary. 3698 3697 */ 3699 3698 addr0 = addr - start * PAGE_SIZE; 3700 - if (folio_within_vma(folio, vmf->vma) && 3699 + if (can_map_large && folio_within_vma(folio, vmf->vma) && 3701 3700 (addr0 & PMD_MASK) == ((addr0 + folio_size(folio) - 1) & PMD_MASK)) { 3702 3701 vmf->pte -= start; 3703 3702 page -= start; ··· 3812 3811 unsigned long rss = 0; 3813 3812 unsigned int nr_pages = 0, folio_type; 3814 3813 unsigned short mmap_miss = 0, mmap_miss_saved; 3814 + bool can_map_large; 3815 3815 3816 3816 rcu_read_lock(); 3817 3817 folio = next_uptodate_folio(&xas, mapping, end_pgoff); 3818 3818 if (!folio) 3819 3819 goto out; 3820 3820 3821 - if (filemap_map_pmd(vmf, folio, start_pgoff)) { 3821 + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; 3822 + end_pgoff = min(end_pgoff, file_end); 3823 + 3824 + /* 3825 + * Do not allow mapping with PTEs beyond i_size or with a PMD 3826 + * across i_size, to preserve SIGBUS semantics. 3827 + * 3828 + * Make an exception for shmem/tmpfs, which has for a long time 3829 + * intentionally mapped with PMDs across i_size. 
3830 + */ 3831 + can_map_large = shmem_mapping(mapping) || 3832 + file_end >= folio_next_index(folio); 3833 + 3834 + if (can_map_large && filemap_map_pmd(vmf, folio, start_pgoff)) { 3822 3835 ret = VM_FAULT_NOPAGE; 3823 3836 goto out; 3824 3837 } ··· 3844 3829 folio_put(folio); 3845 3830 goto out; 3846 3831 } 3847 - 3848 - file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE) - 1; 3849 - if (end_pgoff > file_end) 3850 - end_pgoff = file_end; 3851 3832 3852 3833 folio_type = mm_counter_file(folio); 3853 3834 do { ··· 3861 3850 else 3862 3851 ret |= filemap_map_folio_range(vmf, folio, 3863 3852 xas.xa_index - folio->index, addr, 3864 - nr_pages, &rss, &mmap_miss); 3853 + nr_pages, &rss, &mmap_miss, 3854 + can_map_large); 3865 3855 3866 3856 folio_unlock(folio); 3867 3857 } while ((folio = next_uptodate_folio(&xas, mapping, end_pgoff)) != NULL);

+23 -12

mm/huge_memory.c

··· 214 214 if (likely(atomic_inc_not_zero(&huge_zero_refcount))) 215 215 return true; 216 216 217 - zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE, 217 + zero_folio = folio_alloc((GFP_TRANSHUGE | __GFP_ZERO | __GFP_ZEROTAGS) & 218 + ~__GFP_MOVABLE, 218 219 HPAGE_PMD_ORDER); 219 220 if (!zero_folio) { 220 221 count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED); ··· 3264 3263 caller_pins; 3265 3264 } 3266 3265 3266 + static bool page_range_has_hwpoisoned(struct page *page, long nr_pages) 3267 + { 3268 + for (; nr_pages; page++, nr_pages--) 3269 + if (PageHWPoison(page)) 3270 + return true; 3271 + return false; 3272 + } 3273 + 3267 3274 /* 3268 3275 * It splits @folio into @new_order folios and copies the @folio metadata to 3269 3276 * all the resulting folios. ··· 3279 3270 static void __split_folio_to_order(struct folio *folio, int old_order, 3280 3271 int new_order) 3281 3272 { 3273 + /* Scan poisoned pages when split a poisoned folio to large folios */ 3274 + const bool handle_hwpoison = folio_test_has_hwpoisoned(folio) && new_order; 3282 3275 long new_nr_pages = 1 << new_order; 3283 3276 long nr_pages = 1 << old_order; 3284 3277 long i; 3285 3278 3279 + folio_clear_has_hwpoisoned(folio); 3280 + 3281 + /* Check first new_nr_pages since the loop below skips them */ 3282 + if (handle_hwpoison && 3283 + page_range_has_hwpoisoned(folio_page(folio, 0), new_nr_pages)) 3284 + folio_set_has_hwpoisoned(folio); 3286 3285 /* 3287 3286 * Skip the first new_nr_pages, since the new folio from them have all 3288 3287 * the flags from the original folio. 3289 3288 */ 3290 3289 for (i = new_nr_pages; i < nr_pages; i += new_nr_pages) { 3291 3290 struct page *new_head = &folio->page + i; 3292 - 3293 3291 /* 3294 3292 * Careful: new_folio is not a "real" folio before we cleared PageTail. 3295 3293 * Don't pass it around before clear_compound_head(). 
··· 3337 3321 #endif 3338 3322 (1L << PG_dirty) | 3339 3323 LRU_GEN_MASK | LRU_REFS_MASK)); 3324 + 3325 + if (handle_hwpoison && 3326 + page_range_has_hwpoisoned(new_head, new_nr_pages)) 3327 + folio_set_has_hwpoisoned(new_folio); 3340 3328 3341 3329 new_folio->mapping = folio->mapping; 3342 3330 new_folio->index = folio->index + i; ··· 3441 3421 3442 3422 if (folio_test_anon(folio)) 3443 3423 mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1); 3444 - 3445 - folio_clear_has_hwpoisoned(folio); 3446 3424 3447 3425 /* 3448 3426 * split to new_order one order at a time. For uniform split, ··· 3671 3653 3672 3654 min_order = mapping_min_folio_order(folio->mapping); 3673 3655 if (new_order < min_order) { 3674 - VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u", 3675 - min_order); 3676 3656 ret = -EINVAL; 3677 3657 goto out; 3678 3658 } ··· 4002 3986 4003 3987 int split_folio_to_list(struct folio *folio, struct list_head *list) 4004 3988 { 4005 - int ret = min_order_for_split(folio); 4006 - 4007 - if (ret < 0) 4008 - return ret; 4009 - 4010 - return split_huge_page_to_list_to_order(&folio->page, list, ret); 3989 + return split_huge_page_to_list_to_order(&folio->page, list, 0); 4011 3990 } 4012 3991 4013 3992 /*

-3

mm/kmsan/core.c

··· 72 72 73 73 nr_entries = stack_trace_save(entries, KMSAN_STACK_DEPTH, 0); 74 74 75 - /* Don't sleep. */ 76 - flags &= ~(__GFP_DIRECT_RECLAIM | __GFP_KSWAPD_RECLAIM); 77 - 78 75 handle = stack_depot_save(entries, nr_entries, flags); 79 76 return stack_depot_set_extra_bits(handle, extra); 80 77 }

+4 -2

mm/kmsan/hooks.c

··· 84 84 if (s->ctor) 85 85 return; 86 86 kmsan_enter_runtime(); 87 - kmsan_internal_poison_memory(object, s->object_size, GFP_KERNEL, 87 + kmsan_internal_poison_memory(object, s->object_size, 88 + GFP_KERNEL & ~(__GFP_RECLAIM), 88 89 KMSAN_POISON_CHECK | KMSAN_POISON_FREE); 89 90 kmsan_leave_runtime(); 90 91 } ··· 115 114 kmsan_enter_runtime(); 116 115 page = virt_to_head_page((void *)ptr); 117 116 KMSAN_WARN_ON(ptr != page_address(page)); 118 - kmsan_internal_poison_memory((void *)ptr, page_size(page), GFP_KERNEL, 117 + kmsan_internal_poison_memory((void *)ptr, page_size(page), 118 + GFP_KERNEL & ~(__GFP_RECLAIM), 119 119 KMSAN_POISON_CHECK | KMSAN_POISON_FREE); 120 120 kmsan_leave_runtime(); 121 121 }

+1 -1

mm/kmsan/shadow.c

··· 208 208 return; 209 209 kmsan_enter_runtime(); 210 210 kmsan_internal_poison_memory(page_address(page), page_size(page), 211 - GFP_KERNEL, 211 + GFP_KERNEL & ~(__GFP_RECLAIM), 212 212 KMSAN_POISON_CHECK | KMSAN_POISON_FREE); 213 213 kmsan_leave_runtime(); 214 214 }

+104 -9

mm/ksm.c

··· 2455 2455 return true; 2456 2456 } 2457 2457 2458 + struct ksm_next_page_arg { 2459 + struct folio *folio; 2460 + struct page *page; 2461 + unsigned long addr; 2462 + }; 2463 + 2464 + static int ksm_next_page_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long end, 2465 + struct mm_walk *walk) 2466 + { 2467 + struct ksm_next_page_arg *private = walk->private; 2468 + struct vm_area_struct *vma = walk->vma; 2469 + pte_t *start_ptep = NULL, *ptep, pte; 2470 + struct mm_struct *mm = walk->mm; 2471 + struct folio *folio; 2472 + struct page *page; 2473 + spinlock_t *ptl; 2474 + pmd_t pmd; 2475 + 2476 + if (ksm_test_exit(mm)) 2477 + return 0; 2478 + 2479 + cond_resched(); 2480 + 2481 + pmd = pmdp_get_lockless(pmdp); 2482 + if (!pmd_present(pmd)) 2483 + return 0; 2484 + 2485 + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && pmd_leaf(pmd)) { 2486 + ptl = pmd_lock(mm, pmdp); 2487 + pmd = pmdp_get(pmdp); 2488 + 2489 + if (!pmd_present(pmd)) { 2490 + goto not_found_unlock; 2491 + } else if (pmd_leaf(pmd)) { 2492 + page = vm_normal_page_pmd(vma, addr, pmd); 2493 + if (!page) 2494 + goto not_found_unlock; 2495 + folio = page_folio(page); 2496 + 2497 + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) 2498 + goto not_found_unlock; 2499 + 2500 + page += ((addr & (PMD_SIZE - 1)) >> PAGE_SHIFT); 2501 + goto found_unlock; 2502 + } 2503 + spin_unlock(ptl); 2504 + } 2505 + 2506 + start_ptep = pte_offset_map_lock(mm, pmdp, addr, &ptl); 2507 + if (!start_ptep) 2508 + return 0; 2509 + 2510 + for (ptep = start_ptep; addr < end; ptep++, addr += PAGE_SIZE) { 2511 + pte = ptep_get(ptep); 2512 + 2513 + if (!pte_present(pte)) 2514 + continue; 2515 + 2516 + page = vm_normal_page(vma, addr, pte); 2517 + if (!page) 2518 + continue; 2519 + folio = page_folio(page); 2520 + 2521 + if (folio_is_zone_device(folio) || !folio_test_anon(folio)) 2522 + continue; 2523 + goto found_unlock; 2524 + } 2525 + 2526 + not_found_unlock: 2527 + spin_unlock(ptl); 2528 + if (start_ptep) 2529 + 
pte_unmap(start_ptep); 2530 + return 0; 2531 + found_unlock: 2532 + folio_get(folio); 2533 + spin_unlock(ptl); 2534 + if (start_ptep) 2535 + pte_unmap(start_ptep); 2536 + private->page = page; 2537 + private->folio = folio; 2538 + private->addr = addr; 2539 + return 1; 2540 + } 2541 + 2542 + static struct mm_walk_ops ksm_next_page_ops = { 2543 + .pmd_entry = ksm_next_page_pmd_entry, 2544 + .walk_lock = PGWALK_RDLOCK, 2545 + }; 2546 + 2458 2547 static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) 2459 2548 { 2460 2549 struct mm_struct *mm; ··· 2631 2542 ksm_scan.address = vma->vm_end; 2632 2543 2633 2544 while (ksm_scan.address < vma->vm_end) { 2545 + struct ksm_next_page_arg ksm_next_page_arg; 2634 2546 struct page *tmp_page = NULL; 2635 - struct folio_walk fw; 2636 2547 struct folio *folio; 2637 2548 2638 2549 if (ksm_test_exit(mm)) 2639 2550 break; 2640 2551 2641 - folio = folio_walk_start(&fw, vma, ksm_scan.address, 0); 2642 - if (folio) { 2643 - if (!folio_is_zone_device(folio) && 2644 - folio_test_anon(folio)) { 2645 - folio_get(folio); 2646 - tmp_page = fw.page; 2647 - } 2648 - folio_walk_end(&fw, vma); 2552 + int found; 2553 + 2554 + found = walk_page_range_vma(vma, ksm_scan.address, 2555 + vma->vm_end, 2556 + &ksm_next_page_ops, 2557 + &ksm_next_page_arg); 2558 + 2559 + if (found > 0) { 2560 + folio = ksm_next_page_arg.folio; 2561 + tmp_page = ksm_next_page_arg.page; 2562 + ksm_scan.address = ksm_next_page_arg.addr; 2563 + } else { 2564 + VM_WARN_ON_ONCE(found < 0); 2565 + ksm_scan.address = vma->vm_end - PAGE_SIZE; 2649 2566 } 2650 2567 2651 2568 if (tmp_page) {

+19 -1

mm/memory.c

··· 65 65 #include <linux/gfp.h> 66 66 #include <linux/migrate.h> 67 67 #include <linux/string.h> 68 + #include <linux/shmem_fs.h> 68 69 #include <linux/memory-tiers.h> 69 70 #include <linux/debugfs.h> 70 71 #include <linux/userfaultfd_k.h> ··· 5502 5501 return ret; 5503 5502 } 5504 5503 5504 + if (!needs_fallback && vma->vm_file) { 5505 + struct address_space *mapping = vma->vm_file->f_mapping; 5506 + pgoff_t file_end; 5507 + 5508 + file_end = DIV_ROUND_UP(i_size_read(mapping->host), PAGE_SIZE); 5509 + 5510 + /* 5511 + * Do not allow to map with PTEs beyond i_size and with PMD 5512 + * across i_size to preserve SIGBUS semantics. 5513 + * 5514 + * Make an exception for shmem/tmpfs that for long time 5515 + * intentionally mapped with PMDs across i_size. 5516 + */ 5517 + needs_fallback = !shmem_mapping(mapping) && 5518 + file_end < folio_next_index(folio); 5519 + } 5520 + 5505 5521 if (pmd_none(*vmf->pmd)) { 5506 - if (folio_test_pmd_mappable(folio)) { 5522 + if (!needs_fallback && folio_test_pmd_mappable(folio)) { 5507 5523 ret = do_set_pmd(vmf, folio, page); 5508 5524 if (ret != VM_FAULT_FALLBACK) 5509 5525 return ret;

+1 -1

mm/mm_init.c

··· 2469 2469 panic("Failed to allocate %s hash table\n", tablename); 2470 2470 2471 2471 pr_info("%s hash table entries: %ld (order: %d, %lu bytes, %s)\n", 2472 - tablename, 1UL << log2qty, ilog2(size) - PAGE_SHIFT, size, 2472 + tablename, 1UL << log2qty, get_order(size), size, 2473 2473 virt ? (huge ? "vmalloc hugepage" : "vmalloc") : "linear"); 2474 2474 2475 2475 if (_hash_shift)

+1 -1

mm/mremap.c

··· 187 187 if (!folio || !folio_test_large(folio)) 188 188 return 1; 189 189 190 - return folio_pte_batch(folio, ptep, pte, max_nr); 190 + return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, FPB_RESPECT_WRITE); 191 191 } 192 192 193 193 static int move_ptes(struct pagetable_move_control *pmc,

+1 -1

mm/secretmem.c

··· 82 82 __folio_mark_uptodate(folio); 83 83 err = filemap_add_folio(mapping, folio, offset, gfp); 84 84 if (unlikely(err)) { 85 - folio_put(folio); 86 85 /* 87 86 * If a split of large page was required, it 88 87 * already happened when we marked the page invalid 89 88 * which guarantees that this call won't fail 90 89 */ 91 90 set_direct_map_default_noflush(folio_page(folio, 0)); 91 + folio_put(folio); 92 92 if (err == -EEXIST) 93 93 goto retry; 94 94

+6 -3

mm/shmem.c

··· 1882 1882 struct shmem_inode_info *info = SHMEM_I(inode); 1883 1883 unsigned long suitable_orders = 0; 1884 1884 struct folio *folio = NULL; 1885 + pgoff_t aligned_index; 1885 1886 long pages; 1886 1887 int error, order; 1887 1888 ··· 1896 1895 order = highest_order(suitable_orders); 1897 1896 while (suitable_orders) { 1898 1897 pages = 1UL << order; 1899 - index = round_down(index, pages); 1900 - folio = shmem_alloc_folio(gfp, order, info, index); 1901 - if (folio) 1898 + aligned_index = round_down(index, pages); 1899 + folio = shmem_alloc_folio(gfp, order, info, aligned_index); 1900 + if (folio) { 1901 + index = aligned_index; 1902 1902 goto allocated; 1903 + } 1903 1904 1904 1905 if (pages == HPAGE_PMD_NR) 1905 1906 count_vm_event(THP_FILE_FALLBACK);

+10 -2

mm/slub.c

··· 2046 2046 if (slab_exts) { 2047 2047 unsigned int offs = obj_to_index(obj_exts_slab->slab_cache, 2048 2048 obj_exts_slab, obj_exts); 2049 - /* codetag should be NULL */ 2049 + 2050 + if (unlikely(is_codetag_empty(&slab_exts[offs].ref))) 2051 + return; 2052 + 2053 + /* codetag should be NULL here */ 2050 2054 WARN_ON(slab_exts[offs].ref.ct); 2051 2055 set_codetag_empty(&slab_exts[offs].ref); 2052 2056 } ··· 4670 4666 if (kmem_cache_debug(s)) { 4671 4667 freelist = alloc_single_from_new_slab(s, slab, orig_size, gfpflags); 4672 4668 4673 - if (unlikely(!freelist)) 4669 + if (unlikely(!freelist)) { 4670 + /* This could cause an endless loop. Fail instead. */ 4671 + if (!allow_spin) 4672 + return NULL; 4674 4673 goto new_objects; 4674 + } 4675 4675 4676 4676 if (s->flags & SLAB_STORE_USER) 4677 4677 set_track(s, freelist, TRACK_ALLOC, addr,

+31 -6

mm/truncate.c

··· 177 177 return 0; 178 178 } 179 179 180 + static int try_folio_split_or_unmap(struct folio *folio, struct page *split_at, 181 + unsigned long min_order) 182 + { 183 + enum ttu_flags ttu_flags = 184 + TTU_SYNC | 185 + TTU_SPLIT_HUGE_PMD | 186 + TTU_IGNORE_MLOCK; 187 + int ret; 188 + 189 + ret = try_folio_split_to_order(folio, split_at, min_order); 190 + 191 + /* 192 + * If the split fails, unmap the folio, so it will be refaulted 193 + * with PTEs to respect SIGBUS semantics. 194 + * 195 + * Make an exception for shmem/tmpfs that for long time 196 + * intentionally mapped with PMDs across i_size. 197 + */ 198 + if (ret && !shmem_mapping(folio->mapping)) { 199 + try_to_unmap(folio, ttu_flags); 200 + WARN_ON(folio_mapped(folio)); 201 + } 202 + 203 + return ret; 204 + } 205 + 180 206 /* 181 207 * Handle partial folios. The folio may be entirely within the 182 208 * range if a split has raced with us. If not, we zero the part of the ··· 220 194 size_t size = folio_size(folio); 221 195 unsigned int offset, length; 222 196 struct page *split_at, *split_at2; 197 + unsigned int min_order; 223 198 224 199 if (pos < start) 225 200 offset = start - pos; ··· 250 223 if (!folio_test_large(folio)) 251 224 return true; 252 225 226 + min_order = mapping_min_folio_order(folio->mapping); 253 227 split_at = folio_page(folio, PAGE_ALIGN_DOWN(offset) / PAGE_SIZE); 254 - if (!try_folio_split(folio, split_at, NULL)) { 228 + if (!try_folio_split_or_unmap(folio, split_at, min_order)) { 255 229 /* 256 230 * try to split at offset + length to make sure folios within 257 231 * the range can be dropped, especially to avoid memory waste ··· 276 248 if (!folio_trylock(folio2)) 277 249 goto out; 278 250 279 - /* 280 - * make sure folio2 is large and does not change its mapping. 281 - * Its split result does not matter here. 
282 - */ 251 + /* make sure folio2 is large and does not change its mapping */ 283 252 if (folio_test_large(folio2) && 284 253 folio2->mapping == folio->mapping) 285 - try_folio_split(folio2, split_at2, NULL); 254 + try_folio_split_or_unmap(folio2, split_at2, min_order); 286 255 287 256 folio_unlock(folio2); 288 257 out:

+79 -32

net/bluetooth/6lowpan.c

··· 53 53 static struct l2cap_chan *listen_chan; 54 54 static DEFINE_MUTEX(set_lock); 55 55 56 + enum { 57 + LOWPAN_PEER_CLOSING, 58 + LOWPAN_PEER_MAXBITS 59 + }; 60 + 56 61 struct lowpan_peer { 57 62 struct list_head list; 58 63 struct rcu_head rcu; ··· 66 61 /* peer addresses in various formats */ 67 62 unsigned char lladdr[ETH_ALEN]; 68 63 struct in6_addr peer_addr; 64 + 65 + DECLARE_BITMAP(flags, LOWPAN_PEER_MAXBITS); 69 66 }; 70 67 71 68 struct lowpan_btle_dev { ··· 296 289 local_skb->pkt_type = PACKET_HOST; 297 290 local_skb->dev = dev; 298 291 292 + skb_reset_mac_header(local_skb); 299 293 skb_set_transport_header(local_skb, sizeof(struct ipv6hdr)); 300 294 301 295 if (give_skb_to_upper(local_skb, dev) != NET_RX_SUCCESS) { ··· 927 919 928 920 BT_DBG("peer %p chan %p", peer, peer->chan); 929 921 922 + l2cap_chan_lock(peer->chan); 930 923 l2cap_chan_close(peer->chan, ENOENT); 924 + l2cap_chan_unlock(peer->chan); 931 925 932 926 return 0; 933 927 } ··· 966 956 } 967 957 968 958 static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, 969 - struct l2cap_conn **conn) 959 + struct l2cap_conn **conn, bool disconnect) 970 960 { 971 961 struct hci_conn *hcon; 972 962 struct hci_dev *hdev; 963 + int le_addr_type; 973 964 int n; 974 965 975 966 n = sscanf(buf, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx %hhu", ··· 981 970 if (n < 7) 982 971 return -EINVAL; 983 972 973 + if (disconnect) { 974 + /* The "disconnect" debugfs command has used different address 975 + * type constants than "connect" since 2015. Let's retain that 976 + * for now even though it's obviously buggy... 
977 + */ 978 + *addr_type += 1; 979 + } 980 + 981 + switch (*addr_type) { 982 + case BDADDR_LE_PUBLIC: 983 + le_addr_type = ADDR_LE_DEV_PUBLIC; 984 + break; 985 + case BDADDR_LE_RANDOM: 986 + le_addr_type = ADDR_LE_DEV_RANDOM; 987 + break; 988 + default: 989 + return -EINVAL; 990 + } 991 + 984 992 /* The LE_PUBLIC address type is ignored because of BDADDR_ANY */ 985 993 hdev = hci_get_route(addr, BDADDR_ANY, BDADDR_LE_PUBLIC); 986 994 if (!hdev) 987 995 return -ENOENT; 988 996 989 997 hci_dev_lock(hdev); 990 - hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); 998 + hcon = hci_conn_hash_lookup_le(hdev, addr, le_addr_type); 991 999 hci_dev_unlock(hdev); 992 1000 hci_dev_put(hdev); 993 1001 ··· 1023 993 static void disconnect_all_peers(void) 1024 994 { 1025 995 struct lowpan_btle_dev *entry; 1026 - struct lowpan_peer *peer, *tmp_peer, *new_peer; 1027 - struct list_head peers; 996 + struct lowpan_peer *peer; 997 + int nchans; 1028 998 1029 - INIT_LIST_HEAD(&peers); 1030 - 1031 - /* We make a separate list of peers as the close_cb() will 1032 - * modify the device peers list so it is better not to mess 1033 - * with the same list at the same time. 999 + /* l2cap_chan_close() cannot be called from RCU, and lock ordering 1000 + * chan->lock > devices_lock prevents taking write side lock, so copy 1001 + * then close. 
1034 1002 */ 1035 1003 1036 1004 rcu_read_lock(); 1037 - 1038 - list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { 1039 - list_for_each_entry_rcu(peer, &entry->peers, list) { 1040 - new_peer = kmalloc(sizeof(*new_peer), GFP_ATOMIC); 1041 - if (!new_peer) 1042 - break; 1043 - 1044 - new_peer->chan = peer->chan; 1045 - INIT_LIST_HEAD(&new_peer->list); 1046 - 1047 - list_add(&new_peer->list, &peers); 1048 - } 1049 - } 1050 - 1005 + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) 1006 + list_for_each_entry_rcu(peer, &entry->peers, list) 1007 + clear_bit(LOWPAN_PEER_CLOSING, peer->flags); 1051 1008 rcu_read_unlock(); 1052 1009 1053 - spin_lock(&devices_lock); 1054 - list_for_each_entry_safe(peer, tmp_peer, &peers, list) { 1055 - l2cap_chan_close(peer->chan, ENOENT); 1010 + do { 1011 + struct l2cap_chan *chans[32]; 1012 + int i; 1056 1013 1057 - list_del_rcu(&peer->list); 1058 - kfree_rcu(peer, rcu); 1059 - } 1060 - spin_unlock(&devices_lock); 1014 + nchans = 0; 1015 + 1016 + spin_lock(&devices_lock); 1017 + 1018 + list_for_each_entry_rcu(entry, &bt_6lowpan_devices, list) { 1019 + list_for_each_entry_rcu(peer, &entry->peers, list) { 1020 + if (test_and_set_bit(LOWPAN_PEER_CLOSING, 1021 + peer->flags)) 1022 + continue; 1023 + 1024 + l2cap_chan_hold(peer->chan); 1025 + chans[nchans++] = peer->chan; 1026 + 1027 + if (nchans >= ARRAY_SIZE(chans)) 1028 + goto done; 1029 + } 1030 + } 1031 + 1032 + done: 1033 + spin_unlock(&devices_lock); 1034 + 1035 + for (i = 0; i < nchans; ++i) { 1036 + l2cap_chan_lock(chans[i]); 1037 + l2cap_chan_close(chans[i], ENOENT); 1038 + l2cap_chan_unlock(chans[i]); 1039 + l2cap_chan_put(chans[i]); 1040 + } 1041 + } while (nchans); 1061 1042 } 1062 1043 1063 1044 struct set_enable { ··· 1091 1050 1092 1051 mutex_lock(&set_lock); 1093 1052 if (listen_chan) { 1053 + l2cap_chan_lock(listen_chan); 1094 1054 l2cap_chan_close(listen_chan, 0); 1055 + l2cap_chan_unlock(listen_chan); 1095 1056 l2cap_chan_put(listen_chan); 1096 1057 } 1097 
1058 ··· 1146 1103 buf[buf_size] = '\0'; 1147 1104 1148 1105 if (memcmp(buf, "connect ", 8) == 0) { 1149 - ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn); 1106 + ret = get_l2cap_conn(&buf[8], &addr, &addr_type, &conn, false); 1150 1107 if (ret == -EINVAL) 1151 1108 return ret; 1152 1109 1153 1110 mutex_lock(&set_lock); 1154 1111 if (listen_chan) { 1112 + l2cap_chan_lock(listen_chan); 1155 1113 l2cap_chan_close(listen_chan, 0); 1114 + l2cap_chan_unlock(listen_chan); 1156 1115 l2cap_chan_put(listen_chan); 1157 1116 listen_chan = NULL; 1158 1117 } ··· 1185 1140 } 1186 1141 1187 1142 if (memcmp(buf, "disconnect ", 11) == 0) { 1188 - ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn); 1143 + ret = get_l2cap_conn(&buf[11], &addr, &addr_type, &conn, true); 1189 1144 if (ret < 0) 1190 1145 return ret; 1191 1146 ··· 1316 1271 debugfs_remove(lowpan_control_debugfs); 1317 1272 1318 1273 if (listen_chan) { 1274 + l2cap_chan_lock(listen_chan); 1319 1275 l2cap_chan_close(listen_chan, 0); 1276 + l2cap_chan_unlock(listen_chan); 1320 1277 l2cap_chan_put(listen_chan); 1321 1278 } 1322 1279

+19 -14

net/bluetooth/hci_conn.c

··· 769 769 d->count++; 770 770 } 771 771 772 - static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, struct hci_conn *conn) 772 + static int hci_le_big_terminate(struct hci_dev *hdev, struct hci_conn *conn) 773 773 { 774 774 struct iso_list_data *d; 775 775 int ret; 776 776 777 - bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, conn->sync_handle); 777 + bt_dev_dbg(hdev, "hcon %p big 0x%2.2x sync_handle 0x%4.4x", conn, 778 + conn->iso_qos.bcast.big, conn->sync_handle); 778 779 779 780 d = kzalloc(sizeof(*d), GFP_KERNEL); 780 781 if (!d) 781 782 return -ENOMEM; 782 783 783 - d->big = big; 784 + d->big = conn->iso_qos.bcast.big; 784 785 d->sync_handle = conn->sync_handle; 785 786 786 - if (test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { 787 + if (conn->type == PA_LINK && 788 + test_and_clear_bit(HCI_CONN_PA_SYNC, &conn->flags)) { 787 789 hci_conn_hash_list_flag(hdev, find_bis, PA_LINK, 788 790 HCI_CONN_PA_SYNC, d); 789 791 ··· 802 800 if (!d->count) 803 801 d->big_sync_term = true; 804 802 } 803 + 804 + if (!d->pa_sync_term && !d->big_sync_term) 805 + return 0; 805 806 806 807 ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d, 807 808 terminate_big_destroy); ··· 857 852 858 853 hci_le_terminate_big(hdev, conn); 859 854 } else { 860 - hci_le_big_terminate(hdev, conn->iso_qos.bcast.big, 861 - conn); 855 + hci_le_big_terminate(hdev, conn); 862 856 } 863 857 } 864 858 ··· 998 994 conn->mtu = hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; 999 995 break; 1000 996 case CIS_LINK: 1001 - case BIS_LINK: 1002 - case PA_LINK: 1003 997 /* conn->src should reflect the local identity address */ 1004 998 hci_copy_identity_address(hdev, &conn->src, &conn->src_type); 1005 999 1006 - /* set proper cleanup function */ 1007 - if (!bacmp(dst, BDADDR_ANY)) 1008 - conn->cleanup = bis_cleanup; 1009 - else if (conn->role == HCI_ROLE_MASTER) 1000 + if (conn->role == HCI_ROLE_MASTER) 1010 1001 conn->cleanup = cis_cleanup; 1011 1002 1012 - conn->mtu = hdev->iso_mtu ? 
hdev->iso_mtu : 1013 - hdev->le_mtu ? hdev->le_mtu : hdev->acl_mtu; 1003 + conn->mtu = hdev->iso_mtu; 1004 + break; 1005 + case PA_LINK: 1006 + case BIS_LINK: 1007 + /* conn->src should reflect the local identity address */ 1008 + hci_copy_identity_address(hdev, &conn->src, &conn->src_type); 1009 + conn->cleanup = bis_cleanup; 1010 + conn->mtu = hdev->iso_mtu; 1014 1011 break; 1015 1012 case SCO_LINK: 1016 1013 if (lmp_esco_capable(hdev))

+36 -20

net/bluetooth/hci_event.c

··· 5843 5843 le16_to_cpu(ev->supervision_timeout)); 5844 5844 } 5845 5845 5846 + static void hci_le_pa_sync_lost_evt(struct hci_dev *hdev, void *data, 5847 + struct sk_buff *skb) 5848 + { 5849 + struct hci_ev_le_pa_sync_lost *ev = data; 5850 + u16 handle = le16_to_cpu(ev->handle); 5851 + struct hci_conn *conn; 5852 + 5853 + bt_dev_dbg(hdev, "sync handle 0x%4.4x", handle); 5854 + 5855 + hci_dev_lock(hdev); 5856 + 5857 + /* Delete the pa sync connection */ 5858 + conn = hci_conn_hash_lookup_pa_sync_handle(hdev, handle); 5859 + if (conn) { 5860 + clear_bit(HCI_CONN_BIG_SYNC, &conn->flags); 5861 + clear_bit(HCI_CONN_PA_SYNC, &conn->flags); 5862 + hci_disconn_cfm(conn, HCI_ERROR_REMOTE_USER_TERM); 5863 + hci_conn_del(conn); 5864 + } 5865 + 5866 + hci_dev_unlock(hdev); 5867 + } 5868 + 5846 5869 static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, void *data, 5847 5870 struct sk_buff *skb) 5848 5871 { ··· 7024 7001 continue; 7025 7002 } 7026 7003 7027 - if (ev->status != 0x42) { 7004 + if (ev->status != 0x42) 7028 7005 /* Mark PA sync as established */ 7029 7006 set_bit(HCI_CONN_PA_SYNC, &bis->flags); 7030 - /* Reset cleanup callback of PA Sync so it doesn't 7031 - * terminate the sync when deleting the connection. 
7032 - */ 7033 - conn->cleanup = NULL; 7034 - } 7035 7007 7036 7008 bis->sync_handle = conn->sync_handle; 7037 7009 bis->iso_qos.bcast.big = ev->handle; ··· 7069 7051 struct sk_buff *skb) 7070 7052 { 7071 7053 struct hci_evt_le_big_sync_lost *ev = data; 7072 - struct hci_conn *bis, *conn; 7073 - bool mgmt_conn; 7054 + struct hci_conn *bis; 7055 + bool mgmt_conn = false; 7074 7056 7075 7057 bt_dev_dbg(hdev, "big handle 0x%2.2x", ev->handle); 7076 7058 7077 7059 hci_dev_lock(hdev); 7078 7060 7079 - /* Delete the pa sync connection */ 7080 - bis = hci_conn_hash_lookup_pa_sync_big_handle(hdev, ev->handle); 7081 - if (bis) { 7082 - conn = hci_conn_hash_lookup_pa_sync_handle(hdev, 7083 - bis->sync_handle); 7084 - if (conn) 7085 - hci_conn_del(conn); 7086 - } 7087 - 7088 7061 /* Delete each bis connection */ 7089 7062 while ((bis = hci_conn_hash_lookup_big_state(hdev, ev->handle, 7090 7063 BT_CONNECTED, 7091 7064 HCI_ROLE_SLAVE))) { 7092 - mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, &bis->flags); 7093 - mgmt_device_disconnected(hdev, &bis->dst, bis->type, bis->dst_type, 7094 - ev->reason, mgmt_conn); 7065 + if (!mgmt_conn) { 7066 + mgmt_conn = test_and_clear_bit(HCI_CONN_MGMT_CONNECTED, 7067 + &bis->flags); 7068 + mgmt_device_disconnected(hdev, &bis->dst, bis->type, 7069 + bis->dst_type, ev->reason, 7070 + mgmt_conn); 7071 + } 7095 7072 7096 7073 clear_bit(HCI_CONN_BIG_SYNC, &bis->flags); 7097 7074 hci_disconn_cfm(bis, ev->reason); ··· 7200 7187 hci_le_per_adv_report_evt, 7201 7188 sizeof(struct hci_ev_le_per_adv_report), 7202 7189 HCI_MAX_EVENT_SIZE), 7190 + /* [0x10 = HCI_EV_LE_PA_SYNC_LOST] */ 7191 + HCI_LE_EV(HCI_EV_LE_PA_SYNC_LOST, hci_le_pa_sync_lost_evt, 7192 + sizeof(struct hci_ev_le_pa_sync_lost)), 7203 7193 /* [0x12 = HCI_EV_LE_EXT_ADV_SET_TERM] */ 7204 7194 HCI_LE_EV(HCI_EV_LE_EXT_ADV_SET_TERM, hci_le_ext_adv_term_evt, 7205 7195 sizeof(struct hci_evt_le_ext_adv_set_term)),

+1 -1

net/bluetooth/hci_sync.c

··· 6999 6999 7000 7000 hci_dev_lock(hdev); 7001 7001 7002 - if (!hci_conn_valid(hdev, conn)) 7002 + if (hci_conn_valid(hdev, conn)) 7003 7003 clear_bit(HCI_CONN_CREATE_PA_SYNC, &conn->flags); 7004 7004 7005 7005 if (!err)

+1

net/bluetooth/l2cap_core.c

··· 497 497 498 498 kref_get(&c->kref); 499 499 } 500 + EXPORT_SYMBOL_GPL(l2cap_chan_hold); 500 501 501 502 struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) 502 503 {

+1

net/bluetooth/mgmt.c

··· 9497 9497 cancel_delayed_work_sync(&hdev->discov_off); 9498 9498 cancel_delayed_work_sync(&hdev->service_cache); 9499 9499 cancel_delayed_work_sync(&hdev->rpa_expired); 9500 + cancel_delayed_work_sync(&hdev->mesh_send_done); 9500 9501 } 9501 9502 9502 9503 void mgmt_power_on(struct hci_dev *hdev, int err)

+5 -2

net/core/netpoll.c

··· 811 811 if (!npinfo) 812 812 return; 813 813 814 + /* At this point, there is a single npinfo instance per netdevice, and 815 + * its refcnt tracks how many netpoll structures are linked to it. We 816 + * only perform npinfo cleanup when the refcnt decrements to zero. 817 + */ 814 818 if (refcount_dec_and_test(&npinfo->refcnt)) { 815 819 const struct net_device_ops *ops; 816 820 ··· 824 820 825 821 RCU_INIT_POINTER(np->dev->npinfo, NULL); 826 822 call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); 827 - } else 828 - RCU_INIT_POINTER(np->dev->npinfo, NULL); 823 + } 829 824 830 825 skb_pool_flush(np); 831 826 }

+4 -2

net/dsa/tag_brcm.c

··· 176 176 /* Remove Broadcom tag and update checksum */ 177 177 skb_pull_rcsum(skb, BRCM_TAG_LEN); 178 178 179 - dsa_default_offload_fwd_mark(skb); 179 + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) 180 + dsa_default_offload_fwd_mark(skb); 180 181 181 182 return skb; 182 183 } ··· 251 250 /* Remove Broadcom tag and update checksum */ 252 251 skb_pull_rcsum(skb, len); 253 252 254 - dsa_default_offload_fwd_mark(skb); 253 + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) 254 + dsa_default_offload_fwd_mark(skb); 255 255 256 256 dsa_strip_etype_header(skb, len); 257 257

+1

net/handshake/tlshd.c

··· 259 259 260 260 out_cancel: 261 261 genlmsg_cancel(msg, hdr); 262 + nlmsg_free(msg); 262 263 out: 263 264 return ret; 264 265 }

+4 -1

net/hsr/hsr_device.c

··· 320 320 } 321 321 322 322 hsr_stag = skb_put(skb, sizeof(struct hsr_sup_tag)); 323 + skb_set_network_header(skb, ETH_HLEN + HSR_HLEN); 324 + skb_reset_mac_len(skb); 325 + 323 326 set_hsr_stag_path(hsr_stag, (hsr->prot_version ? 0x0 : 0xf)); 324 327 set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version); 325 328 ··· 337 334 } 338 335 339 336 hsr_stag->tlv.HSR_TLV_type = type; 340 - /* TODO: Why 12 in HSRv0? */ 337 + /* HSRv0 has 6 unused bytes after the MAC */ 341 338 hsr_stag->tlv.HSR_TLV_length = hsr->prot_version ? 342 339 sizeof(struct hsr_sup_payload) : 12; 343 340

+15 -7

net/hsr/hsr_forward.c

··· 262 262 return skb; 263 263 } 264 264 265 - static void hsr_set_path_id(struct hsr_ethhdr *hsr_ethhdr, 265 + static void hsr_set_path_id(struct hsr_frame_info *frame, 266 + struct hsr_ethhdr *hsr_ethhdr, 266 267 struct hsr_port *port) 267 268 { 268 269 int path_id; 269 270 270 - if (port->type == HSR_PT_SLAVE_A) 271 - path_id = 0; 272 - else 273 - path_id = 1; 271 + if (port->hsr->prot_version) { 272 + if (port->type == HSR_PT_SLAVE_A) 273 + path_id = 0; 274 + else 275 + path_id = 1; 276 + } else { 277 + if (frame->is_supervision) 278 + path_id = 0xf; 279 + else 280 + path_id = 1; 281 + } 274 282 275 283 set_hsr_tag_path(&hsr_ethhdr->hsr_tag, path_id); 276 284 } ··· 312 304 else 313 305 hsr_ethhdr = (struct hsr_ethhdr *)pc; 314 306 315 - hsr_set_path_id(hsr_ethhdr, port); 307 + hsr_set_path_id(frame, hsr_ethhdr, port); 316 308 set_hsr_tag_LSDU_size(&hsr_ethhdr->hsr_tag, lsdu_size); 317 309 hsr_ethhdr->hsr_tag.sequence_nr = htons(frame->sequence_nr); 318 310 hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; ··· 338 330 (struct hsr_ethhdr *)skb_mac_header(frame->skb_hsr); 339 331 340 332 /* set the lane id properly */ 341 - hsr_set_path_id(hsr_ethhdr, port); 333 + hsr_set_path_id(frame, hsr_ethhdr, port); 342 334 return skb_clone(frame->skb_hsr, GFP_ATOMIC); 343 335 } else if (port->dev->features & NETIF_F_HW_HSR_TAG_INS) { 344 336 return skb_clone(frame->skb_std, GFP_ATOMIC);

+5

net/ipv4/route.c

··· 607 607 oldest_p = fnhe_p; 608 608 } 609 609 } 610 + 611 + /* Clear oldest->fnhe_daddr to prevent this fnhe from being 612 + * rebound with new dsts in rt_bind_exception(). 613 + */ 614 + oldest->fnhe_daddr = 0; 610 615 fnhe_flush_routes(oldest); 611 616 *oldest_p = oldest->fnhe_next; 612 617 kfree_rcu(oldest, rcu);

+11 -3

net/mac80211/iface.c

··· 223 223 if (netif_carrier_ok(sdata->dev)) 224 224 return -EBUSY; 225 225 226 + /* if any stations are set known (so they know this vif too), reject */ 227 + if (sta_info_get_by_idx(sdata, 0)) 228 + return -EBUSY; 229 + 226 230 /* First check no ROC work is happening on this iface */ 227 231 list_for_each_entry(roc, &local->roc_list, list) { 228 232 if (roc->sdata != sdata) ··· 246 242 ret = -EBUSY; 247 243 } 248 244 245 + /* 246 + * More interface types could be added here but changing the 247 + * address while powered makes the most sense in client modes. 248 + */ 249 249 switch (sdata->vif.type) { 250 250 case NL80211_IFTYPE_STATION: 251 251 case NL80211_IFTYPE_P2P_CLIENT: 252 - /* More interface types could be added here but changing the 253 - * address while powered makes the most sense in client modes. 254 - */ 252 + /* refuse while connecting */ 253 + if (sdata->u.mgd.auth_data || sdata->u.mgd.assoc_data) 254 + return -EBUSY; 255 255 break; 256 256 default: 257 257 ret = -EOPNOTSUPP;

+7 -3

net/mac80211/rx.c

··· 5446 5446 if (WARN_ON(!local->started)) 5447 5447 goto drop; 5448 5448 5449 - if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) { 5449 + if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC) && 5450 + !(status->flag & RX_FLAG_NO_PSDU && 5451 + status->zero_length_psdu_type == 5452 + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED))) { 5450 5453 /* 5451 - * Validate the rate, unless a PLCP error means that 5452 - * we probably can't have a valid rate here anyway. 5454 + * Validate the rate, unless there was a PLCP error which may 5455 + * have an invalid rate or the PSDU was not capture and may be 5456 + * missing rate information. 5453 5457 */ 5454 5458 5455 5459 switch (status->encoding) {

+7 -5

net/sched/act_connmark.c

··· 195 195 const struct tcf_connmark_info *ci = to_connmark(a); 196 196 unsigned char *b = skb_tail_pointer(skb); 197 197 const struct tcf_connmark_parms *parms; 198 - struct tc_connmark opt = { 199 - .index = ci->tcf_index, 200 - .refcnt = refcount_read(&ci->tcf_refcnt) - ref, 201 - .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind, 202 - }; 198 + struct tc_connmark opt; 203 199 struct tcf_t t; 200 + 201 + memset(&opt, 0, sizeof(opt)); 202 + 203 + opt.index = ci->tcf_index; 204 + opt.refcnt = refcount_read(&ci->tcf_refcnt) - ref; 205 + opt.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind; 204 206 205 207 rcu_read_lock(); 206 208 parms = rcu_dereference(ci->parms);

+7 -5

net/sched/act_ife.c

··· 644 644 unsigned char *b = skb_tail_pointer(skb); 645 645 struct tcf_ife_info *ife = to_ife(a); 646 646 struct tcf_ife_params *p; 647 - struct tc_ife opt = { 648 - .index = ife->tcf_index, 649 - .refcnt = refcount_read(&ife->tcf_refcnt) - ref, 650 - .bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, 651 - }; 647 + struct tc_ife opt; 652 648 struct tcf_t t; 649 + 650 + memset(&opt, 0, sizeof(opt)); 651 + 652 + opt.index = ife->tcf_index, 653 + opt.refcnt = refcount_read(&ife->tcf_refcnt) - ref, 654 + opt.bindcnt = atomic_read(&ife->tcf_bindcnt) - bind, 653 655 654 656 spin_lock_bh(&ife->tcf_lock); 655 657 opt.action = ife->tcf_action;

+5

net/sched/sch_api.c

··· 1599 1599 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc"); 1600 1600 return -ENOENT; 1601 1601 } 1602 + if (p->flags & TCQ_F_INGRESS) { 1603 + NL_SET_ERR_MSG(extack, 1604 + "Cannot add children to ingress/clsact qdisc"); 1605 + return -EOPNOTSUPP; 1606 + } 1602 1607 q = qdisc_leaf(p, clid, extack); 1603 1608 if (IS_ERR(q)) 1604 1609 return PTR_ERR(q);

+10 -7

net/sched/sch_generic.c

··· 180 180 static void try_bulk_dequeue_skb(struct Qdisc *q, 181 181 struct sk_buff *skb, 182 182 const struct netdev_queue *txq, 183 - int *packets) 183 + int *packets, int budget) 184 184 { 185 185 int bytelimit = qdisc_avail_bulklimit(txq) - skb->len; 186 + int cnt = 0; 186 187 187 188 while (bytelimit > 0) { 188 189 struct sk_buff *nskb = q->dequeue(q); ··· 194 193 bytelimit -= nskb->len; /* covers GSO len */ 195 194 skb->next = nskb; 196 195 skb = nskb; 197 - (*packets)++; /* GSO counts as one pkt */ 196 + if (++cnt >= budget) 197 + break; 198 198 } 199 + (*packets) += cnt; 199 200 skb_mark_not_on_list(skb); 200 201 } 201 202 ··· 231 228 * A requeued skb (via q->gso_skb) can also be a SKB list. 232 229 */ 233 230 static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, 234 - int *packets) 231 + int *packets, int budget) 235 232 { 236 233 const struct netdev_queue *txq = q->dev_queue; 237 234 struct sk_buff *skb = NULL; ··· 298 295 if (skb) { 299 296 bulk: 300 297 if (qdisc_may_bulk(q)) 301 - try_bulk_dequeue_skb(q, skb, txq, packets); 298 + try_bulk_dequeue_skb(q, skb, txq, packets, budget); 302 299 else 303 300 try_bulk_dequeue_skb_slow(q, skb, packets); 304 301 } ··· 390 387 * >0 - queue is not empty. 391 388 * 392 389 */ 393 - static inline bool qdisc_restart(struct Qdisc *q, int *packets) 390 + static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget) 394 391 { 395 392 spinlock_t *root_lock = NULL; 396 393 struct netdev_queue *txq; ··· 399 396 bool validate; 400 397 401 398 /* Dequeue packet */ 402 - skb = dequeue_skb(q, &validate, packets); 399 + skb = dequeue_skb(q, &validate, packets, budget); 403 400 if (unlikely(!skb)) 404 401 return false; 405 402 ··· 417 414 int quota = READ_ONCE(net_hotdata.dev_tx_weight); 418 415 int packets; 419 416 420 - while (qdisc_restart(q, &packets)) { 417 + while (qdisc_restart(q, &packets, quota)) { 421 418 quota -= packets; 422 419 if (quota <= 0) { 423 420 if (q->flags & TCQ_F_NOLOCK)

+9 -4

net/sctp/transport.c

··· 486 486 487 487 if (tp->rttvar || tp->srtt) { 488 488 struct net *net = tp->asoc->base.net; 489 + unsigned int rto_beta, rto_alpha; 489 490 /* 6.3.1 C3) When a new RTT measurement R' is made, set 490 491 * RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'| 491 492 * SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R' ··· 498 497 * For example, assuming the default value of RTO.Alpha of 499 498 * 1/8, rto_alpha would be expressed as 3. 500 499 */ 501 - tp->rttvar = tp->rttvar - (tp->rttvar >> net->sctp.rto_beta) 502 - + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> net->sctp.rto_beta); 503 - tp->srtt = tp->srtt - (tp->srtt >> net->sctp.rto_alpha) 504 - + (rtt >> net->sctp.rto_alpha); 500 + rto_beta = READ_ONCE(net->sctp.rto_beta); 501 + if (rto_beta < 32) 502 + tp->rttvar = tp->rttvar - (tp->rttvar >> rto_beta) 503 + + (((__u32)abs((__s64)tp->srtt - (__s64)rtt)) >> rto_beta); 504 + rto_alpha = READ_ONCE(net->sctp.rto_alpha); 505 + if (rto_alpha < 32) 506 + tp->srtt = tp->srtt - (tp->srtt >> rto_alpha) 507 + + (rtt >> rto_alpha); 505 508 } else { 506 509 /* 6.3.1 C2) When the first RTT measurement R is made, set 507 510 * SRTT <- R, RTTVAR <- R/2.

+1

net/smc/smc_clc.c

··· 890 890 return SMC_CLC_DECL_CNFERR; 891 891 } 892 892 pclc_base->hdr.typev1 = SMC_TYPE_N; 893 + ini->smc_type_v1 = SMC_TYPE_N; 893 894 } else { 894 895 pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); 895 896 plen += sizeof(*pclc_prfx) +

+1 -1

net/strparser/strparser.c

··· 238 238 strp_parser_err(strp, -EMSGSIZE, desc); 239 239 break; 240 240 } else if (len <= (ssize_t)head->len - 241 - skb->len - stm->strp.offset) { 241 + (ssize_t)skb->len - stm->strp.offset) { 242 242 /* Length must be into new skb (and also 243 243 * greater than zero) 244 244 */

+1 -2

net/sunrpc/Kconfig

··· 18 18 19 19 config RPCSEC_GSS_KRB5 20 20 tristate "Secure RPC: Kerberos V mechanism" 21 - depends on SUNRPC 21 + depends on SUNRPC && CRYPTO 22 22 default y 23 23 select SUNRPC_GSS 24 - select CRYPTO 25 24 select CRYPTO_SKCIPHER 26 25 select CRYPTO_HASH 27 26 help

+2

net/tipc/net.c

··· 145 145 { 146 146 struct tipc_net *tn = container_of(work, struct tipc_net, work); 147 147 148 + rtnl_lock(); 148 149 tipc_net_finalize(tipc_link_net(tn->bcl), tn->trial_addr); 150 + rtnl_unlock(); 149 151 } 150 152 151 153 void tipc_net_stop(struct net *net)

+11 -3

net/unix/garbage.c

··· 145 145 }; 146 146 147 147 static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1; 148 + static unsigned long unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; 148 149 149 150 static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge) 150 151 { ··· 154 153 if (!vertex) { 155 154 vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry); 156 155 vertex->index = unix_vertex_unvisited_index; 156 + vertex->scc_index = ++unix_vertex_max_scc_index; 157 157 vertex->out_degree = 0; 158 158 INIT_LIST_HEAD(&vertex->edges); 159 159 INIT_LIST_HEAD(&vertex->scc_entry); ··· 491 489 scc_dead = unix_vertex_dead(v); 492 490 } 493 491 494 - if (scc_dead) 492 + if (scc_dead) { 495 493 unix_collect_skb(&scc, hitlist); 496 - else if (!unix_graph_maybe_cyclic) 497 - unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); 494 + } else { 495 + if (unix_vertex_max_scc_index < vertex->scc_index) 496 + unix_vertex_max_scc_index = vertex->scc_index; 497 + 498 + if (!unix_graph_maybe_cyclic) 499 + unix_graph_maybe_cyclic = unix_scc_cyclic(&scc); 500 + } 498 501 499 502 list_del(&scc); 500 503 } ··· 514 507 unsigned long last_index = UNIX_VERTEX_INDEX_START; 515 508 516 509 unix_graph_maybe_cyclic = false; 510 + unix_vertex_max_scc_index = UNIX_VERTEX_INDEX_START; 517 511 518 512 /* Visit every vertex exactly once. 519 513 * __unix_walk_scc() moves visited vertices to unix_visited_vertices.

+1 -1

rust/Makefile

··· 298 298 -fno-inline-functions-called-once -fsanitize=bounds-strict \ 299 299 -fstrict-flex-arrays=% -fmin-function-alignment=% \ 300 300 -fzero-init-padding-bits=% -mno-fdpic \ 301 - --param=% --param asan-% 301 + --param=% --param asan-% -fno-isolate-erroneous-paths-dereference 302 302 303 303 # Derived from `scripts/Makefile.clang`. 304 304 BINDGEN_TARGET_x86 := x86_64-linux-gnu

+1 -1

scripts/Makefile.build

··· 167 167 endif 168 168 169 169 ifneq ($(KBUILD_EXTRA_WARN),) 170 - cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(KERNELDOC) -none $(KDOCFLAGS) \ 170 + cmd_checkdoc = PYTHONDONTWRITEBYTECODE=1 $(PYTHON3) $(KERNELDOC) -none $(KDOCFLAGS) \ 171 171 $(if $(findstring 2, $(KBUILD_EXTRA_WARN)), -Wall) \ 172 172 $< 173 173 endif

+14 -1

scripts/Makefile.vmlinux

··· 102 102 # modules.builtin.modinfo 103 103 # --------------------------------------------------------------------------- 104 104 105 + # .modinfo in vmlinux.unstripped is aligned to 8 bytes for compatibility with 106 + # tools that expect vmlinux to have sufficiently aligned sections but the 107 + # additional bytes used for padding .modinfo to satisfy this requirement break 108 + # certain versions of kmod with 109 + # 110 + # depmod: ERROR: kmod_builtin_iter_next: unexpected string without modname prefix 111 + # 112 + # Strip the trailing padding bytes after extracting .modinfo to comply with 113 + # what kmod expects to parse. 114 + quiet_cmd_modules_builtin_modinfo = GEN $@ 115 + cmd_modules_builtin_modinfo = $(cmd_objcopy); \ 116 + sed -i 's/\x00\+$$/\x00/g' $@ 117 + 105 118 OBJCOPYFLAGS_modules.builtin.modinfo := -j .modinfo -O binary 106 119 107 120 targets += modules.builtin.modinfo 108 121 modules.builtin.modinfo: vmlinux.unstripped FORCE 109 - $(call if_changed,objcopy) 122 + $(call if_changed,modules_builtin_modinfo) 110 123 111 124 # modules.builtin 112 125 # ---------------------------------------------------------------------------

+8 -6

scripts/decode_stacktrace.sh

··· 277 277 fi 278 278 done 279 279 280 - if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then 281 - words[$last-1]="${words[$last-1]} ${words[$last]}" 282 - unset words[$last] spaces[$last] 283 - last=$(( $last - 1 )) 284 - fi 285 - 286 280 # Extract info after the symbol if present. E.g.: 287 281 # func_name+0x54/0x80 (P) 288 282 # ^^^ ··· 285 291 local info_str="" 286 292 if [[ ${words[$last]} =~ \([A-Z]*\) ]]; then 287 293 info_str=${words[$last]} 294 + unset words[$last] spaces[$last] 295 + last=$(( $last - 1 )) 296 + fi 297 + 298 + # Join module name with its build id if present, as these were 299 + # split during tokenization (e.g. "[module" and "modbuildid]"). 300 + if [[ ${words[$last]} =~ ^[0-9a-f]+\] ]]; then 301 + words[$last-1]="${words[$last-1]} ${words[$last]}" 288 302 unset words[$last] spaces[$last] 289 303 last=$(( $last - 1 )) 290 304 fi

+5

tools/arch/x86/include/asm/cpufeatures.h

··· 444 444 #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ 445 445 #define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ 446 446 #define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ 447 + #define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */ 447 448 #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ 448 449 #define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ 449 450 #define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ ··· 496 495 #define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */ 497 496 #define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */ 498 497 #define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */ 498 + #define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */ 499 + #define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */ 500 + #define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */ 499 501 500 502 /* 501 503 * BUG word(s) ··· 555 551 #define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ 556 552 #define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ 557 553 #define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */ 554 + #define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */ 558 555 #endif /* _ASM_X86_CPUFEATURES_H */

+19 -1

tools/arch/x86/include/asm/msr-index.h

··· 315 315 #define PERF_CAP_PT_IDX 16 316 316 317 317 #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 318 + 319 + #define PERF_CAP_LBR_FMT 0x3f 318 320 #define PERF_CAP_PEBS_TRAP BIT_ULL(6) 319 321 #define PERF_CAP_ARCH_REG BIT_ULL(7) 320 322 #define PERF_CAP_PEBS_FORMAT 0xf00 323 + #define PERF_CAP_FW_WRITES BIT_ULL(13) 321 324 #define PERF_CAP_PEBS_BASELINE BIT_ULL(14) 322 325 #define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17) 323 326 #define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \ ··· 636 633 #define MSR_AMD_PPIN 0xc00102f1 637 634 #define MSR_AMD64_CPUID_FN_7 0xc0011002 638 635 #define MSR_AMD64_CPUID_FN_1 0xc0011004 636 + 637 + #define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005 638 + #define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54 639 + #define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT) 640 + 639 641 #define MSR_AMD64_LS_CFG 0xc0011020 640 642 #define MSR_AMD64_DC_CFG 0xc0011022 641 643 #define MSR_AMD64_TW_CFG 0xc0011023 ··· 709 701 #define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT) 710 702 #define MSR_AMD64_SNP_SMT_PROT_BIT 17 711 703 #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) 712 - #define MSR_AMD64_SNP_RESV_BIT 18 704 + #define MSR_AMD64_SNP_SECURE_AVIC_BIT 18 705 + #define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT) 706 + #define MSR_AMD64_SNP_RESV_BIT 19 713 707 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) 708 + #define MSR_AMD64_SAVIC_CONTROL 0xc0010138 709 + #define MSR_AMD64_SAVIC_EN_BIT 0 710 + #define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT) 711 + #define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1 712 + #define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT) 714 713 #define MSR_AMD64_RMP_BASE 0xc0010132 715 714 #define MSR_AMD64_RMP_END 0xc0010133 716 715 #define MSR_AMD64_RMP_CFG 0xc0010136 ··· 750 735 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 751 736 #define 
MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301 752 737 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302 738 + #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303 753 739 754 740 /* AMD Hardware Feedback Support MSRs */ 755 741 #define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500 ··· 1241 1225 /* - AMD: */ 1242 1226 #define MSR_IA32_MBA_BW_BASE 0xc0000200 1243 1227 #define MSR_IA32_SMBA_BW_BASE 0xc0000280 1228 + #define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd 1229 + #define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff 1244 1230 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 1245 1231 1246 1232 /* AMD-V MSRs */

+34

tools/arch/x86/include/uapi/asm/kvm.h

··· 35 35 #define MC_VECTOR 18 36 36 #define XM_VECTOR 19 37 37 #define VE_VECTOR 20 38 + #define CP_VECTOR 21 39 + 40 + #define HV_VECTOR 28 41 + #define VC_VECTOR 29 42 + #define SX_VECTOR 30 38 43 39 44 /* Select x86 specific features in <linux/kvm.h> */ 40 45 #define __KVM_HAVE_PIT ··· 415 410 struct kvm_xcr xcrs[KVM_MAX_XCRS]; 416 411 __u64 padding[16]; 417 412 }; 413 + 414 + #define KVM_X86_REG_TYPE_MSR 2 415 + #define KVM_X86_REG_TYPE_KVM 3 416 + 417 + #define KVM_X86_KVM_REG_SIZE(reg) \ 418 + ({ \ 419 + reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \ 420 + }) 421 + 422 + #define KVM_X86_REG_TYPE_SIZE(type, reg) \ 423 + ({ \ 424 + __u64 type_size = (__u64)type << 32; \ 425 + \ 426 + type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \ 427 + type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \ 428 + 0; \ 429 + type_size; \ 430 + }) 431 + 432 + #define KVM_X86_REG_ID(type, index) \ 433 + (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index) 434 + 435 + #define KVM_X86_REG_MSR(index) \ 436 + KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index) 437 + #define KVM_X86_REG_KVM(index) \ 438 + KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index) 439 + 440 + /* KVM-defined registers starting from 0 */ 441 + #define KVM_REG_GUEST_SSP 0 418 442 419 443 #define KVM_SYNC_X86_REGS (1UL << 0) 420 444 #define KVM_SYNC_X86_SREGS (1UL << 1)

+4

tools/arch/x86/include/uapi/asm/svm.h

··· 118 118 #define SVM_VMGEXIT_AP_CREATE 1 119 119 #define SVM_VMGEXIT_AP_DESTROY 2 120 120 #define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 121 + #define SVM_VMGEXIT_SAVIC 0x8000001a 122 + #define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0 123 + #define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1 124 + #define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL 121 125 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd 122 126 #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe 123 127 #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \

+5 -1

tools/arch/x86/include/uapi/asm/vmx.h

··· 94 94 #define EXIT_REASON_BUS_LOCK 74 95 95 #define EXIT_REASON_NOTIFY 75 96 96 #define EXIT_REASON_TDCALL 77 97 + #define EXIT_REASON_MSR_READ_IMM 84 98 + #define EXIT_REASON_MSR_WRITE_IMM 85 97 99 98 100 #define VMX_EXIT_REASONS \ 99 101 { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ ··· 160 158 { EXIT_REASON_TPAUSE, "TPAUSE" }, \ 161 159 { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ 162 160 { EXIT_REASON_NOTIFY, "NOTIFY" }, \ 163 - { EXIT_REASON_TDCALL, "TDCALL" } 161 + { EXIT_REASON_TDCALL, "TDCALL" }, \ 162 + { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \ 163 + { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" } 164 164 165 165 #define VMX_EXIT_REASON_FLAGS \ 166 166 { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }

+1 -1

tools/include/asm-generic/bitops/__fls.h

··· 10 10 * 11 11 * Undefined if no set bit exists, so code should check against 0 first. 12 12 */ 13 - static __always_inline unsigned int generic___fls(unsigned long word) 13 + static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word) 14 14 { 15 15 unsigned int num = BITS_PER_LONG - 1; 16 16

+1 -1

tools/include/asm-generic/bitops/fls.h

··· 10 10 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. 11 11 */ 12 12 13 - static __always_inline int generic_fls(unsigned int x) 13 + static __always_inline __attribute_const__ int generic_fls(unsigned int x) 14 14 { 15 15 int r = 32; 16 16

+2 -2

tools/include/asm-generic/bitops/fls64.h

··· 16 16 * at position 64. 17 17 */ 18 18 #if BITS_PER_LONG == 32 19 - static __always_inline int fls64(__u64 x) 19 + static __always_inline __attribute_const__ int fls64(__u64 x) 20 20 { 21 21 __u32 h = x >> 32; 22 22 if (h) ··· 24 24 return fls(x); 25 25 } 26 26 #elif BITS_PER_LONG == 64 27 - static __always_inline int fls64(__u64 x) 27 + static __always_inline __attribute_const__ int fls64(__u64 x) 28 28 { 29 29 if (x == 0) 30 30 return 0;

+51 -12

tools/include/uapi/drm/drm.h

··· 597 597 int drm_dd_minor; 598 598 }; 599 599 600 - /* DRM_IOCTL_GEM_CLOSE ioctl argument type */ 600 + /** 601 + * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl. 602 + * @handle: Handle of the object to be closed. 603 + * @pad: Padding. 604 + * 605 + * Releases the handle to an mm object. 606 + */ 601 607 struct drm_gem_close { 602 - /** Handle of the object to be closed. */ 603 608 __u32 handle; 604 609 __u32 pad; 605 610 }; 606 611 607 - /* DRM_IOCTL_GEM_FLINK ioctl argument type */ 612 + /** 613 + * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl. 614 + * @handle: Handle for the object being named. 615 + * @name: Returned global name. 616 + * 617 + * Create a global name for an object, returning the name. 618 + * 619 + * Note that the name does not hold a reference; when the object 620 + * is freed, the name goes away. 621 + */ 608 622 struct drm_gem_flink { 609 - /** Handle for the object being named */ 610 623 __u32 handle; 611 - 612 - /** Returned global name */ 613 624 __u32 name; 614 625 }; 615 626 616 - /* DRM_IOCTL_GEM_OPEN ioctl argument type */ 627 + /** 628 + * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl. 629 + * @name: Name of object being opened. 630 + * @handle: Returned handle for the object. 631 + * @size: Returned size of the object 632 + * 633 + * Open an object using the global name, returning a handle and the size. 634 + * 635 + * This handle (of course) holds a reference to the object, so the object 636 + * will not go away until the handle is deleted. 637 + */ 617 638 struct drm_gem_open { 618 - /** Name of object being opened */ 619 639 __u32 name; 620 - 621 - /** Returned handle for the object */ 622 640 __u32 handle; 623 - 624 - /** Returned size of the object */ 625 641 __u64 size; 642 + }; 643 + 644 + /** 645 + * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl. 646 + * @handle: The handle of a gem object. 647 + * @new_handle: An available gem handle. 
648 + * 649 + * This ioctl changes the handle of a GEM object to the specified one. 650 + * The new handle must be unused. On success the old handle is closed 651 + * and all further IOCTL should refer to the new handle only. 652 + * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle. 653 + */ 654 + struct drm_gem_change_handle { 655 + __u32 handle; 656 + __u32 new_handle; 626 657 }; 627 658 628 659 /** ··· 1339 1308 * The call will fail if the name contains whitespaces or non-printable chars. 1340 1309 */ 1341 1310 #define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name) 1311 + 1312 + /** 1313 + * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle 1314 + * 1315 + * Some applications (notably CRIU) need objects to have specific gem handles. 1316 + * This ioctl changes the object at one gem handle to use a new gem handle. 1317 + */ 1318 + #define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle) 1342 1319 1343 1320 /* 1344 1321 * Device specific ioctls should only be in their respective headers

+3

tools/include/uapi/linux/kvm.h

··· 962 962 #define KVM_CAP_ARM_EL2_E2H0 241 963 963 #define KVM_CAP_RISCV_MP_STATE_RESET 242 964 964 #define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 965 + #define KVM_CAP_GUEST_MEMFD_FLAGS 244 965 966 966 967 struct kvm_irq_routing_irqchip { 967 968 __u32 irqchip; ··· 1599 1598 #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) 1600 1599 1601 1600 #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) 1601 + #define GUEST_MEMFD_FLAG_MMAP (1ULL << 0) 1602 + #define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1) 1602 1603 1603 1604 struct kvm_create_guest_memfd { 1604 1605 __u64 size;

+12

tools/net/ynl/pyynl/ynl_gen_c.py

··· 861 861 return [f"{member} = {self.c_name};", 862 862 f"{presence} = n_{self.c_name};"] 863 863 864 + def free_needs_iter(self): 865 + return self.sub_type == 'nest' 866 + 867 + def _free_lines(self, ri, var, ref): 868 + lines = [] 869 + if self.sub_type == 'nest': 870 + lines += [ 871 + f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)", 872 + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', 873 + ] 874 + lines += f"free({var}->{ref}{self.c_name});", 875 + return lines 864 876 865 877 class TypeNestTypeValue(Type): 866 878 def _complex_member_type(self, ri):

+1

tools/perf/arch/x86/entry/syscalls/syscall_64.tbl

··· 345 345 333 common io_pgetevents sys_io_pgetevents 346 346 334 common rseq sys_rseq 347 347 335 common uretprobe sys_uretprobe 348 + 336 common uprobe sys_uprobe 348 349 # don't use numbers 387 through 423, add new calls after the last 349 350 # 'common' entry 350 351 424 common pidfd_send_signal sys_pidfd_send_signal

+1

tools/perf/trace/beauty/include/uapi/linux/fcntl.h

··· 111 111 #define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ 112 112 113 113 #define FD_PIDFS_ROOT -10002 /* Root of the pidfs filesystem */ 114 + #define FD_NSFS_ROOT -10003 /* Root of the nsfs filesystem */ 114 115 #define FD_INVALID -10009 /* Invalid file descriptor: -10000 - EBADF = -10009 */ 115 116 116 117 /* Generic flags for the *at(2) family of syscalls. */

+4 -1

tools/perf/trace/beauty/include/uapi/linux/fs.h

··· 430 430 /* buffered IO that drops the cache after reading or writing data */ 431 431 #define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) 432 432 433 + /* prevent pipe and socket writes from raising SIGPIPE */ 434 + #define RWF_NOSIGNAL ((__force __kernel_rwf_t)0x00000100) 435 + 433 436 /* mask of flags supported by the kernel */ 434 437 #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ 435 438 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ 436 - RWF_DONTCACHE) 439 + RWF_DONTCACHE | RWF_NOSIGNAL) 437 440 438 441 #define PROCFS_IOCTL_MAGIC 'f' 439 442

+10

tools/perf/trace/beauty/include/uapi/linux/prctl.h

··· 177 177 178 178 #define PR_GET_TID_ADDRESS 40 179 179 180 + /* 181 + * Flags for PR_SET_THP_DISABLE are only applicable when disabling. Bit 0 182 + * is reserved, so PR_GET_THP_DISABLE can return "1 | flags", to effectively 183 + * return "1" when no flags were specified for PR_SET_THP_DISABLE. 184 + */ 180 185 #define PR_SET_THP_DISABLE 41 186 + /* 187 + * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / 188 + * VM_HUGEPAGE, MADV_COLLAPSE). 189 + */ 190 + # define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) 181 191 #define PR_GET_THP_DISABLE 42 182 192 183 193 /*

+5 -1

tools/perf/util/symbol.c

··· 112 112 // 'N' first seen in: 113 113 // ffffffff9b35d130 N __pfx__RNCINvNtNtNtCsbDUBuN8AbD4_4core4iter8adapters3map12map_try_foldjNtCs6vVzKs5jPr6_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ 114 114 // a seemingly Rust mangled name 115 + // Ditto for '1': 116 + // root@x1:~# grep ' 1 ' /proc/kallsyms 117 + // ffffffffb098bc00 1 __pfx__RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ 118 + // ffffffffb098bc10 1 _RNCINvNtNtNtCsfwaGRd4cjqE_4core4iter8adapters3map12map_try_foldjNtCskFudTml27HW_12drm_panic_qr7VersionuINtNtNtBa_3ops12control_flow11ControlFlowB10_ENcB10_0NCINvNvNtNtNtB8_6traits8iterator8Iterator4find5checkB10_NCNvMB12_B10_13from_segments0E0E0B12_ 115 119 char symbol_type = toupper(__symbol_type); 116 120 return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || 117 - __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N'; 121 + __symbol_type == 'u' || __symbol_type == 'l' || __symbol_type == 'N' || __symbol_type == '1'; 118 122 } 119 123 120 124 static int prefix_underscores_count(const char *str)

+1

tools/testing/selftests/drivers/net/Makefile

··· 18 18 netcons_fragmented_msg.sh \ 19 19 netcons_overflow.sh \ 20 20 netcons_sysdata.sh \ 21 + netcons_torture.sh \ 21 22 netpoll_basic.py \ 22 23 ping.py \ 23 24 psp.py \

+2

tools/testing/selftests/drivers/net/bonding/Makefile

··· 14 14 dev_addr_lists.sh \ 15 15 mode-1-recovery-updelay.sh \ 16 16 mode-2-recovery-updelay.sh \ 17 + netcons_over_bonding.sh \ 17 18 # end of TEST_PROGS 18 19 19 20 TEST_FILES := \ ··· 25 24 26 25 TEST_INCLUDES := \ 27 26 ../../../net/lib.sh \ 27 + ../lib/sh/lib_netcons.sh \ 28 28 ../../../net/forwarding/lib.sh \ 29 29 # end of TEST_INCLUDES 30 30

+4

tools/testing/selftests/drivers/net/bonding/config

··· 1 1 CONFIG_BONDING=y 2 2 CONFIG_BRIDGE=y 3 + CONFIG_CONFIGFS_FS=y 3 4 CONFIG_DUMMY=y 4 5 CONFIG_INET_ESP=y 5 6 CONFIG_INET_ESP_OFFLOAD=y ··· 10 9 CONFIG_NET_ACT_GACT=y 11 10 CONFIG_NET_CLS_FLOWER=y 12 11 CONFIG_NET_CLS_MATCHALL=m 12 + CONFIG_NETCONSOLE=m 13 + CONFIG_NETCONSOLE_DYNAMIC=y 14 + CONFIG_NETCONSOLE_EXTENDED_LOG=y 13 15 CONFIG_NETDEVSIM=m 14 16 CONFIG_NET_SCH_INGRESS=y 15 17 CONFIG_NLMON=y

+361

tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh

··· 1 + #!/usr/bin/env bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + # 4 + # This selftest exercises trying to have multiple netpoll users at the same 5 + # time. 6 + # 7 + # This selftest has multiple small tests inside, and the goal is to 8 + # get interfaces with bonding and netconsole in different orders in order 9 + # to catch any possible issue. 10 + # 11 + # The main test consists of four interfaces being created using netdevsim; two 12 + # of them are bonded to serve as the netconsole's transmit interface. The 13 + # remaining two interfaces are similarly bonded and assigned to a separate 14 + # network namespace, which acts as the receive interface, where socat monitors 15 + # for incoming messages. 16 + # 17 + # A netconsole message is then sent to ensure it is properly received across 18 + # this configuration. 19 + # 20 + # Later, run a few other tests, to make sure that bonding and netconsole 21 + # cannot coexist. 22 + # 23 + # The test's objective is to exercise netpoll usage when managed simultaneously 24 + # by multiple subsystems (netconsole and bonding).
25 + # 26 + # Author: Breno Leitao <leitao@debian.org> 27 + 28 + set -euo pipefail 29 + 30 + SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") 31 + 32 + source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh 33 + 34 + modprobe netdevsim 2> /dev/null || true 35 + modprobe netconsole 2> /dev/null || true 36 + modprobe bonding 2> /dev/null || true 37 + modprobe veth 2> /dev/null || true 38 + 39 + # The content of kmsg will be saved to the following file 40 + OUTPUT_FILE="/tmp/${TARGET}" 41 + 42 + # Check for basic system dependency and exit if not found 43 + check_for_dependencies 44 + # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) 45 + echo "6 5" > /proc/sys/kernel/printk 46 + # Remove the namespace, interfaces and netconsole target on exit 47 + trap cleanup_bond EXIT 48 + 49 + FORMAT="extended" 50 + IP_VERSION="ipv4" 51 + VETH0="veth"$(( RANDOM % 256)) 52 + VETH1="veth"$((256 + RANDOM % 256)) 53 + TXNS="" 54 + RXNS="" 55 + 56 + # Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with 57 + # the bonding interfaces 58 + function setup_bonding_ifaces() { 59 + local RAND=$(( RANDOM % 100 )) 60 + BOND_TX_MAIN_IF="bond_tx_$RAND" 61 + BOND_RX_MAIN_IF="bond_rx_$RAND" 62 + 63 + # Setup TX 64 + if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr 65 + then 66 + echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2 67 + # only clean nsim ifaces and namespace. Nothing else has been 68 + # initialized 69 + cleanup_bond_nsim 70 + trap - EXIT 71 + exit "${ksft_skip}" 72 + fi 73 + 74 + # create_netdevsim() got the interface up, but it needs to be down 75 + # before being enslaved.
76 + ip -n "${TXNS}" \ 77 + link set "${BOND_TX1_SLAVE_IF}" down 78 + ip -n "${TXNS}" \ 79 + link set "${BOND_TX2_SLAVE_IF}" down 80 + ip -n "${TXNS}" \ 81 + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" 82 + ip -n "${TXNS}" \ 83 + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" 84 + ip -n "${TXNS}" \ 85 + link set "${BOND_TX_MAIN_IF}" up 86 + 87 + # Setup RX 88 + ip -n "${RXNS}" \ 89 + link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr 90 + ip -n "${RXNS}" \ 91 + link set "${BOND_RX1_SLAVE_IF}" down 92 + ip -n "${RXNS}" \ 93 + link set "${BOND_RX2_SLAVE_IF}" down 94 + ip -n "${RXNS}" \ 95 + link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" 96 + ip -n "${RXNS}" \ 97 + link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" 98 + ip -n "${RXNS}" \ 99 + link set "${BOND_RX_MAIN_IF}" up 100 + 101 + export DSTIF="${BOND_RX_MAIN_IF}" 102 + export SRCIF="${BOND_TX_MAIN_IF}" 103 + } 104 + 105 + # Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface 106 + # and the other two will be bond to the RX interface (on the other namespace) 107 + function create_ifaces_bond() { 108 + BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}") 109 + BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}") 110 + BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}") 111 + BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}") 112 + } 113 + 114 + # netdevsim link BOND_TX to BOND_RX interfaces 115 + function link_ifaces_bond() { 116 + local BOND_TX1_SLAVE_IFIDX 117 + local BOND_TX2_SLAVE_IFIDX 118 + local BOND_RX1_SLAVE_IFIDX 119 + local BOND_RX2_SLAVE_IFIDX 120 + local TXNS_FD 121 + local RXNS_FD 122 + 123 + BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ 124 + cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex) 125 + BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ 126 + cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex) 127 + BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ 
128 + cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex) 129 + BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ 130 + cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex) 131 + 132 + exec {TXNS_FD}</var/run/netns/"${TXNS}" 133 + exec {RXNS_FD}</var/run/netns/"${RXNS}" 134 + 135 + # Linking TX ifaces to the RX ones (on the other namespace) 136 + echo "${TXNS_FD}:$BOND_TX1_SLAVE_IFIDX $RXNS_FD:$BOND_RX1_SLAVE_IFIDX" \ 137 + > "$NSIM_DEV_SYS_LINK" 138 + echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \ 139 + > "$NSIM_DEV_SYS_LINK" 140 + 141 + exec {TXNS_FD}<&- 142 + exec {RXNS_FD}<&- 143 + } 144 + 145 + function create_all_ifaces() { 146 + # setup_ns function is coming from lib.sh 147 + setup_ns TXNS RXNS 148 + export NAMESPACE="${RXNS}" 149 + 150 + # Create two interfaces for RX and two for TX 151 + create_ifaces_bond 152 + # Link netlink ifaces 153 + link_ifaces_bond 154 + } 155 + 156 + # configure DSTIF and SRCIF IPs 157 + function configure_ifaces_ips() { 158 + local IP_VERSION=${1:-"ipv4"} 159 + select_ipv4_or_ipv6 "${IP_VERSION}" 160 + 161 + ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}" 162 + ip -n "${RXNS}" link set "${DSTIF}" up 163 + 164 + ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}" 165 + ip -n "${TXNS}" link set "${SRCIF}" up 166 + } 167 + 168 + function test_enable_netpoll_on_enslaved_iface() { 169 + echo 0 > "${NETCONS_PATH}"/enabled 170 + 171 + # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and 172 + # linked to BOND_RX1_SLAVE_IF inside the namespace. 173 + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name 174 + 175 + # This should fail with the following message in dmesg: 176 + # netpoll: netconsole: ethX is a slave device, aborting 177 + set +e 178 + enable_netcons_ns 2> /dev/null 179 + set -e 180 + 181 + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] 182 + then 183 + echo "test failed: Bonding and netpoll cannot co-exists." 
>&2 184 + exit "${ksft_fail}" 185 + fi 186 + } 187 + 188 + function test_delete_bond_and_reenable_target() { 189 + ip -n "${TXNS}" \ 190 + link delete "${BOND_TX_MAIN_IF}" type bond 191 + 192 + # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore 193 + # netpoll can be plugged in there 194 + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name 195 + 196 + # this should work, since the interface is not enslaved 197 + enable_netcons_ns 198 + 199 + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] 200 + then 201 + echo "test failed: Unable to start netpoll on an unbond iface." >&2 202 + exit "${ksft_fail}" 203 + fi 204 + } 205 + 206 + # Send a netconsole message to the netconsole target 207 + function test_send_netcons_msg_through_bond_iface() { 208 + # Listen for netconsole port inside the namespace and 209 + # destination interface 210 + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & 211 + # Wait for socat to start and listen to the port. 212 + wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}" 213 + # Send the message 214 + echo "${MSG}: ${TARGET}" > /dev/kmsg 215 + # Wait until socat saves the file to disk 216 + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" 217 + # Make sure the message was received in the dst part 218 + # and exit 219 + validate_result "${OUTPUT_FILE}" "${FORMAT}" 220 + # kill socat in case it is still running 221 + pkill_socat 222 + } 223 + 224 + # BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF. 225 + # Given BOND_TX_MAIN_IF was deleted, recreate it first 226 + function test_enslave_netcons_enabled_iface { 227 + # netconsole got disabled while the interface was down 228 + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] 229 + then 230 + echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2 231 + exit "${ksft_fail}" 232 + fi 233 + 234 + # recreate the bonding iface. 
it got deleted by previous 235 + # test (test_delete_bond_and_reenable_target) 236 + ip -n "${TXNS}" \ 237 + link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr 238 + 239 + # sub-interface needs to be down before attaching to bonding 240 + # This will also disable netconsole. 241 + ip -n "${TXNS}" \ 242 + link set "${BOND_TX1_SLAVE_IF}" down 243 + ip -n "${TXNS}" \ 244 + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" 245 + ip -n "${TXNS}" \ 246 + link set "${BOND_TX_MAIN_IF}" up 247 + 248 + # netconsole got disabled while the interface was down 249 + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] 250 + then 251 + echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2 252 + exit "${ksft_fail}" 253 + fi 254 + } 255 + 256 + # Get netconsole enabled on a bonding interface and attach a second 257 + # sub-interface. 258 + function test_enslave_iface_to_bond { 259 + # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now 260 + echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name 261 + enable_netcons_ns 262 + 263 + # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is 264 + # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF. 265 + ip -n "${TXNS}" \ 266 + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" 267 + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] 268 + then 269 + echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2 270 + exit "${ksft_fail}" 271 + fi 272 + } 273 + 274 + function test_enslave_iff_disabled_netpoll_iface { 275 + local ret 276 + 277 + # Create two interfaces. veth interfaces are known to have 278 + # IFF_DISABLE_NETPOLL set 279 + if ! ip link add "${VETH0}" type veth peer name "${VETH1}" 280 + then 281 + echo "Failed to create veth TX interface. Is CONFIG_VETH set?"
>&2 282 + exit "${ksft_skip}" 283 + fi 284 + set +e 285 + # This will print RTNETLINK answers: Device or resource busy 286 + ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null 287 + ret=$? 288 + set -e 289 + if [[ $ret -eq 0 ]] 290 + then 291 + echo "test failed: veth interface could not be enslaved" 292 + exit "${ksft_fail}" 293 + fi 294 + } 295 + 296 + # Given that netconsole picks the current net namespace, we need to enable it 297 + # from inside the TXNS namespace 298 + function enable_netcons_ns() { 299 + ip netns exec "${TXNS}" sh -c \ 300 + "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled" 301 + } 302 + 303 + #################### 304 + # Tests start here # 305 + #################### 306 + 307 + # Create regular interfaces using netdevsim and link them 308 + create_all_ifaces 309 + 310 + # Setup the bonding interfaces 311 + # BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF 312 + # BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF 313 + setup_bonding_ifaces 314 + 315 + # Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF 316 + configure_ifaces_ips "${IP_VERSION}" 317 + 318 + _create_dynamic_target "${FORMAT}" "${NETCONS_PATH}" 319 + enable_netcons_ns 320 + set_user_data 321 + 322 + # Test #1 : Create an bonding interface and attach netpoll into 323 + # the bonding interface. Netconsole/netpoll should work on 324 + # the bonding interface. 325 + test_send_netcons_msg_through_bond_iface 326 + echo "test #1: netpoll on bonding interface worked. Test passed" >&2 327 + 328 + # Test #2: Attach netpoll to an enslaved interface 329 + # Try to attach netpoll to an enslaved sub-interface (while still being part of 330 + # a bonding interface), which shouldn't be allowed 331 + test_enable_netpoll_on_enslaved_iface 332 + echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." 
>&2 333 + 334 + # Test #3: Unplug the sub-interface from bond and enable netconsole 335 + # Detach the interface from a bonding interface and attach netpoll again 336 + test_delete_bond_and_reenable_target 337 + echo "test #3: Able to attach to an unbound interface. Test passed." >&2 338 + 339 + # Test #4: Enslave a sub-interface that had netconsole enabled 340 + # Try to enslave an interface that has netconsole/netpoll enabled. 341 + # Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it 342 + test_enslave_netcons_enabled_iface 343 + echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2 344 + 345 + # Test #5: Enslave a sub-interface to a bonding interface 346 + # Enslave an interface to a bond interface that has netpoll attached 347 + # At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of 348 + # it. Netconsole is currently disabled 349 + test_enslave_iface_to_bond 350 + echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2 351 + 352 + # Test #6: Enslave an IFF_DISABLE_NETPOLL sub-interface to a bonding interface 353 + # At this stage, BOND_TX_MAIN_IF has both sub-interfaces and netconsole is 354 + # enabled. This test will try to enslave a veth (IFF_DISABLE_NETPOLL) interface 355 + # and it should fail, with netpoll: veth0 doesn't support polling 356 + test_enslave_iff_disabled_netpoll_iface 357 + echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2 358 + 359 + cleanup_bond 360 + trap - EXIT 361 + exit "${EXIT_STATUS}"

+63 -15

tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh

··· 11 11 LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") 12 12 13 13 SRCIF="" # to be populated later 14 + SRCIP="" # to be populated later 14 15 SRCIP4="192.0.2.1" 15 16 SRCIP6="fc00::1" 16 17 DSTIF="" # to be populated later 18 + DSTIP="" # to be populated later 17 19 DSTIP4="192.0.2.2" 18 20 DSTIP6="fc00::2" 19 21 ··· 30 28 # NAMESPACE will be populated by setup_ns with a random value 31 29 NAMESPACE="" 32 30 33 - # IDs for netdevsim 31 + # IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test 32 + # or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the 33 + # same time. 34 34 NSIM_DEV_1_ID=$((256 + RANDOM % 256)) 35 35 NSIM_DEV_2_ID=$((512 + RANDOM % 256)) 36 + NSIM_BOND_TX_1=$((768 + RANDOM % 256)) 37 + NSIM_BOND_TX_2=$((1024 + RANDOM % 256)) 38 + NSIM_BOND_RX_1=$((1280 + RANDOM % 256)) 39 + NSIM_BOND_RX_2=$((1536 + RANDOM % 256)) 36 40 NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" 41 + NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" 37 42 38 43 # Used to create and delete namespaces 39 44 source "${LIBDIR}"/../../../../net/lib.sh 40 45 41 46 # Create netdevsim interfaces 42 47 create_ifaces() { 43 - 44 48 echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" 45 49 echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" 46 50 udevadm settle 2> /dev/null || true ··· 121 113 configure_ip 122 114 } 123 115 124 - function create_dynamic_target() { 125 - local FORMAT=${1:-"extended"} 116 + function _create_dynamic_target() { 117 + local FORMAT="${1:?FORMAT parameter required}" 118 + local NCPATH="${2:?NCPATH parameter required}" 126 119 127 120 DSTMAC=$(ip netns exec "${NAMESPACE}" \ 128 121 ip link show "${DSTIF}" | awk '/ether/ {print $2}') 129 122 130 123 # Create a dynamic target 131 - mkdir "${NETCONS_PATH}" 124 + mkdir "${NCPATH}" 132 125 133 - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip 134 - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip 135 - echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac 136 - echo "${SRCIF}" > 
"${NETCONS_PATH}"/dev_name 126 + echo "${DSTIP}" > "${NCPATH}"/remote_ip 127 + echo "${SRCIP}" > "${NCPATH}"/local_ip 128 + echo "${DSTMAC}" > "${NCPATH}"/remote_mac 129 + echo "${SRCIF}" > "${NCPATH}"/dev_name 137 130 138 131 if [ "${FORMAT}" == "basic" ] 139 132 then 140 133 # Basic target does not support release 141 - echo 0 > "${NETCONS_PATH}"/release 142 - echo 0 > "${NETCONS_PATH}"/extended 134 + echo 0 > "${NCPATH}"/release 135 + echo 0 > "${NCPATH}"/extended 143 136 elif [ "${FORMAT}" == "extended" ] 144 137 then 145 - echo 1 > "${NETCONS_PATH}"/extended 138 + echo 1 > "${NCPATH}"/extended 146 139 fi 140 + } 147 141 148 - echo 1 > "${NETCONS_PATH}"/enabled 142 + function create_dynamic_target() { 143 + local FORMAT=${1:-"extended"} 144 + local NCPATH=${2:-"$NETCONS_PATH"} 145 + _create_dynamic_target "${FORMAT}" "${NCPATH}" 146 + 147 + echo 1 > "${NCPATH}"/enabled 149 148 150 149 # This will make sure that the kernel was able to 151 150 # load the netconsole driver configuration. The console message ··· 200 185 echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk 201 186 } 202 187 203 - function cleanup() { 188 + function cleanup_netcons() { 204 189 # delete netconsole dynamic reconfiguration 205 - echo 0 > "${NETCONS_PATH}"/enabled 190 + # do not fail if the target is already disabled 191 + if [[ ! -d "${NETCONS_PATH}" ]] 192 + then 193 + # in some cases this is called before netcons path is created 194 + return 195 + fi 196 + if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]] 197 + then 198 + echo 0 > "${NETCONS_PATH}"/enabled || true 199 + fi 206 200 # Remove all the keys that got created during the selftest 207 201 find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete 208 202 # Remove the configfs entry 209 203 rmdir "${NETCONS_PATH}" 204 + } 210 205 206 + function cleanup() { 207 + cleanup_netcons 211 208 do_cleanup 212 209 } 213 210 ··· 395 368 # otherwise the packet could be missed, and the test will fail. 
Happens 396 369 # more frequently on IPv6 397 370 sleep 1 371 + } 372 + 373 + # Clean up netdevsim ifaces created for bonding test 374 + function cleanup_bond_nsim() { 375 + ip -n "${TXNS}" \ 376 + link delete "${BOND_TX_MAIN_IF}" type bond || true 377 + ip -n "${RXNS}" \ 378 + link delete "${BOND_RX_MAIN_IF}" type bond || true 379 + 380 + cleanup_netdevsim "$NSIM_BOND_TX_1" 381 + cleanup_netdevsim "$NSIM_BOND_TX_2" 382 + cleanup_netdevsim "$NSIM_BOND_RX_1" 383 + cleanup_netdevsim "$NSIM_BOND_RX_2" 384 + } 385 + 386 + # cleanup tests that use bonding interfaces 387 + function cleanup_bond() { 388 + cleanup_netcons 389 + cleanup_bond_nsim 390 + cleanup_all_ns 391 + ip link delete "${VETH0}" || true 398 392 }

+130

tools/testing/selftests/drivers/net/netcons_torture.sh

··· 1 + #!/usr/bin/env bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + # Repeatedly send kernel messages, toggles netconsole targets on and off, 5 + # creates and deletes targets in parallel, and toggles the source interface to 6 + # simulate stress conditions. 7 + # 8 + # This test aims to verify the robustness of netconsole under dynamic 9 + # configurations and concurrent operations. 10 + # 11 + # The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make 12 + # sure no issues are reported. 13 + # 14 + # Author: Breno Leitao <leitao@debian.org> 15 + 16 + set -euo pipefail 17 + 18 + SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") 19 + 20 + source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh 21 + 22 + # Number of times the main loop runs 23 + ITERATIONS=${1:-150} 24 + 25 + # Only test extended format 26 + FORMAT="extended" 27 + # And ipv6 only 28 + IP_VERSION="ipv6" 29 + 30 + # Create, enable and delete some targets. 31 + create_and_delete_random_target() { 32 + COUNT=2 33 + RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_) 34 + 35 + if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \ 36 + [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then 37 + echo "Function didn't finish yet, skipping it."
>&2 38 + return 39 + fi 40 + 41 + # enable COUNT targets 42 + for i in $(seq ${COUNT}) 43 + do 44 + RND_TARGET="${RND_PREFIX}"${i} 45 + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" 46 + 47 + # Basic population so the target can come up 48 + _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}" 49 + done 50 + 51 + echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg 52 + # disable them all 53 + for i in $(seq ${COUNT}) 54 + do 55 + RND_TARGET="${RND_PREFIX}"${i} 56 + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" 57 + if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]] 58 + then 59 + echo 0 > "${RND_TARGET_PATH}"/enabled 60 + fi 61 + rmdir "${RND_TARGET_PATH}" 62 + done 63 + } 64 + 65 + # Disable and enable the target mid-air, while messages 66 + # are being transmitted. 67 + toggle_netcons_target() { 68 + for i in $(seq 2) 69 + do 70 + if [ ! -d "${NETCONS_PATH}" ] 71 + then 72 + break 73 + fi 74 + echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true 75 + # Try to enable a bit harder, given it might fail to enable 76 + # Write to `enabled` might fail depending on the lock, which is 77 + # highly contentious here 78 + for _ in $(seq 5) 79 + do 80 + echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true 81 + done 82 + done 83 + } 84 + 85 + toggle_iface(){ 86 + ip link set "${SRCIF}" down 87 + ip link set "${SRCIF}" up 88 + } 89 + 90 + # Start here 91 + 92 + modprobe netdevsim 2> /dev/null || true 93 + modprobe netconsole 2> /dev/null || true 94 + 95 + # Check for basic system dependency and exit if not found 96 + check_for_dependencies 97 + # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) 98 + echo "6 5" > /proc/sys/kernel/printk 99 + # Remove the namespace, interfaces and netconsole target on exit 100 + trap cleanup EXIT 101 + # Create one namespace and two interfaces 102 + set_network "${IP_VERSION}" 103 + # Create a dynamic target for netconsole 104 + create_dynamic_target "${FORMAT}" 105 + 106 
+ for i in $(seq "$ITERATIONS") 107 + do 108 + for _ in $(seq 10) 109 + do 110 + echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg 111 + done 112 + wait 113 + 114 + if (( i % 30 == 0 )); then 115 + toggle_netcons_target & 116 + fi 117 + 118 + if (( i % 50 == 0 )); then 119 + # create some targets, enable them, send msg and disable 120 + # all in a parallel thread 121 + create_and_delete_random_target & 122 + fi 123 + 124 + if (( i % 70 == 0 )); then 125 + toggle_iface & 126 + fi 127 + done 128 + wait 129 + 130 + exit "${EXIT_STATUS}"

+2

tools/testing/selftests/iommu/iommufd.c

··· 2638 2638 ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); 2639 2639 ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); 2640 2640 ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); 2641 + /* Unmap of empty is success */ 2642 + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); 2641 2643 2642 2644 /* UNMAP_FLAG_ALL requires 0 iova/size */ 2643 2645 ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));

+2 -2

tools/testing/selftests/iommu/iommufd_utils.h

··· 1044 1044 }; 1045 1045 1046 1046 while (nvevents--) { 1047 - if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), 1048 - &trigger_vevent_cmd)) 1047 + if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), 1048 + &trigger_vevent_cmd)) 1049 1049 return -1; 1050 1050 } 1051 1051 return 0;

+3

tools/testing/selftests/kvm/arm64/get-reg-list.c

··· 63 63 REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), 64 64 REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), 65 65 REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), 66 + REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP), 66 67 REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), 67 68 REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), 68 69 REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), 69 70 REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), 70 71 REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), 72 + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), 71 73 }; 72 74 73 75 bool filter_reg(__u64 reg) ··· 720 718 SYS_REG(VMPIDR_EL2), 721 719 SYS_REG(SCTLR_EL2), 722 720 SYS_REG(ACTLR_EL2), 721 + SYS_REG(SCTLR2_EL2), 723 722 SYS_REG(HCR_EL2), 724 723 SYS_REG(MDCR_EL2), 725 724 SYS_REG(CPTR_EL2),

+8 -1

tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c

··· 15 15 #include "gic_v3.h" 16 16 #include "processor.h" 17 17 18 + #define GITS_COLLECTION_TARGET_SHIFT 16 19 + 18 20 static u64 its_read_u64(unsigned long offset) 19 21 { 20 22 return readq_relaxed(GITS_BASE_GVA + offset); ··· 165 163 its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); 166 164 } 167 165 166 + static u64 procnum_to_rdbase(u32 vcpu_id) 167 + { 168 + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; 169 + } 170 + 168 171 #define GITS_CMDQ_POLL_ITERATIONS 0 169 172 170 173 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) ··· 224 217 225 218 its_encode_cmd(&cmd, GITS_CMD_MAPC); 226 219 its_encode_collection(&cmd, collection_id); 227 - its_encode_target(&cmd, vcpu_id); 220 + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); 228 221 its_encode_valid(&cmd, valid); 229 222 230 223 its_send_cmd(cmdq_base, &cmd);

+2

tools/testing/selftests/net/forwarding/local_termination.sh

··· 176 176 local rcv_dmac=$(mac_get $rcv_if_name) 177 177 local should_receive 178 178 179 + setup_wait 180 + 179 181 tcpdump_start $rcv_if_name 180 182 181 183 mc_route_prepare $send_if_name

+13 -5

tools/testing/selftests/net/mptcp/mptcp_connect.c

··· 710 710 711 711 bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); 712 712 if (bw < 0) { 713 - if (cfg_rcv_trunc) 714 - return 0; 713 + /* expected reset, continue to read */ 714 + if (cfg_rcv_trunc && 715 + (errno == ECONNRESET || 716 + errno == EPIPE)) { 717 + fds.events &= ~POLLOUT; 718 + continue; 719 + } 720 + 715 721 perror("write"); 716 722 return 111; 717 723 } ··· 743 737 } 744 738 745 739 if (fds.revents & (POLLERR | POLLNVAL)) { 746 - if (cfg_rcv_trunc) 747 - return 0; 740 + if (cfg_rcv_trunc) { 741 + fds.events &= ~(POLLERR | POLLNVAL); 742 + continue; 743 + } 748 744 fprintf(stderr, "Unexpected revents: " 749 745 "POLLERR/POLLNVAL(%x)\n", fds.revents); 750 746 return 5; ··· 1449 1441 */ 1450 1442 if (cfg_truncate < 0) { 1451 1443 cfg_rcv_trunc = true; 1452 - signal(SIGPIPE, handle_signal); 1444 + signal(SIGPIPE, SIG_IGN); 1453 1445 } 1454 1446 break; 1455 1447 case 'j':

+1 -1

tools/testing/selftests/net/mptcp/mptcp_connect.sh

··· 492 492 "than expected (${expect_synrx})" 493 493 retc=1 494 494 fi 495 - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then 495 + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then 496 496 if [ ${stat_ooo_now} -eq 0 ]; then 497 497 mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ 498 498 "than expected (${expect_ackrx})"

+45 -45

tools/testing/selftests/net/mptcp/mptcp_join.sh

··· 2547 2547 if reset "remove single subflow"; then 2548 2548 pm_nl_set_limits $ns1 0 1 2549 2549 pm_nl_set_limits $ns2 0 1 2550 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow 2550 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup 2551 2551 addr_nr_ns2=-1 speed=slow \ 2552 2552 run_tests $ns1 $ns2 10.0.1.1 2553 2553 chk_join_nr 1 1 1 ··· 2560 2560 if reset "remove multiple subflows"; then 2561 2561 pm_nl_set_limits $ns1 0 2 2562 2562 pm_nl_set_limits $ns2 0 2 2563 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow 2564 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow 2563 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup 2564 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup 2565 2565 addr_nr_ns2=-2 speed=slow \ 2566 2566 run_tests $ns1 $ns2 10.0.1.1 2567 2567 chk_join_nr 2 2 2 ··· 2572 2572 # single address, remove 2573 2573 if reset "remove single address"; then 2574 2574 pm_nl_set_limits $ns1 0 1 2575 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 2575 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup 2576 2576 pm_nl_set_limits $ns2 1 1 2577 2577 addr_nr_ns1=-1 speed=slow \ 2578 2578 run_tests $ns1 $ns2 10.0.1.1 ··· 2585 2585 # subflow and signal, remove 2586 2586 if reset "remove subflow and signal"; then 2587 2587 pm_nl_set_limits $ns1 0 2 2588 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 2588 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup 2589 2589 pm_nl_set_limits $ns2 1 2 2590 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow 2590 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup 2591 2591 addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ 2592 2592 run_tests $ns1 $ns2 10.0.1.1 2593 2593 chk_join_nr 2 2 2 ··· 2599 2599 # subflows and signal, remove 2600 2600 if reset "remove subflows and signal"; then 2601 2601 pm_nl_set_limits $ns1 0 3 2602 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 2602 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup 2603 2603 pm_nl_set_limits $ns2 1 3 2604 - pm_nl_add_endpoint $ns2 10.0.3.2 
flags subflow 2605 - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow 2604 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup 2605 + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup 2606 2606 addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ 2607 2607 run_tests $ns1 $ns2 10.0.1.1 2608 2608 chk_join_nr 3 3 3 ··· 2614 2614 # addresses remove 2615 2615 if reset "remove addresses"; then 2616 2616 pm_nl_set_limits $ns1 3 3 2617 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 2618 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal 2619 - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal 2617 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 2618 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup 2619 + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup 2620 2620 pm_nl_set_limits $ns2 3 3 2621 2621 addr_nr_ns1=-3 speed=10 \ 2622 2622 run_tests $ns1 $ns2 10.0.1.1 ··· 2629 2629 # invalid addresses remove 2630 2630 if reset "remove invalid addresses"; then 2631 2631 pm_nl_set_limits $ns1 3 3 2632 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal 2632 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup 2633 2633 # broadcast IP: no packet for this address will be received on ns1 2634 - pm_nl_add_endpoint $ns1 224.0.0.1 flags signal 2635 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal 2634 + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup 2635 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup 2636 2636 pm_nl_set_limits $ns2 2 2 2637 2637 addr_nr_ns1=-3 speed=10 \ 2638 2638 run_tests $ns1 $ns2 10.0.1.1 ··· 2646 2646 # subflows and signal, flush 2647 2647 if reset "flush subflows and signal"; then 2648 2648 pm_nl_set_limits $ns1 0 3 2649 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 2649 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup 2650 2650 pm_nl_set_limits $ns2 1 3 2651 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow 2652 - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow 2651 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup 2652 
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup 2653 2653 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ 2654 2654 run_tests $ns1 $ns2 10.0.1.1 2655 2655 chk_join_nr 3 3 3 ··· 2662 2662 if reset "flush subflows"; then 2663 2663 pm_nl_set_limits $ns1 3 3 2664 2664 pm_nl_set_limits $ns2 3 3 2665 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 2666 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow 2667 - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow 2665 + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150 2666 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup 2667 + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup 2668 2668 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ 2669 2669 run_tests $ns1 $ns2 10.0.1.1 2670 2670 chk_join_nr 3 3 3 ··· 2681 2681 # addresses flush 2682 2682 if reset "flush addresses"; then 2683 2683 pm_nl_set_limits $ns1 3 3 2684 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 2685 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal 2686 - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal 2684 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 2685 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup 2686 + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup 2687 2687 pm_nl_set_limits $ns2 3 3 2688 2688 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ 2689 2689 run_tests $ns1 $ns2 10.0.1.1 ··· 2696 2696 # invalid addresses flush 2697 2697 if reset "flush invalid addresses"; then 2698 2698 pm_nl_set_limits $ns1 3 3 2699 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal 2700 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal 2701 - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal 2699 + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup 2700 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup 2701 + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup 2702 2702 pm_nl_set_limits $ns2 3 3 2703 2703 addr_nr_ns1=-8 speed=slow \ 2704 2704 run_tests $ns1 $ns2 10.0.1.1 ··· 3952 3952 continue_if mptcp_lib_has_file 
'/proc/sys/net/mptcp/pm_type'; then 3953 3953 set_userspace_pm $ns1 3954 3954 pm_nl_set_limits $ns2 2 2 3955 - { speed=5 \ 3955 + { test_linkfail=128 speed=5 \ 3956 3956 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 3957 3957 local tests_pid=$! 3958 3958 wait_mpj $ns1 ··· 3977 3977 chk_mptcp_info subflows 0 subflows 0 3978 3978 chk_subflows_total 1 1 3979 3979 kill_events_pids 3980 - mptcp_lib_kill_wait $tests_pid 3980 + mptcp_lib_kill_group_wait $tests_pid 3981 3981 fi 3982 3982 3983 3983 # userspace pm create destroy subflow ··· 3985 3985 continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then 3986 3986 set_userspace_pm $ns2 3987 3987 pm_nl_set_limits $ns1 0 1 3988 - { speed=5 \ 3988 + { test_linkfail=128 speed=5 \ 3989 3989 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 3990 3990 local tests_pid=$! 3991 3991 wait_mpj $ns2 ··· 4005 4005 chk_mptcp_info subflows 0 subflows 0 4006 4006 chk_subflows_total 1 1 4007 4007 kill_events_pids 4008 - mptcp_lib_kill_wait $tests_pid 4008 + mptcp_lib_kill_group_wait $tests_pid 4009 4009 fi 4010 4010 4011 4011 # userspace pm create id 0 subflow ··· 4013 4013 continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then 4014 4014 set_userspace_pm $ns2 4015 4015 pm_nl_set_limits $ns1 0 1 4016 - { speed=5 \ 4016 + { test_linkfail=128 speed=5 \ 4017 4017 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 4018 4018 local tests_pid=$! 4019 4019 wait_mpj $ns2 ··· 4026 4026 chk_mptcp_info subflows 1 subflows 1 4027 4027 chk_subflows_total 2 2 4028 4028 kill_events_pids 4029 - mptcp_lib_kill_wait $tests_pid 4029 + mptcp_lib_kill_group_wait $tests_pid 4030 4030 fi 4031 4031 4032 4032 # userspace pm remove initial subflow ··· 4034 4034 continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then 4035 4035 set_userspace_pm $ns2 4036 4036 pm_nl_set_limits $ns1 0 1 4037 - { speed=5 \ 4037 + { test_linkfail=128 speed=5 \ 4038 4038 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 4039 4039 local tests_pid=$! 
4040 4040 wait_mpj $ns2 ··· 4050 4050 chk_mptcp_info subflows 1 subflows 1 4051 4051 chk_subflows_total 1 1 4052 4052 kill_events_pids 4053 - mptcp_lib_kill_wait $tests_pid 4053 + mptcp_lib_kill_group_wait $tests_pid 4054 4054 fi 4055 4055 4056 4056 # userspace pm send RM_ADDR for ID 0 ··· 4058 4058 continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then 4059 4059 set_userspace_pm $ns1 4060 4060 pm_nl_set_limits $ns2 1 1 4061 - { speed=5 \ 4061 + { test_linkfail=128 speed=5 \ 4062 4062 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 4063 4063 local tests_pid=$! 4064 4064 wait_mpj $ns1 ··· 4076 4076 chk_mptcp_info subflows 1 subflows 1 4077 4077 chk_subflows_total 1 1 4078 4078 kill_events_pids 4079 - mptcp_lib_kill_wait $tests_pid 4079 + mptcp_lib_kill_group_wait $tests_pid 4080 4080 fi 4081 4081 } 4082 4082 ··· 4089 4089 pm_nl_set_limits $ns1 2 2 4090 4090 pm_nl_set_limits $ns2 2 2 4091 4091 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 4092 - { speed=slow \ 4092 + { test_linkfail=128 speed=slow \ 4093 4093 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 4094 4094 local tests_pid=$! 4095 4095 ··· 4106 4106 pm_nl_add_endpoint $ns2 10.0.2.2 flags signal 4107 4107 pm_nl_check_endpoint "modif is allowed" \ 4108 4108 $ns2 10.0.2.2 id 1 flags signal 4109 - mptcp_lib_kill_wait $tests_pid 4109 + mptcp_lib_kill_group_wait $tests_pid 4110 4110 fi 4111 4111 4112 4112 if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && ··· 4116 4116 pm_nl_set_limits $ns2 0 3 4117 4117 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow 4118 4118 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow 4119 - { test_linkfail=4 speed=5 \ 4119 + { test_linkfail=128 speed=5 \ 4120 4120 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 4121 4121 local tests_pid=$! 
4122 4122 ··· 4161 4161 chk_mptcp_info subflows 3 subflows 3 4162 4162 done 4163 4163 4164 - mptcp_lib_kill_wait $tests_pid 4164 + mptcp_lib_kill_group_wait $tests_pid 4165 4165 4166 4166 kill_events_pids 4167 4167 chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 ··· 4194 4194 # broadcast IP: no packet for this address will be received on ns1 4195 4195 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal 4196 4196 pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal 4197 - { test_linkfail=4 speed=5 \ 4197 + { test_linkfail=128 speed=5 \ 4198 4198 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 4199 4199 local tests_pid=$! 4200 4200 ··· 4235 4235 wait_mpj $ns2 4236 4236 chk_subflow_nr "after re-re-add ID 0" 3 4237 4237 chk_mptcp_info subflows 3 subflows 3 4238 - mptcp_lib_kill_wait $tests_pid 4238 + mptcp_lib_kill_group_wait $tests_pid 4239 4239 4240 4240 kill_events_pids 4241 4241 chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 ··· 4267 4267 # broadcast IP: no packet for this address will be received on ns1 4268 4268 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal 4269 4269 pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow 4270 - { test_linkfail=4 speed=20 \ 4270 + { test_linkfail=128 speed=20 \ 4271 4271 run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null 4272 4272 local tests_pid=$! 4273 4273 ··· 4283 4283 wait_mpj $ns2 4284 4284 pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal 4285 4285 wait_mpj $ns2 4286 - mptcp_lib_kill_wait $tests_pid 4286 + mptcp_lib_kill_group_wait $tests_pid 4287 4287 4288 4288 join_syn_tx=3 join_connect_err=1 \ 4289 4289 chk_join_nr 2 2 2

+21

tools/testing/selftests/net/mptcp/mptcp_lib.sh

··· 350 350 wait "${1}" 2>/dev/null 351 351 } 352 352 353 + # $1: PID 354 + mptcp_lib_pid_list_children() { 355 + local curr="${1}" 356 + # invoke 'ps' only once 357 + local pids="${2:-"$(ps o pid,ppid)"}" 358 + 359 + echo "${curr}" 360 + 361 + local pid 362 + for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do 363 + mptcp_lib_pid_list_children "${pid}" "${pids}" 364 + done 365 + } 366 + 367 + # $1: PID 368 + mptcp_lib_kill_group_wait() { 369 + # Some users might not have procps-ng: cannot use "kill -- -PID" 370 + mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null 371 + wait "${1}" 2>/dev/null 372 + } 373 + 353 374 # $1: IP address 354 375 mptcp_lib_is_v6() { 355 376 [ -z "${1##*:*}" ]

+44

tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json

··· 961 961 "teardown": [ 962 962 "$TC qdisc del dev $DUMMY root" 963 963 ] 964 + }, 965 + { 966 + "id": "4989", 967 + "name": "Try to add an fq child to an ingress qdisc", 968 + "category": [ 969 + "qdisc", 970 + "ingress" 971 + ], 972 + "plugins": { 973 + "requires": "nsPlugin" 974 + }, 975 + "setup": [ 976 + "$TC qdisc add dev $DUMMY handle ffff:0 ingress" 977 + ], 978 + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", 979 + "expExitCode": "2", 980 + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", 981 + "matchJSON": [], 982 + "matchCount": "1", 983 + "teardown": [ 984 + "$TC qdisc del dev $DUMMY ingress" 985 + ] 986 + }, 987 + { 988 + "id": "c2b0", 989 + "name": "Try to add an fq child to a clsact qdisc", 990 + "category": [ 991 + "qdisc", 992 + "ingress" 993 + ], 994 + "plugins": { 995 + "requires": "nsPlugin" 996 + }, 997 + "setup": [ 998 + "$TC qdisc add dev $DUMMY handle ffff:0 clsact" 999 + ], 1000 + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", 1001 + "expExitCode": "2", 1002 + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", 1003 + "matchJSON": [], 1004 + "matchCount": "1", 1005 + "teardown": [ 1006 + "$TC qdisc del dev $DUMMY clsact" 1007 + ] 964 1008 } 965 1009 ]

+1 -1

tools/tracing/latency/latency-collector.c

··· 1725 1725 "-n, --notrace\t\tIf latency is detected, do not print out the content of\n" 1726 1726 "\t\t\tthe trace file to standard output\n\n" 1727 1727 1728 - "-t, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" 1728 + "-e, --threads NRTHR\tRun NRTHR threads for printing. Default is %d.\n\n" 1729 1729 1730 1730 "-r, --random\t\tArbitrarily sleep a certain amount of time, default\n" 1731 1731 "\t\t\t%ld ms, before reading the trace file. The\n"

+33 -14

virt/kvm/guest_memfd.c

··· 623 623 return r; 624 624 } 625 625 626 - void kvm_gmem_unbind(struct kvm_memory_slot *slot) 626 + static void __kvm_gmem_unbind(struct kvm_memory_slot *slot, struct kvm_gmem *gmem) 627 627 { 628 628 unsigned long start = slot->gmem.pgoff; 629 629 unsigned long end = start + slot->npages; 630 - struct kvm_gmem *gmem; 631 - struct file *file; 632 630 633 - /* 634 - * Nothing to do if the underlying file was already closed (or is being 635 - * closed right now), kvm_gmem_release() invalidates all bindings. 636 - */ 637 - file = kvm_gmem_get_file(slot); 638 - if (!file) 639 - return; 640 - 641 - gmem = file->private_data; 642 - 643 - filemap_invalidate_lock(file->f_mapping); 644 631 xa_store_range(&gmem->bindings, start, end - 1, NULL, GFP_KERNEL); 645 632 646 633 /* ··· 635 648 * cannot see this memslot. 636 649 */ 637 650 WRITE_ONCE(slot->gmem.file, NULL); 651 + } 652 + 653 + void kvm_gmem_unbind(struct kvm_memory_slot *slot) 654 + { 655 + struct file *file; 656 + 657 + /* 658 + * Nothing to do if the underlying file was _already_ closed, as 659 + * kvm_gmem_release() invalidates and nullifies all bindings. 660 + */ 661 + if (!slot->gmem.file) 662 + return; 663 + 664 + file = kvm_gmem_get_file(slot); 665 + 666 + /* 667 + * However, if the file is _being_ closed, then the bindings need to be 668 + * removed as kvm_gmem_release() might not run until after the memslot 669 + * is freed. Note, modifying the bindings is safe even though the file 670 + * is dying as kvm_gmem_release() nullifies slot->gmem.file under 671 + * slots_lock, and only puts its reference to KVM after destroying all 672 + * bindings. I.e. reaching this point means kvm_gmem_release() hasn't 673 + * yet destroyed the bindings or freed the gmem_file, and can't do so 674 + * until the caller drops slots_lock. 
675 + */ 676 + if (!file) { 677 + __kvm_gmem_unbind(slot, slot->gmem.file->private_data); 678 + return; 679 + } 680 + 681 + filemap_invalidate_lock(file->f_mapping); 682 + __kvm_gmem_unbind(slot, file->private_data); 638 683 filemap_invalidate_unlock(file->f_mapping); 639 684 640 685 fput(file);