Merge tag 'v5.13-rc7' into usb-next · tjh.dev/kernel@cfb0276

+2 -2

Documentation/riscv/vm-layout.rst

··· 58 58 | 59 59 ____________________________________________________________|____________________________________________________________ 60 60 | | | | 61 - ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules 62 - ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel, BPF 61 + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF 62 + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel 63 63 __________________|____________|__________________|_________|____________________________________________________________

+5 -5

Documentation/vm/slub.rst

··· 181 181 Here is a sample of slub debug output:: 182 182 183 183 ==================================================================== 184 - BUG kmalloc-8: Redzone overwritten 184 + BUG kmalloc-8: Right Redzone overwritten 185 185 -------------------------------------------------------------------- 186 186 187 187 INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc ··· 189 189 INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58 190 190 INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554 191 191 192 - Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ 193 - Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005 194 - Redzone 0xc90f6d28: 00 cc cc cc . 195 - Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ 192 + Bytes b4 (0xc90f6d10): 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ 193 + Object (0xc90f6d20): 31 30 31 39 2e 30 30 35 1019.005 194 + Redzone (0xc90f6d28): 00 cc cc cc . 195 + Padding (0xc90f6d50): 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ 196 196 197 197 [<c010523d>] dump_trace+0x63/0x1eb 198 198 [<c01053df>] show_trace_log_lvl+0x1a/0x2f

+1

MAINTAINERS

··· 16560 16560 16561 16561 SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS 16562 16562 M: Karsten Graul <kgraul@linux.ibm.com> 16563 + M: Guvenc Gulce <guvenc@linux.ibm.com> 16563 16564 L: linux-s390@vger.kernel.org 16564 16565 S: Supported 16565 16566 W: http://www.ibm.com/developerworks/linux/linux390/

+6 -3

Makefile

··· 2 2 VERSION = 5 3 3 PATCHLEVEL = 13 4 4 SUBLEVEL = 0 5 - EXTRAVERSION = -rc6 6 - NAME = Frozen Wasteland 5 + EXTRAVERSION = -rc7 6 + NAME = Opossums on Parade 7 7 8 8 # *DOCUMENTATION* 9 9 # To see a list of typical targets execute "make help" ··· 929 929 # Limit inlining across translation units to reduce binary size 930 930 KBUILD_LDFLAGS += -mllvm -import-instr-limit=5 931 931 932 - # Check for frame size exceeding threshold during prolog/epilog insertion. 932 + # Check for frame size exceeding threshold during prolog/epilog insertion 933 + # when using lld < 13.0.0. 933 934 ifneq ($(CONFIG_FRAME_WARN),0) 935 + ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0) 934 936 KBUILD_LDFLAGS += -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN) 937 + endif 935 938 endif 936 939 endif 937 940

+1

arch/arc/include/uapi/asm/sigcontext.h

··· 18 18 */ 19 19 struct sigcontext { 20 20 struct user_regs_struct regs; 21 + struct user_regs_arcv2 v2abi; 21 22 }; 22 23 23 24 #endif /* _ASM_ARC_SIGCONTEXT_H */

+43

arch/arc/kernel/signal.c

··· 61 61 unsigned int sigret_magic; 62 62 }; 63 63 64 + static int save_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) 65 + { 66 + int err = 0; 67 + #ifndef CONFIG_ISA_ARCOMPACT 68 + struct user_regs_arcv2 v2abi; 69 + 70 + v2abi.r30 = regs->r30; 71 + #ifdef CONFIG_ARC_HAS_ACCL_REGS 72 + v2abi.r58 = regs->r58; 73 + v2abi.r59 = regs->r59; 74 + #else 75 + v2abi.r58 = v2abi.r59 = 0; 76 + #endif 77 + err = __copy_to_user(&mctx->v2abi, &v2abi, sizeof(v2abi)); 78 + #endif 79 + return err; 80 + } 81 + 82 + static int restore_arcv2_regs(struct sigcontext *mctx, struct pt_regs *regs) 83 + { 84 + int err = 0; 85 + #ifndef CONFIG_ISA_ARCOMPACT 86 + struct user_regs_arcv2 v2abi; 87 + 88 + err = __copy_from_user(&v2abi, &mctx->v2abi, sizeof(v2abi)); 89 + 90 + regs->r30 = v2abi.r30; 91 + #ifdef CONFIG_ARC_HAS_ACCL_REGS 92 + regs->r58 = v2abi.r58; 93 + regs->r59 = v2abi.r59; 94 + #endif 95 + #endif 96 + return err; 97 + } 98 + 64 99 static int 65 100 stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, 66 101 sigset_t *set) ··· 129 94 130 95 err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), &uregs.scratch, 131 96 sizeof(sf->uc.uc_mcontext.regs.scratch)); 97 + 98 + if (is_isa_arcv2()) 99 + err |= save_arcv2_regs(&(sf->uc.uc_mcontext), regs); 100 + 132 101 err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); 133 102 134 103 return err ? -EFAULT : 0; ··· 148 109 err |= __copy_from_user(&uregs.scratch, 149 110 &(sf->uc.uc_mcontext.regs.scratch), 150 111 sizeof(sf->uc.uc_mcontext.regs.scratch)); 112 + 113 + if (is_isa_arcv2()) 114 + err |= restore_arcv2_regs(&(sf->uc.uc_mcontext), regs); 115 + 151 116 if (err) 152 117 return -EFAULT; 153 118

+1 -1

arch/arc/kernel/vmlinux.lds.S

··· 57 57 .init.ramfs : { INIT_RAM_FS } 58 58 59 59 . = ALIGN(PAGE_SIZE); 60 - _stext = .; 61 60 62 61 HEAD_TEXT_SECTION 63 62 INIT_TEXT_SECTION(L1_CACHE_BYTES) ··· 82 83 83 84 .text : { 84 85 _text = .; 86 + _stext = .; 85 87 TEXT_TEXT 86 88 SCHED_TEXT 87 89 CPUIDLE_TEXT

+1 -1

arch/powerpc/include/asm/jump_label.h

··· 50 50 1098: nop; \ 51 51 .pushsection __jump_table, "aw"; \ 52 52 .long 1098b - ., LABEL - .; \ 53 - FTR_ENTRY_LONG KEY; \ 53 + FTR_ENTRY_LONG KEY - .; \ 54 54 .popsection 55 55 #endif 56 56

+4 -5

arch/powerpc/kernel/signal_64.c

··· 902 902 unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block); 903 903 user_write_access_end(); 904 904 905 + /* Save the siginfo outside of the unsafe block. */ 906 + if (copy_siginfo_to_user(&frame->info, &ksig->info)) 907 + goto badframe; 908 + 905 909 /* Make sure signal handler doesn't get spurious FP exceptions */ 906 910 tsk->thread.fp_state.fpscr = 0; 907 911 ··· 918 914 goto badframe; 919 915 regs->nip = (unsigned long) &frame->tramp[0]; 920 916 } 921 - 922 - 923 - /* Save the siginfo outside of the unsafe block. */ 924 - if (copy_siginfo_to_user(&frame->info, &ksig->info)) 925 - goto badframe; 926 917 927 918 /* Allocate a dummy caller frame for the signal handler. */ 928 919 newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;

+1

arch/powerpc/mm/mem.c

··· 20 20 #include <asm/machdep.h> 21 21 #include <asm/rtas.h> 22 22 #include <asm/kasan.h> 23 + #include <asm/sparsemem.h> 23 24 #include <asm/svm.h> 24 25 25 26 #include <mm/mmu_decl.h>

+1 -1

arch/powerpc/perf/core-book3s.c

··· 2254 2254 bool use_siar = regs_use_siar(regs); 2255 2255 unsigned long siar = mfspr(SPRN_SIAR); 2256 2256 2257 - if (ppmu->flags & PPMU_P10_DD1) { 2257 + if (ppmu && (ppmu->flags & PPMU_P10_DD1)) { 2258 2258 if (siar) 2259 2259 return siar; 2260 2260 else

+1

arch/riscv/Kconfig.socs

··· 14 14 select CLK_SIFIVE 15 15 select CLK_SIFIVE_PRCI 16 16 select SIFIVE_PLIC 17 + select RISCV_ERRATA_ALTERNATIVE 17 18 select ERRATA_SIFIVE 18 19 help 19 20 This enables support for SiFive SoC platform hardware.

+1 -1

arch/riscv/Makefile

··· 16 16 CC_FLAGS_FTRACE := -fpatchable-function-entry=8 17 17 endif 18 18 19 - ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) 19 + ifeq ($(CONFIG_CMODEL_MEDLOW),y) 20 20 KBUILD_CFLAGS_MODULE += -mcmodel=medany 21 21 endif 22 22

+1 -1

arch/riscv/boot/dts/sifive/fu740-c000.dtsi

··· 273 273 cache-size = <2097152>; 274 274 cache-unified; 275 275 interrupt-parent = <&plic0>; 276 - interrupts = <19 20 21 22>; 276 + interrupts = <19 21 22 20>; 277 277 reg = <0x0 0x2010000 0x0 0x1000>; 278 278 }; 279 279 gpio: gpio@10060000 {

+2 -3

arch/riscv/include/asm/pgtable.h

··· 30 30 31 31 #define BPF_JIT_REGION_SIZE (SZ_128M) 32 32 #ifdef CONFIG_64BIT 33 - /* KASLR should leave at least 128MB for BPF after the kernel */ 34 - #define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) 35 - #define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) 33 + #define BPF_JIT_REGION_START (BPF_JIT_REGION_END - BPF_JIT_REGION_SIZE) 34 + #define BPF_JIT_REGION_END (MODULES_END) 36 35 #else 37 36 #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) 38 37 #define BPF_JIT_REGION_END (VMALLOC_END)

+5 -5

arch/riscv/mm/kasan_init.c

··· 169 169 170 170 void __init kasan_init(void) 171 171 { 172 - phys_addr_t _start, _end; 172 + phys_addr_t p_start, p_end; 173 173 u64 i; 174 174 175 175 /* ··· 189 189 (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); 190 190 191 191 /* Populate the linear mapping */ 192 - for_each_mem_range(i, &_start, &_end) { 193 - void *start = (void *)__va(_start); 194 - void *end = (void *)__va(_end); 192 + for_each_mem_range(i, &p_start, &p_end) { 193 + void *start = (void *)__va(p_start); 194 + void *end = (void *)__va(p_end); 195 195 196 196 if (start >= end) 197 197 break; ··· 201 201 202 202 /* Populate kernel, BPF, modules mapping */ 203 203 kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), 204 - kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END)); 204 + kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G)); 205 205 206 206 for (i = 0; i < PTRS_PER_PTE; i++) 207 207 set_pte(&kasan_early_shadow_pte[i],

+2 -2

arch/s390/kernel/entry.S

··· 651 651 .Lcleanup_sie_mcck: 652 652 larl %r13,.Lsie_entry 653 653 slgr %r9,%r13 654 - larl %r13,.Lsie_skip 654 + lghi %r13,.Lsie_skip - .Lsie_entry 655 655 clgr %r9,%r13 656 - jh .Lcleanup_sie_int 656 + jhe .Lcleanup_sie_int 657 657 oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST 658 658 .Lcleanup_sie_int: 659 659 BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)

+10 -3

arch/x86/include/asm/fpu/internal.h

··· 578 578 * PKRU state is switched eagerly because it needs to be valid before we 579 579 * return to userland e.g. for a copy_to_user() operation. 580 580 */ 581 - if (current->mm) { 581 + if (!(current->flags & PF_KTHREAD)) { 582 + /* 583 + * If the PKRU bit in xsave.header.xfeatures is not set, 584 + * then the PKRU component was in init state, which means 585 + * XRSTOR will set PKRU to 0. If the bit is not set then 586 + * get_xsave_addr() will return NULL because the PKRU value 587 + * in memory is not valid. This means pkru_val has to be 588 + * set to 0 and not to init_pkru_value. 589 + */ 582 590 pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); 583 - if (pk) 584 - pkru_val = pk->pkru; 591 + pkru_val = pk ? pk->pkru : 0; 585 592 } 586 593 __write_pkru(pkru_val); 587 594 }

+1

arch/x86/kernel/cpu/sgx/virt.c

··· 212 212 list_splice_tail(&secs_pages, &zombie_secs_pages); 213 213 mutex_unlock(&zombie_secs_pages_lock); 214 214 215 + xa_destroy(&vepc->page_array); 215 216 kfree(vepc); 216 217 217 218 return 0;

+35 -19

arch/x86/kernel/fpu/signal.c

··· 307 307 return 0; 308 308 } 309 309 310 - if (!access_ok(buf, size)) 311 - return -EACCES; 310 + if (!access_ok(buf, size)) { 311 + ret = -EACCES; 312 + goto out; 313 + } 312 314 313 - if (!static_cpu_has(X86_FEATURE_FPU)) 314 - return fpregs_soft_set(current, NULL, 315 - 0, sizeof(struct user_i387_ia32_struct), 316 - NULL, buf) != 0; 315 + if (!static_cpu_has(X86_FEATURE_FPU)) { 316 + ret = fpregs_soft_set(current, NULL, 0, 317 + sizeof(struct user_i387_ia32_struct), 318 + NULL, buf); 319 + goto out; 320 + } 317 321 318 322 if (use_xsave()) { 319 323 struct _fpx_sw_bytes fx_sw_user; ··· 373 369 fpregs_unlock(); 374 370 return 0; 375 371 } 372 + 373 + /* 374 + * The above did an FPU restore operation, restricted to 375 + * the user portion of the registers, and failed, but the 376 + * microcode might have modified the FPU registers 377 + * nevertheless. 378 + * 379 + * If the FPU registers do not belong to current, then 380 + * invalidate the FPU register state otherwise the task might 381 + * preempt current and return to user space with corrupted 382 + * FPU registers. 383 + * 384 + * In case current owns the FPU registers then no further 385 + * action is required. The fixup below will handle it 386 + * correctly. 387 + */ 388 + if (test_thread_flag(TIF_NEED_FPU_LOAD)) 389 + __cpu_invalidate_fpregs_state(); 390 + 376 391 fpregs_unlock(); 377 392 } else { 378 393 /* ··· 400 377 */ 401 378 ret = __copy_from_user(&env, buf, sizeof(env)); 402 379 if (ret) 403 - goto err_out; 380 + goto out; 404 381 envp = &env; 405 382 } 406 383 ··· 428 405 if (use_xsave() && !fx_only) { 429 406 u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; 430 407 431 - if (using_compacted_format()) { 432 - ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); 433 - } else { 434 - ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); 435 - 436 - if (!ret && state_size > offsetof(struct xregs_state, header)) 437 - ret = validate_user_xstate_header(&fpu->state.xsave.header); 438 - } 408 + ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); 439 409 if (ret) 440 - goto err_out; 410 + goto out; 441 411 442 412 sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, 443 413 fx_only); ··· 450 434 ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); 451 435 if (ret) { 452 436 ret = -EFAULT; 453 - goto err_out; 437 + goto out; 454 438 } 455 439 456 440 sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, ··· 468 452 } else { 469 453 ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size); 470 454 if (ret) 471 - goto err_out; 455 + goto out; 472 456 473 457 fpregs_lock(); 474 458 ret = copy_kernel_to_fregs_err(&fpu->state.fsave); ··· 479 463 fpregs_deactivate(fpu); 480 464 fpregs_unlock(); 481 465 482 - err_out: 466 + out: 483 467 if (ret) 484 468 fpu__clear_user_states(fpu); 485 469 return ret;

+1

arch/x86/kvm/cpuid.c

··· 655 655 if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) 656 656 entry->ecx = F(RDPID); 657 657 ++array->nent; 658 + break; 658 659 default: 659 660 break; 660 661 }

+3

arch/x86/kvm/lapic.c

··· 1410 1410 if (!apic_x2apic_mode(apic)) 1411 1411 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI); 1412 1412 1413 + if (alignment + len > 4) 1414 + return 1; 1415 + 1413 1416 if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) 1414 1417 return 1; 1415 1418

+25 -1

arch/x86/kvm/mmu/mmu.c

··· 4739 4739 context->inject_page_fault = kvm_inject_page_fault; 4740 4740 } 4741 4741 4742 + static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu) 4743 + { 4744 + union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false); 4745 + 4746 + /* 4747 + * Nested MMUs are used only for walking L2's gva->gpa, they never have 4748 + * shadow pages of their own and so "direct" has no meaning. Set it 4749 + * to "true" to try to detect bogus usage of the nested MMU. 4750 + */ 4751 + role.base.direct = true; 4752 + 4753 + if (!is_paging(vcpu)) 4754 + role.base.level = 0; 4755 + else if (is_long_mode(vcpu)) 4756 + role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL : 4757 + PT64_ROOT_4LEVEL; 4758 + else if (is_pae(vcpu)) 4759 + role.base.level = PT32E_ROOT_LEVEL; 4760 + else 4761 + role.base.level = PT32_ROOT_LEVEL; 4762 + 4763 + return role; 4764 + } 4765 + 4742 4766 static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) 4743 4767 { 4744 - union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false); 4768 + union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu); 4745 4769 struct kvm_mmu *g_context = &vcpu->arch.nested_mmu; 4746 4770 4747 4771 if (new_role.as_u64 == g_context->mmu_role.as_u64)

+3 -3

arch/x86/kvm/svm/avic.c

··· 221 221 return &avic_physical_id_table[index]; 222 222 } 223 223 224 - /** 224 + /* 225 225 * Note: 226 226 * AVIC hardware walks the nested page table to check permissions, 227 227 * but does not use the SPA address specified in the leaf page ··· 764 764 return ret; 765 765 } 766 766 767 - /** 767 + /* 768 768 * Note: 769 769 * The HW cannot support posting multicast/broadcast 770 770 * interrupts to a vCPU. So, we still use legacy interrupt ··· 1005 1005 WRITE_ONCE(*(svm->avic_physical_id_cache), entry); 1006 1006 } 1007 1007 1008 - /** 1008 + /* 1009 1009 * This function is called during VCPU halt/unhalt. 1010 1010 */ 1011 1011 static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)

+15 -5

arch/x86/kvm/svm/sev.c

··· 199 199 sev->misc_cg = NULL; 200 200 } 201 201 202 - static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) 202 + static void sev_decommission(unsigned int handle) 203 203 { 204 204 struct sev_data_decommission decommission; 205 + 206 + if (!handle) 207 + return; 208 + 209 + decommission.handle = handle; 210 + sev_guest_decommission(&decommission, NULL); 211 + } 212 + 213 + static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) 214 + { 205 215 struct sev_data_deactivate deactivate; 206 216 207 217 if (!handle) ··· 224 214 sev_guest_deactivate(&deactivate, NULL); 225 215 up_read(&sev_deactivate_lock); 226 216 227 - /* decommission handle */ 228 - decommission.handle = handle; 229 - sev_guest_decommission(&decommission, NULL); 217 + sev_decommission(handle); 230 218 } 231 219 232 220 static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) ··· 349 341 350 342 /* Bind ASID to this guest */ 351 343 ret = sev_bind_asid(kvm, start.handle, error); 352 - if (ret) 344 + if (ret) { 345 + sev_decommission(start.handle); 353 346 goto e_free_session; 347 + } 354 348 355 349 /* return handle to userspace */ 356 350 params.handle = start.handle;

+1

arch/x86/kvm/vmx/vmx.c

··· 6247 6247 switch (kvm_get_apic_mode(vcpu)) { 6248 6248 case LAPIC_MODE_INVALID: 6249 6249 WARN_ONCE(true, "Invalid local APIC state"); 6250 + break; 6250 6251 case LAPIC_MODE_DISABLED: 6251 6252 break; 6252 6253 case LAPIC_MODE_XAPIC:

+5 -1

arch/x86/kvm/x86.c

··· 7106 7106 7107 7107 static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags) 7108 7108 { 7109 - emul_to_vcpu(ctxt)->arch.hflags = emul_flags; 7109 + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); 7110 + 7111 + vcpu->arch.hflags = emul_flags; 7112 + kvm_mmu_reset_context(vcpu); 7110 7113 } 7111 7114 7112 7115 static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt, ··· 8261 8258 kvm_x86_ops.hardware_enable = NULL; 8262 8259 kvm_mmu_module_exit(); 8263 8260 free_percpu(user_return_msrs); 8261 + kmem_cache_destroy(x86_emulator_cache); 8264 8262 kmem_cache_destroy(x86_fpu_cache); 8265 8263 #ifdef CONFIG_KVM_XEN 8266 8264 static_key_deferred_flush(&kvm_xen_enabled);

+3 -1

arch/x86/mm/ioremap.c

··· 118 118 if (!IS_ENABLED(CONFIG_EFI)) 119 119 return; 120 120 121 - if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA) 121 + if (efi_mem_type(addr) == EFI_RUNTIME_SERVICES_DATA || 122 + (efi_mem_type(addr) == EFI_BOOT_SERVICES_DATA && 123 + efi_mem_attributes(addr) & EFI_MEMORY_RUNTIME)) 122 124 desc->flags |= IORES_MAP_ENCRYPTED; 123 125 } 124 126

+7 -1

arch/x86/mm/numa.c

··· 254 254 255 255 /* make sure all non-reserved blocks are inside the limits */ 256 256 bi->start = max(bi->start, low); 257 - bi->end = min(bi->end, high); 257 + 258 + /* preserve info for non-RAM areas above 'max_pfn': */ 259 + if (bi->end > high) { 260 + numa_add_memblk_to(bi->nid, high, bi->end, 261 + &numa_reserved_meminfo); 262 + bi->end = high; 263 + } 258 264 259 265 /* and there's no empty block */ 260 266 if (bi->start >= bi->end)

+44

arch/x86/pci/fixup.c

··· 779 779 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); 780 780 DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar); 781 781 782 + #define RS690_LOWER_TOP_OF_DRAM2 0x30 783 + #define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1 784 + #define RS690_UPPER_TOP_OF_DRAM2 0x31 785 + #define RS690_HTIU_NB_INDEX 0xA8 786 + #define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100 787 + #define RS690_HTIU_NB_DATA 0xAC 788 + 789 + /* 790 + * Some BIOS implementations support RAM above 4GB, but do not configure the 791 + * PCI host to respond to bus master accesses for these addresses. These 792 + * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA 793 + * works as expected for addresses below 4GB. 794 + * 795 + * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57) 796 + * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf 797 + */ 798 + static void rs690_fix_64bit_dma(struct pci_dev *pdev) 799 + { 800 + u32 val = 0; 801 + phys_addr_t top_of_dram = __pa(high_memory - 1) + 1; 802 + 803 + if (top_of_dram <= (1ULL << 32)) 804 + return; 805 + 806 + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, 807 + RS690_LOWER_TOP_OF_DRAM2); 808 + pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val); 809 + 810 + if (val) 811 + return; 812 + 813 + pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram); 814 + 815 + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, 816 + RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); 817 + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32); 818 + 819 + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, 820 + RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); 821 + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, 822 + top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID); 823 + } 824 + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); 825 + 782 826 #endif

-10

drivers/cpufreq/Kconfig.arm

··· 19 19 20 20 If in doubt, say N. 21 21 22 - config ACPI_CPPC_CPUFREQ_FIE 23 - bool "Frequency Invariance support for CPPC cpufreq driver" 24 - depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY 25 - default y 26 - help 27 - This extends frequency invariance support in the CPPC cpufreq driver, 28 - by using CPPC delivered and reference performance counters. 29 - 30 - If in doubt, say N. 31 - 32 22 config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM 33 23 tristate "Allwinner nvmem based SUN50I CPUFreq driver" 34 24 depends on ARCH_SUNXI

+12 -233

drivers/cpufreq/cppc_cpufreq.c

··· 10 10 11 11 #define pr_fmt(fmt) "CPPC Cpufreq:" fmt 12 12 13 - #include <linux/arch_topology.h> 14 13 #include <linux/kernel.h> 15 14 #include <linux/module.h> 16 15 #include <linux/delay.h> 17 16 #include <linux/cpu.h> 18 17 #include <linux/cpufreq.h> 19 18 #include <linux/dmi.h> 20 - #include <linux/irq_work.h> 21 - #include <linux/kthread.h> 22 19 #include <linux/time.h> 23 20 #include <linux/vmalloc.h> 24 - #include <uapi/linux/sched/types.h> 25 21 26 22 #include <asm/unaligned.h> 27 23 ··· 56 60 .oem_revision = 0, 57 61 } 58 62 }; 59 - 60 - #ifdef CONFIG_ACPI_CPPC_CPUFREQ_FIE 61 - 62 - /* Frequency invariance support */ 63 - struct cppc_freq_invariance { 64 - int cpu; 65 - struct irq_work irq_work; 66 - struct kthread_work work; 67 - struct cppc_perf_fb_ctrs prev_perf_fb_ctrs; 68 - struct cppc_cpudata *cpu_data; 69 - }; 70 - 71 - static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv); 72 - static struct kthread_worker *kworker_fie; 73 - static bool fie_disabled; 74 - 75 - static struct cpufreq_driver cppc_cpufreq_driver; 76 - static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu); 77 - static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, 78 - struct cppc_perf_fb_ctrs fb_ctrs_t0, 79 - struct cppc_perf_fb_ctrs fb_ctrs_t1); 80 - 81 - /** 82 - * cppc_scale_freq_workfn - CPPC arch_freq_scale updater for frequency invariance 83 - * @work: The work item. 84 - * 85 - * The CPPC driver register itself with the topology core to provide its own 86 - * implementation (cppc_scale_freq_tick()) of topology_scale_freq_tick() which 87 - * gets called by the scheduler on every tick. 88 - * 89 - * Note that the arch specific counters have higher priority than CPPC counters, 90 - * if available, though the CPPC driver doesn't need to have any special 91 - * handling for that. 92 - * 93 - * On an invocation of cppc_scale_freq_tick(), we schedule an irq work (since we 94 - * reach here from hard-irq context), which then schedules a normal work item 95 - * and cppc_scale_freq_workfn() updates the per_cpu arch_freq_scale variable 96 - * based on the counter updates since the last tick. 97 - */ 98 - static void cppc_scale_freq_workfn(struct kthread_work *work) 99 - { 100 - struct cppc_freq_invariance *cppc_fi; 101 - struct cppc_perf_fb_ctrs fb_ctrs = {0}; 102 - struct cppc_cpudata *cpu_data; 103 - unsigned long local_freq_scale; 104 - u64 perf; 105 - 106 - cppc_fi = container_of(work, struct cppc_freq_invariance, work); 107 - cpu_data = cppc_fi->cpu_data; 108 - 109 - if (cppc_get_perf_ctrs(cppc_fi->cpu, &fb_ctrs)) { 110 - pr_warn("%s: failed to read perf counters\n", __func__); 111 - return; 112 - } 113 - 114 - cppc_fi->prev_perf_fb_ctrs = fb_ctrs; 115 - perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs, 116 - fb_ctrs); 117 - 118 - perf <<= SCHED_CAPACITY_SHIFT; 119 - local_freq_scale = div64_u64(perf, cpu_data->perf_caps.highest_perf); 120 - if (WARN_ON(local_freq_scale > 1024)) 121 - local_freq_scale = 1024; 122 - 123 - per_cpu(arch_freq_scale, cppc_fi->cpu) = local_freq_scale; 124 - } 125 - 126 - static void cppc_irq_work(struct irq_work *irq_work) 127 - { 128 - struct cppc_freq_invariance *cppc_fi; 129 - 130 - cppc_fi = container_of(irq_work, struct cppc_freq_invariance, irq_work); 131 - kthread_queue_work(kworker_fie, &cppc_fi->work); 132 - } 133 - 134 - static void cppc_scale_freq_tick(void) 135 - { 136 - struct cppc_freq_invariance *cppc_fi = &per_cpu(cppc_freq_inv, smp_processor_id()); 137 - 138 - /* 139 - * cppc_get_perf_ctrs() can potentially sleep, call that from the right 140 - * context. 141 - */ 142 - irq_work_queue(&cppc_fi->irq_work); 143 - } 144 - 145 - static struct scale_freq_data cppc_sftd = { 146 - .source = SCALE_FREQ_SOURCE_CPPC, 147 - .set_freq_scale = cppc_scale_freq_tick, 148 - }; 149 - 150 - static void cppc_freq_invariance_policy_init(struct cpufreq_policy *policy, 151 - struct cppc_cpudata *cpu_data) 152 - { 153 - struct cppc_perf_fb_ctrs fb_ctrs = {0}; 154 - struct cppc_freq_invariance *cppc_fi; 155 - int i, ret; 156 - 157 - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) 158 - return; 159 - 160 - if (fie_disabled) 161 - return; 162 - 163 - for_each_cpu(i, policy->cpus) { 164 - cppc_fi = &per_cpu(cppc_freq_inv, i); 165 - cppc_fi->cpu = i; 166 - cppc_fi->cpu_data = cpu_data; 167 - kthread_init_work(&cppc_fi->work, cppc_scale_freq_workfn); 168 - init_irq_work(&cppc_fi->irq_work, cppc_irq_work); 169 - 170 - ret = cppc_get_perf_ctrs(i, &fb_ctrs); 171 - if (ret) { 172 - pr_warn("%s: failed to read perf counters: %d\n", 173 - __func__, ret); 174 - fie_disabled = true; 175 - } else { 176 - cppc_fi->prev_perf_fb_ctrs = fb_ctrs; 177 - } 178 - } 179 - } 180 - 181 - static void __init cppc_freq_invariance_init(void) 182 - { 183 - struct sched_attr attr = { 184 - .size = sizeof(struct sched_attr), 185 - .sched_policy = SCHED_DEADLINE, 186 - .sched_nice = 0, 187 - .sched_priority = 0, 188 - /* 189 - * Fake (unused) bandwidth; workaround to "fix" 190 - * priority inheritance. 191 - */ 192 - .sched_runtime = 1000000, 193 - .sched_deadline = 10000000, 194 - .sched_period = 10000000, 195 - }; 196 - int ret; 197 - 198 - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) 199 - return; 200 - 201 - if (fie_disabled) 202 - return; 203 - 204 - kworker_fie = kthread_create_worker(0, "cppc_fie"); 205 - if (IS_ERR(kworker_fie)) 206 - return; 207 - 208 - ret = sched_setattr_nocheck(kworker_fie->task, &attr); 209 - if (ret) { 210 - pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__, 211 - ret); 212 - kthread_destroy_worker(kworker_fie); 213 - return; 214 - } 215 - 216 - /* Register for freq-invariance */ 217 - topology_set_scale_freq_source(&cppc_sftd, cpu_present_mask); 218 - } 219 - 220 - static void cppc_freq_invariance_exit(void) 221 - { 222 - struct cppc_freq_invariance *cppc_fi; 223 - int i; 224 - 225 - if (cppc_cpufreq_driver.get == hisi_cppc_cpufreq_get_rate) 226 - return; 227 - 228 - if (fie_disabled) 229 - return; 230 - 231 - topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_CPPC, cpu_present_mask); 232 - 233 - for_each_possible_cpu(i) { 234 - cppc_fi = &per_cpu(cppc_freq_inv, i); 235 - irq_work_sync(&cppc_fi->irq_work); 236 - } 237 - 238 - kthread_destroy_worker(kworker_fie); 239 - kworker_fie = NULL; 240 - } 241 - 242 - #else 243 - static inline void 244 - cppc_freq_invariance_policy_init(struct cpufreq_policy *policy, 245 - struct cppc_cpudata *cpu_data) 246 - { 247 - } 248 - 249 - static inline void cppc_freq_invariance_init(void) 250 - { 251 - } 252 - 253 - static inline void cppc_freq_invariance_exit(void) 254 - { 255 - } 256 - #endif /* CONFIG_ACPI_CPPC_CPUFREQ_FIE */ 257 63 258 64 /* Callback function used to retrieve the max frequency from DMI */ 259 65 static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private) ··· 345 547 cpu_data->perf_ctrls.desired_perf = caps->highest_perf; 346 548 347 549 ret = cppc_set_perf(cpu, &cpu_data->perf_ctrls); 348 - if (ret) { 550 + if (ret) 349 551 pr_debug("Err setting perf value:%d on CPU:%d. ret:%d\n", 350 552 caps->highest_perf, cpu, ret); 351 - } else { 352 - cppc_freq_invariance_policy_init(policy, cpu_data); 353 - } 354 553 355 554 return ret; 356 555 } ··· 360 565 return (u32)t1 - (u32)t0; 361 566 } 362 567 363 - static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data, 364 - struct cppc_perf_fb_ctrs fb_ctrs_t0, 365 - struct cppc_perf_fb_ctrs fb_ctrs_t1) 568 + static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, 569 + struct cppc_perf_fb_ctrs fb_ctrs_t0, 570 + struct cppc_perf_fb_ctrs fb_ctrs_t1) 366 571 { 367 572 u64 delta_reference, delta_delivered; 368 - u64 reference_perf; 573 + u64 reference_perf, delivered_perf; 369 574 370 575 reference_perf = fb_ctrs_t0.reference_perf; 371 576 ··· 374 579 delta_delivered = get_delta(fb_ctrs_t1.delivered, 375 580 fb_ctrs_t0.delivered); 376 581 377 - /* Check to avoid divide-by zero and invalid delivered_perf */ 378 - if (!delta_reference || !delta_delivered) 379 - return cpu_data->perf_ctrls.desired_perf; 380 - 381 - return (reference_perf * delta_delivered) / delta_reference; 382 - } 383 - 384 - static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data, 385 - struct cppc_perf_fb_ctrs fb_ctrs_t0, 386 - struct cppc_perf_fb_ctrs fb_ctrs_t1) 387 - { 388 - u64 delivered_perf; 389 - 390 - delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0, 391 - fb_ctrs_t1); 582 + /* Check to avoid divide-by zero */ 583 + if (delta_reference || delta_delivered) 584 + delivered_perf = (reference_perf * delta_delivered) / 585 + delta_reference; 586 + else 587 + delivered_perf = cpu_data->perf_ctrls.desired_perf; 392 588 393 589 return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf); 394 590 } ··· 504 718 505 719 static int __init cppc_cpufreq_init(void) 506 720 { 507 - int ret; 508 - 509 721 if ((acpi_disabled) || !acpi_cpc_valid()) 510 722 return -ENODEV; 511 723 ··· 511 727 512 728 cppc_check_hisi_workaround(); 513 729 514 - ret = cpufreq_register_driver(&cppc_cpufreq_driver); 515 - if (!ret) 516 - cppc_freq_invariance_init(); 517 - 518 - return ret; 730 + return cpufreq_register_driver(&cppc_cpufreq_driver); 519 731 } 520 732 521 733 static inline void free_cpu_data(void) ··· 528 748 529 749 static void __exit cppc_cpufreq_exit(void) 530 750 { 531 - cppc_freq_invariance_exit(); 532 751 cpufreq_unregister_driver(&cppc_cpufreq_driver); 533 752 534 753 free_cpu_data();

+2

drivers/dma/Kconfig

··· 59 59 #devices 60 60 config ALTERA_MSGDMA 61 61 tristate "Altera / Intel mSGDMA Engine" 62 + depends on HAS_IOMEM 62 63 select DMA_ENGINE 63 64 help 64 65 Enable support for Altera / Intel mSGDMA controller. ··· 702 701 703 702 config XILINX_ZYNQMP_DPDMA 704 703 tristate "Xilinx DPDMA Engine" 704 + depends on HAS_IOMEM && OF 705 705 select DMA_ENGINE 706 706 select DMA_VIRTUAL_CHANNELS 707 707 help

+3

drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c

··· 332 332 } 333 333 334 334 if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) { 335 + err = -EINVAL; 335 336 dev_err(dev, "DPDMAI major version mismatch\n" 336 337 "Found %u.%u, supported version is %u.%u\n", 337 338 priv->dpdmai_attr.version.major, ··· 342 341 } 343 342 344 343 if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) { 344 + err = -EINVAL; 345 345 dev_err(dev, "DPDMAI minor version mismatch\n" 346 346 "Found %u.%u, supported version is %u.%u\n", 347 347 priv->dpdmai_attr.version.major, ··· 477 475 ppriv->store = 478 476 dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev); 479 477 if (!ppriv->store) { 478 + err = -ENOMEM; 480 479 dev_err(dev, "dpaa2_io_store_create() failed\n"); 481 480 goto err_store; 482 481 }

+1

drivers/dma/idxd/cdev.c

··· 110 110 pasid = iommu_sva_get_pasid(sva); 111 111 if (pasid == IOMMU_PASID_INVALID) { 112 112 iommu_sva_unbind_device(sva); 113 + rc = -EINVAL; 113 114 goto failed; 114 115 } 115 116

+60 -3

drivers/dma/idxd/init.c

··· 168 168 return rc; 169 169 } 170 170 171 + static void idxd_cleanup_interrupts(struct idxd_device *idxd) 172 + { 173 + struct pci_dev *pdev = idxd->pdev; 174 + struct idxd_irq_entry *irq_entry; 175 + int i, msixcnt; 176 + 177 + msixcnt = pci_msix_vec_count(pdev); 178 + if (msixcnt <= 0) 179 + return; 180 + 181 + irq_entry = &idxd->irq_entries[0]; 182 + free_irq(irq_entry->vector, irq_entry); 183 + 184 + for (i = 1; i < msixcnt; i++) { 185 + 186 + irq_entry = &idxd->irq_entries[i]; 187 + if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) 188 + idxd_device_release_int_handle(idxd, idxd->int_handles[i], 189 + IDXD_IRQ_MSIX); 190 + free_irq(irq_entry->vector, irq_entry); 191 + } 192 + 193 + idxd_mask_error_interrupts(idxd); 194 + pci_free_irq_vectors(pdev); 195 + } 196 + 171 197 static int idxd_setup_wqs(struct idxd_device *idxd) 172 198 { 173 199 struct device *dev = &idxd->pdev->dev; ··· 268 242 engine->idxd = idxd; 269 243 device_initialize(&engine->conf_dev); 270 244 engine->conf_dev.parent = &idxd->conf_dev; 245 + engine->conf_dev.bus = &dsa_bus_type; 271 246 engine->conf_dev.type = &idxd_engine_device_type; 272 247 rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id); 273 248 if (rc < 0) { ··· 328 301 while (--i >= 0) 329 302 put_device(&idxd->groups[i]->conf_dev); 330 303 return rc; 304 + } 305 + 306 + static void idxd_cleanup_internals(struct idxd_device *idxd) 307 + { 308 + int i; 309 + 310 + for (i = 0; i < idxd->max_groups; i++) 311 + put_device(&idxd->groups[i]->conf_dev); 312 + for (i = 0; i < idxd->max_engines; i++) 313 + put_device(&idxd->engines[i]->conf_dev); 314 + for (i = 0; i < idxd->max_wqs; i++) 315 + put_device(&idxd->wqs[i]->conf_dev); 316 + destroy_workqueue(idxd->wq); 331 317 } 332 318 333 319 static int idxd_setup_internals(struct idxd_device *idxd) ··· 571 531 dev_dbg(dev, "Loading RO device config\n"); 572 532 rc = idxd_device_load_config(idxd); 573 533 if (rc < 0) 574 - goto err; 534 + goto err_config; 575 535 } 576 536 577 537 rc = idxd_setup_interrupts(idxd); 578 538 if (rc) 579 - goto err; 539 + goto err_config; 580 540 581 541 dev_dbg(dev, "IDXD interrupt setup complete.\n"); 582 542 ··· 589 549 dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id); 590 550 return 0; 591 551 552 + err_config: 553 + idxd_cleanup_internals(idxd); 592 554 err: 593 555 if (device_pasid_enabled(idxd)) 594 556 idxd_disable_system_pasid(idxd); 595 557 iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); 596 558 return rc; 559 + } 560 + 561 + static void idxd_cleanup(struct idxd_device *idxd) 562 + { 563 + struct device *dev = &idxd->pdev->dev; 564 + 565 + perfmon_pmu_remove(idxd); 566 + idxd_cleanup_interrupts(idxd); 567 + idxd_cleanup_internals(idxd); 568 + if (device_pasid_enabled(idxd)) 569 + idxd_disable_system_pasid(idxd); 570 + iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA); 597 571 } 598 572 599 573 static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) ··· 662 608 rc = idxd_register_devices(idxd); 663 609 if (rc) { 664 610 dev_err(dev, "IDXD sysfs setup failed\n"); 665 - goto err; 611 + goto err_dev_register; 666 612 } 667 613 668 614 idxd->state = IDXD_DEV_CONF_READY; ··· 672 618 673 619 return 0; 674 620 621 + err_dev_register: 622 + idxd_cleanup(idxd); 675 623 err: 676 624 pci_iounmap(pdev, idxd->reg_base); 677 625 err_iomap: ··· 843 787 844 788 static void __exit idxd_exit_module(void) 845 789 { 790 + idxd_unregister_driver(); 846 791 pci_unregister_driver(&idxd_pci_driver); 847 792 idxd_cdev_remove(); 848 793 idxd_unregister_bus_type();

+1 -1

drivers/dma/ipu/ipu_irq.c

··· 230 230 } 231 231 232 232 /** 233 - * ipu_irq_map() - map an IPU interrupt source to an IRQ number 233 + * ipu_irq_unmap() - unmap an IPU interrupt source 234 234 * @source: interrupt source bit position (see ipu_irq_map()) 235 235 * @return: 0 or negative error code 236 236 */

+14 -13

drivers/dma/mediatek/mtk-uart-apdma.c

··· 131 131 132 132 static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd) 133 133 { 134 - struct dma_chan *chan = vd->tx.chan; 135 - struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); 136 - 137 - kfree(c->desc); 134 + kfree(container_of(vd, struct mtk_uart_apdma_desc, vd)); 138 135 } 139 136 140 137 static void mtk_uart_apdma_start_tx(struct mtk_chan *c) ··· 204 207 205 208 static void mtk_uart_apdma_tx_handler(struct mtk_chan *c) 206 209 { 207 - struct mtk_uart_apdma_desc *d = c->desc; 208 - 209 210 mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); 210 211 mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); 211 212 mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); 212 - 213 - list_del(&d->vd.node); 214 - vchan_cookie_complete(&d->vd); 215 213 } 216 214 217 215 static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) ··· 237 245 238 246 c->rx_status = d->avail_len - cnt; 239 247 mtk_uart_apdma_write(c, VFF_RPT, wg); 248 + } 240 249 241 - list_del(&d->vd.node); 242 - vchan_cookie_complete(&d->vd); 250 + static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c) 251 + { 252 + struct mtk_uart_apdma_desc *d = c->desc; 253 + 254 + if (d) { 255 + list_del(&d->vd.node); 256 + vchan_cookie_complete(&d->vd); 257 + c->desc = NULL; 258 + } 243 259 } 244 260 245 261 static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) ··· 261 261 mtk_uart_apdma_rx_handler(c); 262 262 else if (c->dir == DMA_MEM_TO_DEV) 263 263 mtk_uart_apdma_tx_handler(c); 264 + mtk_uart_apdma_chan_complete_handler(c); 264 265 spin_unlock_irqrestore(&c->vc.lock, flags); 265 266 266 267 return IRQ_HANDLED; ··· 349 348 return NULL; 350 349 351 350 /* Now allocate and setup the descriptor */ 352 - d = kzalloc(sizeof(*d), GFP_ATOMIC); 351 + d = kzalloc(sizeof(*d), GFP_NOWAIT); 353 352 if (!d) 354 353 return NULL; 355 354 ··· 367 366 unsigned long flags; 368 367 369 368 spin_lock_irqsave(&c->vc.lock, flags); 370 - if (vchan_issue_pending(&c->vc)) { 369 + if (vchan_issue_pending(&c->vc) && !c->desc) { 371 370 vd = vchan_next_desc(&c->vc); 372 371 c->desc = to_mtk_uart_apdma_desc(&vd->tx); 373 372

+4 -2

drivers/dma/pl330.c

··· 2694 2694 for (i = 0; i < len / period_len; i++) { 2695 2695 desc = pl330_get_desc(pch); 2696 2696 if (!desc) { 2697 + unsigned long iflags; 2698 + 2697 2699 dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", 2698 2700 __func__, __LINE__); 2699 2701 2700 2702 if (!first) 2701 2703 return NULL; 2702 2704 2703 - spin_lock_irqsave(&pl330->pool_lock, flags); 2705 + spin_lock_irqsave(&pl330->pool_lock, iflags); 2704 2706 2705 2707 while (!list_empty(&first->node)) { 2706 2708 desc = list_entry(first->node.next, ··· 2712 2710 2713 2711 list_move_tail(&first->node, &pl330->desc_pool); 2714 2712 2715 - spin_unlock_irqrestore(&pl330->pool_lock, flags); 2713 + spin_unlock_irqrestore(&pl330->pool_lock, iflags); 2716 2714 2717 2715 return NULL; 2718 2716 }

+1

drivers/dma/qcom/Kconfig

··· 33 33 34 34 config QCOM_HIDMA_MGMT 35 35 tristate "Qualcomm Technologies HIDMA Management support" 36 + depends on HAS_IOMEM 36 37 select DMA_ENGINE 37 38 help 38 39 Enable support for the Qualcomm Technologies HIDMA Management.

+1

drivers/dma/sf-pdma/Kconfig

··· 1 1 config SF_PDMA 2 2 tristate "Sifive PDMA controller driver" 3 + depends on HAS_IOMEM 3 4 select DMA_ENGINE 4 5 select DMA_VIRTUAL_CHANNELS 5 6 help

+1 -1

drivers/dma/sh/rcar-dmac.c

··· 1913 1913 1914 1914 /* Enable runtime PM and initialize the device. */ 1915 1915 pm_runtime_enable(&pdev->dev); 1916 - ret = pm_runtime_get_sync(&pdev->dev); 1916 + ret = pm_runtime_resume_and_get(&pdev->dev); 1917 1917 if (ret < 0) { 1918 1918 dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); 1919 1919 return ret;

+3

drivers/dma/ste_dma40.c

··· 3675 3675 3676 3676 kfree(base->lcla_pool.base_unaligned); 3677 3677 3678 + if (base->lcpa_base) 3679 + iounmap(base->lcpa_base); 3680 + 3678 3681 if (base->phy_lcpa) 3679 3682 release_mem_region(base->phy_lcpa, 3680 3683 base->lcpa_size);

+2 -2

drivers/dma/stm32-mdma.c

··· 1452 1452 return -ENOMEM; 1453 1453 } 1454 1454 1455 - ret = pm_runtime_get_sync(dmadev->ddev.dev); 1455 + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); 1456 1456 if (ret < 0) 1457 1457 return ret; 1458 1458 ··· 1718 1718 u32 ccr, id; 1719 1719 int ret; 1720 1720 1721 - ret = pm_runtime_get_sync(dev); 1721 + ret = pm_runtime_resume_and_get(dev); 1722 1722 if (ret < 0) 1723 1723 return ret; 1724 1724

+28 -3

drivers/dma/xilinx/xilinx_dpdma.c

··· 113 113 #define XILINX_DPDMA_CH_VDO 0x020 114 114 #define XILINX_DPDMA_CH_PYLD_SZ 0x024 115 115 #define XILINX_DPDMA_CH_DESC_ID 0x028 116 + #define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0) 116 117 117 118 /* DPDMA descriptor fields */ 118 119 #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5 ··· 867 866 * will be used, but it should be enough. 868 867 */ 869 868 list_for_each_entry(sw_desc, &desc->descriptors, node) 870 - sw_desc->hw.desc_id = desc->vdesc.tx.cookie; 869 + sw_desc->hw.desc_id = desc->vdesc.tx.cookie 870 + & XILINX_DPDMA_CH_DESC_ID_MASK; 871 871 872 872 sw_desc = list_first_entry(&desc->descriptors, 873 873 struct xilinx_dpdma_sw_desc, node); ··· 1088 1086 if (!chan->running || !pending) 1089 1087 goto out; 1090 1088 1091 - desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID); 1089 + desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID) 1090 + & XILINX_DPDMA_CH_DESC_ID_MASK; 1092 1091 1093 1092 /* If the retrigger raced with vsync, retry at the next frame. */ 1094 1093 sw_desc = list_first_entry(&pending->descriptors, ··· 1462 1459 */ 1463 1460 static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev) 1464 1461 { 1465 - dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ERR_ALL); 1462 + dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL); 1466 1463 dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL); 1467 1464 } 1468 1465 ··· 1599 1596 return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan); 1600 1597 } 1601 1598 1599 + static void dpdma_hw_init(struct xilinx_dpdma_device *xdev) 1600 + { 1601 + unsigned int i; 1602 + void __iomem *reg; 1603 + 1604 + /* Disable all interrupts */ 1605 + xilinx_dpdma_disable_irq(xdev); 1606 + 1607 + /* Stop all channels */ 1608 + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) { 1609 + reg = xdev->reg + XILINX_DPDMA_CH_BASE 1610 + + XILINX_DPDMA_CH_OFFSET * i; 1611 + dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE); 1612 + } 1613 + 1614 + /* Clear the interrupt status registers */ 1615 + dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL); 1616 + dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL); 1617 + } 1618 + 1602 1619 static int xilinx_dpdma_probe(struct platform_device *pdev) 1603 1620 { 1604 1621 struct xilinx_dpdma_device *xdev; ··· 1644 1621 xdev->reg = devm_platform_ioremap_resource(pdev, 0); 1645 1622 if (IS_ERR(xdev->reg)) 1646 1623 return PTR_ERR(xdev->reg); 1624 + 1625 + dpdma_hw_init(xdev); 1647 1626 1648 1627 xdev->irq = platform_get_irq(pdev, 0); 1649 1628 if (xdev->irq < 0) {

+1 -1

drivers/dma/xilinx/zynqmp_dma.c

··· 468 468 struct zynqmp_dma_desc_sw *desc; 469 469 int i, ret; 470 470 471 - ret = pm_runtime_get_sync(chan->dev); 471 + ret = pm_runtime_resume_and_get(chan->dev); 472 472 if (ret < 0) 473 473 return ret; 474 474

+5 -1

drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c

··· 6871 6871 if (ring->use_doorbell) { 6872 6872 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 6873 6873 (adev->doorbell_index.kiq * 2) << 2); 6874 + /* If GC has entered CGPG, ringing doorbell > first page doesn't 6875 + * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround 6876 + * this issue. 6877 + */ 6874 6878 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 6875 - (adev->doorbell_index.userqueue_end * 2) << 2); 6879 + (adev->doorbell.size - 4)); 6876 6880 } 6877 6881 6878 6882 WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,

+5 -1

drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

··· 3673 3673 if (ring->use_doorbell) { 3674 3674 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, 3675 3675 (adev->doorbell_index.kiq * 2) << 2); 3676 + /* If GC has entered CGPG, ringing doorbell > first page doesn't 3677 + * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround 3678 + * this issue. 3679 + */ 3676 3680 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, 3677 - (adev->doorbell_index.userqueue_end * 2) << 2); 3681 + (adev->doorbell.size - 4)); 3678 3682 } 3679 3683 3680 3684 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,

+35 -1

drivers/irqchip/irq-gic-v3.c

··· 642 642 nmi_exit(); 643 643 } 644 644 645 + static u32 do_read_iar(struct pt_regs *regs) 646 + { 647 + u32 iar; 648 + 649 + if (gic_supports_nmi() && unlikely(!interrupts_enabled(regs))) { 650 + u64 pmr; 651 + 652 + /* 653 + * We were in a context with IRQs disabled. However, the 654 + * entry code has set PMR to a value that allows any 655 + * interrupt to be acknowledged, and not just NMIs. This can 656 + * lead to surprising effects if the NMI has been retired in 657 + * the meantime, and that there is an IRQ pending. The IRQ 658 + * would then be taken in NMI context, something that nobody 659 + * wants to debug twice. 660 + * 661 + * Until we sort this, drop PMR again to a level that will 662 + * actually only allow NMIs before reading IAR, and then 663 + * restore it to what it was. 664 + */ 665 + pmr = gic_read_pmr(); 666 + gic_pmr_mask_irqs(); 667 + isb(); 668 + 669 + iar = gic_read_iar(); 670 + 671 + gic_write_pmr(pmr); 672 + } else { 673 + iar = gic_read_iar(); 674 + } 675 + 676 + return iar; 677 + } 678 + 645 679 static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) 646 680 { 647 681 u32 irqnr; 648 682 649 - irqnr = gic_read_iar(); 683 + irqnr = do_read_iar(regs); 650 684 651 685 /* Check for special IDs first */ 652 686 if ((irqnr >= 1020 && irqnr <= 1023))

+1

drivers/net/caif/caif_serial.c

··· 350 350 rtnl_lock(); 351 351 result = register_netdevice(dev); 352 352 if (result) { 353 + tty_kref_put(tty); 353 354 rtnl_unlock(); 354 355 free_netdev(dev); 355 356 return -ENODEV;

+15 -2

drivers/net/can/usb/mcba_usb.c

··· 82 82 bool can_ka_first_pass; 83 83 bool can_speed_check; 84 84 atomic_t free_ctx_cnt; 85 + void *rxbuf[MCBA_MAX_RX_URBS]; 86 + dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS]; 85 87 }; 86 88 87 89 /* CAN frame */ ··· 635 633 for (i = 0; i < MCBA_MAX_RX_URBS; i++) { 636 634 struct urb *urb = NULL; 637 635 u8 *buf; 636 + dma_addr_t buf_dma; 638 637 639 638 /* create a URB, and a buffer for it */ 640 639 urb = usb_alloc_urb(0, GFP_KERNEL); ··· 645 642 } 646 643 647 644 buf = usb_alloc_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE, 648 - GFP_KERNEL, &urb->transfer_dma); 645 + GFP_KERNEL, &buf_dma); 649 646 if (!buf) { 650 647 netdev_err(netdev, "No memory left for USB buffer\n"); 651 648 usb_free_urb(urb); ··· 664 661 if (err) { 665 662 usb_unanchor_urb(urb); 666 663 usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE, 667 - buf, urb->transfer_dma); 664 + buf, buf_dma); 668 665 usb_free_urb(urb); 669 666 break; 670 667 } 668 + 669 + priv->rxbuf[i] = buf; 670 + priv->rxbuf_dma[i] = buf_dma; 671 671 672 672 /* Drop reference, USB core will take care of freeing it */ 673 673 usb_free_urb(urb); ··· 714 708 715 709 static void mcba_urb_unlink(struct mcba_priv *priv) 716 710 { 711 + int i; 712 + 717 713 usb_kill_anchored_urbs(&priv->rx_submitted); 714 + 715 + for (i = 0; i < MCBA_MAX_RX_URBS; ++i) 716 + usb_free_coherent(priv->udev, MCBA_USB_RX_BUFF_SIZE, 717 + priv->rxbuf[i], priv->rxbuf_dma[i]); 718 + 718 719 usb_kill_anchored_urbs(&priv->tx_submitted); 719 720 } 720 721

+28 -26

drivers/net/ethernet/amazon/ena/ena_netdev.c

··· 236 236 static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring, 237 237 struct ena_tx_buffer *tx_info, 238 238 struct xdp_frame *xdpf, 239 - void **push_hdr, 240 - u32 *push_len) 239 + struct ena_com_tx_ctx *ena_tx_ctx) 241 240 { 242 241 struct ena_adapter *adapter = xdp_ring->adapter; 243 242 struct ena_com_buf *ena_buf; 244 - dma_addr_t dma = 0; 243 + int push_len = 0; 244 + dma_addr_t dma; 245 + void *data; 245 246 u32 size; 246 247 247 248 tx_info->xdpf = xdpf; 249 + data = tx_info->xdpf->data; 248 250 size = tx_info->xdpf->len; 249 - ena_buf = tx_info->bufs; 250 251 251 - /* llq push buffer */ 252 - *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); 253 - *push_hdr = tx_info->xdpf->data; 252 + if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { 253 + /* Designate part of the packet for LLQ */ 254 + push_len = min_t(u32, size, xdp_ring->tx_max_header_size); 254 255 255 - if (size - *push_len > 0) { 256 + ena_tx_ctx->push_header = data; 257 + 258 + size -= push_len; 259 + data += push_len; 260 + } 261 + 262 + ena_tx_ctx->header_len = push_len; 263 + 264 + if (size > 0) { 256 265 dma = dma_map_single(xdp_ring->dev, 257 - *push_hdr + *push_len, 258 - size - *push_len, 266 + data, 267 + size, 259 268 DMA_TO_DEVICE); 260 269 if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) 261 270 goto error_report_dma_error; 262 271 263 - tx_info->map_linear_data = 1; 264 - tx_info->num_of_bufs = 1; 265 - } 272 + tx_info->map_linear_data = 0; 266 273 267 - ena_buf->paddr = dma; 268 - ena_buf->len = size; 274 + ena_buf = tx_info->bufs; 275 + ena_buf->paddr = dma; 276 + ena_buf->len = size; 277 + 278 + ena_tx_ctx->ena_bufs = ena_buf; 279 + ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; 280 + } 269 281 270 282 return 0; 271 283 ··· 285 273 ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1, 286 274 &xdp_ring->syncp); 287 275 netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); 288 - 289 - xdp_return_frame_rx_napi(tx_info->xdpf); 290 - tx_info->xdpf = NULL; 291 - tx_info->num_of_bufs = 0; 292 276 293 277 return -EINVAL; 294 278 } ··· 297 289 struct ena_com_tx_ctx ena_tx_ctx = {}; 298 290 struct ena_tx_buffer *tx_info; 299 291 u16 next_to_use, req_id; 300 - void *push_hdr; 301 - u32 push_len; 302 292 int rc; 303 293 304 294 next_to_use = xdp_ring->next_to_use; ··· 304 298 tx_info = &xdp_ring->tx_buffer_info[req_id]; 305 299 tx_info->num_of_bufs = 0; 306 300 307 - rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len); 301 + rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx); 308 302 if (unlikely(rc)) 309 303 return rc; 310 304 311 - ena_tx_ctx.ena_bufs = tx_info->bufs; 312 - ena_tx_ctx.push_header = push_hdr; 313 - ena_tx_ctx.num_bufs = tx_info->num_of_bufs; 314 305 ena_tx_ctx.req_id = req_id; 315 - ena_tx_ctx.header_len = push_len; 316 306 317 307 rc = ena_xmit_common(dev, 318 308 xdp_ring,

+1

drivers/net/ethernet/atheros/alx/main.c

··· 1849 1849 free_netdev(netdev); 1850 1850 out_pci_release: 1851 1851 pci_release_mem_regions(pdev); 1852 + pci_disable_pcie_error_reporting(pdev); 1852 1853 out_pci_disable: 1853 1854 pci_disable_device(pdev); 1854 1855 return err;

+7 -1

drivers/net/ethernet/broadcom/bnxt/bnxt.c

··· 7308 7308 entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries + 7309 7309 2 * (extra_qps + ctx->qp_min_qp1_entries) + min; 7310 7310 entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple); 7311 - entries = ctx->qp_max_l2_entries + extra_qps + ctx->qp_min_qp1_entries; 7311 + entries = ctx->qp_max_l2_entries + 2 * (extra_qps + ctx->qp_min_qp1_entries); 7312 7312 entries = roundup(entries, ctx->tqm_entries_multiple); 7313 7313 entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring); 7314 7314 for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) { ··· 11750 11750 bnxt_hwrm_coal_params_qcaps(bp); 11751 11751 } 11752 11752 11753 + static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt); 11754 + 11753 11755 static int bnxt_fw_init_one(struct bnxt *bp) 11754 11756 { 11755 11757 int rc; ··· 11766 11764 netdev_err(bp->dev, "Firmware init phase 2 failed\n"); 11767 11765 return rc; 11768 11766 } 11767 + rc = bnxt_probe_phy(bp, false); 11768 + if (rc) 11769 + return rc; 11769 11770 rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false); 11770 11771 if (rc) 11771 11772 return rc; ··· 13160 13155 bnxt_hwrm_func_drv_unrgtr(bp); 13161 13156 bnxt_free_hwrm_short_cmd_req(bp); 13162 13157 bnxt_free_hwrm_resources(bp); 13158 + bnxt_ethtool_free(bp); 13163 13159 kfree(bp->fw_health); 13164 13160 bp->fw_health = NULL; 13165 13161 bnxt_cleanup_pci(bp);

+36 -14

drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c

··· 1337 1337 return ret; 1338 1338 } 1339 1339 1340 - spin_lock_bh(&adap->win0_lock); 1341 - ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size); 1342 - spin_unlock_bh(&adap->win0_lock); 1343 - if (ret) 1344 - dev_err(adap->pdev_dev, "Failed to load PHY FW\n"); 1340 + /* We have to RESET the chip/firmware because we need the 1341 + * chip in uninitialized state for loading new PHY image. 1342 + * Otherwise, the running firmware will only store the PHY 1343 + * image in local RAM which will be lost after next reset. 1344 + */ 1345 + ret = t4_fw_reset(adap, adap->mbox, PIORSTMODE_F | PIORST_F); 1346 + if (ret < 0) { 1347 + dev_err(adap->pdev_dev, 1348 + "Set FW to RESET for flashing PHY FW failed. ret: %d\n", 1349 + ret); 1350 + return ret; 1351 + } 1345 1352 1346 - return ret; 1353 + ret = t4_load_phy_fw(adap, MEMWIN_NIC, NULL, data, size); 1354 + if (ret < 0) { 1355 + dev_err(adap->pdev_dev, "Failed to load PHY FW. ret: %d\n", 1356 + ret); 1357 + return ret; 1358 + } 1359 + 1360 + return 0; 1347 1361 } 1348 1362 1349 1363 static int cxgb4_ethtool_flash_fw(struct net_device *netdev, ··· 1624 1610 u32 ftid) 1625 1611 { 1626 1612 struct tid_info *t = &adap->tids; 1627 - struct filter_entry *f; 1628 1613 1629 - if (ftid < t->nhpftids) 1630 - f = &adap->tids.hpftid_tab[ftid]; 1631 - else if (ftid < t->nftids) 1632 - f = &adap->tids.ftid_tab[ftid - t->nhpftids]; 1633 - else 1634 - f = lookup_tid(&adap->tids, ftid); 1614 + if (ftid >= t->hpftid_base && ftid < t->hpftid_base + t->nhpftids) 1615 + return &t->hpftid_tab[ftid - t->hpftid_base]; 1635 1616 1636 - return f; 1617 + if (ftid >= t->ftid_base && ftid < t->ftid_base + t->nftids) 1618 + return &t->ftid_tab[ftid - t->ftid_base]; 1619 + 1620 + return lookup_tid(t, ftid); 1637 1621 } 1638 1622 1639 1623 static void cxgb4_fill_filter_rule(struct ethtool_rx_flow_spec *fs, ··· 1838 1826 filter_id = filter_info->loc_array[cmd->fs.location]; 1839 1827 f = cxgb4_get_filter_entry(adapter, filter_id); 1840 1828 1829 + if (f->fs.prio) 1830 + filter_id -= adapter->tids.hpftid_base; 1831 + else if (!f->fs.hash) 1832 + filter_id -= (adapter->tids.ftid_base - adapter->tids.nhpftids); 1833 + 1841 1834 ret = cxgb4_flow_rule_destroy(dev, f->fs.tc_prio, &f->fs, filter_id); 1842 1835 if (ret) 1843 1836 goto err; ··· 1901 1884 goto free; 1902 1885 1903 1886 filter_info = &adapter->ethtool_filters->port[pi->port_id]; 1887 + 1888 + if (fs.prio) 1889 + tid += adapter->tids.hpftid_base; 1890 + else if (!fs.hash) 1891 + tid += (adapter->tids.ftid_base - adapter->tids.nhpftids); 1904 1892 1905 1893 filter_info->loc_array[cmd->fs.location] = tid; 1906 1894 set_bit(cmd->fs.location, filter_info->bmap);

+1 -1

drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c

··· 198 198 WORD_MASK, f->fs.nat_lip[3] | 199 199 f->fs.nat_lip[2] << 8 | 200 200 f->fs.nat_lip[1] << 16 | 201 - (u64)f->fs.nat_lip[0] << 25, 1); 201 + (u64)f->fs.nat_lip[0] << 24, 1); 202 202 } 203 203 } 204 204

-2

drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c

··· 4424 4424 4425 4425 /* Load PHY Firmware onto adapter. 4426 4426 */ 4427 - spin_lock_bh(&adap->win0_lock); 4428 4427 ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version, 4429 4428 (u8 *)phyf->data, phyf->size); 4430 - spin_unlock_bh(&adap->win0_lock); 4431 4429 if (ret < 0) 4432 4430 dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n", 4433 4431 -ret);

+29 -17

drivers/net/ethernet/chelsio/cxgb4/t4_hw.c

··· 3060 3060 * @addr: the start address to write 3061 3061 * @n: length of data to write in bytes 3062 3062 * @data: the data to write 3063 + * @byte_oriented: whether to store data as bytes or as words 3063 3064 * 3064 3065 * Writes up to a page of data (256 bytes) to the serial flash starting 3065 3066 * at the given address. All the data must be written to the same page. 3067 + * If @byte_oriented is set the write data is stored as byte stream 3068 + * (i.e. matches what on disk), otherwise in big-endian. 3066 3069 */ 3067 3070 static int t4_write_flash(struct adapter *adapter, unsigned int addr, 3068 - unsigned int n, const u8 *data) 3071 + unsigned int n, const u8 *data, bool byte_oriented) 3069 3072 { 3070 - int ret; 3071 - u32 buf[64]; 3072 3073 unsigned int i, c, left, val, offset = addr & 0xff; 3074 + u32 buf[64]; 3075 + int ret; 3073 3076 3074 3077 if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE) 3075 3078 return -EINVAL; ··· 3083 3080 (ret = sf1_write(adapter, 4, 1, 1, val)) != 0) 3084 3081 goto unlock; 3085 3082 3086 - for (left = n; left; left -= c) { 3083 + for (left = n; left; left -= c, data += c) { 3087 3084 c = min(left, 4U); 3088 - for (val = 0, i = 0; i < c; ++i) 3089 - val = (val << 8) + *data++; 3085 + for (val = 0, i = 0; i < c; ++i) { 3086 + if (byte_oriented) 3087 + val = (val << 8) + data[i]; 3088 + else 3089 + val = (val << 8) + data[c - i - 1]; 3090 + } 3090 3091 3091 3092 ret = sf1_write(adapter, c, c != left, 1, val); 3092 3093 if (ret) ··· 3103 3096 t4_write_reg(adapter, SF_OP_A, 0); /* unlock SF */ 3104 3097 3105 3098 /* Read the page to verify the write succeeded */ 3106 - ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1); 3099 + ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 3100 + byte_oriented); 3107 3101 if (ret) 3108 3102 return ret; 3109 3103 ··· 3700 3692 */ 3701 3693 memcpy(first_page, fw_data, SF_PAGE_SIZE); 3702 3694 ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); 3703 - ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); 3695 + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true); 3704 3696 if (ret) 3705 3697 goto out; 3706 3698 ··· 3708 3700 for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { 3709 3701 addr += SF_PAGE_SIZE; 3710 3702 fw_data += SF_PAGE_SIZE; 3711 - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data); 3703 + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true); 3712 3704 if (ret) 3713 3705 goto out; 3714 3706 } 3715 3707 3716 - ret = t4_write_flash(adap, 3717 - fw_start + offsetof(struct fw_hdr, fw_ver), 3718 - sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); 3708 + ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver), 3709 + sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver, 3710 + true); 3719 3711 out: 3720 3712 if (ret) 3721 3713 dev_err(adap->pdev_dev, "firmware download failed, error %d\n", ··· 3820 3812 /* Copy the supplied PHY Firmware image to the adapter memory location 3821 3813 * allocated by the adapter firmware. 3822 3814 */ 3815 + spin_lock_bh(&adap->win0_lock); 3823 3816 ret = t4_memory_rw(adap, win, mtype, maddr, 3824 3817 phy_fw_size, (__be32 *)phy_fw_data, 3825 3818 T4_MEMORY_WRITE); 3819 + spin_unlock_bh(&adap->win0_lock); 3826 3820 if (ret) 3827 3821 return ret; 3828 3822 ··· 10218 10208 n = size - i; 10219 10209 else 10220 10210 n = SF_PAGE_SIZE; 10221 - ret = t4_write_flash(adap, addr, n, cfg_data); 10211 + ret = t4_write_flash(adap, addr, n, cfg_data, true); 10222 10212 if (ret) 10223 10213 goto out; 10224 10214 ··· 10687 10677 for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { 10688 10678 addr += SF_PAGE_SIZE; 10689 10679 boot_data += SF_PAGE_SIZE; 10690 - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data); 10680 + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data, 10681 + false); 10691 10682 if (ret) 10692 10683 goto out; 10693 10684 } 10694 10685 10695 10686 ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE, 10696 - (const u8 *)header); 10687 + (const u8 *)header, false); 10697 10688 10698 10689 out: 10699 10690 if (ret) ··· 10769 10758 for (i = 0; i < size; i += SF_PAGE_SIZE) { 10770 10759 n = min_t(u32, size - i, SF_PAGE_SIZE); 10771 10760 10772 - ret = t4_write_flash(adap, addr, n, cfg_data); 10761 + ret = t4_write_flash(adap, addr, n, cfg_data, false); 10773 10762 if (ret) 10774 10763 goto out; 10775 10764 ··· 10781 10770 for (i = 0; i < npad; i++) { 10782 10771 u8 data = 0; 10783 10772 10784 - ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data); 10773 + ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data, 10774 + false); 10785 10775 if (ret) 10786 10776 goto out; 10787 10777 }

+3 -1

drivers/net/ethernet/ec_bhf.c

··· 576 576 struct ec_bhf_priv *priv = netdev_priv(net_dev); 577 577 578 578 unregister_netdev(net_dev); 579 - free_netdev(net_dev); 580 579 581 580 pci_iounmap(dev, priv->dma_io); 582 581 pci_iounmap(dev, priv->io); 582 + 583 + free_netdev(net_dev); 584 + 583 585 pci_release_regions(dev); 584 586 pci_clear_master(dev); 585 587 pci_disable_device(dev);

+1

drivers/net/ethernet/emulex/benet/be_main.c

··· 5897 5897 unmap_bars: 5898 5898 be_unmap_pci_bars(adapter); 5899 5899 free_netdev: 5900 + pci_disable_pcie_error_reporting(pdev); 5900 5901 free_netdev(netdev); 5901 5902 rel_reg: 5902 5903 pci_release_regions(pdev);

+5 -3

drivers/net/ethernet/freescale/fec_ptp.c

··· 215 215 { 216 216 struct fec_enet_private *fep = 217 217 container_of(cc, struct fec_enet_private, cc); 218 - const struct platform_device_id *id_entry = 219 - platform_get_device_id(fep->pdev); 220 218 u32 tempval; 221 219 222 220 tempval = readl(fep->hwp + FEC_ATIME_CTRL); 223 221 tempval |= FEC_T_CTRL_CAPTURE; 224 222 writel(tempval, fep->hwp + FEC_ATIME_CTRL); 225 223 226 - if (id_entry->driver_data & FEC_QUIRK_BUG_CAPTURE) 224 + if (fep->quirks & FEC_QUIRK_BUG_CAPTURE) 227 225 udelay(1); 228 226 229 227 return readl(fep->hwp + FEC_ATIME); ··· 602 604 fep->ptp_caps.enable = fec_ptp_enable; 603 605 604 606 fep->cycle_speed = clk_get_rate(fep->clk_ptp); 607 + if (!fep->cycle_speed) { 608 + fep->cycle_speed = NSEC_PER_SEC; 609 + dev_err(&fep->pdev->dev, "clk_ptp clock rate is zero\n"); 610 + } 605 611 fep->ptp_inc = NSEC_PER_SEC / fep->cycle_speed; 606 612 607 613 spin_lock_init(&fep->tmreg_lock);

+10 -8

drivers/net/ethernet/intel/ice/ice_lib.c

··· 1717 1717 * ice_vsi_cfg_txqs - Configure the VSI for Tx 1718 1718 * @vsi: the VSI being configured 1719 1719 * @rings: Tx ring array to be configured 1720 + * @count: number of Tx ring array elements 1720 1721 * 1721 1722 * Return 0 on success and a negative value on error 1722 1723 * Configure the Tx VSI for operation. 1723 1724 */ 1724 1725 static int 1725 - ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings) 1726 + ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count) 1726 1727 { 1727 1728 struct ice_aqc_add_tx_qgrp *qg_buf; 1728 1729 u16 q_idx = 0; ··· 1735 1734 1736 1735 qg_buf->num_txqs = 1; 1737 1736 1738 - for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { 1737 + for (q_idx = 0; q_idx < count; q_idx++) { 1739 1738 err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf); 1740 1739 if (err) 1741 1740 goto err_cfg_txqs; ··· 1755 1754 */ 1756 1755 int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi) 1757 1756 { 1758 - return ice_vsi_cfg_txqs(vsi, vsi->tx_rings); 1757 + return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq); 1759 1758 } 1760 1759 1761 1760 /** ··· 1770 1769 int ret; 1771 1770 int i; 1772 1771 1773 - ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings); 1772 + ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq); 1774 1773 if (ret) 1775 1774 return ret; 1776 1775 ··· 2010 2009 * @rst_src: reset source 2011 2010 * @rel_vmvf_num: Relative ID of VF/VM 2012 2011 * @rings: Tx ring array to be stopped 2012 + * @count: number of Tx ring array elements 2013 2013 */ 2014 2014 static int 2015 2015 ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, 2016 - u16 rel_vmvf_num, struct ice_ring **rings) 2016 + u16 rel_vmvf_num, struct ice_ring **rings, u16 count) 2017 2017 { 2018 2018 u16 q_idx; 2019 2019 2020 2020 if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) 2021 2021 return -EINVAL; 2022 2022 2023 - for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { 2023 + for (q_idx = 0; q_idx < count; q_idx++) { 2024 2024 struct ice_txq_meta txq_meta = { }; 2025 2025 int status; 2026 2026 ··· 2049 2047 ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, 2050 2048 u16 rel_vmvf_num) 2051 2049 { 2052 - return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings); 2050 + return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq); 2053 2051 } 2054 2052 2055 2053 /** ··· 2058 2056 */ 2059 2057 int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi) 2060 2058 { 2061 - return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings); 2059 + return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq); 2062 2060 } 2063 2061 2064 2062 /**

+15

drivers/net/ethernet/intel/ice/ice_main.c

··· 2556 2556 } 2557 2557 2558 2558 /** 2559 + * ice_xdp_safe_mode - XDP handler for safe mode 2560 + * @dev: netdevice 2561 + * @xdp: XDP command 2562 + */ 2563 + static int ice_xdp_safe_mode(struct net_device __always_unused *dev, 2564 + struct netdev_bpf *xdp) 2565 + { 2566 + NL_SET_ERR_MSG_MOD(xdp->extack, 2567 + "Please provide working DDP firmware package in order to use XDP\n" 2568 + "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst"); 2569 + return -EOPNOTSUPP; 2570 + } 2571 + 2572 + /** 2559 2573 * ice_xdp - implements XDP handler 2560 2574 * @dev: netdevice 2561 2575 * @xdp: XDP command ··· 6951 6937 .ndo_change_mtu = ice_change_mtu, 6952 6938 .ndo_get_stats64 = ice_get_stats64, 6953 6939 .ndo_tx_timeout = ice_tx_timeout, 6940 + .ndo_bpf = ice_xdp_safe_mode, 6954 6941 }; 6955 6942 6956 6943 static const struct net_device_ops ice_netdev_ops = {

+3 -2

drivers/net/ethernet/lantiq_xrx200.c

··· 154 154 155 155 static int xrx200_alloc_skb(struct xrx200_chan *ch) 156 156 { 157 + struct sk_buff *skb = ch->skb[ch->dma.desc]; 157 158 dma_addr_t mapping; 158 159 int ret = 0; 159 160 ··· 169 168 XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE); 170 169 if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) { 171 170 dev_kfree_skb_any(ch->skb[ch->dma.desc]); 171 + ch->skb[ch->dma.desc] = skb; 172 172 ret = -ENOMEM; 173 173 goto skip; 174 174 } ··· 200 198 ch->dma.desc %= LTQ_DESC_NUM; 201 199 202 200 if (ret) { 203 - ch->skb[ch->dma.desc] = skb; 204 201 net_dev->stats.rx_dropped++; 205 202 netdev_err(net_dev, "failed to allocate new rx buffer\n"); 206 203 return ret; ··· 353 352 struct xrx200_chan *ch = ptr; 354 353 355 354 if (napi_schedule_prep(&ch->napi)) { 356 - __napi_schedule(&ch->napi); 357 355 ltq_dma_disable_irq(&ch->dma); 356 + __napi_schedule(&ch->napi); 358 357 } 359 358 360 359 ltq_dma_ack_irq(&ch->dma);

+19

drivers/net/ethernet/mellanox/mlx5/core/dev.c

··· 303 303 int ret = 0, i; 304 304 305 305 mutex_lock(&mlx5_intf_mutex); 306 + priv->flags &= ~MLX5_PRIV_FLAGS_DETACH; 306 307 for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { 307 308 if (!priv->adev[i]) { 308 309 bool is_supported = false; ··· 321 320 } 322 321 } else { 323 322 adev = &priv->adev[i]->adev; 323 + 324 + /* Pay attention that this is not PCI driver that 325 + * mlx5_core_dev is connected, but auxiliary driver. 326 + * 327 + * Here we can race of module unload with devlink 328 + * reload, but we don't need to take extra lock because 329 + * we are holding global mlx5_intf_mutex. 330 + */ 331 + if (!adev->dev.driver) 332 + continue; 324 333 adrv = to_auxiliary_drv(adev->dev.driver); 325 334 326 335 if (adrv->resume) ··· 361 350 continue; 362 351 363 352 adev = &priv->adev[i]->adev; 353 + /* Auxiliary driver was unbind manually through sysfs */ 354 + if (!adev->dev.driver) 355 + goto skip_suspend; 356 + 364 357 adrv = to_auxiliary_drv(adev->dev.driver); 365 358 366 359 if (adrv->suspend) { ··· 372 357 continue; 373 358 } 374 359 360 + skip_suspend: 375 361 del_adev(&priv->adev[i]->adev); 376 362 priv->adev[i] = NULL; 377 363 } 364 + priv->flags |= MLX5_PRIV_FLAGS_DETACH; 378 365 mutex_unlock(&mlx5_intf_mutex); 379 366 } 380 367 ··· 465 448 struct mlx5_priv *priv = &dev->priv; 466 449 467 450 lockdep_assert_held(&mlx5_intf_mutex); 451 + if (priv->flags & MLX5_PRIV_FLAGS_DETACH) 452 + return 0; 468 453 469 454 delete_drivers(dev); 470 455 if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)

+2

drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c

··· 64 64 struct mlx5e_priv *priv = netdev_priv(dev); 65 65 struct devlink_port *port; 66 66 67 + if (!netif_device_present(dev)) 68 + return NULL; 67 69 port = mlx5e_devlink_get_dl_port(priv); 68 70 if (port->registered) 69 71 return port;

-1

drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 2 // Copyright (c) 2020 Mellanox Technologies 3 3 4 - #include <linux/ptp_classify.h> 5 4 #include "en/ptp.h" 6 5 #include "en/txrx.h" 7 6 #include "en/params.h"

+22

drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h

··· 6 6 7 7 #include "en.h" 8 8 #include "en_stats.h" 9 + #include <linux/ptp_classify.h> 9 10 10 11 struct mlx5e_ptpsq { 11 12 struct mlx5e_txqsq txqsq; ··· 43 42 struct hwtstamp_config *tstamp; 44 43 DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); 45 44 }; 45 + 46 + static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) 47 + { 48 + struct flow_keys fk; 49 + 50 + if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) 51 + return false; 52 + 53 + if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) 54 + return false; 55 + 56 + if (fk.basic.n_proto == htons(ETH_P_1588)) 57 + return true; 58 + 59 + if (fk.basic.n_proto != htons(ETH_P_IP) && 60 + fk.basic.n_proto != htons(ETH_P_IPV6)) 61 + return false; 62 + 63 + return (fk.basic.ip_proto == IPPROTO_UDP && 64 + fk.ports.dst == htons(PTP_EV_PORT)); 65 + } 46 66 47 67 int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, 48 68 u8 lag_port, struct mlx5e_ptp **cp);

+6 -9

drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c

··· 129 129 work); 130 130 struct mlx5e_neigh_hash_entry *nhe = update_work->nhe; 131 131 struct neighbour *n = update_work->n; 132 + struct mlx5e_encap_entry *e = NULL; 132 133 bool neigh_connected, same_dev; 133 - struct mlx5e_encap_entry *e; 134 134 unsigned char ha[ETH_ALEN]; 135 - struct mlx5e_priv *priv; 136 135 u8 nud_state, dead; 137 136 138 137 rtnl_lock(); ··· 155 156 if (!same_dev) 156 157 goto out; 157 158 158 - list_for_each_entry(e, &nhe->encap_list, encap_list) { 159 - if (!mlx5e_encap_take(e)) 160 - continue; 159 + /* mlx5e_get_next_init_encap() releases previous encap before returning 160 + * the next one. 161 + */ 162 + while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL) 163 + mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha); 161 164 162 - priv = netdev_priv(e->out_dev); 163 - mlx5e_rep_update_flows(priv, e, neigh_connected, ha); 164 - mlx5e_encap_put(priv, e); 165 - } 166 165 out: 167 166 rtnl_unlock(); 168 167 mlx5e_release_neigh_update_work(update_work);

+1 -5

drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c

··· 94 94 95 95 ASSERT_RTNL(); 96 96 97 - /* wait for encap to be fully initialized */ 98 - wait_for_completion(&e->res_ready); 99 - 100 97 mutex_lock(&esw->offloads.encap_tbl_lock); 101 98 encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); 102 - if (e->compl_result < 0 || (encap_connected == neigh_connected && 103 - ether_addr_equal(e->h_dest, ha))) 99 + if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) 104 100 goto unlock; 105 101 106 102 mlx5e_take_all_encap_flows(e, &flow_list);

+30 -3

drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c

··· 251 251 mlx5e_take_tmp_flow(flow, flow_list, 0); 252 252 } 253 253 254 + typedef bool (match_cb)(struct mlx5e_encap_entry *); 255 + 254 256 static struct mlx5e_encap_entry * 255 - mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, 256 - struct mlx5e_encap_entry *e) 257 + mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, 258 + struct mlx5e_encap_entry *e, 259 + match_cb match) 257 260 { 258 261 struct mlx5e_encap_entry *next = NULL; 259 262 ··· 291 288 /* wait for encap to be fully initialized */ 292 289 wait_for_completion(&next->res_ready); 293 290 /* continue searching if encap entry is not in valid state after completion */ 294 - if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { 291 + if (!match(next)) { 295 292 e = next; 296 293 goto retry; 297 294 } 298 295 299 296 return next; 297 + } 298 + 299 + static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) 300 + { 301 + return e->flags & MLX5_ENCAP_ENTRY_VALID; 302 + } 303 + 304 + static struct mlx5e_encap_entry * 305 + mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, 306 + struct mlx5e_encap_entry *e) 307 + { 308 + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); 309 + } 310 + 311 + static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) 312 + { 313 + return e->compl_result >= 0; 314 + } 315 + 316 + struct mlx5e_encap_entry * 317 + mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, 318 + struct mlx5e_encap_entry *e) 319 + { 320 + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); 300 321 } 301 322 302 323 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)

-3

drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c

··· 532 532 struct mlx5_core_dev *mdev = priv->mdev; 533 533 struct net_device *netdev = priv->netdev; 534 534 535 - if (!priv->ipsec) 536 - return; 537 - 538 535 if (!(mlx5_accel_ipsec_device_caps(mdev) & MLX5_ACCEL_IPSEC_CAP_ESP) || 539 536 !MLX5_CAP_ETH(mdev, swp)) { 540 537 mlx5_core_dbg(mdev, "mlx5e: ESP and SWP offload not supported\n");

+1 -1

drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c

··· 356 356 357 357 int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) 358 358 { 359 - int err = 0; 359 + int err = -ENOMEM; 360 360 int i; 361 361 362 362 if (!(priv->netdev->hw_features & NETIF_F_NTUPLE))

+6 -15

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

··· 2705 2705 nch = priv->channels.params.num_channels; 2706 2706 ntc = priv->channels.params.num_tc; 2707 2707 num_rxqs = nch * priv->profile->rq_groups; 2708 - if (priv->channels.params.ptp_rx) 2709 - num_rxqs++; 2710 2708 2711 2709 mlx5e_netdev_set_tcs(netdev, nch, ntc); 2712 2710 ··· 4822 4824 } 4823 4825 4824 4826 if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { 4825 - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | 4826 - NETIF_F_GSO_UDP_TUNNEL_CSUM; 4827 - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | 4828 - NETIF_F_GSO_UDP_TUNNEL_CSUM; 4829 - netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; 4830 - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | 4831 - NETIF_F_GSO_UDP_TUNNEL_CSUM; 4827 + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; 4828 + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; 4829 + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; 4832 4830 } 4833 4831 4834 4832 if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { 4835 - netdev->hw_features |= NETIF_F_GSO_GRE | 4836 - NETIF_F_GSO_GRE_CSUM; 4837 - netdev->hw_enc_features |= NETIF_F_GSO_GRE | 4838 - NETIF_F_GSO_GRE_CSUM; 4839 - netdev->gso_partial_features |= NETIF_F_GSO_GRE | 4840 - NETIF_F_GSO_GRE_CSUM; 4833 + netdev->hw_features |= NETIF_F_GSO_GRE; 4834 + netdev->hw_enc_features |= NETIF_F_GSO_GRE; 4835 + netdev->gso_partial_features |= NETIF_F_GSO_GRE; 4841 4836 } 4842 4837 4843 4838 if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {

+1 -1

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c

··· 4765 4765 list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { 4766 4766 wait_for_completion(&hpe->res_ready); 4767 4767 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) 4768 - hpe->hp->pair->peer_gone = true; 4768 + mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair); 4769 4769 4770 4770 mlx5e_hairpin_put(priv, hpe); 4771 4771 }

+3

drivers/net/ethernet/mellanox/mlx5/core/en_tc.h

··· 178 178 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list); 179 179 180 180 struct mlx5e_neigh_hash_entry; 181 + struct mlx5e_encap_entry * 182 + mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, 183 + struct mlx5e_encap_entry *e); 181 184 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); 182 185 183 186 void mlx5e_tc_reoffload_flows_work(struct work_struct *work);

+3 -22

drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

··· 32 32 33 33 #include <linux/tcp.h> 34 34 #include <linux/if_vlan.h> 35 - #include <linux/ptp_classify.h> 36 35 #include <net/geneve.h> 37 36 #include <net/dsfield.h> 38 37 #include "en.h" ··· 65 66 return priv->dcbx_dp.dscp2prio[dscp_cp]; 66 67 } 67 68 #endif 68 - 69 - static bool mlx5e_use_ptpsq(struct sk_buff *skb) 70 - { 71 - struct flow_keys fk; 72 - 73 - if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) 74 - return false; 75 - 76 - if (fk.basic.n_proto == htons(ETH_P_1588)) 77 - return true; 78 - 79 - if (fk.basic.n_proto != htons(ETH_P_IP) && 80 - fk.basic.n_proto != htons(ETH_P_IPV6)) 81 - return false; 82 - 83 - return (fk.basic.ip_proto == IPPROTO_UDP && 84 - fk.ports.dst == htons(PTP_EV_PORT)); 85 - } 86 69 87 70 static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) 88 71 { ··· 126 145 } 127 146 128 147 ptp_channel = READ_ONCE(priv->channels.ptp); 129 - if (unlikely(ptp_channel) && 130 - test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && 131 - mlx5e_use_ptpsq(skb)) 148 + if (unlikely(ptp_channel && 149 + test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && 150 + mlx5e_use_ptpsq(skb))) 132 151 return mlx5e_select_ptpsq(dev, skb); 133 152 134 153 txq_ix = netdev_pick_tx(dev, skb, NULL);

+4 -2

drivers/net/ethernet/mellanox/mlx5/core/eq.c

··· 136 136 137 137 eqe = next_eqe_sw(eq); 138 138 if (!eqe) 139 - return 0; 139 + goto out; 140 140 141 141 do { 142 142 struct mlx5_core_cq *cq; ··· 161 161 ++eq->cons_index; 162 162 163 163 } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); 164 + 165 + out: 164 166 eq_update_ci(eq, 1); 165 167 166 168 if (cqn != -1) ··· 250 248 ++eq->cons_index; 251 249 252 250 } while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); 253 - eq_update_ci(eq, 1); 254 251 255 252 out: 253 + eq_update_ci(eq, 1); 256 254 mlx5_eq_async_int_unlock(eq_async, recovery, &flags); 257 255 258 256 return unlikely(recovery) ? num_eqes : 0;

+6

drivers/net/ethernet/mellanox/mlx5/core/eswitch.c

··· 1054 1054 goto err_vhca_mapping; 1055 1055 } 1056 1056 1057 + /* External controller host PF has factory programmed MAC. 1058 + * Read it from the device. 1059 + */ 1060 + if (mlx5_core_is_ecpf(esw->dev) && vport_num == MLX5_VPORT_PF) 1061 + mlx5_query_nic_vport_mac_address(esw->dev, vport_num, true, vport->info.mac); 1062 + 1057 1063 esw_vport_change_handle_locked(vport); 1058 1064 1059 1065 esw->enabled_vports++;

+2 -1

drivers/net/ethernet/mellanox/mlx5/core/main.c

··· 1161 1161 err = mlx5_core_set_hca_defaults(dev); 1162 1162 if (err) { 1163 1163 mlx5_core_err(dev, "Failed to set hca defaults\n"); 1164 - goto err_sriov; 1164 + goto err_set_hca; 1165 1165 } 1166 1166 1167 1167 mlx5_vhca_event_start(dev); ··· 1194 1194 mlx5_sf_hw_table_destroy(dev); 1195 1195 err_vhca: 1196 1196 mlx5_vhca_event_stop(dev); 1197 + err_set_hca: 1197 1198 mlx5_cleanup_fs(dev); 1198 1199 err_fs: 1199 1200 mlx5_accel_tls_cleanup(dev);

+1 -1

drivers/net/ethernet/mellanox/mlx5/core/mr.c

··· 54 54 mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); 55 55 mkey->iova = MLX5_GET64(mkc, mkc, start_addr); 56 56 mkey->size = MLX5_GET64(mkc, mkc, len); 57 - mkey->key |= mlx5_idx_to_mkey(mkey_index); 57 + mkey->key = (u32)mlx5_mkey_variant(mkey->key) | mlx5_idx_to_mkey(mkey_index); 58 58 mkey->pd = MLX5_GET(mkc, mkc, pd); 59 59 init_waitqueue_head(&mkey->wait); 60 60

+3

drivers/net/ethernet/mellanox/mlx5/core/rdma.c

··· 156 156 { 157 157 int err; 158 158 159 + if (!MLX5_CAP_GEN(dev, roce)) 160 + return; 161 + 159 162 err = mlx5_nic_vport_enable_roce(dev); 160 163 if (err) { 161 164 mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);

+1

drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c

··· 163 163 sf_index = event->function_id - base_id; 164 164 sf_dev = xa_load(&table->devices, sf_index); 165 165 switch (event->new_vhca_state) { 166 + case MLX5_VHCA_STATE_INVALID: 166 167 case MLX5_VHCA_STATE_ALLOCATED: 167 168 if (sf_dev) 168 169 mlx5_sf_dev_del(table->dev, sf_dev, sf_index);

+16 -10

drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c

··· 694 694 if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) 695 695 return -EINVAL; 696 696 697 - memcpy(padded_data, data, data_sz); 697 + inline_data_sz = 698 + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); 699 + 700 + /* Add an alignment padding */ 701 + memcpy(padded_data + data_sz % inline_data_sz, data, data_sz); 698 702 699 703 /* Remove L2L3 outer headers */ 700 704 MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id, ··· 710 706 hw_action += DR_STE_ACTION_DOUBLE_SZ; 711 707 used_actions++; /* Remove and NOP are a single double action */ 712 708 713 - inline_data_sz = 714 - MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); 709 + /* Point to the last dword of the header */ 710 + data_ptr += (data_sz / inline_data_sz) * inline_data_sz; 715 711 716 - /* Add the new header inline + 2 extra bytes */ 712 + /* Add the new header using inline action 4Byte at a time, the header 713 + * is added in reversed order to the beginning of the packet to avoid 714 + * incorrect parsing by the HW. Since header is 14B or 18B an extra 715 + * two bytes are padded and later removed. 716 + */ 717 717 for (i = 0; i < data_sz / inline_data_sz + 1; i++) { 718 718 void *addr_inline; 719 719 720 720 MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id, 721 721 DR_STE_V1_ACTION_ID_INSERT_INLINE); 722 722 /* The hardware expects here offset to words (2 bytes) */ 723 - MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 724 - i * 2); 723 + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0); 725 724 726 725 /* Copy bytes one by one to avoid endianness problem */ 727 726 addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1, 728 727 hw_action, inline_data); 729 - memcpy(addr_inline, data_ptr, inline_data_sz); 728 + memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz); 730 729 hw_action += DR_STE_ACTION_DOUBLE_SZ; 731 - data_ptr += inline_data_sz; 732 730 used_actions++; 733 731 } 734 732 735 - /* Remove 2 extra bytes */ 733 + /* Remove first 2 extra bytes */ 736 734 MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id, 737 735 DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); 738 - MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2); 736 + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0); 739 737 /* The hardware expects here size in words (2 bytes) */ 740 738 MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1); 741 739 used_actions++;

+5 -4

drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h

··· 124 124 static inline bool 125 125 mlx5dr_is_supported(struct mlx5_core_dev *dev) 126 126 { 127 - return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || 128 - (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && 129 - (MLX5_CAP_GEN(dev, steering_format_version) <= 130 - MLX5_STEERING_FORMAT_CONNECTX_6DX)); 127 + return MLX5_CAP_GEN(dev, roce) && 128 + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || 129 + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && 130 + (MLX5_CAP_GEN(dev, steering_format_version) <= 131 + MLX5_STEERING_FORMAT_CONNECTX_6DX))); 131 132 } 132 133 133 134 /* buddy functions & structure */

+24 -6

drivers/net/ethernet/mellanox/mlx5/core/transobj.c

··· 424 424 return err; 425 425 } 426 426 427 + static void mlx5_hairpin_unpair_peer_sq(struct mlx5_hairpin *hp) 428 + { 429 + int i; 430 + 431 + for (i = 0; i < hp->num_channels; i++) 432 + mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, 433 + MLX5_SQC_STATE_RST, 0, 0); 434 + } 435 + 427 436 static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp) 428 437 { 429 438 int i; ··· 441 432 for (i = 0; i < hp->num_channels; i++) 442 433 mlx5_hairpin_modify_rq(hp->func_mdev, hp->rqn[i], MLX5_RQC_STATE_RDY, 443 434 MLX5_RQC_STATE_RST, 0, 0); 444 - 445 435 /* unset peer SQs */ 446 - if (hp->peer_gone) 447 - return; 448 - for (i = 0; i < hp->num_channels; i++) 449 - mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY, 450 - MLX5_SQC_STATE_RST, 0, 0); 436 + if (!hp->peer_gone) 437 + mlx5_hairpin_unpair_peer_sq(hp); 451 438 } 452 439 453 440 struct mlx5_hairpin * ··· 489 484 mlx5_hairpin_unpair_queues(hp); 490 485 mlx5_hairpin_destroy_queues(hp); 491 486 kfree(hp); 487 + } 488 + 489 + void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp) 490 + { 491 + int i; 492 + 493 + mlx5_hairpin_unpair_peer_sq(hp); 494 + 495 + /* destroy peer SQ */ 496 + for (i = 0; i < hp->num_channels; i++) 497 + mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]); 498 + 499 + hp->peer_gone = true; 492 500 }

-2

drivers/net/ethernet/mellanox/mlx5/core/vport.c

··· 465 465 void *in; 466 466 int err; 467 467 468 - if (!vport) 469 - return -EINVAL; 470 468 if (!MLX5_CAP_GEN(mdev, vport_group_manager)) 471 469 return -EACCES; 472 470

+4 -2

drivers/net/ethernet/mellanox/mlxsw/core_thermal.c

··· 693 693 MLXSW_THERMAL_TRIP_MASK, 694 694 module_tz, 695 695 &mlxsw_thermal_module_ops, 696 - NULL, 0, 0); 696 + NULL, 0, 697 + module_tz->parent->polling_delay); 697 698 if (IS_ERR(module_tz->tzdev)) { 698 699 err = PTR_ERR(module_tz->tzdev); 699 700 return err; ··· 816 815 MLXSW_THERMAL_TRIP_MASK, 817 816 gearbox_tz, 818 817 &mlxsw_thermal_gearbox_ops, 819 - NULL, 0, 0); 818 + NULL, 0, 819 + gearbox_tz->parent->polling_delay); 820 820 if (IS_ERR(gearbox_tz->tzdev)) 821 821 return PTR_ERR(gearbox_tz->tzdev); 822 822

+1 -1

drivers/net/ethernet/mellanox/mlxsw/reg.h

··· 3907 3907 #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 3908 3908 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 3909 3909 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 3910 - #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5 3910 + #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 11 3911 3911 3912 3912 static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, 3913 3913 enum mlxsw_reg_qeec_hr hr, u8 index,

+4 -1

drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c

··· 1332 1332 u8 band, u32 child_handle) 1333 1333 { 1334 1334 struct mlxsw_sp_qdisc *old_qdisc; 1335 + u32 parent; 1335 1336 1336 1337 if (band < mlxsw_sp_qdisc->num_classes && 1337 1338 mlxsw_sp_qdisc->qdiscs[band].handle == child_handle) ··· 1353 1352 if (old_qdisc) 1354 1353 mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc); 1355 1354 1356 - mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, band); 1355 + parent = TC_H_MAKE(mlxsw_sp_qdisc->handle, band + 1); 1356 + mlxsw_sp_qdisc = mlxsw_sp_qdisc->ops->find_class(mlxsw_sp_qdisc, 1357 + parent); 1357 1358 if (!WARN_ON(!mlxsw_sp_qdisc)) 1358 1359 mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); 1359 1360

+5

drivers/net/ethernet/mscc/ocelot.c

··· 379 379 380 380 int ocelot_port_flush(struct ocelot *ocelot, int port) 381 381 { 382 + unsigned int pause_ena; 382 383 int err, val; 383 384 384 385 /* Disable dequeuing from the egress queues */ ··· 388 387 QSYS_PORT_MODE, port); 389 388 390 389 /* Disable flow control */ 390 + ocelot_fields_read(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, &pause_ena); 391 391 ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, 0); 392 392 393 393 /* Disable priority flow control */ ··· 423 421 424 422 /* Clear flushing again. */ 425 423 ocelot_rmw_gix(ocelot, 0, REW_PORT_CFG_FLUSH_ENA, REW_PORT_CFG, port); 424 + 425 + /* Re-enable flow control */ 426 + ocelot_fields_write(ocelot, port, SYS_PAUSE_CFG_PAUSE_ENA, pause_ena); 426 427 427 428 return err; 428 429 }

+2

drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c

··· 1602 1602 free_netdev(netdev); 1603 1603 1604 1604 err_out_free_res: 1605 + if (NX_IS_REVISION_P3(pdev->revision)) 1606 + pci_disable_pcie_error_reporting(pdev); 1605 1607 pci_release_regions(pdev); 1606 1608 1607 1609 err_out_disable_pdev:

+3 -1

drivers/net/ethernet/qlogic/qed/qed_dcbx.c

··· 1266 1266 p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC; 1267 1267 1268 1268 p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled; 1269 + BUILD_BUG_ON(sizeof(dcbx_info->operational.params) != 1270 + sizeof(p_hwfn->p_dcbx_info->set.config.params)); 1269 1271 memcpy(&p_hwfn->p_dcbx_info->set.config.params, 1270 1272 &dcbx_info->operational.params, 1271 - sizeof(struct qed_dcbx_admin_params)); 1273 + sizeof(p_hwfn->p_dcbx_info->set.config.params)); 1272 1274 p_hwfn->p_dcbx_info->set.config.valid = true; 1273 1275 1274 1276 memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set));

+1

drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c

··· 2690 2690 kfree(ahw); 2691 2691 2692 2692 err_out_free_res: 2693 + pci_disable_pcie_error_reporting(pdev); 2693 2694 pci_release_regions(pdev); 2694 2695 2695 2696 err_out_disable_pdev:

+8 -8

drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c

··· 126 126 struct rtnl_link_stats64 *s) 127 127 { 128 128 struct rmnet_priv *priv = netdev_priv(dev); 129 - struct rmnet_vnd_stats total_stats; 129 + struct rmnet_vnd_stats total_stats = { }; 130 130 struct rmnet_pcpu_stats *pcpu_ptr; 131 + struct rmnet_vnd_stats snapshot; 131 132 unsigned int cpu, start; 132 - 133 - memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats)); 134 133 135 134 for_each_possible_cpu(cpu) { 136 135 pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu); 137 136 138 137 do { 139 138 start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); 140 - total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts; 141 - total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes; 142 - total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts; 143 - total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes; 139 + snapshot = pcpu_ptr->stats; /* struct assignment */ 144 140 } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start)); 145 141 146 - total_stats.tx_drops += pcpu_ptr->stats.tx_drops; 142 + total_stats.rx_pkts += snapshot.rx_pkts; 143 + total_stats.rx_bytes += snapshot.rx_bytes; 144 + total_stats.tx_pkts += snapshot.tx_pkts; 145 + total_stats.tx_bytes += snapshot.tx_bytes; 146 + total_stats.tx_drops += snapshot.tx_drops; 147 147 } 148 148 149 149 s->rx_packets = total_stats.rx_pkts;

+1 -1

drivers/net/ethernet/realtek/r8169_main.c

··· 1671 1671 { 1672 1672 switch(stringset) { 1673 1673 case ETH_SS_STATS: 1674 - memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings)); 1674 + memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings)); 1675 1675 break; 1676 1676 } 1677 1677 }

+1 -1

drivers/net/ethernet/renesas/sh_eth.c

··· 2287 2287 { 2288 2288 switch (stringset) { 2289 2289 case ETH_SS_STATS: 2290 - memcpy(data, *sh_eth_gstrings_stats, 2290 + memcpy(data, sh_eth_gstrings_stats, 2291 2291 sizeof(sh_eth_gstrings_stats)); 2292 2292 break; 2293 2293 }

+4 -4

drivers/net/ethernet/stmicro/stmmac/dwmac1000.h

··· 76 76 #define LPI_CTRL_STATUS_TLPIEN 0x00000001 /* Transmit LPI Entry */ 77 77 78 78 /* GMAC HW ADDR regs */ 79 - #define GMAC_ADDR_HIGH(reg) (((reg > 15) ? 0x00000800 : 0x00000040) + \ 80 - (reg * 8)) 81 - #define GMAC_ADDR_LOW(reg) (((reg > 15) ? 0x00000804 : 0x00000044) + \ 82 - (reg * 8)) 79 + #define GMAC_ADDR_HIGH(reg) ((reg > 15) ? 0x00000800 + (reg - 16) * 8 : \ 80 + 0x00000040 + (reg * 8)) 81 + #define GMAC_ADDR_LOW(reg) ((reg > 15) ? 0x00000804 + (reg - 16) * 8 : \ 82 + 0x00000044 + (reg * 8)) 83 83 #define GMAC_MAX_PERFECT_ADDRESSES 1 84 84 85 85 #define GMAC_PCS_BASE 0x000000c0 /* PCS register base */

+2

drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c

··· 622 622 void stmmac_remove_config_dt(struct platform_device *pdev, 623 623 struct plat_stmmacenet_data *plat) 624 624 { 625 + clk_disable_unprepare(plat->stmmac_clk); 626 + clk_disable_unprepare(plat->pclk); 625 627 of_node_put(plat->phy_node); 626 628 of_node_put(plat->mdio_node); 627 629 }

+24 -3

drivers/net/ethernet/xilinx/ll_temac_main.c

··· 774 774 stat = be32_to_cpu(cur_p->app0); 775 775 776 776 while (stat & STS_CTRL_APP0_CMPLT) { 777 + /* Make sure that the other fields are read after bd is 778 + * released by dma 779 + */ 780 + rmb(); 777 781 dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), 778 782 be32_to_cpu(cur_p->len), DMA_TO_DEVICE); 779 783 skb = (struct sk_buff *)ptr_from_txbd(cur_p); 780 784 if (skb) 781 785 dev_consume_skb_irq(skb); 782 - cur_p->app0 = 0; 783 786 cur_p->app1 = 0; 784 787 cur_p->app2 = 0; 785 788 cur_p->app3 = 0; ··· 790 787 791 788 ndev->stats.tx_packets++; 792 789 ndev->stats.tx_bytes += be32_to_cpu(cur_p->len); 790 + 791 + /* app0 must be visible last, as it is used to flag 792 + * availability of the bd 793 + */ 794 + smp_mb(); 795 + cur_p->app0 = 0; 793 796 794 797 lp->tx_bd_ci++; 795 798 if (lp->tx_bd_ci >= lp->tx_bd_num) ··· 822 813 do { 823 814 if (cur_p->app0) 824 815 return NETDEV_TX_BUSY; 816 + 817 + /* Make sure to read next bd app0 after this one */ 818 + rmb(); 825 819 826 820 tail++; 827 821 if (tail >= lp->tx_bd_num) ··· 861 849 smp_mb(); 862 850 863 851 /* Space might have just been freed - check again */ 864 - if (temac_check_tx_bd_space(lp, num_frag)) 852 + if (temac_check_tx_bd_space(lp, num_frag + 1)) 865 853 return NETDEV_TX_BUSY; 866 854 867 855 netif_wake_queue(ndev); ··· 888 876 return NETDEV_TX_OK; 889 877 } 890 878 cur_p->phys = cpu_to_be32(skb_dma_addr); 891 - ptr_to_txbd((void *)skb, cur_p); 892 879 893 880 for (ii = 0; ii < num_frag; ii++) { 894 881 if (++lp->tx_bd_tail >= lp->tx_bd_num) ··· 926 915 } 927 916 cur_p->app0 |= cpu_to_be32(STS_CTRL_APP0_EOP); 928 917 918 + /* Mark last fragment with skb address, so it can be consumed 919 + * in temac_start_xmit_done() 920 + */ 921 + ptr_to_txbd((void *)skb, cur_p); 922 + 929 923 tail_p = lp->tx_bd_p + sizeof(*lp->tx_bd_v) * lp->tx_bd_tail; 930 924 lp->tx_bd_tail++; 931 925 if (lp->tx_bd_tail >= lp->tx_bd_num) ··· 941 925 /* Kick off the transfer */ 942 926 wmb(); 943 927 lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */ 928 + 929 + if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { 930 + netdev_info(ndev, "%s -> netif_stop_queue\n", __func__); 931 + netif_stop_queue(ndev); 932 + } 944 933 945 934 return NETDEV_TX_OK; 946 935 }

+1

drivers/net/hamradio/mkiss.c

··· 799 799 ax->tty = NULL; 800 800 801 801 unregister_netdev(ax->dev); 802 + free_netdev(ax->dev); 802 803 } 803 804 804 805 /* Perform I/O control on an active ax25 channel. */

+1 -1

drivers/net/mhi/net.c

··· 49 49 return 0; 50 50 } 51 51 52 - static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) 52 + static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) 53 53 { 54 54 struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); 55 55 const struct mhi_net_proto *proto = mhi_netdev->proto;

+1 -5

drivers/net/phy/dp83867.c

··· 826 826 { 827 827 int err; 828 828 829 - err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); 829 + err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART); 830 830 if (err < 0) 831 831 return err; 832 832 833 833 usleep_range(10, 20); 834 834 835 - /* After reset FORCE_LINK_GOOD bit is set. Although the 836 - * default value should be unset. Disable FORCE_LINK_GOOD 837 - * for the phy to work properly. 838 - */ 839 835 return phy_modify(phydev, MII_DP83867_PHYCTRL, 840 836 DP83867_PHYCR_FORCE_LINK_GOOD, 0); 841 837 }

+1 -1

drivers/net/usb/cdc_eem.c

··· 123 123 } 124 124 125 125 skb2 = skb_copy_expand(skb, EEM_HEAD, ETH_FCS_LEN + padlen, flags); 126 + dev_kfree_skb_any(skb); 126 127 if (!skb2) 127 128 return NULL; 128 129 129 - dev_kfree_skb_any(skb); 130 130 skb = skb2; 131 131 132 132 done:

+1 -1

drivers/net/usb/cdc_ncm.c

··· 1880 1880 static const struct driver_info cdc_ncm_info = { 1881 1881 .description = "CDC NCM", 1882 1882 .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET 1883 - | FLAG_LINK_INTR, 1883 + | FLAG_LINK_INTR | FLAG_ETHER, 1884 1884 .bind = cdc_ncm_bind, 1885 1885 .unbind = cdc_ncm_unbind, 1886 1886 .manage_power = usbnet_manage_power,

+1 -1

drivers/net/usb/qmi_wwan.c

··· 575 575 576 576 if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) { 577 577 skb->protocol = htons(ETH_P_MAP); 578 - return (netif_rx(skb) == NET_RX_SUCCESS); 578 + return 1; 579 579 } 580 580 581 581 switch (skb->data[0] & 0xf0) {

+1 -1

drivers/net/usb/r8152.c

··· 8678 8678 { 8679 8679 switch (stringset) { 8680 8680 case ETH_SS_STATS: 8681 - memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings)); 8681 + memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings)); 8682 8682 break; 8683 8683 } 8684 8684 }

+6 -4

drivers/net/usb/smsc75xx.c

··· 1483 1483 ret = smsc75xx_wait_ready(dev, 0); 1484 1484 if (ret < 0) { 1485 1485 netdev_warn(dev->net, "device not ready in smsc75xx_bind\n"); 1486 - goto err; 1486 + goto free_pdata; 1487 1487 } 1488 1488 1489 1489 smsc75xx_init_mac_address(dev); ··· 1492 1492 ret = smsc75xx_reset(dev); 1493 1493 if (ret < 0) { 1494 1494 netdev_warn(dev->net, "smsc75xx_reset error %d\n", ret); 1495 - goto err; 1495 + goto cancel_work; 1496 1496 } 1497 1497 1498 1498 dev->net->netdev_ops = &smsc75xx_netdev_ops; ··· 1503 1503 dev->net->max_mtu = MAX_SINGLE_PACKET_SIZE; 1504 1504 return 0; 1505 1505 1506 - err: 1506 + cancel_work: 1507 + cancel_work_sync(&pdata->set_multicast); 1508 + free_pdata: 1507 1509 kfree(pdata); 1510 + dev->data[0] = 0; 1508 1511 return ret; 1509 1512 } 1510 1513 ··· 1518 1515 cancel_work_sync(&pdata->set_multicast); 1519 1516 netif_dbg(dev, ifdown, dev->net, "free pdata\n"); 1520 1517 kfree(pdata); 1521 - pdata = NULL; 1522 1518 dev->data[0] = 0; 1523 1519 } 1524 1520 }

+2 -4

drivers/net/vrf.c

··· 1183 1183 1184 1184 dev->flags = IFF_MASTER | IFF_NOARP; 1185 1185 1186 - /* MTU is irrelevant for VRF device; set to 64k similar to lo */ 1187 - dev->mtu = 64 * 1024; 1188 - 1189 1186 /* similarly, oper state is irrelevant; set to up to avoid confusion */ 1190 1187 dev->operstate = IF_OPER_UP; 1191 1188 netdev_lockdep_set_classes(dev); ··· 1682 1685 * which breaks networking. 1683 1686 */ 1684 1687 dev->min_mtu = IPV6_MIN_MTU; 1685 - dev->max_mtu = ETH_MAX_MTU; 1688 + dev->max_mtu = IP6_MAX_MTU; 1689 + dev->mtu = dev->max_mtu; 1686 1690 } 1687 1691 1688 1692 static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],

+5

drivers/net/wireless/mac80211_hwsim.c

··· 1693 1693 static void mac80211_hwsim_stop(struct ieee80211_hw *hw) 1694 1694 { 1695 1695 struct mac80211_hwsim_data *data = hw->priv; 1696 + 1696 1697 data->started = false; 1697 1698 hrtimer_cancel(&data->beacon_timer); 1699 + 1700 + while (!skb_queue_empty(&data->pending)) 1701 + ieee80211_free_txskb(hw, skb_dequeue(&data->pending)); 1702 + 1698 1703 wiphy_dbg(hw->wiphy, "%s\n", __func__); 1699 1704 } 1700 1705

+2 -1

drivers/pci/controller/dwc/Makefile

··· 18 18 obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o 19 19 obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o 20 20 obj-$(CONFIG_PCI_MESON) += pci-meson.o 21 + obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o 21 22 obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o 22 23 obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o 23 24 ··· 39 38 ifdef CONFIG_PCI_QUIRKS 40 39 obj-$(CONFIG_ARM64) += pcie-al.o 41 40 obj-$(CONFIG_ARM64) += pcie-hisi.o 42 - obj-$(CONFIG_ARM64) += pcie-tegra194.o 41 + obj-$(CONFIG_ARM64) += pcie-tegra194-acpi.o 43 42 endif 44 43 endif

+108

drivers/pci/controller/dwc/pcie-tegra194-acpi.c

··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* 3 + * ACPI quirks for Tegra194 PCIe host controller 4 + * 5 + * Copyright (C) 2021 NVIDIA Corporation. 6 + * 7 + * Author: Vidya Sagar <vidyas@nvidia.com> 8 + */ 9 + 10 + #include <linux/pci.h> 11 + #include <linux/pci-acpi.h> 12 + #include <linux/pci-ecam.h> 13 + 14 + #include "pcie-designware.h" 15 + 16 + struct tegra194_pcie_ecam { 17 + void __iomem *config_base; 18 + void __iomem *iatu_base; 19 + void __iomem *dbi_base; 20 + }; 21 + 22 + static int tegra194_acpi_init(struct pci_config_window *cfg) 23 + { 24 + struct device *dev = cfg->parent; 25 + struct tegra194_pcie_ecam *pcie_ecam; 26 + 27 + pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL); 28 + if (!pcie_ecam) 29 + return -ENOMEM; 30 + 31 + pcie_ecam->config_base = cfg->win; 32 + pcie_ecam->iatu_base = cfg->win + SZ_256K; 33 + pcie_ecam->dbi_base = cfg->win + SZ_512K; 34 + cfg->priv = pcie_ecam; 35 + 36 + return 0; 37 + } 38 + 39 + static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index, 40 + u32 val, u32 reg) 41 + { 42 + u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index); 43 + 44 + writel(val, pcie_ecam->iatu_base + offset + reg); 45 + } 46 + 47 + static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam, 48 + int index, int type, u64 cpu_addr, 49 + u64 pci_addr, u64 size) 50 + { 51 + atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr), 52 + PCIE_ATU_LOWER_BASE); 53 + atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr), 54 + PCIE_ATU_UPPER_BASE); 55 + atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr), 56 + PCIE_ATU_LOWER_TARGET); 57 + atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1), 58 + PCIE_ATU_LIMIT); 59 + atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr), 60 + PCIE_ATU_UPPER_TARGET); 61 + atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1); 62 + atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2); 63 + } 64 + 65 + static void __iomem *tegra194_map_bus(struct pci_bus *bus, 66 + unsigned int devfn, int where) 67 + { 68 + struct pci_config_window *cfg = bus->sysdata; 69 + struct tegra194_pcie_ecam *pcie_ecam = cfg->priv; 70 + u32 busdev; 71 + int type; 72 + 73 + if (bus->number < cfg->busr.start || bus->number > cfg->busr.end) 74 + return NULL; 75 + 76 + if (bus->number == cfg->busr.start) { 77 + if (PCI_SLOT(devfn) == 0) 78 + return pcie_ecam->dbi_base + where; 79 + else 80 + return NULL; 81 + } 82 + 83 + busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) | 84 + PCIE_ATU_FUNC(PCI_FUNC(devfn)); 85 + 86 + if (bus->parent->number == cfg->busr.start) { 87 + if (PCI_SLOT(devfn) == 0) 88 + type = PCIE_ATU_TYPE_CFG0; 89 + else 90 + return NULL; 91 + } else { 92 + type = PCIE_ATU_TYPE_CFG1; 93 + } 94 + 95 + program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev, 96 + SZ_256K); 97 + 98 + return pcie_ecam->config_base + where; 99 + } 100 + 101 + const struct pci_ecam_ops tegra194_pcie_ops = { 102 + .init = tegra194_acpi_init, 103 + .pci_ops = { 104 + .map_bus = tegra194_map_bus, 105 + .read = pci_generic_config_read, 106 + .write = pci_generic_config_write, 107 + } 108 + };

+18 -120

drivers/pci/controller/dwc/pcie-tegra194.c

··· 22 22 #include <linux/of_irq.h> 23 23 #include <linux/of_pci.h> 24 24 #include <linux/pci.h> 25 - #include <linux/pci-acpi.h> 26 - #include <linux/pci-ecam.h> 27 25 #include <linux/phy/phy.h> 28 26 #include <linux/pinctrl/consumer.h> 29 27 #include <linux/platform_device.h> ··· 245 247 GEN4_CORE_CLK_FREQ 246 248 }; 247 249 248 - static const u32 event_cntr_ctrl_offset[] = { 249 - 0x1d8, 250 - 0x1a8, 251 - 0x1a8, 252 - 0x1a8, 253 - 0x1c4, 254 - 0x1d8 255 - }; 256 - 257 - static const u32 event_cntr_data_offset[] = { 258 - 0x1dc, 259 - 0x1ac, 260 - 0x1ac, 261 - 0x1ac, 262 - 0x1c8, 263 - 0x1dc 264 - }; 265 - 266 250 struct tegra_pcie_dw { 267 251 struct device *dev; 268 252 struct resource *appl_res; ··· 292 312 struct tegra_pcie_dw_of_data { 293 313 enum dw_pcie_device_mode mode; 294 314 }; 295 - 296 - #if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) 297 - struct tegra194_pcie_ecam { 298 - void __iomem *config_base; 299 - void __iomem *iatu_base; 300 - void __iomem *dbi_base; 301 - }; 302 - 303 - static int tegra194_acpi_init(struct pci_config_window *cfg) 304 - { 305 - struct device *dev = cfg->parent; 306 - struct tegra194_pcie_ecam *pcie_ecam; 307 - 308 - pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL); 309 - if (!pcie_ecam) 310 - return -ENOMEM; 311 - 312 - pcie_ecam->config_base = cfg->win; 313 - pcie_ecam->iatu_base = cfg->win + SZ_256K; 314 - pcie_ecam->dbi_base = cfg->win + SZ_512K; 315 - cfg->priv = pcie_ecam; 316 - 317 - return 0; 318 - } 319 - 320 - static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index, 321 - u32 val, u32 reg) 322 - { 323 - u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index); 324 - 325 - writel(val, pcie_ecam->iatu_base + offset + reg); 326 - } 327 - 328 - static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam, 329 - int index, int type, u64 cpu_addr, 330 - u64 pci_addr, u64 size) 331 - { 332 - atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr), 333 - PCIE_ATU_LOWER_BASE); 334 - atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr), 335 - PCIE_ATU_UPPER_BASE); 336 - atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr), 337 - PCIE_ATU_LOWER_TARGET); 338 - atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1), 339 - PCIE_ATU_LIMIT); 340 - atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr), 341 - PCIE_ATU_UPPER_TARGET); 342 - atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1); 343 - atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2); 344 - } 345 - 346 - static void __iomem *tegra194_map_bus(struct pci_bus *bus, 347 - unsigned int devfn, int where) 348 - { 349 - struct pci_config_window *cfg = bus->sysdata; 350 - struct tegra194_pcie_ecam *pcie_ecam = cfg->priv; 351 - u32 busdev; 352 - int type; 353 - 354 - if (bus->number < cfg->busr.start || bus->number > cfg->busr.end) 355 - return NULL; 356 - 357 - if (bus->number == cfg->busr.start) { 358 - if (PCI_SLOT(devfn) == 0) 359 - return pcie_ecam->dbi_base + where; 360 - else 361 - return NULL; 362 - } 363 - 364 - busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) | 365 - PCIE_ATU_FUNC(PCI_FUNC(devfn)); 366 - 367 - if (bus->parent->number == cfg->busr.start) { 368 - if (PCI_SLOT(devfn) == 0) 369 - type = PCIE_ATU_TYPE_CFG0; 370 - else 371 - return NULL; 372 - } else { 373 - type = PCIE_ATU_TYPE_CFG1; 374 - } 375 - 376 - program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev, 377 - SZ_256K); 378 - 379 - return pcie_ecam->config_base + where; 380 - } 381 - 382 - const struct pci_ecam_ops tegra194_pcie_ops = { 383 - .init = tegra194_acpi_init, 384 - .pci_ops = { 385 - .map_bus = tegra194_map_bus, 386 - .read = pci_generic_config_read, 387 - .write = pci_generic_config_write, 388 - } 389 - }; 390 - #endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */ 391 - 392 - #ifdef CONFIG_PCIE_TEGRA194 393 315 394 316 static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci) 395 317 { ··· 576 694 }; 577 695 578 696 #if defined(CONFIG_PCIEASPM) 697 + static const u32 event_cntr_ctrl_offset[] = { 698 + 0x1d8, 699 + 0x1a8, 700 + 0x1a8, 701 + 0x1a8, 702 + 0x1c4, 703 + 0x1d8 704 + }; 705 + 706 + static const u32 event_cntr_data_offset[] = { 707 + 0x1dc, 708 + 0x1ac, 709 + 0x1ac, 710 + 0x1ac, 711 + 0x1c8, 712 + 0x1dc 713 + }; 714 + 579 715 static void disable_aspm_l11(struct tegra_pcie_dw *pcie) 580 716 { 581 717 u32 val; ··· 2311 2411 MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>"); 2312 2412 MODULE_DESCRIPTION("NVIDIA PCIe host controller driver"); 2313 2413 MODULE_LICENSE("GPL v2"); 2314 - 2315 - #endif /* CONFIG_PCIE_TEGRA194 */

+40 -9

drivers/pci/controller/pci-aardvark.c

··· 514 514 udelay(PIO_RETRY_DELAY); 515 515 } 516 516 517 - dev_err(dev, "config read/write timed out\n"); 517 + dev_err(dev, "PIO read/write transfer time out\n"); 518 518 return -ETIMEDOUT; 519 519 } 520 520 ··· 657 657 return true; 658 658 } 659 659 660 + static bool advk_pcie_pio_is_running(struct advk_pcie *pcie) 661 + { 662 + struct device *dev = &pcie->pdev->dev; 663 + 664 + /* 665 + * Trying to start a new PIO transfer when previous has not completed 666 + * cause External Abort on CPU which results in kernel panic: 667 + * 668 + * SError Interrupt on CPU0, code 0xbf000002 -- SError 669 + * Kernel panic - not syncing: Asynchronous SError Interrupt 670 + * 671 + * Functions advk_pcie_rd_conf() and advk_pcie_wr_conf() are protected 672 + * by raw_spin_lock_irqsave() at pci_lock_config() level to prevent 673 + * concurrent calls at the same time. But because PIO transfer may take 674 + * about 1.5s when link is down or card is disconnected, it means that 675 + * advk_pcie_wait_pio() does not always have to wait for completion. 676 + * 677 + * Some versions of ARM Trusted Firmware handles this External Abort at 678 + * EL3 level and mask it to prevent kernel panic. Relevant TF-A commit: 679 + * https://git.trustedfirmware.org/TF-A/trusted-firmware-a.git/commit/?id=3c7dcdac5c50 680 + */ 681 + if (advk_readl(pcie, PIO_START)) { 682 + dev_err(dev, "Previous PIO read/write transfer is still running\n"); 683 + return true; 684 + } 685 + 686 + return false; 687 + } 688 + 660 689 static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, 661 690 int where, int size, u32 *val) 662 691 { ··· 702 673 return pci_bridge_emul_conf_read(&pcie->bridge, where, 703 674 size, val); 704 675 705 - /* Start PIO */ 706 - advk_writel(pcie, 0, PIO_START); 707 - advk_writel(pcie, 1, PIO_ISR); 676 + if (advk_pcie_pio_is_running(pcie)) { 677 + *val = 0xffffffff; 678 + return PCIBIOS_SET_FAILED; 679 + } 708 680 709 681 /* Program the control register */ 710 682 reg = advk_readl(pcie, PIO_CTRL); ··· 724 694 /* Program the data strobe */ 725 695 advk_writel(pcie, 0xf, PIO_WR_DATA_STRB); 726 696 727 - /* Start the transfer */ 697 + /* Clear PIO DONE ISR and start the transfer */ 698 + advk_writel(pcie, 1, PIO_ISR); 728 699 advk_writel(pcie, 1, PIO_START); 729 700 730 701 ret = advk_pcie_wait_pio(pcie); ··· 765 734 if (where % size) 766 735 return PCIBIOS_SET_FAILED; 767 736 768 - /* Start PIO */ 769 - advk_writel(pcie, 0, PIO_START); 770 - advk_writel(pcie, 1, PIO_ISR); 737 + if (advk_pcie_pio_is_running(pcie)) 738 + return PCIBIOS_SET_FAILED; 771 739 772 740 /* Program the control register */ 773 741 reg = advk_readl(pcie, PIO_CTRL); ··· 793 763 /* Program the data strobe */ 794 764 advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB); 795 765 796 - /* Start the transfer */ 766 + /* Clear PIO DONE ISR and start the transfer */ 767 + advk_writel(pcie, 1, PIO_ISR); 797 768 advk_writel(pcie, 1, PIO_START); 798 769 799 770 ret = advk_pcie_wait_pio(pcie);

+2

drivers/pci/of.c

··· 353 353 dev_warn(dev, "More than one I/O resource converted for %pOF. CPU base address for old range lost!\n", 354 354 dev_node); 355 355 *io_base = range.cpu_addr; 356 + } else if (resource_type(res) == IORESOURCE_MEM) { 357 + res->flags &= ~IORESOURCE_MEM_64; 356 358 } 357 359 358 360 pci_add_resource_offset(resources, res, res->start - range.pci_addr);

+92 -1

drivers/pci/quirks.c

··· 3547 3547 } 3548 3548 3549 3549 /* 3550 + * Some NVIDIA GPU devices do not work with bus reset, SBR needs to be 3551 + * prevented for those affected devices. 3552 + */ 3553 + static void quirk_nvidia_no_bus_reset(struct pci_dev *dev) 3554 + { 3555 + if ((dev->device & 0xffc0) == 0x2340) 3556 + quirk_no_bus_reset(dev); 3557 + } 3558 + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, 3559 + quirk_nvidia_no_bus_reset); 3560 + 3561 + /* 3550 3562 * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. 3551 3563 * The device will throw a Link Down error on AER-capable systems and 3552 3564 * regardless of AER, config space of the device is never accessible again ··· 3577 3565 * accesses to the child may fail. 3578 3566 */ 3579 3567 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset); 3568 + 3569 + /* 3570 + * Some TI KeyStone C667X devices do not support bus/hot reset. The PCIESS 3571 + * automatically disables LTSSM when Secondary Bus Reset is received and 3572 + * the device stops working. Prevent bus reset for these devices. With 3573 + * this change, the device can be assigned to VMs with VFIO, but it will 3574 + * leak state between VMs. Reference 3575 + * https://e2e.ti.com/support/processors/f/791/t/954382 3576 + */ 3577 + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0xb005, quirk_no_bus_reset); 3580 3578 3581 3579 static void quirk_no_pm_reset(struct pci_dev *dev) 3582 3580 { ··· 3923 3901 return 0; 3924 3902 } 3925 3903 3904 + #define PCI_DEVICE_ID_HINIC_VF 0x375E 3905 + #define HINIC_VF_FLR_TYPE 0x1000 3906 + #define HINIC_VF_FLR_CAP_BIT (1UL << 30) 3907 + #define HINIC_VF_OP 0xE80 3908 + #define HINIC_VF_FLR_PROC_BIT (1UL << 18) 3909 + #define HINIC_OPERATION_TIMEOUT 15000 /* 15 seconds */ 3910 + 3911 + /* Device-specific reset method for Huawei Intelligent NIC virtual functions */ 3912 + static int reset_hinic_vf_dev(struct pci_dev *pdev, int probe) 3913 + { 3914 + unsigned long timeout; 3915 + void __iomem *bar; 3916 + u32 val; 3917 + 3918 + if (probe) 3919 + return 0; 3920 + 3921 + bar = pci_iomap(pdev, 0, 0); 3922 + if (!bar) 3923 + return -ENOTTY; 3924 + 3925 + /* Get and check firmware capabilities */ 3926 + val = ioread32be(bar + HINIC_VF_FLR_TYPE); 3927 + if (!(val & HINIC_VF_FLR_CAP_BIT)) { 3928 + pci_iounmap(pdev, bar); 3929 + return -ENOTTY; 3930 + } 3931 + 3932 + /* Set HINIC_VF_FLR_PROC_BIT for the start of FLR */ 3933 + val = ioread32be(bar + HINIC_VF_OP); 3934 + val = val | HINIC_VF_FLR_PROC_BIT; 3935 + iowrite32be(val, bar + HINIC_VF_OP); 3936 + 3937 + pcie_flr(pdev); 3938 + 3939 + /* 3940 + * The device must recapture its Bus and Device Numbers after FLR 3941 + * in order generate Completions. Issue a config write to let the 3942 + * device capture this information. 3943 + */ 3944 + pci_write_config_word(pdev, PCI_VENDOR_ID, 0); 3945 + 3946 + /* Firmware clears HINIC_VF_FLR_PROC_BIT when reset is complete */ 3947 + timeout = jiffies + msecs_to_jiffies(HINIC_OPERATION_TIMEOUT); 3948 + do { 3949 + val = ioread32be(bar + HINIC_VF_OP); 3950 + if (!(val & HINIC_VF_FLR_PROC_BIT)) 3951 + goto reset_complete; 3952 + msleep(20); 3953 + } while (time_before(jiffies, timeout)); 3954 + 3955 + val = ioread32be(bar + HINIC_VF_OP); 3956 + if (!(val & HINIC_VF_FLR_PROC_BIT)) 3957 + goto reset_complete; 3958 + 3959 + pci_warn(pdev, "Reset dev timeout, FLR ack reg: %#010x\n", val); 3960 + 3961 + reset_complete: 3962 + pci_iounmap(pdev, bar); 3963 + 3964 + return 0; 3965 + } 3966 + 3926 3967 static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { 3927 3968 { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF, 3928 3969 reset_intel_82599_sfp_virtfn }, ··· 3998 3913 { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr }, 3999 3914 { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, 4000 3915 reset_chelsio_generic_dev }, 3916 + { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF, 3917 + reset_hinic_vf_dev }, 4001 3918 { 0 } 4002 3919 }; 4003 3920 ··· 4840 4753 { PCI_VENDOR_ID_AMPERE, 0xE00A, pci_quirk_xgene_acs }, 4841 4754 { PCI_VENDOR_ID_AMPERE, 0xE00B, pci_quirk_xgene_acs }, 4842 4755 { PCI_VENDOR_ID_AMPERE, 0xE00C, pci_quirk_xgene_acs }, 4756 + /* Broadcom multi-function device */ 4757 + { PCI_VENDOR_ID_BROADCOM, 0x16D7, pci_quirk_mf_endpoint_acs }, 4843 4758 { PCI_VENDOR_ID_BROADCOM, 0xD714, pci_quirk_brcm_acs }, 4844 4759 /* Amazon Annapurna Labs */ 4845 4760 { PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031, pci_quirk_al_acs }, ··· 5243 5154 static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) 5244 5155 { 5245 5156 if ((pdev->device == 0x7312 && pdev->revision != 0x00) || 5246 - (pdev->device == 0x7340 && pdev->revision != 0xc5)) 5157 + (pdev->device == 0x7340 && pdev->revision != 0xc5) || 5158 + (pdev->device == 0x7341 && pdev->revision != 0x00)) 5247 5159 return; 5248 5160 5249 5161 if (pdev->device == 0x15d8) { ··· 5271 5181 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats); 5272 5182 /* AMD Navi14 dGPU */ 5273 5183 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats); 5184 + DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats); 5274 5185 /* AMD Raven platform iGPU */ 5275 5186 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats); 5276 5187 #endif /* CONFIG_PCI_ATS */

+3 -3

drivers/ptp/ptp_clock.c

··· 63 63 spin_unlock_irqrestore(&queue->lock, flags); 64 64 } 65 65 66 - s32 scaled_ppm_to_ppb(long ppm) 66 + long scaled_ppm_to_ppb(long ppm) 67 67 { 68 68 /* 69 69 * The 'freq' field in the 'struct timex' is in parts per ··· 80 80 s64 ppb = 1 + ppm; 81 81 ppb *= 125; 82 82 ppb >>= 13; 83 - return (s32) ppb; 83 + return (long) ppb; 84 84 } 85 85 EXPORT_SYMBOL(scaled_ppm_to_ppb); 86 86 ··· 138 138 delta = ktime_to_ns(kt); 139 139 err = ops->adjtime(ops, delta); 140 140 } else if (tx->modes & ADJ_FREQUENCY) { 141 - s32 ppb = scaled_ppm_to_ppb(tx->freq); 141 + long ppb = scaled_ppm_to_ppb(tx->freq); 142 142 if (ppb > ops->max_adj || ppb < -ops->max_adj) 143 143 return -ERANGE; 144 144 if (ops->adjfine)

+9 -2

drivers/s390/crypto/ap_queue.c

··· 135 135 { 136 136 struct ap_queue_status status; 137 137 struct ap_message *ap_msg; 138 + bool found = false; 138 139 139 140 status = ap_dqap(aq->qid, &aq->reply->psmid, 140 141 aq->reply->msg, aq->reply->len); 141 142 switch (status.response_code) { 142 143 case AP_RESPONSE_NORMAL: 143 - aq->queue_count--; 144 + aq->queue_count = max_t(int, 0, aq->queue_count - 1); 144 145 if (aq->queue_count > 0) 145 146 mod_timer(&aq->timeout, 146 147 jiffies + aq->request_timeout); ··· 151 150 list_del_init(&ap_msg->list); 152 151 aq->pendingq_count--; 153 152 ap_msg->receive(aq, ap_msg, aq->reply); 153 + found = true; 154 154 break; 155 + } 156 + if (!found) { 157 + AP_DBF_WARN("%s unassociated reply psmid=0x%016llx on 0x%02x.%04x\n", 158 + __func__, aq->reply->psmid, 159 + AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid)); 155 160 } 156 161 fallthrough; 157 162 case AP_RESPONSE_NO_PENDING_REPLY: ··· 239 232 ap_msg->flags & AP_MSG_FLAG_SPECIAL); 240 233 switch (status.response_code) { 241 234 case AP_RESPONSE_NORMAL: 242 - aq->queue_count++; 235 + aq->queue_count = max_t(int, 1, aq->queue_count + 1); 243 236 if (aq->queue_count == 1) 244 237 mod_timer(&aq->timeout, jiffies + aq->request_timeout); 245 238 list_move_tail(&ap_msg->list, &aq->pendingq);

+2 -2

drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c

··· 2284 2284 mon_wdev->iftype = NL80211_IFTYPE_MONITOR; 2285 2285 mon_ndev->ieee80211_ptr = mon_wdev; 2286 2286 2287 - ret = register_netdevice(mon_ndev); 2287 + ret = cfg80211_register_netdevice(mon_ndev); 2288 2288 if (ret) { 2289 2289 goto out; 2290 2290 } ··· 2360 2360 adapter = rtw_netdev_priv(ndev); 2361 2361 pwdev_priv = adapter_wdev_data(adapter); 2362 2362 2363 - unregister_netdevice(ndev); 2363 + cfg80211_unregister_netdevice(ndev); 2364 2364 2365 2365 if (ndev == pwdev_priv->pmon_ndev) { 2366 2366 pwdev_priv->pmon_ndev = NULL;

+14 -2

drivers/usb/chipidea/usbmisc_imx.c

··· 686 686 int val; 687 687 unsigned long flags; 688 688 689 + /* Clear VDATSRCENB0 to disable VDP_SRC and IDM_SNK required by BC 1.2 spec */ 690 + spin_lock_irqsave(&usbmisc->lock, flags); 691 + val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2); 692 + val &= ~MX7D_USB_OTG_PHY_CFG2_CHRG_VDATSRCENB0; 693 + writel(val, usbmisc->base + MX7D_USB_OTG_PHY_CFG2); 694 + spin_unlock_irqrestore(&usbmisc->lock, flags); 695 + 696 + /* TVDMSRC_DIS */ 697 + msleep(20); 698 + 689 699 /* VDM_SRC is connected to D- and IDP_SINK is connected to D+ */ 690 700 spin_lock_irqsave(&usbmisc->lock, flags); 691 701 val = readl(usbmisc->base + MX7D_USB_OTG_PHY_CFG2); ··· 705 695 usbmisc->base + MX7D_USB_OTG_PHY_CFG2); 706 696 spin_unlock_irqrestore(&usbmisc->lock, flags); 707 697 708 - usleep_range(1000, 2000); 698 + /* TVDMSRC_ON */ 699 + msleep(40); 709 700 710 701 /* 711 702 * Per BC 1.2, check voltage of D+: ··· 809 798 usbmisc->base + MX7D_USB_OTG_PHY_CFG2); 810 799 spin_unlock_irqrestore(&usbmisc->lock, flags); 811 800 812 - usleep_range(1000, 2000); 801 + /* TVDPSRC_ON */ 802 + msleep(40); 813 803 814 804 /* Check if D- is less than VDAT_REF to determine an SDP per BC 1.2 */ 815 805 val = readl(usbmisc->base + MX7D_USB_OTG_PHY_STATUS);

+7

drivers/usb/core/hub.c

··· 41 41 #define USB_VENDOR_GENESYS_LOGIC 0x05e3 42 42 #define USB_VENDOR_SMSC 0x0424 43 43 #define USB_PRODUCT_USB5534B 0x5534 44 + #define USB_VENDOR_CYPRESS 0x04b4 45 + #define USB_PRODUCT_CY7C65632 0x6570 44 46 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 45 47 #define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 46 48 ··· 5720 5718 .idVendor = USB_VENDOR_SMSC, 5721 5719 .idProduct = USB_PRODUCT_USB5534B, 5722 5720 .bInterfaceClass = USB_CLASS_HUB, 5721 + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, 5722 + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR 5723 + | USB_DEVICE_ID_MATCH_PRODUCT, 5724 + .idVendor = USB_VENDOR_CYPRESS, 5725 + .idProduct = USB_PRODUCT_CY7C65632, 5723 5726 .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, 5724 5727 { .match_flags = USB_DEVICE_ID_MATCH_VENDOR 5725 5728 | USB_DEVICE_ID_MATCH_INT_CLASS,

+2 -2

fs/afs/main.c

··· 203 203 goto error_fs; 204 204 205 205 afs_proc_symlink = proc_symlink("fs/afs", NULL, "../self/net/afs"); 206 - if (IS_ERR(afs_proc_symlink)) { 207 - ret = PTR_ERR(afs_proc_symlink); 206 + if (!afs_proc_symlink) { 207 + ret = -ENOMEM; 208 208 goto error_proc; 209 209 } 210 210

+8 -5

fs/afs/write.c

··· 837 837 struct inode *inode = file_inode(file); 838 838 struct afs_vnode *vnode = AFS_FS_I(inode); 839 839 unsigned long priv; 840 + vm_fault_t ret = VM_FAULT_RETRY; 840 841 841 842 _enter("{{%llx:%llu}},{%lx}", vnode->fid.vid, vnode->fid.vnode, page->index); 842 843 ··· 849 848 #ifdef CONFIG_AFS_FSCACHE 850 849 if (PageFsCache(page) && 851 850 wait_on_page_fscache_killable(page) < 0) 852 - return VM_FAULT_RETRY; 851 + goto out; 853 852 #endif 854 853 855 854 if (wait_on_page_writeback_killable(page)) 856 - return VM_FAULT_RETRY; 855 + goto out; 857 856 858 857 if (lock_page_killable(page) < 0) 859 - return VM_FAULT_RETRY; 858 + goto out; 860 859 861 860 /* We mustn't change page->private until writeback is complete as that 862 861 * details the portion of the page we need to write back and we might ··· 864 863 */ 865 864 if (wait_on_page_writeback_killable(page) < 0) { 866 865 unlock_page(page); 867 - return VM_FAULT_RETRY; 866 + goto out; 868 867 } 869 868 870 869 priv = afs_page_dirty(page, 0, thp_size(page)); ··· 878 877 } 879 878 file_update_time(file); 880 879 880 + ret = VM_FAULT_LOCKED; 881 + out: 881 882 sb_end_pagefault(inode->i_sb); 882 - return VM_FAULT_LOCKED; 883 + return ret; 883 884 } 884 885 885 886 /*

+4 -4

fs/btrfs/block-group.c

··· 2442 2442 spin_lock(&sinfo->lock); 2443 2443 spin_lock(&cache->lock); 2444 2444 if (!--cache->ro) { 2445 - num_bytes = cache->length - cache->reserved - 2446 - cache->pinned - cache->bytes_super - 2447 - cache->zone_unusable - cache->used; 2448 - sinfo->bytes_readonly -= num_bytes; 2449 2445 if (btrfs_is_zoned(cache->fs_info)) { 2450 2446 /* Migrate zone_unusable bytes back */ 2451 2447 cache->zone_unusable = cache->alloc_offset - cache->used; 2452 2448 sinfo->bytes_zone_unusable += cache->zone_unusable; 2453 2449 sinfo->bytes_readonly -= cache->zone_unusable; 2454 2450 } 2451 + num_bytes = cache->length - cache->reserved - 2452 + cache->pinned - cache->bytes_super - 2453 + cache->zone_unusable - cache->used; 2454 + sinfo->bytes_readonly -= num_bytes; 2455 2455 list_del_init(&cache->ro_list); 2456 2456 } 2457 2457 spin_unlock(&cache->lock);

+1

fs/hugetlbfs/inode.c

··· 735 735 __SetPageUptodate(page); 736 736 error = huge_add_to_page_cache(page, mapping, index); 737 737 if (unlikely(error)) { 738 + restore_reserve_on_error(h, &pseudo_vma, addr, page); 738 739 put_page(page); 739 740 mutex_unlock(&hugetlb_fault_mutex_table[hash]); 740 741 goto out;

+2 -2

fs/notify/fanotify/fanotify_user.c

··· 471 471 info_type, fanotify_info_name(info), 472 472 info->name_len, buf, count); 473 473 if (ret < 0) 474 - return ret; 474 + goto out_close_fd; 475 475 476 476 buf += ret; 477 477 count -= ret; ··· 519 519 fanotify_event_object_fh(event), 520 520 info_type, dot, dot_len, buf, count); 521 521 if (ret < 0) 522 - return ret; 522 + goto out_close_fd; 523 523 524 524 buf += ret; 525 525 count -= ret;

+3 -1

fs/proc/base.c

··· 2676 2676 #ifdef CONFIG_SECURITY 2677 2677 static int proc_pid_attr_open(struct inode *inode, struct file *file) 2678 2678 { 2679 - return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); 2679 + file->private_data = NULL; 2680 + __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); 2681 + return 0; 2680 2682 } 2681 2683 2682 2684 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,

-1

include/linux/arch_topology.h

··· 37 37 enum scale_freq_source { 38 38 SCALE_FREQ_SOURCE_CPUFREQ = 0, 39 39 SCALE_FREQ_SOURCE_ARCH, 40 - SCALE_FREQ_SOURCE_CPPC, 41 40 }; 42 41 43 42 struct scale_freq_data {

+7 -1

include/linux/huge_mm.h

··· 286 286 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); 287 287 288 288 extern struct page *huge_zero_page; 289 + extern unsigned long huge_zero_pfn; 289 290 290 291 static inline bool is_huge_zero_page(struct page *page) 291 292 { ··· 295 294 296 295 static inline bool is_huge_zero_pmd(pmd_t pmd) 297 296 { 298 - return is_huge_zero_page(pmd_page(pmd)); 297 + return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd); 299 298 } 300 299 301 300 static inline bool is_huge_zero_pud(pud_t pud) ··· 437 436 } 438 437 439 438 static inline bool is_huge_zero_page(struct page *page) 439 + { 440 + return false; 441 + } 442 + 443 + static inline bool is_huge_zero_pmd(pmd_t pmd) 440 444 { 441 445 return false; 442 446 }

+8

include/linux/hugetlb.h

··· 149 149 long hugetlb_unreserve_pages(struct inode *inode, long start, long end, 150 150 long freed); 151 151 bool isolate_huge_page(struct page *page, struct list_head *list); 152 + int get_hwpoison_huge_page(struct page *page, bool *hugetlb); 152 153 void putback_active_hugepage(struct page *page); 153 154 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason); 154 155 void free_huge_page(struct page *page); ··· 338 337 static inline bool isolate_huge_page(struct page *page, struct list_head *list) 339 338 { 340 339 return false; 340 + } 341 + 342 + static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) 343 + { 344 + return 0; 341 345 } 342 346 343 347 static inline void putback_active_hugepage(struct page *page) ··· 610 604 unsigned long address); 611 605 int huge_add_to_page_cache(struct page *page, struct address_space *mapping, 612 606 pgoff_t idx); 607 + void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, 608 + unsigned long address, struct page *page); 613 609 614 610 /* arch callback */ 615 611 int __init __alloc_bootmem_huge_page(struct hstate *h);

+4

include/linux/mlx5/driver.h

··· 542 542 enum { 543 543 MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0, 544 544 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1, 545 + /* Set during device detach to block any further devices 546 + * creation/deletion on drivers rescan. Unset during device attach. 547 + */ 548 + MLX5_PRIV_FLAGS_DETACH = 1 << 2, 545 549 }; 546 550 547 551 struct mlx5_adev {

+1

include/linux/mlx5/transobj.h

··· 85 85 struct mlx5_hairpin_params *params); 86 86 87 87 void mlx5_core_hairpin_destroy(struct mlx5_hairpin *pair); 88 + void mlx5_core_hairpin_clear_dead_peer(struct mlx5_hairpin *hp); 88 89 #endif /* __TRANSOBJ_H__ */

+3

include/linux/mm.h

··· 1719 1719 struct address_space *check_mapping; /* Check page->mapping if set */ 1720 1720 pgoff_t first_index; /* Lowest page->index to unmap */ 1721 1721 pgoff_t last_index; /* Highest page->index to unmap */ 1722 + struct page *single_page; /* Locked page to be unmapped */ 1722 1723 }; 1723 1724 1724 1725 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, ··· 1767 1766 extern int fixup_user_fault(struct mm_struct *mm, 1768 1767 unsigned long address, unsigned int fault_flags, 1769 1768 bool *unlocked); 1769 + void unmap_mapping_page(struct page *page); 1770 1770 void unmap_mapping_pages(struct address_space *mapping, 1771 1771 pgoff_t start, pgoff_t nr, bool even_cows); 1772 1772 void unmap_mapping_range(struct address_space *mapping, ··· 1788 1786 BUG(); 1789 1787 return -EFAULT; 1790 1788 } 1789 + static inline void unmap_mapping_page(struct page *page) { } 1791 1790 static inline void unmap_mapping_pages(struct address_space *mapping, 1792 1791 pgoff_t start, pgoff_t nr, bool even_cows) { } 1793 1792 static inline void unmap_mapping_range(struct address_space *mapping,

+1 -1

include/linux/ptp_clock_kernel.h

··· 235 235 * @ppm: Parts per million, but with a 16 bit binary fractional field 236 236 */ 237 237 238 - extern s32 scaled_ppm_to_ppb(long ppm); 238 + extern long scaled_ppm_to_ppb(long ppm); 239 239 240 240 /** 241 241 * ptp_find_pin() - obtain the pin index of a given auxiliary function

+1

include/linux/rmap.h

··· 91 91 92 92 TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ 93 93 TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ 94 + TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ 94 95 TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ 95 96 TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible 96 97 * and caller guarantees they will

-2

include/linux/socket.h

··· 438 438 int __user *usockvec); 439 439 extern int __sys_shutdown_sock(struct socket *sock, int how); 440 440 extern int __sys_shutdown(int fd, int how); 441 - 442 - extern struct ns_common *get_net_ns(struct ns_common *ns); 443 441 #endif /* _LINUX_SOCKET_H */

+11 -4

include/linux/swapops.h

··· 23 23 #define SWP_TYPE_SHIFT (BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT) 24 24 #define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) 25 25 26 + /* Clear all flags but only keep swp_entry_t related information */ 27 + static inline pte_t pte_swp_clear_flags(pte_t pte) 28 + { 29 + if (pte_swp_soft_dirty(pte)) 30 + pte = pte_swp_clear_soft_dirty(pte); 31 + if (pte_swp_uffd_wp(pte)) 32 + pte = pte_swp_clear_uffd_wp(pte); 33 + return pte; 34 + } 35 + 26 36 /* 27 37 * Store a type+offset into a swp_entry_t in an arch-independent format 28 38 */ ··· 76 66 { 77 67 swp_entry_t arch_entry; 78 68 79 - if (pte_swp_soft_dirty(pte)) 80 - pte = pte_swp_clear_soft_dirty(pte); 81 - if (pte_swp_uffd_wp(pte)) 82 - pte = pte_swp_clear_uffd_wp(pte); 69 + pte = pte_swp_clear_flags(pte); 83 70 arch_entry = __pte_to_swp_entry(pte); 84 71 return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); 85 72 }

+7 -2

include/net/mac80211.h

··· 5537 5537 * 5538 5538 * This function iterates over the interfaces associated with a given 5539 5539 * hardware that are currently active and calls the callback for them. 5540 - * This version can only be used while holding the RTNL. 5540 + * This version can only be used while holding the wiphy mutex. 5541 5541 * 5542 5542 * @hw: the hardware struct of which the interfaces should be iterated over 5543 5543 * @iter_flags: iteration flags, see &enum ieee80211_interface_iteration_flags ··· 6392 6392 6393 6393 /** 6394 6394 * ieee80211_parse_tx_radiotap - Sanity-check and parse the radiotap header 6395 - * of injected frames 6395 + * of injected frames. 6396 + * 6397 + * To accurately parse and take into account rate and retransmission fields, 6398 + * you must initialize the chandef field in the ieee80211_tx_info structure 6399 + * of the skb before calling this function. 6400 + * 6396 6401 * @skb: packet injected by userspace 6397 6402 * @dev: the &struct device of this 802.11 device 6398 6403 */

+13 -1

include/net/net_namespace.h

··· 184 184 void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid); 185 185 186 186 void net_ns_barrier(void); 187 + 188 + struct ns_common *get_net_ns(struct ns_common *ns); 189 + struct net *get_net_ns_by_fd(int fd); 187 190 #else /* CONFIG_NET_NS */ 188 191 #include <linux/sched.h> 189 192 #include <linux/nsproxy.h> ··· 206 203 } 207 204 208 205 static inline void net_ns_barrier(void) {} 206 + 207 + static inline struct ns_common *get_net_ns(struct ns_common *ns) 208 + { 209 + return ERR_PTR(-EINVAL); 210 + } 211 + 212 + static inline struct net *get_net_ns_by_fd(int fd) 213 + { 214 + return ERR_PTR(-EINVAL); 215 + } 209 216 #endif /* CONFIG_NET_NS */ 210 217 211 218 212 219 extern struct list_head net_namespace_list; 213 220 214 221 struct net *get_net_ns_by_pid(pid_t pid); 215 - struct net *get_net_ns_by_fd(int fd); 216 222 217 223 #ifdef CONFIG_SYSCTL 218 224 void ipx_register_sysctl(void);

+13 -4

include/net/sock.h

··· 1934 1934 1935 1935 static inline void sk_set_txhash(struct sock *sk) 1936 1936 { 1937 - sk->sk_txhash = net_tx_rndhash(); 1937 + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ 1938 + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); 1938 1939 } 1939 1940 1940 1941 static inline bool sk_rethink_txhash(struct sock *sk) ··· 2207 2206 2208 2207 static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) 2209 2208 { 2210 - if (sk->sk_txhash) { 2209 + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ 2210 + u32 txhash = READ_ONCE(sk->sk_txhash); 2211 + 2212 + if (txhash) { 2211 2213 skb->l4_hash = 1; 2212 - skb->hash = sk->sk_txhash; 2214 + skb->hash = txhash; 2213 2215 } 2214 2216 } 2215 2217 ··· 2270 2266 static inline int sock_error(struct sock *sk) 2271 2267 { 2272 2268 int err; 2273 - if (likely(!sk->sk_err)) 2269 + 2270 + /* Avoid an atomic operation for the common case. 2271 + * This is racy since another cpu/thread can change sk_err under us. 2272 + */ 2273 + if (likely(data_race(!sk->sk_err))) 2274 2274 return 0; 2275 + 2275 2276 err = xchg(&sk->sk_err, 0); 2276 2277 return -err; 2277 2278 }

+1 -2

include/uapi/asm-generic/unistd.h

··· 863 863 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) 864 864 #define __NR_mount_setattr 442 865 865 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) 866 - #define __NR_quotactl_path 443 867 - __SYSCALL(__NR_quotactl_path, sys_quotactl_path) 866 + /* 443 is reserved for quotactl_path */ 868 867 869 868 #define __NR_landlock_create_ruleset 444 870 869 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)

+3

include/uapi/linux/in.h

··· 289 289 /* Address indicating an error return. */ 290 290 #define INADDR_NONE ((unsigned long int) 0xffffffff) 291 291 292 + /* Dummy address for src of ICMP replies if no real address is set (RFC7600). */ 293 + #define INADDR_DUMMY ((unsigned long int) 0xc0000008) 294 + 292 295 /* Network number for local host loopback. */ 293 296 #define IN_LOOPBACKNET 127 294 297

+61 -7

kernel/bpf/verifier.c

··· 6483 6483 bool mask_to_left; 6484 6484 }; 6485 6485 6486 + static struct bpf_verifier_state * 6487 + sanitize_speculative_path(struct bpf_verifier_env *env, 6488 + const struct bpf_insn *insn, 6489 + u32 next_idx, u32 curr_idx) 6490 + { 6491 + struct bpf_verifier_state *branch; 6492 + struct bpf_reg_state *regs; 6493 + 6494 + branch = push_stack(env, next_idx, curr_idx, true); 6495 + if (branch && insn) { 6496 + regs = branch->frame[branch->curframe]->regs; 6497 + if (BPF_SRC(insn->code) == BPF_K) { 6498 + mark_reg_unknown(env, regs, insn->dst_reg); 6499 + } else if (BPF_SRC(insn->code) == BPF_X) { 6500 + mark_reg_unknown(env, regs, insn->dst_reg); 6501 + mark_reg_unknown(env, regs, insn->src_reg); 6502 + } 6503 + } 6504 + return branch; 6505 + } 6506 + 6486 6507 static int sanitize_ptr_alu(struct bpf_verifier_env *env, 6487 6508 struct bpf_insn *insn, 6488 6509 const struct bpf_reg_state *ptr_reg, ··· 6587 6566 tmp = *dst_reg; 6588 6567 *dst_reg = *ptr_reg; 6589 6568 } 6590 - ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); 6569 + ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, 6570 + env->insn_idx); 6591 6571 if (!ptr_is_dst_reg && ret) 6592 6572 *dst_reg = tmp; 6593 6573 return !ret ? REASON_STACK : 0; 6574 + } 6575 + 6576 + static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) 6577 + { 6578 + struct bpf_verifier_state *vstate = env->cur_state; 6579 + 6580 + /* If we simulate paths under speculation, we don't update the 6581 + * insn as 'seen' such that when we verify unreachable paths in 6582 + * the non-speculative domain, sanitize_dead_code() can still 6583 + * rewrite/sanitize them. 6584 + */ 6585 + if (!vstate->speculative) 6586 + env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 6594 6587 } 6595 6588 6596 6589 static int sanitize_err(struct bpf_verifier_env *env, ··· 8785 8750 if (err) 8786 8751 return err; 8787 8752 } 8753 + 8788 8754 if (pred == 1) { 8789 - /* only follow the goto, ignore fall-through */ 8755 + /* Only follow the goto, ignore fall-through. If needed, push 8756 + * the fall-through branch for simulation under speculative 8757 + * execution. 8758 + */ 8759 + if (!env->bypass_spec_v1 && 8760 + !sanitize_speculative_path(env, insn, *insn_idx + 1, 8761 + *insn_idx)) 8762 + return -EFAULT; 8790 8763 *insn_idx += insn->off; 8791 8764 return 0; 8792 8765 } else if (pred == 0) { 8793 - /* only follow fall-through branch, since 8794 - * that's where the program will go 8766 + /* Only follow the fall-through branch, since that's where the 8767 + * program will go. If needed, push the goto branch for 8768 + * simulation under speculative execution. 8795 8769 */ 8770 + if (!env->bypass_spec_v1 && 8771 + !sanitize_speculative_path(env, insn, 8772 + *insn_idx + insn->off + 1, 8773 + *insn_idx)) 8774 + return -EFAULT; 8796 8775 return 0; 8797 8776 } 8798 8777 ··· 10679 10630 } 10680 10631 10681 10632 regs = cur_regs(env); 10682 - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 10633 + sanitize_mark_insn_seen(env); 10683 10634 prev_insn_idx = env->insn_idx; 10684 10635 10685 10636 if (class == BPF_ALU || class == BPF_ALU64) { ··· 10906 10857 return err; 10907 10858 10908 10859 env->insn_idx++; 10909 - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; 10860 + sanitize_mark_insn_seen(env); 10910 10861 } else { 10911 10862 verbose(env, "invalid BPF_LD mode\n"); 10912 10863 return -EINVAL; ··· 11415 11366 { 11416 11367 struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; 11417 11368 struct bpf_insn *insn = new_prog->insnsi; 11369 + u32 old_seen = old_data[off].seen; 11418 11370 u32 prog_len; 11419 11371 int i; 11420 11372 ··· 11436 11386 memcpy(new_data + off + cnt - 1, old_data + off, 11437 11387 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); 11438 11388 for (i = off; i < off + cnt - 1; i++) { 11439 - new_data[i].seen = env->pass_cnt; 11389 + /* Expand insni[off]'s seen count to the patched range. */ 11390 + new_data[i].seen = old_seen; 11440 11391 new_data[i].zext_dst = insn_has_def32(env, insn + i); 11441 11392 } 11442 11393 env->insn_aux_data = new_data; ··· 12761 12710 * insn_aux_data was touched. These variables are compared to clear temporary 12762 12711 * data from failed pass. For testing and experiments do_check_common() can be 12763 12712 * run multiple times even when prior attempt to verify is unsuccessful. 12713 + * 12714 + * Note that special handling is needed on !env->bypass_spec_v1 if this is 12715 + * ever called outside of error path with subsequent program rejection. 12764 12716 */ 12765 12717 static void sanitize_insn_aux_data(struct bpf_verifier_env *env) 12766 12718 {

+1

kernel/crash_core.c

··· 464 464 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); 465 465 VMCOREINFO_STRUCT_SIZE(mem_section); 466 466 VMCOREINFO_OFFSET(mem_section, section_mem_map); 467 + VMCOREINFO_NUMBER(SECTION_SIZE_BITS); 467 468 VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); 468 469 #endif 469 470 VMCOREINFO_STRUCT_SIZE(page);

+1 -1

kernel/printk/printk_safe.c

··· 391 391 /* No obstacles. */ 392 392 return vprintk_default(fmt, args); 393 393 } 394 + EXPORT_SYMBOL(vprintk); 394 395 395 396 void __init printk_safe_init(void) 396 397 { ··· 412 411 /* Flush pending messages that did not have scheduled IRQ works. */ 413 412 printk_safe_flush(); 414 413 } 415 - EXPORT_SYMBOL(vprintk);

-1

kernel/sched/core.c

··· 6389 6389 { 6390 6390 return __sched_setscheduler(p, attr, false, true); 6391 6391 } 6392 - EXPORT_SYMBOL_GPL(sched_setattr_nocheck); 6393 6392 6394 6393 /** 6395 6394 * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.

+25 -19

kernel/sched/fair.c

··· 3298 3298 3299 3299 #ifdef CONFIG_SMP 3300 3300 #ifdef CONFIG_FAIR_GROUP_SCHED 3301 + 3302 + static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) 3303 + { 3304 + if (cfs_rq->load.weight) 3305 + return false; 3306 + 3307 + if (cfs_rq->avg.load_sum) 3308 + return false; 3309 + 3310 + if (cfs_rq->avg.util_sum) 3311 + return false; 3312 + 3313 + if (cfs_rq->avg.runnable_sum) 3314 + return false; 3315 + 3316 + return true; 3317 + } 3318 + 3301 3319 /** 3302 3320 * update_tg_load_avg - update the tg's load avg 3303 3321 * @cfs_rq: the cfs_rq whose avg changed ··· 4109 4091 4110 4092 #else /* CONFIG_SMP */ 4111 4093 4094 + static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) 4095 + { 4096 + return true; 4097 + } 4098 + 4112 4099 #define UPDATE_TG 0x0 4113 4100 #define SKIP_AGE_LOAD 0x0 4114 4101 #define DO_ATTACH 0x0 ··· 4772 4749 cfs_rq->throttled_clock_task_time += rq_clock_task(rq) - 4773 4750 cfs_rq->throttled_clock_task; 4774 4751 4775 - /* Add cfs_rq with already running entity in the list */ 4776 - if (cfs_rq->nr_running >= 1) 4752 + /* Add cfs_rq with load or one or more already running entities to the list */ 4753 + if (!cfs_rq_is_decayed(cfs_rq) || cfs_rq->nr_running) 4777 4754 list_add_leaf_cfs_rq(cfs_rq); 4778 4755 } 4779 4756 ··· 8018 7995 } 8019 7996 8020 7997 #ifdef CONFIG_FAIR_GROUP_SCHED 8021 - 8022 - static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq) 8023 - { 8024 - if (cfs_rq->load.weight) 8025 - return false; 8026 - 8027 - if (cfs_rq->avg.load_sum) 8028 - return false; 8029 - 8030 - if (cfs_rq->avg.util_sum) 8031 - return false; 8032 - 8033 - if (cfs_rq->avg.runnable_sum) 8034 - return false; 8035 - 8036 - return true; 8037 - } 8038 7998 8039 7999 static bool __update_blocked_fair(struct rq *rq, bool *done) 8040 8000 {

-11

kernel/trace/trace.c

··· 2198 2198 }; 2199 2199 static struct saved_cmdlines_buffer *savedcmd; 2200 2200 2201 - /* temporary disable recording */ 2202 - static atomic_t trace_record_taskinfo_disabled __read_mostly; 2203 - 2204 2201 static inline char *get_saved_cmdlines(int idx) 2205 2202 { 2206 2203 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]; ··· 2482 2485 static bool tracing_record_taskinfo_skip(int flags) 2483 2486 { 2484 2487 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID)))) 2485 - return true; 2486 - if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on()) 2487 2488 return true; 2488 2489 if (!__this_cpu_read(trace_taskinfo_save)) 2489 2490 return true; ··· 3993 3998 return ERR_PTR(-EBUSY); 3994 3999 #endif 3995 4000 3996 - if (!iter->snapshot) 3997 - atomic_inc(&trace_record_taskinfo_disabled); 3998 - 3999 4001 if (*pos != iter->pos) { 4000 4002 iter->ent = NULL; 4001 4003 iter->cpu = 0; ··· 4034 4042 if (iter->snapshot && iter->trace->use_max_tr) 4035 4043 return; 4036 4044 #endif 4037 - 4038 - if (!iter->snapshot) 4039 - atomic_dec(&trace_record_taskinfo_disabled); 4040 4045 4041 4046 trace_access_unlock(iter->cpu_file); 4042 4047 trace_event_read_unlock();

+3 -3

kernel/trace/trace_clock.c

··· 115 115 prev_time = READ_ONCE(trace_clock_struct.prev_time); 116 116 now = sched_clock_cpu(this_cpu); 117 117 118 - /* Make sure that now is always greater than prev_time */ 118 + /* Make sure that now is always greater than or equal to prev_time */ 119 119 if ((s64)(now - prev_time) < 0) 120 - now = prev_time + 1; 120 + now = prev_time; 121 121 122 122 /* 123 123 * If in an NMI context then dont risk lockups and simply return ··· 131 131 /* Reread prev_time in case it was already updated */ 132 132 prev_time = READ_ONCE(trace_clock_struct.prev_time); 133 133 if ((s64)(now - prev_time) < 0) 134 - now = prev_time + 1; 134 + now = prev_time; 135 135 136 136 trace_clock_struct.prev_time = now; 137 137

+29 -27

mm/huge_memory.c

··· 62 62 63 63 static atomic_t huge_zero_refcount; 64 64 struct page *huge_zero_page __read_mostly; 65 + unsigned long huge_zero_pfn __read_mostly = ~0UL; 65 66 66 67 bool transparent_hugepage_enabled(struct vm_area_struct *vma) 67 68 { ··· 99 98 __free_pages(zero_page, compound_order(zero_page)); 100 99 goto retry; 101 100 } 101 + WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page)); 102 102 103 103 /* We take additional reference here. It will be put back by shrinker */ 104 104 atomic_set(&huge_zero_refcount, 2); ··· 149 147 if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { 150 148 struct page *zero_page = xchg(&huge_zero_page, NULL); 151 149 BUG_ON(zero_page == NULL); 150 + WRITE_ONCE(huge_zero_pfn, ~0UL); 152 151 __free_pages(zero_page, compound_order(zero_page)); 153 152 return HPAGE_PMD_NR; 154 153 } ··· 2047 2044 count_vm_event(THP_SPLIT_PMD); 2048 2045 2049 2046 if (!vma_is_anonymous(vma)) { 2050 - _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); 2047 + old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); 2051 2048 /* 2052 2049 * We are going to unmap this huge page. So 2053 2050 * just go ahead and zap it ··· 2056 2053 zap_deposited_table(mm, pmd); 2057 2054 if (vma_is_special_huge(vma)) 2058 2055 return; 2059 - page = pmd_page(_pmd); 2060 - if (!PageDirty(page) && pmd_dirty(_pmd)) 2061 - set_page_dirty(page); 2062 - if (!PageReferenced(page) && pmd_young(_pmd)) 2063 - SetPageReferenced(page); 2064 - page_remove_rmap(page, true); 2065 - put_page(page); 2056 + if (unlikely(is_pmd_migration_entry(old_pmd))) { 2057 + swp_entry_t entry; 2058 + 2059 + entry = pmd_to_swp_entry(old_pmd); 2060 + page = migration_entry_to_page(entry); 2061 + } else { 2062 + page = pmd_page(old_pmd); 2063 + if (!PageDirty(page) && pmd_dirty(old_pmd)) 2064 + set_page_dirty(page); 2065 + if (!PageReferenced(page) && pmd_young(old_pmd)) 2066 + SetPageReferenced(page); 2067 + page_remove_rmap(page, true); 2068 + put_page(page); 2069 + } 2066 2070 add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); 2067 2071 return; 2068 - } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { 2072 + } 2073 + 2074 + if (is_huge_zero_pmd(*pmd)) { 2069 2075 /* 2070 2076 * FIXME: Do we want to invalidate secondary mmu by calling 2071 2077 * mmu_notifier_invalidate_range() see comments below inside ··· 2350 2338 2351 2339 static void unmap_page(struct page *page) 2352 2340 { 2353 - enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | 2341 + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | 2354 2342 TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; 2355 - bool unmap_success; 2356 2343 2357 2344 VM_BUG_ON_PAGE(!PageHead(page), page); 2358 2345 2359 2346 if (PageAnon(page)) 2360 2347 ttu_flags |= TTU_SPLIT_FREEZE; 2361 2348 2362 - unmap_success = try_to_unmap(page, ttu_flags); 2363 - VM_BUG_ON_PAGE(!unmap_success, page); 2349 + try_to_unmap(page, ttu_flags); 2350 + 2351 + VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); 2364 2352 } 2365 2353 2366 2354 static void remap_page(struct page *page, unsigned int nr) ··· 2671 2659 struct deferred_split *ds_queue = get_deferred_split_queue(head); 2672 2660 struct anon_vma *anon_vma = NULL; 2673 2661 struct address_space *mapping = NULL; 2674 - int count, mapcount, extra_pins, ret; 2662 + int extra_pins, ret; 2675 2663 pgoff_t end; 2676 2664 2677 2665 VM_BUG_ON_PAGE(is_huge_zero_page(head), head); ··· 2730 2718 } 2731 2719 2732 2720 unmap_page(head); 2733 - VM_BUG_ON_PAGE(compound_mapcount(head), head); 2734 2721 2735 2722 /* block interrupt reentry in xa_lock and spinlock */ 2736 2723 local_irq_disable(); ··· 2747 2736 2748 2737 /* Prevent deferred_split_scan() touching ->_refcount */ 2749 2738 spin_lock(&ds_queue->split_queue_lock); 2750 - count = page_count(head); 2751 - mapcount = total_mapcount(head); 2752 - if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { 2739 + if (page_ref_freeze(head, 1 + extra_pins)) { 2753 2740 if (!list_empty(page_deferred_list(head))) { 2754 2741 ds_queue->split_queue_len--; 2755 2742 list_del(page_deferred_list(head)); ··· 2767 2758 __split_huge_page(page, list, end); 2768 2759 ret = 0; 2769 2760 } else { 2770 - if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { 2771 - pr_alert("total_mapcount: %u, page_count(): %u\n", 2772 - mapcount, count); 2773 - if (PageTail(page)) 2774 - dump_page(head, NULL); 2775 - dump_page(page, "total_mapcount(head) > 0"); 2776 - BUG(); 2777 - } 2778 2761 spin_unlock(&ds_queue->split_queue_lock); 2779 - fail: if (mapping) 2762 + fail: 2763 + if (mapping) 2780 2764 xa_unlock(&mapping->i_pages); 2781 2765 local_irq_enable(); 2782 2766 remap_page(head, thp_nr_pages(head));

+116 -27

mm/hugetlb.c

··· 2121 2121 * be restored when a newly allocated huge page must be freed. It is 2122 2122 * to be called after calling vma_needs_reservation to determine if a 2123 2123 * reservation exists. 2124 + * 2125 + * vma_del_reservation is used in error paths where an entry in the reserve 2126 + * map was created during huge page allocation and must be removed. It is to 2127 + * be called after calling vma_needs_reservation to determine if a reservation 2128 + * exists. 2124 2129 */ 2125 2130 enum vma_resv_mode { 2126 2131 VMA_NEEDS_RESV, 2127 2132 VMA_COMMIT_RESV, 2128 2133 VMA_END_RESV, 2129 2134 VMA_ADD_RESV, 2135 + VMA_DEL_RESV, 2130 2136 }; 2131 2137 static long __vma_reservation_common(struct hstate *h, 2132 2138 struct vm_area_struct *vma, unsigned long addr, ··· 2176 2170 ret = region_del(resv, idx, idx + 1); 2177 2171 } 2178 2172 break; 2173 + case VMA_DEL_RESV: 2174 + if (vma->vm_flags & VM_MAYSHARE) { 2175 + region_abort(resv, idx, idx + 1, 1); 2176 + ret = region_del(resv, idx, idx + 1); 2177 + } else { 2178 + ret = region_add(resv, idx, idx + 1, 1, NULL, NULL); 2179 + /* region_add calls of range 1 should never fail. */ 2180 + VM_BUG_ON(ret < 0); 2181 + } 2182 + break; 2179 2183 default: 2180 2184 BUG(); 2181 2185 } 2182 2186 2183 - if (vma->vm_flags & VM_MAYSHARE) 2187 + if (vma->vm_flags & VM_MAYSHARE || mode == VMA_DEL_RESV) 2184 2188 return ret; 2185 2189 /* 2186 2190 * We know private mapping must have HPAGE_RESV_OWNER set. ··· 2238 2222 return __vma_reservation_common(h, vma, addr, VMA_ADD_RESV); 2239 2223 } 2240 2224 2241 - /* 2242 - * This routine is called to restore a reservation on error paths. In the 2243 - * specific error paths, a huge page was allocated (via alloc_huge_page) 2244 - * and is about to be freed. If a reservation for the page existed, 2245 - * alloc_huge_page would have consumed the reservation and set 2246 - * HPageRestoreReserve in the newly allocated page. When the page is freed 2247 - * via free_huge_page, the global reservation count will be incremented if 2248 - * HPageRestoreReserve is set. However, free_huge_page can not adjust the 2249 - * reserve map. Adjust the reserve map here to be consistent with global 2250 - * reserve count adjustments to be made by free_huge_page. 2251 - */ 2252 - static void restore_reserve_on_error(struct hstate *h, 2253 - struct vm_area_struct *vma, unsigned long address, 2254 - struct page *page) 2225 + static long vma_del_reservation(struct hstate *h, 2226 + struct vm_area_struct *vma, unsigned long addr) 2255 2227 { 2256 - if (unlikely(HPageRestoreReserve(page))) { 2257 - long rc = vma_needs_reservation(h, vma, address); 2228 + return __vma_reservation_common(h, vma, addr, VMA_DEL_RESV); 2229 + } 2258 2230 2259 - if (unlikely(rc < 0)) { 2231 + /* 2232 + * This routine is called to restore reservation information on error paths. 2233 + * It should ONLY be called for pages allocated via alloc_huge_page(), and 2234 + * the hugetlb mutex should remain held when calling this routine. 2235 + * 2236 + * It handles two specific cases: 2237 + * 1) A reservation was in place and the page consumed the reservation. 2238 + * HPageRestoreReserve is set in the page. 2239 + * 2) No reservation was in place for the page, so HPageRestoreReserve is 2240 + * not set. However, alloc_huge_page always updates the reserve map. 2241 + * 2242 + * In case 1, free_huge_page later in the error path will increment the 2243 + * global reserve count. But, free_huge_page does not have enough context 2244 + * to adjust the reservation map. This case deals primarily with private 2245 + * mappings. Adjust the reserve map here to be consistent with global 2246 + * reserve count adjustments to be made by free_huge_page. Make sure the 2247 + * reserve map indicates there is a reservation present. 2248 + * 2249 + * In case 2, simply undo reserve map modifications done by alloc_huge_page. 2250 + */ 2251 + void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, 2252 + unsigned long address, struct page *page) 2253 + { 2254 + long rc = vma_needs_reservation(h, vma, address); 2255 + 2256 + if (HPageRestoreReserve(page)) { 2257 + if (unlikely(rc < 0)) 2260 2258 /* 2261 2259 * Rare out of memory condition in reserve map 2262 2260 * manipulation. Clear HPageRestoreReserve so that ··· 2283 2253 * accounting of reserve counts. 2284 2254 */ 2285 2255 ClearHPageRestoreReserve(page); 2286 - } else if (rc) { 2287 - rc = vma_add_reservation(h, vma, address); 2288 - if (unlikely(rc < 0)) 2289 - /* 2290 - * See above comment about rare out of 2291 - * memory condition. 2292 - */ 2293 - ClearHPageRestoreReserve(page); 2294 - } else 2256 + else if (rc) 2257 + (void)vma_add_reservation(h, vma, address); 2258 + else 2295 2259 vma_end_reservation(h, vma, address); 2260 + } else { 2261 + if (!rc) { 2262 + /* 2263 + * This indicates there is an entry in the reserve map 2264 + * added by alloc_huge_page. We know it was added 2265 + * before the alloc_huge_page call, otherwise 2266 + * HPageRestoreReserve would be set on the page. 2267 + * Remove the entry so that a subsequent allocation 2268 + * does not consume a reservation. 2269 + */ 2270 + rc = vma_del_reservation(h, vma, address); 2271 + if (rc < 0) 2272 + /* 2273 + * VERY rare out of memory condition. Since 2274 + * we can not delete the entry, set 2275 + * HPageRestoreReserve so that the reserve 2276 + * count will be incremented when the page 2277 + * is freed. This reserve will be consumed 2278 + * on a subsequent allocation. 2279 + */ 2280 + SetHPageRestoreReserve(page); 2281 + } else if (rc < 0) { 2282 + /* 2283 + * Rare out of memory condition from 2284 + * vma_needs_reservation call. Memory allocation is 2285 + * only attempted if a new entry is needed. Therefore, 2286 + * this implies there is not an entry in the 2287 + * reserve map. 2288 + * 2289 + * For shared mappings, no entry in the map indicates 2290 + * no reservation. We are done. 2291 + */ 2292 + if (!(vma->vm_flags & VM_MAYSHARE)) 2293 + /* 2294 + * For private mappings, no entry indicates 2295 + * a reservation is present. Since we can 2296 + * not add an entry, set SetHPageRestoreReserve 2297 + * on the page so reserve count will be 2298 + * incremented when freed. This reserve will 2299 + * be consumed on a subsequent allocation. 2300 + */ 2301 + SetHPageRestoreReserve(page); 2302 + } else 2303 + /* 2304 + * No reservation present, do nothing 2305 + */ 2306 + vma_end_reservation(h, vma, address); 2296 2307 } 2297 2308 } 2298 2309 ··· 4108 4037 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); 4109 4038 entry = huge_ptep_get(src_pte); 4110 4039 if (!pte_same(src_pte_old, entry)) { 4040 + restore_reserve_on_error(h, vma, addr, 4041 + new); 4111 4042 put_page(new); 4112 4043 /* dst_entry won't change as in child */ 4113 4044 goto again; ··· 5079 5006 if (vm_shared || is_continue) 5080 5007 unlock_page(page); 5081 5008 out_release_nounlock: 5009 + restore_reserve_on_error(h, dst_vma, dst_addr, page); 5082 5010 put_page(page); 5083 5011 goto out; 5084 5012 } ··· 5927 5853 ClearHPageMigratable(page); 5928 5854 list_move_tail(&page->lru, list); 5929 5855 unlock: 5856 + spin_unlock_irq(&hugetlb_lock); 5857 + return ret; 5858 + } 5859 + 5860 + int get_hwpoison_huge_page(struct page *page, bool *hugetlb) 5861 + { 5862 + int ret = 0; 5863 + 5864 + *hugetlb = false; 5865 + spin_lock_irq(&hugetlb_lock); 5866 + if (PageHeadHuge(page)) { 5867 + *hugetlb = true; 5868 + if (HPageFreed(page) || HPageMigratable(page)) 5869 + ret = get_page_unless_zero(page); 5870 + } 5930 5871 spin_unlock_irq(&hugetlb_lock); 5931 5872 return ret; 5932 5873 }

+39 -14

mm/internal.h

··· 384 384 extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); 385 385 386 386 /* 387 - * At what user virtual address is page expected in @vma? 387 + * At what user virtual address is page expected in vma? 388 + * Returns -EFAULT if all of the page is outside the range of vma. 389 + * If page is a compound head, the entire compound page is considered. 388 390 */ 389 - static inline unsigned long 390 - __vma_address(struct page *page, struct vm_area_struct *vma) 391 - { 392 - pgoff_t pgoff = page_to_pgoff(page); 393 - return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 394 - } 395 - 396 391 static inline unsigned long 397 392 vma_address(struct page *page, struct vm_area_struct *vma) 398 393 { 399 - unsigned long start, end; 394 + pgoff_t pgoff; 395 + unsigned long address; 400 396 401 - start = __vma_address(page, vma); 402 - end = start + thp_size(page) - PAGE_SIZE; 397 + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ 398 + pgoff = page_to_pgoff(page); 399 + if (pgoff >= vma->vm_pgoff) { 400 + address = vma->vm_start + 401 + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 402 + /* Check for address beyond vma (or wrapped through 0?) */ 403 + if (address < vma->vm_start || address >= vma->vm_end) 404 + address = -EFAULT; 405 + } else if (PageHead(page) && 406 + pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) { 407 + /* Test above avoids possibility of wrap to 0 on 32-bit */ 408 + address = vma->vm_start; 409 + } else { 410 + address = -EFAULT; 411 + } 412 + return address; 413 + } 403 414 404 - /* page should be within @vma mapping range */ 405 - VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); 415 + /* 416 + * Then at what user virtual address will none of the page be found in vma? 417 + * Assumes that vma_address() already returned a good starting address. 418 + * If page is a compound head, the entire compound page is considered. 419 + */ 420 + static inline unsigned long 421 + vma_address_end(struct page *page, struct vm_area_struct *vma) 422 + { 423 + pgoff_t pgoff; 424 + unsigned long address; 406 425 407 - return max(start, vma->vm_start); 426 + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ 427 + pgoff = page_to_pgoff(page) + compound_nr(page); 428 + address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); 429 + /* Check for address beyond vma (or wrapped through 0?) */ 430 + if (address < vma->vm_start || address > vma->vm_end) 431 + address = vma->vm_end; 432 + return address; 408 433 } 409 434 410 435 static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,

+33 -3

mm/memory-failure.c

··· 949 949 return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY; 950 950 } 951 951 952 + /* 953 + * Return true if a page type of a given page is supported by hwpoison 954 + * mechanism (while handling could fail), otherwise false. This function 955 + * does not return true for hugetlb or device memory pages, so it's assumed 956 + * to be called only in the context where we never have such pages. 957 + */ 958 + static inline bool HWPoisonHandlable(struct page *page) 959 + { 960 + return PageLRU(page) || __PageMovable(page); 961 + } 962 + 952 963 /** 953 964 * __get_hwpoison_page() - Get refcount for memory error handling: 954 965 * @page: raw error page (hit by memory error) ··· 970 959 static int __get_hwpoison_page(struct page *page) 971 960 { 972 961 struct page *head = compound_head(page); 962 + int ret = 0; 963 + bool hugetlb = false; 973 964 974 - if (!PageHuge(head) && PageTransHuge(head)) { 965 + ret = get_hwpoison_huge_page(head, &hugetlb); 966 + if (hugetlb) 967 + return ret; 968 + 969 + /* 970 + * This check prevents from calling get_hwpoison_unless_zero() 971 + * for any unsupported type of page in order to reduce the risk of 972 + * unexpected races caused by taking a page refcount. 973 + */ 974 + if (!HWPoisonHandlable(head)) 975 + return 0; 976 + 977 + if (PageTransHuge(head)) { 975 978 /* 976 979 * Non anonymous thp exists only in allocation/free time. We 977 980 * can't handle such a case correctly, so let's give it up. ··· 1042 1017 ret = -EIO; 1043 1018 } 1044 1019 } else { 1045 - if (PageHuge(p) || PageLRU(p) || __PageMovable(p)) { 1020 + if (PageHuge(p) || HWPoisonHandlable(p)) { 1046 1021 ret = 1; 1047 1022 } else { 1048 1023 /* ··· 1552 1527 return 0; 1553 1528 } 1554 1529 1555 - if (!PageTransTail(p) && !PageLRU(p)) 1530 + /* 1531 + * __munlock_pagevec may clear a writeback page's LRU flag without 1532 + * page_lock. We need wait writeback completion for this page or it 1533 + * may trigger vfs BUG while evict inode. 1534 + */ 1535 + if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p)) 1556 1536 goto identify_page_state; 1557 1537 1558 1538 /*

+41

mm/memory.c

··· 1361 1361 else if (zap_huge_pmd(tlb, vma, pmd, addr)) 1362 1362 goto next; 1363 1363 /* fall through */ 1364 + } else if (details && details->single_page && 1365 + PageTransCompound(details->single_page) && 1366 + next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { 1367 + spinlock_t *ptl = pmd_lock(tlb->mm, pmd); 1368 + /* 1369 + * Take and drop THP pmd lock so that we cannot return 1370 + * prematurely, while zap_huge_pmd() has cleared *pmd, 1371 + * but not yet decremented compound_mapcount(). 1372 + */ 1373 + spin_unlock(ptl); 1364 1374 } 1375 + 1365 1376 /* 1366 1377 * Here there can be other concurrent MADV_DONTNEED or 1367 1378 * trans huge page faults running, and if the pmd is ··· 3245 3234 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, 3246 3235 details); 3247 3236 } 3237 + } 3238 + 3239 + /** 3240 + * unmap_mapping_page() - Unmap single page from processes. 3241 + * @page: The locked page to be unmapped. 3242 + * 3243 + * Unmap this page from any userspace process which still has it mmaped. 3244 + * Typically, for efficiency, the range of nearby pages has already been 3245 + * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once 3246 + * truncation or invalidation holds the lock on a page, it may find that 3247 + * the page has been remapped again: and then uses unmap_mapping_page() 3248 + * to unmap it finally. 3249 + */ 3250 + void unmap_mapping_page(struct page *page) 3251 + { 3252 + struct address_space *mapping = page->mapping; 3253 + struct zap_details details = { }; 3254 + 3255 + VM_BUG_ON(!PageLocked(page)); 3256 + VM_BUG_ON(PageTail(page)); 3257 + 3258 + details.check_mapping = mapping; 3259 + details.first_index = page->index; 3260 + details.last_index = page->index + thp_nr_pages(page) - 1; 3261 + details.single_page = page; 3262 + 3263 + i_mmap_lock_write(mapping); 3264 + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) 3265 + unmap_mapping_range_tree(&mapping->i_mmap, &details); 3266 + i_mmap_unlock_write(mapping); 3248 3267 } 3249 3268 3250 3269 /**

+1

mm/migrate.c

··· 295 295 goto out; 296 296 297 297 page = migration_entry_to_page(entry); 298 + page = compound_head(page); 298 299 299 300 /* 300 301 * Once page cache replacement of page migration started, page_count

+17 -10

mm/page_vma_mapped.c

··· 212 212 pvmw->ptl = NULL; 213 213 } 214 214 } else if (!pmd_present(pmde)) { 215 + /* 216 + * If PVMW_SYNC, take and drop THP pmd lock so that we 217 + * cannot return prematurely, while zap_huge_pmd() has 218 + * cleared *pmd but not decremented compound_mapcount(). 219 + */ 220 + if ((pvmw->flags & PVMW_SYNC) && 221 + PageTransCompound(pvmw->page)) { 222 + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); 223 + 224 + spin_unlock(ptl); 225 + } 215 226 return false; 216 227 } 217 228 if (!map_pte(pvmw)) 218 229 goto next_pte; 219 230 while (1) { 231 + unsigned long end; 232 + 220 233 if (check_pte(pvmw)) 221 234 return true; 222 235 next_pte: 223 236 /* Seek to next pte only makes sense for THP */ 224 237 if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) 225 238 return not_found(pvmw); 239 + end = vma_address_end(pvmw->page, pvmw->vma); 226 240 do { 227 241 pvmw->address += PAGE_SIZE; 228 - if (pvmw->address >= pvmw->vma->vm_end || 229 - pvmw->address >= 230 - __vma_address(pvmw->page, pvmw->vma) + 231 - thp_size(pvmw->page)) 242 + if (pvmw->address >= end) 232 243 return not_found(pvmw); 233 244 /* Did we cross page table boundary? */ 234 245 if (pvmw->address % PMD_SIZE == 0) { ··· 277 266 .vma = vma, 278 267 .flags = PVMW_SYNC, 279 268 }; 280 - unsigned long start, end; 281 269 282 - start = __vma_address(page, vma); 283 - end = start + thp_size(page) - PAGE_SIZE; 284 - 285 - if (unlikely(end < vma->vm_start || start >= vma->vm_end)) 270 + pvmw.address = vma_address(page, vma); 271 + if (pvmw.address == -EFAULT) 286 272 return 0; 287 - pvmw.address = max(start, vma->vm_start); 288 273 if (!page_vma_mapped_walk(&pvmw)) 289 274 return 0; 290 275 page_vma_mapped_walk_done(&pvmw);

+2 -3

mm/pgtable-generic.c

··· 135 135 { 136 136 pmd_t pmd; 137 137 VM_BUG_ON(address & ~HPAGE_PMD_MASK); 138 - VM_BUG_ON(!pmd_present(*pmdp)); 139 - /* Below assumes pmd_present() is true */ 140 - VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp)); 138 + VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && 139 + !pmd_devmap(*pmdp)); 141 140 pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); 142 141 flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); 143 142 return pmd;

+27 -12

mm/rmap.c

··· 707 707 */ 708 708 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) 709 709 { 710 - unsigned long address; 711 710 if (PageAnon(page)) { 712 711 struct anon_vma *page__anon_vma = page_anon_vma(page); 713 712 /* ··· 716 717 if (!vma->anon_vma || !page__anon_vma || 717 718 vma->anon_vma->root != page__anon_vma->root) 718 719 return -EFAULT; 719 - } else if (page->mapping) { 720 - if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) 721 - return -EFAULT; 722 - } else 720 + } else if (!vma->vm_file) { 723 721 return -EFAULT; 724 - address = __vma_address(page, vma); 725 - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) 722 + } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { 726 723 return -EFAULT; 727 - return address; 724 + } 725 + 726 + return vma_address(page, vma); 728 727 } 729 728 730 729 pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) ··· 916 919 */ 917 920 mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 918 921 0, vma, vma->vm_mm, address, 919 - min(vma->vm_end, address + page_size(page))); 922 + vma_address_end(page, vma)); 920 923 mmu_notifier_invalidate_range_start(&range); 921 924 922 925 while (page_vma_mapped_walk(&pvmw)) { ··· 1402 1405 struct mmu_notifier_range range; 1403 1406 enum ttu_flags flags = (enum ttu_flags)(long)arg; 1404 1407 1408 + /* 1409 + * When racing against e.g. zap_pte_range() on another cpu, 1410 + * in between its ptep_get_and_clear_full() and page_remove_rmap(), 1411 + * try_to_unmap() may return false when it is about to become true, 1412 + * if page table locking is skipped: use TTU_SYNC to wait for that. 1413 + */ 1414 + if (flags & TTU_SYNC) 1415 + pvmw.flags = PVMW_SYNC; 1416 + 1405 1417 /* munlock has nothing to gain from examining un-locked vmas */ 1406 1418 if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) 1407 1419 return true; ··· 1432 1426 * Note that the page can not be free in this function as call of 1433 1427 * try_to_unmap() must hold a reference on the page. 1434 1428 */ 1429 + range.end = PageKsm(page) ? 1430 + address + PAGE_SIZE : vma_address_end(page, vma); 1435 1431 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, 1436 - address, 1437 - min(vma->vm_end, address + page_size(page))); 1432 + address, range.end); 1438 1433 if (PageHuge(page)) { 1439 1434 /* 1440 1435 * If sharing is possible, start and end will be adjusted ··· 1784 1777 else 1785 1778 rmap_walk(page, &rwc); 1786 1779 1787 - return !page_mapcount(page) ? true : false; 1780 + /* 1781 + * When racing against e.g. zap_pte_range() on another cpu, 1782 + * in between its ptep_get_and_clear_full() and page_remove_rmap(), 1783 + * try_to_unmap() may return false when it is about to become true, 1784 + * if page table locking is skipped: use TTU_SYNC to wait for that. 1785 + */ 1786 + return !page_mapcount(page); 1788 1787 } 1789 1788 1790 1789 /** ··· 1887 1874 struct vm_area_struct *vma = avc->vma; 1888 1875 unsigned long address = vma_address(page, vma); 1889 1876 1877 + VM_BUG_ON_VMA(address == -EFAULT, vma); 1890 1878 cond_resched(); 1891 1879 1892 1880 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) ··· 1942 1928 pgoff_start, pgoff_end) { 1943 1929 unsigned long address = vma_address(page, vma); 1944 1930 1931 + VM_BUG_ON_VMA(address == -EFAULT, vma); 1945 1932 cond_resched(); 1946 1933 1947 1934 if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))

+1 -2

mm/slab_common.c

··· 97 97 #ifdef CONFIG_DEBUG_VM 98 98 static int kmem_cache_sanity_check(const char *name, unsigned int size) 99 99 { 100 - if (!name || in_interrupt() || size < sizeof(void *) || 101 - size > KMALLOC_MAX_SIZE) { 100 + if (!name || in_interrupt() || size > KMALLOC_MAX_SIZE) { 102 101 pr_err("kmem_cache_create(%s) integrity check failed\n", name); 103 102 return -EINVAL; 104 103 }

+16 -21

mm/slub.c

··· 15 15 #include <linux/module.h> 16 16 #include <linux/bit_spinlock.h> 17 17 #include <linux/interrupt.h> 18 + #include <linux/swab.h> 18 19 #include <linux/bitops.h> 19 20 #include <linux/slab.h> 20 21 #include "slab.h" ··· 713 712 p, p - addr, get_freepointer(s, p)); 714 713 715 714 if (s->flags & SLAB_RED_ZONE) 716 - print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, 715 + print_section(KERN_ERR, "Redzone ", p - s->red_left_pad, 717 716 s->red_left_pad); 718 717 else if (p > addr + 16) 719 718 print_section(KERN_ERR, "Bytes b4 ", p - 16, 16); 720 719 721 - print_section(KERN_ERR, "Object ", p, 720 + print_section(KERN_ERR, "Object ", p, 722 721 min_t(unsigned int, s->object_size, PAGE_SIZE)); 723 722 if (s->flags & SLAB_RED_ZONE) 724 - print_section(KERN_ERR, "Redzone ", p + s->object_size, 723 + print_section(KERN_ERR, "Redzone ", p + s->object_size, 725 724 s->inuse - s->object_size); 726 725 727 726 off = get_info_end(s); ··· 733 732 734 733 if (off != size_from_object(s)) 735 734 /* Beginning of the filler is the free pointer */ 736 - print_section(KERN_ERR, "Padding ", p + off, 735 + print_section(KERN_ERR, "Padding ", p + off, 737 736 size_from_object(s) - off); 738 737 739 738 dump_stack(); ··· 910 909 u8 *endobject = object + s->object_size; 911 910 912 911 if (s->flags & SLAB_RED_ZONE) { 913 - if (!check_bytes_and_report(s, page, object, "Redzone", 912 + if (!check_bytes_and_report(s, page, object, "Left Redzone", 914 913 object - s->red_left_pad, val, s->red_left_pad)) 915 914 return 0; 916 915 917 - if (!check_bytes_and_report(s, page, object, "Redzone", 916 + if (!check_bytes_and_report(s, page, object, "Right Redzone", 918 917 endobject, val, s->inuse - s->object_size)) 919 918 return 0; 920 919 } else { ··· 929 928 if (val != SLUB_RED_ACTIVE && (s->flags & __OBJECT_POISON) && 930 929 (!check_bytes_and_report(s, page, p, "Poison", p, 931 930 POISON_FREE, s->object_size - 1) || 932 - !check_bytes_and_report(s, page, p, "Poison", 931 + !check_bytes_and_report(s, page, p, "End Poison", 933 932 p + s->object_size - 1, POISON_END, 1))) 934 933 return 0; 935 934 /* ··· 3690 3689 { 3691 3690 slab_flags_t flags = s->flags; 3692 3691 unsigned int size = s->object_size; 3693 - unsigned int freepointer_area; 3694 3692 unsigned int order; 3695 3693 3696 3694 /* ··· 3698 3698 * the possible location of the free pointer. 3699 3699 */ 3700 3700 size = ALIGN(size, sizeof(void *)); 3701 - /* 3702 - * This is the area of the object where a freepointer can be 3703 - * safely written. If redzoning adds more to the inuse size, we 3704 - * can't use that portion for writing the freepointer, so 3705 - * s->offset must be limited within this for the general case. 3706 - */ 3707 - freepointer_area = size; 3708 3701 3709 3702 #ifdef CONFIG_SLUB_DEBUG 3710 3703 /* ··· 3723 3730 3724 3731 /* 3725 3732 * With that we have determined the number of bytes in actual use 3726 - * by the object. This is the potential offset to the free pointer. 3733 + * by the object and redzoning. 3727 3734 */ 3728 3735 s->inuse = size; 3729 3736 3730 - if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || 3731 - s->ctor)) { 3737 + if ((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || 3738 + ((flags & SLAB_RED_ZONE) && s->object_size < sizeof(void *)) || 3739 + s->ctor) { 3732 3740 /* 3733 3741 * Relocate free pointer after the object if it is not 3734 3742 * permitted to overwrite the first word of the object on 3735 3743 * kmem_cache_free. 3736 3744 * 3737 3745 * This is the case if we do RCU, have a constructor or 3738 - * destructor or are poisoning the objects. 3746 + * destructor, are poisoning the objects, or are 3747 + * redzoning an object smaller than sizeof(void *). 3739 3748 * 3740 3749 * The assumption that s->offset >= s->inuse means free 3741 3750 * pointer is outside of the object is used in the ··· 3746 3751 */ 3747 3752 s->offset = size; 3748 3753 size += sizeof(void *); 3749 - } else if (freepointer_area > sizeof(void *)) { 3754 + } else { 3750 3755 /* 3751 3756 * Store freelist pointer near middle of object to keep 3752 3757 * it away from the edges of the object to avoid small 3753 3758 * sized over/underflows from neighboring allocations. 3754 3759 */ 3755 - s->offset = ALIGN(freepointer_area / 2, sizeof(void *)); 3760 + s->offset = ALIGN_DOWN(s->object_size / 2, sizeof(void *)); 3756 3761 } 3757 3762 3758 3763 #ifdef CONFIG_SLUB_DEBUG

+11 -2

mm/sparse.c

··· 344 344 return sizeof(struct mem_section_usage) + usemap_size(); 345 345 } 346 346 347 + static inline phys_addr_t pgdat_to_phys(struct pglist_data *pgdat) 348 + { 349 + #ifndef CONFIG_NEED_MULTIPLE_NODES 350 + return __pa_symbol(pgdat); 351 + #else 352 + return __pa(pgdat); 353 + #endif 354 + } 355 + 347 356 #ifdef CONFIG_MEMORY_HOTREMOVE 348 357 static struct mem_section_usage * __init 349 358 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, ··· 371 362 * from the same section as the pgdat where possible to avoid 372 363 * this problem. 373 364 */ 374 - goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); 365 + goal = pgdat_to_phys(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); 375 366 limit = goal + (1UL << PA_SECTION_SHIFT); 376 367 nid = early_pfn_to_nid(goal >> PAGE_SHIFT); 377 368 again: ··· 399 390 } 400 391 401 392 usemap_snr = pfn_to_section_nr(__pa(usage) >> PAGE_SHIFT); 402 - pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); 393 + pgdat_snr = pfn_to_section_nr(pgdat_to_phys(pgdat) >> PAGE_SHIFT); 403 394 if (usemap_snr == pgdat_snr) 404 395 return; 405 396

+1 -1

mm/swapfile.c

··· 1900 1900 1901 1901 static inline int pte_same_as_swp(pte_t pte, pte_t swp_pte) 1902 1902 { 1903 - return pte_same(pte_swp_clear_soft_dirty(pte), swp_pte); 1903 + return pte_same(pte_swp_clear_flags(pte), swp_pte); 1904 1904 } 1905 1905 1906 1906 /*

+19 -24

mm/truncate.c

··· 167 167 * its lock, b) when a concurrent invalidate_mapping_pages got there first and 168 168 * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. 169 169 */ 170 - static void 171 - truncate_cleanup_page(struct address_space *mapping, struct page *page) 170 + static void truncate_cleanup_page(struct page *page) 172 171 { 173 - if (page_mapped(page)) { 174 - unsigned int nr = thp_nr_pages(page); 175 - unmap_mapping_pages(mapping, page->index, nr, false); 176 - } 172 + if (page_mapped(page)) 173 + unmap_mapping_page(page); 177 174 178 175 if (page_has_private(page)) 179 176 do_invalidatepage(page, 0, thp_size(page)); ··· 215 218 if (page->mapping != mapping) 216 219 return -EIO; 217 220 218 - truncate_cleanup_page(mapping, page); 221 + truncate_cleanup_page(page); 219 222 delete_from_page_cache(page); 220 223 return 0; 221 224 } ··· 322 325 index = indices[pagevec_count(&pvec) - 1] + 1; 323 326 truncate_exceptional_pvec_entries(mapping, &pvec, indices); 324 327 for (i = 0; i < pagevec_count(&pvec); i++) 325 - truncate_cleanup_page(mapping, pvec.pages[i]); 328 + truncate_cleanup_page(pvec.pages[i]); 326 329 delete_from_page_cache_batch(mapping, &pvec); 327 330 for (i = 0; i < pagevec_count(&pvec); i++) 328 331 unlock_page(pvec.pages[i]); ··· 636 639 continue; 637 640 } 638 641 642 + if (!did_range_unmap && page_mapped(page)) { 643 + /* 644 + * If page is mapped, before taking its lock, 645 + * zap the rest of the file in one hit. 646 + */ 647 + unmap_mapping_pages(mapping, index, 648 + (1 + end - index), false); 649 + did_range_unmap = 1; 650 + } 651 + 639 652 lock_page(page); 640 653 WARN_ON(page_to_index(page) != index); 641 654 if (page->mapping != mapping) { ··· 653 646 continue; 654 647 } 655 648 wait_on_page_writeback(page); 656 - if (page_mapped(page)) { 657 - if (!did_range_unmap) { 658 - /* 659 - * Zap the rest of the file in one hit. 660 - */ 661 - unmap_mapping_pages(mapping, index, 662 - (1 + end - index), false); 663 - did_range_unmap = 1; 664 - } else { 665 - /* 666 - * Just zap this page 667 - */ 668 - unmap_mapping_pages(mapping, index, 669 - 1, false); 670 - } 671 - } 649 + 650 + if (page_mapped(page)) 651 + unmap_mapping_page(page); 672 652 BUG_ON(page_mapped(page)); 653 + 673 654 ret2 = do_launder_page(mapping, page); 674 655 if (ret2 == 0) { 675 656 if (!invalidate_complete_page2(mapping, page))

+1 -1

net/appletalk/aarp.c

··· 768 768 if (a && a->status & ATIF_PROBE) { 769 769 a->status |= ATIF_PROBE_FAIL; 770 770 /* 771 - * we do not respond to probe or request packets for 771 + * we do not respond to probe or request packets of 772 772 * this address while we are probing this address 773 773 */ 774 774 goto unlock;

+3 -1

net/batman-adv/bat_iv_ogm.c

··· 409 409 if (WARN_ON(!forw_packet->if_outgoing)) 410 410 return; 411 411 412 - if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) 412 + if (forw_packet->if_outgoing->soft_iface != soft_iface) { 413 + pr_warn("%s: soft interface switch for queued OGM\n", __func__); 413 414 return; 415 + } 414 416 415 417 if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) 416 418 return;

+3 -3

net/bluetooth/smp.c

··· 3229 3229 { 3230 3230 struct l2cap_chan *chan; 3231 3231 3232 - bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan); 3232 + BT_DBG("pchan %p", pchan); 3233 3233 3234 3234 chan = l2cap_chan_create(); 3235 3235 if (!chan) ··· 3250 3250 */ 3251 3251 atomic_set(&chan->nesting, L2CAP_NESTING_SMP); 3252 3252 3253 - bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan); 3253 + BT_DBG("created chan %p", chan); 3254 3254 3255 3255 return chan; 3256 3256 } ··· 3354 3354 { 3355 3355 struct smp_dev *smp; 3356 3356 3357 - bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan); 3357 + BT_DBG("chan %p", chan); 3358 3358 3359 3359 smp = chan->data; 3360 3360 if (smp) {

+2 -2

net/bridge/br_private.h

··· 90 90 #endif 91 91 92 92 struct br_tunnel_info { 93 - __be64 tunnel_id; 94 - struct metadata_dst *tunnel_dst; 93 + __be64 tunnel_id; 94 + struct metadata_dst __rcu *tunnel_dst; 95 95 }; 96 96 97 97 /* private vlan flags */

+24 -14

net/bridge/br_vlan_tunnel.c

··· 41 41 br_vlan_tunnel_rht_params); 42 42 } 43 43 44 + static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan) 45 + { 46 + struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst); 47 + 48 + WRITE_ONCE(vlan->tinfo.tunnel_id, 0); 49 + RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL); 50 + dst_release(&tdst->dst); 51 + } 52 + 44 53 void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, 45 54 struct net_bridge_vlan *vlan) 46 55 { 47 - if (!vlan->tinfo.tunnel_dst) 56 + if (!rcu_access_pointer(vlan->tinfo.tunnel_dst)) 48 57 return; 49 58 rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode, 50 59 br_vlan_tunnel_rht_params); 51 - vlan->tinfo.tunnel_id = 0; 52 - dst_release(&vlan->tinfo.tunnel_dst->dst); 53 - vlan->tinfo.tunnel_dst = NULL; 60 + vlan_tunnel_info_release(vlan); 54 61 } 55 62 56 63 static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, 57 64 struct net_bridge_vlan *vlan, u32 tun_id) 58 65 { 59 - struct metadata_dst *metadata = NULL; 66 + struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst); 60 67 __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); 61 68 int err; 62 69 63 - if (vlan->tinfo.tunnel_dst) 70 + if (metadata) 64 71 return -EEXIST; 65 72 66 73 metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, ··· 76 69 return -EINVAL; 77 70 78 71 metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE; 79 - vlan->tinfo.tunnel_dst = metadata; 80 - vlan->tinfo.tunnel_id = key; 72 + rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata); 73 + WRITE_ONCE(vlan->tinfo.tunnel_id, key); 81 74 82 75 err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode, 83 76 br_vlan_tunnel_rht_params); ··· 86 79 87 80 return 0; 88 81 out: 89 - dst_release(&vlan->tinfo.tunnel_dst->dst); 90 - vlan->tinfo.tunnel_dst = NULL; 91 - vlan->tinfo.tunnel_id = 0; 82 + vlan_tunnel_info_release(vlan); 92 83 93 84 return err; 94 85 } ··· 187 182 int br_handle_egress_vlan_tunnel(struct sk_buff *skb, 188 183 struct net_bridge_vlan *vlan) 189 184 { 185 + struct metadata_dst *tunnel_dst; 186 + __be64 tunnel_id; 190 187 int err; 191 188 192 - if (!vlan || !vlan->tinfo.tunnel_id) 189 + if (!vlan) 193 190 return 0; 194 191 195 - if (unlikely(!skb_vlan_tag_present(skb))) 192 + tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id); 193 + if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb))) 196 194 return 0; 197 195 198 196 skb_dst_drop(skb); ··· 203 195 if (err) 204 196 return err; 205 197 206 - skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst)); 198 + tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst); 199 + if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst)) 200 + skb_dst_set(skb, &tunnel_dst->dst); 207 201 208 202 return 0; 209 203 }

+49 -13

net/can/bcm.c

··· 125 125 struct sock sk; 126 126 int bound; 127 127 int ifindex; 128 - struct notifier_block notifier; 128 + struct list_head notifier; 129 129 struct list_head rx_ops; 130 130 struct list_head tx_ops; 131 131 unsigned long dropped_usr_msgs; 132 132 struct proc_dir_entry *bcm_proc_read; 133 133 char procname [32]; /* inode number in decimal with \0 */ 134 134 }; 135 + 136 + static LIST_HEAD(bcm_notifier_list); 137 + static DEFINE_SPINLOCK(bcm_notifier_lock); 138 + static struct bcm_sock *bcm_busy_notifier; 135 139 136 140 static inline struct bcm_sock *bcm_sk(const struct sock *sk) 137 141 { ··· 406 402 if (!op->count && (op->flags & TX_COUNTEVT)) { 407 403 408 404 /* create notification to user */ 405 + memset(&msg_head, 0, sizeof(msg_head)); 409 406 msg_head.opcode = TX_EXPIRED; 410 407 msg_head.flags = op->flags; 411 408 msg_head.count = op->count; ··· 444 439 /* this element is not throttled anymore */ 445 440 data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV); 446 441 442 + memset(&head, 0, sizeof(head)); 447 443 head.opcode = RX_CHANGED; 448 444 head.flags = op->flags; 449 445 head.count = op->count; ··· 566 560 } 567 561 568 562 /* create notification to user */ 563 + memset(&msg_head, 0, sizeof(msg_head)); 569 564 msg_head.opcode = RX_TIMEOUT; 570 565 msg_head.flags = op->flags; 571 566 msg_head.count = op->count; ··· 1385 1378 /* 1386 1379 * notification handler for netdevice status changes 1387 1380 */ 1388 - static int bcm_notifier(struct notifier_block *nb, unsigned long msg, 1389 - void *ptr) 1381 + static void bcm_notify(struct bcm_sock *bo, unsigned long msg, 1382 + struct net_device *dev) 1390 1383 { 1391 - struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1392 - struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier); 1393 1384 struct sock *sk = &bo->sk; 1394 1385 struct bcm_op *op; 1395 1386 int notify_enodev = 0; 1396 1387 1397 1388 if (!net_eq(dev_net(dev), sock_net(sk))) 1398 - return NOTIFY_DONE; 1399 - 1400 - if (dev->type != ARPHRD_CAN) 1401 - return NOTIFY_DONE; 1389 + return; 1402 1390 1403 1391 switch (msg) { 1404 1392 ··· 1428 1426 sk->sk_error_report(sk); 1429 1427 } 1430 1428 } 1429 + } 1431 1430 1431 + static int bcm_notifier(struct notifier_block *nb, unsigned long msg, 1432 + void *ptr) 1433 + { 1434 + struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1435 + 1436 + if (dev->type != ARPHRD_CAN) 1437 + return NOTIFY_DONE; 1438 + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) 1439 + return NOTIFY_DONE; 1440 + if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */ 1441 + return NOTIFY_DONE; 1442 + 1443 + spin_lock(&bcm_notifier_lock); 1444 + list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) { 1445 + spin_unlock(&bcm_notifier_lock); 1446 + bcm_notify(bcm_busy_notifier, msg, dev); 1447 + spin_lock(&bcm_notifier_lock); 1448 + } 1449 + bcm_busy_notifier = NULL; 1450 + spin_unlock(&bcm_notifier_lock); 1432 1451 return NOTIFY_DONE; 1433 1452 } 1434 1453 ··· 1469 1446 INIT_LIST_HEAD(&bo->rx_ops); 1470 1447 1471 1448 /* set notifier */ 1472 - bo->notifier.notifier_call = bcm_notifier; 1473 - 1474 - register_netdevice_notifier(&bo->notifier); 1449 + spin_lock(&bcm_notifier_lock); 1450 + list_add_tail(&bo->notifier, &bcm_notifier_list); 1451 + spin_unlock(&bcm_notifier_lock); 1475 1452 1476 1453 return 0; 1477 1454 } ··· 1494 1471 1495 1472 /* remove bcm_ops, timer, rx_unregister(), etc. */ 1496 1473 1497 - unregister_netdevice_notifier(&bo->notifier); 1474 + spin_lock(&bcm_notifier_lock); 1475 + while (bcm_busy_notifier == bo) { 1476 + spin_unlock(&bcm_notifier_lock); 1477 + schedule_timeout_uninterruptible(1); 1478 + spin_lock(&bcm_notifier_lock); 1479 + } 1480 + list_del(&bo->notifier); 1481 + spin_unlock(&bcm_notifier_lock); 1498 1482 1499 1483 lock_sock(sk); 1500 1484 ··· 1722 1692 .exit = canbcm_pernet_exit, 1723 1693 }; 1724 1694 1695 + static struct notifier_block canbcm_notifier = { 1696 + .notifier_call = bcm_notifier 1697 + }; 1698 + 1725 1699 static int __init bcm_module_init(void) 1726 1700 { 1727 1701 int err; ··· 1739 1705 } 1740 1706 1741 1707 register_pernet_subsys(&canbcm_pernet_ops); 1708 + register_netdevice_notifier(&canbcm_notifier); 1742 1709 return 0; 1743 1710 } 1744 1711 1745 1712 static void __exit bcm_module_exit(void) 1746 1713 { 1747 1714 can_proto_unregister(&bcm_can_proto); 1715 + unregister_netdevice_notifier(&canbcm_notifier); 1748 1716 unregister_pernet_subsys(&canbcm_pernet_ops); 1749 1717 } 1750 1718

+48 -13

net/can/isotp.c

··· 143 143 u32 force_tx_stmin; 144 144 u32 force_rx_stmin; 145 145 struct tpcon rx, tx; 146 - struct notifier_block notifier; 146 + struct list_head notifier; 147 147 wait_queue_head_t wait; 148 148 }; 149 + 150 + static LIST_HEAD(isotp_notifier_list); 151 + static DEFINE_SPINLOCK(isotp_notifier_lock); 152 + static struct isotp_sock *isotp_busy_notifier; 149 153 150 154 static inline struct isotp_sock *isotp_sk(const struct sock *sk) 151 155 { ··· 1017 1013 /* wait for complete transmission of current pdu */ 1018 1014 wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); 1019 1015 1020 - unregister_netdevice_notifier(&so->notifier); 1016 + spin_lock(&isotp_notifier_lock); 1017 + while (isotp_busy_notifier == so) { 1018 + spin_unlock(&isotp_notifier_lock); 1019 + schedule_timeout_uninterruptible(1); 1020 + spin_lock(&isotp_notifier_lock); 1021 + } 1022 + list_del(&so->notifier); 1023 + spin_unlock(&isotp_notifier_lock); 1021 1024 1022 1025 lock_sock(sk); 1023 1026 ··· 1328 1317 return 0; 1329 1318 } 1330 1319 1331 - static int isotp_notifier(struct notifier_block *nb, unsigned long msg, 1332 - void *ptr) 1320 + static void isotp_notify(struct isotp_sock *so, unsigned long msg, 1321 + struct net_device *dev) 1333 1322 { 1334 - struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1335 - struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier); 1336 1323 struct sock *sk = &so->sk; 1337 1324 1338 1325 if (!net_eq(dev_net(dev), sock_net(sk))) 1339 - return NOTIFY_DONE; 1340 - 1341 - if (dev->type != ARPHRD_CAN) 1342 - return NOTIFY_DONE; 1326 + return; 1343 1327 1344 1328 if (so->ifindex != dev->ifindex) 1345 - return NOTIFY_DONE; 1329 + return; 1346 1330 1347 1331 switch (msg) { 1348 1332 case NETDEV_UNREGISTER: ··· 1363 1357 sk->sk_error_report(sk); 1364 1358 break; 1365 1359 } 1360 + } 1366 1361 1362 + static int isotp_notifier(struct notifier_block *nb, unsigned long msg, 1363 + void *ptr) 1364 + { 1365 + struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1366 + 1367 + if (dev->type != ARPHRD_CAN) 1368 + return NOTIFY_DONE; 1369 + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) 1370 + return NOTIFY_DONE; 1371 + if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. */ 1372 + return NOTIFY_DONE; 1373 + 1374 + spin_lock(&isotp_notifier_lock); 1375 + list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) { 1376 + spin_unlock(&isotp_notifier_lock); 1377 + isotp_notify(isotp_busy_notifier, msg, dev); 1378 + spin_lock(&isotp_notifier_lock); 1379 + } 1380 + isotp_busy_notifier = NULL; 1381 + spin_unlock(&isotp_notifier_lock); 1367 1382 return NOTIFY_DONE; 1368 1383 } 1369 1384 ··· 1421 1394 1422 1395 init_waitqueue_head(&so->wait); 1423 1396 1424 - so->notifier.notifier_call = isotp_notifier; 1425 - register_netdevice_notifier(&so->notifier); 1397 + spin_lock(&isotp_notifier_lock); 1398 + list_add_tail(&so->notifier, &isotp_notifier_list); 1399 + spin_unlock(&isotp_notifier_lock); 1426 1400 1427 1401 return 0; 1428 1402 } ··· 1470 1442 .prot = &isotp_proto, 1471 1443 }; 1472 1444 1445 + static struct notifier_block canisotp_notifier = { 1446 + .notifier_call = isotp_notifier 1447 + }; 1448 + 1473 1449 static __init int isotp_module_init(void) 1474 1450 { 1475 1451 int err; ··· 1483 1451 err = can_proto_register(&isotp_can_proto); 1484 1452 if (err < 0) 1485 1453 pr_err("can: registration of isotp protocol failed\n"); 1454 + else 1455 + register_netdevice_notifier(&canisotp_notifier); 1486 1456 1487 1457 return err; 1488 1458 } ··· 1492 1458 static __exit void isotp_module_exit(void) 1493 1459 { 1494 1460 can_proto_unregister(&isotp_can_proto); 1461 + unregister_netdevice_notifier(&canisotp_notifier); 1495 1462 } 1496 1463 1497 1464 module_init(isotp_module_init);

+40 -14

net/can/j1939/transport.c

··· 330 330 331 331 if ((do_skcb->offset + do_skb->len) < offset_start) { 332 332 __skb_unlink(do_skb, &session->skb_queue); 333 + /* drop ref taken in j1939_session_skb_queue() */ 334 + skb_unref(do_skb); 335 + 333 336 kfree_skb(do_skb); 334 337 } 335 338 spin_unlock_irqrestore(&session->skb_queue.lock, flags); ··· 352 349 353 350 skcb->flags |= J1939_ECU_LOCAL_SRC; 354 351 352 + skb_get(skb); 355 353 skb_queue_tail(&session->skb_queue, skb); 356 354 } 357 355 358 356 static struct 359 - sk_buff *j1939_session_skb_find_by_offset(struct j1939_session *session, 360 - unsigned int offset_start) 357 + sk_buff *j1939_session_skb_get_by_offset(struct j1939_session *session, 358 + unsigned int offset_start) 361 359 { 362 360 struct j1939_priv *priv = session->priv; 363 361 struct j1939_sk_buff_cb *do_skcb; ··· 375 371 skb = do_skb; 376 372 } 377 373 } 374 + 375 + if (skb) 376 + skb_get(skb); 377 + 378 378 spin_unlock_irqrestore(&session->skb_queue.lock, flags); 379 379 380 380 if (!skb) ··· 389 381 return skb; 390 382 } 391 383 392 - static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) 384 + static struct sk_buff *j1939_session_skb_get(struct j1939_session *session) 393 385 { 394 386 unsigned int offset_start; 395 387 396 388 offset_start = session->pkt.dpo * 7; 397 - return j1939_session_skb_find_by_offset(session, offset_start); 389 + return j1939_session_skb_get_by_offset(session, offset_start); 398 390 } 399 391 400 392 /* see if we are receiver ··· 784 776 int ret = 0; 785 777 u8 dat[8]; 786 778 787 - se_skb = j1939_session_skb_find_by_offset(session, session->pkt.tx * 7); 779 + se_skb = j1939_session_skb_get_by_offset(session, session->pkt.tx * 7); 788 780 if (!se_skb) 789 781 return -ENOBUFS; 790 782 ··· 809 801 netdev_err_once(priv->ndev, 810 802 "%s: 0x%p: requested data outside of queued buffer: offset %i, len %i, pkt.tx: %i\n", 811 803 __func__, session, skcb->offset, se_skb->len , session->pkt.tx); 812 - return -EOVERFLOW; 804 + ret = -EOVERFLOW; 805 + goto out_free; 813 806 } 814 807 815 808 if (!len) { ··· 843 834 844 835 if (pkt_done) 845 836 j1939_tp_set_rxtimeout(session, 250); 837 + 838 + out_free: 839 + if (ret) 840 + kfree_skb(se_skb); 841 + else 842 + consume_skb(se_skb); 846 843 847 844 return ret; 848 845 } ··· 1022 1007 static int j1939_simple_txnext(struct j1939_session *session) 1023 1008 { 1024 1009 struct j1939_priv *priv = session->priv; 1025 - struct sk_buff *se_skb = j1939_session_skb_find(session); 1010 + struct sk_buff *se_skb = j1939_session_skb_get(session); 1026 1011 struct sk_buff *skb; 1027 1012 int ret; 1028 1013 ··· 1030 1015 return 0; 1031 1016 1032 1017 skb = skb_clone(se_skb, GFP_ATOMIC); 1033 - if (!skb) 1034 - return -ENOMEM; 1018 + if (!skb) { 1019 + ret = -ENOMEM; 1020 + goto out_free; 1021 + } 1035 1022 1036 1023 can_skb_set_owner(skb, se_skb->sk); 1037 1024 ··· 1041 1024 1042 1025 ret = j1939_send_one(priv, skb); 1043 1026 if (ret) 1044 - return ret; 1027 + goto out_free; 1045 1028 1046 1029 j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); 1047 1030 j1939_sk_queue_activate_next(session); 1048 1031 1049 - return 0; 1032 + out_free: 1033 + if (ret) 1034 + kfree_skb(se_skb); 1035 + else 1036 + consume_skb(se_skb); 1037 + 1038 + return ret; 1050 1039 } 1051 1040 1052 1041 static bool j1939_session_deactivate_locked(struct j1939_session *session) ··· 1193 1170 struct sk_buff *skb; 1194 1171 1195 1172 if (!session->transmission) { 1196 - skb = j1939_session_skb_find(session); 1173 + skb = j1939_session_skb_get(session); 1197 1174 /* distribute among j1939 receivers */ 1198 1175 j1939_sk_recv(session->priv, skb); 1176 + consume_skb(skb); 1199 1177 } 1200 1178 1201 1179 j1939_session_deactivate_activate_next(session); ··· 1768 1744 { 1769 1745 struct j1939_priv *priv = session->priv; 1770 1746 struct j1939_sk_buff_cb *skcb; 1771 - struct sk_buff *se_skb; 1747 + struct sk_buff *se_skb = NULL; 1772 1748 const u8 *dat; 1773 1749 u8 *tpdat; 1774 1750 int offset; ··· 1810 1786 goto out_session_cancel; 1811 1787 } 1812 1788 1813 - se_skb = j1939_session_skb_find_by_offset(session, packet * 7); 1789 + se_skb = j1939_session_skb_get_by_offset(session, packet * 7); 1814 1790 if (!se_skb) { 1815 1791 netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__, 1816 1792 session); ··· 1872 1848 j1939_tp_set_rxtimeout(session, 250); 1873 1849 } 1874 1850 session->last_cmd = 0xff; 1851 + consume_skb(se_skb); 1875 1852 j1939_session_put(session); 1876 1853 1877 1854 return; 1878 1855 1879 1856 out_session_cancel: 1857 + kfree_skb(se_skb); 1880 1858 j1939_session_timers_cancel(session); 1881 1859 j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); 1882 1860 j1939_session_put(session);

+48 -14

net/can/raw.c

··· 83 83 struct sock sk; 84 84 int bound; 85 85 int ifindex; 86 - struct notifier_block notifier; 86 + struct list_head notifier; 87 87 int loopback; 88 88 int recv_own_msgs; 89 89 int fd_frames; ··· 94 94 can_err_mask_t err_mask; 95 95 struct uniqframe __percpu *uniq; 96 96 }; 97 + 98 + static LIST_HEAD(raw_notifier_list); 99 + static DEFINE_SPINLOCK(raw_notifier_lock); 100 + static struct raw_sock *raw_busy_notifier; 97 101 98 102 /* Return pointer to store the extra msg flags for raw_recvmsg(). 99 103 * We use the space of one unsigned int beyond the 'struct sockaddr_can' ··· 267 263 return err; 268 264 } 269 265 270 - static int raw_notifier(struct notifier_block *nb, 271 - unsigned long msg, void *ptr) 266 + static void raw_notify(struct raw_sock *ro, unsigned long msg, 267 + struct net_device *dev) 272 268 { 273 - struct net_device *dev = netdev_notifier_info_to_dev(ptr); 274 - struct raw_sock *ro = container_of(nb, struct raw_sock, notifier); 275 269 struct sock *sk = &ro->sk; 276 270 277 271 if (!net_eq(dev_net(dev), sock_net(sk))) 278 - return NOTIFY_DONE; 279 - 280 - if (dev->type != ARPHRD_CAN) 281 - return NOTIFY_DONE; 272 + return; 282 273 283 274 if (ro->ifindex != dev->ifindex) 284 - return NOTIFY_DONE; 275 + return; 285 276 286 277 switch (msg) { 287 278 case NETDEV_UNREGISTER: ··· 304 305 sk->sk_error_report(sk); 305 306 break; 306 307 } 308 + } 307 309 310 + static int raw_notifier(struct notifier_block *nb, unsigned long msg, 311 + void *ptr) 312 + { 313 + struct net_device *dev = netdev_notifier_info_to_dev(ptr); 314 + 315 + if (dev->type != ARPHRD_CAN) 316 + return NOTIFY_DONE; 317 + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) 318 + return NOTIFY_DONE; 319 + if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */ 320 + return NOTIFY_DONE; 321 + 322 + spin_lock(&raw_notifier_lock); 323 + list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) { 324 + spin_unlock(&raw_notifier_lock); 325 + raw_notify(raw_busy_notifier, msg, dev); 326 + spin_lock(&raw_notifier_lock); 327 + } 328 + raw_busy_notifier = NULL; 329 + spin_unlock(&raw_notifier_lock); 308 330 return NOTIFY_DONE; 309 331 } 310 332 ··· 354 334 return -ENOMEM; 355 335 356 336 /* set notifier */ 357 - ro->notifier.notifier_call = raw_notifier; 358 - 359 - register_netdevice_notifier(&ro->notifier); 337 + spin_lock(&raw_notifier_lock); 338 + list_add_tail(&ro->notifier, &raw_notifier_list); 339 + spin_unlock(&raw_notifier_lock); 360 340 361 341 return 0; 362 342 } ··· 371 351 372 352 ro = raw_sk(sk); 373 353 374 - unregister_netdevice_notifier(&ro->notifier); 354 + spin_lock(&raw_notifier_lock); 355 + while (raw_busy_notifier == ro) { 356 + spin_unlock(&raw_notifier_lock); 357 + schedule_timeout_uninterruptible(1); 358 + spin_lock(&raw_notifier_lock); 359 + } 360 + list_del(&ro->notifier); 361 + spin_unlock(&raw_notifier_lock); 375 362 376 363 lock_sock(sk); 377 364 ··· 916 889 .prot = &raw_proto, 917 890 }; 918 891 892 + static struct notifier_block canraw_notifier = { 893 + .notifier_call = raw_notifier 894 + }; 895 + 919 896 static __init int raw_module_init(void) 920 897 { 921 898 int err; ··· 929 898 err = can_proto_register(&raw_can_proto); 930 899 if (err < 0) 931 900 pr_err("can: registration of raw protocol failed\n"); 901 + else 902 + register_netdevice_notifier(&canraw_notifier); 932 903 933 904 return err; 934 905 } ··· 938 905 static __exit void raw_module_exit(void) 939 906 { 940 907 can_proto_unregister(&raw_can_proto); 908 + unregister_netdevice_notifier(&canraw_notifier); 941 909 } 942 910 943 911 module_init(raw_module_init);

+1

net/core/neighbour.c

··· 238 238 239 239 write_lock(&n->lock); 240 240 if ((n->nud_state == NUD_FAILED) || 241 + (n->nud_state == NUD_NOARP) || 241 242 (tbl->is_multicast && 242 243 tbl->is_multicast(n->primary_key)) || 243 244 time_after(tref, n->updated))

+13 -7

net/core/net_namespace.c

··· 641 641 } 642 642 EXPORT_SYMBOL_GPL(__put_net); 643 643 644 + /** 645 + * get_net_ns - increment the refcount of the network namespace 646 + * @ns: common namespace (net) 647 + * 648 + * Returns the net's common namespace. 649 + */ 650 + struct ns_common *get_net_ns(struct ns_common *ns) 651 + { 652 + return &get_net(container_of(ns, struct net, ns))->ns; 653 + } 654 + EXPORT_SYMBOL_GPL(get_net_ns); 655 + 644 656 struct net *get_net_ns_by_fd(int fd) 645 657 { 646 658 struct file *file; ··· 672 660 fput(file); 673 661 return net; 674 662 } 675 - 676 - #else 677 - struct net *get_net_ns_by_fd(int fd) 678 - { 679 - return ERR_PTR(-EINVAL); 680 - } 681 - #endif 682 663 EXPORT_SYMBOL_GPL(get_net_ns_by_fd); 664 + #endif 683 665 684 666 struct net *get_net_ns_by_pid(pid_t pid) 685 667 {

+5 -3

net/core/rtnetlink.c

··· 4842 4842 if (err < 0) 4843 4843 goto errout; 4844 4844 4845 - if (!skb->len) { 4846 - err = -EINVAL; 4845 + /* Notification info is only filled for bridge ports, not the bridge 4846 + * device itself. Therefore, a zero notification length is valid and 4847 + * should not result in an error. 4848 + */ 4849 + if (!skb->len) 4847 4850 goto errout; 4848 - } 4849 4851 4850 4852 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); 4851 4853 return 0;

+3 -1

net/core/skbuff.c

··· 1253 1253 struct sock *sk = skb->sk; 1254 1254 struct sk_buff_head *q; 1255 1255 unsigned long flags; 1256 + bool is_zerocopy; 1256 1257 u32 lo, hi; 1257 1258 u16 len; 1258 1259 ··· 1268 1267 len = uarg->len; 1269 1268 lo = uarg->id; 1270 1269 hi = uarg->id + len - 1; 1270 + is_zerocopy = uarg->zerocopy; 1271 1271 1272 1272 serr = SKB_EXT_ERR(skb); 1273 1273 memset(serr, 0, sizeof(*serr)); ··· 1276 1274 serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; 1277 1275 serr->ee.ee_data = hi; 1278 1276 serr->ee.ee_info = lo; 1279 - if (!uarg->zerocopy) 1277 + if (!is_zerocopy) 1280 1278 serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; 1281 1279 1282 1280 q = &sk->sk_error_queue;

+1 -1

net/ethtool/eeprom.c

··· 95 95 if (dev->sfp_bus) 96 96 return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); 97 97 98 - if (ops->get_module_info) 98 + if (ops->get_module_eeprom_by_page) 99 99 return ops->get_module_eeprom_by_page(dev, page_data, extack); 100 100 101 101 return -EOPNOTSUPP;

+5 -5

net/ethtool/ioctl.c

··· 1421 1421 if (eeprom.offset + eeprom.len > total_len) 1422 1422 return -EINVAL; 1423 1423 1424 - data = kmalloc(PAGE_SIZE, GFP_USER); 1424 + data = kzalloc(PAGE_SIZE, GFP_USER); 1425 1425 if (!data) 1426 1426 return -ENOMEM; 1427 1427 ··· 1486 1486 if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) 1487 1487 return -EINVAL; 1488 1488 1489 - data = kmalloc(PAGE_SIZE, GFP_USER); 1489 + data = kzalloc(PAGE_SIZE, GFP_USER); 1490 1490 if (!data) 1491 1491 return -ENOMEM; 1492 1492 ··· 1765 1765 return -EFAULT; 1766 1766 1767 1767 test.len = test_len; 1768 - data = kmalloc_array(test_len, sizeof(u64), GFP_USER); 1768 + data = kcalloc(test_len, sizeof(u64), GFP_USER); 1769 1769 if (!data) 1770 1770 return -ENOMEM; 1771 1771 ··· 2293 2293 ret = ethtool_tunable_valid(&tuna); 2294 2294 if (ret) 2295 2295 return ret; 2296 - data = kmalloc(tuna.len, GFP_USER); 2296 + data = kzalloc(tuna.len, GFP_USER); 2297 2297 if (!data) 2298 2298 return -ENOMEM; 2299 2299 ret = ops->get_tunable(dev, &tuna, data); ··· 2485 2485 ret = ethtool_phy_tunable_valid(&tuna); 2486 2486 if (ret) 2487 2487 return ret; 2488 - data = kmalloc(tuna.len, GFP_USER); 2488 + data = kzalloc(tuna.len, GFP_USER); 2489 2489 if (!data) 2490 2490 return -ENOMEM; 2491 2491 if (phy_drv_tunable) {

+2

net/ethtool/strset.c

··· 353 353 int len = 0; 354 354 int ret; 355 355 356 + len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */ 357 + 356 358 for (i = 0; i < ETH_SS_COUNT; i++) { 357 359 const struct strset_info *set_info = &data->sets[i]; 358 360

+2 -2

net/ipv4/af_inet.c

··· 575 575 return err; 576 576 } 577 577 578 - if (!inet_sk(sk)->inet_num && inet_autobind(sk)) 578 + if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) 579 579 return -EAGAIN; 580 580 return sk->sk_prot->connect(sk, uaddr, addr_len); 581 581 } ··· 803 803 sock_rps_record_flow(sk); 804 804 805 805 /* We may need to bind the socket. */ 806 - if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && 806 + if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && 807 807 inet_autobind(sk)) 808 808 return -EAGAIN; 809 809

+1

net/ipv4/cipso_ipv4.c

··· 472 472 kfree(doi_def->map.std->lvl.local); 473 473 kfree(doi_def->map.std->cat.cipso); 474 474 kfree(doi_def->map.std->cat.local); 475 + kfree(doi_def->map.std); 475 476 break; 476 477 } 477 478 kfree(doi_def);

+1 -1

net/ipv4/devinet.c

··· 1989 1989 return -EAFNOSUPPORT; 1990 1990 1991 1991 if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) 1992 - BUG(); 1992 + return -EINVAL; 1993 1993 1994 1994 if (tb[IFLA_INET_CONF]) { 1995 1995 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)

+7

net/ipv4/icmp.c

··· 759 759 icmp_param.data_len = room; 760 760 icmp_param.head_len = sizeof(struct icmphdr); 761 761 762 + /* if we don't have a source address at this point, fall back to the 763 + * dummy address instead of sending out a packet with a source address 764 + * of 0.0.0.0 765 + */ 766 + if (!fl4.saddr) 767 + fl4.saddr = htonl(INADDR_DUMMY); 768 + 762 769 icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); 763 770 ende: 764 771 ip_rt_put(rt);

+1

net/ipv4/igmp.c

··· 1801 1801 while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) { 1802 1802 in_dev->mc_list = i->next_rcu; 1803 1803 in_dev->mc_count--; 1804 + ip_mc_clear_src(i); 1804 1805 ip_ma_put(i); 1805 1806 } 1806 1807 }

+7 -5

net/ipv4/ping.c

··· 954 954 struct sock *sk; 955 955 struct net *net = dev_net(skb->dev); 956 956 struct icmphdr *icmph = icmp_hdr(skb); 957 + bool rc = false; 957 958 958 959 /* We assume the packet has already been checked by icmp_rcv */ 959 960 ··· 969 968 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 970 969 971 970 pr_debug("rcv on socket %p\n", sk); 972 - if (skb2) 973 - ping_queue_rcv_skb(sk, skb2); 971 + if (skb2 && !ping_queue_rcv_skb(sk, skb2)) 972 + rc = true; 974 973 sock_put(sk); 975 - return true; 976 974 } 977 - pr_debug("no socket, dropping\n"); 978 975 979 - return false; 976 + if (!rc) 977 + pr_debug("no socket, dropping\n"); 978 + 979 + return rc; 980 980 } 981 981 EXPORT_SYMBOL_GPL(ping_rcv); 982 982

+14 -1

net/ipv4/route.c

··· 2056 2056 return err; 2057 2057 } 2058 2058 2059 + /* get device for dst_alloc with local routes */ 2060 + static struct net_device *ip_rt_get_dev(struct net *net, 2061 + const struct fib_result *res) 2062 + { 2063 + struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; 2064 + struct net_device *dev = NULL; 2065 + 2066 + if (nhc) 2067 + dev = l3mdev_master_dev_rcu(nhc->nhc_dev); 2068 + 2069 + return dev ? : net->loopback_dev; 2070 + } 2071 + 2059 2072 /* 2060 2073 * NOTE. We drop all the packets that has local source 2061 2074 * addresses, because every properly looped back packet ··· 2225 2212 } 2226 2213 } 2227 2214 2228 - rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, 2215 + rth = rt_dst_alloc(ip_rt_get_dev(net, res), 2229 2216 flags | RTCF_LOCAL, res->type, 2230 2217 IN_DEV_ORCONF(in_dev, NOPOLICY), false); 2231 2218 if (!rth)

+10

net/ipv4/udp.c

··· 2607 2607 { 2608 2608 struct udp_sock *up = udp_sk(sk); 2609 2609 bool slow = lock_sock_fast(sk); 2610 + 2611 + /* protects from races with udp_abort() */ 2612 + sock_set_flag(sk, SOCK_DEAD); 2610 2613 udp_flush_pending_frames(sk); 2611 2614 unlock_sock_fast(sk, slow); 2612 2615 if (static_branch_unlikely(&udp_encap_needed_key)) { ··· 2860 2857 { 2861 2858 lock_sock(sk); 2862 2859 2860 + /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing 2861 + * with close() 2862 + */ 2863 + if (sock_flag(sk, SOCK_DEAD)) 2864 + goto out; 2865 + 2863 2866 sk->sk_err = err; 2864 2867 sk->sk_error_report(sk); 2865 2868 __udp_disconnect(sk, 0); 2866 2869 2870 + out: 2867 2871 release_sock(sk); 2868 2872 2869 2873 return 0;

+1 -1

net/ipv6/addrconf.c

··· 5827 5827 return -EAFNOSUPPORT; 5828 5828 5829 5829 if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) 5830 - BUG(); 5830 + return -EINVAL; 5831 5831 5832 5832 if (tb[IFLA_INET6_TOKEN]) { 5833 5833 err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),

+18 -4

net/ipv6/netfilter/nft_fib_ipv6.c

··· 135 135 } 136 136 EXPORT_SYMBOL_GPL(nft_fib6_eval_type); 137 137 138 + static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const struct ipv6hdr *iph) 139 + { 140 + if (likely(next != IPPROTO_ICMPV6)) 141 + return false; 142 + 143 + if (ipv6_addr_type(&iph->saddr) != IPV6_ADDR_ANY) 144 + return false; 145 + 146 + return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; 147 + } 148 + 138 149 void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, 139 150 const struct nft_pktinfo *pkt) 140 151 { ··· 174 163 175 164 lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); 176 165 177 - if (nft_hook(pkt) == NF_INET_PRE_ROUTING && 178 - nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { 179 - nft_fib_store_result(dest, priv, nft_in(pkt)); 180 - return; 166 + if (nft_hook(pkt) == NF_INET_PRE_ROUTING || 167 + nft_hook(pkt) == NF_INET_INGRESS) { 168 + if (nft_fib_is_loopback(pkt->skb, nft_in(pkt)) || 169 + nft_fib_v6_skip_icmpv6(pkt->skb, pkt->tprot, iph)) { 170 + nft_fib_store_result(dest, priv, nft_in(pkt)); 171 + return; 172 + } 181 173 } 182 174 183 175 *dest = 0;

+3

net/ipv6/udp.c

··· 1598 1598 { 1599 1599 struct udp_sock *up = udp_sk(sk); 1600 1600 lock_sock(sk); 1601 + 1602 + /* protects from races with udp_abort() */ 1603 + sock_set_flag(sk, SOCK_DEAD); 1601 1604 udp_v6_flush_pending_frames(sk); 1602 1605 release_sock(sk); 1603 1606

-5

net/kcm/kcmsock.c

··· 1066 1066 goto partial_message; 1067 1067 } 1068 1068 1069 - if (skb_has_frag_list(head)) { 1070 - kfree_skb_list(skb_shinfo(head)->frag_list); 1071 - skb_shinfo(head)->frag_list = NULL; 1072 - } 1073 - 1074 1069 if (head != kcm->seq_skb) 1075 1070 kfree_skb(head); 1076 1071

+9 -2

net/mac80211/debugfs.c

··· 4 4 * 5 5 * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> 6 6 * Copyright 2013-2014 Intel Mobile Communications GmbH 7 - * Copyright (C) 2018 - 2019 Intel Corporation 7 + * Copyright (C) 2018 - 2019, 2021 Intel Corporation 8 8 */ 9 9 10 10 #include <linux/debugfs.h> ··· 387 387 size_t count, loff_t *ppos) 388 388 { 389 389 struct ieee80211_local *local = file->private_data; 390 + int ret; 390 391 391 392 rtnl_lock(); 393 + wiphy_lock(local->hw.wiphy); 392 394 __ieee80211_suspend(&local->hw, NULL); 393 - __ieee80211_resume(&local->hw); 395 + ret = __ieee80211_resume(&local->hw); 396 + wiphy_unlock(local->hw.wiphy); 397 + 398 + if (ret) 399 + cfg80211_shutdown_all_interfaces(local->hw.wiphy); 400 + 394 401 rtnl_unlock(); 395 402 396 403 return count;

+1 -1

net/mac80211/ieee80211_i.h

··· 1442 1442 rcu_read_lock(); 1443 1443 chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); 1444 1444 1445 - if (WARN_ON_ONCE(!chanctx_conf)) { 1445 + if (!chanctx_conf) { 1446 1446 rcu_read_unlock(); 1447 1447 return NULL; 1448 1448 }

+12 -7

net/mac80211/iface.c

··· 476 476 GFP_KERNEL); 477 477 } 478 478 479 - /* APs need special treatment */ 480 479 if (sdata->vif.type == NL80211_IFTYPE_AP) { 481 - struct ieee80211_sub_if_data *vlan, *tmpsdata; 482 - 483 - /* down all dependent devices, that is VLANs */ 484 - list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, 485 - u.vlan.list) 486 - dev_close(vlan->dev); 487 480 WARN_ON(!list_empty(&sdata->u.ap.vlans)); 488 481 } else if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { 489 482 /* remove all packets in parent bc_buf pointing to this dev */ ··· 633 640 static int ieee80211_stop(struct net_device *dev) 634 641 { 635 642 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 643 + 644 + /* close all dependent VLAN interfaces before locking wiphy */ 645 + if (sdata->vif.type == NL80211_IFTYPE_AP) { 646 + struct ieee80211_sub_if_data *vlan, *tmpsdata; 647 + 648 + list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, 649 + u.vlan.list) 650 + dev_close(vlan->dev); 651 + } 636 652 637 653 wiphy_lock(sdata->local->hw.wiphy); 638 654 ieee80211_do_stop(sdata, true); ··· 1593 1591 1594 1592 switch (sdata->vif.type) { 1595 1593 case NL80211_IFTYPE_AP: 1594 + if (!list_empty(&sdata->u.ap.vlans)) 1595 + return -EBUSY; 1596 + break; 1596 1597 case NL80211_IFTYPE_STATION: 1597 1598 case NL80211_IFTYPE_ADHOC: 1598 1599 case NL80211_IFTYPE_OCB:

+6 -1

net/mac80211/main.c

··· 252 252 struct ieee80211_local *local = 253 253 container_of(work, struct ieee80211_local, restart_work); 254 254 struct ieee80211_sub_if_data *sdata; 255 + int ret; 255 256 256 257 /* wait for scan work complete */ 257 258 flush_workqueue(local->workqueue); ··· 302 301 /* wait for all packet processing to be done */ 303 302 synchronize_net(); 304 303 305 - ieee80211_reconfig(local); 304 + ret = ieee80211_reconfig(local); 306 305 wiphy_unlock(local->hw.wiphy); 306 + 307 + if (ret) 308 + cfg80211_shutdown_all_interfaces(local->hw.wiphy); 309 + 307 310 rtnl_unlock(); 308 311 } 309 312

+8

net/mac80211/mlme.c

··· 4062 4062 if (elems.mbssid_config_ie) 4063 4063 bss_conf->profile_periodicity = 4064 4064 elems.mbssid_config_ie->profile_periodicity; 4065 + else 4066 + bss_conf->profile_periodicity = 0; 4065 4067 4066 4068 if (elems.ext_capab_len >= 11 && 4067 4069 (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) 4068 4070 bss_conf->ema_ap = true; 4071 + else 4072 + bss_conf->ema_ap = false; 4069 4073 4070 4074 /* continue assoc process */ 4071 4075 ifmgd->assoc_data->timeout = jiffies; ··· 5806 5802 beacon_ies->data, beacon_ies->len); 5807 5803 if (elem && elem->datalen >= 3) 5808 5804 sdata->vif.bss_conf.profile_periodicity = elem->data[2]; 5805 + else 5806 + sdata->vif.bss_conf.profile_periodicity = 0; 5809 5807 5810 5808 elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, 5811 5809 beacon_ies->data, beacon_ies->len); 5812 5810 if (elem && elem->datalen >= 11 && 5813 5811 (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) 5814 5812 sdata->vif.bss_conf.ema_ap = true; 5813 + else 5814 + sdata->vif.bss_conf.ema_ap = false; 5815 5815 } else { 5816 5816 assoc_data->timeout = jiffies; 5817 5817 assoc_data->timeout_started = true;

+1 -1

net/mac80211/rc80211_minstrel_ht.c

··· 1514 1514 (info->control.flags & IEEE80211_TX_CTRL_PORT_CTRL_PROTO)) 1515 1515 return; 1516 1516 1517 - if (time_is_before_jiffies(mi->sample_time)) 1517 + if (time_is_after_jiffies(mi->sample_time)) 1518 1518 return; 1519 1519 1520 1520 mi->sample_time = jiffies + MINSTREL_SAMPLE_INTERVAL;

+3 -6

net/mac80211/rx.c

··· 2240 2240 sc = le16_to_cpu(hdr->seq_ctrl); 2241 2241 frag = sc & IEEE80211_SCTL_FRAG; 2242 2242 2243 - if (is_multicast_ether_addr(hdr->addr1)) { 2244 - I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); 2245 - goto out_no_led; 2246 - } 2247 - 2248 2243 if (rx->sta) 2249 2244 cache = &rx->sta->frags; 2250 2245 2251 2246 if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) 2252 2247 goto out; 2248 + 2249 + if (is_multicast_ether_addr(hdr->addr1)) 2250 + return RX_DROP_MONITOR; 2253 2251 2254 2252 I802_DEBUG_INC(rx->local->rx_handlers_fragments); 2255 2253 ··· 2374 2376 2375 2377 out: 2376 2378 ieee80211_led_rx(rx->local); 2377 - out_no_led: 2378 2379 if (rx->sta) 2379 2380 rx->sta->rx_stats.packets++; 2380 2381 return RX_CONTINUE;

+16 -5

net/mac80211/scan.c

··· 251 251 struct ieee80211_mgmt *mgmt = (void *)skb->data; 252 252 struct ieee80211_bss *bss; 253 253 struct ieee80211_channel *channel; 254 + size_t min_hdr_len = offsetof(struct ieee80211_mgmt, 255 + u.probe_resp.variable); 256 + 257 + if (!ieee80211_is_probe_resp(mgmt->frame_control) && 258 + !ieee80211_is_beacon(mgmt->frame_control) && 259 + !ieee80211_is_s1g_beacon(mgmt->frame_control)) 260 + return; 254 261 255 262 if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { 256 - if (skb->len < 15) 257 - return; 258 - } else if (skb->len < 24 || 259 - (!ieee80211_is_probe_resp(mgmt->frame_control) && 260 - !ieee80211_is_beacon(mgmt->frame_control))) 263 + if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) 264 + min_hdr_len = offsetof(struct ieee80211_ext, 265 + u.s1g_short_beacon.variable); 266 + else 267 + min_hdr_len = offsetof(struct ieee80211_ext, 268 + u.s1g_beacon); 269 + } 270 + 271 + if (skb->len < min_hdr_len) 261 272 return; 262 273 263 274 sdata1 = rcu_dereference(local->scan_sdata);

+42 -22

net/mac80211/tx.c

··· 2014 2014 ieee80211_tx(sdata, sta, skb, false); 2015 2015 } 2016 2016 2017 - bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, 2018 - struct net_device *dev) 2017 + static bool ieee80211_validate_radiotap_len(struct sk_buff *skb) 2019 2018 { 2020 - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 2021 - struct ieee80211_radiotap_iterator iterator; 2022 2019 struct ieee80211_radiotap_header *rthdr = 2023 - (struct ieee80211_radiotap_header *) skb->data; 2024 - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 2025 - struct ieee80211_supported_band *sband = 2026 - local->hw.wiphy->bands[info->band]; 2027 - int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, 2028 - NULL); 2029 - u16 txflags; 2030 - u16 rate = 0; 2031 - bool rate_found = false; 2032 - u8 rate_retries = 0; 2033 - u16 rate_flags = 0; 2034 - u8 mcs_known, mcs_flags, mcs_bw; 2035 - u16 vht_known; 2036 - u8 vht_mcs = 0, vht_nss = 0; 2037 - int i; 2020 + (struct ieee80211_radiotap_header *)skb->data; 2038 2021 2039 2022 /* check for not even having the fixed radiotap header part */ 2040 2023 if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) ··· 2030 2047 /* does the skb contain enough to deliver on the alleged length? */ 2031 2048 if (unlikely(skb->len < ieee80211_get_radiotap_len(skb->data))) 2032 2049 return false; /* skb too short for claimed rt header extent */ 2050 + 2051 + return true; 2052 + } 2053 + 2054 + bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, 2055 + struct net_device *dev) 2056 + { 2057 + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 2058 + struct ieee80211_radiotap_iterator iterator; 2059 + struct ieee80211_radiotap_header *rthdr = 2060 + (struct ieee80211_radiotap_header *) skb->data; 2061 + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 2062 + int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len, 2063 + NULL); 2064 + u16 txflags; 2065 + u16 rate = 0; 2066 + bool rate_found = false; 2067 + u8 rate_retries = 0; 2068 + u16 rate_flags = 0; 2069 + u8 mcs_known, mcs_flags, mcs_bw; 2070 + u16 vht_known; 2071 + u8 vht_mcs = 0, vht_nss = 0; 2072 + int i; 2073 + 2074 + if (!ieee80211_validate_radiotap_len(skb)) 2075 + return false; 2033 2076 2034 2077 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | 2035 2078 IEEE80211_TX_CTL_DONTFRAG; ··· 2195 2186 return false; 2196 2187 2197 2188 if (rate_found) { 2189 + struct ieee80211_supported_band *sband = 2190 + local->hw.wiphy->bands[info->band]; 2191 + 2198 2192 info->control.flags |= IEEE80211_TX_CTRL_RATE_INJECT; 2199 2193 2200 2194 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { ··· 2211 2199 } else if (rate_flags & IEEE80211_TX_RC_VHT_MCS) { 2212 2200 ieee80211_rate_set_vht(info->control.rates, vht_mcs, 2213 2201 vht_nss); 2214 - } else { 2202 + } else if (sband) { 2215 2203 for (i = 0; i < sband->n_bitrates; i++) { 2216 2204 if (rate * 5 != sband->bitrates[i].bitrate) 2217 2205 continue; ··· 2248 2236 info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS | 2249 2237 IEEE80211_TX_CTL_INJECTED; 2250 2238 2251 - /* Sanity-check and process the injection radiotap header */ 2252 - if (!ieee80211_parse_tx_radiotap(skb, dev)) 2239 + /* Sanity-check the length of the radiotap header */ 2240 + if (!ieee80211_validate_radiotap_len(skb)) 2253 2241 goto fail; 2254 2242 2255 2243 /* we now know there is a radiotap header with a length we can use */ ··· 2362 2350 */ 2363 2351 ieee80211_select_queue_80211(sdata, skb, hdr); 2364 2352 skb_set_queue_mapping(skb, ieee80211_ac_from_tid(skb->priority)); 2353 + 2354 + /* 2355 + * Process the radiotap header. This will now take into account the 2356 + * selected chandef above to accurately set injection rates and 2357 + * retransmissions. 2358 + */ 2359 + if (!ieee80211_parse_tx_radiotap(skb, dev)) 2360 + goto fail_rcu; 2365 2361 2366 2362 /* remove the injection radiotap header */ 2367 2363 skb_pull(skb, len_rthdr);

+11 -13

net/mac80211/util.c

··· 947 947 948 948 switch (elem->data[0]) { 949 949 case WLAN_EID_EXT_HE_MU_EDCA: 950 - if (len == sizeof(*elems->mu_edca_param_set)) { 950 + if (len >= sizeof(*elems->mu_edca_param_set)) { 951 951 elems->mu_edca_param_set = data; 952 952 if (crc) 953 953 *crc = crc32_be(*crc, (void *)elem, ··· 968 968 } 969 969 break; 970 970 case WLAN_EID_EXT_UORA: 971 - if (len == 1) 971 + if (len >= 1) 972 972 elems->uora_element = data; 973 973 break; 974 974 case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: ··· 976 976 elems->max_channel_switch_time = data; 977 977 break; 978 978 case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: 979 - if (len == sizeof(*elems->mbssid_config_ie)) 979 + if (len >= sizeof(*elems->mbssid_config_ie)) 980 980 elems->mbssid_config_ie = data; 981 981 break; 982 982 case WLAN_EID_EXT_HE_SPR: ··· 985 985 elems->he_spr = data; 986 986 break; 987 987 case WLAN_EID_EXT_HE_6GHZ_CAPA: 988 - if (len == sizeof(*elems->he_6ghz_capa)) 988 + if (len >= sizeof(*elems->he_6ghz_capa)) 989 989 elems->he_6ghz_capa = data; 990 990 break; 991 991 } ··· 1074 1074 1075 1075 switch (id) { 1076 1076 case WLAN_EID_LINK_ID: 1077 - if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) { 1077 + if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { 1078 1078 elem_parse_failed = true; 1079 1079 break; 1080 1080 } 1081 1081 elems->lnk_id = (void *)(pos - 2); 1082 1082 break; 1083 1083 case WLAN_EID_CHAN_SWITCH_TIMING: 1084 - if (elen != sizeof(struct ieee80211_ch_switch_timing)) { 1084 + if (elen < sizeof(struct ieee80211_ch_switch_timing)) { 1085 1085 elem_parse_failed = true; 1086 1086 break; 1087 1087 } ··· 1244 1244 elems->sec_chan_offs = (void *)pos; 1245 1245 break; 1246 1246 case WLAN_EID_CHAN_SWITCH_PARAM: 1247 - if (elen != 1247 + if (elen < 1248 1248 sizeof(*elems->mesh_chansw_params_ie)) { 1249 1249 elem_parse_failed = true; 1250 1250 break; ··· 1253 1253 break; 1254 1254 case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: 1255 1255 if (!action || 1256 - elen != sizeof(*elems->wide_bw_chansw_ie)) { 1256 + elen < sizeof(*elems->wide_bw_chansw_ie)) { 1257 1257 elem_parse_failed = true; 1258 1258 break; 1259 1259 } ··· 1272 1272 ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, 1273 1273 pos, elen); 1274 1274 if (ie) { 1275 - if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) 1275 + if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) 1276 1276 elems->wide_bw_chansw_ie = 1277 1277 (void *)(ie + 2); 1278 1278 else ··· 1316 1316 elems->cisco_dtpc_elem = pos; 1317 1317 break; 1318 1318 case WLAN_EID_ADDBA_EXT: 1319 - if (elen != sizeof(struct ieee80211_addba_ext_ie)) { 1319 + if (elen < sizeof(struct ieee80211_addba_ext_ie)) { 1320 1320 elem_parse_failed = true; 1321 1321 break; 1322 1322 } ··· 1342 1342 elem, elems); 1343 1343 break; 1344 1344 case WLAN_EID_S1G_CAPABILITIES: 1345 - if (elen == sizeof(*elems->s1g_capab)) 1345 + if (elen >= sizeof(*elems->s1g_capab)) 1346 1346 elems->s1g_capab = (void *)pos; 1347 1347 else 1348 1348 elem_parse_failed = true; ··· 2178 2178 list_for_each_entry(ctx, &local->chanctx_list, list) 2179 2179 ctx->driver_present = false; 2180 2180 mutex_unlock(&local->chanctx_mtx); 2181 - 2182 - cfg80211_shutdown_all_interfaces(local->hw.wiphy); 2183 2181 } 2184 2182 2185 2183 static void ieee80211_assign_chanctx(struct ieee80211_local *local,

+2

net/mptcp/options.c

··· 356 356 length--; 357 357 continue; 358 358 default: 359 + if (length < 2) 360 + return; 359 361 opsize = *ptr++; 360 362 if (opsize < 2) /* "silly options" */ 361 363 return;

+27 -25

net/mptcp/protocol.c

··· 280 280 281 281 /* try to fetch required memory from subflow */ 282 282 if (!sk_rmem_schedule(sk, skb, skb->truesize)) { 283 - if (ssk->sk_forward_alloc < skb->truesize) 283 + int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT; 284 + 285 + if (ssk->sk_forward_alloc < amount) 284 286 goto drop; 285 - __sk_mem_reclaim(ssk, skb->truesize); 286 - if (!sk_rmem_schedule(sk, skb, skb->truesize)) 287 - goto drop; 287 + 288 + ssk->sk_forward_alloc -= amount; 289 + sk->sk_forward_alloc += amount; 288 290 } 289 291 290 292 /* the skb map_seq accounts for the skb offset: ··· 670 668 /* In most cases we will be able to lock the mptcp socket. If its already 671 669 * owned, we need to defer to the work queue to avoid ABBA deadlock. 672 670 */ 673 - static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) 671 + static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) 674 672 { 675 673 struct sock *sk = (struct sock *)msk; 676 674 unsigned int moved = 0; 677 675 678 676 if (inet_sk_state_load(sk) == TCP_CLOSE) 679 - return; 680 - 681 - mptcp_data_lock(sk); 677 + return false; 682 678 683 679 __mptcp_move_skbs_from_subflow(msk, ssk, &moved); 684 680 __mptcp_ofo_queue(msk); 681 + if (unlikely(ssk->sk_err)) { 682 + if (!sock_owned_by_user(sk)) 683 + __mptcp_error_report(sk); 684 + else 685 + set_bit(MPTCP_ERROR_REPORT, &msk->flags); 686 + } 685 687 686 688 /* If the moves have caught up with the DATA_FIN sequence number 687 689 * it's time to ack the DATA_FIN and change socket state, but ··· 694 688 */ 695 689 if (mptcp_pending_data_fin(sk, NULL)) 696 690 mptcp_schedule_work(sk); 697 - mptcp_data_unlock(sk); 691 + return moved > 0; 698 692 } 699 693 700 694 void mptcp_data_ready(struct sock *sk, struct sock *ssk) ··· 702 696 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 703 697 struct mptcp_sock *msk = mptcp_sk(sk); 704 698 int sk_rbuf, ssk_rbuf; 705 - bool wake; 706 699 707 700 /* The peer can send data while we are shutting down this 708 701 * subflow at msk destruction time, but we must avoid enqueuing ··· 710 705 if (unlikely(subflow->disposable)) 711 706 return; 712 707 713 - /* move_skbs_to_msk below can legitly clear the data_avail flag, 714 - * but we will need later to properly woke the reader, cache its 715 - * value 716 - */ 717 - wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL; 718 - if (wake) 719 - set_bit(MPTCP_DATA_READY, &msk->flags); 720 - 721 708 ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); 722 709 sk_rbuf = READ_ONCE(sk->sk_rcvbuf); 723 710 if (unlikely(ssk_rbuf > sk_rbuf)) 724 711 sk_rbuf = ssk_rbuf; 725 712 726 - /* over limit? can't append more skbs to msk */ 713 + /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ 727 714 if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) 728 - goto wake; 715 + return; 729 716 730 - move_skbs_to_msk(msk, ssk); 731 - 732 - wake: 733 - if (wake) 717 + /* Wake-up the reader only for in-sequence data */ 718 + mptcp_data_lock(sk); 719 + if (move_skbs_to_msk(msk, ssk)) { 720 + set_bit(MPTCP_DATA_READY, &msk->flags); 734 721 sk->sk_data_ready(sk); 722 + } 723 + mptcp_data_unlock(sk); 735 724 } 736 725 737 726 static bool mptcp_do_flush_join_list(struct mptcp_sock *msk) ··· 857 858 sock_owned_by_me(sk); 858 859 859 860 mptcp_for_each_subflow(msk, subflow) { 860 - if (subflow->data_avail) 861 + if (READ_ONCE(subflow->data_avail)) 861 862 return mptcp_subflow_tcp_sock(subflow); 862 863 } 863 864 ··· 1954 1955 done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); 1955 1956 mptcp_data_unlock(sk); 1956 1957 tcp_cleanup_rbuf(ssk, moved); 1958 + 1959 + if (unlikely(ssk->sk_err)) 1960 + __mptcp_error_report(sk); 1957 1961 unlock_sock_fast(ssk, slowpath); 1958 1962 } while (!done); 1959 1963

-1

net/mptcp/protocol.h

··· 362 362 enum mptcp_data_avail { 363 363 MPTCP_SUBFLOW_NODATA, 364 364 MPTCP_SUBFLOW_DATA_AVAIL, 365 - MPTCP_SUBFLOW_OOO_DATA 366 365 }; 367 366 368 367 struct mptcp_delegated_action {

+53 -55

net/mptcp/subflow.c

··· 784 784 return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); 785 785 } 786 786 787 - static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) 787 + static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) 788 788 { 789 - WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d", 790 - ssn, subflow->map_subflow_seq, subflow->map_data_len); 789 + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", 790 + ssn, subflow->map_subflow_seq, subflow->map_data_len); 791 791 } 792 792 793 793 static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) ··· 812 812 /* Mapping covers data later in the subflow stream, 813 813 * currently unsupported. 814 814 */ 815 - warn_bad_map(subflow, ssn); 815 + dbg_bad_map(subflow, ssn); 816 816 return false; 817 817 } 818 818 if (unlikely(!before(ssn, subflow->map_subflow_seq + 819 819 subflow->map_data_len))) { 820 820 /* Mapping does covers past subflow data, invalid */ 821 - warn_bad_map(subflow, ssn + skb->len); 821 + dbg_bad_map(subflow, ssn); 822 822 return false; 823 823 } 824 824 return true; ··· 1000 1000 struct sk_buff *skb; 1001 1001 1002 1002 if (!skb_peek(&ssk->sk_receive_queue)) 1003 - subflow->data_avail = 0; 1003 + WRITE_ONCE(subflow->data_avail, 0); 1004 1004 if (subflow->data_avail) 1005 1005 return true; 1006 1006 ··· 1039 1039 ack_seq = mptcp_subflow_get_mapped_dsn(subflow); 1040 1040 pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, 1041 1041 ack_seq); 1042 - if (ack_seq == old_ack) { 1043 - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; 1044 - break; 1045 - } else if (after64(ack_seq, old_ack)) { 1046 - subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA; 1047 - break; 1042 + if (unlikely(before64(ack_seq, old_ack))) { 1043 + mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); 1044 + continue; 1048 1045 } 1049 1046 1050 - /* only accept in-sequence mapping. Old values are spurious 1051 - * retransmission 1052 - */ 1053 - mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); 1047 + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); 1048 + break; 1054 1049 } 1055 1050 return true; 1056 1051 ··· 1060 1065 * subflow_error_report() will introduce the appropriate barriers 1061 1066 */ 1062 1067 ssk->sk_err = EBADMSG; 1063 - ssk->sk_error_report(ssk); 1064 1068 tcp_set_state(ssk, TCP_CLOSE); 1065 1069 subflow->reset_transient = 0; 1066 1070 subflow->reset_reason = MPTCP_RST_EMPTCP; 1067 1071 tcp_send_active_reset(ssk, GFP_ATOMIC); 1068 - subflow->data_avail = 0; 1072 + WRITE_ONCE(subflow->data_avail, 0); 1069 1073 return false; 1070 1074 } 1071 1075 ··· 1074 1080 subflow->map_seq = READ_ONCE(msk->ack_seq); 1075 1081 subflow->map_data_len = skb->len; 1076 1082 subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; 1077 - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; 1083 + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); 1078 1084 return true; 1079 1085 } 1080 1086 ··· 1086 1092 if (subflow->map_valid && 1087 1093 mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { 1088 1094 subflow->map_valid = 0; 1089 - subflow->data_avail = 0; 1095 + WRITE_ONCE(subflow->data_avail, 0); 1090 1096 1091 1097 pr_debug("Done with mapping: seq=%u data_len=%u", 1092 1098 subflow->map_subflow_seq, ··· 1112 1118 1113 1119 *space = __mptcp_space(sk); 1114 1120 *full_space = tcp_full_space(sk); 1115 - } 1116 - 1117 - static void subflow_data_ready(struct sock *sk) 1118 - { 1119 - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 1120 - u16 state = 1 << inet_sk_state_load(sk); 1121 - struct sock *parent = subflow->conn; 1122 - struct mptcp_sock *msk; 1123 - 1124 - msk = mptcp_sk(parent); 1125 - if (state & TCPF_LISTEN) { 1126 - /* MPJ subflow are removed from accept queue before reaching here, 1127 - * avoid stray wakeups 1128 - */ 1129 - if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) 1130 - return; 1131 - 1132 - set_bit(MPTCP_DATA_READY, &msk->flags); 1133 - parent->sk_data_ready(parent); 1134 - return; 1135 - } 1136 - 1137 - WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && 1138 - !subflow->mp_join && !(state & TCPF_CLOSE)); 1139 - 1140 - if (mptcp_subflow_data_available(sk)) 1141 - mptcp_data_ready(parent, sk); 1142 - } 1143 - 1144 - static void subflow_write_space(struct sock *ssk) 1145 - { 1146 - struct sock *sk = mptcp_subflow_ctx(ssk)->conn; 1147 - 1148 - mptcp_propagate_sndbuf(sk, ssk); 1149 - mptcp_write_space(sk); 1150 1121 } 1151 1122 1152 1123 void __mptcp_error_report(struct sock *sk) ··· 1152 1193 else 1153 1194 set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags); 1154 1195 mptcp_data_unlock(sk); 1196 + } 1197 + 1198 + static void subflow_data_ready(struct sock *sk) 1199 + { 1200 + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 1201 + u16 state = 1 << inet_sk_state_load(sk); 1202 + struct sock *parent = subflow->conn; 1203 + struct mptcp_sock *msk; 1204 + 1205 + msk = mptcp_sk(parent); 1206 + if (state & TCPF_LISTEN) { 1207 + /* MPJ subflow are removed from accept queue before reaching here, 1208 + * avoid stray wakeups 1209 + */ 1210 + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) 1211 + return; 1212 + 1213 + set_bit(MPTCP_DATA_READY, &msk->flags); 1214 + parent->sk_data_ready(parent); 1215 + return; 1216 + } 1217 + 1218 + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && 1219 + !subflow->mp_join && !(state & TCPF_CLOSE)); 1220 + 1221 + if (mptcp_subflow_data_available(sk)) 1222 + mptcp_data_ready(parent, sk); 1223 + else if (unlikely(sk->sk_err)) 1224 + subflow_error_report(sk); 1225 + } 1226 + 1227 + static void subflow_write_space(struct sock *ssk) 1228 + { 1229 + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; 1230 + 1231 + mptcp_propagate_sndbuf(sk, ssk); 1232 + mptcp_write_space(sk); 1155 1233 } 1156 1234 1157 1235 static struct inet_connection_sock_af_ops * ··· 1501 1505 */ 1502 1506 if (mptcp_subflow_data_available(sk)) 1503 1507 mptcp_data_ready(parent, sk); 1508 + else if (unlikely(sk->sk_err)) 1509 + subflow_error_report(sk); 1504 1510 1505 1511 subflow_sched_work_if_closed(mptcp_sk(parent), sk); 1506 1512

+5

net/netfilter/nf_synproxy_core.c

··· 31 31 int length = (th->doff * 4) - sizeof(*th); 32 32 u8 buf[40], *ptr; 33 33 34 + if (unlikely(length < 0)) 35 + return false; 36 + 34 37 ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf); 35 38 if (ptr == NULL) 36 39 return false; ··· 50 47 length--; 51 48 continue; 52 49 default: 50 + if (length < 2) 51 + return true; 53 52 opsize = *ptr++; 54 53 if (opsize < 2) 55 54 return true;

+51 -50

net/netfilter/nf_tables_api.c

··· 4364 4364 err = nf_tables_set_alloc_name(&ctx, set, name); 4365 4365 kfree(name); 4366 4366 if (err < 0) 4367 - goto err_set_alloc_name; 4368 - 4369 - if (nla[NFTA_SET_EXPR]) { 4370 - expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); 4371 - if (IS_ERR(expr)) { 4372 - err = PTR_ERR(expr); 4373 - goto err_set_alloc_name; 4374 - } 4375 - set->exprs[0] = expr; 4376 - set->num_exprs++; 4377 - } else if (nla[NFTA_SET_EXPRESSIONS]) { 4378 - struct nft_expr *expr; 4379 - struct nlattr *tmp; 4380 - int left; 4381 - 4382 - if (!(flags & NFT_SET_EXPR)) { 4383 - err = -EINVAL; 4384 - goto err_set_alloc_name; 4385 - } 4386 - i = 0; 4387 - nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { 4388 - if (i == NFT_SET_EXPR_MAX) { 4389 - err = -E2BIG; 4390 - goto err_set_init; 4391 - } 4392 - if (nla_type(tmp) != NFTA_LIST_ELEM) { 4393 - err = -EINVAL; 4394 - goto err_set_init; 4395 - } 4396 - expr = nft_set_elem_expr_alloc(&ctx, set, tmp); 4397 - if (IS_ERR(expr)) { 4398 - err = PTR_ERR(expr); 4399 - goto err_set_init; 4400 - } 4401 - set->exprs[i++] = expr; 4402 - set->num_exprs++; 4403 - } 4404 - } 4367 + goto err_set_name; 4405 4368 4406 4369 udata = NULL; 4407 4370 if (udlen) { ··· 4376 4413 INIT_LIST_HEAD(&set->catchall_list); 4377 4414 set->table = table; 4378 4415 write_pnet(&set->net, net); 4379 - set->ops = ops; 4416 + set->ops = ops; 4380 4417 set->ktype = ktype; 4381 - set->klen = desc.klen; 4418 + set->klen = desc.klen; 4382 4419 set->dtype = dtype; 4383 4420 set->objtype = objtype; 4384 - set->dlen = desc.dlen; 4421 + set->dlen = desc.dlen; 4385 4422 set->flags = flags; 4386 - set->size = desc.size; 4423 + set->size = desc.size; 4387 4424 set->policy = policy; 4388 - set->udlen = udlen; 4389 - set->udata = udata; 4425 + set->udlen = udlen; 4426 + set->udata = udata; 4390 4427 set->timeout = timeout; 4391 4428 set->gc_int = gc_int; 4392 - set->handle = nf_tables_alloc_handle(table); 4393 4429 4394 4430 set->field_count = desc.field_count; 4395 4431 for (i = 0; i < desc.field_count; i++) ··· 4398 4436 if (err < 0) 4399 4437 goto err_set_init; 4400 4438 4439 + if (nla[NFTA_SET_EXPR]) { 4440 + expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); 4441 + if (IS_ERR(expr)) { 4442 + err = PTR_ERR(expr); 4443 + goto err_set_expr_alloc; 4444 + } 4445 + set->exprs[0] = expr; 4446 + set->num_exprs++; 4447 + } else if (nla[NFTA_SET_EXPRESSIONS]) { 4448 + struct nft_expr *expr; 4449 + struct nlattr *tmp; 4450 + int left; 4451 + 4452 + if (!(flags & NFT_SET_EXPR)) { 4453 + err = -EINVAL; 4454 + goto err_set_expr_alloc; 4455 + } 4456 + i = 0; 4457 + nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { 4458 + if (i == NFT_SET_EXPR_MAX) { 4459 + err = -E2BIG; 4460 + goto err_set_expr_alloc; 4461 + } 4462 + if (nla_type(tmp) != NFTA_LIST_ELEM) { 4463 + err = -EINVAL; 4464 + goto err_set_expr_alloc; 4465 + } 4466 + expr = nft_set_elem_expr_alloc(&ctx, set, tmp); 4467 + if (IS_ERR(expr)) { 4468 + err = PTR_ERR(expr); 4469 + goto err_set_expr_alloc; 4470 + } 4471 + set->exprs[i++] = expr; 4472 + set->num_exprs++; 4473 + } 4474 + } 4475 + 4476 + set->handle = nf_tables_alloc_handle(table); 4477 + 4401 4478 err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); 4402 4479 if (err < 0) 4403 - goto err_set_trans; 4480 + goto err_set_expr_alloc; 4404 4481 4405 4482 list_add_tail_rcu(&set->list, &table->sets); 4406 4483 table->use++; 4407 4484 return 0; 4408 4485 4409 - err_set_trans: 4410 - ops->destroy(set); 4411 - err_set_init: 4486 + err_set_expr_alloc: 4412 4487 for (i = 0; i < set->num_exprs; i++) 4413 4488 nft_expr_destroy(&ctx, set->exprs[i]); 4414 - err_set_alloc_name: 4489 + 4490 + ops->destroy(set); 4491 + err_set_init: 4415 4492 kfree(set->name); 4416 4493 err_set_name: 4417 4494 kvfree(set);

+23 -18

net/packet/af_packet.c

··· 2683 2683 } 2684 2684 if (likely(saddr == NULL)) { 2685 2685 dev = packet_cached_dev_get(po); 2686 - proto = po->num; 2686 + proto = READ_ONCE(po->num); 2687 2687 } else { 2688 2688 err = -EINVAL; 2689 2689 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) ··· 2896 2896 2897 2897 if (likely(saddr == NULL)) { 2898 2898 dev = packet_cached_dev_get(po); 2899 - proto = po->num; 2899 + proto = READ_ONCE(po->num); 2900 2900 } else { 2901 2901 err = -EINVAL; 2902 2902 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) ··· 3034 3034 struct sock *sk = sock->sk; 3035 3035 struct packet_sock *po = pkt_sk(sk); 3036 3036 3037 - if (po->tx_ring.pg_vec) 3037 + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. 3038 + * tpacket_snd() will redo the check safely. 3039 + */ 3040 + if (data_race(po->tx_ring.pg_vec)) 3038 3041 return tpacket_snd(po, msg); 3039 - else 3040 - return packet_snd(sock, msg, len); 3042 + 3043 + return packet_snd(sock, msg, len); 3041 3044 } 3042 3045 3043 3046 /* ··· 3171 3168 /* prevents packet_notifier() from calling 3172 3169 * register_prot_hook() 3173 3170 */ 3174 - po->num = 0; 3171 + WRITE_ONCE(po->num, 0); 3175 3172 __unregister_prot_hook(sk, true); 3176 3173 rcu_read_lock(); 3177 3174 dev_curr = po->prot_hook.dev; ··· 3181 3178 } 3182 3179 3183 3180 BUG_ON(po->running); 3184 - po->num = proto; 3181 + WRITE_ONCE(po->num, proto); 3185 3182 po->prot_hook.type = proto; 3186 3183 3187 3184 if (unlikely(unlisted)) { 3188 3185 dev_put(dev); 3189 3186 po->prot_hook.dev = NULL; 3190 - po->ifindex = -1; 3187 + WRITE_ONCE(po->ifindex, -1); 3191 3188 packet_cached_dev_reset(po); 3192 3189 } else { 3193 3190 po->prot_hook.dev = dev; 3194 - po->ifindex = dev ? dev->ifindex : 0; 3191 + WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); 3195 3192 packet_cached_dev_assign(po, dev); 3196 3193 } 3197 3194 } ··· 3505 3502 uaddr->sa_family = AF_PACKET; 3506 3503 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); 3507 3504 rcu_read_lock(); 3508 - dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); 3505 + dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); 3509 3506 if (dev) 3510 3507 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); 3511 3508 rcu_read_unlock(); ··· 3520 3517 struct sock *sk = sock->sk; 3521 3518 struct packet_sock *po = pkt_sk(sk); 3522 3519 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); 3520 + int ifindex; 3523 3521 3524 3522 if (peer) 3525 3523 return -EOPNOTSUPP; 3526 3524 3525 + ifindex = READ_ONCE(po->ifindex); 3527 3526 sll->sll_family = AF_PACKET; 3528 - sll->sll_ifindex = po->ifindex; 3529 - sll->sll_protocol = po->num; 3527 + sll->sll_ifindex = ifindex; 3528 + sll->sll_protocol = READ_ONCE(po->num); 3530 3529 sll->sll_pkttype = 0; 3531 3530 rcu_read_lock(); 3532 - dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); 3531 + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); 3533 3532 if (dev) { 3534 3533 sll->sll_hatype = dev->type; 3535 3534 sll->sll_halen = dev->addr_len; ··· 4110 4105 } 4111 4106 if (msg == NETDEV_UNREGISTER) { 4112 4107 packet_cached_dev_reset(po); 4113 - po->ifindex = -1; 4108 + WRITE_ONCE(po->ifindex, -1); 4114 4109 if (po->prot_hook.dev) 4115 4110 dev_put(po->prot_hook.dev); 4116 4111 po->prot_hook.dev = NULL; ··· 4416 4411 was_running = po->running; 4417 4412 num = po->num; 4418 4413 if (was_running) { 4419 - po->num = 0; 4414 + WRITE_ONCE(po->num, 0); 4420 4415 __unregister_prot_hook(sk, false); 4421 4416 } 4422 4417 spin_unlock(&po->bind_lock); ··· 4451 4446 4452 4447 spin_lock(&po->bind_lock); 4453 4448 if (was_running) { 4454 - po->num = num; 4449 + WRITE_ONCE(po->num, num); 4455 4450 register_prot_hook(sk); 4456 4451 } 4457 4452 spin_unlock(&po->bind_lock); ··· 4621 4616 s, 4622 4617 refcount_read(&s->sk_refcnt), 4623 4618 s->sk_type, 4624 - ntohs(po->num), 4625 - po->ifindex, 4619 + ntohs(READ_ONCE(po->num)), 4620 + READ_ONCE(po->ifindex), 4626 4621 po->running, 4627 4622 atomic_read(&s->sk_rmem_alloc), 4628 4623 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),

+1 -1

net/qrtr/qrtr.c

··· 436 436 struct qrtr_sock *ipc; 437 437 struct sk_buff *skb; 438 438 struct qrtr_cb *cb; 439 - unsigned int size; 439 + size_t size; 440 440 unsigned int ver; 441 441 size_t hdrlen; 442 442

+1 -1

net/rds/recv.c

··· 714 714 715 715 if (rds_cmsg_recv(inc, msg, rs)) { 716 716 ret = -EFAULT; 717 - goto out; 717 + break; 718 718 } 719 719 rds_recvmsg_zcookie(rs, msg); 720 720

+12 -7

net/sched/act_ct.c

··· 904 904 } 905 905 906 906 err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); 907 - if (err == NF_ACCEPT && 908 - ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { 909 - if (maniptype == NF_NAT_MANIP_SRC) 910 - maniptype = NF_NAT_MANIP_DST; 911 - else 912 - maniptype = NF_NAT_MANIP_SRC; 907 + if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { 908 + if (ct->status & IPS_SRC_NAT) { 909 + if (maniptype == NF_NAT_MANIP_SRC) 910 + maniptype = NF_NAT_MANIP_DST; 911 + else 912 + maniptype = NF_NAT_MANIP_SRC; 913 913 914 - err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); 914 + err = ct_nat_execute(skb, ct, ctinfo, range, 915 + maniptype); 916 + } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { 917 + err = ct_nat_execute(skb, ct, ctinfo, NULL, 918 + NF_NAT_MANIP_SRC); 919 + } 915 920 } 916 921 return err; 917 922 #else

+11 -7

net/sched/sch_cake.c

··· 943 943 } 944 944 945 945 tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); 946 - if (!tcph) 946 + if (!tcph || tcph->doff < 5) 947 947 return NULL; 948 948 949 949 return skb_header_pointer(skb, offset, ··· 967 967 length--; 968 968 continue; 969 969 } 970 + if (length < 2) 971 + break; 970 972 opsize = *ptr++; 971 973 if (opsize < 2 || opsize > length) 972 974 break; ··· 1106 1104 length--; 1107 1105 continue; 1108 1106 } 1107 + if (length < 2) 1108 + break; 1109 1109 opsize = *ptr++; 1110 1110 if (opsize < 2 || opsize > length) 1111 1111 break; ··· 2342 2338 2343 2339 /* List of known Diffserv codepoints: 2344 2340 * 2345 - * Least Effort (CS1) 2341 + * Least Effort (CS1, LE) 2346 2342 * Best Effort (CS0) 2347 2343 * Max Reliability & LLT "Lo" (TOS1) 2348 2344 * Max Throughput (TOS2) ··· 2364 2360 * Total 25 codepoints. 2365 2361 */ 2366 2362 2367 - /* List of traffic classes in RFC 4594: 2363 + /* List of traffic classes in RFC 4594, updated by RFC 8622: 2368 2364 * (roughly descending order of contended priority) 2369 2365 * (roughly ascending order of uncontended throughput) 2370 2366 * ··· 2379 2375 * Ops, Admin, Management (CS2,TOS1) - eg. ssh 2380 2376 * Standard Service (CS0 & unrecognised codepoints) 2381 2377 * High Throughput Data (AF1x,TOS2) - eg. web traffic 2382 - * Low Priority Data (CS1) - eg. BitTorrent 2378 + * Low Priority Data (CS1,LE) - eg. BitTorrent 2383 2379 2384 2380 * Total 12 traffic classes. 2385 2381 */ ··· 2395 2391 * Video Streaming (AF4x, AF3x, CS3) 2396 2392 * Bog Standard (CS0 etc.) 2397 2393 * High Throughput (AF1x, TOS2) 2398 - * Background Traffic (CS1) 2394 + * Background Traffic (CS1, LE) 2399 2395 * 2400 2396 * Total 8 traffic classes. 2401 2397 */ ··· 2439 2435 * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4) 2440 2436 * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1) 2441 2437 * Best Effort (CS0, AF1x, TOS2, and those not specified) 2442 - * Background Traffic (CS1) 2438 + * Background Traffic (CS1, LE) 2443 2439 * 2444 2440 * Total 4 traffic classes. 2445 2441 */ ··· 2477 2473 static int cake_config_diffserv3(struct Qdisc *sch) 2478 2474 { 2479 2475 /* Simplified Diffserv structure with 3 tins. 2480 - * Low Priority (CS1) 2476 + * Low Priority (CS1, LE) 2481 2477 * Best Effort 2482 2478 * Latency Sensitive (TOS4, VA, EF, CS6, CS7) 2483 2479 */

-13

net/socket.c

··· 1072 1072 * what to do with it - that's up to the protocol still. 1073 1073 */ 1074 1074 1075 - /** 1076 - * get_net_ns - increment the refcount of the network namespace 1077 - * @ns: common namespace (net) 1078 - * 1079 - * Returns the net's common namespace. 1080 - */ 1081 - 1082 - struct ns_common *get_net_ns(struct ns_common *ns) 1083 - { 1084 - return &get_net(container_of(ns, struct net, ns))->ns; 1085 - } 1086 - EXPORT_SYMBOL_GPL(get_net_ns); 1087 - 1088 1075 static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1089 1076 { 1090 1077 struct socket *sock;

+4 -3

net/unix/af_unix.c

··· 535 535 u->path.mnt = NULL; 536 536 state = sk->sk_state; 537 537 sk->sk_state = TCP_CLOSE; 538 + 539 + skpair = unix_peer(sk); 540 + unix_peer(sk) = NULL; 541 + 538 542 unix_state_unlock(sk); 539 543 540 544 wake_up_interruptible_all(&u->peer_wait); 541 - 542 - skpair = unix_peer(sk); 543 545 544 546 if (skpair != NULL) { 545 547 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { ··· 557 555 558 556 unix_dgram_peer_wake_disconnect(sk, skpair); 559 557 sock_put(skpair); /* It may now die */ 560 - unix_peer(sk) = NULL; 561 558 } 562 559 563 560 /* Try to flush out this socket. Throw out buffers at least */

+1 -1

net/wireless/Makefile

··· 28 28 @$(kecho) " GEN $@" 29 29 @(echo '#include "reg.h"'; \ 30 30 echo 'const u8 shipped_regdb_certs[] = {'; \ 31 - cat $^ ; \ 31 + echo | cat - $^ ; \ 32 32 echo '};'; \ 33 33 echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \ 34 34 ) > $@

+5 -8

net/wireless/core.c

··· 1340 1340 rdev->devlist_generation++; 1341 1341 wdev->registered = true; 1342 1342 1343 + if (wdev->netdev && 1344 + sysfs_create_link(&wdev->netdev->dev.kobj, &rdev->wiphy.dev.kobj, 1345 + "phy80211")) 1346 + pr_err("failed to add phy80211 symlink to netdev!\n"); 1347 + 1343 1348 nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); 1344 1349 } 1345 1350 ··· 1369 1364 ret = register_netdevice(dev); 1370 1365 if (ret) 1371 1366 goto out; 1372 - 1373 - if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, 1374 - "phy80211")) { 1375 - pr_err("failed to add phy80211 symlink to netdev!\n"); 1376 - unregister_netdevice(dev); 1377 - ret = -EINVAL; 1378 - goto out; 1379 - } 1380 1367 1381 1368 cfg80211_register_wdev(rdev, wdev); 1382 1369 ret = 0;

+14 -2

net/wireless/pmsr.c

··· 334 334 gfp_t gfp) 335 335 { 336 336 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); 337 + struct cfg80211_pmsr_request *tmp, *prev, *to_free = NULL; 337 338 struct sk_buff *msg; 338 339 void *hdr; 339 340 ··· 365 364 nlmsg_free(msg); 366 365 free_request: 367 366 spin_lock_bh(&wdev->pmsr_lock); 368 - list_del(&req->list); 367 + /* 368 + * cfg80211_pmsr_process_abort() may have already moved this request 369 + * to the free list, and will free it later. In this case, don't free 370 + * it here. 371 + */ 372 + list_for_each_entry_safe(tmp, prev, &wdev->pmsr_list, list) { 373 + if (tmp == req) { 374 + list_del(&req->list); 375 + to_free = req; 376 + break; 377 + } 378 + } 369 379 spin_unlock_bh(&wdev->pmsr_lock); 370 - kfree(req); 380 + kfree(to_free); 371 381 } 372 382 EXPORT_SYMBOL_GPL(cfg80211_pmsr_complete); 373 383

+4

net/wireless/sysfs.c

··· 133 133 if (rdev->wiphy.registered && rdev->ops->resume) 134 134 ret = rdev_resume(rdev); 135 135 wiphy_unlock(&rdev->wiphy); 136 + 137 + if (ret) 138 + cfg80211_shutdown_all_interfaces(&rdev->wiphy); 139 + 136 140 rtnl_unlock(); 137 141 138 142 return ret;

+3

net/wireless/util.c

··· 1059 1059 case NL80211_IFTYPE_MESH_POINT: 1060 1060 /* mesh should be handled? */ 1061 1061 break; 1062 + case NL80211_IFTYPE_OCB: 1063 + cfg80211_leave_ocb(rdev, dev); 1064 + break; 1062 1065 default: 1063 1066 break; 1064 1067 }

+10 -5

scripts/recordmcount.h

··· 192 192 Elf32_Word const *symtab_shndx) 193 193 { 194 194 unsigned long offset; 195 + unsigned short shndx = w2(sym->st_shndx); 195 196 int index; 196 197 197 - if (sym->st_shndx != SHN_XINDEX) 198 - return w2(sym->st_shndx); 198 + if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE) 199 + return shndx; 199 200 200 - offset = (unsigned long)sym - (unsigned long)symtab; 201 - index = offset / sizeof(*sym); 201 + if (shndx == SHN_XINDEX) { 202 + offset = (unsigned long)sym - (unsigned long)symtab; 203 + index = offset / sizeof(*sym); 202 204 203 - return w(symtab_shndx[index]); 205 + return w(symtab_shndx[index]); 206 + } 207 + 208 + return 0; 204 209 } 205 210 206 211 static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0)

+1 -2

tools/include/uapi/asm-generic/unistd.h

··· 863 863 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) 864 864 #define __NR_mount_setattr 442 865 865 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) 866 - #define __NR_quotactl_path 443 867 - __SYSCALL(__NR_quotactl_path, sys_quotactl_path) 866 + /* 443 is reserved for quotactl_path */ 868 867 869 868 #define __NR_landlock_create_ruleset 444 870 869 __SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)

+3

tools/include/uapi/linux/in.h

··· 289 289 /* Address indicating an error return. */ 290 290 #define INADDR_NONE ((unsigned long int) 0xffffffff) 291 291 292 + /* Dummy address for src of ICMP replies if no real address is set (RFC7600). */ 293 + #define INADDR_DUMMY ((unsigned long int) 0xc0000008) 294 + 292 295 /* Network number for local host loopback. */ 293 296 #define IN_LOOPBACKNET 127 294 297

+1 -1

tools/lib/bpf/xsk.c

··· 1094 1094 goto out_put_ctx; 1095 1095 } 1096 1096 if (xsk->fd == umem->fd) 1097 - umem->rx_ring_setup_done = true; 1097 + umem->tx_ring_setup_done = true; 1098 1098 } 1099 1099 1100 1100 err = xsk_get_mmap_offsets(xsk->fd, &off);

+2 -2

tools/perf/tests/shell/stat_bpf_counters.sh

··· 11 11 second_num=$2 12 12 13 13 # upper bound is first_num * 110% 14 - upper=$(( $first_num + $first_num / 10 )) 14 + upper=$(expr $first_num + $first_num / 10 ) 15 15 # lower bound is first_num * 90% 16 - lower=$(( $first_num - $first_num / 10 )) 16 + lower=$(expr $first_num - $first_num / 10 ) 17 17 18 18 if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then 19 19 echo "The difference between $first_num and $second_num are greater than 10%."

-2

tools/perf/trace/beauty/include/linux/socket.h

··· 438 438 int __user *usockvec); 439 439 extern int __sys_shutdown_sock(struct socket *sock, int how); 440 440 extern int __sys_shutdown(int fd, int how); 441 - 442 - extern struct ns_common *get_net_ns(struct ns_common *ns); 443 441 #endif /* _LINUX_SOCKET_H */

+2 -1

tools/perf/util/machine.c

··· 776 776 if (dso) { 777 777 dso->kernel = DSO_SPACE__KERNEL; 778 778 map = map__new2(0, dso); 779 + dso__put(dso); 779 780 } 780 781 781 782 if (!dso || !map) { 782 - dso__put(dso); 783 783 return -ENOMEM; 784 784 } 785 785 ··· 792 792 map->start = event->ksymbol.addr; 793 793 map->end = map->start + event->ksymbol.len; 794 794 maps__insert(&machine->kmaps, map); 795 + map__put(map); 795 796 dso__set_loaded(dso); 796 797 797 798 if (is_bpf_image(event->ksymbol.name)) {

+8 -6

tools/perf/util/metricgroup.c

··· 162 162 return false; 163 163 } 164 164 165 - static bool evsel_same_pmu(struct evsel *ev1, struct evsel *ev2) 165 + static bool evsel_same_pmu_or_none(struct evsel *ev1, struct evsel *ev2) 166 166 { 167 167 if (!ev1->pmu_name || !ev2->pmu_name) 168 - return false; 168 + return true; 169 169 170 170 return !strcmp(ev1->pmu_name, ev2->pmu_name); 171 171 } ··· 288 288 */ 289 289 if (!has_constraint && 290 290 ev->leader != metric_events[i]->leader && 291 - evsel_same_pmu(ev->leader, metric_events[i]->leader)) 291 + evsel_same_pmu_or_none(ev->leader, metric_events[i]->leader)) 292 292 break; 293 293 if (!strcmp(metric_events[i]->name, ev->name)) { 294 294 set_bit(ev->idx, evlist_used); ··· 1073 1073 1074 1074 ret = add_metric(d->metric_list, pe, d->metric_no_group, &m, NULL, d->ids); 1075 1075 if (ret) 1076 - return ret; 1076 + goto out; 1077 1077 1078 1078 ret = resolve_metric(d->metric_no_group, 1079 1079 d->metric_list, NULL, d->ids); 1080 1080 if (ret) 1081 - return ret; 1081 + goto out; 1082 1082 1083 1083 *(d->has_match) = true; 1084 1084 1085 - return *d->ret; 1085 + out: 1086 + *(d->ret) = ret; 1087 + return ret; 1086 1088 } 1087 1089 1088 1090 static int metricgroup__add_metric(const char *metric, bool metric_no_group,

+1 -1

tools/testing/selftests/bpf/test_verifier.c

··· 1147 1147 } 1148 1148 } 1149 1149 1150 - if (test->insn_processed) { 1150 + if (!unpriv && test->insn_processed) { 1151 1151 uint32_t insn_processed; 1152 1152 char *proc; 1153 1153

+2

tools/testing/selftests/bpf/verifier/and.c

··· 61 61 BPF_MOV64_IMM(BPF_REG_0, 0), 62 62 BPF_EXIT_INSN(), 63 63 }, 64 + .errstr_unpriv = "R1 !read_ok", 65 + .result_unpriv = REJECT, 64 66 .result = ACCEPT, 65 67 .retval = 0 66 68 },

+14

tools/testing/selftests/bpf/verifier/bounds.c

··· 508 508 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), 509 509 BPF_EXIT_INSN(), 510 510 }, 511 + .errstr_unpriv = "R0 invalid mem access 'inv'", 512 + .result_unpriv = REJECT, 511 513 .result = ACCEPT 512 514 }, 513 515 { ··· 530 528 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1), 531 529 BPF_EXIT_INSN(), 532 530 }, 531 + .errstr_unpriv = "R0 invalid mem access 'inv'", 532 + .result_unpriv = REJECT, 533 533 .result = ACCEPT 534 534 }, 535 535 { ··· 573 569 BPF_MOV64_IMM(BPF_REG_0, 0), 574 570 BPF_EXIT_INSN(), 575 571 }, 572 + .errstr_unpriv = "R0 min value is outside of the allowed memory range", 573 + .result_unpriv = REJECT, 576 574 .fixup_map_hash_8b = { 3 }, 577 575 .result = ACCEPT, 578 576 }, ··· 595 589 BPF_MOV64_IMM(BPF_REG_0, 0), 596 590 BPF_EXIT_INSN(), 597 591 }, 592 + .errstr_unpriv = "R0 min value is outside of the allowed memory range", 593 + .result_unpriv = REJECT, 598 594 .fixup_map_hash_8b = { 3 }, 599 595 .result = ACCEPT, 600 596 }, ··· 617 609 BPF_MOV64_IMM(BPF_REG_0, 0), 618 610 BPF_EXIT_INSN(), 619 611 }, 612 + .errstr_unpriv = "R0 min value is outside of the allowed memory range", 613 + .result_unpriv = REJECT, 620 614 .fixup_map_hash_8b = { 3 }, 621 615 .result = ACCEPT, 622 616 }, ··· 684 674 BPF_MOV64_IMM(BPF_REG_0, 0), 685 675 BPF_EXIT_INSN(), 686 676 }, 677 + .errstr_unpriv = "R0 min value is outside of the allowed memory range", 678 + .result_unpriv = REJECT, 687 679 .fixup_map_hash_8b = { 3 }, 688 680 .result = ACCEPT, 689 681 }, ··· 707 695 BPF_MOV64_IMM(BPF_REG_0, 0), 708 696 BPF_EXIT_INSN(), 709 697 }, 698 + .errstr_unpriv = "R0 min value is outside of the allowed memory range", 699 + .result_unpriv = REJECT, 710 700 .fixup_map_hash_8b = { 3 }, 711 701 .result = ACCEPT, 712 702 },

+2

tools/testing/selftests/bpf/verifier/dead_code.c

··· 8 8 BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), 9 9 BPF_EXIT_INSN(), 10 10 }, 11 + .errstr_unpriv = "R9 !read_ok", 12 + .result_unpriv = REJECT, 11 13 .result = ACCEPT, 12 14 .retval = 7, 13 15 },

+22

tools/testing/selftests/bpf/verifier/jmp32.c

··· 87 87 BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), 88 88 BPF_EXIT_INSN(), 89 89 }, 90 + .errstr_unpriv = "R9 !read_ok", 91 + .result_unpriv = REJECT, 90 92 .result = ACCEPT, 91 93 }, 92 94 { ··· 152 150 BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), 153 151 BPF_EXIT_INSN(), 154 152 }, 153 + .errstr_unpriv = "R9 !read_ok", 154 + .result_unpriv = REJECT, 155 155 .result = ACCEPT, 156 156 }, 157 157 { ··· 217 213 BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), 218 214 BPF_EXIT_INSN(), 219 215 }, 216 + .errstr_unpriv = "R9 !read_ok", 217 + .result_unpriv = REJECT, 220 218 .result = ACCEPT, 221 219 }, 222 220 { ··· 286 280 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 287 281 BPF_EXIT_INSN(), 288 282 }, 283 + .errstr_unpriv = "R0 invalid mem access 'inv'", 284 + .result_unpriv = REJECT, 289 285 .result = ACCEPT, 290 286 .retval = 2, 291 287 }, ··· 356 348 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 357 349 BPF_EXIT_INSN(), 358 350 }, 351 + .errstr_unpriv = "R0 invalid mem access 'inv'", 352 + .result_unpriv = REJECT, 359 353 .result = ACCEPT, 360 354 .retval = 2, 361 355 }, ··· 426 416 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 427 417 BPF_EXIT_INSN(), 428 418 }, 419 + .errstr_unpriv = "R0 invalid mem access 'inv'", 420 + .result_unpriv = REJECT, 429 421 .result = ACCEPT, 430 422 .retval = 2, 431 423 }, ··· 496 484 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 497 485 BPF_EXIT_INSN(), 498 486 }, 487 + .errstr_unpriv = "R0 invalid mem access 'inv'", 488 + .result_unpriv = REJECT, 499 489 .result = ACCEPT, 500 490 .retval = 2, 501 491 }, ··· 566 552 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 567 553 BPF_EXIT_INSN(), 568 554 }, 555 + .errstr_unpriv = "R0 invalid mem access 'inv'", 556 + .result_unpriv = REJECT, 569 557 .result = ACCEPT, 570 558 .retval = 2, 571 559 }, ··· 636 620 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 637 621 BPF_EXIT_INSN(), 638 622 }, 623 + .errstr_unpriv = "R0 invalid mem access 'inv'", 624 + .result_unpriv = REJECT, 639 625 .result = ACCEPT, 640 626 .retval = 2, 641 627 }, ··· 706 688 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 707 689 BPF_EXIT_INSN(), 708 690 }, 691 + .errstr_unpriv = "R0 invalid mem access 'inv'", 692 + .result_unpriv = REJECT, 709 693 .result = ACCEPT, 710 694 .retval = 2, 711 695 }, ··· 776 756 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), 777 757 BPF_EXIT_INSN(), 778 758 }, 759 + .errstr_unpriv = "R0 invalid mem access 'inv'", 760 + .result_unpriv = REJECT, 779 761 .result = ACCEPT, 780 762 .retval = 2, 781 763 },

+6 -4

tools/testing/selftests/bpf/verifier/jset.c

··· 82 82 BPF_EXIT_INSN(), 83 83 }, 84 84 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 85 - .retval_unpriv = 1, 86 - .result_unpriv = ACCEPT, 85 + .errstr_unpriv = "R9 !read_ok", 86 + .result_unpriv = REJECT, 87 87 .retval = 1, 88 88 .result = ACCEPT, 89 89 }, ··· 141 141 BPF_EXIT_INSN(), 142 142 }, 143 143 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 144 - .result_unpriv = ACCEPT, 144 + .errstr_unpriv = "R9 !read_ok", 145 + .result_unpriv = REJECT, 145 146 .result = ACCEPT, 146 147 }, 147 148 { ··· 163 162 BPF_EXIT_INSN(), 164 163 }, 165 164 .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 166 - .result_unpriv = ACCEPT, 165 + .errstr_unpriv = "R9 !read_ok", 166 + .result_unpriv = REJECT, 167 167 .result = ACCEPT, 168 168 },

+2

tools/testing/selftests/bpf/verifier/unpriv.c

··· 420 420 BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), 421 421 BPF_EXIT_INSN(), 422 422 }, 423 + .errstr_unpriv = "R7 invalid mem access 'inv'", 424 + .result_unpriv = REJECT, 423 425 .result = ACCEPT, 424 426 .retval = 0, 425 427 },

+4 -3

tools/testing/selftests/bpf/verifier/value_ptr_arith.c

··· 120 120 .fixup_map_array_48b = { 1 }, 121 121 .result = ACCEPT, 122 122 .result_unpriv = REJECT, 123 - .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", 123 + .errstr_unpriv = "R2 pointer comparison prohibited", 124 124 .retval = 0, 125 125 }, 126 126 { ··· 159 159 BPF_MOV64_IMM(BPF_REG_0, 0), 160 160 BPF_EXIT_INSN(), 161 161 // fake-dead code; targeted from branch A to 162 - // prevent dead code sanitization 162 + // prevent dead code sanitization, rejected 163 + // via branch B however 163 164 BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0), 164 165 BPF_MOV64_IMM(BPF_REG_0, 0), 165 166 BPF_EXIT_INSN(), ··· 168 167 .fixup_map_array_48b = { 1 }, 169 168 .result = ACCEPT, 170 169 .result_unpriv = REJECT, 171 - .errstr_unpriv = "R2 tried to add from different maps, paths or scalars", 170 + .errstr_unpriv = "R0 invalid mem access 'inv'", 172 171 .retval = 0, 173 172 }, 174 173 {

+1 -1

tools/testing/selftests/kvm/lib/kvm_util.c

··· 82 82 83 83 kvm_fd = open_kvm_dev_path_or_exit(); 84 84 ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); 85 - TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" 85 + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n" 86 86 " rc: %i errno: %i", ret, errno); 87 87 88 88 close(kvm_fd);

+19 -19

tools/testing/selftests/kvm/lib/test_util.c

··· 166 166 return 0; 167 167 } 168 168 169 + #define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) 170 + #define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB) 171 + 169 172 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i) 170 173 { 171 - static const int anon_flags = MAP_PRIVATE | MAP_ANONYMOUS; 172 - static const int anon_huge_flags = anon_flags | MAP_HUGETLB; 173 - 174 174 static const struct vm_mem_backing_src_alias aliases[] = { 175 175 [VM_MEM_SRC_ANONYMOUS] = { 176 176 .name = "anonymous", 177 - .flag = anon_flags, 177 + .flag = ANON_FLAGS, 178 178 }, 179 179 [VM_MEM_SRC_ANONYMOUS_THP] = { 180 180 .name = "anonymous_thp", 181 - .flag = anon_flags, 181 + .flag = ANON_FLAGS, 182 182 }, 183 183 [VM_MEM_SRC_ANONYMOUS_HUGETLB] = { 184 184 .name = "anonymous_hugetlb", 185 - .flag = anon_huge_flags, 185 + .flag = ANON_HUGE_FLAGS, 186 186 }, 187 187 [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = { 188 188 .name = "anonymous_hugetlb_16kb", 189 - .flag = anon_huge_flags | MAP_HUGE_16KB, 189 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB, 190 190 }, 191 191 [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = { 192 192 .name = "anonymous_hugetlb_64kb", 193 - .flag = anon_huge_flags | MAP_HUGE_64KB, 193 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB, 194 194 }, 195 195 [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = { 196 196 .name = "anonymous_hugetlb_512kb", 197 - .flag = anon_huge_flags | MAP_HUGE_512KB, 197 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB, 198 198 }, 199 199 [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = { 200 200 .name = "anonymous_hugetlb_1mb", 201 - .flag = anon_huge_flags | MAP_HUGE_1MB, 201 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB, 202 202 }, 203 203 [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = { 204 204 .name = "anonymous_hugetlb_2mb", 205 - .flag = anon_huge_flags | MAP_HUGE_2MB, 205 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB, 206 206 }, 207 207 [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = { 208 208 .name = "anonymous_hugetlb_8mb", 209 - .flag = anon_huge_flags | MAP_HUGE_8MB, 209 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB, 210 210 }, 211 211 [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = { 212 212 .name = "anonymous_hugetlb_16mb", 213 - .flag = anon_huge_flags | MAP_HUGE_16MB, 213 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB, 214 214 }, 215 215 [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = { 216 216 .name = "anonymous_hugetlb_32mb", 217 - .flag = anon_huge_flags | MAP_HUGE_32MB, 217 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB, 218 218 }, 219 219 [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = { 220 220 .name = "anonymous_hugetlb_256mb", 221 - .flag = anon_huge_flags | MAP_HUGE_256MB, 221 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB, 222 222 }, 223 223 [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = { 224 224 .name = "anonymous_hugetlb_512mb", 225 - .flag = anon_huge_flags | MAP_HUGE_512MB, 225 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB, 226 226 }, 227 227 [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = { 228 228 .name = "anonymous_hugetlb_1gb", 229 - .flag = anon_huge_flags | MAP_HUGE_1GB, 229 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB, 230 230 }, 231 231 [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = { 232 232 .name = "anonymous_hugetlb_2gb", 233 - .flag = anon_huge_flags | MAP_HUGE_2GB, 233 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB, 234 234 }, 235 235 [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = { 236 236 .name = "anonymous_hugetlb_16gb", 237 - .flag = anon_huge_flags | MAP_HUGE_16GB, 237 + .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB, 238 238 }, 239 239 [VM_MEM_SRC_SHMEM] = { 240 240 .name = "shmem",

+25

tools/testing/selftests/net/fib_tests.sh

··· 1384 1384 ipv4_rt_replace_mpath 1385 1385 } 1386 1386 1387 + # checks that cached input route on VRF port is deleted 1388 + # when VRF is deleted 1389 + ipv4_local_rt_cache() 1390 + { 1391 + run_cmd "ip addr add 10.0.0.1/32 dev lo" 1392 + run_cmd "ip netns add test-ns" 1393 + run_cmd "ip link add veth-outside type veth peer name veth-inside" 1394 + run_cmd "ip link add vrf-100 type vrf table 1100" 1395 + run_cmd "ip link set veth-outside master vrf-100" 1396 + run_cmd "ip link set veth-inside netns test-ns" 1397 + run_cmd "ip link set veth-outside up" 1398 + run_cmd "ip link set vrf-100 up" 1399 + run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100" 1400 + run_cmd "ip netns exec test-ns ip link set veth-inside up" 1401 + run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside" 1402 + run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside" 1403 + run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1" 1404 + run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1" 1405 + run_cmd "ip link delete vrf-100" 1406 + 1407 + # if we do not hang test is a success 1408 + log_test $? 0 "Cached route removed from VRF port device" 1409 + } 1410 + 1387 1411 ipv4_route_test() 1388 1412 { 1389 1413 route_setup 1390 1414 1391 1415 ipv4_rt_add 1392 1416 ipv4_rt_replace 1417 + ipv4_local_rt_cache 1393 1418 1394 1419 route_cleanup 1395 1420 }

+74

tools/testing/selftests/net/icmp.sh

··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + # Test for checking ICMP response with dummy address instead of 0.0.0.0. 5 + # Sets up two namespaces like: 6 + # +----------------------+ +--------------------+ 7 + # | ns1 | v4-via-v6 routes: | ns2 | 8 + # | | ' | | 9 + # | +--------+ -> 172.16.1.0/24 -> +--------+ | 10 + # | | veth0 +--------------------------+ veth0 | | 11 + # | +--------+ <- 172.16.0.0/24 <- +--------+ | 12 + # | 172.16.0.1 | | 2001:db8:1::2/64 | 13 + # | 2001:db8:1::2/64 | | | 14 + # +----------------------+ +--------------------+ 15 + # 16 + # And then tries to ping 172.16.1.1 from ns1. This results in a "net 17 + # unreachable" message being sent from ns2, but there is no IPv4 address set in 18 + # that address space, so the kernel should substitute the dummy address 19 + # 192.0.0.8 defined in RFC7600. 20 + 21 + NS1=ns1 22 + NS2=ns2 23 + H1_IP=172.16.0.1/32 24 + H1_IP6=2001:db8:1::1 25 + RT1=172.16.1.0/24 26 + PINGADDR=172.16.1.1 27 + RT2=172.16.0.0/24 28 + H2_IP6=2001:db8:1::2 29 + 30 + TMPFILE=$(mktemp) 31 + 32 + cleanup() 33 + { 34 + rm -f "$TMPFILE" 35 + ip netns del $NS1 36 + ip netns del $NS2 37 + } 38 + 39 + trap cleanup EXIT 40 + 41 + # Namespaces 42 + ip netns add $NS1 43 + ip netns add $NS2 44 + 45 + # Connectivity 46 + ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2 47 + ip -netns $NS1 link set dev veth0 up 48 + ip -netns $NS2 link set dev veth0 up 49 + ip -netns $NS1 addr add $H1_IP dev veth0 50 + ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad 51 + ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad 52 + ip -netns $NS1 route add $RT1 via inet6 $H2_IP6 53 + ip -netns $NS2 route add $RT2 via inet6 $H1_IP6 54 + 55 + # Make sure ns2 will respond with ICMP unreachable 56 + ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1 57 + 58 + # Run the test - a ping runs in the background, and we capture ICMP responses 59 + # with tcpdump; -c 1 means it should exit on the first ping, but add a timeout 60 + # in case something goes wrong 61 + ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null & 62 + ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null 63 + 64 + # Parse response and check for dummy address 65 + # tcpdump output looks like: 66 + # IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92 67 + RESP_IP=$(awk '{print $2}' < $TMPFILE) 68 + if [[ "$RESP_IP" != "192.0.0.8" ]]; then 69 + echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8" 70 + exit 1 71 + else 72 + echo "OK" 73 + exit 0 74 + fi

+8 -3

tools/testing/selftests/net/mptcp/mptcp_connect.sh

··· 197 197 ip -net "$ns4" route add default via 10.0.3.2 198 198 ip -net "$ns4" route add default via dead:beef:3::2 199 199 200 - # use TCP syn cookies, even if no flooding was detected. 201 - ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 202 - 203 200 set_ethtool_flags() { 204 201 local ns="$1" 205 202 local dev="$2" ··· 732 735 if [ $ret -ne 0 ] ;then 733 736 echo "FAIL: Could not even run loopback v6 test" 2>&1 734 737 exit $ret 738 + fi 739 + 740 + # ns1<->ns2 is not subject to reordering/tc delays. Use it to test 741 + # mptcp syncookie support. 742 + if [ $sender = $ns1 ]; then 743 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 744 + else 745 + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 735 746 fi 736 747 737 748 run_tests "$ns2" $sender 10.0.1.2

+1 -1

tools/testing/selftests/net/udpgro_fwd.sh

··· 1 - #!/bin/sh 1 + #!/bin/bash 2 2 # SPDX-License-Identifier: GPL-2.0 3 3 4 4 readonly BASE="ns-$(mktemp -u XXXXXX)"

+3 -2

tools/testing/selftests/net/veth.sh

··· 18 18 19 19 cleanup() { 20 20 local ns 21 - local -r jobs="$(jobs -p)" 21 + local jobs 22 + readonly jobs="$(jobs -p)" 22 23 [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null 23 24 rm -f $STATS 24 25 ··· 109 108 110 109 if [ ! -f ../bpf/xdp_dummy.o ]; then 111 110 echo "Missing xdp_dummy helper. Build bpf selftest first" 112 - exit -1 111 + exit 1 113 112 fi 114 113 115 114 create_ns

+1 -1

tools/testing/selftests/netfilter/Makefile

··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 # Makefile for netfilter selftests 3 3 4 - TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ 4 + TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ 5 5 conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ 6 6 nft_concat_range.sh nft_conntrack_helper.sh \ 7 7 nft_queue.sh nft_meta.sh nf_nat_edemux.sh \

+221

tools/testing/selftests/netfilter/nft_fib.sh

··· 1 + #!/bin/bash 2 + # 3 + # This tests the fib expression. 4 + # 5 + # Kselftest framework requirement - SKIP code is 4. 6 + ksft_skip=4 7 + ret=0 8 + 9 + sfx=$(mktemp -u "XXXXXXXX") 10 + ns1="ns1-$sfx" 11 + ns2="ns2-$sfx" 12 + nsrouter="nsrouter-$sfx" 13 + timeout=4 14 + 15 + log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) 16 + 17 + cleanup() 18 + { 19 + ip netns del ${ns1} 20 + ip netns del ${ns2} 21 + ip netns del ${nsrouter} 22 + 23 + [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns 24 + } 25 + 26 + nft --version > /dev/null 2>&1 27 + if [ $? -ne 0 ];then 28 + echo "SKIP: Could not run test without nft tool" 29 + exit $ksft_skip 30 + fi 31 + 32 + ip -Version > /dev/null 2>&1 33 + if [ $? -ne 0 ];then 34 + echo "SKIP: Could not run test without ip tool" 35 + exit $ksft_skip 36 + fi 37 + 38 + ip netns add ${nsrouter} 39 + if [ $? -ne 0 ];then 40 + echo "SKIP: Could not create net namespace" 41 + exit $ksft_skip 42 + fi 43 + 44 + trap cleanup EXIT 45 + 46 + dmesg | grep -q ' nft_rpfilter: ' 47 + if [ $? -eq 0 ]; then 48 + dmesg -c | grep ' nft_rpfilter: ' 49 + echo "WARN: a previous test run has failed" 1>&2 50 + fi 51 + 52 + sysctl -q net.netfilter.nf_log_all_netns=1 53 + ip netns add ${ns1} 54 + ip netns add ${ns2} 55 + 56 + load_ruleset() { 57 + local netns=$1 58 + 59 + ip netns exec ${netns} nft -f /dev/stdin <<EOF 60 + table inet filter { 61 + chain prerouting { 62 + type filter hook prerouting priority 0; policy accept; 63 + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop 64 + } 65 + } 66 + EOF 67 + } 68 + 69 + load_ruleset_count() { 70 + local netns=$1 71 + 72 + ip netns exec ${netns} nft -f /dev/stdin <<EOF 73 + table inet filter { 74 + chain prerouting { 75 + type filter hook prerouting priority 0; policy accept; 76 + ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop 77 + ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop 78 + } 79 + } 80 + EOF 81 + } 82 + 83 + check_drops() { 84 + dmesg | grep -q ' nft_rpfilter: ' 85 + if [ $? -eq 0 ]; then 86 + dmesg | grep ' nft_rpfilter: ' 87 + echo "FAIL: rpfilter did drop packets" 88 + return 1 89 + fi 90 + 91 + return 0 92 + } 93 + 94 + check_fib_counter() { 95 + local want=$1 96 + local ns=$2 97 + local address=$3 98 + 99 + line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" ) 100 + ret=$? 101 + 102 + if [ $ret -ne 0 ];then 103 + echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2 104 + ip netns exec ${ns} nft list table inet filter 105 + return 1 106 + fi 107 + 108 + if [ $want -gt 0 ]; then 109 + echo "PASS: fib expression did drop packets for $address" 110 + fi 111 + 112 + return 0 113 + } 114 + 115 + load_ruleset ${nsrouter} 116 + load_ruleset ${ns1} 117 + load_ruleset ${ns2} 118 + 119 + ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1 120 + if [ $? -ne 0 ];then 121 + echo "SKIP: No virtual ethernet pair device support in kernel" 122 + exit $ksft_skip 123 + fi 124 + ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2} 125 + 126 + ip -net ${nsrouter} link set lo up 127 + ip -net ${nsrouter} link set veth0 up 128 + ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0 129 + ip -net ${nsrouter} addr add dead:1::1/64 dev veth0 130 + 131 + ip -net ${nsrouter} link set veth1 up 132 + ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1 133 + ip -net ${nsrouter} addr add dead:2::1/64 dev veth1 134 + 135 + ip -net ${ns1} link set lo up 136 + ip -net ${ns1} link set eth0 up 137 + 138 + ip -net ${ns2} link set lo up 139 + ip -net ${ns2} link set eth0 up 140 + 141 + ip -net ${ns1} addr add 10.0.1.99/24 dev eth0 142 + ip -net ${ns1} addr add dead:1::99/64 dev eth0 143 + ip -net ${ns1} route add default via 10.0.1.1 144 + ip -net ${ns1} route add default via dead:1::1 145 + 146 + ip -net ${ns2} addr add 10.0.2.99/24 dev eth0 147 + ip -net ${ns2} addr add dead:2::99/64 dev eth0 148 + ip -net ${ns2} route add default via 10.0.2.1 149 + ip -net ${ns2} route add default via dead:2::1 150 + 151 + test_ping() { 152 + local daddr4=$1 153 + local daddr6=$2 154 + 155 + ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null 156 + ret=$? 157 + if [ $ret -ne 0 ];then 158 + check_drops 159 + echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2 160 + return 1 161 + fi 162 + 163 + ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null 164 + ret=$? 165 + if [ $ret -ne 0 ];then 166 + check_drops 167 + echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2 168 + return 1 169 + fi 170 + 171 + return 0 172 + } 173 + 174 + ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null 175 + ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null 176 + ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null 177 + 178 + sleep 3 179 + 180 + test_ping 10.0.2.1 dead:2::1 || exit 1 181 + check_drops || exit 1 182 + 183 + test_ping 10.0.2.99 dead:2::99 || exit 1 184 + check_drops || exit 1 185 + 186 + echo "PASS: fib expression did not cause unwanted packet drops" 187 + 188 + ip netns exec ${nsrouter} nft flush table inet filter 189 + 190 + ip -net ${ns1} route del default 191 + ip -net ${ns1} -6 route del default 192 + 193 + ip -net ${ns1} addr del 10.0.1.99/24 dev eth0 194 + ip -net ${ns1} addr del dead:1::99/64 dev eth0 195 + 196 + ip -net ${ns1} addr add 10.0.2.99/24 dev eth0 197 + ip -net ${ns1} addr add dead:2::99/64 dev eth0 198 + 199 + ip -net ${ns1} route add default via 10.0.2.1 200 + ip -net ${ns1} -6 route add default via dead:2::1 201 + 202 + ip -net ${nsrouter} addr add dead:2::1/64 dev veth0 203 + 204 + # switch to ruleset that doesn't log, this time 205 + # its expected that this does drop the packets. 206 + load_ruleset_count ${nsrouter} 207 + 208 + # ns1 has a default route, but nsrouter does not. 209 + # must not check return value, ping to 1.1.1.1 will 210 + # fail. 211 + check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1 212 + check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1 213 + 214 + ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null 215 + check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1 216 + 217 + sleep 2 218 + ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null 219 + check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1 220 + 221 + exit 0