Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

-2

MAINTAINERS

··· 324 324 F: Documentation/ABI/testing/configfs-acpi 325 325 F: drivers/pci/*acpi* 326 326 F: drivers/pci/*/*acpi* 327 - F: drivers/pci/*/*/*acpi* 328 327 F: tools/power/acpi/ 329 328 330 329 ACPI APEI ··· 8607 8608 F: arch/*/include/asm/spinlock*.h 8608 8609 F: include/linux/rwlock*.h 8609 8610 F: include/linux/mutex*.h 8610 - F: arch/*/include/asm/mutex*.h 8611 8611 F: include/linux/rwsem*.h 8612 8612 F: arch/*/include/asm/rwsem.h 8613 8613 F: include/linux/seqlock.h

+1 -1

arch/arm/mm/ioremap.c

··· 473 473 474 474 int pci_ioremap_io(unsigned int offset, phys_addr_t phys_addr) 475 475 { 476 - BUG_ON(offset + SZ_64K > IO_SPACE_LIMIT); 476 + BUG_ON(offset + SZ_64K - 1 > IO_SPACE_LIMIT); 477 477 478 478 return ioremap_page_range(PCI_IO_VIRT_BASE + offset, 479 479 PCI_IO_VIRT_BASE + offset + SZ_64K,

+1

arch/arm/tools/syscall.tbl

··· 413 413 396 common pkey_free sys_pkey_free 414 414 397 common statx sys_statx 415 415 398 common rseq sys_rseq 416 + 399 common io_pgetevents sys_io_pgetevents

+10

arch/powerpc/kvm/book3s_64_mmu_radix.c

··· 646 646 */ 647 647 local_irq_disable(); 648 648 ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); 649 + /* 650 + * If the PTE disappeared temporarily due to a THP 651 + * collapse, just return and let the guest try again. 652 + */ 653 + if (!ptep) { 654 + local_irq_enable(); 655 + if (page) 656 + put_page(page); 657 + return RESUME_GUEST; 658 + } 649 659 pte = *ptep; 650 660 local_irq_enable(); 651 661

+1 -1

arch/riscv/kernel/setup.c

··· 186 186 BUG_ON(mem_size == 0); 187 187 188 188 set_max_mapnr(PFN_DOWN(mem_size)); 189 - max_low_pfn = pfn_base + PFN_DOWN(mem_size); 189 + max_low_pfn = memblock_end_of_DRAM(); 190 190 191 191 #ifdef CONFIG_BLK_DEV_INITRD 192 192 setup_initrd();

+14 -2

arch/x86/entry/vdso/Makefile

··· 68 68 CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ 69 69 $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ 70 70 -fno-omit-frame-pointer -foptimize-sibling-calls \ 71 - -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO $(RETPOLINE_VDSO_CFLAGS) 71 + -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO 72 + 73 + ifdef CONFIG_RETPOLINE 74 + ifneq ($(RETPOLINE_VDSO_CFLAGS),) 75 + CFL += $(RETPOLINE_VDSO_CFLAGS) 76 + endif 77 + endif 72 78 73 79 $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) 74 80 ··· 144 138 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) 145 139 KBUILD_CFLAGS_32 += -fno-omit-frame-pointer 146 140 KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING 147 - KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) 141 + 142 + ifdef CONFIG_RETPOLINE 143 + ifneq ($(RETPOLINE_VDSO_CFLAGS),) 144 + KBUILD_CFLAGS_32 += $(RETPOLINE_VDSO_CFLAGS) 145 + endif 146 + endif 147 + 148 148 $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) 149 149 150 150 $(obj)/vdso32.so.dbg: FORCE \

+14 -12

arch/x86/entry/vdso/vclock_gettime.c

··· 43 43 notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) 44 44 { 45 45 long ret; 46 - asm("syscall" : "=a" (ret) : 47 - "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); 46 + asm ("syscall" : "=a" (ret), "=m" (*ts) : 47 + "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : 48 + "memory", "rcx", "r11"); 48 49 return ret; 49 50 } 50 51 ··· 53 52 { 54 53 long ret; 55 54 56 - asm("syscall" : "=a" (ret) : 57 - "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); 55 + asm ("syscall" : "=a" (ret), "=m" (*tv), "=m" (*tz) : 56 + "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : 57 + "memory", "rcx", "r11"); 58 58 return ret; 59 59 } 60 60 ··· 66 64 { 67 65 long ret; 68 66 69 - asm( 67 + asm ( 70 68 "mov %%ebx, %%edx \n" 71 - "mov %2, %%ebx \n" 69 + "mov %[clock], %%ebx \n" 72 70 "call __kernel_vsyscall \n" 73 71 "mov %%edx, %%ebx \n" 74 - : "=a" (ret) 75 - : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) 72 + : "=a" (ret), "=m" (*ts) 73 + : "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts) 76 74 : "memory", "edx"); 77 75 return ret; 78 76 } ··· 81 79 { 82 80 long ret; 83 81 84 - asm( 82 + asm ( 85 83 "mov %%ebx, %%edx \n" 86 - "mov %2, %%ebx \n" 84 + "mov %[tv], %%ebx \n" 87 85 "call __kernel_vsyscall \n" 88 86 "mov %%edx, %%ebx \n" 89 - : "=a" (ret) 90 - : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) 87 + : "=a" (ret), "=m" (*tv), "=m" (*tz) 88 + : "0" (__NR_gettimeofday), [tv] "g" (tv), "c" (tz) 91 89 : "memory", "edx"); 92 90 return ret; 93 91 }

+10

arch/x86/events/amd/uncore.c

··· 36 36 37 37 static int num_counters_llc; 38 38 static int num_counters_nb; 39 + static bool l3_mask; 39 40 40 41 static HLIST_HEAD(uncore_unused_list); 41 42 ··· 209 208 /* and we do not enable counter overflow interrupts */ 210 209 hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; 211 210 hwc->idx = -1; 211 + 212 + /* 213 + * SliceMask and ThreadMask need to be set for certain L3 events in 214 + * Family 17h. For other events, the two fields do not affect the count. 215 + */ 216 + if (l3_mask) 217 + hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK); 212 218 213 219 if (event->cpu < 0) 214 220 return -EINVAL; ··· 533 525 amd_llc_pmu.name = "amd_l3"; 534 526 format_attr_event_df.show = &event_show_df; 535 527 format_attr_event_l3.show = &event_show_l3; 528 + l3_mask = true; 536 529 } else { 537 530 num_counters_nb = NUM_COUNTERS_NB; 538 531 num_counters_llc = NUM_COUNTERS_L2; ··· 541 532 amd_llc_pmu.name = "amd_l2"; 542 533 format_attr_event_df = format_attr_event; 543 534 format_attr_event_l3 = format_attr_event; 535 + l3_mask = false; 544 536 } 545 537 546 538 amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;

+7 -7

arch/x86/events/intel/uncore_snbep.c

··· 3061 3061 3062 3062 void bdx_uncore_cpu_init(void) 3063 3063 { 3064 - int pkg = topology_phys_to_logical_pkg(0); 3064 + int pkg = topology_phys_to_logical_pkg(boot_cpu_data.phys_proc_id); 3065 3065 3066 3066 if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) 3067 3067 bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; ··· 3931 3931 .driver_data = UNCORE_PCI_DEV_FULL_DATA(21, 5, SKX_PCI_UNCORE_M2PCIE, 3), 3932 3932 }, 3933 3933 { /* M3UPI0 Link 0 */ 3934 - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), 3935 - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 0, SKX_PCI_UNCORE_M3UPI, 0), 3934 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), 3935 + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 0), 3936 3936 }, 3937 3937 { /* M3UPI0 Link 1 */ 3938 - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), 3939 - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 1, SKX_PCI_UNCORE_M3UPI, 1), 3938 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204E), 3939 + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 2, SKX_PCI_UNCORE_M3UPI, 1), 3940 3940 }, 3941 3941 { /* M3UPI1 Link 2 */ 3942 - PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204C), 3943 - .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 4, SKX_PCI_UNCORE_M3UPI, 2), 3942 + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x204D), 3943 + .driver_data = UNCORE_PCI_DEV_FULL_DATA(18, 5, SKX_PCI_UNCORE_M3UPI, 2), 3944 3944 }, 3945 3945 { /* end: all zeroes */ } 3946 3946 };

+8

arch/x86/include/asm/perf_event.h

··· 46 46 #define INTEL_ARCH_EVENT_MASK \ 47 47 (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) 48 48 49 + #define AMD64_L3_SLICE_SHIFT 48 50 + #define AMD64_L3_SLICE_MASK \ 51 + ((0xFULL) << AMD64_L3_SLICE_SHIFT) 52 + 53 + #define AMD64_L3_THREAD_SHIFT 56 54 + #define AMD64_L3_THREAD_MASK \ 55 + ((0xFFULL) << AMD64_L3_THREAD_SHIFT) 56 + 49 57 #define X86_RAW_EVENT_MASK \ 50 58 (ARCH_PERFMON_EVENTSEL_EVENT | \ 51 59 ARCH_PERFMON_EVENTSEL_UMASK | \

+6

arch/x86/include/asm/uv/uv.h

··· 10 10 struct mm_struct; 11 11 12 12 #ifdef CONFIG_X86_UV 13 + #include <linux/efi.h> 13 14 14 15 extern enum uv_system_type get_uv_system_type(void); 16 + static inline bool is_early_uv_system(void) 17 + { 18 + return !((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || !efi.uv_systab); 19 + } 15 20 extern int is_uv_system(void); 16 21 extern int is_uv_hubless(void); 17 22 extern void uv_cpu_init(void); ··· 28 23 #else /* X86_UV */ 29 24 30 25 static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } 26 + static inline bool is_early_uv_system(void) { return 0; } 31 27 static inline int is_uv_system(void) { return 0; } 32 28 static inline int is_uv_hubless(void) { return 0; } 33 29 static inline void uv_cpu_init(void) { }

+1 -1

arch/x86/kernel/cpu/amd.c

··· 922 922 static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) 923 923 { 924 924 /* AMD errata T13 (order #21922) */ 925 - if ((c->x86 == 6)) { 925 + if (c->x86 == 6) { 926 926 /* Duron Rev A0 */ 927 927 if (c->x86_model == 3 && c->x86_stepping == 0) 928 928 size = 64;

+4

arch/x86/kernel/tsc.c

··· 26 26 #include <asm/apic.h> 27 27 #include <asm/intel-family.h> 28 28 #include <asm/i8259.h> 29 + #include <asm/uv/uv.h> 29 30 30 31 unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ 31 32 EXPORT_SYMBOL(cpu_khz); ··· 1433 1432 void __init tsc_early_init(void) 1434 1433 { 1435 1434 if (!boot_cpu_has(X86_FEATURE_TSC)) 1435 + return; 1436 + /* Don't change UV TSC multi-chassis synchronization */ 1437 + if (is_early_uv_system()) 1436 1438 return; 1437 1439 if (!determine_cpu_tsc_frequencies(true)) 1438 1440 return;

+20 -4

arch/x86/kvm/mmu.c

··· 249 249 */ 250 250 static const u64 shadow_nonpresent_or_rsvd_mask_len = 5; 251 251 252 + /* 253 + * In some cases, we need to preserve the GFN of a non-present or reserved 254 + * SPTE when we usurp the upper five bits of the physical address space to 255 + * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll 256 + * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask 257 + * left into the reserved bits, i.e. the GFN in the SPTE will be split into 258 + * high and low parts. This mask covers the lower bits of the GFN. 259 + */ 260 + static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; 261 + 262 + 252 263 static void mmu_spte_set(u64 *sptep, u64 spte); 253 264 static union kvm_mmu_page_role 254 265 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); ··· 368 357 369 358 static gfn_t get_mmio_spte_gfn(u64 spte) 370 359 { 371 - u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask | 372 - shadow_nonpresent_or_rsvd_mask; 373 - u64 gpa = spte & ~mask; 360 + u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask; 374 361 375 362 gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len) 376 363 & shadow_nonpresent_or_rsvd_mask; ··· 432 423 433 424 static void kvm_mmu_reset_all_pte_masks(void) 434 425 { 426 + u8 low_phys_bits; 427 + 435 428 shadow_user_mask = 0; 436 429 shadow_accessed_mask = 0; 437 430 shadow_dirty_mask = 0; ··· 448 437 * appropriate mask to guard against L1TF attacks. Otherwise, it is 449 438 * assumed that the CPU is not vulnerable to L1TF. 
450 439 */ 440 + low_phys_bits = boot_cpu_data.x86_phys_bits; 451 441 if (boot_cpu_data.x86_phys_bits < 452 - 52 - shadow_nonpresent_or_rsvd_mask_len) 442 + 52 - shadow_nonpresent_or_rsvd_mask_len) { 453 443 shadow_nonpresent_or_rsvd_mask = 454 444 rsvd_bits(boot_cpu_data.x86_phys_bits - 455 445 shadow_nonpresent_or_rsvd_mask_len, 456 446 boot_cpu_data.x86_phys_bits - 1); 447 + low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; 448 + } 449 + shadow_nonpresent_or_rsvd_lower_gfn_mask = 450 + GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); 457 451 } 458 452 459 453 static int is_cpuid_PSE36(void)

+75 -60

arch/x86/kvm/vmx.c

··· 121 121 122 122 #define MSR_BITMAP_MODE_X2APIC 1 123 123 #define MSR_BITMAP_MODE_X2APIC_APICV 2 124 - #define MSR_BITMAP_MODE_LM 4 125 124 126 125 #define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL 127 126 ··· 856 857 857 858 /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ 858 859 u64 vmcs01_debugctl; 860 + u64 vmcs01_guest_bndcfgs; 859 861 860 862 u16 vpid02; 861 863 u16 last_vpid; ··· 2899 2899 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE); 2900 2900 } 2901 2901 2902 - if (is_long_mode(&vmx->vcpu)) 2903 - wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 2902 + wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 2904 2903 #else 2905 2904 savesegment(fs, fs_sel); 2906 2905 savesegment(gs, gs_sel); ··· 2950 2951 vmx->loaded_cpu_state = NULL; 2951 2952 2952 2953 #ifdef CONFIG_X86_64 2953 - if (is_long_mode(&vmx->vcpu)) 2954 - rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 2954 + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 2955 2955 #endif 2956 2956 if (host_state->ldt_sel || (host_state->gs_sel & 7)) { 2957 2957 kvm_load_ldt(host_state->ldt_sel); ··· 2978 2980 #ifdef CONFIG_X86_64 2979 2981 static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx) 2980 2982 { 2981 - if (is_long_mode(&vmx->vcpu)) { 2982 - preempt_disable(); 2983 - if (vmx->loaded_cpu_state) 2984 - rdmsrl(MSR_KERNEL_GS_BASE, 2985 - vmx->msr_guest_kernel_gs_base); 2986 - preempt_enable(); 2987 - } 2983 + preempt_disable(); 2984 + if (vmx->loaded_cpu_state) 2985 + rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); 2986 + preempt_enable(); 2988 2987 return vmx->msr_guest_kernel_gs_base; 2989 2988 } 2990 2989 2991 2990 static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data) 2992 2991 { 2993 - if (is_long_mode(&vmx->vcpu)) { 2994 - preempt_disable(); 2995 - if (vmx->loaded_cpu_state) 2996 - wrmsrl(MSR_KERNEL_GS_BASE, data); 2997 - preempt_enable(); 2998 - } 2992 + preempt_disable(); 2993 + if 
(vmx->loaded_cpu_state) 2994 + wrmsrl(MSR_KERNEL_GS_BASE, data); 2995 + preempt_enable(); 2999 2996 vmx->msr_guest_kernel_gs_base = data; 3000 2997 } 3001 2998 #endif ··· 3526 3533 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | 3527 3534 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT; 3528 3535 3529 - if (kvm_mpx_supported()) 3530 - msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; 3531 - 3532 3536 /* We support free control of debug control saving. */ 3533 3537 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS; 3534 3538 ··· 3542 3552 VM_ENTRY_LOAD_IA32_PAT; 3543 3553 msrs->entry_ctls_high |= 3544 3554 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER); 3545 - if (kvm_mpx_supported()) 3546 - msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; 3547 3555 3548 3556 /* We support free control of debug control loading. */ 3549 3557 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS; ··· 3589 3601 msrs->secondary_ctls_high); 3590 3602 msrs->secondary_ctls_low = 0; 3591 3603 msrs->secondary_ctls_high &= 3592 - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | 3593 3604 SECONDARY_EXEC_DESC | 3594 3605 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | 3595 3606 SECONDARY_EXEC_APIC_REGISTER_VIRT | 3596 3607 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | 3597 3608 SECONDARY_EXEC_WBINVD_EXITING; 3609 + 3598 3610 /* 3599 3611 * We can emulate "VMCS shadowing," even if the hardware 3600 3612 * doesn't support it. ··· 3650 3662 if (enable_unrestricted_guest) 3651 3663 msrs->secondary_ctls_high |= 3652 3664 SECONDARY_EXEC_UNRESTRICTED_GUEST; 3665 + 3666 + if (flexpriority_enabled) 3667 + msrs->secondary_ctls_high |= 3668 + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; 3653 3669 3654 3670 /* miscellaneous data */ 3655 3671 rdmsr(MSR_IA32_VMX_MISC, ··· 5065 5073 if (!msr) 5066 5074 return; 5067 5075 5068 - /* 5069 - * MSR_KERNEL_GS_BASE is not intercepted when the guest is in 5070 - * 64-bit mode as a 64-bit kernel may frequently access the 5071 - * MSR. 
This means we need to manually save/restore the MSR 5072 - * when switching between guest and host state, but only if 5073 - * the guest is in 64-bit mode. Sync our cached value if the 5074 - * guest is transitioning to 32-bit mode and the CPU contains 5075 - * guest state, i.e. the cache is stale. 5076 - */ 5077 - #ifdef CONFIG_X86_64 5078 - if (!(efer & EFER_LMA)) 5079 - (void)vmx_read_guest_kernel_gs_base(vmx); 5080 - #endif 5081 5076 vcpu->arch.efer = efer; 5082 5077 if (efer & EFER_LMA) { 5083 5078 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); ··· 6057 6078 mode |= MSR_BITMAP_MODE_X2APIC_APICV; 6058 6079 } 6059 6080 6060 - if (is_long_mode(vcpu)) 6061 - mode |= MSR_BITMAP_MODE_LM; 6062 - 6063 6081 return mode; 6064 6082 } 6065 6083 ··· 6096 6120 6097 6121 if (!changed) 6098 6122 return; 6099 - 6100 - vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW, 6101 - !(mode & MSR_BITMAP_MODE_LM)); 6102 6123 6103 6124 if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV)) 6104 6125 vmx_update_msr_bitmap_x2apic(msr_bitmap, mode); ··· 6162 6189 nested_mark_vmcs12_pages_dirty(vcpu); 6163 6190 } 6164 6191 6192 + static u8 vmx_get_rvi(void) 6193 + { 6194 + return vmcs_read16(GUEST_INTR_STATUS) & 0xff; 6195 + } 6196 + 6165 6197 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu) 6166 6198 { 6167 6199 struct vcpu_vmx *vmx = to_vmx(vcpu); ··· 6179 6201 WARN_ON_ONCE(!vmx->nested.virtual_apic_page)) 6180 6202 return false; 6181 6203 6182 - rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff; 6204 + rvi = vmx_get_rvi(); 6183 6205 6184 6206 vapic_page = kmap(vmx->nested.virtual_apic_page); 6185 6207 vppr = *((u32 *)(vapic_page + APIC_PROCPRI)); ··· 10223 10245 if (!lapic_in_kernel(vcpu)) 10224 10246 return; 10225 10247 10248 + if (!flexpriority_enabled && 10249 + !cpu_has_vmx_virtualize_x2apic_mode()) 10250 + return; 10251 + 10226 10252 /* Postpone execution until vmcs01 is the current VMCS. 
*/ 10227 10253 if (is_guest_mode(vcpu)) { 10228 10254 to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; 10229 10255 return; 10230 10256 } 10231 - 10232 - if (!cpu_need_tpr_shadow(vcpu)) 10233 - return; 10234 10257 10235 10258 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); 10236 10259 sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | ··· 10352 10373 } 10353 10374 vmx_hwapic_irr_update(vcpu, max_irr); 10354 10375 return max_irr; 10376 + } 10377 + 10378 + static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu) 10379 + { 10380 + u8 rvi = vmx_get_rvi(); 10381 + u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI); 10382 + 10383 + return ((rvi & 0xf0) > (vppr & 0xf0)); 10355 10384 } 10356 10385 10357 10386 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) ··· 11251 11264 #undef cr4_fixed1_update 11252 11265 } 11253 11266 11267 + static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu) 11268 + { 11269 + struct vcpu_vmx *vmx = to_vmx(vcpu); 11270 + 11271 + if (kvm_mpx_supported()) { 11272 + bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX); 11273 + 11274 + if (mpx_enabled) { 11275 + vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; 11276 + vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; 11277 + } else { 11278 + vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS; 11279 + vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS; 11280 + } 11281 + } 11282 + } 11283 + 11254 11284 static void vmx_cpuid_update(struct kvm_vcpu *vcpu) 11255 11285 { 11256 11286 struct vcpu_vmx *vmx = to_vmx(vcpu); ··· 11284 11280 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &= 11285 11281 ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; 11286 11282 11287 - if (nested_vmx_allowed(vcpu)) 11283 + if (nested_vmx_allowed(vcpu)) { 11288 11284 nested_vmx_cr_fixed1_bits_update(vcpu); 11285 + nested_vmx_entry_exit_ctls_update(vcpu); 11286 + } 11289 11287 } 11290 11288 11291 11289 static 
void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) ··· 12055 12049 12056 12050 set_cr4_guest_host_mask(vmx); 12057 12051 12058 - if (vmx_mpx_supported()) 12059 - vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); 12052 + if (kvm_mpx_supported()) { 12053 + if (vmx->nested.nested_run_pending && 12054 + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) 12055 + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); 12056 + else 12057 + vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs); 12058 + } 12060 12059 12061 12060 if (enable_vpid) { 12062 12061 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02) ··· 12606 12595 struct vmcs12 *vmcs12 = get_vmcs12(vcpu); 12607 12596 bool from_vmentry = !!exit_qual; 12608 12597 u32 dummy_exit_qual; 12609 - u32 vmcs01_cpu_exec_ctrl; 12598 + bool evaluate_pending_interrupts; 12610 12599 int r = 0; 12611 12600 12612 - vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); 12601 + evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & 12602 + (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING); 12603 + if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) 12604 + evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); 12613 12605 12614 12606 enter_guest_mode(vcpu); 12615 12607 12616 12608 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) 12617 12609 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); 12610 + if (kvm_mpx_supported() && 12611 + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) 12612 + vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); 12618 12613 12619 12614 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); 12620 12615 vmx_segment_cache_clear(vmx); ··· 12660 12643 * to L1 or delivered directly to L2 (e.g. In case L1 don't 12661 12644 * intercept EXTERNAL_INTERRUPT). 12662 12645 * 12663 - * Usually this would be handled by L0 requesting a 12664 - * IRQ/NMI window by setting VMCS accordingly. 
However, 12665 - * this setting was done on VMCS01 and now VMCS02 is active 12666 - * instead. Thus, we force L0 to perform pending event 12667 - * evaluation by requesting a KVM_REQ_EVENT. 12646 + * Usually this would be handled by the processor noticing an 12647 + * IRQ/NMI window request, or checking RVI during evaluation of 12648 + * pending virtual interrupts. However, this setting was done 12649 + * on VMCS01 and now VMCS02 is active instead. Thus, we force L0 12650 + * to perform pending event evaluation by requesting a KVM_REQ_EVENT. 12668 12651 */ 12669 - if (vmcs01_cpu_exec_ctrl & 12670 - (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) { 12652 + if (unlikely(evaluate_pending_interrupts)) 12671 12653 kvm_make_request(KVM_REQ_EVENT, vcpu); 12672 - } 12673 12654 12674 12655 /* 12675 12656 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point

+1 -1

arch/x86/kvm/x86.c

··· 4698 4698 */ 4699 4699 switch (msrs_to_save[i]) { 4700 4700 case MSR_IA32_BNDCFGS: 4701 - if (!kvm_x86_ops->mpx_supported()) 4701 + if (!kvm_mpx_supported()) 4702 4702 continue; 4703 4703 break; 4704 4704 case MSR_TSC_AUX:

+4 -1

drivers/base/power/main.c

··· 1713 1713 1714 1714 dpm_wait_for_subordinate(dev, async); 1715 1715 1716 - if (async_error) 1716 + if (async_error) { 1717 + dev->power.direct_complete = false; 1717 1718 goto Complete; 1719 + } 1718 1720 1719 1721 /* 1720 1722 * If a device configured to wake up the system from sleep states ··· 1728 1726 pm_wakeup_event(dev, 0); 1729 1727 1730 1728 if (pm_wakeup_pending()) { 1729 + dev->power.direct_complete = false; 1731 1730 async_error = -EBUSY; 1732 1731 goto Complete; 1733 1732 }

+4 -4

drivers/crypto/caam/caamalg.c

··· 1553 1553 edesc->src_nents = src_nents; 1554 1554 edesc->dst_nents = dst_nents; 1555 1555 edesc->sec4_sg_bytes = sec4_sg_bytes; 1556 - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + 1557 - desc_bytes; 1556 + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + 1557 + desc_bytes); 1558 1558 edesc->iv_dir = DMA_TO_DEVICE; 1559 1559 1560 1560 /* Make sure IV is located in a DMAable area */ ··· 1757 1757 edesc->src_nents = src_nents; 1758 1758 edesc->dst_nents = dst_nents; 1759 1759 edesc->sec4_sg_bytes = sec4_sg_bytes; 1760 - edesc->sec4_sg = (void *)edesc + sizeof(struct ablkcipher_edesc) + 1761 - desc_bytes; 1760 + edesc->sec4_sg = (struct sec4_sg_entry *)((u8 *)edesc->hw_desc + 1761 + desc_bytes); 1762 1762 edesc->iv_dir = DMA_FROM_DEVICE; 1763 1763 1764 1764 /* Make sure IV is located in a DMAable area */

+22 -10

drivers/crypto/chelsio/chcr_algo.c

··· 367 367 walk->to = (struct phys_sge_pairs *)(dsgl + 1); 368 368 } 369 369 370 - static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid) 370 + static inline void dsgl_walk_end(struct dsgl_walk *walk, unsigned short qid, 371 + int pci_chan_id) 371 372 { 372 373 struct cpl_rx_phys_dsgl *phys_cpl; 373 374 ··· 386 385 phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR; 387 386 phys_cpl->rss_hdr_int.qid = htons(qid); 388 387 phys_cpl->rss_hdr_int.hash_val = 0; 388 + phys_cpl->rss_hdr_int.channel = pci_chan_id; 389 389 } 390 390 391 391 static inline void dsgl_walk_add_page(struct dsgl_walk *walk, ··· 720 718 FILL_WR_RX_Q_ID(ctx->dev->rx_channel_id, qid, 721 719 !!lcb, ctx->tx_qidx); 722 720 723 - chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id, 721 + chcr_req->ulptx.cmd_dest = FILL_ULPTX_CMD_DEST(ctx->tx_chan_id, 724 722 qid); 725 723 chcr_req->ulptx.len = htonl((DIV_ROUND_UP(len16, 16) - 726 724 ((sizeof(chcr_req->wreq)) >> 4))); ··· 1341 1339 adap->vres.ncrypto_fc); 1342 1340 rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; 1343 1341 txq_perchan = ntxq / u_ctx->lldi.nchan; 1344 - rxq_idx = ctx->dev->tx_channel_id * rxq_perchan; 1345 - rxq_idx += id % rxq_perchan; 1346 - txq_idx = ctx->dev->tx_channel_id * txq_perchan; 1347 - txq_idx += id % txq_perchan; 1348 1342 spin_lock(&ctx->dev->lock_chcr_dev); 1349 - ctx->rx_qidx = rxq_idx; 1350 - ctx->tx_qidx = txq_idx; 1343 + ctx->tx_chan_id = ctx->dev->tx_channel_id; 1351 1344 ctx->dev->tx_channel_id = !ctx->dev->tx_channel_id; 1352 1345 ctx->dev->rx_channel_id = 0; 1353 1346 spin_unlock(&ctx->dev->lock_chcr_dev); 1347 + rxq_idx = ctx->tx_chan_id * rxq_perchan; 1348 + rxq_idx += id % rxq_perchan; 1349 + txq_idx = ctx->tx_chan_id * txq_perchan; 1350 + txq_idx += id % txq_perchan; 1351 + ctx->rx_qidx = rxq_idx; 1352 + ctx->tx_qidx = txq_idx; 1353 + /* Channel Id used by SGE to forward packet to Host. 1354 + * Same value should be used in cpl_fw6_pld RSS_CH field 1355 + * by FW. 
Driver programs PCI channel ID to be used in fw 1356 + * at the time of queue allocation with value "pi->tx_chan" 1357 + */ 1358 + ctx->pci_chan_id = txq_idx / txq_perchan; 1354 1359 } 1355 1360 out: 1356 1361 return err; ··· 2512 2503 struct crypto_aead *tfm = crypto_aead_reqtfm(req); 2513 2504 struct dsgl_walk dsgl_walk; 2514 2505 unsigned int authsize = crypto_aead_authsize(tfm); 2506 + struct chcr_context *ctx = a_ctx(tfm); 2515 2507 u32 temp; 2516 2508 2517 2509 dsgl_walk_init(&dsgl_walk, phys_cpl); ··· 2522 2512 dsgl_walk_add_page(&dsgl_walk, IV, &reqctx->iv_dma); 2523 2513 temp = req->cryptlen + (reqctx->op ? -authsize : authsize); 2524 2514 dsgl_walk_add_sg(&dsgl_walk, req->dst, temp, req->assoclen); 2525 - dsgl_walk_end(&dsgl_walk, qid); 2515 + dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); 2526 2516 } 2527 2517 2528 2518 void chcr_add_cipher_src_ent(struct ablkcipher_request *req, ··· 2554 2544 unsigned short qid) 2555 2545 { 2556 2546 struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req); 2547 + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(wrparam->req); 2548 + struct chcr_context *ctx = c_ctx(tfm); 2557 2549 struct dsgl_walk dsgl_walk; 2558 2550 2559 2551 dsgl_walk_init(&dsgl_walk, phys_cpl); ··· 2564 2552 reqctx->dstsg = dsgl_walk.last_sg; 2565 2553 reqctx->dst_ofst = dsgl_walk.last_sg_len; 2566 2554 2567 - dsgl_walk_end(&dsgl_walk, qid); 2555 + dsgl_walk_end(&dsgl_walk, qid, ctx->pci_chan_id); 2568 2556 } 2569 2557 2570 2558 void chcr_add_hash_src_ent(struct ahash_request *req,

+2

drivers/crypto/chelsio/chcr_crypto.h

··· 255 255 struct chcr_dev *dev; 256 256 unsigned char tx_qidx; 257 257 unsigned char rx_qidx; 258 + unsigned char tx_chan_id; 259 + unsigned char pci_chan_id; 258 260 struct __crypto_ctx crypto_ctx[0]; 259 261 }; 260 262

+30 -23

drivers/crypto/mxs-dcp.c

··· 63 63 struct dcp_coherent_block *coh; 64 64 65 65 struct completion completion[DCP_MAX_CHANS]; 66 - struct mutex mutex[DCP_MAX_CHANS]; 66 + spinlock_t lock[DCP_MAX_CHANS]; 67 67 struct task_struct *thread[DCP_MAX_CHANS]; 68 68 struct crypto_queue queue[DCP_MAX_CHANS]; 69 69 }; ··· 349 349 350 350 int ret; 351 351 352 - do { 353 - __set_current_state(TASK_INTERRUPTIBLE); 352 + while (!kthread_should_stop()) { 353 + set_current_state(TASK_INTERRUPTIBLE); 354 354 355 - mutex_lock(&sdcp->mutex[chan]); 355 + spin_lock(&sdcp->lock[chan]); 356 356 backlog = crypto_get_backlog(&sdcp->queue[chan]); 357 357 arq = crypto_dequeue_request(&sdcp->queue[chan]); 358 - mutex_unlock(&sdcp->mutex[chan]); 358 + spin_unlock(&sdcp->lock[chan]); 359 + 360 + if (!backlog && !arq) { 361 + schedule(); 362 + continue; 363 + } 364 + 365 + set_current_state(TASK_RUNNING); 359 366 360 367 if (backlog) 361 368 backlog->complete(backlog, -EINPROGRESS); ··· 370 363 if (arq) { 371 364 ret = mxs_dcp_aes_block_crypt(arq); 372 365 arq->complete(arq, ret); 373 - continue; 374 366 } 375 - 376 - schedule(); 377 - } while (!kthread_should_stop()); 367 + } 378 368 379 369 return 0; 380 370 } ··· 413 409 rctx->ecb = ecb; 414 410 actx->chan = DCP_CHAN_CRYPTO; 415 411 416 - mutex_lock(&sdcp->mutex[actx->chan]); 412 + spin_lock(&sdcp->lock[actx->chan]); 417 413 ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); 418 - mutex_unlock(&sdcp->mutex[actx->chan]); 414 + spin_unlock(&sdcp->lock[actx->chan]); 419 415 420 416 wake_up_process(sdcp->thread[actx->chan]); 421 417 ··· 644 640 struct ahash_request *req; 645 641 int ret, fini; 646 642 647 - do { 648 - __set_current_state(TASK_INTERRUPTIBLE); 643 + while (!kthread_should_stop()) { 644 + set_current_state(TASK_INTERRUPTIBLE); 649 645 650 - mutex_lock(&sdcp->mutex[chan]); 646 + spin_lock(&sdcp->lock[chan]); 651 647 backlog = crypto_get_backlog(&sdcp->queue[chan]); 652 648 arq = crypto_dequeue_request(&sdcp->queue[chan]); 653 - 
mutex_unlock(&sdcp->mutex[chan]); 649 + spin_unlock(&sdcp->lock[chan]); 650 + 651 + if (!backlog && !arq) { 652 + schedule(); 653 + continue; 654 + } 655 + 656 + set_current_state(TASK_RUNNING); 654 657 655 658 if (backlog) 656 659 backlog->complete(backlog, -EINPROGRESS); ··· 669 658 ret = dcp_sha_req_to_buf(arq); 670 659 fini = rctx->fini; 671 660 arq->complete(arq, ret); 672 - if (!fini) 673 - continue; 674 661 } 675 - 676 - schedule(); 677 - } while (!kthread_should_stop()); 662 + } 678 663 679 664 return 0; 680 665 } ··· 728 721 rctx->init = 1; 729 722 } 730 723 731 - mutex_lock(&sdcp->mutex[actx->chan]); 724 + spin_lock(&sdcp->lock[actx->chan]); 732 725 ret = crypto_enqueue_request(&sdcp->queue[actx->chan], &req->base); 733 - mutex_unlock(&sdcp->mutex[actx->chan]); 726 + spin_unlock(&sdcp->lock[actx->chan]); 734 727 735 728 wake_up_process(sdcp->thread[actx->chan]); 736 729 mutex_unlock(&actx->mutex); ··· 1004 997 platform_set_drvdata(pdev, sdcp); 1005 998 1006 999 for (i = 0; i < DCP_MAX_CHANS; i++) { 1007 - mutex_init(&sdcp->mutex[i]); 1000 + spin_lock_init(&sdcp->lock[i]); 1008 1001 init_completion(&sdcp->completion[i]); 1009 1002 crypto_init_queue(&sdcp->queue[i], 50); 1010 1003 }

+3 -3

drivers/crypto/qat/qat_c3xxx/adf_drv.c

··· 123 123 struct adf_hw_device_data *hw_data; 124 124 char name[ADF_DEVICE_NAME_LENGTH]; 125 125 unsigned int i, bar_nr; 126 - int ret, bar_mask; 126 + unsigned long bar_mask; 127 + int ret; 127 128 128 129 switch (ent->device) { 129 130 case ADF_C3XXX_PCI_DEVICE_ID: ··· 236 235 /* Find and map all the device's BARS */ 237 236 i = 0; 238 237 bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); 239 - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, 240 - ADF_PCI_MAX_BARS * 2) { 238 + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { 241 239 struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; 242 240 243 241 bar->base_addr = pci_resource_start(pdev, bar_nr);

+3 -3

drivers/crypto/qat/qat_c3xxxvf/adf_drv.c

··· 125 125 struct adf_hw_device_data *hw_data; 126 126 char name[ADF_DEVICE_NAME_LENGTH]; 127 127 unsigned int i, bar_nr; 128 - int ret, bar_mask; 128 + unsigned long bar_mask; 129 + int ret; 129 130 130 131 switch (ent->device) { 131 132 case ADF_C3XXXIOV_PCI_DEVICE_ID: ··· 216 215 /* Find and map all the device's BARS */ 217 216 i = 0; 218 217 bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); 219 - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, 220 - ADF_PCI_MAX_BARS * 2) { 218 + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { 221 219 struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; 222 220 223 221 bar->base_addr = pci_resource_start(pdev, bar_nr);

+3 -3

drivers/crypto/qat/qat_c62x/adf_drv.c

··· 123 123 struct adf_hw_device_data *hw_data; 124 124 char name[ADF_DEVICE_NAME_LENGTH]; 125 125 unsigned int i, bar_nr; 126 - int ret, bar_mask; 126 + unsigned long bar_mask; 127 + int ret; 127 128 128 129 switch (ent->device) { 129 130 case ADF_C62X_PCI_DEVICE_ID: ··· 236 235 /* Find and map all the device's BARS */ 237 236 i = (hw_data->fuses & ADF_DEVICE_FUSECTL_MASK) ? 1 : 0; 238 237 bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); 239 - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, 240 - ADF_PCI_MAX_BARS * 2) { 238 + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { 241 239 struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; 242 240 243 241 bar->base_addr = pci_resource_start(pdev, bar_nr);

+3 -3

drivers/crypto/qat/qat_c62xvf/adf_drv.c

··· 125 125 struct adf_hw_device_data *hw_data; 126 126 char name[ADF_DEVICE_NAME_LENGTH]; 127 127 unsigned int i, bar_nr; 128 - int ret, bar_mask; 128 + unsigned long bar_mask; 129 + int ret; 129 130 130 131 switch (ent->device) { 131 132 case ADF_C62XIOV_PCI_DEVICE_ID: ··· 216 215 /* Find and map all the device's BARS */ 217 216 i = 0; 218 217 bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); 219 - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, 220 - ADF_PCI_MAX_BARS * 2) { 218 + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { 221 219 struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; 222 220 223 221 bar->base_addr = pci_resource_start(pdev, bar_nr);

+3 -3

drivers/crypto/qat/qat_dh895xcc/adf_drv.c

··· 123 123 struct adf_hw_device_data *hw_data; 124 124 char name[ADF_DEVICE_NAME_LENGTH]; 125 125 unsigned int i, bar_nr; 126 - int ret, bar_mask; 126 + unsigned long bar_mask; 127 + int ret; 127 128 128 129 switch (ent->device) { 129 130 case ADF_DH895XCC_PCI_DEVICE_ID: ··· 238 237 /* Find and map all the device's BARS */ 239 238 i = 0; 240 239 bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); 241 - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, 242 - ADF_PCI_MAX_BARS * 2) { 240 + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { 243 241 struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; 244 242 245 243 bar->base_addr = pci_resource_start(pdev, bar_nr);

+3 -3

drivers/crypto/qat/qat_dh895xccvf/adf_drv.c

··· 125 125 struct adf_hw_device_data *hw_data; 126 126 char name[ADF_DEVICE_NAME_LENGTH]; 127 127 unsigned int i, bar_nr; 128 - int ret, bar_mask; 128 + unsigned long bar_mask; 129 + int ret; 129 130 130 131 switch (ent->device) { 131 132 case ADF_DH895XCCIOV_PCI_DEVICE_ID: ··· 216 215 /* Find and map all the device's BARS */ 217 216 i = 0; 218 217 bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); 219 - for_each_set_bit(bar_nr, (const unsigned long *)&bar_mask, 220 - ADF_PCI_MAX_BARS * 2) { 218 + for_each_set_bit(bar_nr, &bar_mask, ADF_PCI_MAX_BARS * 2) { 221 219 struct adf_bar *bar = &accel_pci_dev->pci_bars[i++]; 222 220 223 221 bar->base_addr = pci_resource_start(pdev, bar_nr);

+1 -1

drivers/gpio/gpiolib.c

··· 571 571 if (ret) 572 572 goto out_free_descs; 573 573 lh->descs[i] = desc; 574 - count = i; 574 + count = i + 1; 575 575 576 576 if (lflags & GPIOHANDLE_REQUEST_ACTIVE_LOW) 577 577 set_bit(FLAG_ACTIVE_LOW, &desc->flags);

+29 -8

drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

··· 358 358 struct queue *q, 359 359 struct qcm_process_device *qpd) 360 360 { 361 - int retval; 362 361 struct mqd_manager *mqd_mgr; 362 + int retval; 363 363 364 364 mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); 365 365 if (!mqd_mgr) ··· 387 387 if (!q->properties.is_active) 388 388 return 0; 389 389 390 - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, 391 - &q->properties, q->process->mm); 390 + if (WARN(q->process->mm != current->mm, 391 + "should only run in user thread")) 392 + retval = -EFAULT; 393 + else 394 + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, 395 + &q->properties, current->mm); 392 396 if (retval) 393 397 goto out_uninit_mqd; 394 398 ··· 549 545 retval = map_queues_cpsch(dqm); 550 546 else if (q->properties.is_active && 551 547 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 552 - q->properties.type == KFD_QUEUE_TYPE_SDMA)) 553 - retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, 554 - &q->properties, q->process->mm); 548 + q->properties.type == KFD_QUEUE_TYPE_SDMA)) { 549 + if (WARN(q->process->mm != current->mm, 550 + "should only run in user thread")) 551 + retval = -EFAULT; 552 + else 553 + retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 554 + q->pipe, q->queue, 555 + &q->properties, current->mm); 556 + } 555 557 556 558 out_unlock: 557 559 dqm_unlock(dqm); ··· 663 653 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 664 654 struct qcm_process_device *qpd) 665 655 { 656 + struct mm_struct *mm = NULL; 666 657 struct queue *q; 667 658 struct mqd_manager *mqd_mgr; 668 659 struct kfd_process_device *pdd; ··· 697 686 kfd_flush_tlb(pdd); 698 687 } 699 688 689 + /* Take a safe reference to the mm_struct, which may otherwise 690 + * disappear even while the kfd_process is still referenced. 
691 + */ 692 + mm = get_task_mm(pdd->process->lead_thread); 693 + if (!mm) { 694 + retval = -EFAULT; 695 + goto out; 696 + } 697 + 700 698 /* activate all active queues on the qpd */ 701 699 list_for_each_entry(q, &qpd->queues_list, list) { 702 700 if (!q->properties.is_evicted) ··· 720 700 q->properties.is_evicted = false; 721 701 q->properties.is_active = true; 722 702 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 723 - q->queue, &q->properties, 724 - q->process->mm); 703 + q->queue, &q->properties, mm); 725 704 if (retval) 726 705 goto out; 727 706 dqm->queue_count++; 728 707 } 729 708 qpd->evicted = 0; 730 709 out: 710 + if (mm) 711 + mmput(mm); 731 712 dqm_unlock(dqm); 732 713 return retval; 733 714 }

+8 -2

drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c

··· 4633 4633 } 4634 4634 spin_unlock_irqrestore(&adev->ddev->event_lock, flags); 4635 4635 4636 - /* Signal HW programming completion */ 4637 - drm_atomic_helper_commit_hw_done(state); 4638 4636 4639 4637 if (wait_for_vblank) 4640 4638 drm_atomic_helper_wait_for_flip_done(dev, state); 4639 + 4640 + /* 4641 + * FIXME: 4642 + * Delay hw_done() until flip_done() is signaled. This is to block 4643 + * another commit from freeing the CRTC state while we're still 4644 + * waiting on flip_done. 4645 + */ 4646 + drm_atomic_helper_commit_hw_done(state); 4641 4647 4642 4648 drm_atomic_helper_cleanup_planes(dev, state); 4643 4649

+26 -9

drivers/gpu/drm/drm_client.c

··· 63 63 EXPORT_SYMBOL(drm_client_close); 64 64 65 65 /** 66 - * drm_client_new - Create a DRM client 66 + * drm_client_init - Initialise a DRM client 67 67 * @dev: DRM device 68 68 * @client: DRM client 69 69 * @name: Client name 70 70 * @funcs: DRM client functions (optional) 71 71 * 72 + * This initialises the client and opens a &drm_file. Use drm_client_add() to complete the process. 72 73 * The caller needs to hold a reference on @dev before calling this function. 73 74 * The client is freed when the &drm_device is unregistered. See drm_client_release(). 74 75 * 75 76 * Returns: 76 77 * Zero on success or negative error code on failure. 77 78 */ 78 - int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, 79 - const char *name, const struct drm_client_funcs *funcs) 79 + int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, 80 + const char *name, const struct drm_client_funcs *funcs) 80 81 { 81 82 int ret; 82 83 ··· 96 95 if (ret) 97 96 goto err_put_module; 98 97 99 - mutex_lock(&dev->clientlist_mutex); 100 - list_add(&client->list, &dev->clientlist); 101 - mutex_unlock(&dev->clientlist_mutex); 102 - 103 98 drm_dev_get(dev); 104 99 105 100 return 0; ··· 106 109 107 110 return ret; 108 111 } 109 - EXPORT_SYMBOL(drm_client_new); 112 + EXPORT_SYMBOL(drm_client_init); 113 + 114 + /** 115 + * drm_client_add - Add client to the device list 116 + * @client: DRM client 117 + * 118 + * Add the client to the &drm_device client list to activate its callbacks. 119 + * @client must be initialized by a call to drm_client_init(). After 120 + * drm_client_add() it is no longer permissible to call drm_client_release() 121 + * directly (outside the unregister callback), instead cleanup will happen 122 + * automatically on driver unload. 
123 + */ 124 + void drm_client_add(struct drm_client_dev *client) 125 + { 126 + struct drm_device *dev = client->dev; 127 + 128 + mutex_lock(&dev->clientlist_mutex); 129 + list_add(&client->list, &dev->clientlist); 130 + mutex_unlock(&dev->clientlist_mutex); 131 + } 132 + EXPORT_SYMBOL(drm_client_add); 110 133 111 134 /** 112 135 * drm_client_release - Release DRM client resources 113 136 * @client: DRM client 114 137 * 115 - * Releases resources by closing the &drm_file that was opened by drm_client_new(). 138 + * Releases resources by closing the &drm_file that was opened by drm_client_init(). 116 139 * It is called automatically if the &drm_client_funcs.unregister callback is _not_ set. 117 140 * 118 141 * This function should only be called from the unregister callback. An exception

+3 -1

drivers/gpu/drm/drm_fb_cma_helper.c

··· 160 160 161 161 fb_helper = &fbdev_cma->fb_helper; 162 162 163 - ret = drm_client_new(dev, &fb_helper->client, "fbdev", NULL); 163 + ret = drm_client_init(dev, &fb_helper->client, "fbdev", NULL); 164 164 if (ret) 165 165 goto err_free; 166 166 ··· 168 168 preferred_bpp, max_conn_count); 169 169 if (ret) 170 170 goto err_client_put; 171 + 172 + drm_client_add(&fb_helper->client); 171 173 172 174 return fbdev_cma; 173 175

+3 -1

drivers/gpu/drm/drm_fb_helper.c

··· 3218 3218 if (!fb_helper) 3219 3219 return -ENOMEM; 3220 3220 3221 - ret = drm_client_new(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs); 3221 + ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs); 3222 3222 if (ret) { 3223 3223 kfree(fb_helper); 3224 3224 return ret; 3225 3225 } 3226 + 3227 + drm_client_add(&fb_helper->client); 3226 3228 3227 3229 fb_helper->preferred_bpp = preferred_bpp; 3228 3230

+3 -3

drivers/gpu/drm/drm_lease.c

··· 566 566 lessee_priv->is_master = 1; 567 567 lessee_priv->authenticated = 1; 568 568 569 - /* Hook up the fd */ 570 - fd_install(fd, lessee_file); 571 - 572 569 /* Pass fd back to userspace */ 573 570 DRM_DEBUG_LEASE("Returning fd %d id %d\n", fd, lessee->lessee_id); 574 571 cl->fd = fd; 575 572 cl->lessee_id = lessee->lessee_id; 573 + 574 + /* Hook up the fd */ 575 + fd_install(fd, lessee_file); 576 576 577 577 DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n"); 578 578 return 0;

+6 -28

drivers/gpu/drm/exynos/exynos_drm_iommu.h

··· 55 55 static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv, 56 56 unsigned long start, unsigned long size) 57 57 { 58 - struct iommu_domain *domain; 59 - int ret; 60 - 61 - domain = iommu_domain_alloc(priv->dma_dev->bus); 62 - if (!domain) 63 - return -ENOMEM; 64 - 65 - ret = iommu_get_dma_cookie(domain); 66 - if (ret) 67 - goto free_domain; 68 - 69 - ret = iommu_dma_init_domain(domain, start, size, NULL); 70 - if (ret) 71 - goto put_cookie; 72 - 73 - priv->mapping = domain; 58 + priv->mapping = iommu_get_domain_for_dev(priv->dma_dev); 74 59 return 0; 75 - 76 - put_cookie: 77 - iommu_put_dma_cookie(domain); 78 - free_domain: 79 - iommu_domain_free(domain); 80 - return ret; 81 60 } 82 61 83 62 static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv) 84 63 { 85 - struct iommu_domain *domain = priv->mapping; 86 - 87 - iommu_put_dma_cookie(domain); 88 - iommu_domain_free(domain); 89 64 priv->mapping = NULL; 90 65 } 91 66 ··· 69 94 { 70 95 struct iommu_domain *domain = priv->mapping; 71 96 72 - return iommu_attach_device(domain, dev); 97 + if (dev != priv->dma_dev) 98 + return iommu_attach_device(domain, dev); 99 + return 0; 73 100 } 74 101 75 102 static inline void __exynos_iommu_detach(struct exynos_drm_private *priv, ··· 79 102 { 80 103 struct iommu_domain *domain = priv->mapping; 81 104 82 - iommu_detach_device(domain, dev); 105 + if (dev != priv->dma_dev) 106 + iommu_detach_device(domain, dev); 83 107 } 84 108 #else 85 109 #error Unsupported architecture and IOMMU/DMA-mapping glue code

+3 -2

drivers/gpu/drm/i2c/tda9950.c

··· 191 191 break; 192 192 } 193 193 /* TDA9950 executes all retries for us */ 194 - tx_status |= CEC_TX_STATUS_MAX_RETRIES; 194 + if (tx_status != CEC_TX_STATUS_OK) 195 + tx_status |= CEC_TX_STATUS_MAX_RETRIES; 195 196 cec_transmit_done(priv->adap, tx_status, arb_lost_cnt, 196 197 nack_cnt, 0, err_cnt); 197 198 break; ··· 311 310 /* Wait up to .5s for it to signal non-busy */ 312 311 do { 313 312 csr = tda9950_read(client, REG_CSR); 314 - if (!(csr & CSR_BUSY) || --timeout) 313 + if (!(csr & CSR_BUSY) || !--timeout) 315 314 break; 316 315 msleep(10); 317 316 } while (1);

+63 -25

drivers/gpu/drm/i915/i915_gpu_error.c

··· 232 232 return true; 233 233 } 234 234 235 + static void *compress_next_page(struct drm_i915_error_object *dst) 236 + { 237 + unsigned long page; 238 + 239 + if (dst->page_count >= dst->num_pages) 240 + return ERR_PTR(-ENOSPC); 241 + 242 + page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); 243 + if (!page) 244 + return ERR_PTR(-ENOMEM); 245 + 246 + return dst->pages[dst->page_count++] = (void *)page; 247 + } 248 + 235 249 static int compress_page(struct compress *c, 236 250 void *src, 237 251 struct drm_i915_error_object *dst) ··· 259 245 260 246 do { 261 247 if (zstream->avail_out == 0) { 262 - unsigned long page; 248 + zstream->next_out = compress_next_page(dst); 249 + if (IS_ERR(zstream->next_out)) 250 + return PTR_ERR(zstream->next_out); 263 251 264 - page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN); 265 - if (!page) 266 - return -ENOMEM; 267 - 268 - dst->pages[dst->page_count++] = (void *)page; 269 - 270 - zstream->next_out = (void *)page; 271 252 zstream->avail_out = PAGE_SIZE; 272 253 } 273 254 274 - if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK) 255 + if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) 275 256 return -EIO; 276 257 } while (zstream->avail_in); 277 258 ··· 277 268 return 0; 278 269 } 279 270 271 + static int compress_flush(struct compress *c, 272 + struct drm_i915_error_object *dst) 273 + { 274 + struct z_stream_s *zstream = &c->zstream; 275 + 276 + do { 277 + switch (zlib_deflate(zstream, Z_FINISH)) { 278 + case Z_OK: /* more space requested */ 279 + zstream->next_out = compress_next_page(dst); 280 + if (IS_ERR(zstream->next_out)) 281 + return PTR_ERR(zstream->next_out); 282 + 283 + zstream->avail_out = PAGE_SIZE; 284 + break; 285 + 286 + case Z_STREAM_END: 287 + goto end; 288 + 289 + default: /* any error */ 290 + return -EIO; 291 + } 292 + } while (1); 293 + 294 + end: 295 + memset(zstream->next_out, 0, zstream->avail_out); 296 + dst->unused = zstream->avail_out; 297 + return 0; 298 + } 299 + 280 300 static void compress_fini(struct 
compress *c, 281 301 struct drm_i915_error_object *dst) 282 302 { 283 303 struct z_stream_s *zstream = &c->zstream; 284 304 285 - if (dst) { 286 - zlib_deflate(zstream, Z_FINISH); 287 - dst->unused = zstream->avail_out; 288 - } 289 - 290 305 zlib_deflateEnd(zstream); 291 306 kfree(zstream->workspace); 292 - 293 307 if (c->tmp) 294 308 free_page((unsigned long)c->tmp); 295 309 } ··· 348 316 memcpy(ptr, src, PAGE_SIZE); 349 317 dst->pages[dst->page_count++] = ptr; 350 318 319 + return 0; 320 + } 321 + 322 + static int compress_flush(struct compress *c, 323 + struct drm_i915_error_object *dst) 324 + { 351 325 return 0; 352 326 } 353 327 ··· 955 917 unsigned long num_pages; 956 918 struct sgt_iter iter; 957 919 dma_addr_t dma; 920 + int ret; 958 921 959 922 if (!vma) 960 923 return NULL; ··· 969 930 970 931 dst->gtt_offset = vma->node.start; 971 932 dst->gtt_size = vma->node.size; 933 + dst->num_pages = num_pages; 972 934 dst->page_count = 0; 973 935 dst->unused = 0; 974 936 ··· 978 938 return NULL; 979 939 } 980 940 941 + ret = -EINVAL; 981 942 for_each_sgt_dma(dma, iter, vma->pages) { 982 943 void __iomem *s; 983 - int ret; 984 944 985 945 ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0); 986 946 987 947 s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); 988 948 ret = compress_page(&compress, (void __force *)s, dst); 989 949 io_mapping_unmap_atomic(s); 990 - 991 950 if (ret) 992 - goto unwind; 951 + break; 993 952 } 994 - goto out; 995 953 996 - unwind: 997 - while (dst->page_count--) 998 - free_page((unsigned long)dst->pages[dst->page_count]); 999 - kfree(dst); 1000 - dst = NULL; 954 + if (ret || compress_flush(&compress, dst)) { 955 + while (dst->page_count--) 956 + free_page((unsigned long)dst->pages[dst->page_count]); 957 + kfree(dst); 958 + dst = NULL; 959 + } 1001 960 1002 - out: 1003 961 compress_fini(&compress, dst); 1004 962 ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE); 1005 963 return dst;

+1

drivers/gpu/drm/i915/i915_gpu_error.h

··· 135 135 struct drm_i915_error_object { 136 136 u64 gtt_offset; 137 137 u64 gtt_size; 138 + int num_pages; 138 139 int page_count; 139 140 int unused; 140 141 u32 *pages[0];

+12 -21

drivers/gpu/drm/i915/i915_irq.c

··· 3091 3091 spin_unlock(&i915->irq_lock); 3092 3092 } 3093 3093 3094 - static void 3095 - gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl, 3096 - u32 *iir) 3094 + static u32 3095 + gen11_gu_misc_irq_ack(struct drm_i915_private *dev_priv, const u32 master_ctl) 3097 3096 { 3098 3097 void __iomem * const regs = dev_priv->regs; 3098 + u32 iir; 3099 3099 3100 3100 if (!(master_ctl & GEN11_GU_MISC_IRQ)) 3101 - return; 3101 + return 0; 3102 3102 3103 - *iir = raw_reg_read(regs, GEN11_GU_MISC_IIR); 3104 - if (likely(*iir)) 3105 - raw_reg_write(regs, GEN11_GU_MISC_IIR, *iir); 3103 + iir = raw_reg_read(regs, GEN11_GU_MISC_IIR); 3104 + if (likely(iir)) 3105 + raw_reg_write(regs, GEN11_GU_MISC_IIR, iir); 3106 + 3107 + return iir; 3106 3108 } 3107 3109 3108 3110 static void 3109 - gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, 3110 - const u32 master_ctl, const u32 iir) 3111 + gen11_gu_misc_irq_handler(struct drm_i915_private *dev_priv, const u32 iir) 3111 3112 { 3112 - if (!(master_ctl & GEN11_GU_MISC_IRQ)) 3113 - return; 3114 - 3115 - if (unlikely(!iir)) { 3116 - DRM_ERROR("GU_MISC iir blank!\n"); 3117 - return; 3118 - } 3119 - 3120 3113 if (iir & GEN11_GU_MISC_GSE) 3121 3114 intel_opregion_asle_intr(dev_priv); 3122 - else 3123 - DRM_ERROR("Unexpected GU_MISC interrupt 0x%x\n", iir); 3124 3115 } 3125 3116 3126 3117 static irqreturn_t gen11_irq_handler(int irq, void *arg) ··· 3148 3157 enable_rpm_wakeref_asserts(i915); 3149 3158 } 3150 3159 3151 - gen11_gu_misc_irq_ack(i915, master_ctl, &gu_misc_iir); 3160 + gu_misc_iir = gen11_gu_misc_irq_ack(i915, master_ctl); 3152 3161 3153 3162 /* Acknowledge and enable interrupts. */ 3154 3163 raw_reg_write(regs, GEN11_GFX_MSTR_IRQ, GEN11_MASTER_IRQ | master_ctl); 3155 3164 3156 - gen11_gu_misc_irq_handler(i915, master_ctl, gu_misc_iir); 3165 + gen11_gu_misc_irq_handler(i915, gu_misc_iir); 3157 3166 3158 3167 return IRQ_HANDLED; 3159 3168 }

-1

drivers/gpu/drm/i915/i915_pci.c

··· 592 592 GEN10_FEATURES, \ 593 593 GEN(11), \ 594 594 .ddb_size = 2048, \ 595 - .has_csr = 0, \ 596 595 .has_logical_ring_elsq = 1 597 596 598 597 static const struct intel_device_info intel_icelake_11_info = {

+1 -1

drivers/iommu/amd_iommu.c

··· 3069 3069 return 0; 3070 3070 3071 3071 offset_mask = pte_pgsize - 1; 3072 - __pte = *pte & PM_ADDR_MASK; 3072 + __pte = __sme_clr(*pte & PM_ADDR_MASK); 3073 3073 3074 3074 return (__pte & ~offset_mask) | (iova & offset_mask); 3075 3075 }

+2 -2

drivers/md/dm-cache-metadata.c

··· 1455 1455 if (hints_valid) { 1456 1456 r = dm_array_cursor_next(&cmd->hint_cursor); 1457 1457 if (r) { 1458 - DMERR("dm_array_cursor_next for hint failed"); 1459 - goto out; 1458 + dm_array_cursor_end(&cmd->hint_cursor); 1459 + hints_valid = false; 1460 1460 } 1461 1461 } 1462 1462

+7 -2

drivers/md/dm-cache-target.c

··· 3009 3009 3010 3010 static bool can_resize(struct cache *cache, dm_cblock_t new_size) 3011 3011 { 3012 - if (from_cblock(new_size) > from_cblock(cache->cache_size)) 3013 - return true; 3012 + if (from_cblock(new_size) > from_cblock(cache->cache_size)) { 3013 + if (cache->sized) { 3014 + DMERR("%s: unable to extend cache due to missing cache table reload", 3015 + cache_device_name(cache)); 3016 + return false; 3017 + } 3018 + } 3014 3019 3015 3020 /* 3016 3021 * We can't drop a dirty block when shrinking the cache.

+8 -6

drivers/md/dm-mpath.c

··· 806 806 } 807 807 808 808 static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, 809 - const char *attached_handler_name, char **error) 809 + const char **attached_handler_name, char **error) 810 810 { 811 811 struct request_queue *q = bdev_get_queue(bdev); 812 812 int r; 813 813 814 814 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { 815 815 retain: 816 - if (attached_handler_name) { 816 + if (*attached_handler_name) { 817 817 /* 818 818 * Clear any hw_handler_params associated with a 819 819 * handler that isn't already attached. 820 820 */ 821 - if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) { 821 + if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) { 822 822 kfree(m->hw_handler_params); 823 823 m->hw_handler_params = NULL; 824 824 } ··· 830 830 * handler instead of the original table passed in. 831 831 */ 832 832 kfree(m->hw_handler_name); 833 - m->hw_handler_name = attached_handler_name; 833 + m->hw_handler_name = *attached_handler_name; 834 + *attached_handler_name = NULL; 834 835 } 835 836 } 836 837 ··· 868 867 struct pgpath *p; 869 868 struct multipath *m = ti->private; 870 869 struct request_queue *q; 871 - const char *attached_handler_name; 870 + const char *attached_handler_name = NULL; 872 871 873 872 /* we need at least a path arg */ 874 873 if (as->argc < 1) { ··· 891 890 attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); 892 891 if (attached_handler_name || m->hw_handler_name) { 893 892 INIT_DELAYED_WORK(&p->activate_path, activate_path_work); 894 - r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error); 893 + r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error); 895 894 if (r) { 896 895 dm_put_device(ti, p->path.dev); 897 896 goto bad; ··· 906 905 907 906 return p; 908 907 bad: 908 + kfree(attached_handler_name); 909 909 free_pgpath(p); 910 910 return ERR_PTR(r); 911 911 }

+1 -1

drivers/md/dm-raid.c

··· 3353 3353 }; 3354 3354 3355 3355 /* Return enum sync_state for @mddev derived from @recovery flags */ 3356 - static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery) 3356 + static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery) 3357 3357 { 3358 3358 if (test_bit(MD_RECOVERY_FROZEN, &recovery)) 3359 3359 return st_frozen;

+2 -4

drivers/md/dm-thin-metadata.c

··· 832 832 if (r) { 833 833 DMERR("could not get size of metadata device"); 834 834 pmd->metadata_reserve = max_blocks; 835 - } else { 836 - sector_div(total, 10); 837 - pmd->metadata_reserve = min(max_blocks, total); 838 - } 835 + } else 836 + pmd->metadata_reserve = min(max_blocks, div_u64(total, 10)); 839 837 } 840 838 841 839 struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,

+2 -2

drivers/net/dsa/b53/b53_common.c

··· 1291 1291 b53_get_vlan_entry(dev, vid, vl); 1292 1292 1293 1293 vl->members |= BIT(port); 1294 - if (untagged) 1294 + if (untagged && !dsa_is_cpu_port(ds, port)) 1295 1295 vl->untag |= BIT(port); 1296 1296 else 1297 1297 vl->untag &= ~BIT(port); ··· 1333 1333 pvid = 0; 1334 1334 } 1335 1335 1336 - if (untagged) 1336 + if (untagged && !dsa_is_cpu_port(ds, port)) 1337 1337 vl->untag &= ~(BIT(port)); 1338 1338 1339 1339 b53_set_vlan_entry(dev, vid, vl);

+8 -6

drivers/net/ethernet/broadcom/bnxt/bnxt.c

··· 3017 3017 { 3018 3018 struct pci_dev *pdev = bp->pdev; 3019 3019 3020 - dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, 3021 - bp->hwrm_cmd_resp_dma_addr); 3022 - 3023 - bp->hwrm_cmd_resp_addr = NULL; 3020 + if (bp->hwrm_cmd_resp_addr) { 3021 + dma_free_coherent(&pdev->dev, PAGE_SIZE, bp->hwrm_cmd_resp_addr, 3022 + bp->hwrm_cmd_resp_dma_addr); 3023 + bp->hwrm_cmd_resp_addr = NULL; 3024 + } 3024 3025 } 3025 3026 3026 3027 static int bnxt_alloc_hwrm_resources(struct bnxt *bp) ··· 4651 4650 FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; 4652 4651 enables |= ring_grps ? 4653 4652 FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; 4654 - enables |= vnics ? FUNC_VF_CFG_REQ_ENABLES_NUM_VNICS : 0; 4653 + enables |= vnics ? FUNC_CFG_REQ_ENABLES_NUM_VNICS : 0; 4655 4654 4656 4655 req->num_rx_rings = cpu_to_le16(rx_rings); 4657 4656 req->num_hw_ring_grps = cpu_to_le16(ring_grps); ··· 8622 8621 *max_tx = hw_resc->max_tx_rings; 8623 8622 *max_rx = hw_resc->max_rx_rings; 8624 8623 *max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp), 8625 - hw_resc->max_irqs); 8624 + hw_resc->max_irqs - bnxt_get_ulp_msix_num(bp)); 8626 8625 *max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs); 8627 8626 max_ring_grps = hw_resc->max_hw_ring_grps; 8628 8627 if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { ··· 9058 9057 bnxt_clear_int_mode(bp); 9059 9058 9060 9059 init_err_pci_clean: 9060 + bnxt_free_hwrm_resources(bp); 9061 9061 bnxt_cleanup_pci(bp); 9062 9062 9063 9063 init_err_free:

+3 -3

drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c

··· 98 98 99 99 bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1); 100 100 for (i = 0; i < max_tc; i++) { 101 - u8 qidx; 101 + u8 qidx = bp->tc_to_qidx[i]; 102 102 103 103 req.enables |= cpu_to_le32( 104 - QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i); 104 + QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << 105 + qidx); 105 106 106 107 memset(&cos2bw, 0, sizeof(cos2bw)); 107 - qidx = bp->tc_to_qidx[i]; 108 108 cos2bw.queue_id = bp->q_info[qidx].queue_id; 109 109 if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) { 110 110 cos2bw.tsa =

+17

drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c

··· 2158 2158 return -EPERM; 2159 2159 if (copy_from_user(&t, useraddr, sizeof(t))) 2160 2160 return -EFAULT; 2161 + if (t.cmd != CHELSIO_SET_QSET_PARAMS) 2162 + return -EINVAL; 2161 2163 if (t.qset_idx >= SGE_QSETS) 2162 2164 return -EINVAL; 2163 2165 if (!in_range(t.intr_lat, 0, M_NEWTIMER) || ··· 2259 2257 if (copy_from_user(&t, useraddr, sizeof(t))) 2260 2258 return -EFAULT; 2261 2259 2260 + if (t.cmd != CHELSIO_GET_QSET_PARAMS) 2261 + return -EINVAL; 2262 + 2262 2263 /* Display qsets for all ports when offload enabled */ 2263 2264 if (test_bit(OFFLOAD_DEVMAP_BIT, &adapter->open_device_map)) { 2264 2265 q1 = 0; ··· 2307 2302 return -EBUSY; 2308 2303 if (copy_from_user(&edata, useraddr, sizeof(edata))) 2309 2304 return -EFAULT; 2305 + if (edata.cmd != CHELSIO_SET_QSET_NUM) 2306 + return -EINVAL; 2310 2307 if (edata.val < 1 || 2311 2308 (edata.val > 1 && !(adapter->flags & USING_MSIX))) 2312 2309 return -EINVAL; ··· 2349 2342 return -EPERM; 2350 2343 if (copy_from_user(&t, useraddr, sizeof(t))) 2351 2344 return -EFAULT; 2345 + if (t.cmd != CHELSIO_LOAD_FW) 2346 + return -EINVAL; 2352 2347 /* Check t.len sanity ? 
*/ 2353 2348 fw_data = memdup_user(useraddr + sizeof(t), t.len); 2354 2349 if (IS_ERR(fw_data)) ··· 2374 2365 return -EBUSY; 2375 2366 if (copy_from_user(&m, useraddr, sizeof(m))) 2376 2367 return -EFAULT; 2368 + if (m.cmd != CHELSIO_SETMTUTAB) 2369 + return -EINVAL; 2377 2370 if (m.nmtus != NMTUS) 2378 2371 return -EINVAL; 2379 2372 if (m.mtus[0] < 81) /* accommodate SACK */ ··· 2417 2406 return -EBUSY; 2418 2407 if (copy_from_user(&m, useraddr, sizeof(m))) 2419 2408 return -EFAULT; 2409 + if (m.cmd != CHELSIO_SET_PM) 2410 + return -EINVAL; 2420 2411 if (!is_power_of_2(m.rx_pg_sz) || 2421 2412 !is_power_of_2(m.tx_pg_sz)) 2422 2413 return -EINVAL; /* not power of 2 */ ··· 2452 2439 return -EIO; /* need the memory controllers */ 2453 2440 if (copy_from_user(&t, useraddr, sizeof(t))) 2454 2441 return -EFAULT; 2442 + if (t.cmd != CHELSIO_GET_MEM) 2443 + return -EINVAL; 2455 2444 if ((t.addr & 7) || (t.len & 7)) 2456 2445 return -EINVAL; 2457 2446 if (t.mem_id == MEM_CM) ··· 2506 2491 return -EAGAIN; 2507 2492 if (copy_from_user(&t, useraddr, sizeof(t))) 2508 2493 return -EFAULT; 2494 + if (t.cmd != CHELSIO_SET_TRACE_FILTER) 2495 + return -EINVAL; 2509 2496 2510 2497 tp = (const struct trace_params *)&t.sip; 2511 2498 if (t.config_tx)

+1 -4

drivers/net/ethernet/emulex/benet/be_main.c

··· 4002 4002 netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | 4003 4003 NETIF_F_TSO | NETIF_F_TSO6 | 4004 4004 NETIF_F_GSO_UDP_TUNNEL; 4005 - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; 4006 - netdev->features |= NETIF_F_GSO_UDP_TUNNEL; 4007 4005 4008 4006 dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n", 4009 4007 be16_to_cpu(port)); ··· 4023 4025 adapter->vxlan_port = 0; 4024 4026 4025 4027 netdev->hw_enc_features = 0; 4026 - netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL); 4027 - netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL); 4028 4028 } 4029 4029 4030 4030 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs, ··· 5316 5320 struct be_adapter *adapter = netdev_priv(netdev); 5317 5321 5318 5322 netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | 5323 + NETIF_F_GSO_UDP_TUNNEL | 5319 5324 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | 5320 5325 NETIF_F_HW_VLAN_CTAG_TX; 5321 5326 if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))

+5 -4

drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c

··· 1755 1755 } 1756 1756 1757 1757 /* Set Tx descriptors fields relevant for CSUM calculation */ 1758 - static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto, 1758 + static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto, 1759 1759 int ip_hdr_len, int l4_proto) 1760 1760 { 1761 1761 u32 command; ··· 2645 2645 if (skb->ip_summed == CHECKSUM_PARTIAL) { 2646 2646 int ip_hdr_len = 0; 2647 2647 u8 l4_proto; 2648 + __be16 l3_proto = vlan_get_protocol(skb); 2648 2649 2649 - if (skb->protocol == htons(ETH_P_IP)) { 2650 + if (l3_proto == htons(ETH_P_IP)) { 2650 2651 struct iphdr *ip4h = ip_hdr(skb); 2651 2652 2652 2653 /* Calculate IPv4 checksum and L4 checksum */ 2653 2654 ip_hdr_len = ip4h->ihl; 2654 2655 l4_proto = ip4h->protocol; 2655 - } else if (skb->protocol == htons(ETH_P_IPV6)) { 2656 + } else if (l3_proto == htons(ETH_P_IPV6)) { 2656 2657 struct ipv6hdr *ip6h = ipv6_hdr(skb); 2657 2658 2658 2659 /* Read l4_protocol from one of IPv6 extra headers */ ··· 2665 2664 } 2666 2665 2667 2666 return mvpp2_txq_desc_csum(skb_network_offset(skb), 2668 - skb->protocol, ip_hdr_len, l4_proto); 2667 + l3_proto, ip_hdr_len, l4_proto); 2669 2668 } 2670 2669 2671 2670 return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;

+7 -4

drivers/net/ethernet/mellanox/mlxsw/pci.c

··· 718 718 memset(&active_cqns, 0, sizeof(active_cqns)); 719 719 720 720 while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) { 721 - u8 event_type = mlxsw_pci_eqe_event_type_get(eqe); 722 721 723 - switch (event_type) { 724 - case MLXSW_PCI_EQE_EVENT_TYPE_CMD: 722 + /* Command interface completion events are always received on 723 + * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events 724 + * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1). 725 + */ 726 + switch (q->num) { 727 + case MLXSW_PCI_EQ_ASYNC_NUM: 725 728 mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe); 726 729 q->u.eq.ev_cmd_count++; 727 730 break; 728 - case MLXSW_PCI_EQE_EVENT_TYPE_COMP: 731 + case MLXSW_PCI_EQ_COMP_NUM: 729 732 cqn = mlxsw_pci_eqe_cqn_get(eqe); 730 733 set_bit(cqn, active_cqns); 731 734 cq_handle = true;

+2

drivers/net/ethernet/mellanox/mlxsw/spectrum.c

··· 4855 4855 upper_dev = info->upper_dev; 4856 4856 if (info->linking) 4857 4857 break; 4858 + if (is_vlan_dev(upper_dev)) 4859 + mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev); 4858 4860 if (netif_is_macvlan(upper_dev)) 4859 4861 mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev); 4860 4862 break;

+4

drivers/net/hamradio/yam.c

··· 966 966 sizeof(struct yamdrv_ioctl_mcs)); 967 967 if (IS_ERR(ym)) 968 968 return PTR_ERR(ym); 969 + if (ym->cmd != SIOCYAMSMCS) 970 + return -EINVAL; 969 971 if (ym->bitrate > YAM_MAXBITRATE) { 970 972 kfree(ym); 971 973 return -EINVAL; ··· 983 981 if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg))) 984 982 return -EFAULT; 985 983 984 + if (yi.cmd != SIOCYAMSCFG) 985 + return -EINVAL; 986 986 if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev)) 987 987 return -EINVAL; /* Cannot change this parameter when up */ 988 988 if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev))

+28 -20

drivers/net/phy/phylink.c

··· 690 690 return 0; 691 691 } 692 692 693 + static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy, 694 + phy_interface_t interface) 695 + { 696 + int ret; 697 + 698 + if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || 699 + (pl->link_an_mode == MLO_AN_INBAND && 700 + phy_interface_mode_is_8023z(interface)))) 701 + return -EINVAL; 702 + 703 + if (pl->phydev) 704 + return -EBUSY; 705 + 706 + ret = phy_attach_direct(pl->netdev, phy, 0, interface); 707 + if (ret) 708 + return ret; 709 + 710 + ret = phylink_bringup_phy(pl, phy); 711 + if (ret) 712 + phy_detach(phy); 713 + 714 + return ret; 715 + } 716 + 693 717 /** 694 718 * phylink_connect_phy() - connect a PHY to the phylink instance 695 719 * @pl: a pointer to a &struct phylink returned from phylink_create() ··· 731 707 */ 732 708 int phylink_connect_phy(struct phylink *pl, struct phy_device *phy) 733 709 { 734 - int ret; 735 - 736 - if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED || 737 - (pl->link_an_mode == MLO_AN_INBAND && 738 - phy_interface_mode_is_8023z(pl->link_interface)))) 739 - return -EINVAL; 740 - 741 - if (pl->phydev) 742 - return -EBUSY; 743 - 744 710 /* Use PHY device/driver interface */ 745 711 if (pl->link_interface == PHY_INTERFACE_MODE_NA) { 746 712 pl->link_interface = phy->interface; 747 713 pl->link_config.interface = pl->link_interface; 748 714 } 749 715 750 - ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface); 751 - if (ret) 752 - return ret; 753 - 754 - ret = phylink_bringup_phy(pl, phy); 755 - if (ret) 756 - phy_detach(phy); 757 - 758 - return ret; 716 + return __phylink_connect_phy(pl, phy, pl->link_interface); 759 717 } 760 718 EXPORT_SYMBOL_GPL(phylink_connect_phy); 761 719 ··· 1654 1648 1655 1649 static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) 1656 1650 { 1657 - return phylink_connect_phy(upstream, phy); 1651 + struct phylink *pl = upstream; 1652 + 1653 + return __phylink_connect_phy(upstream, phy, pl->link_config.interface); 
1658 1654 } 1659 1655 1660 1656 static void phylink_sfp_disconnect_phy(void *upstream)

+6

drivers/net/team/team.c

··· 1167 1167 return -EBUSY; 1168 1168 } 1169 1169 1170 + if (dev == port_dev) { 1171 + NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself"); 1172 + netdev_err(dev, "Cannot enslave team device to itself\n"); 1173 + return -EINVAL; 1174 + } 1175 + 1170 1176 if (port_dev->features & NETIF_F_VLAN_CHALLENGED && 1171 1177 vlan_uses_dev(dev)) { 1172 1178 NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");

+1

drivers/net/usb/smsc75xx.c

··· 1520 1520 { 1521 1521 struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]); 1522 1522 if (pdata) { 1523 + cancel_work_sync(&pdata->set_multicast); 1523 1524 netif_dbg(dev, ifdown, dev->net, "free pdata\n"); 1524 1525 kfree(pdata); 1525 1526 pdata = NULL;

+49 -5

drivers/pci/controller/pci-mvebu.c

··· 1145 1145 { 1146 1146 struct device *dev = &pcie->pdev->dev; 1147 1147 struct device_node *np = dev->of_node; 1148 - unsigned int i; 1149 1148 int ret; 1150 1149 1151 1150 INIT_LIST_HEAD(&pcie->resources); ··· 1178 1179 resource_size(&pcie->io) - 1); 1179 1180 pcie->realio.name = "PCI I/O"; 1180 1181 1181 - for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K) 1182 - pci_ioremap_io(i, pcie->io.start + i); 1183 - 1184 1182 pci_add_resource(&pcie->resources, &pcie->realio); 1185 1183 } 1186 1184 1187 1185 return devm_request_pci_bus_resources(dev, &pcie->resources); 1186 + } 1187 + 1188 + /* 1189 + * This is a copy of pci_host_probe(), except that it does the I/O 1190 + * remap as the last step, once we are sure we won't fail. 1191 + * 1192 + * It should be removed once the I/O remap error handling issue has 1193 + * been sorted out. 1194 + */ 1195 + static int mvebu_pci_host_probe(struct pci_host_bridge *bridge) 1196 + { 1197 + struct mvebu_pcie *pcie; 1198 + struct pci_bus *bus, *child; 1199 + int ret; 1200 + 1201 + ret = pci_scan_root_bus_bridge(bridge); 1202 + if (ret < 0) { 1203 + dev_err(bridge->dev.parent, "Scanning root bridge failed"); 1204 + return ret; 1205 + } 1206 + 1207 + pcie = pci_host_bridge_priv(bridge); 1208 + if (resource_size(&pcie->io) != 0) { 1209 + unsigned int i; 1210 + 1211 + for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K) 1212 + pci_ioremap_io(i, pcie->io.start + i); 1213 + } 1214 + 1215 + bus = bridge->bus; 1216 + 1217 + /* 1218 + * We insert PCI resources into the iomem_resource and 1219 + * ioport_resource trees in either pci_bus_claim_resources() 1220 + * or pci_bus_assign_resources(). 
1221 + */ 1222 + if (pci_has_flag(PCI_PROBE_ONLY)) { 1223 + pci_bus_claim_resources(bus); 1224 + } else { 1225 + pci_bus_size_bridges(bus); 1226 + pci_bus_assign_resources(bus); 1227 + 1228 + list_for_each_entry(child, &bus->children, node) 1229 + pcie_bus_configure_settings(child); 1230 + } 1231 + 1232 + pci_bus_add_devices(bus); 1233 + return 0; 1188 1234 } 1189 1235 1190 1236 static int mvebu_pcie_probe(struct platform_device *pdev) ··· 1312 1268 bridge->align_resource = mvebu_pcie_align_resource; 1313 1269 bridge->msi = pcie->msi; 1314 1270 1315 - return pci_host_probe(bridge); 1271 + return mvebu_pci_host_probe(bridge); 1316 1272 } 1317 1273 1318 1274 static const struct of_device_id mvebu_pcie_of_match_table[] = {

+19 -8

drivers/pci/pci.c

··· 1289 1289 EXPORT_SYMBOL(pci_save_state); 1290 1290 1291 1291 static void pci_restore_config_dword(struct pci_dev *pdev, int offset, 1292 - u32 saved_val, int retry) 1292 + u32 saved_val, int retry, bool force) 1293 1293 { 1294 1294 u32 val; 1295 1295 1296 1296 pci_read_config_dword(pdev, offset, &val); 1297 - if (val == saved_val) 1297 + if (!force && val == saved_val) 1298 1298 return; 1299 1299 1300 1300 for (;;) { ··· 1313 1313 } 1314 1314 1315 1315 static void pci_restore_config_space_range(struct pci_dev *pdev, 1316 - int start, int end, int retry) 1316 + int start, int end, int retry, 1317 + bool force) 1317 1318 { 1318 1319 int index; 1319 1320 1320 1321 for (index = end; index >= start; index--) 1321 1322 pci_restore_config_dword(pdev, 4 * index, 1322 1323 pdev->saved_config_space[index], 1323 - retry); 1324 + retry, force); 1324 1325 } 1325 1326 1326 1327 static void pci_restore_config_space(struct pci_dev *pdev) 1327 1328 { 1328 1329 if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) { 1329 - pci_restore_config_space_range(pdev, 10, 15, 0); 1330 + pci_restore_config_space_range(pdev, 10, 15, 0, false); 1330 1331 /* Restore BARs before the command register. */ 1331 - pci_restore_config_space_range(pdev, 4, 9, 10); 1332 - pci_restore_config_space_range(pdev, 0, 3, 0); 1332 + pci_restore_config_space_range(pdev, 4, 9, 10, false); 1333 + pci_restore_config_space_range(pdev, 0, 3, 0, false); 1334 + } else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1335 + pci_restore_config_space_range(pdev, 12, 15, 0, false); 1336 + 1337 + /* 1338 + * Force rewriting of prefetch registers to avoid S3 resume 1339 + * issues on Intel PCI bridges that occur when these 1340 + * registers are not explicitly written. 
1341 + */ 1342 + pci_restore_config_space_range(pdev, 9, 11, 0, true); 1343 + pci_restore_config_space_range(pdev, 0, 8, 0, false); 1333 1344 } else { 1334 - pci_restore_config_space_range(pdev, 0, 15, 0); 1345 + pci_restore_config_space_range(pdev, 0, 15, 0, false); 1335 1346 } 1336 1347 } 1337 1348

+1

fs/cifs/cifsglob.h

··· 1553 1553 1554 1554 /* Flags */ 1555 1555 #define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */ 1556 + #define MID_DELETED 2 /* Mid has been dequeued/deleted */ 1556 1557 1557 1558 /* Types of response buffer returned from SendReceive2 */ 1558 1559 #define CIFS_NO_BUFFER 0 /* Response buffer not returned */

+10 -3

fs/cifs/connect.c

··· 659 659 mid->mid_state = MID_RESPONSE_RECEIVED; 660 660 else 661 661 mid->mid_state = MID_RESPONSE_MALFORMED; 662 - list_del_init(&mid->qhead); 662 + /* 663 + * Trying to handle/dequeue a mid after the send_recv() 664 + * function has finished processing it is a bug. 665 + */ 666 + if (mid->mid_flags & MID_DELETED) 667 + printk_once(KERN_WARNING 668 + "trying to dequeue a deleted mid\n"); 669 + else 670 + list_del_init(&mid->qhead); 663 671 spin_unlock(&GlobalMid_Lock); 664 672 } 665 673 ··· 946 938 } else { 947 939 mids[0] = server->ops->find_mid(server, buf); 948 940 bufs[0] = buf; 949 - if (mids[0]) 950 - num_mids = 1; 941 + num_mids = 1; 951 942 952 943 if (!mids[0] || !mids[0]->receive) 953 944 length = standard_receive3(server, mids[0]);

+1 -1

fs/cifs/smb2ops.c

··· 1477 1477 } 1478 1478 1479 1479 srch_inf->entries_in_buffer = 0; 1480 - srch_inf->index_of_last_entry = 0; 1480 + srch_inf->index_of_last_entry = 2; 1481 1481 1482 1482 rc = SMB2_query_directory(xid, tcon, fid->persistent_fid, 1483 1483 fid->volatile_fid, 0, srch_inf);

+19 -2

fs/cifs/transport.c

··· 142 142 cifs_delete_mid(struct mid_q_entry *mid) 143 143 { 144 144 spin_lock(&GlobalMid_Lock); 145 - list_del(&mid->qhead); 145 + list_del_init(&mid->qhead); 146 + mid->mid_flags |= MID_DELETED; 146 147 spin_unlock(&GlobalMid_Lock); 147 148 148 149 DeleteMidQEntry(mid); ··· 773 772 return mid; 774 773 } 775 774 775 + static void 776 + cifs_noop_callback(struct mid_q_entry *mid) 777 + { 778 + } 779 + 776 780 int 777 781 compound_send_recv(const unsigned int xid, struct cifs_ses *ses, 778 782 const int flags, const int num_rqst, struct smb_rqst *rqst, ··· 832 826 } 833 827 834 828 midQ[i]->mid_state = MID_REQUEST_SUBMITTED; 829 + /* 830 + * We don't invoke the callback for compounds unless it is the last 831 + * request. 832 + */ 833 + if (i < num_rqst - 1) 834 + midQ[i]->callback = cifs_noop_callback; 835 835 } 836 - 837 836 cifs_in_send_inc(ses->server); 838 837 rc = smb_send_rqst(ses->server, num_rqst, rqst, flags); 839 838 cifs_in_send_dec(ses->server); ··· 919 908 midQ[i]->resp_buf = NULL; 920 909 } 921 910 out: 911 + /* 912 + * This will dequeue all mids. After this it is important that the 913 + * demultiplex_thread will not process any of these mids any further. 914 + * This is prevented above by using a noop callback that will not 915 + * wake this thread except for the very last PDU. 916 + */ 922 917 for (i = 0; i < num_rqst; i++) 923 918 cifs_delete_mid(midQ[i]); 924 919 add_credits(ses->server, credits, optype);

+1 -1

fs/ioctl.c

··· 230 230 ret = -EXDEV; 231 231 if (src_file.file->f_path.mnt != dst_file->f_path.mnt) 232 232 goto fdput; 233 - ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen); 233 + ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen); 234 234 fdput: 235 235 fdput(src_file); 236 236 return ret;

+1 -1

fs/iomap.c

··· 1051 1051 } else { 1052 1052 WARN_ON_ONCE(!PageUptodate(page)); 1053 1053 iomap_page_create(inode, page); 1054 + set_page_dirty(page); 1054 1055 } 1055 1056 1056 1057 return length; ··· 1091 1090 length -= ret; 1092 1091 } 1093 1092 1094 - set_page_dirty(page); 1095 1093 wait_for_stable_page(page); 1096 1094 return VM_FAULT_LOCKED; 1097 1095 out_unlock:

+2 -1

fs/nfsd/vfs.c

··· 541 541 __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst, 542 542 u64 dst_pos, u64 count) 543 543 { 544 - return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count)); 544 + return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos, 545 + count)); 545 546 } 546 547 547 548 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,

+2 -2

fs/ocfs2/dlm/dlmmaster.c

··· 584 584 585 585 res->last_used = 0; 586 586 587 - spin_lock(&dlm->spinlock); 587 + spin_lock(&dlm->track_lock); 588 588 list_add_tail(&res->tracking, &dlm->tracking_list); 589 - spin_unlock(&dlm->spinlock); 589 + spin_unlock(&dlm->track_lock); 590 590 591 591 memset(res->lvb, 0, DLM_LVB_LEN); 592 592 memset(res->refmap, 0, sizeof(res->refmap));

+12 -4

fs/ocfs2/refcounttree.c

··· 2946 2946 if (map_end & (PAGE_SIZE - 1)) 2947 2947 to = map_end & (PAGE_SIZE - 1); 2948 2948 2949 + retry: 2949 2950 page = find_or_create_page(mapping, page_index, GFP_NOFS); 2950 2951 if (!page) { 2951 2952 ret = -ENOMEM; ··· 2955 2954 } 2956 2955 2957 2956 /* 2958 - * In case PAGE_SIZE <= CLUSTER_SIZE, This page 2959 - * can't be dirtied before we CoW it out. 2957 + * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty 2958 + * page, so write it back. 2960 2959 */ 2961 - if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) 2962 - BUG_ON(PageDirty(page)); 2960 + if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { 2961 + if (PageDirty(page)) { 2962 + /* 2963 + * write_one_page will unlock the page on return 2964 + */ 2965 + ret = write_one_page(page); 2966 + goto retry; 2967 + } 2968 + } 2963 2969 2964 2970 if (!PageUptodate(page)) { 2965 2971 ret = block_read_full_page(page, ocfs2_get_block);

+1 -1

fs/overlayfs/copy_up.c

··· 141 141 } 142 142 143 143 /* Try to use clone_file_range to clone up within the same fs */ 144 - error = vfs_clone_file_range(old_file, 0, new_file, 0, len); 144 + error = do_clone_file_range(old_file, 0, new_file, 0, len); 145 145 if (!error) 146 146 goto out; 147 147 /* Couldn't clone, so now we try to copy the data */

+2

fs/overlayfs/file.c

··· 240 240 goto out_unlock; 241 241 242 242 old_cred = ovl_override_creds(file_inode(file)->i_sb); 243 + file_start_write(real.file); 243 244 ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, 244 245 ovl_iocb_to_rwf(iocb)); 246 + file_end_write(real.file); 245 247 revert_creds(old_cred); 246 248 247 249 /* Update size */

+1 -1

fs/overlayfs/inode.c

··· 504 504 .update_time = ovl_update_time, 505 505 }; 506 506 507 - const struct address_space_operations ovl_aops = { 507 + static const struct address_space_operations ovl_aops = { 508 508 /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ 509 509 .direct_IO = noop_direct_IO, 510 510 };

+1 -1

fs/overlayfs/namei.c

··· 686 686 index = NULL; 687 687 goto out; 688 688 } 689 - pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%*s, err=%i);\n" 689 + pr_warn_ratelimited("overlayfs: failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n" 690 690 "overlayfs: mount with '-o index=off' to disable inodes index.\n", 691 691 d_inode(origin)->i_ino, name.len, name.name, 692 692 err);

+2 -2

fs/overlayfs/overlayfs.h

··· 152 152 const void *value, size_t size, int flags) 153 153 { 154 154 int err = vfs_setxattr(dentry, name, value, size, flags); 155 - pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", 156 - dentry, name, (int) size, (char *) value, flags, err); 155 + pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0x%x) = %i\n", 156 + dentry, name, min((int)size, 48), value, size, flags, err); 157 157 return err; 158 158 } 159 159

+2 -1

fs/overlayfs/util.c

··· 683 683 struct dentry *upperdentry = ovl_dentry_upper(dentry); 684 684 struct dentry *index = NULL; 685 685 struct inode *inode; 686 - struct qstr name; 686 + struct qstr name = { }; 687 687 int err; 688 688 689 689 err = ovl_get_index_name(lowerdentry, &name); ··· 726 726 goto fail; 727 727 728 728 out: 729 + kfree(name.name); 729 730 dput(index); 730 731 return; 731 732

+14

fs/proc/base.c

··· 407 407 unsigned long *entries; 408 408 int err; 409 409 410 + /* 411 + * The ability to racily run the kernel stack unwinder on a running task 412 + * and then observe the unwinder output is scary; while it is useful for 413 + * debugging kernel issues, it can also allow an attacker to leak kernel 414 + * stack contents. 415 + * Doing this in a manner that is at least safe from races would require 416 + * some work to ensure that the remote task can not be scheduled; and 417 + * even then, this would still expose the unwinder as local attack 418 + * surface. 419 + * Therefore, this interface is restricted to root. 420 + */ 421 + if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) 422 + return -EACCES; 423 + 410 424 entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), 411 425 GFP_KERNEL); 412 426 if (!entries)

+15 -2

fs/read_write.c

··· 1818 1818 } 1819 1819 EXPORT_SYMBOL(vfs_clone_file_prep_inodes); 1820 1820 1821 - int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1822 - struct file *file_out, loff_t pos_out, u64 len) 1821 + int do_clone_file_range(struct file *file_in, loff_t pos_in, 1822 + struct file *file_out, loff_t pos_out, u64 len) 1823 1823 { 1824 1824 struct inode *inode_in = file_inode(file_in); 1825 1825 struct inode *inode_out = file_inode(file_out); ··· 1863 1863 fsnotify_access(file_in); 1864 1864 fsnotify_modify(file_out); 1865 1865 } 1866 + 1867 + return ret; 1868 + } 1869 + EXPORT_SYMBOL(do_clone_file_range); 1870 + 1871 + int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1872 + struct file *file_out, loff_t pos_out, u64 len) 1873 + { 1874 + int ret; 1875 + 1876 + file_start_write(file_out); 1877 + ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len); 1878 + file_end_write(file_out); 1866 1879 1867 1880 return ret; 1868 1881 }

+8 -20

fs/xfs/libxfs/xfs_attr.c

··· 587 587 */ 588 588 error = xfs_attr3_leaf_to_node(args); 589 589 if (error) 590 - goto out_defer_cancel; 590 + return error; 591 591 error = xfs_defer_finish(&args->trans); 592 592 if (error) 593 593 return error; ··· 675 675 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 676 676 /* bp is gone due to xfs_da_shrink_inode */ 677 677 if (error) 678 - goto out_defer_cancel; 678 + return error; 679 679 error = xfs_defer_finish(&args->trans); 680 680 if (error) 681 681 return error; ··· 692 692 */ 693 693 error = xfs_attr3_leaf_clearflag(args); 694 694 } 695 - return error; 696 - out_defer_cancel: 697 - xfs_defer_cancel(args->trans); 698 695 return error; 699 696 } 700 697 ··· 735 738 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 736 739 /* bp is gone due to xfs_da_shrink_inode */ 737 740 if (error) 738 - goto out_defer_cancel; 741 + return error; 739 742 error = xfs_defer_finish(&args->trans); 740 743 if (error) 741 744 return error; 742 745 } 743 746 return 0; 744 - out_defer_cancel: 745 - xfs_defer_cancel(args->trans); 746 - return error; 747 747 } 748 748 749 749 /* ··· 858 864 state = NULL; 859 865 error = xfs_attr3_leaf_to_node(args); 860 866 if (error) 861 - goto out_defer_cancel; 867 + goto out; 862 868 error = xfs_defer_finish(&args->trans); 863 869 if (error) 864 870 goto out; ··· 882 888 */ 883 889 error = xfs_da3_split(state); 884 890 if (error) 885 - goto out_defer_cancel; 891 + goto out; 886 892 error = xfs_defer_finish(&args->trans); 887 893 if (error) 888 894 goto out; ··· 978 984 if (retval && (state->path.active > 1)) { 979 985 error = xfs_da3_join(state); 980 986 if (error) 981 - goto out_defer_cancel; 987 + goto out; 982 988 error = xfs_defer_finish(&args->trans); 983 989 if (error) 984 990 goto out; ··· 1007 1013 if (error) 1008 1014 return error; 1009 1015 return retval; 1010 - out_defer_cancel: 1011 - xfs_defer_cancel(args->trans); 1012 - goto out; 1013 1016 } 1014 1017 1015 1018 /* ··· 1098 1107 if (retval && 
(state->path.active > 1)) { 1099 1108 error = xfs_da3_join(state); 1100 1109 if (error) 1101 - goto out_defer_cancel; 1110 + goto out; 1102 1111 error = xfs_defer_finish(&args->trans); 1103 1112 if (error) 1104 1113 goto out; ··· 1129 1138 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 1130 1139 /* bp is gone due to xfs_da_shrink_inode */ 1131 1140 if (error) 1132 - goto out_defer_cancel; 1141 + goto out; 1133 1142 error = xfs_defer_finish(&args->trans); 1134 1143 if (error) 1135 1144 goto out; ··· 1141 1150 out: 1142 1151 xfs_da_state_free(state); 1143 1152 return error; 1144 - out_defer_cancel: 1145 - xfs_defer_cancel(args->trans); 1146 - goto out; 1147 1153 } 1148 1154 1149 1155 /*

+2 -8

fs/xfs/libxfs/xfs_attr_remote.c

··· 485 485 blkcnt, XFS_BMAPI_ATTRFORK, args->total, &map, 486 486 &nmap); 487 487 if (error) 488 - goto out_defer_cancel; 488 + return error; 489 489 error = xfs_defer_finish(&args->trans); 490 490 if (error) 491 491 return error; ··· 553 553 } 554 554 ASSERT(valuelen == 0); 555 555 return 0; 556 - out_defer_cancel: 557 - xfs_defer_cancel(args->trans); 558 - return error; 559 556 } 560 557 561 558 /* ··· 622 625 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 623 626 XFS_BMAPI_ATTRFORK, 1, &done); 624 627 if (error) 625 - goto out_defer_cancel; 628 + return error; 626 629 error = xfs_defer_finish(&args->trans); 627 630 if (error) 628 631 return error; ··· 635 638 return error; 636 639 } 637 640 return 0; 638 - out_defer_cancel: 639 - xfs_defer_cancel(args->trans); 640 - return error; 641 641 }

+13 -11

fs/xfs/libxfs/xfs_bmap.c

··· 673 673 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS); 674 674 675 675 /* 676 - * Make space in the inode incore. 676 + * Make space in the inode incore. This needs to be undone if we fail 677 + * to expand the root. 677 678 */ 678 679 xfs_iroot_realloc(ip, 1, whichfork); 679 680 ifp->if_flags |= XFS_IFBROOT; ··· 712 711 args.minlen = args.maxlen = args.prod = 1; 713 712 args.wasdel = wasdel; 714 713 *logflagsp = 0; 715 - if ((error = xfs_alloc_vextent(&args))) { 716 - ASSERT(ifp->if_broot == NULL); 717 - goto err1; 718 - } 714 + error = xfs_alloc_vextent(&args); 715 + if (error) 716 + goto out_root_realloc; 719 717 720 718 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { 721 - ASSERT(ifp->if_broot == NULL); 722 719 error = -ENOSPC; 723 - goto err1; 720 + goto out_root_realloc; 724 721 } 722 + 725 723 /* 726 724 * Allocation can't fail, the space was reserved. 727 725 */ ··· 732 732 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); 733 733 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); 734 734 if (!abp) { 735 - error = -ENOSPC; 736 - goto err2; 735 + error = -EFSCORRUPTED; 736 + goto out_unreserve_dquot; 737 737 } 738 + 738 739 /* 739 740 * Fill in the child block. 740 741 */ ··· 776 775 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork); 777 776 return 0; 778 777 779 - err2: 778 + out_unreserve_dquot: 780 779 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); 781 - err1: 780 + out_root_realloc: 782 781 xfs_iroot_realloc(ip, -1, whichfork); 783 782 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); 783 + ASSERT(ifp->if_broot == NULL); 784 784 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); 785 785 786 786 return error;

+2

fs/xfs/libxfs/xfs_format.h

··· 1016 1016 #define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */ 1017 1017 #define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */ 1018 1018 #define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */ 1019 + /* Do not use bit 15, di_flags is legacy and unchanging now */ 1020 + 1019 1021 #define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT) 1020 1022 #define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT) 1021 1023 #define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)

+30

fs/xfs/libxfs/xfs_inode_buf.c

··· 415 415 return NULL; 416 416 } 417 417 418 + static xfs_failaddr_t 419 + xfs_dinode_verify_forkoff( 420 + struct xfs_dinode *dip, 421 + struct xfs_mount *mp) 422 + { 423 + if (!XFS_DFORK_Q(dip)) 424 + return NULL; 425 + 426 + switch (dip->di_format) { 427 + case XFS_DINODE_FMT_DEV: 428 + if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3)) 429 + return __this_address; 430 + break; 431 + case XFS_DINODE_FMT_LOCAL: /* fall through ... */ 432 + case XFS_DINODE_FMT_EXTENTS: /* fall through ... */ 433 + case XFS_DINODE_FMT_BTREE: 434 + if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3)) 435 + return __this_address; 436 + break; 437 + default: 438 + return __this_address; 439 + } 440 + return NULL; 441 + } 442 + 418 443 xfs_failaddr_t 419 444 xfs_dinode_verify( 420 445 struct xfs_mount *mp, ··· 494 469 495 470 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 496 471 return __this_address; 472 + 473 + /* check for illegal values of forkoff */ 474 + fa = xfs_dinode_verify_forkoff(dip, mp); 475 + if (fa) 476 + return fa; 497 477 498 478 /* Do we have appropriate data fork formats for the mode? */ 499 479 switch (mode & S_IFMT) {

-1

fs/xfs/scrub/alloc.c

··· 17 17 #include "xfs_sb.h" 18 18 #include "xfs_alloc.h" 19 19 #include "xfs_rmap.h" 20 - #include "xfs_alloc.h" 21 20 #include "scrub/xfs_scrub.h" 22 21 #include "scrub/scrub.h" 23 22 #include "scrub/common.h"

+3 -1

fs/xfs/scrub/inode.c

··· 126 126 { 127 127 struct xfs_mount *mp = sc->mp; 128 128 129 + /* di_flags are all taken, last bit cannot be used */ 129 130 if (flags & ~XFS_DIFLAG_ANY) 130 131 goto bad; 131 132 ··· 173 172 { 174 173 struct xfs_mount *mp = sc->mp; 175 174 175 + /* Unknown di_flags2 could be from a future kernel */ 176 176 if (flags2 & ~XFS_DIFLAG2_ANY) 177 - goto bad; 177 + xchk_ino_set_warning(sc, ino); 178 178 179 179 /* reflink flag requires reflink feature */ 180 180 if ((flags2 & XFS_DIFLAG2_REFLINK) &&

+7 -13

fs/xfs/xfs_bmap_util.c

··· 702 702 struct xfs_iext_cursor icur; 703 703 int error = 0; 704 704 705 - xfs_ilock(ip, XFS_ILOCK_EXCL); 706 - if (!(ifp->if_flags & XFS_IFEXTENTS)) { 707 - error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); 708 - if (error) 709 - goto out_unlock; 710 - } 705 + ASSERT(ifp->if_flags & XFS_IFEXTENTS); 711 706 707 + xfs_ilock(ip, XFS_ILOCK_EXCL); 712 708 if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got)) 713 709 goto out_unlock; 714 710 ··· 1580 1584 tirec.br_blockcount, &irec, 1581 1585 &nimaps, 0); 1582 1586 if (error) 1583 - goto out_defer; 1587 + goto out; 1584 1588 ASSERT(nimaps == 1); 1585 1589 ASSERT(tirec.br_startoff == irec.br_startoff); 1586 1590 trace_xfs_swap_extent_rmap_remap_piece(ip, &irec); ··· 1595 1599 /* Remove the mapping from the donor file. */ 1596 1600 error = xfs_bmap_unmap_extent(tp, tip, &uirec); 1597 1601 if (error) 1598 - goto out_defer; 1602 + goto out; 1599 1603 1600 1604 /* Remove the mapping from the source file. */ 1601 1605 error = xfs_bmap_unmap_extent(tp, ip, &irec); 1602 1606 if (error) 1603 - goto out_defer; 1607 + goto out; 1604 1608 1605 1609 /* Map the donor file's blocks into the source file. */ 1606 1610 error = xfs_bmap_map_extent(tp, ip, &uirec); 1607 1611 if (error) 1608 - goto out_defer; 1612 + goto out; 1609 1613 1610 1614 /* Map the source file's blocks into the donor file. */ 1611 1615 error = xfs_bmap_map_extent(tp, tip, &irec); 1612 1616 if (error) 1613 - goto out_defer; 1617 + goto out; 1614 1618 1615 1619 error = xfs_defer_finish(tpp); 1616 1620 tp = *tpp; ··· 1632 1636 tip->i_d.di_flags2 = tip_flags2; 1633 1637 return 0; 1634 1638 1635 - out_defer: 1636 - xfs_defer_cancel(tp); 1637 1639 out: 1638 1640 trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_); 1639 1641 tip->i_d.di_flags2 = tip_flags2;

+64 -55

fs/xfs/xfs_buf_item.c

··· 532 532 } 533 533 534 534 /* 535 + * Drop the buffer log item refcount and take appropriate action. This helper 536 + * determines whether the bli must be freed or not, since a decrement to zero 537 + * does not necessarily mean the bli is unused. 538 + * 539 + * Return true if the bli is freed, false otherwise. 540 + */ 541 + bool 542 + xfs_buf_item_put( 543 + struct xfs_buf_log_item *bip) 544 + { 545 + struct xfs_log_item *lip = &bip->bli_item; 546 + bool aborted; 547 + bool dirty; 548 + 549 + /* drop the bli ref and return if it wasn't the last one */ 550 + if (!atomic_dec_and_test(&bip->bli_refcount)) 551 + return false; 552 + 553 + /* 554 + * We dropped the last ref and must free the item if clean or aborted. 555 + * If the bli is dirty and non-aborted, the buffer was clean in the 556 + * transaction but still awaiting writeback from previous changes. In 557 + * that case, the bli is freed on buffer writeback completion. 558 + */ 559 + aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || 560 + XFS_FORCED_SHUTDOWN(lip->li_mountp); 561 + dirty = bip->bli_flags & XFS_BLI_DIRTY; 562 + if (dirty && !aborted) 563 + return false; 564 + 565 + /* 566 + * The bli is aborted or clean. An aborted item may be in the AIL 567 + * regardless of dirty state. For example, consider an aborted 568 + * transaction that invalidated a dirty bli and cleared the dirty 569 + * state. 570 + */ 571 + if (aborted) 572 + xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); 573 + xfs_buf_item_relse(bip->bli_buf); 574 + return true; 575 + } 576 + 577 + /* 535 578 * Release the buffer associated with the buf log item. If there is no dirty 536 579 * logged data associated with the buffer recorded in the buf log item, then 537 580 * free the buf log item and remove the reference to it in the buffer. 
··· 599 556 { 600 557 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 601 558 struct xfs_buf *bp = bip->bli_buf; 602 - bool aborted; 603 - bool hold = !!(bip->bli_flags & XFS_BLI_HOLD); 604 - bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY); 559 + bool released; 560 + bool hold = bip->bli_flags & XFS_BLI_HOLD; 561 + bool stale = bip->bli_flags & XFS_BLI_STALE; 605 562 #if defined(DEBUG) || defined(XFS_WARN) 606 - bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED); 563 + bool ordered = bip->bli_flags & XFS_BLI_ORDERED; 564 + bool dirty = bip->bli_flags & XFS_BLI_DIRTY; 607 565 #endif 608 - 609 - aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags); 610 - 611 - /* Clear the buffer's association with this transaction. */ 612 - bp->b_transp = NULL; 613 - 614 - /* 615 - * The per-transaction state has been copied above so clear it from the 616 - * bli. 617 - */ 618 - bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); 619 - 620 - /* 621 - * If the buf item is marked stale, then don't do anything. We'll 622 - * unlock the buffer and free the buf item when the buffer is unpinned 623 - * for the last time. 624 - */ 625 - if (bip->bli_flags & XFS_BLI_STALE) { 626 - trace_xfs_buf_item_unlock_stale(bip); 627 - ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 628 - if (!aborted) { 629 - atomic_dec(&bip->bli_refcount); 630 - return; 631 - } 632 - } 633 566 634 567 trace_xfs_buf_item_unlock(bip); 635 568 636 569 /* 637 - * If the buf item isn't tracking any data, free it, otherwise drop the 638 - * reference we hold to it. If we are aborting the transaction, this may 639 - * be the only reference to the buf item, so we free it anyway 640 - * regardless of whether it is dirty or not. A dirty abort implies a 641 - * shutdown, anyway. 642 - * 643 570 * The bli dirty state should match whether the blf has logged segments 644 571 * except for ordered buffers, where only the bli should be dirty. 
645 572 */ 646 573 ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) || 647 574 (ordered && dirty && !xfs_buf_item_dirty_format(bip))); 575 + ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); 648 576 649 577 /* 650 - * Clean buffers, by definition, cannot be in the AIL. However, aborted 651 - * buffers may be in the AIL regardless of dirty state. An aborted 652 - * transaction that invalidates a buffer already in the AIL may have 653 - * marked it stale and cleared the dirty state, for example. 654 - * 655 - * Therefore if we are aborting a buffer and we've just taken the last 656 - * reference away, we have to check if it is in the AIL before freeing 657 - * it. We need to free it in this case, because an aborted transaction 658 - * has already shut the filesystem down and this is the last chance we 659 - * will have to do so. 578 + * Clear the buffer's association with this transaction and 579 + * per-transaction state from the bli, which has been copied above. 660 580 */ 661 - if (atomic_dec_and_test(&bip->bli_refcount)) { 662 - if (aborted) { 663 - ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp)); 664 - xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR); 665 - xfs_buf_item_relse(bp); 666 - } else if (!dirty) 667 - xfs_buf_item_relse(bp); 668 - } 581 + bp->b_transp = NULL; 582 + bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); 669 583 670 - if (!hold) 671 - xfs_buf_relse(bp); 584 + /* 585 + * Unref the item and unlock the buffer unless held or stale. Stale 586 + * buffers remain locked until final unpin unless the bli is freed by 587 + * the unref call. The latter implies shutdown because buffer 588 + * invalidation dirties the bli and transaction. 589 + */ 590 + released = xfs_buf_item_put(bip); 591 + if (hold || (stale && !released)) 592 + return; 593 + ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags)); 594 + xfs_buf_relse(bp); 672 595 } 673 596 674 597 /*

+1

fs/xfs/xfs_buf_item.h

··· 51 51 52 52 int xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *); 53 53 void xfs_buf_item_relse(struct xfs_buf *); 54 + bool xfs_buf_item_put(struct xfs_buf_log_item *); 54 55 void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); 55 56 bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 56 57 void xfs_buf_attach_iodone(struct xfs_buf *,

+1 -9

fs/xfs/xfs_inode.c

··· 1563 1563 error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags, 1564 1564 XFS_ITRUNC_MAX_EXTENTS, &done); 1565 1565 if (error) 1566 - goto out_bmap_cancel; 1566 + goto out; 1567 1567 1568 1568 /* 1569 1569 * Duplicate the transaction that has the permanent ··· 1599 1599 out: 1600 1600 *tpp = tp; 1601 1601 return error; 1602 - out_bmap_cancel: 1603 - /* 1604 - * If the bunmapi call encounters an error, return to the caller where 1605 - * the transaction can be properly aborted. We just need to make sure 1606 - * we're not holding any resources that we were not when we came in. 1607 - */ 1608 - xfs_defer_cancel(tp); 1609 - goto out; 1610 1602 } 1611 1603 1612 1604 int

+11 -1

fs/xfs/xfs_iops.c

··· 471 471 struct inode *inode, 472 472 struct delayed_call *done) 473 473 { 474 + char *link; 475 + 474 476 ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE); 475 - return XFS_I(inode)->i_df.if_u1.if_data; 477 + 478 + /* 479 + * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if 480 + * if_data is junk. 481 + */ 482 + link = XFS_I(inode)->i_df.if_u1.if_data; 483 + if (!link) 484 + return ERR_PTR(-EFSCORRUPTED); 485 + return link; 476 486 } 477 487 478 488 STATIC int

-10

fs/xfs/xfs_log_recover.c

··· 1570 1570 if (last_cycle != 0) { /* log completely written to */ 1571 1571 xlog_put_bp(bp); 1572 1572 return 0; 1573 - } else if (first_cycle != 1) { 1574 - /* 1575 - * If the cycle of the last block is zero, the cycle of 1576 - * the first block must be 1. If it's not, maybe we're 1577 - * not looking at a log... Bail out. 1578 - */ 1579 - xfs_warn(log->l_mp, 1580 - "Log inconsistent or not a log (last==0, first!=1)"); 1581 - error = -EINVAL; 1582 - goto bp_err; 1583 1573 } 1584 1574 1585 1575 /* we have a partially zeroed log */

+84 -59

fs/xfs/xfs_reflink.c

··· 352 352 return error; 353 353 } 354 354 355 + /* 356 + * Find the extent that maps the given range in the COW fork. Even if the extent 357 + * is not shared we might have a preallocation for it in the COW fork. If so we 358 + * use it that rather than trigger a new allocation. 359 + */ 360 + static int 361 + xfs_find_trim_cow_extent( 362 + struct xfs_inode *ip, 363 + struct xfs_bmbt_irec *imap, 364 + bool *shared, 365 + bool *found) 366 + { 367 + xfs_fileoff_t offset_fsb = imap->br_startoff; 368 + xfs_filblks_t count_fsb = imap->br_blockcount; 369 + struct xfs_iext_cursor icur; 370 + struct xfs_bmbt_irec got; 371 + bool trimmed; 372 + 373 + *found = false; 374 + 375 + /* 376 + * If we don't find an overlapping extent, trim the range we need to 377 + * allocate to fit the hole we found. 378 + */ 379 + if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) || 380 + got.br_startoff > offset_fsb) 381 + return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); 382 + 383 + *shared = true; 384 + if (isnullstartblock(got.br_startblock)) { 385 + xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); 386 + return 0; 387 + } 388 + 389 + /* real extent found - no need to allocate */ 390 + xfs_trim_extent(&got, offset_fsb, count_fsb); 391 + *imap = got; 392 + *found = true; 393 + return 0; 394 + } 395 + 355 396 /* Allocate all CoW reservations covering a range of blocks in a file. 
*/ 356 397 int 357 398 xfs_reflink_allocate_cow( ··· 404 363 struct xfs_mount *mp = ip->i_mount; 405 364 xfs_fileoff_t offset_fsb = imap->br_startoff; 406 365 xfs_filblks_t count_fsb = imap->br_blockcount; 407 - struct xfs_bmbt_irec got; 408 - struct xfs_trans *tp = NULL; 366 + struct xfs_trans *tp; 409 367 int nimaps, error = 0; 410 - bool trimmed; 368 + bool found; 411 369 xfs_filblks_t resaligned; 412 370 xfs_extlen_t resblks = 0; 413 - struct xfs_iext_cursor icur; 414 371 415 - retry: 416 - ASSERT(xfs_is_reflink_inode(ip)); 417 372 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 373 + ASSERT(xfs_is_reflink_inode(ip)); 374 + 375 + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); 376 + if (error || !*shared) 377 + return error; 378 + if (found) 379 + goto convert; 380 + 381 + resaligned = xfs_aligned_fsb_count(imap->br_startoff, 382 + imap->br_blockcount, xfs_get_cowextsz_hint(ip)); 383 + resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); 384 + 385 + xfs_iunlock(ip, *lockmode); 386 + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); 387 + *lockmode = XFS_ILOCK_EXCL; 388 + xfs_ilock(ip, *lockmode); 389 + 390 + if (error) 391 + return error; 392 + 393 + error = xfs_qm_dqattach_locked(ip, false); 394 + if (error) 395 + goto out_trans_cancel; 418 396 419 397 /* 420 - * Even if the extent is not shared we might have a preallocation for 421 - * it in the COW fork. If so use it. 398 + * Check for an overlapping extent again now that we dropped the ilock. 422 399 */ 423 - if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) && 424 - got.br_startoff <= offset_fsb) { 425 - *shared = true; 426 - 427 - /* If we have a real allocation in the COW fork we're done. 
*/ 428 - if (!isnullstartblock(got.br_startblock)) { 429 - xfs_trim_extent(&got, offset_fsb, count_fsb); 430 - *imap = got; 431 - goto convert; 432 - } 433 - 434 - xfs_trim_extent(imap, got.br_startoff, got.br_blockcount); 435 - } else { 436 - error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed); 437 - if (error || !*shared) 438 - goto out; 439 - } 440 - 441 - if (!tp) { 442 - resaligned = xfs_aligned_fsb_count(imap->br_startoff, 443 - imap->br_blockcount, xfs_get_cowextsz_hint(ip)); 444 - resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); 445 - 446 - xfs_iunlock(ip, *lockmode); 447 - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp); 448 - *lockmode = XFS_ILOCK_EXCL; 449 - xfs_ilock(ip, *lockmode); 450 - 451 - if (error) 452 - return error; 453 - 454 - error = xfs_qm_dqattach_locked(ip, false); 455 - if (error) 456 - goto out; 457 - goto retry; 400 + error = xfs_find_trim_cow_extent(ip, imap, shared, &found); 401 + if (error || !*shared) 402 + goto out_trans_cancel; 403 + if (found) { 404 + xfs_trans_cancel(tp); 405 + goto convert; 458 406 } 459 407 460 408 error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0, 461 409 XFS_QMOPT_RES_REGBLKS); 462 410 if (error) 463 - goto out; 411 + goto out_trans_cancel; 464 412 465 413 xfs_trans_ijoin(tp, ip, 0); 466 414 467 - nimaps = 1; 468 - 469 415 /* Allocate the entire reservation as unwritten blocks. */ 416 + nimaps = 1; 470 417 error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, 471 418 XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 472 419 resblks, imap, &nimaps); 473 420 if (error) 474 - goto out_trans_cancel; 421 + goto out_unreserve; 475 422 476 423 xfs_inode_set_cowblocks_tag(ip); 477 - 478 - /* Finish up. 
*/ 479 424 error = xfs_trans_commit(tp); 480 425 if (error) 481 426 return error; ··· 474 447 return -ENOSPC; 475 448 convert: 476 449 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb); 477 - out_trans_cancel: 450 + 451 + out_unreserve: 478 452 xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0, 479 453 XFS_QMOPT_RES_REGBLKS); 480 - out: 481 - if (tp) 482 - xfs_trans_cancel(tp); 454 + out_trans_cancel: 455 + xfs_trans_cancel(tp); 483 456 return error; 484 457 } 485 458 ··· 693 666 if (!del.br_blockcount) 694 667 goto prev_extent; 695 668 696 - ASSERT(!isnullstartblock(got.br_startblock)); 697 - 698 669 /* 699 - * Don't remap unwritten extents; these are 700 - * speculatively preallocated CoW extents that have been 701 - * allocated but have not yet been involved in a write. 670 + * Only remap real extent that contain data. With AIO 671 + * speculatively preallocations can leak into the range we 672 + * are called upon, and we need to skip them. 702 673 */ 703 - if (got.br_state == XFS_EXT_UNWRITTEN) 674 + if (!xfs_bmap_is_real_extent(&got)) 704 675 goto prev_extent; 705 676 706 677 /* Unmap the old blocks in the data fork. */

-1

fs/xfs/xfs_trace.h

··· 473 473 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); 474 474 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); 475 475 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); 476 - DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); 477 476 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); 478 477 DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); 479 478 DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);

+8 -2

fs/xfs/xfs_trans.c

··· 259 259 struct xfs_trans *tp; 260 260 int error; 261 261 262 + /* 263 + * Allocate the handle before we do our freeze accounting and setting up 264 + * GFP_NOFS allocation context so that we avoid lockdep false positives 265 + * by doing GFP_KERNEL allocations inside sb_start_intwrite(). 266 + */ 267 + tp = kmem_zone_zalloc(xfs_trans_zone, 268 + (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); 269 + 262 270 if (!(flags & XFS_TRANS_NO_WRITECOUNT)) 263 271 sb_start_intwrite(mp->m_super); 264 272 ··· 278 270 mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); 279 271 atomic_inc(&mp->m_active_trans); 280 272 281 - tp = kmem_zone_zalloc(xfs_trans_zone, 282 - (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP); 283 273 tp->t_magic = XFS_TRANS_HEADER_MAGIC; 284 274 tp->t_flags = flags; 285 275 tp->t_mountp = mp;

+25 -76

fs/xfs/xfs_trans_buf.c

··· 322 322 } 323 323 324 324 /* 325 - * Release the buffer bp which was previously acquired with one of the 326 - * xfs_trans_... buffer allocation routines if the buffer has not 327 - * been modified within this transaction. If the buffer is modified 328 - * within this transaction, do decrement the recursion count but do 329 - * not release the buffer even if the count goes to 0. If the buffer is not 330 - * modified within the transaction, decrement the recursion count and 331 - * release the buffer if the recursion count goes to 0. 325 + * Release a buffer previously joined to the transaction. If the buffer is 326 + * modified within this transaction, decrement the recursion count but do not 327 + * release the buffer even if the count goes to 0. If the buffer is not modified 328 + * within the transaction, decrement the recursion count and release the buffer 329 + * if the recursion count goes to 0. 332 330 * 333 - * If the buffer is to be released and it was not modified before 334 - * this transaction began, then free the buf_log_item associated with it. 331 + * If the buffer is to be released and it was not already dirty before this 332 + * transaction began, then also free the buf_log_item associated with it. 335 333 * 336 - * If the transaction pointer is NULL, make this just a normal 337 - * brelse() call. 334 + * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call. 338 335 */ 339 336 void 340 337 xfs_trans_brelse( 341 - xfs_trans_t *tp, 342 - xfs_buf_t *bp) 338 + struct xfs_trans *tp, 339 + struct xfs_buf *bp) 343 340 { 344 - struct xfs_buf_log_item *bip; 345 - int freed; 341 + struct xfs_buf_log_item *bip = bp->b_log_item; 346 342 347 - /* 348 - * Default to a normal brelse() call if the tp is NULL. 
349 - */ 350 - if (tp == NULL) { 351 - ASSERT(bp->b_transp == NULL); 343 + ASSERT(bp->b_transp == tp); 344 + 345 + if (!tp) { 352 346 xfs_buf_relse(bp); 353 347 return; 354 348 } 355 349 356 - ASSERT(bp->b_transp == tp); 357 - bip = bp->b_log_item; 350 + trace_xfs_trans_brelse(bip); 358 351 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 359 - ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 360 - ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); 361 352 ASSERT(atomic_read(&bip->bli_refcount) > 0); 362 353 363 - trace_xfs_trans_brelse(bip); 364 - 365 354 /* 366 - * If the release is just for a recursive lock, 367 - * then decrement the count and return. 355 + * If the release is for a recursive lookup, then decrement the count 356 + * and return. 368 357 */ 369 358 if (bip->bli_recur > 0) { 370 359 bip->bli_recur--; ··· 361 372 } 362 373 363 374 /* 364 - * If the buffer is dirty within this transaction, we can't 375 + * If the buffer is invalidated or dirty in this transaction, we can't 365 376 * release it until we commit. 366 377 */ 367 378 if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags)) 368 379 return; 369 - 370 - /* 371 - * If the buffer has been invalidated, then we can't release 372 - * it until the transaction commits to disk unless it is re-dirtied 373 - * as part of this transaction. This prevents us from pulling 374 - * the item from the AIL before we should. 375 - */ 376 380 if (bip->bli_flags & XFS_BLI_STALE) 377 381 return; 378 382 383 + /* 384 + * Unlink the log item from the transaction and clear the hold flag, if 385 + * set. We wouldn't want the next user of the buffer to get confused. 386 + */ 379 387 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 380 - 381 - /* 382 - * Free up the log item descriptor tracking the released item. 383 - */ 384 388 xfs_trans_del_item(&bip->bli_item); 389 + bip->bli_flags &= ~XFS_BLI_HOLD; 385 390 386 - /* 387 - * Clear the hold flag in the buf log item if it is set. 
388 - * We wouldn't want the next user of the buffer to 389 - * get confused. 390 - */ 391 - if (bip->bli_flags & XFS_BLI_HOLD) { 392 - bip->bli_flags &= ~XFS_BLI_HOLD; 393 - } 394 - 395 - /* 396 - * Drop our reference to the buf log item. 397 - */ 398 - freed = atomic_dec_and_test(&bip->bli_refcount); 399 - 400 - /* 401 - * If the buf item is not tracking data in the log, then we must free it 402 - * before releasing the buffer back to the free pool. 403 - * 404 - * If the fs has shutdown and we dropped the last reference, it may fall 405 - * on us to release a (possibly dirty) bli if it never made it to the 406 - * AIL (e.g., the aborted unpin already happened and didn't release it 407 - * due to our reference). Since we're already shutdown and need 408 - * ail_lock, just force remove from the AIL and release the bli here. 409 - */ 410 - if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) { 411 - xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR); 412 - xfs_buf_item_relse(bp); 413 - } else if (!(bip->bli_flags & XFS_BLI_DIRTY)) { 414 - /*** 415 - ASSERT(bp->b_pincount == 0); 416 - ***/ 417 - ASSERT(atomic_read(&bip->bli_refcount) == 0); 418 - ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); 419 - ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF)); 420 - xfs_buf_item_relse(bp); 421 - } 391 + /* drop the reference to the bli */ 392 + xfs_buf_item_put(bip); 422 393 423 394 bp->b_transp = NULL; 424 395 xfs_buf_relse(bp);

+3 -2

include/drm/drm_client.h

··· 87 87 struct drm_file *file; 88 88 }; 89 89 90 - int drm_client_new(struct drm_device *dev, struct drm_client_dev *client, 91 - const char *name, const struct drm_client_funcs *funcs); 90 + int drm_client_init(struct drm_device *dev, struct drm_client_dev *client, 91 + const char *name, const struct drm_client_funcs *funcs); 92 92 void drm_client_release(struct drm_client_dev *client); 93 + void drm_client_add(struct drm_client_dev *client); 93 94 94 95 void drm_client_dev_unregister(struct drm_device *dev); 95 96 void drm_client_dev_hotplug(struct drm_device *dev);

+3 -14

include/linux/fs.h

··· 1828 1828 extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, 1829 1829 struct inode *inode_out, loff_t pos_out, 1830 1830 u64 *len, bool is_dedupe); 1831 + extern int do_clone_file_range(struct file *file_in, loff_t pos_in, 1832 + struct file *file_out, loff_t pos_out, u64 len); 1831 1833 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in, 1832 - struct file *file_out, loff_t pos_out, u64 len); 1834 + struct file *file_out, loff_t pos_out, u64 len); 1833 1835 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff, 1834 1836 struct inode *dest, loff_t destoff, 1835 1837 loff_t len, bool *is_same); ··· 2773 2771 if (!S_ISREG(file_inode(file)->i_mode)) 2774 2772 return; 2775 2773 __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE); 2776 - } 2777 - 2778 - static inline int do_clone_file_range(struct file *file_in, loff_t pos_in, 2779 - struct file *file_out, loff_t pos_out, 2780 - u64 len) 2781 - { 2782 - int ret; 2783 - 2784 - file_start_write(file_out); 2785 - ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len); 2786 - file_end_write(file_out); 2787 - 2788 - return ret; 2789 2774 } 2790 2775 2791 2776 /*

+14

include/linux/hugetlb.h

··· 140 140 pte_t *huge_pte_offset(struct mm_struct *mm, 141 141 unsigned long addr, unsigned long sz); 142 142 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); 143 + void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, 144 + unsigned long *start, unsigned long *end); 143 145 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, 144 146 int write); 145 147 struct page *follow_huge_pd(struct vm_area_struct *vma, ··· 170 168 static inline unsigned long hugetlb_total_pages(void) 171 169 { 172 170 return 0; 171 + } 172 + 173 + static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, 174 + pte_t *ptep) 175 + { 176 + return 0; 177 + } 178 + 179 + static inline void adjust_range_if_pmd_sharing_possible( 180 + struct vm_area_struct *vma, 181 + unsigned long *start, unsigned long *end) 182 + { 173 183 } 174 184 175 185 #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })

+6

include/linux/mm.h

··· 2455 2455 return vma; 2456 2456 } 2457 2457 2458 + static inline bool range_in_vma(struct vm_area_struct *vma, 2459 + unsigned long start, unsigned long end) 2460 + { 2461 + return (vma && vma->vm_start <= start && end <= vma->vm_end); 2462 + } 2463 + 2458 2464 #ifdef CONFIG_MMU 2459 2465 pgprot_t vm_get_page_prot(unsigned long vm_flags); 2460 2466 void vma_set_page_prot(struct vm_area_struct *vma);

-6

include/linux/mmzone.h

··· 671 671 #ifdef CONFIG_NUMA_BALANCING 672 672 /* Lock serializing the migrate rate limiting window */ 673 673 spinlock_t numabalancing_migrate_lock; 674 - 675 - /* Rate limiting time interval */ 676 - unsigned long numabalancing_migrate_next_window; 677 - 678 - /* Number of pages migrated during the rate limiting time interval */ 679 - unsigned long numabalancing_migrate_nr_pages; 680 674 #endif 681 675 /* 682 676 * This is a per-node reserve of pages that are not available

+18

include/linux/virtio_net.h

··· 5 5 #include <linux/if_vlan.h> 6 6 #include <uapi/linux/virtio_net.h> 7 7 8 + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, 9 + const struct virtio_net_hdr *hdr) 10 + { 11 + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 12 + case VIRTIO_NET_HDR_GSO_TCPV4: 13 + case VIRTIO_NET_HDR_GSO_UDP: 14 + skb->protocol = cpu_to_be16(ETH_P_IP); 15 + break; 16 + case VIRTIO_NET_HDR_GSO_TCPV6: 17 + skb->protocol = cpu_to_be16(ETH_P_IPV6); 18 + break; 19 + default: 20 + return -EINVAL; 21 + } 22 + 23 + return 0; 24 + } 25 + 8 26 static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, 9 27 const struct virtio_net_hdr *hdr, 10 28 bool little_endian)

-27

include/trace/events/migrate.h

··· 70 70 __print_symbolic(__entry->mode, MIGRATE_MODE), 71 71 __print_symbolic(__entry->reason, MIGRATE_REASON)) 72 72 ); 73 - 74 - TRACE_EVENT(mm_numa_migrate_ratelimit, 75 - 76 - TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages), 77 - 78 - TP_ARGS(p, dst_nid, nr_pages), 79 - 80 - TP_STRUCT__entry( 81 - __array( char, comm, TASK_COMM_LEN) 82 - __field( pid_t, pid) 83 - __field( int, dst_nid) 84 - __field( unsigned long, nr_pages) 85 - ), 86 - 87 - TP_fast_assign( 88 - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); 89 - __entry->pid = p->pid; 90 - __entry->dst_nid = dst_nid; 91 - __entry->nr_pages = nr_pages; 92 - ), 93 - 94 - TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu", 95 - __entry->comm, 96 - __entry->pid, 97 - __entry->dst_nid, 98 - __entry->nr_pages) 99 - ); 100 73 #endif /* _TRACE_MIGRATE_H */ 101 74 102 75 /* This part must be outside protection */

+2

include/uapi/asm-generic/hugetlb_encode.h

··· 26 26 #define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) 27 27 #define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) 28 28 #define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) 29 + #define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) 29 30 #define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) 31 + #define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) 30 32 #define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) 31 33 #define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) 32 34 #define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)

+2

include/uapi/linux/memfd.h

··· 25 25 #define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB 26 26 #define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB 27 27 #define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB 28 + #define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB 28 29 #define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB 30 + #define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB 29 31 #define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB 30 32 #define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB 31 33 #define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB

+2

include/uapi/linux/mman.h

··· 28 28 #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB 29 29 #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB 30 30 #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB 31 + #define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB 31 32 #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB 33 + #define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB 32 34 #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB 33 35 #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB 34 36 #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB

+2

include/uapi/linux/shm.h

··· 65 65 #define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB 66 66 #define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB 67 67 #define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB 68 + #define SHM_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB 68 69 #define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB 70 + #define SHM_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB 69 71 #define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB 70 72 #define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB 71 73 #define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB

+1 -1

ipc/shm.c

··· 206 206 * Callers of shm_lock() must validate the status of the returned ipc 207 207 * object pointer and error out as appropriate. 208 208 */ 209 - return (void *)ipcp; 209 + return ERR_CAST(ipcp); 210 210 } 211 211 212 212 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)

+4 -1

kernel/bpf/local_storage.c

··· 129 129 struct bpf_cgroup_storage *storage; 130 130 struct bpf_storage_buffer *new; 131 131 132 - if (flags & BPF_NOEXIST) 132 + if (flags != BPF_ANY && flags != BPF_EXIST) 133 133 return -EINVAL; 134 134 135 135 storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map, ··· 193 193 struct bpf_cgroup_storage_map *map; 194 194 195 195 if (attr->key_size != sizeof(struct bpf_cgroup_storage_key)) 196 + return ERR_PTR(-EINVAL); 197 + 198 + if (attr->value_size == 0) 196 199 return ERR_PTR(-EINVAL); 197 200 198 201 if (attr->value_size > PAGE_SIZE)

+9 -1

kernel/bpf/verifier.c

··· 2842 2842 u64 umin_val, umax_val; 2843 2843 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; 2844 2844 2845 + if (insn_bitness == 32) { 2846 + /* Relevant for 32-bit RSH: Information can propagate towards 2847 + * LSB, so it isn't sufficient to only truncate the output to 2848 + * 32 bits. 2849 + */ 2850 + coerce_reg_to_size(dst_reg, 4); 2851 + coerce_reg_to_size(&src_reg, 4); 2852 + } 2853 + 2845 2854 smin_val = src_reg.smin_value; 2846 2855 smax_val = src_reg.smax_value; 2847 2856 umin_val = src_reg.umin_value; ··· 3086 3077 if (BPF_CLASS(insn->code) != BPF_ALU64) { 3087 3078 /* 32-bit ALU ops are (32,32)->32 */ 3088 3079 coerce_reg_to_size(dst_reg, 4); 3089 - coerce_reg_to_size(&src_reg, 4); 3090 3080 } 3091 3081 3092 3082 __reg_deduce_bounds(dst_reg);

+4 -7

kernel/events/core.c

··· 8314 8314 goto unlock; 8315 8315 8316 8316 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { 8317 + if (event->cpu != smp_processor_id()) 8318 + continue; 8317 8319 if (event->attr.type != PERF_TYPE_TRACEPOINT) 8318 8320 continue; 8319 8321 if (event->attr.config != entry->type) ··· 9433 9431 if (pmu->task_ctx_nr > perf_invalid_context) 9434 9432 return; 9435 9433 9436 - mutex_lock(&pmus_lock); 9437 9434 free_percpu(pmu->pmu_cpu_context); 9438 - mutex_unlock(&pmus_lock); 9439 9435 } 9440 9436 9441 9437 /* ··· 9689 9689 9690 9690 void perf_pmu_unregister(struct pmu *pmu) 9691 9691 { 9692 - int remove_device; 9693 - 9694 9692 mutex_lock(&pmus_lock); 9695 - remove_device = pmu_bus_running; 9696 9693 list_del_rcu(&pmu->entry); 9697 - mutex_unlock(&pmus_lock); 9698 9694 9699 9695 /* 9700 9696 * We dereference the pmu list under both SRCU and regular RCU, so ··· 9702 9706 free_percpu(pmu->pmu_disable_count); 9703 9707 if (pmu->type >= PERF_TYPE_MAX) 9704 9708 idr_remove(&pmu_idr, pmu->type); 9705 - if (remove_device) { 9709 + if (pmu_bus_running) { 9706 9710 if (pmu->nr_addr_filters) 9707 9711 device_remove_file(pmu->dev, &dev_attr_nr_addr_filters); 9708 9712 device_del(pmu->dev); 9709 9713 put_device(pmu->dev); 9710 9714 } 9711 9715 free_pmu_context(pmu); 9716 + mutex_unlock(&pmus_lock); 9712 9717 } 9713 9718 EXPORT_SYMBOL_GPL(perf_pmu_unregister); 9714 9719

+6 -4

kernel/locking/test-ww_mutex.c

··· 260 260 { 261 261 struct test_cycle *cycle = container_of(work, typeof(*cycle), work); 262 262 struct ww_acquire_ctx ctx; 263 - int err; 263 + int err, erra = 0; 264 264 265 265 ww_acquire_init(&ctx, &ww_class); 266 266 ww_mutex_lock(&cycle->a_mutex, &ctx); ··· 270 270 271 271 err = ww_mutex_lock(cycle->b_mutex, &ctx); 272 272 if (err == -EDEADLK) { 273 + err = 0; 273 274 ww_mutex_unlock(&cycle->a_mutex); 274 275 ww_mutex_lock_slow(cycle->b_mutex, &ctx); 275 - err = ww_mutex_lock(&cycle->a_mutex, &ctx); 276 + erra = ww_mutex_lock(&cycle->a_mutex, &ctx); 276 277 } 277 278 278 279 if (!err) 279 280 ww_mutex_unlock(cycle->b_mutex); 280 - ww_mutex_unlock(&cycle->a_mutex); 281 + if (!erra) 282 + ww_mutex_unlock(&cycle->a_mutex); 281 283 ww_acquire_fini(&ctx); 282 284 283 - cycle->result = err; 285 + cycle->result = err ?: erra; 284 286 } 285 287 286 288 static int __test_cycle(unsigned int nthreads)

+1 -1

kernel/sched/core.c

··· 1167 1167 1168 1168 if (task_cpu(p) != new_cpu) { 1169 1169 if (p->sched_class->migrate_task_rq) 1170 - p->sched_class->migrate_task_rq(p); 1170 + p->sched_class->migrate_task_rq(p, new_cpu); 1171 1171 p->se.nr_migrations++; 1172 1172 rseq_migrate(p); 1173 1173 perf_event_task_migrate(p);

+1 -1

kernel/sched/deadline.c

··· 1607 1607 return cpu; 1608 1608 } 1609 1609 1610 - static void migrate_task_rq_dl(struct task_struct *p) 1610 + static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused) 1611 1611 { 1612 1612 struct rq *rq; 1613 1613

+91 -13

kernel/sched/fair.c

··· 1392 1392 int last_cpupid, this_cpupid; 1393 1393 1394 1394 this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid); 1395 + last_cpupid = page_cpupid_xchg_last(page, this_cpupid); 1396 + 1397 + /* 1398 + * Allow first faults or private faults to migrate immediately early in 1399 + * the lifetime of a task. The magic number 4 is based on waiting for 1400 + * two full passes of the "multi-stage node selection" test that is 1401 + * executed below. 1402 + */ 1403 + if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) && 1404 + (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid))) 1405 + return true; 1395 1406 1396 1407 /* 1397 1408 * Multi-stage node selection is used in conjunction with a periodic ··· 1421 1410 * This quadric squishes small probabilities, making it less likely we 1422 1411 * act on an unlikely task<->page relation. 1423 1412 */ 1424 - last_cpupid = page_cpupid_xchg_last(page, this_cpupid); 1425 1413 if (!cpupid_pid_unset(last_cpupid) && 1426 1414 cpupid_to_nid(last_cpupid) != dst_nid) 1427 1415 return false; ··· 1524 1514 static void task_numa_assign(struct task_numa_env *env, 1525 1515 struct task_struct *p, long imp) 1526 1516 { 1517 + struct rq *rq = cpu_rq(env->dst_cpu); 1518 + 1519 + /* Bail out if run-queue part of active NUMA balance. */ 1520 + if (xchg(&rq->numa_migrate_on, 1)) 1521 + return; 1522 + 1523 + /* 1524 + * Clear previous best_cpu/rq numa-migrate flag, since task now 1525 + * found a better CPU to move/swap. 1526 + */ 1527 + if (env->best_cpu != -1) { 1528 + rq = cpu_rq(env->best_cpu); 1529 + WRITE_ONCE(rq->numa_migrate_on, 0); 1530 + } 1531 + 1527 1532 if (env->best_task) 1528 1533 put_task_struct(env->best_task); 1529 1534 if (p) ··· 1578 1553 } 1579 1554 1580 1555 /* 1556 + * Maximum NUMA importance can be 1998 (2*999); 1557 + * SMALLIMP @ 30 would be close to 1998/64. 1558 + * Used to deter task migration. 
1559 + */ 1560 + #define SMALLIMP 30 1561 + 1562 + /* 1581 1563 * This checks if the overall compute and NUMA accesses of the system would 1582 1564 * be improved if the source tasks was migrated to the target dst_cpu taking 1583 1565 * into account that it might be best if task running on the dst_cpu should ··· 1601 1569 long moveimp = imp; 1602 1570 int dist = env->dist; 1603 1571 1572 + if (READ_ONCE(dst_rq->numa_migrate_on)) 1573 + return; 1574 + 1604 1575 rcu_read_lock(); 1605 1576 cur = task_rcu_dereference(&dst_rq->curr); 1606 1577 if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur))) ··· 1617 1582 goto unlock; 1618 1583 1619 1584 if (!cur) { 1620 - if (maymove || imp > env->best_imp) 1585 + if (maymove && moveimp >= env->best_imp) 1621 1586 goto assign; 1622 1587 else 1623 1588 goto unlock; ··· 1660 1625 task_weight(cur, env->dst_nid, dist); 1661 1626 } 1662 1627 1663 - if (imp <= env->best_imp) 1664 - goto unlock; 1665 - 1666 1628 if (maymove && moveimp > imp && moveimp > env->best_imp) { 1667 - imp = moveimp - 1; 1629 + imp = moveimp; 1668 1630 cur = NULL; 1669 1631 goto assign; 1670 1632 } 1633 + 1634 + /* 1635 + * If the NUMA importance is less than SMALLIMP, 1636 + * task migration might only result in ping pong 1637 + * of tasks and also hurt performance due to cache 1638 + * misses. 1639 + */ 1640 + if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2) 1641 + goto unlock; 1671 1642 1672 1643 /* 1673 1644 * In the overloaded case, try and keep the load balanced. ··· 1751 1710 .best_cpu = -1, 1752 1711 }; 1753 1712 struct sched_domain *sd; 1713 + struct rq *best_rq; 1754 1714 unsigned long taskweight, groupweight; 1755 1715 int nid, ret, dist; 1756 1716 long taskimp, groupimp; ··· 1847 1805 if (env.best_cpu == -1) 1848 1806 return -EAGAIN; 1849 1807 1850 - /* 1851 - * Reset the scan period if the task is being rescheduled on an 1852 - * alternative node to recheck if the tasks is now properly placed. 
1853 - */ 1854 - p->numa_scan_period = task_scan_start(p); 1855 - 1808 + best_rq = cpu_rq(env.best_cpu); 1856 1809 if (env.best_task == NULL) { 1857 1810 ret = migrate_task_to(p, env.best_cpu); 1811 + WRITE_ONCE(best_rq->numa_migrate_on, 0); 1858 1812 if (ret != 0) 1859 1813 trace_sched_stick_numa(p, env.src_cpu, env.best_cpu); 1860 1814 return ret; 1861 1815 } 1862 1816 1863 1817 ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu); 1818 + WRITE_ONCE(best_rq->numa_migrate_on, 0); 1864 1819 1865 1820 if (ret != 0) 1866 1821 trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task)); ··· 2635 2596 } 2636 2597 } 2637 2598 2599 + static void update_scan_period(struct task_struct *p, int new_cpu) 2600 + { 2601 + int src_nid = cpu_to_node(task_cpu(p)); 2602 + int dst_nid = cpu_to_node(new_cpu); 2603 + 2604 + if (!static_branch_likely(&sched_numa_balancing)) 2605 + return; 2606 + 2607 + if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING)) 2608 + return; 2609 + 2610 + if (src_nid == dst_nid) 2611 + return; 2612 + 2613 + /* 2614 + * Allow resets if faults have been trapped before one scan 2615 + * has completed. This is most likely due to a new task that 2616 + * is pulled cross-node due to wakeups or load balancing. 2617 + */ 2618 + if (p->numa_scan_seq) { 2619 + /* 2620 + * Avoid scan adjustments if moving to the preferred 2621 + * node or if the task was not previously running on 2622 + * the preferred node. 
2623 + */ 2624 + if (dst_nid == p->numa_preferred_nid || 2625 + (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid)) 2626 + return; 2627 + } 2628 + 2629 + p->numa_scan_period = task_scan_start(p); 2630 + } 2631 + 2638 2632 #else 2639 2633 static void task_tick_numa(struct rq *rq, struct task_struct *curr) 2640 2634 { ··· 2678 2606 } 2679 2607 2680 2608 static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p) 2609 + { 2610 + } 2611 + 2612 + static inline void update_scan_period(struct task_struct *p, int new_cpu) 2681 2613 { 2682 2614 } 2683 2615 ··· 6351 6275 * cfs_rq_of(p) references at time of call are still valid and identify the 6352 6276 * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held. 6353 6277 */ 6354 - static void migrate_task_rq_fair(struct task_struct *p) 6278 + static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) 6355 6279 { 6356 6280 /* 6357 6281 * As blocked tasks retain absolute vruntime the migration needs to ··· 6404 6328 6405 6329 /* We have migrated, no longer consider this task hot */ 6406 6330 p->se.exec_start = 0; 6331 + 6332 + update_scan_period(p, new_cpu); 6407 6333 } 6408 6334 6409 6335 static void task_dead_fair(struct task_struct *p)

+2 -1

kernel/sched/sched.h

··· 783 783 #ifdef CONFIG_NUMA_BALANCING 784 784 unsigned int nr_numa_running; 785 785 unsigned int nr_preferred_running; 786 + unsigned int numa_migrate_on; 786 787 #endif 787 788 #define CPU_LOAD_IDX_MAX 5 788 789 unsigned long cpu_load[CPU_LOAD_IDX_MAX]; ··· 1524 1523 1525 1524 #ifdef CONFIG_SMP 1526 1525 int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); 1527 - void (*migrate_task_rq)(struct task_struct *p); 1526 + void (*migrate_task_rq)(struct task_struct *p, int new_cpu); 1528 1527 1529 1528 void (*task_woken)(struct rq *this_rq, struct task_struct *task); 1530 1529

+2 -1

mm/gup_benchmark.c

··· 19 19 struct gup_benchmark *gup) 20 20 { 21 21 ktime_t start_time, end_time; 22 - unsigned long i, nr, nr_pages, addr, next; 22 + unsigned long i, nr_pages, addr, next; 23 + int nr; 23 24 struct page **pages; 24 25 25 26 nr_pages = gup->size / PAGE_SIZE;

+1 -1

mm/huge_memory.c

··· 2931 2931 else 2932 2932 page_add_file_rmap(new, true); 2933 2933 set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); 2934 - if (vma->vm_flags & VM_LOCKED) 2934 + if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new)) 2935 2935 mlock_vma_page(new); 2936 2936 update_mmu_cache_pmd(vma, address, pvmw->pmd); 2937 2937 }

+79 -11

mm/hugetlb.c

··· 3326 3326 struct page *page; 3327 3327 struct hstate *h = hstate_vma(vma); 3328 3328 unsigned long sz = huge_page_size(h); 3329 - const unsigned long mmun_start = start; /* For mmu_notifiers */ 3330 - const unsigned long mmun_end = end; /* For mmu_notifiers */ 3329 + unsigned long mmun_start = start; /* For mmu_notifiers */ 3330 + unsigned long mmun_end = end; /* For mmu_notifiers */ 3331 3331 3332 3332 WARN_ON(!is_vm_hugetlb_page(vma)); 3333 3333 BUG_ON(start & ~huge_page_mask(h)); ··· 3339 3339 */ 3340 3340 tlb_remove_check_page_size_change(tlb, sz); 3341 3341 tlb_start_vma(tlb, vma); 3342 + 3343 + /* 3344 + * If sharing possible, alert mmu notifiers of worst case. 3345 + */ 3346 + adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end); 3342 3347 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); 3343 3348 address = start; 3344 3349 for (; address < end; address += sz) { ··· 3354 3349 ptl = huge_pte_lock(h, mm, ptep); 3355 3350 if (huge_pmd_unshare(mm, &address, ptep)) { 3356 3351 spin_unlock(ptl); 3352 + /* 3353 + * We just unmapped a page of PMDs by clearing a PUD. 3354 + * The caller's TLB flush range should cover this area. 3355 + */ 3357 3356 continue; 3358 3357 } 3359 3358 ··· 3440 3431 { 3441 3432 struct mm_struct *mm; 3442 3433 struct mmu_gather tlb; 3434 + unsigned long tlb_start = start; 3435 + unsigned long tlb_end = end; 3436 + 3437 + /* 3438 + * If shared PMDs were possibly used within this vma range, adjust 3439 + * start/end for worst case tlb flushing. 3440 + * Note that we can not be sure if PMDs are shared until we try to 3441 + * unmap pages. However, we want to make sure TLB flushing covers 3442 + * the largest possible range. 
3443 + */ 3444 + adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end); 3443 3445 3444 3446 mm = vma->vm_mm; 3445 3447 3446 - tlb_gather_mmu(&tlb, mm, start, end); 3448 + tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end); 3447 3449 __unmap_hugepage_range(&tlb, vma, start, end, ref_page); 3448 - tlb_finish_mmu(&tlb, start, end); 3450 + tlb_finish_mmu(&tlb, tlb_start, tlb_end); 3449 3451 } 3450 3452 3451 3453 /* ··· 4318 4298 pte_t pte; 4319 4299 struct hstate *h = hstate_vma(vma); 4320 4300 unsigned long pages = 0; 4301 + unsigned long f_start = start; 4302 + unsigned long f_end = end; 4303 + bool shared_pmd = false; 4304 + 4305 + /* 4306 + * In the case of shared PMDs, the area to flush could be beyond 4307 + * start/end. Set f_start/f_end to cover the maximum possible 4308 + * range if PMD sharing is possible. 4309 + */ 4310 + adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end); 4321 4311 4322 4312 BUG_ON(address >= end); 4323 - flush_cache_range(vma, address, end); 4313 + flush_cache_range(vma, f_start, f_end); 4324 4314 4325 - mmu_notifier_invalidate_range_start(mm, start, end); 4315 + mmu_notifier_invalidate_range_start(mm, f_start, f_end); 4326 4316 i_mmap_lock_write(vma->vm_file->f_mapping); 4327 4317 for (; address < end; address += huge_page_size(h)) { 4328 4318 spinlock_t *ptl; ··· 4343 4313 if (huge_pmd_unshare(mm, &address, ptep)) { 4344 4314 pages++; 4345 4315 spin_unlock(ptl); 4316 + shared_pmd = true; 4346 4317 continue; 4347 4318 } 4348 4319 pte = huge_ptep_get(ptep); ··· 4379 4348 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare 4380 4349 * may have cleared our pud entry and done put_page on the page table: 4381 4350 * once we release i_mmap_rwsem, another task can do the final put_page 4382 - * and that page table be reused and filled with junk. 4351 + * and that page table be reused and filled with junk. If we actually 4352 + * did unshare a page of pmds, flush the range corresponding to the pud. 
4383 4353 */ 4384 - flush_hugetlb_tlb_range(vma, start, end); 4354 + if (shared_pmd) 4355 + flush_hugetlb_tlb_range(vma, f_start, f_end); 4356 + else 4357 + flush_hugetlb_tlb_range(vma, start, end); 4385 4358 /* 4386 4359 * No need to call mmu_notifier_invalidate_range() we are downgrading 4387 4360 * page table protection not changing it to point to a new page. ··· 4393 4358 * See Documentation/vm/mmu_notifier.rst 4394 4359 */ 4395 4360 i_mmap_unlock_write(vma->vm_file->f_mapping); 4396 - mmu_notifier_invalidate_range_end(mm, start, end); 4361 + mmu_notifier_invalidate_range_end(mm, f_start, f_end); 4397 4362 4398 4363 return pages << h->order; 4399 4364 } ··· 4580 4545 /* 4581 4546 * check on proper vm_flags and page table alignment 4582 4547 */ 4583 - if (vma->vm_flags & VM_MAYSHARE && 4584 - vma->vm_start <= base && end <= vma->vm_end) 4548 + if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end)) 4585 4549 return true; 4586 4550 return false; 4551 + } 4552 + 4553 + /* 4554 + * Determine if start,end range within vma could be mapped by shared pmd. 4555 + * If yes, adjust start and end to cover range associated with possible 4556 + * shared pmd mappings. 4557 + */ 4558 + void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, 4559 + unsigned long *start, unsigned long *end) 4560 + { 4561 + unsigned long check_addr = *start; 4562 + 4563 + if (!(vma->vm_flags & VM_MAYSHARE)) 4564 + return; 4565 + 4566 + for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { 4567 + unsigned long a_start = check_addr & PUD_MASK; 4568 + unsigned long a_end = a_start + PUD_SIZE; 4569 + 4570 + /* 4571 + * If sharing is possible, adjust start/end if necessary. 
4572 + */ 4573 + if (range_in_vma(vma, a_start, a_end)) { 4574 + if (a_start < *start) 4575 + *start = a_start; 4576 + if (a_end > *end) 4577 + *end = a_end; 4578 + } 4579 + } 4587 4580 } 4588 4581 4589 4582 /* ··· 4710 4647 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) 4711 4648 { 4712 4649 return 0; 4650 + } 4651 + 4652 + void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, 4653 + unsigned long *start, unsigned long *end) 4654 + { 4713 4655 } 4714 4656 #define want_pmd_share() (0) 4715 4657 #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */

+1 -1

mm/madvise.c

··· 96 96 new_flags |= VM_DONTDUMP; 97 97 break; 98 98 case MADV_DODUMP: 99 - if (new_flags & VM_SPECIAL) { 99 + if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) { 100 100 error = -EINVAL; 101 101 goto out; 102 102 }

+4 -58

mm/migrate.c

··· 275 275 if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) 276 276 mlock_vma_page(new); 277 277 278 + if (PageTransHuge(page) && PageMlocked(page)) 279 + clear_page_mlock(page); 280 + 278 281 /* No need to invalidate - it was non-present before */ 279 282 update_mmu_cache(vma, pvmw.address, pvmw.pte); 280 283 } ··· 1414 1411 * we encounter them after the rest of the list 1415 1412 * is processed. 1416 1413 */ 1417 - if (PageTransHuge(page)) { 1414 + if (PageTransHuge(page) && !PageHuge(page)) { 1418 1415 lock_page(page); 1419 1416 rc = split_huge_page_to_list(page, from); 1420 1417 unlock_page(page); ··· 1858 1855 return newpage; 1859 1856 } 1860 1857 1861 - /* 1862 - * page migration rate limiting control. 1863 - * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs 1864 - * window of time. Default here says do not migrate more than 1280M per second. 1865 - */ 1866 - static unsigned int migrate_interval_millisecs __read_mostly = 100; 1867 - static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT); 1868 - 1869 - /* Returns true if the node is migrate rate-limited after the update */ 1870 - static bool numamigrate_update_ratelimit(pg_data_t *pgdat, 1871 - unsigned long nr_pages) 1872 - { 1873 - /* 1874 - * Rate-limit the amount of data that is being migrated to a node. 1875 - * Optimal placement is no good if the memory bus is saturated and 1876 - * all the time is being spent migrating! 
1877 - */ 1878 - if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) { 1879 - spin_lock(&pgdat->numabalancing_migrate_lock); 1880 - pgdat->numabalancing_migrate_nr_pages = 0; 1881 - pgdat->numabalancing_migrate_next_window = jiffies + 1882 - msecs_to_jiffies(migrate_interval_millisecs); 1883 - spin_unlock(&pgdat->numabalancing_migrate_lock); 1884 - } 1885 - if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) { 1886 - trace_mm_numa_migrate_ratelimit(current, pgdat->node_id, 1887 - nr_pages); 1888 - return true; 1889 - } 1890 - 1891 - /* 1892 - * This is an unlocked non-atomic update so errors are possible. 1893 - * The consequences are failing to migrate when we potentiall should 1894 - * have which is not severe enough to warrant locking. If it is ever 1895 - * a problem, it can be converted to a per-cpu counter. 1896 - */ 1897 - pgdat->numabalancing_migrate_nr_pages += nr_pages; 1898 - return false; 1899 - } 1900 - 1901 1858 static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page) 1902 1859 { 1903 1860 int page_lru; ··· 1930 1967 if (page_is_file_cache(page) && PageDirty(page)) 1931 1968 goto out; 1932 1969 1933 - /* 1934 - * Rate-limit the amount of data that is being migrated to a node. 1935 - * Optimal placement is no good if the memory bus is saturated and 1936 - * all the time is being spent migrating! 1937 - */ 1938 - if (numamigrate_update_ratelimit(pgdat, 1)) 1939 - goto out; 1940 - 1941 1970 isolated = numamigrate_isolate_page(pgdat, page); 1942 1971 if (!isolated) 1943 1972 goto out; ··· 1975 2020 int page_lru = page_is_file_cache(page); 1976 2021 unsigned long mmun_start = address & HPAGE_PMD_MASK; 1977 2022 unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE; 1978 - 1979 - /* 1980 - * Rate-limit the amount of data that is being migrated to a node. 1981 - * Optimal placement is no good if the memory bus is saturated and 1982 - * all the time is being spent migrating! 
1983 - */ 1984 - if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR)) 1985 - goto out_dropref; 1986 2023 1987 2024 new_page = alloc_pages_node(node, 1988 2025 (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE), ··· 2072 2125 2073 2126 out_fail: 2074 2127 count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR); 2075 - out_dropref: 2076 2128 ptl = pmd_lock(mm, pmd); 2077 2129 if (pmd_same(*pmd, entry)) { 2078 2130 entry = pmd_modify(entry, vma->vm_page_prot);

-2

mm/page_alloc.c

··· 6197 6197 static void pgdat_init_numabalancing(struct pglist_data *pgdat) 6198 6198 { 6199 6199 spin_lock_init(&pgdat->numabalancing_migrate_lock); 6200 - pgdat->numabalancing_migrate_nr_pages = 0; 6201 - pgdat->numabalancing_migrate_next_window = jiffies; 6202 6200 } 6203 6201 #else 6204 6202 static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}

+39 -3

mm/rmap.c

··· 1362 1362 } 1363 1363 1364 1364 /* 1365 - * We have to assume the worse case ie pmd for invalidation. Note that 1366 - * the page can not be free in this function as call of try_to_unmap() 1367 - * must hold a reference on the page. 1365 + * For THP, we have to assume the worse case ie pmd for invalidation. 1366 + * For hugetlb, it could be much worse if we need to do pud 1367 + * invalidation in the case of pmd sharing. 1368 + * 1369 + * Note that the page can not be free in this function as call of 1370 + * try_to_unmap() must hold a reference on the page. 1368 1371 */ 1369 1372 end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); 1373 + if (PageHuge(page)) { 1374 + /* 1375 + * If sharing is possible, start and end will be adjusted 1376 + * accordingly. 1377 + */ 1378 + adjust_range_if_pmd_sharing_possible(vma, &start, &end); 1379 + } 1370 1380 mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); 1371 1381 1372 1382 while (page_vma_mapped_walk(&pvmw)) { ··· 1419 1409 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); 1420 1410 address = pvmw.address; 1421 1411 1412 + if (PageHuge(page)) { 1413 + if (huge_pmd_unshare(mm, &address, pvmw.pte)) { 1414 + /* 1415 + * huge_pmd_unshare unmapped an entire PMD 1416 + * page. There is no way of knowing exactly 1417 + * which PMDs may be cached for this mm, so 1418 + * we must flush them all. start/end were 1419 + * already adjusted above to cover this range. 1420 + */ 1421 + flush_cache_range(vma, start, end); 1422 + flush_tlb_range(vma, start, end); 1423 + mmu_notifier_invalidate_range(mm, start, end); 1424 + 1425 + /* 1426 + * The ref count of the PMD page was dropped 1427 + * which is part of the way map counting 1428 + * is done for shared PMDs. Return 'true' 1429 + * here. When there is no other sharing, 1430 + * huge_pmd_unshare returns false and we will 1431 + * unmap the actual page and drop map count 1432 + * to zero. 
1433 + */ 1434 + page_vma_mapped_walk_done(&pvmw); 1435 + break; 1436 + } 1437 + } 1422 1438 1423 1439 if (IS_ENABLED(CONFIG_MIGRATION) && 1424 1440 (flags & TTU_MIGRATION) &&

+3 -4

mm/vmscan.c

··· 580 580 struct mem_cgroup *memcg, int priority) 581 581 { 582 582 struct memcg_shrinker_map *map; 583 - unsigned long freed = 0; 584 - int ret, i; 583 + unsigned long ret, freed = 0; 584 + int i; 585 585 586 586 if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)) 587 587 return 0; ··· 677 677 struct mem_cgroup *memcg, 678 678 int priority) 679 679 { 680 + unsigned long ret, freed = 0; 680 681 struct shrinker *shrinker; 681 - unsigned long freed = 0; 682 - int ret; 683 682 684 683 if (!mem_cgroup_is_root(memcg)) 685 684 return shrink_slab_memcg(gfp_mask, nid, memcg, priority);

+3 -1

mm/vmstat.c

··· 1275 1275 #ifdef CONFIG_SMP 1276 1276 "nr_tlb_remote_flush", 1277 1277 "nr_tlb_remote_flush_received", 1278 + #else 1279 + "", /* nr_tlb_remote_flush */ 1280 + "", /* nr_tlb_remote_flush_received */ 1278 1281 #endif /* CONFIG_SMP */ 1279 1282 "nr_tlb_local_flush_all", 1280 1283 "nr_tlb_local_flush_one", ··· 1286 1283 #ifdef CONFIG_DEBUG_VM_VMACACHE 1287 1284 "vmacache_find_calls", 1288 1285 "vmacache_find_hits", 1289 - "vmacache_full_flushes", 1290 1286 #endif 1291 1287 #ifdef CONFIG_SWAP 1292 1288 "swap_ra",

+2 -2

net/bpfilter/bpfilter_kern.c

··· 59 59 req.is_set = is_set; 60 60 req.pid = current->pid; 61 61 req.cmd = optname; 62 - req.addr = (long)optval; 62 + req.addr = (long __force __user)optval; 63 63 req.len = optlen; 64 64 mutex_lock(&bpfilter_lock); 65 65 if (!info.pid) ··· 98 98 pr_info("Loaded bpfilter_umh pid %d\n", info.pid); 99 99 100 100 /* health check that usermode process started correctly */ 101 - if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) { 101 + if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) { 102 102 stop_umh(); 103 103 return -EFAULT; 104 104 }

+20 -9

net/core/rtnetlink.c

··· 3763 3763 int err = 0; 3764 3764 int fidx = 0; 3765 3765 3766 - err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, 3767 - IFLA_MAX, ifla_policy, NULL); 3768 - if (err < 0) { 3769 - return -EINVAL; 3770 - } else if (err == 0) { 3771 - if (tb[IFLA_MASTER]) 3772 - br_idx = nla_get_u32(tb[IFLA_MASTER]); 3773 - } 3766 + /* A hack to preserve kernel<->userspace interface. 3767 + * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. 3768 + * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. 3769 + * So, check for ndmsg with an optional u32 attribute (not used here). 3770 + * Fortunately these sizes don't conflict with the size of ifinfomsg 3771 + * with an optional attribute. 3772 + */ 3773 + if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) && 3774 + (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) + 3775 + nla_attr_size(sizeof(u32)))) { 3776 + err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, 3777 + IFLA_MAX, ifla_policy, NULL); 3778 + if (err < 0) { 3779 + return -EINVAL; 3780 + } else if (err == 0) { 3781 + if (tb[IFLA_MASTER]) 3782 + br_idx = nla_get_u32(tb[IFLA_MASTER]); 3783 + } 3774 3784 3775 - brport_idx = ifm->ifi_index; 3785 + brport_idx = ifm->ifi_index; 3786 + } 3776 3787 3777 3788 if (br_idx) { 3778 3789 br_dev = __dev_get_by_index(net, br_idx);

+20 -9

net/ipv6/raw.c

··· 651 651 skb->priority = sk->sk_priority; 652 652 skb->mark = sk->sk_mark; 653 653 skb->tstamp = sockc->transmit_time; 654 - skb_dst_set(skb, &rt->dst); 655 - *dstp = NULL; 656 654 657 655 skb_put(skb, length); 658 656 skb_reset_network_header(skb); ··· 663 665 664 666 skb->transport_header = skb->network_header; 665 667 err = memcpy_from_msg(iph, msg, length); 666 - if (err) 667 - goto error_fault; 668 + if (err) { 669 + err = -EFAULT; 670 + kfree_skb(skb); 671 + goto error; 672 + } 673 + 674 + skb_dst_set(skb, &rt->dst); 675 + *dstp = NULL; 668 676 669 677 /* if egress device is enslaved to an L3 master device pass the 670 678 * skb to its handler for processing ··· 679 675 if (unlikely(!skb)) 680 676 return 0; 681 677 678 + /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev 679 + * in the error path. Since skb has been freed, the dst could 680 + * have been queued for deletion. 681 + */ 682 + rcu_read_lock(); 682 683 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); 683 684 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, 684 685 NULL, rt->dst.dev, dst_output); 685 686 if (err > 0) 686 687 err = net_xmit_errno(err); 687 - if (err) 688 - goto error; 688 + if (err) { 689 + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 690 + rcu_read_unlock(); 691 + goto error_check; 692 + } 693 + rcu_read_unlock(); 689 694 out: 690 695 return 0; 691 696 692 - error_fault: 693 - err = -EFAULT; 694 - kfree_skb(skb); 695 697 error: 696 698 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); 699 + error_check: 697 700 if (err == -ENOBUFS && !np->recverr) 698 701 err = 0; 699 702 return err;

+1 -1

net/mac80211/cfg.c

··· 425 425 case NL80211_IFTYPE_AP: 426 426 case NL80211_IFTYPE_AP_VLAN: 427 427 /* Keys without a station are used for TX only */ 428 - if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP)) 428 + if (sta && test_sta_flag(sta, WLAN_STA_MFP)) 429 429 key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; 430 430 break; 431 431 case NL80211_IFTYPE_ADHOC:

+4

net/openvswitch/conntrack.c

··· 1312 1312 1313 1313 rcu_assign_pointer(help->helper, helper); 1314 1314 info->helper = helper; 1315 + 1316 + if (info->nat) 1317 + request_module("ip_nat_%s", name); 1318 + 1315 1319 return 0; 1316 1320 } 1317 1321

+7 -4

net/packet/af_packet.c

··· 2715 2715 } 2716 2716 } 2717 2717 2718 - if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr, 2719 - vio_le())) { 2720 - tp_len = -EINVAL; 2721 - goto tpacket_error; 2718 + if (po->has_vnet_hdr) { 2719 + if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { 2720 + tp_len = -EINVAL; 2721 + goto tpacket_error; 2722 + } 2723 + virtio_net_hdr_set_proto(skb, vnet_hdr); 2722 2724 } 2723 2725 2724 2726 skb->destructor = tpacket_destruct_skb; ··· 2917 2915 if (err) 2918 2916 goto out_free; 2919 2917 len += sizeof(vnet_hdr); 2918 + virtio_net_hdr_set_proto(skb, &vnet_hdr); 2920 2919 } 2921 2920 2922 2921 skb_probe_transport_header(skb, reserve);

+20 -4

net/sched/sch_api.c

··· 1322 1322 * Delete/get qdisc. 1323 1323 */ 1324 1324 1325 + const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = { 1326 + [TCA_KIND] = { .type = NLA_STRING }, 1327 + [TCA_OPTIONS] = { .type = NLA_NESTED }, 1328 + [TCA_RATE] = { .type = NLA_BINARY, 1329 + .len = sizeof(struct tc_estimator) }, 1330 + [TCA_STAB] = { .type = NLA_NESTED }, 1331 + [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG }, 1332 + [TCA_CHAIN] = { .type = NLA_U32 }, 1333 + [TCA_INGRESS_BLOCK] = { .type = NLA_U32 }, 1334 + [TCA_EGRESS_BLOCK] = { .type = NLA_U32 }, 1335 + }; 1336 + 1325 1337 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, 1326 1338 struct netlink_ext_ack *extack) 1327 1339 { ··· 1350 1338 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1351 1339 return -EPERM; 1352 1340 1353 - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); 1341 + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, 1342 + extack); 1354 1343 if (err < 0) 1355 1344 return err; 1356 1345 ··· 1435 1422 1436 1423 replay: 1437 1424 /* Reinit, just in case something touches this. */ 1438 - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); 1425 + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, 1426 + extack); 1439 1427 if (err < 0) 1440 1428 return err; 1441 1429 ··· 1670 1656 idx = 0; 1671 1657 ASSERT_RTNL(); 1672 1658 1673 - err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); 1659 + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, 1660 + rtm_tca_policy, NULL); 1674 1661 if (err < 0) 1675 1662 return err; 1676 1663 ··· 1890 1875 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) 1891 1876 return -EPERM; 1892 1877 1893 - err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack); 1878 + err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, 1879 + extack); 1894 1880 if (err < 0) 1895 1881 return err; 1896 1882

+4 -3

net/wireless/reg.c

··· 2667 2667 { 2668 2668 struct wiphy *wiphy = NULL; 2669 2669 enum reg_request_treatment treatment; 2670 + enum nl80211_reg_initiator initiator = reg_request->initiator; 2670 2671 2671 2672 if (reg_request->wiphy_idx != WIPHY_IDX_INVALID) 2672 2673 wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); 2673 2674 2674 - switch (reg_request->initiator) { 2675 + switch (initiator) { 2675 2676 case NL80211_REGDOM_SET_BY_CORE: 2676 2677 treatment = reg_process_hint_core(reg_request); 2677 2678 break; ··· 2690 2689 treatment = reg_process_hint_country_ie(wiphy, reg_request); 2691 2690 break; 2692 2691 default: 2693 - WARN(1, "invalid initiator %d\n", reg_request->initiator); 2692 + WARN(1, "invalid initiator %d\n", initiator); 2694 2693 goto out_free; 2695 2694 } 2696 2695 ··· 2705 2704 */ 2706 2705 if (treatment == REG_REQ_ALREADY_SET && wiphy && 2707 2706 wiphy->regulatory_flags & REGULATORY_STRICT_REG) { 2708 - wiphy_update_regulatory(wiphy, reg_request->initiator); 2707 + wiphy_update_regulatory(wiphy, initiator); 2709 2708 wiphy_all_share_dfs_chan_state(wiphy); 2710 2709 reg_check_channels(); 2711 2710 }

+10 -4

net/wireless/wext-compat.c

··· 1278 1278 if (err) 1279 1279 return err; 1280 1280 1281 - if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) 1282 - return -EOPNOTSUPP; 1281 + if (!(sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE))) { 1282 + err = -EOPNOTSUPP; 1283 + goto free; 1284 + } 1283 1285 1284 1286 rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); 1285 1287 1286 - return 0; 1288 + free: 1289 + cfg80211_sinfo_release_content(&sinfo); 1290 + return err; 1287 1291 } 1288 1292 1289 1293 /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ ··· 1297 1293 struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); 1298 1294 /* we are under RTNL - globally locked - so can use static structs */ 1299 1295 static struct iw_statistics wstats; 1300 - static struct station_info sinfo; 1296 + static struct station_info sinfo = {}; 1301 1297 u8 bssid[ETH_ALEN]; 1302 1298 1303 1299 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) ··· 1355 1351 wstats.discard.misc = sinfo.rx_dropped_misc; 1356 1352 if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED)) 1357 1353 wstats.discard.retries = sinfo.tx_failed; 1354 + 1355 + cfg80211_sinfo_release_content(&sinfo); 1358 1356 1359 1357 return &wstats; 1360 1358 }

+3 -1

sound/hda/hdac_i915.c

··· 145 145 if (!acomp->ops) { 146 146 request_module("i915"); 147 147 /* 10s timeout */ 148 - wait_for_completion_timeout(&bind_complete, 10 * 1000); 148 + wait_for_completion_timeout(&bind_complete, 149 + msecs_to_jiffies(10 * 1000)); 149 150 } 150 151 if (!acomp->ops) { 152 + dev_info(bus->dev, "couldn't bind with audio component\n"); 151 153 snd_hdac_acomp_exit(bus); 152 154 return -ENODEV; 153 155 }

+1

sound/pci/hda/patch_realtek.c

··· 6409 6409 SND_PCI_QUIRK(0x1028, 0x0706, "Dell Inspiron 7559", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER), 6410 6410 SND_PCI_QUIRK(0x1028, 0x0725, "Dell Inspiron 3162", ALC255_FIXUP_DELL_SPK_NOISE), 6411 6411 SND_PCI_QUIRK(0x1028, 0x075b, "Dell XPS 13 9360", ALC256_FIXUP_DELL_XPS_13_HEADPHONE_NOISE), 6412 + SND_PCI_QUIRK(0x1028, 0x075c, "Dell XPS 27 7760", ALC298_FIXUP_SPK_VOLUME), 6412 6413 SND_PCI_QUIRK(0x1028, 0x075d, "Dell AIO", ALC298_FIXUP_SPK_VOLUME), 6413 6414 SND_PCI_QUIRK(0x1028, 0x07b0, "Dell Precision 7520", ALC295_FIXUP_DISABLE_DAC3), 6414 6415 SND_PCI_QUIRK(0x1028, 0x0798, "Dell Inspiron 17 7000 Gaming", ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER),

+1 -1

tools/kvm/kvm_stat/kvm_stat

··· 1325 1325 msg = '' 1326 1326 while True: 1327 1327 self.screen.erase() 1328 - self.screen.addstr(0, 0, 'Set update interval (defaults to %fs).' % 1328 + self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' % 1329 1329 DELAY_DEFAULT, curses.A_BOLD) 1330 1330 self.screen.addstr(4, 0, msg) 1331 1331 self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %

+172

tools/testing/selftests/x86/test_vdso.c

··· 17 17 #include <errno.h> 18 18 #include <sched.h> 19 19 #include <stdbool.h> 20 + #include <limits.h> 20 21 21 22 #ifndef SYS_getcpu 22 23 # ifdef __x86_64__ ··· 31 30 #define MAPS_LINE_LEN 128 32 31 33 32 int nerrs = 0; 33 + 34 + /* Function-pointer type used for the __vdso_clock_gettime symbol. */ typedef int (*vgettime_t)(clockid_t, struct timespec *); 35 + 36 + /* Assigned from dlsym() further down; NULL when that lookup fails. */ vgettime_t vdso_clock_gettime; 37 + 38 + /* Function-pointer type used for the __vdso_gettimeofday symbol. */ typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz); 39 + 40 + /* Assigned from dlsym() further down; NULL when that lookup fails. */ vgtod_t vdso_gettimeofday; 34 41 35 42 typedef long (*getcpu_t)(unsigned *, unsigned *, void *); 36 43 ··· 104 95 printf("Warning: failed to find getcpu in vDSO\n"); 105 96 106 97 vgetcpu = (getcpu_t) vsyscall_getcpu(); 98 + 99 + /* Resolve the vDSO time entry points; a failed lookup is only warned about, so the pointer is left NULL. */ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime"); 100 + if (!vdso_clock_gettime) 101 + printf("Warning: failed to find clock_gettime in vDSO\n"); 102 + 103 + vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday"); 104 + if (!vdso_gettimeofday) 105 + printf("Warning: failed to find gettimeofday in vDSO\n"); 106 + 107 107 } 108 108 109 109 static long sys_getcpu(unsigned * cpu, unsigned * node, 110 110 void* cache) 111 111 { 112 112 return syscall(__NR_getcpu, cpu, node, cache); 113 + } 114 + 115 + /* Calls clock_gettime through syscall(__NR_clock_gettime) instead of the vDSO pointer; returns the syscall() result as int. */ static inline int sys_clock_gettime(clockid_t id, struct timespec *ts) 116 + { 117 + return syscall(__NR_clock_gettime, id, ts); 118 + } 119 + 120 + /* Calls gettimeofday through syscall(__NR_gettimeofday) instead of the vDSO pointer; returns the syscall() result as int. */ static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz) 121 + { 122 + return syscall(__NR_gettimeofday, tv, tz); 113 123 } 114 124 115 125 static void test_getcpu(void) ··· 183 155 } 184 156 } 185 157 158 + /* Returns true when *a is not later than *b: seconds decide unless equal, then tv_nsec is compared with <=. */ static bool ts_leq(const struct timespec *a, const struct timespec *b) 159 + { 160 + if (a->tv_sec != b->tv_sec) 161 + return a->tv_sec < b->tv_sec; 162 + else 163 + return a->tv_nsec <= b->tv_nsec; 164 + } 165 + 166 + /* Same ordering test as ts_leq, for struct timeval (tv_usec breaks ties). */ static bool tv_leq(const struct timeval *a, const struct timeval *b) 167 + { 168 + if (a->tv_sec != b->tv_sec) 169 + return a->tv_sec < b->tv_sec; 170 + else 171 + return a->tv_usec <= b->tv_usec; 172 + } 173 + 174 + /* Printable clock names; the array index is the clock id that test_clock_gettime passes along with the name. */ static 
char const * const clocknames[] = { 175 + [0] = "CLOCK_REALTIME", 176 + [1] = "CLOCK_MONOTONIC", 177 + [2] = "CLOCK_PROCESS_CPUTIME_ID", 178 + [3] = "CLOCK_THREAD_CPUTIME_ID", 179 + [4] = "CLOCK_MONOTONIC_RAW", 180 + [5] = "CLOCK_REALTIME_COARSE", 181 + [6] = "CLOCK_MONOTONIC_COARSE", 182 + [7] = "CLOCK_BOOTTIME", 183 + [8] = "CLOCK_REALTIME_ALARM", 184 + [9] = "CLOCK_BOOTTIME_ALARM", 185 + [10] = "CLOCK_SGI_CYCLE", 186 + [11] = "CLOCK_TAI", 187 + }; 188 + 189 + /* Checks one clock id: reads it by syscall, by the vDSO pointer, then by syscall again, and bumps nerrs unless both later calls return 0 and the three times are in non-decreasing order. When the first syscall fails with EINVAL the vDSO call must return -EINVAL; any other syscall error only prints a warning. NOTE(review): vdso_clock_gettime is called here with no NULL check, while the dlsym lookup only warns on failure and test_gettimeofday does guard its pointer - confirm a missing symbol cannot reach this path. */ static void test_one_clock_gettime(int clock, const char *name) 190 + { 191 + struct timespec start, vdso, end; 192 + int vdso_ret, end_ret; 193 + 194 + printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock); 195 + 196 + if (sys_clock_gettime(clock, &start) < 0) { 197 + /* Syscall reported EINVAL: expect the vDSO to return -EINVAL as well. */ if (errno == EINVAL) { 198 + vdso_ret = vdso_clock_gettime(clock, &vdso); 199 + if (vdso_ret == -EINVAL) { 200 + printf("[OK]\tNo such clock.\n"); 201 + } else { 202 + printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret); 203 + nerrs++; 204 + } 205 + } else { 206 + printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno); 207 + } 208 + return; 209 + } 210 + 211 + /* Take the vDSO reading between the two syscall readings so it can be ordered against both. */ vdso_ret = vdso_clock_gettime(clock, &vdso); 212 + end_ret = sys_clock_gettime(clock, &end); 213 + 214 + /* NOTE(review): errno is printed even when only vdso_ret is nonzero; it may then be stale - verify. */ if (vdso_ret != 0 || end_ret != 0) { 215 + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", 216 + vdso_ret, errno); 217 + nerrs++; 218 + return; 219 + } 220 + 221 + /* tv_sec is cast to unsigned long long to match the %llu conversions. */ printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n", 222 + (unsigned long long)start.tv_sec, start.tv_nsec, 223 + (unsigned long long)vdso.tv_sec, vdso.tv_nsec, 224 + (unsigned long long)end.tv_sec, end.tv_nsec); 225 + 226 + if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) { 227 + printf("[FAIL]\tTimes are out of sequence\n"); 228 + nerrs++; 229 + } 230 + } 231 + 232 + /* Runs test_one_clock_gettime for every index of clocknames, then for -1, INT_MIN and INT_MAX. */ static void test_clock_gettime(void) 233 + { 234 + for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]); 235 + clock++) { 236 + test_one_clock_gettime(clock, 
clocknames[clock]); 237 + } 238 + 239 + /* Also test some invalid clock ids */ 240 + test_one_clock_gettime(-1, "invalid"); 241 + test_one_clock_gettime(INT_MIN, "invalid"); 242 + test_one_clock_gettime(INT_MAX, "invalid"); 243 + } 244 + 245 + /* Checks gettimeofday the same way: a syscall reading, a vDSO reading and a second syscall reading must be in non-decreasing order, and the timezone filled in by the first syscall must equal the one filled in by the vDSO. Failures increment nerrs. */ static void test_gettimeofday(void) 246 + { 247 + struct timeval start, vdso, end; 248 + struct timezone sys_tz, vdso_tz; 249 + int vdso_ret, end_ret; 250 + 251 + /* Nothing to test when the vDSO symbol was not found; returns without printing. */ if (!vdso_gettimeofday) 252 + return; 253 + 254 + printf("[RUN]\tTesting gettimeofday...\n"); 255 + 256 + if (sys_gettimeofday(&start, &sys_tz) < 0) { 257 + printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno); 258 + nerrs++; 259 + return; 260 + } 261 + 262 + /* The closing syscall passes NULL for tz; only the timezone from the first syscall is compared below. */ vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); 263 + end_ret = sys_gettimeofday(&end, NULL); 264 + 265 + if (vdso_ret != 0 || end_ret != 0) { 266 + printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n", 267 + vdso_ret, errno); 268 + nerrs++; 269 + return; 270 + } 271 + 272 + printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n", 273 + (unsigned long long)start.tv_sec, start.tv_usec, 274 + (unsigned long long)vdso.tv_sec, vdso.tv_usec, 275 + (unsigned long long)end.tv_sec, end.tv_usec); 276 + 277 + if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) { 278 + printf("[FAIL]\tTimes are out of sequence\n"); 279 + nerrs++; 280 + } 281 + 282 + if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest && 283 + sys_tz.tz_dsttime == vdso_tz.tz_dsttime) { 284 + printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n", 285 + sys_tz.tz_minuteswest, sys_tz.tz_dsttime); 286 + } else { 287 + printf("[FAIL]\ttimezones do not match\n"); 288 + nerrs++; 289 + } 290 + 291 + /* And make sure that passing NULL for tz doesn't crash. 
*/ 292 + vdso_gettimeofday(&vdso, NULL); 293 + } 294 + 186 295 int main(int argc, char **argv) 187 296 { 188 297 fill_function_pointers(); 189 298 299 + test_clock_gettime(); 300 + test_gettimeofday(); 301 + 302 + /* 303 + * Test getcpu() last so that, if something goes wrong setting affinity, 304 + * we still run the other tests. 305 + */ 190 306 test_getcpu(); 191 307 192 /* Exit status is 1 when any test incremented nerrs, 0 otherwise. */ 308 return nerrs ? 1 : 0;