Merge tag 'v4.14-rc3' into irq/irqchip-4.15

-2

Documentation/cpu-freq/index.txt

··· 32 32 33 33 index.txt - File index, Mailing list and Links (this document) 34 34 35 - intel-pstate.txt - Intel pstate cpufreq driver specific file. 36 - 37 35 pcc-cpufreq.txt - PCC cpufreq driver specific file. 38 36 39 37

+18 -10

Documentation/devicetree/bindings/leds/ams,as3645a.txt

··· 15 15 16 16 compatible : Must be "ams,as3645a". 17 17 reg : The I2C address of the device. Typically 0x30. 18 + #address-cells : 1 19 + #size-cells : 0 18 20 19 21 20 - Required properties of the "flash" child node 21 - ============================================= 22 + Required properties of the flash child node (0) 23 + =============================================== 22 24 25 + reg: 0 23 26 flash-timeout-us: Flash timeout in microseconds. The value must be in 24 27 the range [100000, 850000] and divisible by 50000. 25 28 flash-max-microamp: Maximum flash current in microamperes. Has to be ··· 36 33 and divisible by 50000. 37 34 38 35 39 - Optional properties of the "flash" child node 40 - ============================================= 36 + Optional properties of the flash child node 37 + =========================================== 41 38 42 39 label : The label of the flash LED. 43 40 44 41 45 - Required properties of the "indicator" child node 46 - ================================================= 42 + Required properties of the indicator child node (1) 43 + =================================================== 47 44 45 + reg: 1 48 46 led-max-microamp: Maximum indicator current. The allowed values are 49 47 2500, 5000, 7500 and 10000. 50 48 51 - Optional properties of the "indicator" child node 52 - ================================================= 49 + Optional properties of the indicator child node 50 + =============================================== 53 51 54 52 label : The label of the indicator LED. 
55 53 ··· 59 55 ======= 60 56 61 57 as3645a@30 { 58 + #address-cells: 1 59 + #size-cells: 0 62 60 reg = <0x30>; 63 61 compatible = "ams,as3645a"; 64 - flash { 62 + flash@0 { 63 + reg = <0x0>; 65 64 flash-timeout-us = <150000>; 66 65 flash-max-microamp = <320000>; 67 66 led-max-microamp = <60000>; 68 67 ams,input-max-microamp = <1750000>; 69 68 label = "as3645a:flash"; 70 69 }; 71 - indicator { 70 + indicator@1 { 71 + reg = <0x1>; 72 72 led-max-microamp = <10000>; 73 73 label = "as3645a:indicator"; 74 74 };

+6

MAINTAINERS

··· 8597 8597 S: Maintained 8598 8598 F: drivers/media/rc/mtk-cir.c 8599 8599 8600 + MEDIATEK PMIC LED DRIVER 8601 + M: Sean Wang <sean.wang@mediatek.com> 8602 + S: Maintained 8603 + F: drivers/leds/leds-mt6323.c 8604 + F: Documentation/devicetree/bindings/leds/leds-mt6323.txt 8605 + 8600 8606 MEDIATEK ETHERNET DRIVER 8601 8607 M: Felix Fietkau <nbd@openwrt.org> 8602 8608 M: John Crispin <john@phrozen.org>

+3 -3

Makefile

··· 1 1 VERSION = 4 2 2 PATCHLEVEL = 14 3 3 SUBLEVEL = 0 4 - EXTRAVERSION = -rc2 4 + EXTRAVERSION = -rc3 5 5 NAME = Fearless Coyote 6 6 7 7 # *DOCUMENTATION* ··· 1172 1172 1173 1173 PHONY += kselftest 1174 1174 kselftest: 1175 - $(Q)$(MAKE) -C tools/testing/selftests run_tests 1175 + $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests run_tests 1176 1176 1177 1177 PHONY += kselftest-clean 1178 1178 kselftest-clean: 1179 - $(Q)$(MAKE) -C tools/testing/selftests clean 1179 + $(Q)$(MAKE) -C $(srctree)/tools/testing/selftests clean 1180 1180 1181 1181 PHONY += kselftest-merge 1182 1182 kselftest-merge:

+7 -3

arch/arm/boot/dts/omap3-n950-n9.dtsi

··· 267 267 clock-frequency = <400000>; 268 268 269 269 as3645a@30 { 270 + #address-cells = <1>; 271 + #size-cells = <0>; 270 272 reg = <0x30>; 271 273 compatible = "ams,as3645a"; 272 - flash { 274 + flash@0 { 275 + reg = <0x0>; 273 276 flash-timeout-us = <150000>; 274 277 flash-max-microamp = <320000>; 275 278 led-max-microamp = <60000>; 276 - peak-current-limit = <1750000>; 279 + ams,input-max-microamp = <1750000>; 277 280 }; 278 - indicator { 281 + indicator@1 { 282 + reg = <0x1>; 279 283 led-max-microamp = <10000>; 280 284 }; 281 285 };

+1 -1

arch/arm64/include/asm/pgtable.h

··· 401 401 /* Find an entry in the third-level page table. */ 402 402 #define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) 403 403 404 - #define pte_offset_phys(dir,addr) (pmd_page_paddr(*(dir)) + pte_index(addr) * sizeof(pte_t)) 404 + #define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t)) 405 405 #define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr)))) 406 406 407 407 #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))

+1

arch/arm64/kernel/head.S

··· 384 384 * booted in EL1 or EL2 respectively. 385 385 */ 386 386 ENTRY(el2_setup) 387 + msr SPsel, #1 // We want to use SP_EL{1,2} 387 388 mrs x0, CurrentEL 388 389 cmp x0, #CurrentEL_EL2 389 390 b.eq 1f

+1 -1

arch/arm64/mm/fault.c

··· 651 651 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" }, 652 652 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" }, 653 653 { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" }, 654 - { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, 654 + { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" }, 655 655 { do_bad, SIGBUS, 0, "unknown 8" }, 656 656 { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" }, 657 657 { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },

+1 -1

arch/microblaze/Kconfig

··· 39 39 # Endianness selection 40 40 choice 41 41 prompt "Endianness selection" 42 - default CPU_BIG_ENDIAN 42 + default CPU_LITTLE_ENDIAN 43 43 help 44 44 microblaze architectures can be configured for either little or 45 45 big endian formats. Be sure to select the appropriate mode.

+1

arch/microblaze/include/uapi/asm/Kbuild

··· 7 7 generic-y += ioctl.h 8 8 generic-y += ioctls.h 9 9 generic-y += ipcbuf.h 10 + generic-y += kvm_para.h 10 11 generic-y += mman.h 11 12 generic-y += msgbuf.h 12 13 generic-y += param.h

+1 -1

arch/microblaze/kernel/dma.c

··· 165 165 unsigned long attrs) 166 166 { 167 167 #ifdef CONFIG_MMU 168 - unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; 168 + unsigned long user_count = vma_pages(vma); 169 169 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; 170 170 unsigned long off = vma->vm_pgoff; 171 171 unsigned long pfn;

+13 -1

arch/powerpc/kvm/book3s_hv_rmhandlers.S

··· 1121 1121 BEGIN_FTR_SECTION 1122 1122 mtspr SPRN_PPR, r0 1123 1123 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) 1124 + 1125 + /* Move canary into DSISR to check for later */ 1126 + BEGIN_FTR_SECTION 1127 + li r0, 0x7fff 1128 + mtspr SPRN_HDSISR, r0 1129 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1130 + 1124 1131 ld r0, VCPU_GPR(R0)(r4) 1125 1132 ld r4, VCPU_GPR(R4)(r4) 1126 1133 ··· 1963 1956 kvmppc_hdsi: 1964 1957 ld r3, VCPU_KVM(r9) 1965 1958 lbz r0, KVM_RADIX(r3) 1966 - cmpwi r0, 0 1967 1959 mfspr r4, SPRN_HDAR 1968 1960 mfspr r6, SPRN_HDSISR 1961 + BEGIN_FTR_SECTION 1962 + /* Look for DSISR canary. If we find it, retry instruction */ 1963 + cmpdi r6, 0x7fff 1964 + beq 6f 1965 + END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 1966 + cmpwi r0, 0 1969 1967 bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */ 1970 1968 /* HPTE not found fault or protection fault? */ 1971 1969 andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h

+2 -2

arch/um/kernel/time.c

··· 98 98 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 99 99 }; 100 100 101 - static void __init timer_setup(void) 101 + static void __init um_timer_setup(void) 102 102 { 103 103 int err; 104 104 ··· 132 132 void __init time_init(void) 133 133 { 134 134 timer_set_signal_handler(); 135 - late_time_init = timer_setup; 135 + late_time_init = um_timer_setup; 136 136 }

+4

arch/x86/events/intel/cstate.c

··· 552 552 553 553 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), 554 554 X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), 555 + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), 555 556 556 557 X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), 557 558 X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), ··· 561 560 X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), 562 561 563 562 X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), 563 + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates), 564 + 565 + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates), 564 566 { }, 565 567 }; 566 568 MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);

+3

arch/x86/events/intel/rapl.c

··· 775 775 X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), 776 776 777 777 X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), 778 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init), 779 + 780 + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init), 778 781 {}, 779 782 }; 780 783

+2 -2

arch/x86/events/intel/uncore_snbep.c

··· 3462 3462 static struct intel_uncore_type skx_uncore_iio = { 3463 3463 .name = "iio", 3464 3464 .num_counters = 4, 3465 - .num_boxes = 5, 3465 + .num_boxes = 6, 3466 3466 .perf_ctr_bits = 48, 3467 3467 .event_ctl = SKX_IIO0_MSR_PMON_CTL0, 3468 3468 .perf_ctr = SKX_IIO0_MSR_PMON_CTR0, ··· 3492 3492 static struct intel_uncore_type skx_uncore_irp = { 3493 3493 .name = "irp", 3494 3494 .num_counters = 2, 3495 - .num_boxes = 5, 3495 + .num_boxes = 6, 3496 3496 .perf_ctr_bits = 48, 3497 3497 .event_ctl = SKX_IRP0_MSR_PMON_CTL0, 3498 3498 .perf_ctr = SKX_IRP0_MSR_PMON_CTR0,

+8

arch/x86/events/msr.c

··· 63 63 case INTEL_FAM6_ATOM_SILVERMONT1: 64 64 case INTEL_FAM6_ATOM_SILVERMONT2: 65 65 case INTEL_FAM6_ATOM_AIRMONT: 66 + 67 + case INTEL_FAM6_ATOM_GOLDMONT: 68 + case INTEL_FAM6_ATOM_DENVERTON: 69 + 70 + case INTEL_FAM6_ATOM_GEMINI_LAKE: 71 + 72 + case INTEL_FAM6_XEON_PHI_KNL: 73 + case INTEL_FAM6_XEON_PHI_KNM: 66 74 if (idx == PERF_MSR_SMI) 67 75 return true; 68 76 break;

+1 -1

arch/x86/ia32/ia32_signal.c

··· 231 231 ksig->ka.sa.sa_restorer) 232 232 sp = (unsigned long) ksig->ka.sa.sa_restorer; 233 233 234 - if (fpu->fpstate_active) { 234 + if (fpu->initialized) { 235 235 unsigned long fx_aligned, math_size; 236 236 237 237 sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);

+5 -3

arch/x86/include/asm/asm.h

··· 11 11 # define __ASM_FORM_COMMA(x) " " #x "," 12 12 #endif 13 13 14 - #ifdef CONFIG_X86_32 14 + #ifndef __x86_64__ 15 + /* 32 bit */ 15 16 # define __ASM_SEL(a,b) __ASM_FORM(a) 16 17 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) 17 18 #else 19 + /* 64 bit */ 18 20 # define __ASM_SEL(a,b) __ASM_FORM(b) 19 21 # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) 20 22 #endif ··· 141 139 * gets set up by the containing function. If you forget to do this, objtool 142 140 * may print a "call without frame pointer save/setup" warning. 143 141 */ 144 - register unsigned int __asm_call_sp asm("esp"); 145 - #define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) 142 + register unsigned long current_stack_pointer asm(_ASM_SP); 143 + #define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) 146 144 #endif 147 145 148 146 #endif /* _ASM_X86_ASM_H */

+22 -68

arch/x86/include/asm/fpu/internal.h

··· 23 23 /* 24 24 * High level FPU state handling functions: 25 25 */ 26 - extern void fpu__activate_curr(struct fpu *fpu); 27 - extern void fpu__activate_fpstate_read(struct fpu *fpu); 28 - extern void fpu__activate_fpstate_write(struct fpu *fpu); 29 - extern void fpu__current_fpstate_write_begin(void); 30 - extern void fpu__current_fpstate_write_end(void); 26 + extern void fpu__initialize(struct fpu *fpu); 27 + extern void fpu__prepare_read(struct fpu *fpu); 28 + extern void fpu__prepare_write(struct fpu *fpu); 31 29 extern void fpu__save(struct fpu *fpu); 32 30 extern void fpu__restore(struct fpu *fpu); 33 31 extern int fpu__restore_sig(void __user *buf, int ia32_frame); ··· 118 120 err; \ 119 121 }) 120 122 121 - #define check_insn(insn, output, input...) \ 122 - ({ \ 123 - int err; \ 123 + #define kernel_insn(insn, output, input...) \ 124 124 asm volatile("1:" #insn "\n\t" \ 125 125 "2:\n" \ 126 - ".section .fixup,\"ax\"\n" \ 127 - "3: movl $-1,%[err]\n" \ 128 - " jmp 2b\n" \ 129 - ".previous\n" \ 130 - _ASM_EXTABLE(1b, 3b) \ 131 - : [err] "=r" (err), output \ 132 - : "0"(0), input); \ 133 - err; \ 134 - }) 126 + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ 127 + : output : input) 135 128 136 129 static inline int copy_fregs_to_user(struct fregs_state __user *fx) 137 130 { ··· 142 153 143 154 static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) 144 155 { 145 - int err; 146 - 147 156 if (IS_ENABLED(CONFIG_X86_32)) { 148 - err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); 157 + kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); 149 158 } else { 150 159 if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { 151 - err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); 160 + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); 152 161 } else { 153 162 /* See comment in copy_fxregs_to_kernel() below. 
*/ 154 - err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); 163 + kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); 155 164 } 156 165 } 157 - /* Copying from a kernel buffer to FPU registers should never fail: */ 158 - WARN_ON_FPU(err); 159 166 } 160 167 161 168 static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) ··· 168 183 169 184 static inline void copy_kernel_to_fregs(struct fregs_state *fx) 170 185 { 171 - int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); 172 - 173 - WARN_ON_FPU(err); 186 + kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); 174 187 } 175 188 176 189 static inline int copy_user_to_fregs(struct fregs_state __user *fx) ··· 264 281 * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact 265 282 * XSAVE area format. 266 283 */ 267 - #define XSTATE_XRESTORE(st, lmask, hmask, err) \ 284 + #define XSTATE_XRESTORE(st, lmask, hmask) \ 268 285 asm volatile(ALTERNATIVE(XRSTOR, \ 269 286 XRSTORS, X86_FEATURE_XSAVES) \ 270 287 "\n" \ 271 - "xor %[err], %[err]\n" \ 272 288 "3:\n" \ 273 - ".pushsection .fixup,\"ax\"\n" \ 274 - "4: movl $-2, %[err]\n" \ 275 - "jmp 3b\n" \ 276 - ".popsection\n" \ 277 - _ASM_EXTABLE(661b, 4b) \ 278 - : [err] "=r" (err) \ 289 + _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ 290 + : \ 279 291 : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ 280 292 : "memory") 281 293 ··· 314 336 else 315 337 XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); 316 338 317 - /* We should never fault when copying from a kernel buffer: */ 339 + /* 340 + * We should never fault when copying from a kernel buffer, and the FPU 341 + * state we set at boot time should be valid. 
342 + */ 318 343 WARN_ON_FPU(err); 319 344 } 320 345 ··· 331 350 u32 hmask = mask >> 32; 332 351 int err; 333 352 334 - WARN_ON(!alternatives_patched); 353 + WARN_ON_FPU(!alternatives_patched); 335 354 336 355 XSTATE_XSAVE(xstate, lmask, hmask, err); 337 356 ··· 346 365 { 347 366 u32 lmask = mask; 348 367 u32 hmask = mask >> 32; 349 - int err; 350 368 351 - XSTATE_XRESTORE(xstate, lmask, hmask, err); 352 - 353 - /* We should never fault when copying from a kernel buffer: */ 354 - WARN_ON_FPU(err); 369 + XSTATE_XRESTORE(xstate, lmask, hmask); 355 370 } 356 371 357 372 /* ··· 503 526 */ 504 527 static inline void fpregs_deactivate(struct fpu *fpu) 505 528 { 506 - WARN_ON_FPU(!fpu->fpregs_active); 507 - 508 - fpu->fpregs_active = 0; 509 529 this_cpu_write(fpu_fpregs_owner_ctx, NULL); 510 530 trace_x86_fpu_regs_deactivated(fpu); 511 531 } 512 532 513 533 static inline void fpregs_activate(struct fpu *fpu) 514 534 { 515 - WARN_ON_FPU(fpu->fpregs_active); 516 - 517 - fpu->fpregs_active = 1; 518 535 this_cpu_write(fpu_fpregs_owner_ctx, fpu); 519 536 trace_x86_fpu_regs_activated(fpu); 520 - } 521 - 522 - /* 523 - * The question "does this thread have fpu access?" 524 - * is slightly racy, since preemption could come in 525 - * and revoke it immediately after the test. 526 - * 527 - * However, even in that very unlikely scenario, 528 - * we can just assume we have FPU access - typically 529 - * to save the FP state - we'll just take a #NM 530 - * fault and get the FPU access back. 531 - */ 532 - static inline int fpregs_active(void) 533 - { 534 - return current->thread.fpu.fpregs_active; 535 537 } 536 538 537 539 /* ··· 527 571 static inline void 528 572 switch_fpu_prepare(struct fpu *old_fpu, int cpu) 529 573 { 530 - if (old_fpu->fpregs_active) { 574 + if (old_fpu->initialized) { 531 575 if (!copy_fpregs_to_fpstate(old_fpu)) 532 576 old_fpu->last_cpu = -1; 533 577 else 534 578 old_fpu->last_cpu = cpu; 535 579 536 580 /* But leave fpu_fpregs_owner_ctx! 
*/ 537 - old_fpu->fpregs_active = 0; 538 581 trace_x86_fpu_regs_deactivated(old_fpu); 539 582 } else 540 583 old_fpu->last_cpu = -1; ··· 550 595 static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) 551 596 { 552 597 bool preload = static_cpu_has(X86_FEATURE_FPU) && 553 - new_fpu->fpstate_active; 598 + new_fpu->initialized; 554 599 555 600 if (preload) { 556 601 if (!fpregs_state_valid(new_fpu, cpu)) ··· 572 617 struct fpu *fpu = &current->thread.fpu; 573 618 574 619 preempt_disable(); 575 - if (!fpregs_active()) 576 - fpregs_activate(fpu); 620 + fpregs_activate(fpu); 577 621 preempt_enable(); 578 622 } 579 623

+6 -26

arch/x86/include/asm/fpu/types.h

··· 68 68 /* Default value for fxregs_state.mxcsr: */ 69 69 #define MXCSR_DEFAULT 0x1f80 70 70 71 + /* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */ 72 + #define MXCSR_AND_FLAGS_SIZE sizeof(u64) 73 + 71 74 /* 72 75 * Software based FPU emulation state. This is arbitrary really, 73 76 * it matches the x87 format to make it easier to understand: ··· 293 290 unsigned int last_cpu; 294 291 295 292 /* 296 - * @fpstate_active: 293 + * @initialized: 297 294 * 298 - * This flag indicates whether this context is active: if the task 295 + * This flag indicates whether this context is initialized: if the task 299 296 * is not running then we can restore from this context, if the task 300 297 * is running then we should save into this context. 301 298 */ 302 - unsigned char fpstate_active; 303 - 304 - /* 305 - * @fpregs_active: 306 - * 307 - * This flag determines whether a given context is actively 308 - * loaded into the FPU's registers and that those registers 309 - * represent the task's current FPU state. 310 - * 311 - * Note the interaction with fpstate_active: 312 - * 313 - * # task does not use the FPU: 314 - * fpstate_active == 0 315 - * 316 - * # task uses the FPU and regs are active: 317 - * fpstate_active == 1 && fpregs_active == 1 318 - * 319 - * # the regs are inactive but still match fpstate: 320 - * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu 321 - * 322 - * The third state is what we use for the lazy restore optimization 323 - * on lazy-switching CPUs. 324 - */ 325 - unsigned char fpregs_active; 299 + unsigned char initialized; 326 300 327 301 /* 328 302 * @state:

+8 -4

arch/x86/include/asm/fpu/xstate.h

··· 48 48 void *get_xsave_addr(struct xregs_state *xsave, int xstate); 49 49 const void *get_xsave_field_ptr(int xstate_field); 50 50 int using_compacted_format(void); 51 - int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, 52 - void __user *ubuf, struct xregs_state *xsave); 53 - int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, 54 - struct xregs_state *xsave); 51 + int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); 52 + int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); 53 + int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); 54 + int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); 55 + 56 + /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ 57 + extern int validate_xstate_header(const struct xstate_header *hdr); 58 + 55 59 #endif

-11

arch/x86/include/asm/thread_info.h

··· 158 158 */ 159 159 #ifndef __ASSEMBLY__ 160 160 161 - static inline unsigned long current_stack_pointer(void) 162 - { 163 - unsigned long sp; 164 - #ifdef CONFIG_X86_64 165 - asm("mov %%rsp,%0" : "=g" (sp)); 166 - #else 167 - asm("mov %%esp,%0" : "=g" (sp)); 168 - #endif 169 - return sp; 170 - } 171 - 172 161 /* 173 162 * Walks up the stack frames to make sure that the specified object is 174 163 * entirely contained by a single stack frame.

+4 -7

arch/x86/include/asm/trace/fpu.h

··· 12 12 13 13 TP_STRUCT__entry( 14 14 __field(struct fpu *, fpu) 15 - __field(bool, fpregs_active) 16 - __field(bool, fpstate_active) 15 + __field(bool, initialized) 17 16 __field(u64, xfeatures) 18 17 __field(u64, xcomp_bv) 19 18 ), 20 19 21 20 TP_fast_assign( 22 21 __entry->fpu = fpu; 23 - __entry->fpregs_active = fpu->fpregs_active; 24 - __entry->fpstate_active = fpu->fpstate_active; 22 + __entry->initialized = fpu->initialized; 25 23 if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { 26 24 __entry->xfeatures = fpu->state.xsave.header.xfeatures; 27 25 __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; 28 26 } 29 27 ), 30 - TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", 28 + TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", 31 29 __entry->fpu, 32 - __entry->fpregs_active, 33 - __entry->fpstate_active, 30 + __entry->initialized, 34 31 __entry->xfeatures, 35 32 __entry->xcomp_bv 36 33 )

+1 -1

arch/x86/include/asm/uaccess.h

··· 337 337 _ASM_EXTABLE(1b, 4b) \ 338 338 _ASM_EXTABLE(2b, 4b) \ 339 339 : "=r" (retval), "=&A"(x) \ 340 - : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ 340 + : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \ 341 341 "i" (errret), "0" (retval)); \ 342 342 }) 343 343

+2 -2

arch/x86/include/asm/xen/hypercall.h

··· 551 551 MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, 552 552 struct desc_struct desc) 553 553 { 554 - u32 *p = (u32 *) &desc; 555 - 556 554 mcl->op = __HYPERVISOR_update_descriptor; 557 555 if (sizeof(maddr) == sizeof(long)) { 558 556 mcl->args[0] = maddr; 559 557 mcl->args[1] = *(unsigned long *)&desc; 560 558 } else { 559 + u32 *p = (u32 *)&desc; 560 + 561 561 mcl->args[0] = maddr; 562 562 mcl->args[1] = maddr >> 32; 563 563 mcl->args[2] = *p++;

+43 -112

arch/x86/kernel/fpu/core.c

··· 100 100 101 101 kernel_fpu_disable(); 102 102 103 - if (fpu->fpregs_active) { 103 + if (fpu->initialized) { 104 104 /* 105 105 * Ignore return value -- we don't care if reg state 106 106 * is clobbered. ··· 116 116 { 117 117 struct fpu *fpu = &current->thread.fpu; 118 118 119 - if (fpu->fpregs_active) 119 + if (fpu->initialized) 120 120 copy_kernel_to_fpregs(&fpu->state); 121 121 122 122 kernel_fpu_enable(); ··· 148 148 149 149 preempt_disable(); 150 150 trace_x86_fpu_before_save(fpu); 151 - if (fpu->fpregs_active) { 151 + if (fpu->initialized) { 152 152 if (!copy_fpregs_to_fpstate(fpu)) { 153 153 copy_kernel_to_fpregs(&fpu->state); 154 154 } ··· 189 189 190 190 int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) 191 191 { 192 - dst_fpu->fpregs_active = 0; 193 192 dst_fpu->last_cpu = -1; 194 193 195 - if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) 194 + if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) 196 195 return 0; 197 196 198 197 WARN_ON_FPU(src_fpu != &current->thread.fpu); ··· 205 206 /* 206 207 * Save current FPU registers directly into the child 207 208 * FPU context, without any memory-to-memory copying. 208 - * In lazy mode, if the FPU context isn't loaded into 209 - * fpregs, CR0.TS will be set and do_device_not_available 210 - * will load the FPU context. 211 209 * 212 - * We have to do all this with preemption disabled, 213 - * mostly because of the FNSAVE case, because in that 214 - * case we must not allow preemption in the window 215 - * between the FNSAVE and us marking the context lazy. 216 - * 217 - * It shouldn't be an issue as even FNSAVE is plenty 218 - * fast in terms of critical section length. 210 + * ( The function 'fails' in the FNSAVE case, which destroys 211 + * register contents so we have to copy them back. 
) 219 212 */ 220 - preempt_disable(); 221 213 if (!copy_fpregs_to_fpstate(dst_fpu)) { 222 - memcpy(&src_fpu->state, &dst_fpu->state, 223 - fpu_kernel_xstate_size); 224 - 214 + memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size); 225 215 copy_kernel_to_fpregs(&src_fpu->state); 226 216 } 227 - preempt_enable(); 228 217 229 218 trace_x86_fpu_copy_src(src_fpu); 230 219 trace_x86_fpu_copy_dst(dst_fpu); ··· 224 237 * Activate the current task's in-memory FPU context, 225 238 * if it has not been used before: 226 239 */ 227 - void fpu__activate_curr(struct fpu *fpu) 240 + void fpu__initialize(struct fpu *fpu) 228 241 { 229 242 WARN_ON_FPU(fpu != &current->thread.fpu); 230 243 231 - if (!fpu->fpstate_active) { 244 + if (!fpu->initialized) { 232 245 fpstate_init(&fpu->state); 233 246 trace_x86_fpu_init_state(fpu); 234 247 235 248 trace_x86_fpu_activate_state(fpu); 236 249 /* Safe to do for the current task: */ 237 - fpu->fpstate_active = 1; 250 + fpu->initialized = 1; 238 251 } 239 252 } 240 - EXPORT_SYMBOL_GPL(fpu__activate_curr); 253 + EXPORT_SYMBOL_GPL(fpu__initialize); 241 254 242 255 /* 243 256 * This function must be called before we read a task's fpstate. 244 257 * 245 - * If the task has not used the FPU before then initialize its 246 - * fpstate. 258 + * There's two cases where this gets called: 259 + * 260 + * - for the current task (when coredumping), in which case we have 261 + * to save the latest FPU registers into the fpstate, 262 + * 263 + * - or it's called for stopped tasks (ptrace), in which case the 264 + * registers were already saved by the context-switch code when 265 + * the task scheduled out - we only have to initialize the registers 266 + * if they've never been initialized. 247 267 * 248 268 * If the task has used the FPU before then save it. 
249 269 */ 250 - void fpu__activate_fpstate_read(struct fpu *fpu) 270 + void fpu__prepare_read(struct fpu *fpu) 251 271 { 252 - /* 253 - * If fpregs are active (in the current CPU), then 254 - * copy them to the fpstate: 255 - */ 256 - if (fpu->fpregs_active) { 272 + if (fpu == &current->thread.fpu) { 257 273 fpu__save(fpu); 258 274 } else { 259 - if (!fpu->fpstate_active) { 275 + if (!fpu->initialized) { 260 276 fpstate_init(&fpu->state); 261 277 trace_x86_fpu_init_state(fpu); 262 278 263 279 trace_x86_fpu_activate_state(fpu); 264 280 /* Safe to do for current and for stopped child tasks: */ 265 - fpu->fpstate_active = 1; 281 + fpu->initialized = 1; 266 282 } 267 283 } 268 284 } ··· 273 283 /* 274 284 * This function must be called before we write a task's fpstate. 275 285 * 276 - * If the task has used the FPU before then unlazy it. 286 + * If the task has used the FPU before then invalidate any cached FPU registers. 277 287 * If the task has not used the FPU before then initialize its fpstate. 278 288 * 279 289 * After this function call, after registers in the fpstate are 280 290 * modified and the child task has woken up, the child task will 281 291 * restore the modified FPU state from the modified context. If we 282 - * didn't clear its lazy status here then the lazy in-registers 292 + * didn't clear its cached status here then the cached in-registers 283 293 * state pending on its former CPU could be restored, corrupting 284 294 * the modifications. 
285 295 */ 286 - void fpu__activate_fpstate_write(struct fpu *fpu) 296 + void fpu__prepare_write(struct fpu *fpu) 287 297 { 288 298 /* 289 299 * Only stopped child tasks can be used to modify the FPU ··· 291 301 */ 292 302 WARN_ON_FPU(fpu == &current->thread.fpu); 293 303 294 - if (fpu->fpstate_active) { 295 - /* Invalidate any lazy state: */ 304 + if (fpu->initialized) { 305 + /* Invalidate any cached state: */ 296 306 __fpu_invalidate_fpregs_state(fpu); 297 307 } else { 298 308 fpstate_init(&fpu->state); ··· 300 310 301 311 trace_x86_fpu_activate_state(fpu); 302 312 /* Safe to do for stopped child tasks: */ 303 - fpu->fpstate_active = 1; 313 + fpu->initialized = 1; 304 314 } 305 - } 306 - 307 - /* 308 - * This function must be called before we write the current 309 - * task's fpstate. 310 - * 311 - * This call gets the current FPU register state and moves 312 - * it in to the 'fpstate'. Preemption is disabled so that 313 - * no writes to the 'fpstate' can occur from context 314 - * swiches. 315 - * 316 - * Must be followed by a fpu__current_fpstate_write_end(). 317 - */ 318 - void fpu__current_fpstate_write_begin(void) 319 - { 320 - struct fpu *fpu = &current->thread.fpu; 321 - 322 - /* 323 - * Ensure that the context-switching code does not write 324 - * over the fpstate while we are doing our update. 325 - */ 326 - preempt_disable(); 327 - 328 - /* 329 - * Move the fpregs in to the fpu's 'fpstate'. 330 - */ 331 - fpu__activate_fpstate_read(fpu); 332 - 333 - /* 334 - * The caller is about to write to 'fpu'. Ensure that no 335 - * CPU thinks that its fpregs match the fpstate. This 336 - * ensures we will not be lazy and skip a XRSTOR in the 337 - * future. 338 - */ 339 - __fpu_invalidate_fpregs_state(fpu); 340 - } 341 - 342 - /* 343 - * This function must be paired with fpu__current_fpstate_write_begin() 344 - * 345 - * This will ensure that the modified fpstate gets placed back in 346 - * the fpregs if necessary. 
347 - * 348 - * Note: This function may be called whether or not an _actual_ 349 - * write to the fpstate occurred. 350 - */ 351 - void fpu__current_fpstate_write_end(void) 352 - { 353 - struct fpu *fpu = &current->thread.fpu; 354 - 355 - /* 356 - * 'fpu' now has an updated copy of the state, but the 357 - * registers may still be out of date. Update them with 358 - * an XRSTOR if they are active. 359 - */ 360 - if (fpregs_active()) 361 - copy_kernel_to_fpregs(&fpu->state); 362 - 363 - /* 364 - * Our update is done and the fpregs/fpstate are in sync 365 - * if necessary. Context switches can happen again. 366 - */ 367 - preempt_enable(); 368 315 } 369 316 370 317 /* ··· 316 389 */ 317 390 void fpu__restore(struct fpu *fpu) 318 391 { 319 - fpu__activate_curr(fpu); 392 + fpu__initialize(fpu); 320 393 321 394 /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ 322 395 kernel_fpu_disable(); ··· 341 414 { 342 415 preempt_disable(); 343 416 344 - if (fpu->fpregs_active) { 345 - /* Ignore delayed exceptions from user space */ 346 - asm volatile("1: fwait\n" 347 - "2:\n" 348 - _ASM_EXTABLE(1b, 2b)); 349 - fpregs_deactivate(fpu); 417 + if (fpu == &current->thread.fpu) { 418 + if (fpu->initialized) { 419 + /* Ignore delayed exceptions from user space */ 420 + asm volatile("1: fwait\n" 421 + "2:\n" 422 + _ASM_EXTABLE(1b, 2b)); 423 + fpregs_deactivate(fpu); 424 + } 350 425 } 351 426 352 - fpu->fpstate_active = 0; 427 + fpu->initialized = 0; 353 428 354 429 trace_x86_fpu_dropped(fpu); 355 430 ··· 391 462 * Make sure fpstate is cleared and initialized. 392 463 */ 393 464 if (static_cpu_has(X86_FEATURE_FPU)) { 394 - fpu__activate_curr(fpu); 465 + preempt_disable(); 466 + fpu__initialize(fpu); 395 467 user_fpu_begin(); 396 468 copy_init_fpstate_to_fpregs(); 469 + preempt_enable(); 397 470 } 398 471 } 399 472

+1 -1

arch/x86/kernel/fpu/init.c

··· 240 240 WARN_ON_FPU(!on_boot_cpu); 241 241 on_boot_cpu = 0; 242 242 243 - WARN_ON_FPU(current->thread.fpu.fpstate_active); 243 + WARN_ON_FPU(current->thread.fpu.initialized); 244 244 } 245 245 246 246 /*

+27 -23

arch/x86/kernel/fpu/regset.c

··· 16 16 { 17 17 struct fpu *target_fpu = &target->thread.fpu; 18 18 19 - return target_fpu->fpstate_active ? regset->n : 0; 19 + return target_fpu->initialized ? regset->n : 0; 20 20 } 21 21 22 22 int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) 23 23 { 24 24 struct fpu *target_fpu = &target->thread.fpu; 25 25 26 - if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) 26 + if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized) 27 27 return regset->n; 28 28 else 29 29 return 0; ··· 38 38 if (!boot_cpu_has(X86_FEATURE_FXSR)) 39 39 return -ENODEV; 40 40 41 - fpu__activate_fpstate_read(fpu); 41 + fpu__prepare_read(fpu); 42 42 fpstate_sanitize_xstate(fpu); 43 43 44 44 return user_regset_copyout(&pos, &count, &kbuf, &ubuf, ··· 55 55 if (!boot_cpu_has(X86_FEATURE_FXSR)) 56 56 return -ENODEV; 57 57 58 - fpu__activate_fpstate_write(fpu); 58 + fpu__prepare_write(fpu); 59 59 fpstate_sanitize_xstate(fpu); 60 60 61 61 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, ··· 89 89 90 90 xsave = &fpu->state.xsave; 91 91 92 - fpu__activate_fpstate_read(fpu); 92 + fpu__prepare_read(fpu); 93 93 94 94 if (using_compacted_format()) { 95 - ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); 95 + if (kbuf) 96 + ret = copy_xstate_to_kernel(kbuf, xsave, pos, count); 97 + else 98 + ret = copy_xstate_to_user(ubuf, xsave, pos, count); 96 99 } else { 97 100 fpstate_sanitize_xstate(fpu); 98 101 /* ··· 132 129 133 130 xsave = &fpu->state.xsave; 134 131 135 - fpu__activate_fpstate_write(fpu); 132 + fpu__prepare_write(fpu); 136 133 137 - if (boot_cpu_has(X86_FEATURE_XSAVES)) 138 - ret = copyin_to_xsaves(kbuf, ubuf, xsave); 139 - else 134 + if (using_compacted_format()) { 135 + if (kbuf) 136 + ret = copy_kernel_to_xstate(xsave, kbuf); 137 + else 138 + ret = copy_user_to_xstate(xsave, ubuf); 139 + } else { 140 140 ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); 141 + if (!ret) 142 + ret = 
validate_xstate_header(&xsave->header); 143 + } 144 + 145 + /* 146 + * mxcsr reserved bits must be masked to zero for security reasons. 147 + */ 148 + xsave->i387.mxcsr &= mxcsr_feature_mask; 141 149 142 150 /* 143 151 * In case of failure, mark all states as init: 144 152 */ 145 153 if (ret) 146 154 fpstate_init(&fpu->state); 147 - 148 - /* 149 - * mxcsr reserved bits must be masked to zero for security reasons. 150 - */ 151 - xsave->i387.mxcsr &= mxcsr_feature_mask; 152 - xsave->header.xfeatures &= xfeatures_mask; 153 - /* 154 - * These bits must be zero. 155 - */ 156 - memset(&xsave->header.reserved, 0, 48); 157 155 158 156 return ret; 159 157 } ··· 303 299 struct fpu *fpu = &target->thread.fpu; 304 300 struct user_i387_ia32_struct env; 305 301 306 - fpu__activate_fpstate_read(fpu); 302 + fpu__prepare_read(fpu); 307 303 308 304 if (!boot_cpu_has(X86_FEATURE_FPU)) 309 305 return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); ··· 333 329 struct user_i387_ia32_struct env; 334 330 int ret; 335 331 336 - fpu__activate_fpstate_write(fpu); 332 + fpu__prepare_write(fpu); 337 333 fpstate_sanitize_xstate(fpu); 338 334 339 335 if (!boot_cpu_has(X86_FEATURE_FPU)) ··· 373 369 struct fpu *fpu = &tsk->thread.fpu; 374 370 int fpvalid; 375 371 376 - fpvalid = fpu->fpstate_active; 372 + fpvalid = fpu->initialized; 377 373 if (fpvalid) 378 374 fpvalid = !fpregs_get(tsk, NULL, 379 375 0, sizeof(struct user_i387_ia32_struct),

+21 -16

arch/x86/kernel/fpu/signal.c

··· 155 155 */ 156 156 int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) 157 157 { 158 - struct xregs_state *xsave = &current->thread.fpu.state.xsave; 158 + struct fpu *fpu = &current->thread.fpu; 159 + struct xregs_state *xsave = &fpu->state.xsave; 159 160 struct task_struct *tsk = current; 160 161 int ia32_fxstate = (buf != buf_fx); 161 162 ··· 171 170 sizeof(struct user_i387_ia32_struct), NULL, 172 171 (struct _fpstate_32 __user *) buf) ? -1 : 1; 173 172 174 - if (fpregs_active() || using_compacted_format()) { 173 + if (fpu->initialized || using_compacted_format()) { 175 174 /* Save the live register state to the user directly. */ 176 175 if (copy_fpregs_to_sigframe(buf_fx)) 177 176 return -1; 178 177 /* Update the thread's fxstate to save the fsave header. */ 179 178 if (ia32_fxstate) 180 - copy_fxregs_to_kernel(&tsk->thread.fpu); 179 + copy_fxregs_to_kernel(fpu); 181 180 } else { 182 181 /* 183 182 * It is a *bug* if kernel uses compacted-format for xsave ··· 190 189 return -1; 191 190 } 192 191 193 - fpstate_sanitize_xstate(&tsk->thread.fpu); 192 + fpstate_sanitize_xstate(fpu); 194 193 if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) 195 194 return -1; 196 195 } ··· 214 213 struct xstate_header *header = &xsave->header; 215 214 216 215 if (use_xsave()) { 217 - /* These bits must be zero. */ 218 - memset(header->reserved, 0, 48); 216 + /* 217 + * Note: we don't need to zero the reserved bits in the 218 + * xstate_header here because we either didn't copy them at all, 219 + * or we checked earlier that they aren't set. 
220 + */ 219 221 220 222 /* 221 223 * Init the state that is not present in the memory ··· 227 223 if (fx_only) 228 224 header->xfeatures = XFEATURE_MASK_FPSSE; 229 225 else 230 - header->xfeatures &= (xfeatures_mask & xfeatures); 226 + header->xfeatures &= xfeatures; 231 227 } 232 228 233 229 if (use_fxsr()) { ··· 283 279 if (!access_ok(VERIFY_READ, buf, size)) 284 280 return -EACCES; 285 281 286 - fpu__activate_curr(fpu); 282 + fpu__initialize(fpu); 287 283 288 284 if (!static_cpu_has(X86_FEATURE_FPU)) 289 285 return fpregs_soft_set(current, NULL, ··· 311 307 /* 312 308 * For 32-bit frames with fxstate, copy the user state to the 313 309 * thread's fpu state, reconstruct fxstate from the fsave 314 - * header. Sanitize the copied state etc. 310 + * header. Validate and sanitize the copied state. 315 311 */ 316 312 struct fpu *fpu = &tsk->thread.fpu; 317 313 struct user_i387_ia32_struct env; 318 314 int err = 0; 319 315 320 316 /* 321 - * Drop the current fpu which clears fpu->fpstate_active. This ensures 317 + * Drop the current fpu which clears fpu->initialized. This ensures 322 318 * that any context-switch during the copy of the new state, 323 319 * avoids the intermediate state from getting restored/saved. 324 320 * Thus avoiding the new restored state from getting corrupted. 325 321 * We will be ready to restore/save the state only after 326 - * fpu->fpstate_active is again set. 322 + * fpu->initialized is again set. 
327 323 */ 328 324 fpu__drop(fpu); 329 325 330 326 if (using_compacted_format()) { 331 - err = copyin_to_xsaves(NULL, buf_fx, 332 - &fpu->state.xsave); 327 + err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); 333 328 } else { 334 - err = __copy_from_user(&fpu->state.xsave, 335 - buf_fx, state_size); 329 + err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); 330 + 331 + if (!err && state_size > offsetof(struct xregs_state, header)) 332 + err = validate_xstate_header(&fpu->state.xsave.header); 336 333 } 337 334 338 335 if (err || __copy_from_user(&env, buf, sizeof(env))) { ··· 344 339 sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); 345 340 } 346 341 347 - fpu->fpstate_active = 1; 342 + fpu->initialized = 1; 348 343 preempt_disable(); 349 344 fpu__restore(fpu); 350 345 preempt_enable();

+222 -60

arch/x86/kernel/fpu/xstate.c

··· 483 483 return boot_cpu_has(X86_FEATURE_XSAVES); 484 484 } 485 485 486 + /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ 487 + int validate_xstate_header(const struct xstate_header *hdr) 488 + { 489 + /* No unknown or supervisor features may be set */ 490 + if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR)) 491 + return -EINVAL; 492 + 493 + /* Userspace must use the uncompacted format */ 494 + if (hdr->xcomp_bv) 495 + return -EINVAL; 496 + 497 + /* 498 + * If 'reserved' is shrunken to add a new field, make sure to validate 499 + * that new field here! 500 + */ 501 + BUILD_BUG_ON(sizeof(hdr->reserved) != 48); 502 + 503 + /* No reserved bits may be set */ 504 + if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) 505 + return -EINVAL; 506 + 507 + return 0; 508 + } 509 + 486 510 static void __xstate_dump_leaves(void) 487 511 { 488 512 int i; ··· 891 867 { 892 868 struct fpu *fpu = &current->thread.fpu; 893 869 894 - if (!fpu->fpstate_active) 870 + if (!fpu->initialized) 895 871 return NULL; 896 872 /* 897 873 * fpu__save() takes the CPU's xstate registers ··· 945 921 #endif /* ! CONFIG_ARCH_HAS_PKEYS */ 946 922 947 923 /* 924 + * Weird legacy quirk: SSE and YMM states store information in the 925 + * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP 926 + * area is marked as unused in the xfeatures header, we need to copy 927 + * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use. 928 + */ 929 + static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) 930 + { 931 + if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM))) 932 + return false; 933 + 934 + if (xfeatures & XFEATURE_MASK_FP) 935 + return false; 936 + 937 + return true; 938 + } 939 + 940 + /* 948 941 * This is similar to user_regset_copyout(), but will not add offset to 949 942 * the source data pointer or increment pos, count, kbuf, and ubuf. 
950 943 */ 951 - static inline int xstate_copyout(unsigned int pos, unsigned int count, 952 - void *kbuf, void __user *ubuf, 953 - const void *data, const int start_pos, 954 - const int end_pos) 944 + static inline void 945 + __copy_xstate_to_kernel(void *kbuf, const void *data, 946 + unsigned int offset, unsigned int size, unsigned int size_total) 955 947 { 956 - if ((count == 0) || (pos < start_pos)) 957 - return 0; 948 + if (offset < size_total) { 949 + unsigned int copy = min(size, size_total - offset); 958 950 959 - if (end_pos < 0 || pos < end_pos) { 960 - unsigned int copy = (end_pos < 0 ? count : min(count, end_pos - pos)); 961 - 962 - if (kbuf) { 963 - memcpy(kbuf + pos, data, copy); 964 - } else { 965 - if (__copy_to_user(ubuf + pos, data, copy)) 966 - return -EFAULT; 967 - } 951 + memcpy(kbuf + offset, data, copy); 968 952 } 969 - return 0; 970 953 } 971 954 972 955 /* 973 956 * Convert from kernel XSAVES compacted format to standard format and copy 974 - * to a ptrace buffer. It supports partial copy but pos always starts from 975 - * zero. This is called from xstateregs_get() and there we check the CPU 976 - * has XSAVES. 957 + * to a kernel-space ptrace buffer. 958 + * 959 + * It supports partial copy but pos always starts from zero. This is called 960 + * from xstateregs_get() and there we check the CPU has XSAVES. 
977 961 */ 978 - int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, 979 - void __user *ubuf, struct xregs_state *xsave) 962 + int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) 980 963 { 981 964 unsigned int offset, size; 982 - int ret, i; 983 965 struct xstate_header header; 966 + int i; 984 967 985 968 /* 986 969 * Currently copy_regset_to_user() starts from pos 0: 987 970 */ 988 - if (unlikely(pos != 0)) 971 + if (unlikely(offset_start != 0)) 989 972 return -EFAULT; 990 973 991 974 /* ··· 1008 977 offset = offsetof(struct xregs_state, header); 1009 978 size = sizeof(header); 1010 979 1011 - ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); 980 + __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); 1012 981 982 + for (i = 0; i < XFEATURE_MAX; i++) { 983 + /* 984 + * Copy only in-use xstates: 985 + */ 986 + if ((header.xfeatures >> i) & 1) { 987 + void *src = __raw_xsave_addr(xsave, 1 << i); 988 + 989 + offset = xstate_offsets[i]; 990 + size = xstate_sizes[i]; 991 + 992 + /* The next component has to fit fully into the output buffer: */ 993 + if (offset + size > size_total) 994 + break; 995 + 996 + __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); 997 + } 998 + 999 + } 1000 + 1001 + if (xfeatures_mxcsr_quirk(header.xfeatures)) { 1002 + offset = offsetof(struct fxregs_state, mxcsr); 1003 + size = MXCSR_AND_FLAGS_SIZE; 1004 + __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total); 1005 + } 1006 + 1007 + /* 1008 + * Fill xsave->i387.sw_reserved value for ptrace frame: 1009 + */ 1010 + offset = offsetof(struct fxregs_state, sw_reserved); 1011 + size = sizeof(xstate_fx_sw_bytes); 1012 + 1013 + __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); 1014 + 1015 + return 0; 1016 + } 1017 + 1018 + static inline int 1019 + __copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, 
unsigned int size, unsigned int size_total) 1020 + { 1021 + if (!size) 1022 + return 0; 1023 + 1024 + if (offset < size_total) { 1025 + unsigned int copy = min(size, size_total - offset); 1026 + 1027 + if (__copy_to_user(ubuf + offset, data, copy)) 1028 + return -EFAULT; 1029 + } 1030 + return 0; 1031 + } 1032 + 1033 + /* 1034 + * Convert from kernel XSAVES compacted format to standard format and copy 1035 + * to a user-space buffer. It supports partial copy but pos always starts from 1036 + * zero. This is called from xstateregs_get() and there we check the CPU 1037 + * has XSAVES. 1038 + */ 1039 + int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) 1040 + { 1041 + unsigned int offset, size; 1042 + int ret, i; 1043 + struct xstate_header header; 1044 + 1045 + /* 1046 + * Currently copy_regset_to_user() starts from pos 0: 1047 + */ 1048 + if (unlikely(offset_start != 0)) 1049 + return -EFAULT; 1050 + 1051 + /* 1052 + * The destination is a ptrace buffer; we put in only user xstates: 1053 + */ 1054 + memset(&header, 0, sizeof(header)); 1055 + header.xfeatures = xsave->header.xfeatures; 1056 + header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; 1057 + 1058 + /* 1059 + * Copy xregs_state->header: 1060 + */ 1061 + offset = offsetof(struct xregs_state, header); 1062 + size = sizeof(header); 1063 + 1064 + ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total); 1013 1065 if (ret) 1014 1066 return ret; 1015 1067 ··· 1106 992 offset = xstate_offsets[i]; 1107 993 size = xstate_sizes[i]; 1108 994 1109 - ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); 995 + /* The next component has to fit fully into the output buffer: */ 996 + if (offset + size > size_total) 997 + break; 1110 998 999 + ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total); 1111 1000 if (ret) 1112 1001 return ret; 1113 - 1114 - if (offset + size >= count) 1115 - break; 1116 1002 } 1117 1003 1004 + } 1005 + 
1006 + if (xfeatures_mxcsr_quirk(header.xfeatures)) { 1007 + offset = offsetof(struct fxregs_state, mxcsr); 1008 + size = MXCSR_AND_FLAGS_SIZE; 1009 + __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total); 1118 1010 } 1119 1011 1120 1012 /* ··· 1129 1009 offset = offsetof(struct fxregs_state, sw_reserved); 1130 1010 size = sizeof(xstate_fx_sw_bytes); 1131 1011 1132 - ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); 1133 - 1012 + ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total); 1134 1013 if (ret) 1135 1014 return ret; 1136 1015 ··· 1137 1018 } 1138 1019 1139 1020 /* 1140 - * Convert from a ptrace standard-format buffer to kernel XSAVES format 1141 - * and copy to the target thread. This is called from xstateregs_set() and 1142 - * there we check the CPU has XSAVES and a whole standard-sized buffer 1143 - * exists. 1021 + * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format 1022 + * and copy to the target thread. This is called from xstateregs_set(). 
1144 1023 */ 1145 - int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, 1146 - struct xregs_state *xsave) 1024 + int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) 1147 1025 { 1148 1026 unsigned int offset, size; 1149 1027 int i; 1150 - u64 xfeatures; 1151 - u64 allowed_features; 1028 + struct xstate_header hdr; 1152 1029 1153 1030 offset = offsetof(struct xregs_state, header); 1154 - size = sizeof(xfeatures); 1031 + size = sizeof(hdr); 1155 1032 1156 - if (kbuf) { 1157 - memcpy(&xfeatures, kbuf + offset, size); 1158 - } else { 1159 - if (__copy_from_user(&xfeatures, ubuf + offset, size)) 1160 - return -EFAULT; 1161 - } 1033 + memcpy(&hdr, kbuf + offset, size); 1162 1034 1163 - /* 1164 - * Reject if the user sets any disabled or supervisor features: 1165 - */ 1166 - allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; 1167 - 1168 - if (xfeatures & ~allowed_features) 1035 + if (validate_xstate_header(&hdr)) 1169 1036 return -EINVAL; 1170 1037 1171 1038 for (i = 0; i < XFEATURE_MAX; i++) { 1172 1039 u64 mask = ((u64)1 << i); 1173 1040 1174 - if (xfeatures & mask) { 1041 + if (hdr.xfeatures & mask) { 1175 1042 void *dst = __raw_xsave_addr(xsave, 1 << i); 1176 1043 1177 1044 offset = xstate_offsets[i]; 1178 1045 size = xstate_sizes[i]; 1179 1046 1180 - if (kbuf) { 1181 - memcpy(dst, kbuf + offset, size); 1182 - } else { 1183 - if (__copy_from_user(dst, ubuf + offset, size)) 1184 - return -EFAULT; 1185 - } 1047 + memcpy(dst, kbuf + offset, size); 1186 1048 } 1049 + } 1050 + 1051 + if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { 1052 + offset = offsetof(struct fxregs_state, mxcsr); 1053 + size = MXCSR_AND_FLAGS_SIZE; 1054 + memcpy(&xsave->i387.mxcsr, kbuf + offset, size); 1187 1055 } 1188 1056 1189 1057 /* ··· 1182 1076 /* 1183 1077 * Add back in the features that came in from userspace: 1184 1078 */ 1185 - xsave->header.xfeatures |= xfeatures; 1079 + xsave->header.xfeatures |= hdr.xfeatures; 1080 + 1081 + return 0; 1082 + } 1083 + 
1084 + /* 1085 + * Convert from a ptrace or sigreturn standard-format user-space buffer to 1086 + * kernel XSAVES format and copy to the target thread. This is called from 1087 + * xstateregs_set(), as well as potentially from the sigreturn() and 1088 + * rt_sigreturn() system calls. 1089 + */ 1090 + int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) 1091 + { 1092 + unsigned int offset, size; 1093 + int i; 1094 + struct xstate_header hdr; 1095 + 1096 + offset = offsetof(struct xregs_state, header); 1097 + size = sizeof(hdr); 1098 + 1099 + if (__copy_from_user(&hdr, ubuf + offset, size)) 1100 + return -EFAULT; 1101 + 1102 + if (validate_xstate_header(&hdr)) 1103 + return -EINVAL; 1104 + 1105 + for (i = 0; i < XFEATURE_MAX; i++) { 1106 + u64 mask = ((u64)1 << i); 1107 + 1108 + if (hdr.xfeatures & mask) { 1109 + void *dst = __raw_xsave_addr(xsave, 1 << i); 1110 + 1111 + offset = xstate_offsets[i]; 1112 + size = xstate_sizes[i]; 1113 + 1114 + if (__copy_from_user(dst, ubuf + offset, size)) 1115 + return -EFAULT; 1116 + } 1117 + } 1118 + 1119 + if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { 1120 + offset = offsetof(struct fxregs_state, mxcsr); 1121 + size = MXCSR_AND_FLAGS_SIZE; 1122 + if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size)) 1123 + return -EFAULT; 1124 + } 1125 + 1126 + /* 1127 + * The state that came in from userspace was user-state only. 1128 + * Mask all the user states out of 'xfeatures': 1129 + */ 1130 + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; 1131 + 1132 + /* 1133 + * Add back in the features that came in from userspace: 1134 + */ 1135 + xsave->header.xfeatures |= hdr.xfeatures; 1186 1136 1187 1137 return 0; 1188 1138 }

+3 -3

arch/x86/kernel/irq_32.c

··· 64 64 65 65 static inline void *current_stack(void) 66 66 { 67 - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); 67 + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); 68 68 } 69 69 70 70 static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) ··· 88 88 89 89 /* Save the next esp at the bottom of the stack */ 90 90 prev_esp = (u32 *)irqstk; 91 - *prev_esp = current_stack_pointer(); 91 + *prev_esp = current_stack_pointer; 92 92 93 93 if (unlikely(overflow)) 94 94 call_on_stack(print_stack_overflow, isp); ··· 139 139 140 140 /* Push the previous esp onto the stack */ 141 141 prev_esp = (u32 *)irqstk; 142 - *prev_esp = current_stack_pointer(); 142 + *prev_esp = current_stack_pointer; 143 143 144 144 call_on_stack(__do_softirq, isp); 145 145 }

+1 -1

arch/x86/kernel/ksysfs.c

··· 299 299 return 0; 300 300 301 301 out_clean_nodes: 302 - for (j = i - 1; j > 0; j--) 302 + for (j = i - 1; j >= 0; j--) 303 303 cleanup_setup_data_node(*(kobjp + j)); 304 304 kfree(kobjp); 305 305 out_setup_data_kobj:

+2 -1

arch/x86/kernel/kvm.c

··· 140 140 141 141 n.token = token; 142 142 n.cpu = smp_processor_id(); 143 - n.halted = is_idle_task(current) || preempt_count() > 1; 143 + n.halted = is_idle_task(current) || preempt_count() > 1 || 144 + rcu_preempt_depth(); 144 145 init_swait_queue_head(&n.wq); 145 146 hlist_add_head(&n.link, &b->list); 146 147 raw_spin_unlock(&b->lock);

+3 -3

arch/x86/kernel/signal.c

··· 263 263 sp = (unsigned long) ka->sa.sa_restorer; 264 264 } 265 265 266 - if (fpu->fpstate_active) { 266 + if (fpu->initialized) { 267 267 sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), 268 268 &buf_fx, &math_size); 269 269 *fpstate = (void __user *)sp; ··· 279 279 return (void __user *)-1L; 280 280 281 281 /* save i387 and extended state */ 282 - if (fpu->fpstate_active && 282 + if (fpu->initialized && 283 283 copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) 284 284 return (void __user *)-1L; 285 285 ··· 755 755 /* 756 756 * Ensure the signal handler starts with the new fpu state. 757 757 */ 758 - if (fpu->fpstate_active) 758 + if (fpu->initialized) 759 759 fpu__clear(fpu); 760 760 } 761 761 signal_setup_done(failed, ksig, stepping);

+1 -1

arch/x86/kernel/traps.c

··· 142 142 * from double_fault. 143 143 */ 144 144 BUG_ON((unsigned long)(current_top_of_stack() - 145 - current_stack_pointer()) >= THREAD_SIZE); 145 + current_stack_pointer) >= THREAD_SIZE); 146 146 147 147 preempt_enable_no_resched(); 148 148 }

+101 -105

arch/x86/kvm/vmx.c

··· 200 200 int cpu; 201 201 bool launched; 202 202 bool nmi_known_unmasked; 203 + unsigned long vmcs_host_cr3; /* May not match real cr3 */ 204 + unsigned long vmcs_host_cr4; /* May not match real cr4 */ 203 205 struct list_head loaded_vmcss_on_cpu_link; 204 206 }; 205 207 ··· 602 600 int gs_ldt_reload_needed; 603 601 int fs_reload_needed; 604 602 u64 msr_host_bndcfgs; 605 - unsigned long vmcs_host_cr3; /* May not match real cr3 */ 606 - unsigned long vmcs_host_cr4; /* May not match real cr4 */ 607 603 } host_state; 608 604 struct { 609 605 int vm86_active; ··· 2202 2202 struct pi_desc old, new; 2203 2203 unsigned int dest; 2204 2204 2205 - if (!kvm_arch_has_assigned_device(vcpu->kvm) || 2206 - !irq_remapping_cap(IRQ_POSTING_CAP) || 2207 - !kvm_vcpu_apicv_active(vcpu)) 2205 + /* 2206 + * In case of hot-plug or hot-unplug, we may have to undo 2207 + * vmx_vcpu_pi_put even if there is no assigned device. And we 2208 + * always keep PI.NDST up to date for simplicity: it makes the 2209 + * code easier, and CPU migration is not a fast path. 2210 + */ 2211 + if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) 2208 2212 return; 2209 2213 2214 + /* 2215 + * First handle the simple case where no cmpxchg is necessary; just 2216 + * allow posting non-urgent interrupts. 2217 + * 2218 + * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change 2219 + * PI.NDST: pi_post_block will do it for us and the wakeup_handler 2220 + * expects the VCPU to be on the blocked_vcpu_list that matches 2221 + * PI.NDST. 2222 + */ 2223 + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || 2224 + vcpu->cpu == cpu) { 2225 + pi_clear_sn(pi_desc); 2226 + return; 2227 + } 2228 + 2229 + /* The full case. */ 2210 2230 do { 2211 2231 old.control = new.control = pi_desc->control; 2212 2232 2213 - /* 2214 - * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there 2215 - * are two possible cases: 2216 - * 1. After running 'pre_block', context switch 2217 - * happened. 
For this case, 'sn' was set in 2218 - * vmx_vcpu_put(), so we need to clear it here. 2219 - * 2. After running 'pre_block', we were blocked, 2220 - * and woken up by some other guy. For this case, 2221 - * we don't need to do anything, 'pi_post_block' 2222 - * will do everything for us. However, we cannot 2223 - * check whether it is case #1 or case #2 here 2224 - * (maybe, not needed), so we also clear sn here, 2225 - * I think it is not a big deal. 2226 - */ 2227 - if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) { 2228 - if (vcpu->cpu != cpu) { 2229 - dest = cpu_physical_id(cpu); 2233 + dest = cpu_physical_id(cpu); 2230 2234 2231 - if (x2apic_enabled()) 2232 - new.ndst = dest; 2233 - else 2234 - new.ndst = (dest << 8) & 0xFF00; 2235 - } 2235 + if (x2apic_enabled()) 2236 + new.ndst = dest; 2237 + else 2238 + new.ndst = (dest << 8) & 0xFF00; 2236 2239 2237 - /* set 'NV' to 'notification vector' */ 2238 - new.nv = POSTED_INTR_VECTOR; 2239 - } 2240 - 2241 - /* Allow posting non-urgent interrupts */ 2242 2240 new.sn = 0; 2243 - } while (cmpxchg(&pi_desc->control, old.control, 2244 - new.control) != old.control); 2241 + } while (cmpxchg64(&pi_desc->control, old.control, 2242 + new.control) != old.control); 2245 2243 } 2246 2244 2247 2245 static void decache_tsc_multiplier(struct vcpu_vmx *vmx) ··· 5176 5178 */ 5177 5179 cr3 = __read_cr3(); 5178 5180 vmcs_writel(HOST_CR3, cr3); /* 22.2.3 FIXME: shadow tables */ 5179 - vmx->host_state.vmcs_host_cr3 = cr3; 5181 + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; 5180 5182 5181 5183 /* Save the most likely value for this task's CR4 in the VMCS. 
*/ 5182 5184 cr4 = cr4_read_shadow(); 5183 5185 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ 5184 - vmx->host_state.vmcs_host_cr4 = cr4; 5186 + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; 5185 5187 5186 5188 vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ 5187 5189 #ifdef CONFIG_X86_64 ··· 9271 9273 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); 9272 9274 9273 9275 cr3 = __get_current_cr3_fast(); 9274 - if (unlikely(cr3 != vmx->host_state.vmcs_host_cr3)) { 9276 + if (unlikely(cr3 != vmx->loaded_vmcs->vmcs_host_cr3)) { 9275 9277 vmcs_writel(HOST_CR3, cr3); 9276 - vmx->host_state.vmcs_host_cr3 = cr3; 9278 + vmx->loaded_vmcs->vmcs_host_cr3 = cr3; 9277 9279 } 9278 9280 9279 9281 cr4 = cr4_read_shadow(); 9280 - if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { 9282 + if (unlikely(cr4 != vmx->loaded_vmcs->vmcs_host_cr4)) { 9281 9283 vmcs_writel(HOST_CR4, cr4); 9282 - vmx->host_state.vmcs_host_cr4 = cr4; 9284 + vmx->loaded_vmcs->vmcs_host_cr4 = cr4; 9283 9285 } 9284 9286 9285 9287 /* When single-stepping over STI and MOV SS, we must clear the ··· 9589 9591 9590 9592 vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED; 9591 9593 9594 + /* 9595 + * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR 9596 + * or POSTED_INTR_WAKEUP_VECTOR. 
9597 + */ 9598 + vmx->pi_desc.nv = POSTED_INTR_VECTOR; 9599 + vmx->pi_desc.sn = 1; 9600 + 9592 9601 return &vmx->vcpu; 9593 9602 9594 9603 free_vmcs: ··· 9844 9839 9845 9840 WARN_ON(!is_guest_mode(vcpu)); 9846 9841 9847 - if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code)) { 9842 + if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) && 9843 + !to_vmx(vcpu)->nested.nested_run_pending) { 9848 9844 vmcs12->vm_exit_intr_error_code = fault->error_code; 9849 9845 nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, 9850 9846 PF_VECTOR | INTR_TYPE_HARD_EXCEPTION | ··· 11710 11704 kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); 11711 11705 } 11712 11706 11707 + static void __pi_post_block(struct kvm_vcpu *vcpu) 11708 + { 11709 + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); 11710 + struct pi_desc old, new; 11711 + unsigned int dest; 11712 + 11713 + do { 11714 + old.control = new.control = pi_desc->control; 11715 + WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR, 11716 + "Wakeup handler not enabled while the VCPU is blocked\n"); 11717 + 11718 + dest = cpu_physical_id(vcpu->cpu); 11719 + 11720 + if (x2apic_enabled()) 11721 + new.ndst = dest; 11722 + else 11723 + new.ndst = (dest << 8) & 0xFF00; 11724 + 11725 + /* set 'NV' to 'notification vector' */ 11726 + new.nv = POSTED_INTR_VECTOR; 11727 + } while (cmpxchg64(&pi_desc->control, old.control, 11728 + new.control) != old.control); 11729 + 11730 + if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { 11731 + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11732 + list_del(&vcpu->blocked_vcpu_list); 11733 + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11734 + vcpu->pre_pcpu = -1; 11735 + } 11736 + } 11737 + 11713 11738 /* 11714 11739 * This routine does the following things for vCPU which is going 11715 11740 * to be blocked if VT-d PI is enabled. 
··· 11756 11719 */ 11757 11720 static int pi_pre_block(struct kvm_vcpu *vcpu) 11758 11721 { 11759 - unsigned long flags; 11760 11722 unsigned int dest; 11761 11723 struct pi_desc old, new; 11762 11724 struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); ··· 11765 11729 !kvm_vcpu_apicv_active(vcpu)) 11766 11730 return 0; 11767 11731 11768 - vcpu->pre_pcpu = vcpu->cpu; 11769 - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, 11770 - vcpu->pre_pcpu), flags); 11771 - list_add_tail(&vcpu->blocked_vcpu_list, 11772 - &per_cpu(blocked_vcpu_on_cpu, 11773 - vcpu->pre_pcpu)); 11774 - spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, 11775 - vcpu->pre_pcpu), flags); 11732 + WARN_ON(irqs_disabled()); 11733 + local_irq_disable(); 11734 + if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { 11735 + vcpu->pre_pcpu = vcpu->cpu; 11736 + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11737 + list_add_tail(&vcpu->blocked_vcpu_list, 11738 + &per_cpu(blocked_vcpu_on_cpu, 11739 + vcpu->pre_pcpu)); 11740 + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); 11741 + } 11776 11742 11777 11743 do { 11778 11744 old.control = new.control = pi_desc->control; 11779 - 11780 - /* 11781 - * We should not block the vCPU if 11782 - * an interrupt is posted for it. 
11783 - */ 11784 - if (pi_test_on(pi_desc) == 1) { 11785 - spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, 11786 - vcpu->pre_pcpu), flags); 11787 - list_del(&vcpu->blocked_vcpu_list); 11788 - spin_unlock_irqrestore( 11789 - &per_cpu(blocked_vcpu_on_cpu_lock, 11790 - vcpu->pre_pcpu), flags); 11791 - vcpu->pre_pcpu = -1; 11792 - 11793 - return 1; 11794 - } 11795 11745 11796 11746 WARN((pi_desc->sn == 1), 11797 11747 "Warning: SN field of posted-interrupts " ··· 11800 11778 11801 11779 /* set 'NV' to 'wakeup vector' */ 11802 11780 new.nv = POSTED_INTR_WAKEUP_VECTOR; 11803 - } while (cmpxchg(&pi_desc->control, old.control, 11804 - new.control) != old.control); 11781 + } while (cmpxchg64(&pi_desc->control, old.control, 11782 + new.control) != old.control); 11805 11783 11806 - return 0; 11784 + /* We should not block the vCPU if an interrupt is posted for it. */ 11785 + if (pi_test_on(pi_desc) == 1) 11786 + __pi_post_block(vcpu); 11787 + 11788 + local_irq_enable(); 11789 + return (vcpu->pre_pcpu == -1); 11807 11790 } 11808 11791 11809 11792 static int vmx_pre_block(struct kvm_vcpu *vcpu) ··· 11824 11797 11825 11798 static void pi_post_block(struct kvm_vcpu *vcpu) 11826 11799 { 11827 - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); 11828 - struct pi_desc old, new; 11829 - unsigned int dest; 11830 - unsigned long flags; 11831 - 11832 - if (!kvm_arch_has_assigned_device(vcpu->kvm) || 11833 - !irq_remapping_cap(IRQ_POSTING_CAP) || 11834 - !kvm_vcpu_apicv_active(vcpu)) 11800 + if (vcpu->pre_pcpu == -1) 11835 11801 return; 11836 11802 11837 - do { 11838 - old.control = new.control = pi_desc->control; 11839 - 11840 - dest = cpu_physical_id(vcpu->cpu); 11841 - 11842 - if (x2apic_enabled()) 11843 - new.ndst = dest; 11844 - else 11845 - new.ndst = (dest << 8) & 0xFF00; 11846 - 11847 - /* Allow posting non-urgent interrupts */ 11848 - new.sn = 0; 11849 - 11850 - /* set 'NV' to 'notification vector' */ 11851 - new.nv = POSTED_INTR_VECTOR; 11852 - } while 
(cmpxchg(&pi_desc->control, old.control, 11853 - new.control) != old.control); 11854 - 11855 - if(vcpu->pre_pcpu != -1) { 11856 - spin_lock_irqsave( 11857 - &per_cpu(blocked_vcpu_on_cpu_lock, 11858 - vcpu->pre_pcpu), flags); 11859 - list_del(&vcpu->blocked_vcpu_list); 11860 - spin_unlock_irqrestore( 11861 - &per_cpu(blocked_vcpu_on_cpu_lock, 11862 - vcpu->pre_pcpu), flags); 11863 - vcpu->pre_pcpu = -1; 11864 - } 11803 + WARN_ON(irqs_disabled()); 11804 + local_irq_disable(); 11805 + __pi_post_block(vcpu); 11806 + local_irq_enable(); 11865 11807 } 11866 11808 11867 11809 static void vmx_post_block(struct kvm_vcpu *vcpu)

+1 -1

arch/x86/kvm/x86.c

··· 7225 7225 int r; 7226 7226 sigset_t sigsaved; 7227 7227 7228 - fpu__activate_curr(fpu); 7228 + fpu__initialize(fpu); 7229 7229 7230 7230 if (vcpu->sigset_active) 7231 7231 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

+1 -1

arch/x86/math-emu/fpu_entry.c

··· 114 114 struct desc_struct code_descriptor; 115 115 struct fpu *fpu = &current->thread.fpu; 116 116 117 - fpu__activate_curr(fpu); 117 + fpu__initialize(fpu); 118 118 119 119 #ifdef RE_ENTRANT_CHECKING 120 120 if (emulating) {

+24

arch/x86/mm/extable.c

··· 2 2 #include <linux/uaccess.h> 3 3 #include <linux/sched/debug.h> 4 4 5 + #include <asm/fpu/internal.h> 5 6 #include <asm/traps.h> 6 7 #include <asm/kdebug.h> 7 8 ··· 78 77 return true; 79 78 } 80 79 EXPORT_SYMBOL_GPL(ex_handler_refcount); 80 + 81 + /* 82 + * Handler for when we fail to restore a task's FPU state. We should never get 83 + * here because the FPU state of a task using the FPU (task->thread.fpu.state) 84 + * should always be valid. However, past bugs have allowed userspace to set 85 + * reserved bits in the XSAVE area using PTRACE_SETREGSET or sys_rt_sigreturn(). 86 + * These caused XRSTOR to fail when switching to the task, leaking the FPU 87 + * registers of the task previously executing on the CPU. Mitigate this class 88 + * of vulnerability by restoring from the initial state (essentially, zeroing 89 + * out all the FPU registers) if we can't restore from the task's FPU state. 90 + */ 91 + bool ex_handler_fprestore(const struct exception_table_entry *fixup, 92 + struct pt_regs *regs, int trapnr) 93 + { 94 + regs->ip = ex_fixup_addr(fixup); 95 + 96 + WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.", 97 + (void *)instruction_pointer(regs)); 98 + 99 + __copy_kernel_to_fpregs(&init_fpstate, -1); 100 + return true; 101 + } 102 + EXPORT_SYMBOL_GPL(ex_handler_fprestore); 81 103 82 104 bool ex_handler_ext(const struct exception_table_entry *fixup, 83 105 struct pt_regs *regs, int trapnr)

+24 -23

arch/x86/mm/fault.c

··· 192 192 * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really 193 193 * faulted on a pte with its pkey=4. 194 194 */ 195 - static void fill_sig_info_pkey(int si_code, siginfo_t *info, 196 - struct vm_area_struct *vma) 195 + static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) 197 196 { 198 197 /* This is effectively an #ifdef */ 199 198 if (!boot_cpu_has(X86_FEATURE_OSPKE)) ··· 208 209 * valid VMA, so we should never reach this without a 209 210 * valid VMA. 210 211 */ 211 - if (!vma) { 212 + if (!pkey) { 212 213 WARN_ONCE(1, "PKU fault with no VMA passed in"); 213 214 info->si_pkey = 0; 214 215 return; ··· 218 219 * absolutely guaranteed to be 100% accurate because of 219 220 * the race explained above. 220 221 */ 221 - info->si_pkey = vma_pkey(vma); 222 + info->si_pkey = *pkey; 222 223 } 223 224 224 225 static void 225 226 force_sig_info_fault(int si_signo, int si_code, unsigned long address, 226 - struct task_struct *tsk, struct vm_area_struct *vma, 227 - int fault) 227 + struct task_struct *tsk, u32 *pkey, int fault) 228 228 { 229 229 unsigned lsb = 0; 230 230 siginfo_t info; ··· 238 240 lsb = PAGE_SHIFT; 239 241 info.si_addr_lsb = lsb; 240 242 241 - fill_sig_info_pkey(si_code, &info, vma); 243 + fill_sig_info_pkey(si_code, &info, pkey); 242 244 243 245 force_sig_info(si_signo, &info, tsk); 244 246 } ··· 760 762 struct task_struct *tsk = current; 761 763 unsigned long flags; 762 764 int sig; 763 - /* No context means no VMA to pass down */ 764 - struct vm_area_struct *vma = NULL; 765 765 766 766 /* Are we prepared to handle this kernel fault? */ 767 767 if (fixup_exception(regs, X86_TRAP_PF)) { ··· 784 788 785 789 /* XXX: hwpoison faults will set the wrong code. 
*/ 786 790 force_sig_info_fault(signal, si_code, address, 787 - tsk, vma, 0); 791 + tsk, NULL, 0); 788 792 } 789 793 790 794 /* ··· 892 896 893 897 static void 894 898 __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, 895 - unsigned long address, struct vm_area_struct *vma, 896 - int si_code) 899 + unsigned long address, u32 *pkey, int si_code) 897 900 { 898 901 struct task_struct *tsk = current; 899 902 ··· 940 945 tsk->thread.error_code = error_code; 941 946 tsk->thread.trap_nr = X86_TRAP_PF; 942 947 943 - force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0); 948 + force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0); 944 949 945 950 return; 946 951 } ··· 953 958 954 959 static noinline void 955 960 bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, 956 - unsigned long address, struct vm_area_struct *vma) 961 + unsigned long address, u32 *pkey) 957 962 { 958 - __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR); 963 + __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR); 959 964 } 960 965 961 966 static void ··· 963 968 unsigned long address, struct vm_area_struct *vma, int si_code) 964 969 { 965 970 struct mm_struct *mm = current->mm; 971 + u32 pkey; 972 + 973 + if (vma) 974 + pkey = vma_pkey(vma); 966 975 967 976 /* 968 977 * Something tried to access memory that isn't in our memory map.. ··· 974 975 */ 975 976 up_read(&mm->mmap_sem); 976 977 977 - __bad_area_nosemaphore(regs, error_code, address, vma, si_code); 978 + __bad_area_nosemaphore(regs, error_code, address, 979 + (vma) ? 
&pkey : NULL, si_code); 978 980 } 979 981 980 982 static noinline void ··· 1018 1018 1019 1019 static void 1020 1020 do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, 1021 - struct vm_area_struct *vma, unsigned int fault) 1021 + u32 *pkey, unsigned int fault) 1022 1022 { 1023 1023 struct task_struct *tsk = current; 1024 1024 int code = BUS_ADRERR; ··· 1045 1045 code = BUS_MCEERR_AR; 1046 1046 } 1047 1047 #endif 1048 - force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault); 1048 + force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault); 1049 1049 } 1050 1050 1051 1051 static noinline void 1052 1052 mm_fault_error(struct pt_regs *regs, unsigned long error_code, 1053 - unsigned long address, struct vm_area_struct *vma, 1054 - unsigned int fault) 1053 + unsigned long address, u32 *pkey, unsigned int fault) 1055 1054 { 1056 1055 if (fatal_signal_pending(current) && !(error_code & PF_USER)) { 1057 1056 no_context(regs, error_code, address, 0, 0); ··· 1074 1075 } else { 1075 1076 if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| 1076 1077 VM_FAULT_HWPOISON_LARGE)) 1077 - do_sigbus(regs, error_code, address, vma, fault); 1078 + do_sigbus(regs, error_code, address, pkey, fault); 1078 1079 else if (fault & VM_FAULT_SIGSEGV) 1079 - bad_area_nosemaphore(regs, error_code, address, vma); 1080 + bad_area_nosemaphore(regs, error_code, address, pkey); 1080 1081 else 1081 1082 BUG(); 1082 1083 } ··· 1266 1267 struct mm_struct *mm; 1267 1268 int fault, major = 0; 1268 1269 unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; 1270 + u32 pkey; 1269 1271 1270 1272 tsk = current; 1271 1273 mm = tsk->mm; ··· 1467 1467 return; 1468 1468 } 1469 1469 1470 + pkey = vma_pkey(vma); 1470 1471 up_read(&mm->mmap_sem); 1471 1472 if (unlikely(fault & VM_FAULT_ERROR)) { 1472 - mm_fault_error(regs, error_code, address, vma, fault); 1473 + mm_fault_error(regs, error_code, address, &pkey, fault); 1473 1474 return; 1474 1475 } 1475 1476

+2

arch/x86/mm/mem_encrypt.c

··· 10 10 * published by the Free Software Foundation. 11 11 */ 12 12 13 + #define DISABLE_BRANCH_PROFILING 14 + 13 15 #include <linux/linkage.h> 14 16 #include <linux/init.h> 15 17 #include <linux/mm.h>

+1 -2

arch/x86/mm/pkeys.c

··· 18 18 19 19 #include <asm/cpufeature.h> /* boot_cpu_has, ... */ 20 20 #include <asm/mmu_context.h> /* vma_pkey() */ 21 - #include <asm/fpu/internal.h> /* fpregs_active() */ 22 21 23 22 int __execute_only_pkey(struct mm_struct *mm) 24 23 { ··· 44 45 */ 45 46 preempt_disable(); 46 47 if (!need_to_set_mm_pkey && 47 - fpregs_active() && 48 + current->thread.fpu.initialized && 48 49 !__pkru_allows_read(read_pkru(), execute_only_pkey)) { 49 50 preempt_enable(); 50 51 return execute_only_pkey;

+1 -1

arch/x86/mm/tlb.c

··· 191 191 * mapped in the new pgd, we'll double-fault. Forcibly 192 192 * map it. 193 193 */ 194 - unsigned int index = pgd_index(current_stack_pointer()); 194 + unsigned int index = pgd_index(current_stack_pointer); 195 195 pgd_t *pgd = next->pgd + index; 196 196 197 197 if (unlikely(pgd_none(*pgd)))

+4 -9

arch/x86/xen/mmu_pv.c

··· 1238 1238 * from _brk_limit way up to the max_pfn_mapped (which is the end of 1239 1239 * the ramdisk). We continue on, erasing PMD entries that point to page 1240 1240 * tables - do note that they are accessible at this stage via __va. 1241 - * For good measure we also round up to the PMD - which means that if 1241 + * As Xen is aligning the memory end to a 4MB boundary, for good 1242 + * measure we also round up to PMD_SIZE * 2 - which means that if 1242 1243 * anybody is using __ka address to the initial boot-stack - and try 1243 1244 * to use it - they are going to crash. The xen_start_info has been 1244 1245 * taken care of already in xen_setup_kernel_pagetable. */ 1245 1246 addr = xen_start_info->pt_base; 1246 - size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE); 1247 + size = xen_start_info->nr_pt_frames * PAGE_SIZE; 1247 1248 1248 - xen_cleanhighmap(addr, addr + size); 1249 + xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2)); 1249 1250 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base)); 1250 - #ifdef DEBUG 1251 - /* This is superfluous and is not necessary, but you know what 1252 - * lets do it. The MODULES_VADDR -> MODULES_END should be clear of 1253 - * anything at this stage. */ 1254 - xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); 1255 - #endif 1256 1251 } 1257 1252 #endif 1258 1253

+3

block/blk-core.c

··· 854 854 855 855 kobject_init(&q->kobj, &blk_queue_ktype); 856 856 857 + #ifdef CONFIG_BLK_DEV_IO_TRACE 858 + mutex_init(&q->blk_trace_mutex); 859 + #endif 857 860 mutex_init(&q->sysfs_lock); 858 861 spin_lock_init(&q->__queue_lock); 859 862

-1

block/bsg-lib.c

··· 154 154 failjob_rls_rqst_payload: 155 155 kfree(job->request_payload.sg_list); 156 156 failjob_rls_job: 157 - kfree(job); 158 157 return -ENOMEM; 159 158 } 160 159

+1 -1

block/partition-generic.c

··· 112 112 struct device_attribute *attr, char *buf) 113 113 { 114 114 struct hd_struct *p = dev_to_part(dev); 115 - struct request_queue *q = dev_to_disk(dev)->queue; 115 + struct request_queue *q = part_to_disk(p)->queue; 116 116 unsigned int inflight[2]; 117 117 int cpu; 118 118

+9 -7

drivers/acpi/apei/ghes.c

··· 743 743 } 744 744 ghes_do_proc(ghes, ghes->estatus); 745 745 746 + out: 747 + ghes_clear_estatus(ghes); 748 + 749 + if (rc == -ENOENT) 750 + return rc; 751 + 746 752 /* 747 753 * GHESv2 type HEST entries introduce support for error acknowledgment, 748 754 * so only acknowledge the error if this support is present. 749 755 */ 750 - if (is_hest_type_generic_v2(ghes)) { 751 - rc = ghes_ack_error(ghes->generic_v2); 752 - if (rc) 753 - return rc; 754 - } 755 - out: 756 - ghes_clear_estatus(ghes); 756 + if (is_hest_type_generic_v2(ghes)) 757 + return ghes_ack_error(ghes->generic_v2); 758 + 757 759 return rc; 758 760 } 759 761

+7

drivers/base/power/opp/core.c

··· 1581 1581 1582 1582 opp->available = availability_req; 1583 1583 1584 + dev_pm_opp_get(opp); 1585 + mutex_unlock(&opp_table->lock); 1586 + 1584 1587 /* Notify the change of the OPP availability */ 1585 1588 if (availability_req) 1586 1589 blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ENABLE, ··· 1592 1589 blocking_notifier_call_chain(&opp_table->head, 1593 1590 OPP_EVENT_DISABLE, opp); 1594 1591 1592 + dev_pm_opp_put(opp); 1593 + goto put_table; 1594 + 1595 1595 unlock: 1596 1596 mutex_unlock(&opp_table->lock); 1597 + put_table: 1597 1598 dev_pm_opp_put_opp_table(opp_table); 1598 1599 return r; 1599 1600 }

+1 -1

drivers/block/brd.c

··· 342 342 343 343 if (!brd) 344 344 return -ENODEV; 345 - page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512); 345 + page = brd_insert_page(brd, (sector_t)pgoff << PAGE_SECTORS_SHIFT); 346 346 if (!page) 347 347 return -ENOSPC; 348 348 *kaddr = page_address(page);

+2 -4

drivers/block/loop.h

··· 67 67 struct loop_cmd { 68 68 struct kthread_work work; 69 69 struct request *rq; 70 - union { 71 - bool use_aio; /* use AIO interface to handle I/O */ 72 - atomic_t ref; /* only for aio */ 73 - }; 70 + bool use_aio; /* use AIO interface to handle I/O */ 71 + atomic_t ref; /* only for aio */ 74 72 long ret; 75 73 struct kiocb iocb; 76 74 struct bio_vec *bvec;

+6

drivers/block/nbd.c

··· 1194 1194 if (!capable(CAP_SYS_ADMIN)) 1195 1195 return -EPERM; 1196 1196 1197 + /* The block layer will pass back some non-nbd ioctls in case we have 1198 + * special handling for them, but we don't so just return an error. 1199 + */ 1200 + if (_IOC_TYPE(cmd) != 0xab) 1201 + return -EINVAL; 1202 + 1197 1203 mutex_lock(&nbd->config_lock); 1198 1204 1199 1205 /* Don't allow ioctl operations on a nbd device that was created with

+1 -1

drivers/clocksource/numachip.c

··· 43 43 return 0; 44 44 } 45 45 46 - static struct clock_event_device numachip2_clockevent = { 46 + static const struct clock_event_device numachip2_clockevent __initconst = { 47 47 .name = "numachip2", 48 48 .rating = 400, 49 49 .set_next_event = numachip2_set_next_event,

+4

drivers/cpufreq/cpufreq-dt-platdev.c

··· 118 118 119 119 { .compatible = "sigma,tango4", }, 120 120 121 + { .compatible = "ti,am33xx", }, 122 + { .compatible = "ti,am43", }, 123 + { .compatible = "ti,dra7", }, 124 + 121 125 { } 122 126 }; 123 127

+188 -1

drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c

··· 636 636 NUM_BANKS(ADDR_SURF_2_BANK); 637 637 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 638 638 WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); 639 - } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) { 639 + } else if (adev->asic_type == CHIP_OLAND) { 640 + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 641 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 642 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 643 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 644 + NUM_BANKS(ADDR_SURF_16_BANK) | 645 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 646 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 647 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); 648 + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 649 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 650 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 651 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 652 + NUM_BANKS(ADDR_SURF_16_BANK) | 653 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 654 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 655 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); 656 + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 657 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 658 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 659 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 660 + NUM_BANKS(ADDR_SURF_16_BANK) | 661 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 662 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 663 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); 664 + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 665 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 666 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 667 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | 668 + NUM_BANKS(ADDR_SURF_16_BANK) | 669 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 670 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 671 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); 672 + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 673 + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 674 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 675 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 676 + 
NUM_BANKS(ADDR_SURF_16_BANK) | 677 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 678 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 679 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 680 + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 681 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 682 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 683 + TILE_SPLIT(split_equal_to_row_size) | 684 + NUM_BANKS(ADDR_SURF_16_BANK) | 685 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 686 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 687 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 688 + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 689 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 690 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 691 + TILE_SPLIT(split_equal_to_row_size) | 692 + NUM_BANKS(ADDR_SURF_16_BANK) | 693 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 694 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 695 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 696 + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 697 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 698 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 699 + TILE_SPLIT(split_equal_to_row_size) | 700 + NUM_BANKS(ADDR_SURF_16_BANK) | 701 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 702 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 703 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); 704 + tilemode[8] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 705 + ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | 706 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 707 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 708 + NUM_BANKS(ADDR_SURF_16_BANK) | 709 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 710 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 711 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 712 + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 713 + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 714 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 715 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 716 + NUM_BANKS(ADDR_SURF_16_BANK) | 717 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 718 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 719 + 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 720 + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 721 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 722 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 723 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 724 + NUM_BANKS(ADDR_SURF_16_BANK) | 725 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 726 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 727 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); 728 + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 729 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 730 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 731 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 732 + NUM_BANKS(ADDR_SURF_16_BANK) | 733 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 734 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 735 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 736 + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | 737 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 738 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 739 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 740 + NUM_BANKS(ADDR_SURF_16_BANK) | 741 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 742 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 743 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 744 + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 745 + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | 746 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 747 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | 748 + NUM_BANKS(ADDR_SURF_16_BANK) | 749 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 750 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 751 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 752 + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 753 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 754 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 755 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 756 + NUM_BANKS(ADDR_SURF_16_BANK) | 757 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 758 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 759 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 760 + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 761 + 
ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 762 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 763 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 764 + NUM_BANKS(ADDR_SURF_16_BANK) | 765 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 766 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 767 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 768 + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 769 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 770 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 771 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 772 + NUM_BANKS(ADDR_SURF_16_BANK) | 773 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 774 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 775 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 776 + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 777 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 778 + PIPE_CONFIG(ADDR_SURF_P4_8x16) | 779 + TILE_SPLIT(split_equal_to_row_size) | 780 + NUM_BANKS(ADDR_SURF_16_BANK) | 781 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 782 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 783 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 784 + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 785 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 786 + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 787 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 788 + NUM_BANKS(ADDR_SURF_16_BANK) | 789 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | 790 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 791 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 792 + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 793 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 794 + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 795 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 796 + NUM_BANKS(ADDR_SURF_16_BANK) | 797 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 798 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | 799 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4); 800 + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 801 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 802 + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 803 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | 804 + 
NUM_BANKS(ADDR_SURF_16_BANK) | 805 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 806 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | 807 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 808 + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 809 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 810 + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 811 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | 812 + NUM_BANKS(ADDR_SURF_16_BANK) | 813 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 814 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 815 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2); 816 + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | 817 + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 818 + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | 819 + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | 820 + NUM_BANKS(ADDR_SURF_8_BANK) | 821 + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | 822 + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | 823 + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1); 824 + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) 825 + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); 826 + } else if (adev->asic_type == CHIP_HAINAN) { 640 827 tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | 641 828 ARRAY_MODE(ARRAY_2D_TILED_THIN1) | 642 829 PIPE_CONFIG(ADDR_SURF_P2) |

+2

drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

··· 892 892 int err = 0; 893 893 894 894 dev = kfd_device_by_id(args->gpu_id); 895 + if (!dev) 896 + return -EINVAL; 895 897 896 898 dev->kfd2kgd->get_tile_config(dev->kgd, &config); 897 899

+4 -1

drivers/gpu/drm/amd/amdkfd/kfd_events.c

··· 292 292 struct kfd_event *ev) 293 293 { 294 294 if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) { 295 - pr_warn("Signal event wasn't created because limit was reached\n"); 295 + if (!p->signal_event_limit_reached) { 296 + pr_warn("Signal event wasn't created because limit was reached\n"); 297 + p->signal_event_limit_reached = true; 298 + } 296 299 return -ENOMEM; 297 300 } 298 301

+16 -4

drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c

··· 184 184 if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) 185 185 kq->mqd->destroy_mqd(kq->mqd, 186 186 kq->queue->mqd, 187 - false, 187 + KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 188 188 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, 189 189 kq->queue->pipe, 190 190 kq->queue->queue); ··· 210 210 uint32_t wptr, rptr; 211 211 unsigned int *queue_address; 212 212 213 + /* When rptr == wptr, the buffer is empty. 214 + * When rptr == wptr + 1, the buffer is full. 215 + * It is always rptr that advances to the position of wptr, rather than 216 + * the opposite. So we can only use up to queue_size_dwords - 1 dwords. 217 + */ 213 218 rptr = *kq->rptr_kernel; 214 219 wptr = *kq->wptr_kernel; 215 220 queue_address = (unsigned int *)kq->pq_kernel_addr; ··· 224 219 pr_debug("wptr: %d\n", wptr); 225 220 pr_debug("queue_address 0x%p\n", queue_address); 226 221 227 - available_size = (rptr - 1 - wptr + queue_size_dwords) % 222 + available_size = (rptr + queue_size_dwords - 1 - wptr) % 228 223 queue_size_dwords; 229 224 230 - if (packet_size_in_dwords >= queue_size_dwords || 231 - packet_size_in_dwords >= available_size) { 225 + if (packet_size_in_dwords > available_size) { 232 226 /* 233 227 * make sure calling functions know 234 228 * acquire_packet_buffer() failed ··· 237 233 } 238 234 239 235 if (wptr + packet_size_in_dwords >= queue_size_dwords) { 236 + /* make sure after rolling back to position 0, there is 237 + * still enough space. 238 + */ 239 + if (packet_size_in_dwords >= rptr) { 240 + *buffer_ptr = NULL; 241 + return -ENOMEM; 242 + } 243 + /* fill nops, roll back and start at position 0 */ 240 244 while (wptr > 0) { 241 245 queue_address[wptr] = kq->nop_packet; 242 246 wptr = (wptr + 1) % queue_size_dwords;

+1

drivers/gpu/drm/amd/amdkfd/kfd_priv.h

··· 521 521 struct list_head signal_event_pages; 522 522 u32 next_nonsignal_event_id; 523 523 size_t signal_event_count; 524 + bool signal_event_limit_reached; 524 525 }; 525 526 526 527 /**

+3

drivers/gpu/drm/etnaviv/etnaviv_gem.c

··· 551 551 void etnaviv_gem_free_object(struct drm_gem_object *obj) 552 552 { 553 553 struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj); 554 + struct etnaviv_drm_private *priv = obj->dev->dev_private; 554 555 struct etnaviv_vram_mapping *mapping, *tmp; 555 556 556 557 /* object should not be active */ 557 558 WARN_ON(is_active(etnaviv_obj)); 558 559 560 + mutex_lock(&priv->gem_lock); 559 561 list_del(&etnaviv_obj->gem_node); 562 + mutex_unlock(&priv->gem_lock); 560 563 561 564 list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list, 562 565 obj_node) {

+4 -2

drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c

··· 445 445 cmdbuf->user_size = ALIGN(args->stream_size, 8); 446 446 447 447 ret = etnaviv_gpu_submit(gpu, submit, cmdbuf); 448 - if (ret == 0) 449 - cmdbuf = NULL; 448 + if (ret) 449 + goto out; 450 + 451 + cmdbuf = NULL; 450 452 451 453 if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) { 452 454 /*

+24 -19

drivers/gpu/drm/qxl/qxl_display.c

··· 509 509 .y2 = qfb->base.height 510 510 }; 511 511 512 - if (!old_state->fb) { 513 - qxl_io_log(qdev, 514 - "create primary fb: %dx%d,%d,%d\n", 515 - bo->surf.width, bo->surf.height, 516 - bo->surf.stride, bo->surf.format); 517 - 518 - qxl_io_create_primary(qdev, 0, bo); 519 - bo->is_primary = true; 520 - return; 521 - 522 - } else { 512 + if (old_state->fb) { 523 513 qfb_old = to_qxl_framebuffer(old_state->fb); 524 514 bo_old = gem_to_qxl_bo(qfb_old->obj); 515 + } else { 516 + bo_old = NULL; 517 + } 518 + 519 + if (bo == bo_old) 520 + return; 521 + 522 + if (bo_old && bo_old->is_primary) { 523 + qxl_io_destroy_primary(qdev); 525 524 bo_old->is_primary = false; 526 525 } 527 526 528 - bo->is_primary = true; 527 + if (!bo->is_primary) { 528 + qxl_io_create_primary(qdev, 0, bo); 529 + bo->is_primary = true; 530 + } 529 531 qxl_draw_dirty_fb(qdev, qfb, bo, 0, 0, &norect, 1, 1); 530 532 } 531 533 ··· 536 534 { 537 535 struct qxl_device *qdev = plane->dev->dev_private; 538 536 539 - if (old_state->fb) 540 - { struct qxl_framebuffer *qfb = 537 + if (old_state->fb) { 538 + struct qxl_framebuffer *qfb = 541 539 to_qxl_framebuffer(old_state->fb); 542 540 struct qxl_bo *bo = gem_to_qxl_bo(qfb->obj); 543 541 544 - qxl_io_destroy_primary(qdev); 545 - bo->is_primary = false; 542 + if (bo->is_primary) { 543 + qxl_io_destroy_primary(qdev); 544 + bo->is_primary = false; 545 + } 546 546 } 547 547 } 548 548 ··· 702 698 struct drm_gem_object *obj; 703 699 struct qxl_bo *user_bo; 704 700 705 - if (!plane->state->fb) { 706 - /* we never executed prepare_fb, so there's nothing to 701 + if (!old_state->fb) { 702 + /* 703 + * we never executed prepare_fb, so there's nothing to 707 704 * unpin. 708 705 */ 709 706 return; 710 707 } 711 708 712 - obj = to_qxl_framebuffer(plane->state->fb)->obj; 709 + obj = to_qxl_framebuffer(old_state->fb)->obj; 713 710 user_bo = gem_to_qxl_bo(obj); 714 711 qxl_bo_unpin(user_bo); 715 712 }

+1 -1

drivers/gpu/drm/radeon/radeon_device.c

··· 1663 1663 radeon_agp_suspend(rdev); 1664 1664 1665 1665 pci_save_state(dev->pdev); 1666 - if (freeze && rdev->family >= CHIP_CEDAR) { 1666 + if (freeze && rdev->family >= CHIP_CEDAR && !(rdev->flags & RADEON_IS_IGP)) { 1667 1667 rdev->asic->asic_reset(rdev, true); 1668 1668 pci_restore_state(dev->pdev); 1669 1669 } else if (suspend) {

+1 -1

drivers/gpu/drm/sun4i/Kconfig

··· 26 26 bool "Allwinner A10 HDMI CEC Support" 27 27 depends on DRM_SUN4I_HDMI 28 28 select CEC_CORE 29 - depends on CEC_PIN 29 + select CEC_PIN 30 30 help 31 31 Choose this option if you have an Allwinner SoC with an HDMI 32 32 controller and want to use CEC.

+1 -1

drivers/gpu/drm/sun4i/sun4i_hdmi.h

··· 15 15 #include <drm/drm_connector.h> 16 16 #include <drm/drm_encoder.h> 17 17 18 - #include <media/cec.h> 18 + #include <media/cec-pin.h> 19 19 20 20 #define SUN4I_HDMI_CTRL_REG 0x004 21 21 #define SUN4I_HDMI_CTRL_ENABLE BIT(31)

+1 -1

drivers/gpu/drm/tegra/trace.h

··· 63 63 64 64 /* This part must be outside protection */ 65 65 #undef TRACE_INCLUDE_PATH 66 - #define TRACE_INCLUDE_PATH . 66 + #define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/tegra 67 67 #define TRACE_INCLUDE_FILE trace 68 68 #include <trace/define_trace.h>

+3 -1

drivers/infiniband/core/security.c

··· 432 432 atomic_set(&qp->qp_sec->error_list_count, 0); 433 433 init_completion(&qp->qp_sec->error_complete); 434 434 ret = security_ib_alloc_security(&qp->qp_sec->security); 435 - if (ret) 435 + if (ret) { 436 436 kfree(qp->qp_sec); 437 + qp->qp_sec = NULL; 438 + } 437 439 438 440 return ret; 439 441 }

+7 -7

drivers/infiniband/core/uverbs_cmd.c

··· 3869 3869 resp.raw_packet_caps = attr.raw_packet_caps; 3870 3870 resp.response_length += sizeof(resp.raw_packet_caps); 3871 3871 3872 - if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps)) 3872 + if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps)) 3873 3873 goto end; 3874 3874 3875 - resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size; 3876 - resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags; 3877 - resp.xrq_caps.max_ops = attr.xrq_caps.max_ops; 3878 - resp.xrq_caps.max_sge = attr.xrq_caps.max_sge; 3879 - resp.xrq_caps.flags = attr.xrq_caps.flags; 3880 - resp.response_length += sizeof(resp.xrq_caps); 3875 + resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size; 3876 + resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags; 3877 + resp.tm_caps.max_ops = attr.tm_caps.max_ops; 3878 + resp.tm_caps.max_sge = attr.tm_caps.max_sge; 3879 + resp.tm_caps.flags = attr.tm_caps.flags; 3880 + resp.response_length += sizeof(resp.tm_caps); 3881 3881 end: 3882 3882 err = ib_copy_to_udata(ucore, &resp, resp.response_length); 3883 3883 return err;

+78 -23

drivers/infiniband/hw/hfi1/chip.c

··· 1066 1066 static int thermal_init(struct hfi1_devdata *dd); 1067 1067 1068 1068 static void update_statusp(struct hfi1_pportdata *ppd, u32 state); 1069 + static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, 1070 + int msecs); 1069 1071 static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state, 1070 1072 int msecs); 1071 1073 static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); ··· 8240 8238 u64 regs[CCE_NUM_INT_CSRS]; 8241 8239 u32 bit; 8242 8240 int i; 8241 + irqreturn_t handled = IRQ_NONE; 8243 8242 8244 8243 this_cpu_inc(*dd->int_counter); 8245 8244 ··· 8261 8258 for_each_set_bit(bit, (unsigned long *)&regs[0], 8262 8259 CCE_NUM_INT_CSRS * 64) { 8263 8260 is_interrupt(dd, bit); 8261 + handled = IRQ_HANDLED; 8264 8262 } 8265 8263 8266 - return IRQ_HANDLED; 8264 + return handled; 8267 8265 } 8268 8266 8269 8267 static irqreturn_t sdma_interrupt(int irq, void *data) ··· 9417 9413 write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask); 9418 9414 } 9419 9415 9420 - void reset_qsfp(struct hfi1_pportdata *ppd) 9416 + int reset_qsfp(struct hfi1_pportdata *ppd) 9421 9417 { 9422 9418 struct hfi1_devdata *dd = ppd->dd; 9423 9419 u64 mask, qsfp_mask; ··· 9447 9443 * for alarms and warnings 9448 9444 */ 9449 9445 set_qsfp_int_n(ppd, 1); 9446 + 9447 + /* 9448 + * After the reset, AOC transmitters are enabled by default. They need 9449 + * to be turned off to complete the QSFP setup before they can be 9450 + * enabled again. 
9451 + */ 9452 + return set_qsfp_tx(ppd, 0); 9450 9453 } 9451 9454 9452 9455 static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd, ··· 10316 10305 { 10317 10306 struct hfi1_devdata *dd = ppd->dd; 10318 10307 u32 previous_state; 10308 + int offline_state_ret; 10319 10309 int ret; 10320 10310 10321 10311 update_lcb_cache(dd); ··· 10338 10326 ppd->offline_disabled_reason = 10339 10327 HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT); 10340 10328 10341 - /* 10342 - * Wait for offline transition. It can take a while for 10343 - * the link to go down. 10344 - */ 10345 - ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000); 10346 - if (ret < 0) 10347 - return ret; 10329 + offline_state_ret = wait_phys_link_offline_substates(ppd, 10000); 10330 + if (offline_state_ret < 0) 10331 + return offline_state_ret; 10348 10332 10349 - /* 10350 - * Now in charge of LCB - must be after the physical state is 10351 - * offline.quiet and before host_link_state is changed. 10352 - */ 10353 - set_host_lcb_access(dd); 10354 - write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */ 10355 - 10356 - /* make sure the logical state is also down */ 10357 - ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); 10358 - if (ret) 10359 - force_logical_link_state_down(ppd); 10360 - 10361 - ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ 10362 - 10333 + /* Disabling AOC transmitters */ 10363 10334 if (ppd->port_type == PORT_TYPE_QSFP && 10364 10335 ppd->qsfp_info.limiting_active && 10365 10336 qsfp_mod_present(ppd)) { ··· 10358 10363 "Unable to acquire lock to turn off QSFP TX\n"); 10359 10364 } 10360 10365 } 10366 + 10367 + /* 10368 + * Wait for the offline.Quiet transition if it hasn't happened yet. It 10369 + * can take a while for the link to go down. 
10370 + */ 10371 + if (offline_state_ret != PLS_OFFLINE_QUIET) { 10372 + ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000); 10373 + if (ret < 0) 10374 + return ret; 10375 + } 10376 + 10377 + /* 10378 + * Now in charge of LCB - must be after the physical state is 10379 + * offline.quiet and before host_link_state is changed. 10380 + */ 10381 + set_host_lcb_access(dd); 10382 + write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */ 10383 + 10384 + /* make sure the logical state is also down */ 10385 + ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000); 10386 + if (ret) 10387 + force_logical_link_state_down(ppd); 10388 + 10389 + ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */ 10361 10390 10362 10391 /* 10363 10392 * The LNI has a mandatory wait time after the physical state ··· 10415 10396 & (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) { 10416 10397 /* went down while attempting link up */ 10417 10398 check_lni_states(ppd); 10399 + 10400 + /* The QSFP doesn't need to be reset on LNI failure */ 10401 + ppd->qsfp_info.reset_needed = 0; 10418 10402 } 10419 10403 10420 10404 /* the active link width (downgrade) is 0 on link down */ ··· 12824 12802 12825 12803 log_state_transition(ppd, state); 12826 12804 return 0; 12805 + } 12806 + 12807 + /* 12808 + * wait_phys_link_offline_substates - wait for any offline substate 12809 + * @ppd: port device 12810 + * @msecs: the number of milliseconds to wait 12811 + * 12812 + * Wait up to msecs milliseconds for any offline physical link 12813 + * state change to occur. 12814 + * Returns the physical state that was read once an offline substate is reached, otherwise -ETIMEDOUT. 
12815 + */ 12816 + static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, 12817 + int msecs) 12818 + { 12819 + u32 read_state; 12820 + unsigned long timeout; 12821 + 12822 + timeout = jiffies + msecs_to_jiffies(msecs); 12823 + while (1) { 12824 + read_state = read_physical_state(ppd->dd); 12825 + if ((read_state & 0xF0) == PLS_OFFLINE) 12826 + break; 12827 + if (time_after(jiffies, timeout)) { 12828 + dd_dev_err(ppd->dd, 12829 + "timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n", 12830 + read_state, msecs); 12831 + return -ETIMEDOUT; 12832 + } 12833 + usleep_range(1950, 2050); /* sleep 2ms-ish */ 12834 + } 12835 + 12836 + log_state_transition(ppd, read_state); 12837 + return read_state; 12827 12838 } 12828 12839 12829 12840 #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \

+2 -1

drivers/infiniband/hw/hfi1/chip.h

··· 204 204 #define PLS_OFFLINE_READY_TO_QUIET_LT 0x92 205 205 #define PLS_OFFLINE_REPORT_FAILURE 0x93 206 206 #define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94 207 + #define PLS_OFFLINE_QUIET_DURATION 0x95 207 208 #define PLS_POLLING 0x20 208 209 #define PLS_POLLING_QUIET 0x20 209 210 #define PLS_POLLING_ACTIVE 0x21 ··· 723 722 void handle_link_bounce(struct work_struct *work); 724 723 void handle_start_link(struct work_struct *work); 725 724 void handle_sma_message(struct work_struct *work); 726 - void reset_qsfp(struct hfi1_pportdata *ppd); 725 + int reset_qsfp(struct hfi1_pportdata *ppd); 727 726 void qsfp_event(struct work_struct *work); 728 727 void start_freeze_handling(struct hfi1_pportdata *ppd, int flags); 729 728 int send_idle_sma(struct hfi1_devdata *dd, u64 message);

+15 -5

drivers/infiniband/hw/hfi1/eprom.c

··· 204 204 return ret; 205 205 } 206 206 207 - /* magic character sequence that trails an image */ 207 + /* magic character sequence that begins an image */ 208 + #define IMAGE_START_MAGIC "APO=" 209 + 210 + /* magic character sequence that might trail an image */ 208 211 #define IMAGE_TRAIL_MAGIC "egamiAPO" 209 212 210 213 /* EPROM file types */ ··· 253 250 { 254 251 void *buffer; 255 252 void *p; 253 + u32 length; 256 254 int ret; 257 255 258 256 buffer = kmalloc(P1_SIZE, GFP_KERNEL); ··· 266 262 return ret; 267 263 } 268 264 269 - /* scan for image magic that may trail the actual data */ 270 - p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); 271 - if (!p) { 265 + /* config partition is valid only if it starts with IMAGE_START_MAGIC */ 266 + if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) { 272 267 kfree(buffer); 273 268 return -ENOENT; 274 269 } 275 270 271 + /* scan for image magic that may trail the actual data */ 272 + p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); 273 + if (p) 274 + length = p - buffer; 275 + else 276 + length = P1_SIZE; 277 + 276 278 *data = buffer; 277 - *size = p - buffer; 279 + *size = length; 278 280 return 0; 279 281 } 280 282

+22 -19

drivers/infiniband/hw/hfi1/file_ops.c

··· 930 930 switch (ret) { 931 931 case 0: 932 932 ret = setup_base_ctxt(fd, uctxt); 933 - if (uctxt->subctxt_cnt) { 934 - /* 935 - * Base context is done (successfully or not), notify 936 - * anybody using a sub-context that is waiting for 937 - * this completion. 938 - */ 939 - clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); 940 - wake_up(&uctxt->wait); 941 - } 933 + if (ret) 934 + deallocate_ctxt(uctxt); 942 935 break; 943 936 case 1: 944 937 ret = complete_subctxt(fd); ··· 1298 1305 /* Now allocate the RcvHdr queue and eager buffers. */ 1299 1306 ret = hfi1_create_rcvhdrq(dd, uctxt); 1300 1307 if (ret) 1301 - return ret; 1308 + goto done; 1302 1309 1303 1310 ret = hfi1_setup_eagerbufs(uctxt); 1304 1311 if (ret) 1305 - goto setup_failed; 1312 + goto done; 1306 1313 1307 1314 /* If sub-contexts are enabled, do the appropriate setup */ 1308 1315 if (uctxt->subctxt_cnt) 1309 1316 ret = setup_subctxt(uctxt); 1310 1317 if (ret) 1311 - goto setup_failed; 1318 + goto done; 1312 1319 1313 1320 ret = hfi1_alloc_ctxt_rcv_groups(uctxt); 1314 1321 if (ret) 1315 - goto setup_failed; 1322 + goto done; 1316 1323 1317 1324 ret = init_user_ctxt(fd, uctxt); 1318 1325 if (ret) 1319 - goto setup_failed; 1326 + goto done; 1320 1327 1321 1328 user_init(uctxt); 1322 1329 ··· 1324 1331 fd->uctxt = uctxt; 1325 1332 hfi1_rcd_get(uctxt); 1326 1333 1327 - return 0; 1334 + done: 1335 + if (uctxt->subctxt_cnt) { 1336 + /* 1337 + * On error, set the failed bit so sub-contexts will clean up 1338 + * correctly. 1339 + */ 1340 + if (ret) 1341 + set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); 1328 1342 1329 - setup_failed: 1330 - /* Set the failed bit so sub-context init can do the right thing */ 1331 - set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags); 1332 - deallocate_ctxt(uctxt); 1343 + /* 1344 + * Base context is done (successfully or not), notify anybody 1345 + * using a sub-context that is waiting for this completion. 
1346 + */ 1347 + clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags); 1348 + wake_up(&uctxt->wait); 1349 + } 1333 1350 1334 1351 return ret; 1335 1352 }

+21 -29

drivers/infiniband/hw/hfi1/pcie.c

··· 68 68 /* 69 69 * Code to adjust PCIe capabilities. 70 70 */ 71 - static int tune_pcie_caps(struct hfi1_devdata *); 71 + static void tune_pcie_caps(struct hfi1_devdata *); 72 72 73 73 /* 74 74 * Do all the common PCIe setup and initialization. ··· 351 351 */ 352 352 int request_msix(struct hfi1_devdata *dd, u32 msireq) 353 353 { 354 - int nvec, ret; 354 + int nvec; 355 355 356 356 nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq, 357 357 PCI_IRQ_MSIX | PCI_IRQ_LEGACY); ··· 360 360 return nvec; 361 361 } 362 362 363 - ret = tune_pcie_caps(dd); 364 - if (ret) { 365 - dd_dev_err(dd, "tune_pcie_caps() failed: %d\n", ret); 366 - pci_free_irq_vectors(dd->pcidev); 367 - return ret; 368 - } 363 + tune_pcie_caps(dd); 369 364 370 365 /* check for legacy IRQ */ 371 366 if (nvec == 1 && !dd->pcidev->msix_enabled) ··· 497 502 module_param_named(aspm, aspm_mode, uint, S_IRUGO); 498 503 MODULE_PARM_DESC(aspm, "PCIe ASPM: 0: disable, 1: enable, 2: dynamic"); 499 504 500 - static int tune_pcie_caps(struct hfi1_devdata *dd) 505 + static void tune_pcie_caps(struct hfi1_devdata *dd) 501 506 { 502 507 struct pci_dev *parent; 503 508 u16 rc_mpss, rc_mps, ep_mpss, ep_mps; ··· 508 513 * Turn on extended tags in DevCtl in case the BIOS has turned it off 509 514 * to improve WFR SDMA bandwidth 510 515 */ 511 - ret = pcie_capability_read_word(dd->pcidev, 512 - PCI_EXP_DEVCTL, &ectl); 513 - if (ret) { 514 - dd_dev_err(dd, "Unable to read from PCI config\n"); 515 - return ret; 516 - } 517 - 518 - if (!(ectl & PCI_EXP_DEVCTL_EXT_TAG)) { 516 + ret = pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl); 517 + if ((!ret) && !(ectl & PCI_EXP_DEVCTL_EXT_TAG)) { 519 518 dd_dev_info(dd, "Enabling PCIe extended tags\n"); 520 519 ectl |= PCI_EXP_DEVCTL_EXT_TAG; 521 520 ret = pcie_capability_write_word(dd->pcidev, 522 521 PCI_EXP_DEVCTL, ectl); 523 - if (ret) { 524 - dd_dev_err(dd, "Unable to write to PCI config\n"); 525 - return ret; 526 - } 522 + if (ret) 523 + dd_dev_info(dd, "Unable to 
write to PCI config\n"); 527 524 } 528 525 /* Find out supported and configured values for parent (root) */ 529 526 parent = dd->pcidev->bus->self; ··· 523 536 * The driver cannot perform the tuning if it does not have 524 537 * access to the upstream component. 525 538 */ 526 - if (!parent) 527 - return -EINVAL; 539 + if (!parent) { 540 + dd_dev_info(dd, "Parent not found\n"); 541 + return; 542 + } 528 543 if (!pci_is_root_bus(parent->bus)) { 529 544 dd_dev_info(dd, "Parent not root\n"); 530 - return -EINVAL; 545 + return; 531 546 } 532 - 533 - if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) 534 - return -EINVAL; 547 + if (!pci_is_pcie(parent)) { 548 + dd_dev_info(dd, "Parent is not PCI Express capable\n"); 549 + return; 550 + } 551 + if (!pci_is_pcie(dd->pcidev)) { 552 + dd_dev_info(dd, "PCI device is not PCI Express capable\n"); 553 + return; 554 + } 535 555 rc_mpss = parent->pcie_mpss; 536 556 rc_mps = ffs(pcie_get_mps(parent)) - 8; 537 557 /* Find out supported and configured values for endpoint (us) */ ··· 584 590 ep_mrrs = max_mrrs; 585 591 pcie_set_readrq(dd->pcidev, ep_mrrs); 586 592 } 587 - 588 - return 0; 589 593 } 590 594 591 595 /* End of PCIe capability tuning */

+3 -1

drivers/infiniband/hw/hfi1/platform.c

··· 790 790 * reuse of stale settings established in our previous pass through. 791 791 */ 792 792 if (ppd->qsfp_info.reset_needed) { 793 - reset_qsfp(ppd); 793 + ret = reset_qsfp(ppd); 794 + if (ret) 795 + return ret; 794 796 refresh_qsfp_cache(ppd, &ppd->qsfp_info); 795 797 } else { 796 798 ppd->qsfp_info.reset_needed = 1;

+5 -5

drivers/infiniband/hw/mlx5/main.c

··· 778 778 } 779 779 780 780 if (MLX5_CAP_GEN(mdev, tag_matching)) { 781 - props->xrq_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; 782 - props->xrq_caps.max_num_tags = 781 + props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; 782 + props->tm_caps.max_num_tags = 783 783 (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1; 784 - props->xrq_caps.flags = IB_TM_CAP_RC; 785 - props->xrq_caps.max_ops = 784 + props->tm_caps.flags = IB_TM_CAP_RC; 785 + props->tm_caps.max_ops = 786 786 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); 787 - props->xrq_caps.max_sge = MLX5_TM_MAX_SGE; 787 + props->tm_caps.max_sge = MLX5_TM_MAX_SGE; 788 788 } 789 789 790 790 if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {

+17 -30

drivers/infiniband/hw/mlx5/mem.c

··· 50 50 { 51 51 unsigned long tmp; 52 52 unsigned long m; 53 - int i, k; 54 - u64 base = 0; 55 - int p = 0; 56 - int skip; 57 - int mask; 58 - u64 len; 59 - u64 pfn; 53 + u64 base = ~0, p = 0; 54 + u64 len, pfn; 55 + int i = 0; 60 56 struct scatterlist *sg; 61 57 int entry; 62 58 unsigned long page_shift = umem->page_shift; ··· 72 76 m = find_first_bit(&tmp, BITS_PER_LONG); 73 77 if (max_page_shift) 74 78 m = min_t(unsigned long, max_page_shift - page_shift, m); 75 - skip = 1 << m; 76 - mask = skip - 1; 77 - i = 0; 79 + 78 80 for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { 79 81 len = sg_dma_len(sg) >> page_shift; 80 82 pfn = sg_dma_address(sg) >> page_shift; 81 - for (k = 0; k < len; k++) { 82 - if (!(i & mask)) { 83 - tmp = (unsigned long)pfn; 84 - m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG)); 85 - skip = 1 << m; 86 - mask = skip - 1; 87 - base = pfn; 88 - p = 0; 89 - } else { 90 - if (base + p != pfn) { 91 - tmp = (unsigned long)p; 92 - m = find_first_bit(&tmp, BITS_PER_LONG); 93 - skip = 1 << m; 94 - mask = skip - 1; 95 - base = pfn; 96 - p = 0; 97 - } 98 - } 99 - p++; 100 - i++; 83 + if (base + p != pfn) { 84 + /* If either the offset or the new 85 + * base are unaligned update m 86 + */ 87 + tmp = (unsigned long)(pfn | p); 88 + if (!IS_ALIGNED(tmp, 1 << m)) 89 + m = find_first_bit(&tmp, BITS_PER_LONG); 90 + 91 + base = pfn; 92 + p = 0; 101 93 } 94 + 95 + p += len; 96 + i += len; 102 97 } 103 98 104 99 if (i) {

+17 -10

drivers/infiniband/hw/mlx5/mr.c

··· 47 47 48 48 #define MLX5_UMR_ALIGN 2048 49 49 50 - static int clean_mr(struct mlx5_ib_mr *mr); 50 + static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 51 + static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 51 52 static int mr_cache_max_order(struct mlx5_ib_dev *dev); 52 53 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 53 54 ··· 1271 1270 1272 1271 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, 1273 1272 update_xlt_flags); 1273 + 1274 1274 if (err) { 1275 - mlx5_ib_dereg_mr(&mr->ibmr); 1275 + dereg_mr(dev, mr); 1276 1276 return ERR_PTR(err); 1277 1277 } 1278 1278 } ··· 1358 1356 err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, 1359 1357 &npages, &page_shift, &ncont, &order); 1360 1358 if (err < 0) { 1361 - clean_mr(mr); 1359 + clean_mr(dev, mr); 1362 1360 return err; 1363 1361 } 1364 1362 } ··· 1412 1410 if (err) { 1413 1411 mlx5_ib_warn(dev, "Failed to rereg UMR\n"); 1414 1412 ib_umem_release(mr->umem); 1415 - clean_mr(mr); 1413 + clean_mr(dev, mr); 1416 1414 return err; 1417 1415 } 1418 1416 } ··· 1471 1469 } 1472 1470 } 1473 1471 1474 - static int clean_mr(struct mlx5_ib_mr *mr) 1472 + static int clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1475 1473 { 1476 - struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device); 1477 1474 int allocated_from_cache = mr->allocated_from_cache; 1478 1475 int err; 1479 1476 ··· 1508 1507 return 0; 1509 1508 } 1510 1509 1511 - int mlx5_ib_dereg_mr(struct ib_mr *ibmr) 1510 + static int dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1512 1511 { 1513 - struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 1514 - struct mlx5_ib_mr *mr = to_mmr(ibmr); 1515 1512 int npages = mr->npages; 1516 1513 struct ib_umem *umem = mr->umem; 1517 1514 ··· 1538 1539 } 1539 1540 #endif 1540 1541 1541 - clean_mr(mr); 1542 + clean_mr(dev, mr); 1542 1543 1543 1544 if (umem) { 1544 1545 ib_umem_release(umem); ··· 1546 1547 } 1547 1548 1548 1549 return 0; 1550 + } 1551 
+ 1552 + int mlx5_ib_dereg_mr(struct ib_mr *ibmr) 1553 + { 1554 + struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 1555 + struct mlx5_ib_mr *mr = to_mmr(ibmr); 1556 + 1557 + return dereg_mr(dev, mr); 1549 1558 } 1550 1559 1551 1560 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,

+2 -2

drivers/infiniband/hw/nes/nes_verbs.c

··· 3232 3232 mr->ibmr.iova); 3233 3233 set_wqe_32bit_value(wqe->wqe_words, 3234 3234 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, 3235 - mr->ibmr.length); 3235 + lower_32_bits(mr->ibmr.length)); 3236 3236 set_wqe_32bit_value(wqe->wqe_words, 3237 3237 NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); 3238 3238 set_wqe_32bit_value(wqe->wqe_words, ··· 3274 3274 mr->npages * 8); 3275 3275 3276 3276 nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, " 3277 - "length: %d, rkey: %0x, pgl_paddr: %llx, " 3277 + "length: %lld, rkey: %0x, pgl_paddr: %llx, " 3278 3278 "page_list_len: %u, wqe_misc: %x\n", 3279 3279 (unsigned long long) mr->ibmr.iova, 3280 3280 mr->ibmr.length,

-13

drivers/infiniband/ulp/ipoib/ipoib_ib.c

··· 1000 1000 */ 1001 1001 priv->dev->broadcast[8] = priv->pkey >> 8; 1002 1002 priv->dev->broadcast[9] = priv->pkey & 0xff; 1003 - 1004 - /* 1005 - * Update the broadcast address in the priv->broadcast object, 1006 - * in case it already exists, otherwise no one will do that. 1007 - */ 1008 - if (priv->broadcast) { 1009 - spin_lock_irq(&priv->lock); 1010 - memcpy(priv->broadcast->mcmember.mgid.raw, 1011 - priv->dev->broadcast + 4, 1012 - sizeof(union ib_gid)); 1013 - spin_unlock_irq(&priv->lock); 1014 - } 1015 - 1016 1003 return 0; 1017 1004 } 1018 1005

+11 -4

drivers/infiniband/ulp/ipoib/ipoib_main.c

··· 2180 2180 { 2181 2181 struct ipoib_dev_priv *priv; 2182 2182 struct ib_port_attr attr; 2183 + struct rdma_netdev *rn; 2183 2184 int result = -ENOMEM; 2184 2185 2185 2186 priv = ipoib_intf_alloc(hca, port, format); ··· 2280 2279 ipoib_dev_cleanup(priv->dev); 2281 2280 2282 2281 device_init_failed: 2283 - free_netdev(priv->dev); 2282 + rn = netdev_priv(priv->dev); 2283 + rn->free_rdma_netdev(priv->dev); 2284 2284 kfree(priv); 2285 2285 2286 2286 alloc_mem_failed: ··· 2330 2328 return; 2331 2329 2332 2330 list_for_each_entry_safe(priv, tmp, dev_list, list) { 2333 - struct rdma_netdev *rn = netdev_priv(priv->dev); 2331 + struct rdma_netdev *parent_rn = netdev_priv(priv->dev); 2334 2332 2335 2333 ib_unregister_event_handler(&priv->event_handler); 2336 2334 flush_workqueue(ipoib_workqueue); ··· 2352 2350 unregister_netdev(priv->dev); 2353 2351 mutex_unlock(&priv->sysfs_mutex); 2354 2352 2355 - rn->free_rdma_netdev(priv->dev); 2353 + parent_rn->free_rdma_netdev(priv->dev); 2356 2354 2357 - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) 2355 + list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 2356 + struct rdma_netdev *child_rn; 2357 + 2358 + child_rn = netdev_priv(cpriv->dev); 2359 + child_rn->free_rdma_netdev(cpriv->dev); 2358 2360 kfree(cpriv); 2361 + } 2359 2362 2360 2363 kfree(priv); 2361 2364 }

+22 -8

drivers/infiniband/ulp/ipoib/ipoib_vlan.c

··· 141 141 return restart_syscall(); 142 142 } 143 143 144 - priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); 145 - if (!priv) { 144 + if (!down_write_trylock(&ppriv->vlan_rwsem)) { 146 145 rtnl_unlock(); 147 146 mutex_unlock(&ppriv->sysfs_mutex); 148 - return -ENOMEM; 147 + return restart_syscall(); 149 148 } 150 149 151 - down_write(&ppriv->vlan_rwsem); 150 + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); 151 + if (!priv) { 152 + result = -ENOMEM; 153 + goto out; 154 + } 152 155 153 156 /* 154 157 * First ensure this isn't a duplicate. We check the parent device and ··· 178 175 rtnl_unlock(); 179 176 mutex_unlock(&ppriv->sysfs_mutex); 180 177 181 - if (result) { 182 - free_netdev(priv->dev); 178 + if (result && priv) { 179 + struct rdma_netdev *rn; 180 + 181 + rn = netdev_priv(priv->dev); 182 + rn->free_rdma_netdev(priv->dev); 183 183 kfree(priv); 184 184 } 185 185 ··· 210 204 return restart_syscall(); 211 205 } 212 206 213 - down_write(&ppriv->vlan_rwsem); 207 + if (!down_write_trylock(&ppriv->vlan_rwsem)) { 208 + rtnl_unlock(); 209 + mutex_unlock(&ppriv->sysfs_mutex); 210 + return restart_syscall(); 211 + } 212 + 214 213 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { 215 214 if (priv->pkey == pkey && 216 215 priv->child_type == IPOIB_LEGACY_CHILD) { ··· 235 224 mutex_unlock(&ppriv->sysfs_mutex); 236 225 237 226 if (dev) { 238 - free_netdev(dev); 227 + struct rdma_netdev *rn; 228 + 229 + rn = netdev_priv(dev); 230 + rn->free_rdma_netdev(priv->dev); 239 231 kfree(priv); 240 232 return 0; 241 233 }

+1 -1

drivers/infiniband/ulp/iser/iser_memory.c

··· 154 154 { 155 155 int i; 156 156 157 - iser_err("page vec npages %d data length %d\n", 157 + iser_err("page vec npages %d data length %lld\n", 158 158 page_vec->npages, page_vec->fake_mr.length); 159 159 for (i = 0; i < page_vec->npages; i++) 160 160 iser_err("vec[%d]: %llx\n", i, page_vec->pages[i]);

+4 -4

drivers/iommu/amd_iommu_init.c

··· 874 874 hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4); 875 875 entry = (((u64) hi) << 32) + lo; 876 876 if (last_entry && last_entry != entry) { 877 - pr_err("IOMMU:%d should use the same dev table as others!/n", 877 + pr_err("IOMMU:%d should use the same dev table as others!\n", 878 878 iommu->index); 879 879 return false; 880 880 } ··· 882 882 883 883 old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12; 884 884 if (old_devtb_size != dev_table_size) { 885 - pr_err("The device table size of IOMMU:%d is not expected!/n", 885 + pr_err("The device table size of IOMMU:%d is not expected!\n", 886 886 iommu->index); 887 887 return false; 888 888 } ··· 890 890 891 891 old_devtb_phys = entry & PAGE_MASK; 892 892 if (old_devtb_phys >= 0x100000000ULL) { 893 - pr_err("The address of old device table is above 4G, not trustworthy!/n"); 893 + pr_err("The address of old device table is above 4G, not trustworthy!\n"); 894 894 return false; 895 895 } 896 896 old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB); ··· 901 901 old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag, 902 902 get_order(dev_table_size)); 903 903 if (old_dev_tbl_cpy == NULL) { 904 - pr_err("Failed to allocate memory for copying old device table!/n"); 904 + pr_err("Failed to allocate memory for copying old device table!\n"); 905 905 return false; 906 906 } 907 907

+1 -1

drivers/iommu/io-pgtable-arm-v7s.c

··· 245 245 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, 246 246 struct io_pgtable_cfg *cfg) 247 247 { 248 - if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) 248 + if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) 249 249 return; 250 250 251 251 dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),

+2 -1

drivers/iommu/mtk_iommu.c

··· 371 371 int ret; 372 372 373 373 spin_lock_irqsave(&dom->pgtlock, flags); 374 - ret = dom->iop->map(dom->iop, iova, paddr, size, prot); 374 + ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32), 375 + size, prot); 375 376 spin_unlock_irqrestore(&dom->pgtlock, flags); 376 377 377 378 return ret;

+8 -5

drivers/irqchip/irq-mips-gic.c

··· 175 175 176 176 static void gic_unmask_irq(struct irq_data *d) 177 177 { 178 - struct cpumask *affinity = irq_data_get_affinity_mask(d); 179 178 unsigned int intr = GIC_HWIRQ_TO_SHARED(d->hwirq); 180 179 unsigned int cpu; 181 180 182 181 write_gic_smask(intr); 183 182 184 183 gic_clear_pcpu_masks(intr); 185 - cpu = cpumask_first_and(affinity, cpu_online_mask); 184 + cpu = cpumask_first(irq_data_get_effective_affinity_mask(d)); 186 185 set_bit(intr, per_cpu_ptr(pcpu_masks, cpu)); 187 186 } 188 187 ··· 419 420 irq_hw_number_t hw, unsigned int cpu) 420 421 { 421 422 int intr = GIC_HWIRQ_TO_SHARED(hw); 423 + struct irq_data *data; 422 424 unsigned long flags; 425 + 426 + data = irq_get_irq_data(virq); 423 427 424 428 spin_lock_irqsave(&gic_lock, flags); 425 429 write_gic_map_pin(intr, GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin); 426 430 write_gic_map_vp(intr, BIT(mips_cm_vp_id(cpu))); 427 431 gic_clear_pcpu_masks(intr); 428 432 set_bit(intr, per_cpu_ptr(pcpu_masks, cpu)); 433 + irq_data_update_effective_affinity(data, cpumask_of(cpu)); 429 434 spin_unlock_irqrestore(&gic_lock, flags); 430 435 431 436 return 0; ··· 648 645 649 646 /* Find the first available CPU vector. */ 650 647 i = 0; 651 - reserved = (C_SW0 | C_SW1) >> __fls(C_SW0); 648 + reserved = (C_SW0 | C_SW1) >> __ffs(C_SW0); 652 649 while (!of_property_read_u32_index(node, "mti,reserved-cpu-vectors", 653 650 i++, &cpu_vec)) 654 651 reserved |= BIT(cpu_vec); ··· 687 684 688 685 gicconfig = read_gic_config(); 689 686 gic_shared_intrs = gicconfig & GIC_CONFIG_NUMINTERRUPTS; 690 - gic_shared_intrs >>= __fls(GIC_CONFIG_NUMINTERRUPTS); 687 + gic_shared_intrs >>= __ffs(GIC_CONFIG_NUMINTERRUPTS); 691 688 gic_shared_intrs = (gic_shared_intrs + 1) * 8; 692 689 693 690 gic_vpes = gicconfig & GIC_CONFIG_PVPS; 694 - gic_vpes >>= __fls(GIC_CONFIG_PVPS); 691 + gic_vpes >>= __ffs(GIC_CONFIG_PVPS); 695 692 gic_vpes = gic_vpes + 1; 696 693 697 694 if (cpu_has_veic) {

+26 -3

drivers/leds/leds-as3645a.c

··· 112 112 #define AS_PEAK_mA_TO_REG(a) \ 113 113 ((min_t(u32, AS_PEAK_mA_MAX, a) - 1250) / 250) 114 114 115 + /* LED numbers for Devicetree */ 116 + #define AS_LED_FLASH 0 117 + #define AS_LED_INDICATOR 1 118 + 115 119 enum as_mode { 116 120 AS_MODE_EXT_TORCH = 0 << AS_CONTROL_MODE_SETTING_SHIFT, 117 121 AS_MODE_INDICATOR = 1 << AS_CONTROL_MODE_SETTING_SHIFT, ··· 495 491 struct device_node *node) 496 492 { 497 493 struct as3645a_config *cfg = &flash->cfg; 494 + struct device_node *child; 498 495 const char *name; 499 496 int rval; 500 497 501 - flash->flash_node = of_get_child_by_name(node, "flash"); 498 + for_each_child_of_node(node, child) { 499 + u32 id = 0; 500 + 501 + of_property_read_u32(child, "reg", &id); 502 + 503 + switch (id) { 504 + case AS_LED_FLASH: 505 + flash->flash_node = of_node_get(child); 506 + break; 507 + case AS_LED_INDICATOR: 508 + flash->indicator_node = of_node_get(child); 509 + break; 510 + default: 511 + dev_warn(&flash->client->dev, 512 + "unknown LED %u encountered, ignoring\n", id); 513 + break; 514 + } 515 + } 516 + 502 517 if (!flash->flash_node) { 503 518 dev_err(&flash->client->dev, "can't find flash node\n"); 504 519 return -ENODEV; ··· 557 534 of_property_read_u32(flash->flash_node, "voltage-reference", 558 535 &cfg->voltage_reference); 559 536 560 - of_property_read_u32(flash->flash_node, "peak-current-limit", 537 + of_property_read_u32(flash->flash_node, "ams,input-max-microamp", 561 538 &cfg->peak); 562 539 cfg->peak = AS_PEAK_mA_TO_REG(cfg->peak); 563 540 564 - flash->indicator_node = of_get_child_by_name(node, "indicator"); 565 541 if (!flash->indicator_node) { 566 542 dev_warn(&flash->client->dev, 567 543 "can't find indicator node\n"); ··· 743 721 as3645a_set_control(flash, AS_MODE_EXT_TORCH, false); 744 722 745 723 v4l2_flash_release(flash->vf); 724 + v4l2_flash_release(flash->vfind); 746 725 747 726 led_classdev_flash_unregister(&flash->fled); 748 727 led_classdev_unregister(&flash->iled_cdev);

+1 -1

drivers/md/dm-raid.c

··· 3238 3238 if (unlikely(bio_end_sector(bio) > mddev->array_sectors)) 3239 3239 return DM_MAPIO_REQUEUE; 3240 3240 3241 - mddev->pers->make_request(mddev, bio); 3241 + md_handle_request(mddev, bio); 3242 3242 3243 3243 return DM_MAPIO_SUBMITTED; 3244 3244 }

+43 -29

drivers/md/md.c

··· 266 266 * call has finished, the bio has been linked into some internal structure 267 267 * and so is visible to ->quiesce(), so we don't need the refcount any more. 268 268 */ 269 + void md_handle_request(struct mddev *mddev, struct bio *bio) 270 + { 271 + check_suspended: 272 + rcu_read_lock(); 273 + if (mddev->suspended) { 274 + DEFINE_WAIT(__wait); 275 + for (;;) { 276 + prepare_to_wait(&mddev->sb_wait, &__wait, 277 + TASK_UNINTERRUPTIBLE); 278 + if (!mddev->suspended) 279 + break; 280 + rcu_read_unlock(); 281 + schedule(); 282 + rcu_read_lock(); 283 + } 284 + finish_wait(&mddev->sb_wait, &__wait); 285 + } 286 + atomic_inc(&mddev->active_io); 287 + rcu_read_unlock(); 288 + 289 + if (!mddev->pers->make_request(mddev, bio)) { 290 + atomic_dec(&mddev->active_io); 291 + wake_up(&mddev->sb_wait); 292 + goto check_suspended; 293 + } 294 + 295 + if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) 296 + wake_up(&mddev->sb_wait); 297 + } 298 + EXPORT_SYMBOL(md_handle_request); 299 + 269 300 static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) 270 301 { 271 302 const int rw = bio_data_dir(bio); ··· 316 285 bio_endio(bio); 317 286 return BLK_QC_T_NONE; 318 287 } 319 - check_suspended: 320 - rcu_read_lock(); 321 - if (mddev->suspended) { 322 - DEFINE_WAIT(__wait); 323 - for (;;) { 324 - prepare_to_wait(&mddev->sb_wait, &__wait, 325 - TASK_UNINTERRUPTIBLE); 326 - if (!mddev->suspended) 327 - break; 328 - rcu_read_unlock(); 329 - schedule(); 330 - rcu_read_lock(); 331 - } 332 - finish_wait(&mddev->sb_wait, &__wait); 333 - } 334 - atomic_inc(&mddev->active_io); 335 - rcu_read_unlock(); 336 288 337 289 /* 338 290 * save the sectors now since our bio can ··· 324 310 sectors = bio_sectors(bio); 325 311 /* bio could be mergeable after passing to underlayer */ 326 312 bio->bi_opf &= ~REQ_NOMERGE; 327 - if (!mddev->pers->make_request(mddev, bio)) { 328 - atomic_dec(&mddev->active_io); 329 - wake_up(&mddev->sb_wait); 330 - goto check_suspended; 
331 - } 313 + 314 + md_handle_request(mddev, bio); 332 315 333 316 cpu = part_stat_lock(); 334 317 part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); 335 318 part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); 336 319 part_stat_unlock(); 337 - 338 - if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) 339 - wake_up(&mddev->sb_wait); 340 320 341 321 return BLK_QC_T_NONE; 342 322 } ··· 447 439 struct mddev *mddev = container_of(ws, struct mddev, flush_work); 448 440 struct bio *bio = mddev->flush_bio; 449 441 442 + /* 443 + * must reset flush_bio before calling into md_handle_request to avoid a 444 + * deadlock, because other bios passed md_handle_request suspend check 445 + * could wait for this and below md_handle_request could wait for those 446 + * bios because of suspend check 447 + */ 448 + mddev->flush_bio = NULL; 449 + wake_up(&mddev->sb_wait); 450 + 450 451 if (bio->bi_iter.bi_size == 0) 451 452 /* an empty barrier - all done */ 452 453 bio_endio(bio); 453 454 else { 454 455 bio->bi_opf &= ~REQ_PREFLUSH; 455 - mddev->pers->make_request(mddev, bio); 456 + md_handle_request(mddev, bio); 456 457 } 457 - 458 - mddev->flush_bio = NULL; 459 - wake_up(&mddev->sb_wait); 460 458 } 461 459 462 460 void md_flush_request(struct mddev *mddev, struct bio *bio)

+1

drivers/md/md.h

··· 692 692 extern int md_rdev_init(struct md_rdev *rdev); 693 693 extern void md_rdev_clear(struct md_rdev *rdev); 694 694 695 + extern void md_handle_request(struct mddev *mddev, struct bio *bio); 695 696 extern void mddev_suspend(struct mddev *mddev); 696 697 extern void mddev_resume(struct mddev *mddev); 697 698 extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,

+5 -2

drivers/md/raid5.c

··· 6575 6575 raid5_store_group_thread_cnt(struct mddev *mddev, const char *page, size_t len) 6576 6576 { 6577 6577 struct r5conf *conf; 6578 - unsigned long new; 6578 + unsigned int new; 6579 6579 int err; 6580 6580 struct r5worker_group *new_groups, *old_groups; 6581 6581 int group_cnt, worker_cnt_per_group; 6582 6582 6583 6583 if (len >= PAGE_SIZE) 6584 6584 return -EINVAL; 6585 - if (kstrtoul(page, 10, &new)) 6585 + if (kstrtouint(page, 10, &new)) 6586 + return -EINVAL; 6587 + /* 8192 should be big enough */ 6588 + if (new > 8192) 6586 6589 return -EINVAL; 6587 6590 6588 6591 err = mddev_lock(mddev);

+15

drivers/mmc/host/sdhci-pci-core.c

··· 392 392 393 393 enum { 394 394 INTEL_DSM_FNS = 0, 395 + INTEL_DSM_V18_SWITCH = 3, 395 396 INTEL_DSM_DRV_STRENGTH = 9, 396 397 INTEL_DSM_D3_RETUNE = 10, 397 398 }; ··· 558 557 sdhci_writel(host, val, INTEL_HS400_ES_REG); 559 558 } 560 559 560 + static void sdhci_intel_voltage_switch(struct sdhci_host *host) 561 + { 562 + struct sdhci_pci_slot *slot = sdhci_priv(host); 563 + struct intel_host *intel_host = sdhci_pci_priv(slot); 564 + struct device *dev = &slot->chip->pdev->dev; 565 + u32 result = 0; 566 + int err; 567 + 568 + err = intel_dsm(intel_host, dev, INTEL_DSM_V18_SWITCH, &result); 569 + pr_debug("%s: %s DSM error %d result %u\n", 570 + mmc_hostname(host->mmc), __func__, err, result); 571 + } 572 + 561 573 static const struct sdhci_ops sdhci_intel_byt_ops = { 562 574 .set_clock = sdhci_set_clock, 563 575 .set_power = sdhci_intel_set_power, ··· 579 565 .reset = sdhci_reset, 580 566 .set_uhs_signaling = sdhci_set_uhs_signaling, 581 567 .hw_reset = sdhci_pci_hw_reset, 568 + .voltage_switch = sdhci_intel_voltage_switch, 582 569 }; 583 570 584 571 static void byt_read_dsm(struct sdhci_pci_slot *slot)

-47

drivers/mmc/host/tmio_mmc_core.c

··· 129 129 130 130 #define CMDREQ_TIMEOUT 5000 131 131 132 - #ifdef CONFIG_MMC_DEBUG 133 - 134 - #define STATUS_TO_TEXT(a, status, i) \ 135 - do { \ 136 - if ((status) & TMIO_STAT_##a) { \ 137 - if ((i)++) \ 138 - printk(KERN_DEBUG " | "); \ 139 - printk(KERN_DEBUG #a); \ 140 - } \ 141 - } while (0) 142 - 143 - static void pr_debug_status(u32 status) 144 - { 145 - int i = 0; 146 - 147 - pr_debug("status: %08x = ", status); 148 - STATUS_TO_TEXT(CARD_REMOVE, status, i); 149 - STATUS_TO_TEXT(CARD_INSERT, status, i); 150 - STATUS_TO_TEXT(SIGSTATE, status, i); 151 - STATUS_TO_TEXT(WRPROTECT, status, i); 152 - STATUS_TO_TEXT(CARD_REMOVE_A, status, i); 153 - STATUS_TO_TEXT(CARD_INSERT_A, status, i); 154 - STATUS_TO_TEXT(SIGSTATE_A, status, i); 155 - STATUS_TO_TEXT(CMD_IDX_ERR, status, i); 156 - STATUS_TO_TEXT(STOPBIT_ERR, status, i); 157 - STATUS_TO_TEXT(ILL_FUNC, status, i); 158 - STATUS_TO_TEXT(CMD_BUSY, status, i); 159 - STATUS_TO_TEXT(CMDRESPEND, status, i); 160 - STATUS_TO_TEXT(DATAEND, status, i); 161 - STATUS_TO_TEXT(CRCFAIL, status, i); 162 - STATUS_TO_TEXT(DATATIMEOUT, status, i); 163 - STATUS_TO_TEXT(CMDTIMEOUT, status, i); 164 - STATUS_TO_TEXT(RXOVERFLOW, status, i); 165 - STATUS_TO_TEXT(TXUNDERRUN, status, i); 166 - STATUS_TO_TEXT(RXRDY, status, i); 167 - STATUS_TO_TEXT(TXRQ, status, i); 168 - STATUS_TO_TEXT(ILL_ACCESS, status, i); 169 - printk("\n"); 170 - } 171 - 172 - #else 173 - #define pr_debug_status(s) do { } while (0) 174 - #endif 175 - 176 132 static void tmio_mmc_enable_sdio_irq(struct mmc_host *mmc, int enable) 177 133 { 178 134 struct tmio_mmc_host *host = mmc_priv(mmc); ··· 717 761 718 762 status = sd_ctrl_read16_and_16_as_32(host, CTL_STATUS); 719 763 ireg = status & TMIO_MASK_IRQ & ~host->sdcard_irq_mask; 720 - 721 - pr_debug_status(status); 722 - pr_debug_status(ireg); 723 764 724 765 /* Clear the status except the interrupt status */ 725 766 sd_ctrl_write32_as_16_and_16(host, CTL_STATUS, TMIO_MASK_IRQ);

+8

drivers/mtd/mtdpart.c

··· 581 581 slave->mtd.erasesize = parent->erasesize; 582 582 } 583 583 584 + /* 585 + * Slave erasesize might differ from the master one if the master 586 + * exposes several regions with different erasesize. Adjust 587 + * wr_alignment accordingly. 588 + */ 589 + if (!(slave->mtd.flags & MTD_NO_ERASE)) 590 + wr_alignment = slave->mtd.erasesize; 591 + 584 592 tmp = slave->offset; 585 593 remainder = do_div(tmp, wr_alignment); 586 594 if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {

+1 -1

drivers/mtd/nand/atmel/pmecc.c

··· 363 363 size += (req->ecc.strength + 1) * sizeof(u16); 364 364 /* Reserve space for mu, dmu and delta. */ 365 365 size = ALIGN(size, sizeof(s32)); 366 - size += (req->ecc.strength + 1) * sizeof(s32); 366 + size += (req->ecc.strength + 1) * sizeof(s32) * 3; 367 367 368 368 user = kzalloc(size, GFP_KERNEL); 369 369 if (!user)

+4 -5

drivers/nvme/host/core.c

··· 134 134 return false; 135 135 if (nvme_req(req)->status & NVME_SC_DNR) 136 136 return false; 137 - if (jiffies - req->start_time >= req->timeout) 138 - return false; 139 137 if (nvme_req(req)->retries >= nvme_max_retries) 140 138 return false; 141 139 return true; ··· 2588 2590 container_of(work, struct nvme_ctrl, async_event_work); 2589 2591 2590 2592 spin_lock_irq(&ctrl->lock); 2591 - while (ctrl->event_limit > 0) { 2593 + while (ctrl->state == NVME_CTRL_LIVE && ctrl->event_limit > 0) { 2592 2594 int aer_idx = --ctrl->event_limit; 2593 2595 2594 2596 spin_unlock_irq(&ctrl->lock); ··· 2675 2677 /*FALLTHRU*/ 2676 2678 case NVME_SC_ABORT_REQ: 2677 2679 ++ctrl->event_limit; 2678 - queue_work(nvme_wq, &ctrl->async_event_work); 2680 + if (ctrl->state == NVME_CTRL_LIVE) 2681 + queue_work(nvme_wq, &ctrl->async_event_work); 2679 2682 break; 2680 2683 default: 2681 2684 break; ··· 2691 2692 nvme_queue_scan(ctrl); 2692 2693 break; 2693 2694 case NVME_AER_NOTICE_FW_ACT_STARTING: 2694 - schedule_work(&ctrl->fw_act_work); 2695 + queue_work(nvme_wq, &ctrl->fw_act_work); 2695 2696 break; 2696 2697 default: 2697 2698 dev_warn(ctrl->device, "async event result %08x\n", result);

+9 -9

drivers/nvme/host/fabrics.c

··· 565 565 opts->queue_size = NVMF_DEF_QUEUE_SIZE; 566 566 opts->nr_io_queues = num_online_cpus(); 567 567 opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY; 568 + opts->kato = NVME_DEFAULT_KATO; 568 569 569 570 options = o = kstrdup(buf, GFP_KERNEL); 570 571 if (!options) ··· 656 655 goto out; 657 656 } 658 657 659 - if (opts->discovery_nqn) { 660 - pr_err("Discovery controllers cannot accept keep_alive_tmo != 0\n"); 661 - ret = -EINVAL; 662 - goto out; 663 - } 664 - 665 658 if (token < 0) { 666 659 pr_err("Invalid keep_alive_tmo %d\n", token); 667 660 ret = -EINVAL; 668 661 goto out; 669 - } else if (token == 0) { 662 + } else if (token == 0 && !opts->discovery_nqn) { 670 663 /* Allowed for debug */ 671 664 pr_warn("keep_alive_tmo 0 won't execute keep alives!!!\n"); 672 665 } 673 666 opts->kato = token; 667 + 668 + if (opts->discovery_nqn && opts->kato) { 669 + pr_err("Discovery controllers cannot accept KATO != 0\n"); 670 + ret = -EINVAL; 671 + goto out; 672 + } 673 + 674 674 break; 675 675 case NVMF_OPT_CTRL_LOSS_TMO: 676 676 if (match_int(args, &token)) { ··· 764 762 uuid_copy(&opts->host->id, &hostid); 765 763 766 764 out: 767 - if (!opts->discovery_nqn && !opts->kato) 768 - opts->kato = NVME_DEFAULT_KATO; 769 765 kfree(options); 770 766 return ret; 771 767 }

+11 -10

drivers/nvme/host/fc.c

··· 1376 1376 if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) 1377 1377 status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1378 1378 else if (freq->status) 1379 - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1379 + status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1380 1380 1381 1381 /* 1382 1382 * For the linux implementation, if we have an unsuccesful ··· 1404 1404 */ 1405 1405 if (freq->transferred_length != 1406 1406 be32_to_cpu(op->cmd_iu.data_len)) { 1407 - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1407 + status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1408 1408 goto done; 1409 1409 } 1410 1410 result.u64 = 0; ··· 1421 1421 freq->transferred_length || 1422 1422 op->rsp_iu.status_code || 1423 1423 sqe->common.command_id != cqe->command_id)) { 1424 - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1424 + status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1425 1425 goto done; 1426 1426 } 1427 1427 result = cqe->result; ··· 1429 1429 break; 1430 1430 1431 1431 default: 1432 - status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1432 + status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1433 1433 goto done; 1434 1434 } 1435 1435 ··· 1989 1989 * as well as those by FC-NVME spec. 1990 1990 */ 1991 1991 WARN_ON_ONCE(sqe->common.metadata); 1992 - WARN_ON_ONCE(sqe->common.dptr.prp1); 1993 - WARN_ON_ONCE(sqe->common.dptr.prp2); 1994 1992 sqe->common.flags |= NVME_CMD_SGL_METABUF; 1995 1993 1996 1994 /* 1997 - * format SQE DPTR field per FC-NVME rules 1998 - * type=data block descr; subtype=offset; 1999 - * offset is currently 0. 
1995 + * format SQE DPTR field per FC-NVME rules: 1996 + * type=0x5 Transport SGL Data Block Descriptor 1997 + * subtype=0xA Transport-specific value 1998 + * address=0 1999 + * length=length of the data series 2000 2000 */ 2001 - sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; 2001 + sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | 2002 + NVME_SGL_FMT_TRANSPORT_A; 2002 2003 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 2003 2004 sqe->rw.dptr.sgl.addr = 0; 2004 2005

+20 -14

drivers/nvme/host/pci.c

··· 24 24 #include <linux/mm.h> 25 25 #include <linux/module.h> 26 26 #include <linux/mutex.h> 27 + #include <linux/once.h> 27 28 #include <linux/pci.h> 28 29 #include <linux/poison.h> 29 30 #include <linux/t10-pi.h> ··· 541 540 } 542 541 #endif 543 542 543 + static void nvme_print_sgl(struct scatterlist *sgl, int nents) 544 + { 545 + int i; 546 + struct scatterlist *sg; 547 + 548 + for_each_sg(sgl, sg, nents, i) { 549 + dma_addr_t phys = sg_phys(sg); 550 + pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d " 551 + "dma_address:%pad dma_length:%d\n", 552 + i, &phys, sg->offset, sg->length, &sg_dma_address(sg), 553 + sg_dma_len(sg)); 554 + } 555 + } 556 + 544 557 static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) 545 558 { 546 559 struct nvme_iod *iod = blk_mq_rq_to_pdu(req); ··· 637 622 return BLK_STS_OK; 638 623 639 624 bad_sgl: 640 - if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n", 641 - blk_rq_payload_bytes(req), iod->nents)) { 642 - for_each_sg(iod->sg, sg, iod->nents, i) { 643 - dma_addr_t phys = sg_phys(sg); 644 - pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d " 645 - "dma_address:%pad dma_length:%d\n", i, &phys, 646 - sg->offset, sg->length, 647 - &sg_dma_address(sg), 648 - sg_dma_len(sg)); 649 - } 650 - } 625 + WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents), 626 + "Invalid SGL for payload:%d nents:%d\n", 627 + blk_rq_payload_bytes(req), iod->nents); 651 628 return BLK_STS_IOERR; 652 - 653 629 } 654 630 655 631 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, ··· 1319 1313 if (result < 0) 1320 1314 goto release_cq; 1321 1315 1316 + nvme_init_queue(nvmeq, qid); 1322 1317 result = queue_request_irq(nvmeq); 1323 1318 if (result < 0) 1324 1319 goto release_sq; 1325 1320 1326 - nvme_init_queue(nvmeq, qid); 1327 1321 return result; 1328 1322 1329 1323 release_sq: ··· 1470 1464 return result; 1471 1465 1472 1466 nvmeq->cq_vector = 0; 1467 + nvme_init_queue(nvmeq, 0); 1473 1468 result = 
queue_request_irq(nvmeq); 1474 1469 if (result) { 1475 1470 nvmeq->cq_vector = -1; ··· 2163 2156 if (result) 2164 2157 goto out; 2165 2158 2166 - nvme_init_queue(dev->queues[0], 0); 2167 2159 result = nvme_alloc_admin_tags(dev); 2168 2160 if (result) 2169 2161 goto out;

+7 -2

drivers/nvme/host/rdma.c

··· 942 942 } 943 943 944 944 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 945 - WARN_ON_ONCE(!changed); 945 + if (!changed) { 946 + /* state change failure is ok if we're in DELETING state */ 947 + WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING); 948 + return; 949 + } 950 + 946 951 ctrl->ctrl.nr_reconnects = 0; 947 952 948 953 nvme_start_ctrl(&ctrl->ctrl); ··· 967 962 struct nvme_rdma_ctrl *ctrl = container_of(work, 968 963 struct nvme_rdma_ctrl, err_work); 969 964 970 - nvme_stop_ctrl(&ctrl->ctrl); 965 + nvme_stop_keep_alive(&ctrl->ctrl); 971 966 972 967 if (ctrl->ctrl.queue_count > 1) { 973 968 nvme_stop_queues(&ctrl->ctrl);

+5 -4

drivers/nvme/target/core.c

··· 390 390 if (status) 391 391 nvmet_set_status(req, status); 392 392 393 - /* XXX: need to fill in something useful for sq_head */ 394 - req->rsp->sq_head = 0; 395 - if (likely(req->sq)) /* may happen during early failure */ 396 - req->rsp->sq_id = cpu_to_le16(req->sq->qid); 393 + if (req->sq->size) 394 + req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size; 395 + req->rsp->sq_head = cpu_to_le16(req->sq->sqhd); 396 + req->rsp->sq_id = cpu_to_le16(req->sq->qid); 397 397 req->rsp->command_id = req->cmd->common.command_id; 398 398 399 399 if (req->ns) ··· 420 420 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq, 421 421 u16 qid, u16 size) 422 422 { 423 + sq->sqhd = 0; 423 424 sq->qid = qid; 424 425 sq->size = size; 425 426

+7 -2

drivers/nvme/target/fabrics-cmd.c

··· 109 109 pr_warn("queue already connected!\n"); 110 110 return NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR; 111 111 } 112 + if (!sqsize) { 113 + pr_warn("queue size zero!\n"); 114 + return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; 115 + } 112 116 113 - nvmet_cq_setup(ctrl, req->cq, qid, sqsize); 114 - nvmet_sq_setup(ctrl, req->sq, qid, sqsize); 117 + /* note: convert queue size from 0's-based value to 1's-based value */ 118 + nvmet_cq_setup(ctrl, req->cq, qid, sqsize + 1); 119 + nvmet_sq_setup(ctrl, req->sq, qid, sqsize + 1); 115 120 return 0; 116 121 } 117 122

+14 -10

drivers/nvme/target/fc.c

··· 148 148 u32 a_id; 149 149 struct nvmet_fc_tgtport *tgtport; 150 150 struct list_head a_list; 151 - struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES]; 151 + struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; 152 152 struct kref ref; 153 153 }; 154 154 ··· 608 608 unsigned long flags; 609 609 int ret; 610 610 611 - if (qid >= NVMET_NR_QUEUES) 611 + if (qid > NVMET_NR_QUEUES) 612 612 return NULL; 613 613 614 614 queue = kzalloc((sizeof(*queue) + ··· 783 783 u16 qid = nvmet_fc_getqueueid(connection_id); 784 784 unsigned long flags; 785 785 786 + if (qid > NVMET_NR_QUEUES) 787 + return NULL; 788 + 786 789 spin_lock_irqsave(&tgtport->lock, flags); 787 790 list_for_each_entry(assoc, &tgtport->assoc_list, a_list) { 788 791 if (association_id == assoc->association_id) { ··· 891 888 int i; 892 889 893 890 spin_lock_irqsave(&tgtport->lock, flags); 894 - for (i = NVMET_NR_QUEUES - 1; i >= 0; i--) { 891 + for (i = NVMET_NR_QUEUES; i >= 0; i--) { 895 892 queue = assoc->queues[i]; 896 893 if (queue) { 897 894 if (!nvmet_fc_tgt_q_get(queue)) ··· 1913 1910 spin_lock_irqsave(&fod->flock, flags); 1914 1911 fod->writedataactive = false; 1915 1912 spin_unlock_irqrestore(&fod->flock, flags); 1916 - nvmet_req_complete(&fod->req, 1917 - NVME_SC_FC_TRANSPORT_ERROR); 1913 + nvmet_req_complete(&fod->req, NVME_SC_INTERNAL); 1918 1914 } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { 1919 1915 fcpreq->fcp_error = ret; 1920 1916 fcpreq->transferred_length = 0; ··· 1931 1929 /* if in the middle of an io and we need to tear down */ 1932 1930 if (abort) { 1933 1931 if (fcpreq->op == NVMET_FCOP_WRITEDATA) { 1934 - nvmet_req_complete(&fod->req, 1935 - NVME_SC_FC_TRANSPORT_ERROR); 1932 + nvmet_req_complete(&fod->req, NVME_SC_INTERNAL); 1936 1933 return true; 1937 1934 } 1938 1935 ··· 1969 1968 fod->abort = true; 1970 1969 spin_unlock(&fod->flock); 1971 1970 1972 - nvmet_req_complete(&fod->req, 1973 - NVME_SC_FC_TRANSPORT_ERROR); 1971 + nvmet_req_complete(&fod->req, 
NVME_SC_INTERNAL); 1974 1972 return; 1975 1973 } 1976 1974 ··· 2533 2533 { 2534 2534 struct nvmet_fc_tgtport *tgtport = port->priv; 2535 2535 unsigned long flags; 2536 + bool matched = false; 2536 2537 2537 2538 spin_lock_irqsave(&nvmet_fc_tgtlock, flags); 2538 2539 if (tgtport->port == port) { 2539 - nvmet_fc_tgtport_put(tgtport); 2540 + matched = true; 2540 2541 tgtport->port = NULL; 2541 2542 } 2542 2543 spin_unlock_irqrestore(&nvmet_fc_tgtlock, flags); 2544 + 2545 + if (matched) 2546 + nvmet_fc_tgtport_put(tgtport); 2543 2547 } 2544 2548 2545 2549 static struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops = {

+39 -65

drivers/nvme/target/fcloop.c

··· 224 224 struct fcloop_lport *lport; 225 225 struct list_head nport_list; 226 226 struct kref ref; 227 - struct completion rport_unreg_done; 228 - struct completion tport_unreg_done; 229 227 u64 node_name; 230 228 u64 port_name; 231 229 u32 port_role; ··· 574 576 tfcp_req->aborted = true; 575 577 spin_unlock(&tfcp_req->reqlock); 576 578 577 - tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; 579 + tfcp_req->status = NVME_SC_INTERNAL; 578 580 579 581 /* 580 582 * nothing more to do. If io wasn't active, the transport should ··· 629 631 } 630 632 631 633 static void 634 + fcloop_nport_free(struct kref *ref) 635 + { 636 + struct fcloop_nport *nport = 637 + container_of(ref, struct fcloop_nport, ref); 638 + unsigned long flags; 639 + 640 + spin_lock_irqsave(&fcloop_lock, flags); 641 + list_del(&nport->nport_list); 642 + spin_unlock_irqrestore(&fcloop_lock, flags); 643 + 644 + kfree(nport); 645 + } 646 + 647 + static void 648 + fcloop_nport_put(struct fcloop_nport *nport) 649 + { 650 + kref_put(&nport->ref, fcloop_nport_free); 651 + } 652 + 653 + static int 654 + fcloop_nport_get(struct fcloop_nport *nport) 655 + { 656 + return kref_get_unless_zero(&nport->ref); 657 + } 658 + 659 + static void 632 660 fcloop_localport_delete(struct nvme_fc_local_port *localport) 633 661 { 634 662 struct fcloop_lport *lport = localport->private; ··· 668 644 { 669 645 struct fcloop_rport *rport = remoteport->private; 670 646 671 - /* release any threads waiting for the unreg to complete */ 672 - complete(&rport->nport->rport_unreg_done); 647 + fcloop_nport_put(rport->nport); 673 648 } 674 649 675 650 static void ··· 676 653 { 677 654 struct fcloop_tport *tport = targetport->private; 678 655 679 - /* release any threads waiting for the unreg to complete */ 680 - complete(&tport->nport->tport_unreg_done); 656 + fcloop_nport_put(tport->nport); 681 657 } 682 658 683 659 #define FCLOOP_HW_QUEUES 4 ··· 744 722 goto out_free_opts; 745 723 } 746 724 725 + memset(&pinfo, 0, sizeof(pinfo)); 747 
726 pinfo.node_name = opts->wwnn; 748 727 pinfo.port_name = opts->wwpn; 749 728 pinfo.port_role = opts->roles; ··· 825 802 ret = __wait_localport_unreg(lport); 826 803 827 804 return ret ? ret : count; 828 - } 829 - 830 - static void 831 - fcloop_nport_free(struct kref *ref) 832 - { 833 - struct fcloop_nport *nport = 834 - container_of(ref, struct fcloop_nport, ref); 835 - unsigned long flags; 836 - 837 - spin_lock_irqsave(&fcloop_lock, flags); 838 - list_del(&nport->nport_list); 839 - spin_unlock_irqrestore(&fcloop_lock, flags); 840 - 841 - kfree(nport); 842 - } 843 - 844 - static void 845 - fcloop_nport_put(struct fcloop_nport *nport) 846 - { 847 - kref_put(&nport->ref, fcloop_nport_free); 848 - } 849 - 850 - static int 851 - fcloop_nport_get(struct fcloop_nport *nport) 852 - { 853 - return kref_get_unless_zero(&nport->ref); 854 805 } 855 806 856 807 static struct fcloop_nport * ··· 935 938 if (!nport) 936 939 return -EIO; 937 940 941 + memset(&pinfo, 0, sizeof(pinfo)); 938 942 pinfo.node_name = nport->node_name; 939 943 pinfo.port_name = nport->port_name; 940 944 pinfo.port_role = nport->port_role; ··· 977 979 } 978 980 979 981 static int 980 - __wait_remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport) 982 + __remoteport_unreg(struct fcloop_nport *nport, struct fcloop_rport *rport) 981 983 { 982 - int ret; 983 - 984 984 if (!rport) 985 985 return -EALREADY; 986 986 987 - init_completion(&nport->rport_unreg_done); 988 - 989 - ret = nvme_fc_unregister_remoteport(rport->remoteport); 990 - if (ret) 991 - return ret; 992 - 993 - wait_for_completion(&nport->rport_unreg_done); 994 - 995 - fcloop_nport_put(nport); 996 - 997 - return ret; 987 + return nvme_fc_unregister_remoteport(rport->remoteport); 998 988 } 999 989 1000 990 static ssize_t ··· 1015 1029 if (!nport) 1016 1030 return -ENOENT; 1017 1031 1018 - ret = __wait_remoteport_unreg(nport, rport); 1032 + ret = __remoteport_unreg(nport, rport); 1019 1033 1020 1034 return ret ? 
ret : count; 1021 1035 } ··· 1072 1086 } 1073 1087 1074 1088 static int 1075 - __wait_targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport) 1089 + __targetport_unreg(struct fcloop_nport *nport, struct fcloop_tport *tport) 1076 1090 { 1077 - int ret; 1078 - 1079 1091 if (!tport) 1080 1092 return -EALREADY; 1081 1093 1082 - init_completion(&nport->tport_unreg_done); 1083 - 1084 - ret = nvmet_fc_unregister_targetport(tport->targetport); 1085 - if (ret) 1086 - return ret; 1087 - 1088 - wait_for_completion(&nport->tport_unreg_done); 1089 - 1090 - fcloop_nport_put(nport); 1091 - 1092 - return ret; 1094 + return nvmet_fc_unregister_targetport(tport->targetport); 1093 1095 } 1094 1096 1095 1097 static ssize_t ··· 1110 1136 if (!nport) 1111 1137 return -ENOENT; 1112 1138 1113 - ret = __wait_targetport_unreg(nport, tport); 1139 + ret = __targetport_unreg(nport, tport); 1114 1140 1115 1141 return ret ? ret : count; 1116 1142 } ··· 1197 1223 1198 1224 spin_unlock_irqrestore(&fcloop_lock, flags); 1199 1225 1200 - ret = __wait_targetport_unreg(nport, tport); 1226 + ret = __targetport_unreg(nport, tport); 1201 1227 if (ret) 1202 1228 pr_warn("%s: Failed deleting target port\n", __func__); 1203 1229 1204 - ret = __wait_remoteport_unreg(nport, rport); 1230 + ret = __remoteport_unreg(nport, rport); 1205 1231 if (ret) 1206 1232 pr_warn("%s: Failed deleting remote port\n", __func__); 1207 1233

+1

drivers/nvme/target/nvmet.h

··· 74 74 struct percpu_ref ref; 75 75 u16 qid; 76 76 u16 size; 77 + u16 sqhd; 77 78 struct completion free_done; 78 79 struct completion confirm_done; 79 80 };

+9 -2

drivers/pci/pci-sysfs.c

··· 686 686 const char *buf, size_t count) 687 687 { 688 688 struct pci_dev *pdev = to_pci_dev(dev); 689 - char *driver_override, *old = pdev->driver_override, *cp; 689 + char *driver_override, *old, *cp; 690 690 691 691 /* We need to keep extra room for a newline */ 692 692 if (count >= (PAGE_SIZE - 1)) ··· 700 700 if (cp) 701 701 *cp = '\0'; 702 702 703 + device_lock(dev); 704 + old = pdev->driver_override; 703 705 if (strlen(driver_override)) { 704 706 pdev->driver_override = driver_override; 705 707 } else { 706 708 kfree(driver_override); 707 709 pdev->driver_override = NULL; 708 710 } 711 + device_unlock(dev); 709 712 710 713 kfree(old); 711 714 ··· 719 716 struct device_attribute *attr, char *buf) 720 717 { 721 718 struct pci_dev *pdev = to_pci_dev(dev); 719 + ssize_t len; 722 720 723 - return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override); 721 + device_lock(dev); 722 + len = snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override); 723 + device_unlock(dev); 724 + return len; 724 725 } 725 726 static DEVICE_ATTR_RW(driver_override); 726 727

+6 -4

drivers/platform/x86/fujitsu-laptop.c

··· 254 254 { 255 255 struct acpi_device *device = bl_get_data(b); 256 256 257 - if (b->props.power == FB_BLANK_POWERDOWN) 258 - call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3); 259 - else 260 - call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0); 257 + if (fext) { 258 + if (b->props.power == FB_BLANK_POWERDOWN) 259 + call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x3); 260 + else 261 + call_fext_func(fext, FUNC_BACKLIGHT, 0x1, 0x4, 0x0); 262 + } 261 263 262 264 return set_lcd_level(device, b->props.brightness); 263 265 }

+6 -6

drivers/scsi/aacraid/aachba.c

··· 699 699 int status; 700 700 701 701 dresp = (struct aac_mount *) fib_data(fibptr); 702 - if (!(fibptr->dev->supplement_adapter_info.supported_options2 & 703 - AAC_OPTION_VARIABLE_BLOCK_SIZE)) 702 + if (!aac_supports_2T(fibptr->dev)) { 704 703 dresp->mnt[0].capacityhigh = 0; 705 - if ((le32_to_cpu(dresp->status) != ST_OK) || 706 - (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) { 707 - _aac_probe_container2(context, fibptr); 708 - return; 704 + if ((le32_to_cpu(dresp->status) == ST_OK) && 705 + (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) { 706 + _aac_probe_container2(context, fibptr); 707 + return; 708 + } 709 709 } 710 710 scsicmd = (struct scsi_cmnd *) context; 711 711

+5

drivers/scsi/aacraid/aacraid.h

··· 2701 2701 return 0; 2702 2702 } 2703 2703 2704 + static inline int aac_supports_2T(struct aac_dev *dev) 2705 + { 2706 + return (dev->adapter_info.options & AAC_OPT_NEW_COMM_64); 2707 + } 2708 + 2704 2709 char * get_container_type(unsigned type); 2705 2710 extern int numacb; 2706 2711 extern char aac_driver_version[];

+12 -8

drivers/scsi/aacraid/linit.c

··· 906 906 907 907 bus = aac_logical_to_phys(scmd_channel(cmd)); 908 908 cid = scmd_id(cmd); 909 - info = &aac->hba_map[bus][cid]; 910 - if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || 911 - info->devtype != AAC_DEVTYPE_NATIVE_RAW) 909 + 910 + if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS) 912 911 return FAILED; 913 912 914 - if (info->reset_state > 0) 913 + info = &aac->hba_map[bus][cid]; 914 + 915 + if (info->devtype != AAC_DEVTYPE_NATIVE_RAW && 916 + info->reset_state > 0) 915 917 return FAILED; 916 918 917 919 pr_err("%s: Host adapter reset request. SCSI hang ?\n", ··· 964 962 965 963 bus = aac_logical_to_phys(scmd_channel(cmd)); 966 964 cid = scmd_id(cmd); 967 - info = &aac->hba_map[bus][cid]; 968 - if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS || 969 - info->devtype != AAC_DEVTYPE_NATIVE_RAW) 965 + 966 + if (bus >= AAC_MAX_BUSES || cid >= AAC_MAX_TARGETS) 970 967 return FAILED; 971 968 972 - if (info->reset_state > 0) 969 + info = &aac->hba_map[bus][cid]; 970 + 971 + if (info->devtype != AAC_DEVTYPE_NATIVE_RAW && 972 + info->reset_state > 0) 973 973 return FAILED; 974 974 975 975 pr_err("%s: Host adapter reset request. SCSI hang ?\n",

+2

drivers/scsi/aacraid/src.c

··· 740 740 aac_set_intx_mode(dev); 741 741 742 742 src_writel(dev, MUnit.IDR, IOP_SRC_RESET_MASK); 743 + 744 + msleep(5000); 743 745 } 744 746 745 747 static void aac_send_hardware_soft_reset(struct aac_dev *dev)

+1

drivers/scsi/lpfc/lpfc_init.c

··· 6131 6131 "Extents and RPI headers enabled.\n"); 6132 6132 } 6133 6133 mempool_free(mboxq, phba->mbox_mem_pool); 6134 + rc = -EIO; 6134 6135 goto out_free_bsmbx; 6135 6136 } 6136 6137

+1 -1

drivers/scsi/lpfc/lpfc_nvme.c

··· 884 884 wcqe->total_data_placed); 885 885 nCmd->transferred_length = 0; 886 886 nCmd->rcv_rsplen = 0; 887 - nCmd->status = NVME_SC_FC_TRANSPORT_ERROR; 887 + nCmd->status = NVME_SC_INTERNAL; 888 888 } 889 889 } 890 890

+1 -1

drivers/scsi/qla2xxx/qla_nvme.c

··· 180 180 goto rel; 181 181 182 182 if (unlikely(res == QLA_FUNCTION_FAILED)) 183 - fd->status = NVME_SC_FC_TRANSPORT_ERROR; 183 + fd->status = NVME_SC_INTERNAL; 184 184 else 185 185 fd->status = 0; 186 186

+2 -1

drivers/scsi/scsi_error.c

··· 580 580 if (sshdr.asc == 0x20 || /* Invalid command operation code */ 581 581 sshdr.asc == 0x21 || /* Logical block address out of range */ 582 582 sshdr.asc == 0x24 || /* Invalid field in cdb */ 583 - sshdr.asc == 0x26) { /* Parameter value invalid */ 583 + sshdr.asc == 0x26 || /* Parameter value invalid */ 584 + sshdr.asc == 0x27) { /* Write protected */ 584 585 set_host_byte(scmd, DID_TARGET_FAILURE); 585 586 } 586 587 return SUCCESS;

+3 -11

drivers/scsi/scsi_transport_fc.c

··· 2739 2739 2740 2740 list_for_each_entry(rport, &fc_host->rports, peers) { 2741 2741 2742 - if ((rport->port_state == FC_PORTSTATE_BLOCKED) && 2742 + if ((rport->port_state == FC_PORTSTATE_BLOCKED || 2743 + rport->port_state == FC_PORTSTATE_NOTPRESENT) && 2743 2744 (rport->channel == channel)) { 2744 2745 2745 2746 switch (fc_host->tgtid_bind_type) { ··· 2877 2876 memcpy(&rport->port_name, &ids->port_name, 2878 2877 sizeof(rport->port_name)); 2879 2878 rport->port_id = ids->port_id; 2880 - rport->roles = ids->roles; 2881 2879 rport->port_state = FC_PORTSTATE_ONLINE; 2882 2880 rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT; 2883 2881 ··· 2885 2885 fci->f->dd_fcrport_size); 2886 2886 spin_unlock_irqrestore(shost->host_lock, flags); 2887 2887 2888 - if (ids->roles & FC_PORT_ROLE_FCP_TARGET) { 2889 - scsi_target_unblock(&rport->dev, SDEV_RUNNING); 2890 - 2891 - /* initiate a scan of the target */ 2892 - spin_lock_irqsave(shost->host_lock, flags); 2893 - rport->flags |= FC_RPORT_SCAN_PENDING; 2894 - scsi_queue_work(shost, &rport->scan_work); 2895 - spin_unlock_irqrestore(shost->host_lock, flags); 2896 - } 2888 + fc_remote_port_rolechg(rport, ids->roles); 2897 2889 return rport; 2898 2890 } 2899 2891 }

+1 -1

drivers/scsi/scsi_transport_iscsi.c

··· 3689 3689 uint32_t group; 3690 3690 3691 3691 nlh = nlmsg_hdr(skb); 3692 - if (nlh->nlmsg_len < sizeof(*nlh) || 3692 + if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) || 3693 3693 skb->len < nlh->nlmsg_len) { 3694 3694 break; 3695 3695 }

+10 -1

drivers/xen/xen-pciback/conf_space_header.c

··· 169 169 static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data) 170 170 { 171 171 struct pci_bar_info *bar = data; 172 + unsigned int pos = (offset - PCI_BASE_ADDRESS_0) / 4; 173 + const struct resource *res = dev->resource; 174 + u32 mask; 172 175 173 176 if (unlikely(!bar)) { 174 177 pr_warn(DRV_NAME ": driver data not found for %s\n", ··· 182 179 /* A write to obtain the length must happen as a 32-bit write. 183 180 * This does not (yet) support writing individual bytes 184 181 */ 185 - if (value == ~0) 182 + if (res[pos].flags & IORESOURCE_IO) 183 + mask = ~PCI_BASE_ADDRESS_IO_MASK; 184 + else if (pos && (res[pos - 1].flags & IORESOURCE_MEM_64)) 185 + mask = 0; 186 + else 187 + mask = ~PCI_BASE_ADDRESS_MEM_MASK; 188 + if ((value | mask) == ~0U) 186 189 bar->which = 1; 187 190 else { 188 191 u32 tmpval;

+17 -1

fs/btrfs/compression.c

··· 107 107 struct inode *inode; 108 108 struct page *page; 109 109 unsigned long index; 110 - int ret; 110 + unsigned int mirror = btrfs_io_bio(bio)->mirror_num; 111 + int ret = 0; 111 112 112 113 if (bio->bi_status) 113 114 cb->errors = 1; ··· 118 117 */ 119 118 if (!refcount_dec_and_test(&cb->pending_bios)) 120 119 goto out; 120 + 121 + /* 122 + * Record the correct mirror_num in cb->orig_bio so that 123 + * read-repair can work properly. 124 + */ 125 + ASSERT(btrfs_io_bio(cb->orig_bio)); 126 + btrfs_io_bio(cb->orig_bio)->mirror_num = mirror; 127 + cb->mirror_num = mirror; 128 + 129 + /* 130 + * Some IO in this cb have failed, just skip checksum as there 131 + * is no way it could be correct. 132 + */ 133 + if (cb->errors == 1) 134 + goto csum_failed; 121 135 122 136 inode = cb->inode; 123 137 ret = check_compressed_csum(BTRFS_I(inode), cb,

-1

fs/btrfs/ctree.h

··· 709 709 #define BTRFS_FS_OPEN 5 710 710 #define BTRFS_FS_QUOTA_ENABLED 6 711 711 #define BTRFS_FS_QUOTA_ENABLING 7 712 - #define BTRFS_FS_QUOTA_DISABLING 8 713 712 #define BTRFS_FS_UPDATE_UUID_TREE_GEN 9 714 713 #define BTRFS_FS_CREATING_FREE_SPACE_TREE 10 715 714 #define BTRFS_FS_BTREE_ERR 11

+8 -1

fs/btrfs/disk-io.c

··· 3643 3643 u64 flags; 3644 3644 3645 3645 do_barriers = !btrfs_test_opt(fs_info, NOBARRIER); 3646 - backup_super_roots(fs_info); 3646 + 3647 + /* 3648 + * max_mirrors == 0 indicates we're from commit_transaction, 3649 + * not from fsync where the tree roots in fs_info have not 3650 + * been consistent on disk. 3651 + */ 3652 + if (max_mirrors == 0) 3653 + backup_super_roots(fs_info); 3647 3654 3648 3655 sb = fs_info->super_for_commit; 3649 3656 dev_item = &sb->dev_item;

+2 -6

fs/btrfs/extent_io.c

··· 3471 3471 unsigned int write_flags = 0; 3472 3472 unsigned long nr_written = 0; 3473 3473 3474 - if (wbc->sync_mode == WB_SYNC_ALL) 3475 - write_flags = REQ_SYNC; 3474 + write_flags = wbc_to_write_flags(wbc); 3476 3475 3477 3476 trace___extent_writepage(page, inode, wbc); 3478 3477 ··· 3717 3718 unsigned long i, num_pages; 3718 3719 unsigned long bio_flags = 0; 3719 3720 unsigned long start, end; 3720 - unsigned int write_flags = (epd->sync_io ? REQ_SYNC : 0) | REQ_META; 3721 + unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; 3721 3722 int ret = 0; 3722 3723 3723 3724 clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); ··· 4061 4062 { 4062 4063 if (epd->bio) { 4063 4064 int ret; 4064 - 4065 - bio_set_op_attrs(epd->bio, REQ_OP_WRITE, 4066 - epd->sync_io ? REQ_SYNC : 0); 4067 4065 4068 4066 ret = submit_one_bio(epd->bio, 0, epd->bio_flags); 4069 4067 BUG_ON(ret < 0); /* -ENOMEM */

+22 -5

fs/btrfs/inode.c

··· 135 135 const u64 offset, 136 136 const u64 bytes) 137 137 { 138 + unsigned long index = offset >> PAGE_SHIFT; 139 + unsigned long end_index = (offset + bytes - 1) >> PAGE_SHIFT; 140 + struct page *page; 141 + 142 + while (index <= end_index) { 143 + page = find_get_page(inode->i_mapping, index); 144 + index++; 145 + if (!page) 146 + continue; 147 + ClearPagePrivate2(page); 148 + put_page(page); 149 + } 138 150 return __endio_write_update_ordered(inode, offset + PAGE_SIZE, 139 151 bytes - PAGE_SIZE, false); 140 152 } ··· 8369 8357 struct btrfs_io_bio *io_bio = btrfs_io_bio(bio); 8370 8358 blk_status_t err = bio->bi_status; 8371 8359 8372 - if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) { 8360 + if (dip->flags & BTRFS_DIO_ORIG_BIO_SUBMITTED) 8373 8361 err = btrfs_subio_endio_read(inode, io_bio, err); 8374 - if (!err) 8375 - bio->bi_status = 0; 8376 - } 8377 8362 8378 8363 unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, 8379 8364 dip->logical_offset + dip->bytes - 1); ··· 8378 8369 8379 8370 kfree(dip); 8380 8371 8381 - dio_bio->bi_status = bio->bi_status; 8372 + dio_bio->bi_status = err; 8382 8373 dio_end_io(dio_bio); 8383 8374 8384 8375 if (io_bio->end_io) ··· 8396 8387 btrfs_work_func_t func; 8397 8388 u64 ordered_offset = offset; 8398 8389 u64 ordered_bytes = bytes; 8390 + u64 last_offset; 8399 8391 int ret; 8400 8392 8401 8393 if (btrfs_is_free_space_inode(BTRFS_I(inode))) { ··· 8408 8398 } 8409 8399 8410 8400 again: 8401 + last_offset = ordered_offset; 8411 8402 ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, 8412 8403 &ordered_offset, 8413 8404 ordered_bytes, ··· 8419 8408 btrfs_init_work(&ordered->work, func, finish_ordered_fn, NULL, NULL); 8420 8409 btrfs_queue_work(wq, &ordered->work); 8421 8410 out_test: 8411 + /* 8412 + * If btrfs_dec_test_ordered_pending does not find any ordered extent 8413 + * in the range, we can exit. 
8414 + */ 8415 + if (ordered_offset == last_offset) 8416 + return; 8422 8417 /* 8423 8418 * our bio might span multiple ordered extents. If we haven't 8424 8419 * completed the accounting for the whole dio, go back and try again

+8 -4

fs/btrfs/ioctl.c

··· 2773 2773 } 2774 2774 mutex_unlock(&fs_devices->device_list_mutex); 2775 2775 2776 - fi_args->nodesize = fs_info->super_copy->nodesize; 2777 - fi_args->sectorsize = fs_info->super_copy->sectorsize; 2778 - fi_args->clone_alignment = fs_info->super_copy->sectorsize; 2776 + fi_args->nodesize = fs_info->nodesize; 2777 + fi_args->sectorsize = fs_info->sectorsize; 2778 + fi_args->clone_alignment = fs_info->sectorsize; 2779 2779 2780 2780 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2781 2781 ret = -EFAULT; ··· 3032 3032 out: 3033 3033 if (ret) 3034 3034 btrfs_cmp_data_free(cmp); 3035 - return 0; 3035 + return ret; 3036 3036 } 3037 3037 3038 3038 static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) ··· 4059 4059 new_root = btrfs_read_fs_root_no_name(fs_info, &location); 4060 4060 if (IS_ERR(new_root)) { 4061 4061 ret = PTR_ERR(new_root); 4062 + goto out; 4063 + } 4064 + if (!is_fstree(new_root->objectid)) { 4065 + ret = -ENOENT; 4062 4066 goto out; 4063 4067 } 4064 4068

+2 -4

fs/btrfs/qgroup.c

··· 807 807 } 808 808 ret = 0; 809 809 out: 810 - set_bit(BTRFS_FS_QUOTA_DISABLING, &root->fs_info->flags); 811 810 btrfs_free_path(path); 812 811 return ret; 813 812 } ··· 952 953 if (!fs_info->quota_root) 953 954 goto out; 954 955 clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 955 - set_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags); 956 956 btrfs_qgroup_wait_for_completion(fs_info, false); 957 957 spin_lock(&fs_info->qgroup_lock); 958 958 quota_root = fs_info->quota_root; ··· 1305 1307 } 1306 1308 } 1307 1309 ret = del_qgroup_item(trans, quota_root, qgroupid); 1310 + if (ret && ret != -ENOENT) 1311 + goto out; 1308 1312 1309 1313 while (!list_empty(&qgroup->groups)) { 1310 1314 list = list_first_entry(&qgroup->groups, ··· 2086 2086 2087 2087 if (test_and_clear_bit(BTRFS_FS_QUOTA_ENABLING, &fs_info->flags)) 2088 2088 set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2089 - if (test_and_clear_bit(BTRFS_FS_QUOTA_DISABLING, &fs_info->flags)) 2090 - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); 2091 2089 2092 2090 spin_lock(&fs_info->qgroup_lock); 2093 2091 while (!list_empty(&fs_info->dirty_qgroups)) {

+1 -1

fs/btrfs/relocation.c

··· 2400 2400 while (!list_empty(list)) { 2401 2401 reloc_root = list_entry(list->next, struct btrfs_root, 2402 2402 root_list); 2403 + __del_reloc_root(reloc_root); 2403 2404 free_extent_buffer(reloc_root->node); 2404 2405 free_extent_buffer(reloc_root->commit_root); 2405 2406 reloc_root->node = NULL; 2406 2407 reloc_root->commit_root = NULL; 2407 - __del_reloc_root(reloc_root); 2408 2408 } 2409 2409 } 2410 2410

+1 -1

fs/btrfs/send.c

··· 2630 2630 } else { 2631 2631 btrfs_warn(sctx->send_root->fs_info, "unexpected inode type %o", 2632 2632 (int)(mode & S_IFMT)); 2633 - ret = -ENOTSUPP; 2633 + ret = -EOPNOTSUPP; 2634 2634 goto out; 2635 2635 } 2636 2636

+10 -2

fs/btrfs/tree-log.c

··· 4181 4181 struct extent_map *em, *n; 4182 4182 struct list_head extents; 4183 4183 struct extent_map_tree *tree = &inode->extent_tree; 4184 + u64 logged_start, logged_end; 4184 4185 u64 test_gen; 4185 4186 int ret = 0; 4186 4187 int num = 0; ··· 4191 4190 down_write(&inode->dio_sem); 4192 4191 write_lock(&tree->lock); 4193 4192 test_gen = root->fs_info->last_trans_committed; 4193 + logged_start = start; 4194 + logged_end = end; 4194 4195 4195 4196 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 4196 4197 list_del_init(&em->list); 4197 - 4198 4198 /* 4199 4199 * Just an arbitrary number, this can be really CPU intensive 4200 4200 * once we start getting a lot of extents, and really once we ··· 4210 4208 4211 4209 if (em->generation <= test_gen) 4212 4210 continue; 4211 + 4212 + if (em->start < logged_start) 4213 + logged_start = em->start; 4214 + if ((em->start + em->len - 1) > logged_end) 4215 + logged_end = em->start + em->len - 1; 4216 + 4213 4217 /* Need a ref to keep it from getting evicted from cache */ 4214 4218 refcount_inc(&em->refs); 4215 4219 set_bit(EXTENT_FLAG_LOGGING, &em->flags); ··· 4224 4216 } 4225 4217 4226 4218 list_sort(NULL, &extents, extent_cmp); 4227 - btrfs_get_logged_extents(inode, logged_list, start, end); 4219 + btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); 4228 4220 /* 4229 4221 * Some ordered extents started by fsync might have completed 4230 4222 * before we could collect them into the list logged_list, which

+1 -1

fs/btrfs/volumes.c

··· 6166 6166 map_length = length; 6167 6167 6168 6168 btrfs_bio_counter_inc_blocked(fs_info); 6169 - ret = __btrfs_map_block(fs_info, bio_op(bio), logical, 6169 + ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, 6170 6170 &map_length, &bbio, mirror_num, 1); 6171 6171 if (ret) { 6172 6172 btrfs_bio_counter_dec(fs_info);

+43 -6

fs/direct-io.c

··· 229 229 { 230 230 loff_t offset = dio->iocb->ki_pos; 231 231 ssize_t transferred = 0; 232 + int err; 232 233 233 234 /* 234 235 * AIO submission can race with bio completion to get here while ··· 259 258 if (ret == 0) 260 259 ret = transferred; 261 260 261 + /* 262 + * Try again to invalidate clean pages which might have been cached by 263 + * non-direct readahead, or faulted in by get_user_pages() if the source 264 + * of the write was an mmap'ed region of the file we're writing. Either 265 + * one is a pretty crazy thing to do, so we don't support it 100%. If 266 + * this invalidation fails, tough, the write still worked... 267 + */ 268 + if (ret > 0 && dio->op == REQ_OP_WRITE && 269 + dio->inode->i_mapping->nrpages) { 270 + err = invalidate_inode_pages2_range(dio->inode->i_mapping, 271 + offset >> PAGE_SHIFT, 272 + (offset + ret - 1) >> PAGE_SHIFT); 273 + WARN_ON_ONCE(err); 274 + } 275 + 262 276 if (dio->end_io) { 263 - int err; 264 277 265 278 // XXX: ki_pos?? 266 279 err = dio->end_io(dio->iocb, offset, ret, dio->private); ··· 319 304 struct dio *dio = bio->bi_private; 320 305 unsigned long remaining; 321 306 unsigned long flags; 307 + bool defer_completion = false; 322 308 323 309 /* cleanup the bio */ 324 310 dio_bio_complete(dio, bio); ··· 331 315 spin_unlock_irqrestore(&dio->bio_lock, flags); 332 316 333 317 if (remaining == 0) { 334 - if (dio->result && dio->defer_completion) { 318 + /* 319 + * Defer completion when defer_completion is set or 320 + * when the inode has pages mapped and this is AIO write. 321 + * We need to invalidate those pages because there is a 322 + * chance they contain stale data in the case buffered IO 323 + * went in between AIO submission and completion into the 324 + * same region. 
325 + */ 326 + if (dio->result) 327 + defer_completion = dio->defer_completion || 328 + (dio->op == REQ_OP_WRITE && 329 + dio->inode->i_mapping->nrpages); 330 + if (defer_completion) { 335 331 INIT_WORK(&dio->complete_work, dio_aio_complete_work); 336 332 queue_work(dio->inode->i_sb->s_dio_done_wq, 337 333 &dio->complete_work); ··· 1238 1210 * For AIO O_(D)SYNC writes we need to defer completions to a workqueue 1239 1211 * so that we can call ->fsync. 1240 1212 */ 1241 - if (dio->is_async && iov_iter_rw(iter) == WRITE && 1242 - ((iocb->ki_filp->f_flags & O_DSYNC) || 1243 - IS_SYNC(iocb->ki_filp->f_mapping->host))) { 1244 - retval = dio_set_defer_completion(dio); 1213 + if (dio->is_async && iov_iter_rw(iter) == WRITE) { 1214 + retval = 0; 1215 + if ((iocb->ki_filp->f_flags & O_DSYNC) || 1216 + IS_SYNC(iocb->ki_filp->f_mapping->host)) 1217 + retval = dio_set_defer_completion(dio); 1218 + else if (!dio->inode->i_sb->s_dio_done_wq) { 1219 + /* 1220 + * In case of AIO write racing with buffered read we 1221 + * need to defer completion. We can't decide this now, 1222 + * however the workqueue needs to be initialized here. 1223 + */ 1224 + retval = sb_init_dio_done_wq(dio->inode->i_sb); 1225 + } 1245 1226 if (retval) { 1246 1227 /* 1247 1228 * We grab i_mutex only for reads so we don't have

+5 -9

fs/gfs2/glock.c

··· 1945 1945 { 1946 1946 struct gfs2_glock_iter *gi = seq->private; 1947 1947 loff_t n = *pos; 1948 - int ret; 1949 1948 1950 - if (gi->last_pos <= *pos) 1951 - n = (*pos - gi->last_pos); 1952 - 1953 - ret = rhashtable_walk_start(&gi->hti); 1954 - if (ret) 1949 + rhashtable_walk_enter(&gl_hash_table, &gi->hti); 1950 + if (rhashtable_walk_start(&gi->hti) != 0) 1955 1951 return NULL; 1956 1952 1957 1953 do { ··· 1955 1959 } while (gi->gl && n--); 1956 1960 1957 1961 gi->last_pos = *pos; 1962 + 1958 1963 return gi->gl; 1959 1964 } 1960 1965 ··· 1967 1970 (*pos)++; 1968 1971 gi->last_pos = *pos; 1969 1972 gfs2_glock_iter_next(gi); 1973 + 1970 1974 return gi->gl; 1971 1975 } 1972 1976 ··· 1978 1980 1979 1981 gi->gl = NULL; 1980 1982 rhashtable_walk_stop(&gi->hti); 1983 + rhashtable_walk_exit(&gi->hti); 1981 1984 } 1982 1985 1983 1986 static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) ··· 2041 2042 struct gfs2_glock_iter *gi = seq->private; 2042 2043 2043 2044 gi->sdp = inode->i_private; 2044 - gi->last_pos = 0; 2045 2045 seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN); 2046 2046 if (seq->buf) 2047 2047 seq->size = GFS2_SEQ_GOODSIZE; 2048 2048 gi->gl = NULL; 2049 - rhashtable_walk_enter(&gl_hash_table, &gi->hti); 2050 2049 } 2051 2050 return ret; 2052 2051 } ··· 2060 2063 struct gfs2_glock_iter *gi = seq->private; 2061 2064 2062 2065 gi->gl = NULL; 2063 - rhashtable_walk_exit(&gi->hti); 2064 2066 return seq_release_private(inode, file); 2065 2067 } 2066 2068

+23 -20

fs/iomap.c

··· 713 713 static ssize_t iomap_dio_complete(struct iomap_dio *dio) 714 714 { 715 715 struct kiocb *iocb = dio->iocb; 716 + struct inode *inode = file_inode(iocb->ki_filp); 716 717 ssize_t ret; 718 + 719 + /* 720 + * Try again to invalidate clean pages which might have been cached by 721 + * non-direct readahead, or faulted in by get_user_pages() if the source 722 + * of the write was an mmap'ed region of the file we're writing. Either 723 + * one is a pretty crazy thing to do, so we don't support it 100%. If 724 + * this invalidation fails, tough, the write still worked... 725 + */ 726 + if (!dio->error && 727 + (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { 728 + ret = invalidate_inode_pages2_range(inode->i_mapping, 729 + iocb->ki_pos >> PAGE_SHIFT, 730 + (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); 731 + WARN_ON_ONCE(ret); 732 + } 717 733 718 734 if (dio->end_io) { 719 735 ret = dio->end_io(iocb, ··· 1009 993 WARN_ON_ONCE(ret); 1010 994 ret = 0; 1011 995 996 + if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && 997 + !inode->i_sb->s_dio_done_wq) { 998 + ret = sb_init_dio_done_wq(inode->i_sb); 999 + if (ret < 0) 1000 + goto out_free_dio; 1001 + } 1002 + 1012 1003 inode_dio_begin(inode); 1013 1004 1014 1005 blk_start_plug(&plug); ··· 1038 1015 if (ret < 0) 1039 1016 iomap_dio_set_error(dio, ret); 1040 1017 1041 - if (ret >= 0 && iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) && 1042 - !inode->i_sb->s_dio_done_wq) { 1043 - ret = sb_init_dio_done_wq(inode->i_sb); 1044 - if (ret < 0) 1045 - iomap_dio_set_error(dio, ret); 1046 - } 1047 - 1048 1018 if (!atomic_dec_and_test(&dio->ref)) { 1049 1019 if (!is_sync_kiocb(iocb)) 1050 1020 return -EIOCBQUEUED; ··· 1057 1041 } 1058 1042 1059 1043 ret = iomap_dio_complete(dio); 1060 - 1061 - /* 1062 - * Try again to invalidate clean pages which might have been cached by 1063 - * non-direct readahead, or faulted in by get_user_pages() if the source 1064 - * of the write was an mmap'ed region of the 
file we're writing. Either 1065 - * one is a pretty crazy thing to do, so we don't support it 100%. If 1066 - * this invalidation fails, tough, the write still worked... 1067 - */ 1068 - if (iov_iter_rw(iter) == WRITE) { 1069 - int err = invalidate_inode_pages2_range(mapping, 1070 - start >> PAGE_SHIFT, end >> PAGE_SHIFT); 1071 - WARN_ON_ONCE(err); 1072 - } 1073 1044 1074 1045 return ret; 1075 1046

+2

fs/isofs/inode.c

··· 514 514 if (sbi->s_fmode != ISOFS_INVALID_MODE) 515 515 seq_printf(m, ",fmode=%o", sbi->s_fmode); 516 516 517 + #ifdef CONFIG_JOLIET 517 518 if (sbi->s_nls_iocharset && 518 519 strcmp(sbi->s_nls_iocharset->charset, CONFIG_NLS_DEFAULT) != 0) 519 520 seq_printf(m, ",iocharset=%s", sbi->s_nls_iocharset->charset); 521 + #endif 520 522 return 0; 521 523 } 522 524

+15 -20

fs/proc/array.c

··· 119 119 * simple bit tests. 120 120 */ 121 121 static const char * const task_state_array[] = { 122 - "R (running)", /* 0 */ 123 - "S (sleeping)", /* 1 */ 124 - "D (disk sleep)", /* 2 */ 125 - "T (stopped)", /* 4 */ 126 - "t (tracing stop)", /* 8 */ 127 - "X (dead)", /* 16 */ 128 - "Z (zombie)", /* 32 */ 122 + 123 + /* states in TASK_REPORT: */ 124 + "R (running)", /* 0x00 */ 125 + "S (sleeping)", /* 0x01 */ 126 + "D (disk sleep)", /* 0x02 */ 127 + "T (stopped)", /* 0x04 */ 128 + "t (tracing stop)", /* 0x08 */ 129 + "X (dead)", /* 0x10 */ 130 + "Z (zombie)", /* 0x20 */ 131 + "P (parked)", /* 0x40 */ 132 + 133 + /* states beyond TASK_REPORT: */ 134 + "I (idle)", /* 0x80 */ 129 135 }; 130 136 131 137 static inline const char *get_task_state(struct task_struct *tsk) 132 138 { 133 - unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT; 134 - 135 - /* 136 - * Parked tasks do not run; they sit in __kthread_parkme(). 137 - * Without this check, we would report them as running, which is 138 - * clearly wrong, so we report them as sleeping instead. 139 - */ 140 - if (tsk->state == TASK_PARKED) 141 - state = TASK_INTERRUPTIBLE; 142 - 143 - BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1); 144 - 145 - return task_state_array[fls(state)]; 139 + BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != ARRAY_SIZE(task_state_array)); 140 + return task_state_array[__get_task_state(tsk)]; 146 141 } 147 142 148 143 static inline int get_task_umask(struct task_struct *tsk)

+2

fs/quota/dquot.c

··· 1980 1980 ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0, 1981 1981 &warn_to[cnt]); 1982 1982 if (ret) { 1983 + spin_lock(&transfer_to[cnt]->dq_dqb_lock); 1983 1984 dquot_decr_inodes(transfer_to[cnt], inode_usage); 1985 + spin_unlock(&transfer_to[cnt]->dq_dqb_lock); 1984 1986 goto over_quota; 1985 1987 } 1986 1988 }

+4

fs/quota/quota_v2.c

··· 328 328 if (!dquot->dq_off) { 329 329 alloc = true; 330 330 down_write(&dqopt->dqio_sem); 331 + } else { 332 + down_read(&dqopt->dqio_sem); 331 333 } 332 334 ret = qtree_write_dquot( 333 335 sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv, 334 336 dquot); 335 337 if (alloc) 336 338 up_write(&dqopt->dqio_sem); 339 + else 340 + up_read(&dqopt->dqio_sem); 337 341 return ret; 338 342 } 339 343

+2 -2

fs/read_write.c

··· 112 112 * In the generic case the entire file is data, so as long as 113 113 * offset isn't at the end of the file then the offset is data. 114 114 */ 115 - if (offset >= eof) 115 + if ((unsigned long long)offset >= eof) 116 116 return -ENXIO; 117 117 break; 118 118 case SEEK_HOLE: ··· 120 120 * There is a virtual hole at the end of the file, so as long as 121 121 * offset isn't i_size or larger, return i_size. 122 122 */ 123 - if (offset >= eof) 123 + if ((unsigned long long)offset >= eof) 124 124 return -ENXIO; 125 125 offset = eof; 126 126 break;

+10 -2

fs/xfs/libxfs/xfs_ag_resv.c

··· 156 156 trace_xfs_ag_resv_free(pag, type, 0); 157 157 158 158 resv = xfs_perag_resv(pag, type); 159 - pag->pag_mount->m_ag_max_usable += resv->ar_asked; 159 + if (pag->pag_agno == 0) 160 + pag->pag_mount->m_ag_max_usable += resv->ar_asked; 160 161 /* 161 162 * AGFL blocks are always considered "free", so whatever 162 163 * was reserved at mount time must be given back at umount. ··· 217 216 return error; 218 217 } 219 218 220 - mp->m_ag_max_usable -= ask; 219 + /* 220 + * Reduce the maximum per-AG allocation length by however much we're 221 + * trying to reserve for an AG. Since this is a filesystem-wide 222 + * counter, we only make the adjustment for AG 0. This assumes that 223 + * there aren't any AGs hungrier for per-AG reservation than AG 0. 224 + */ 225 + if (pag->pag_agno == 0) 226 + mp->m_ag_max_usable -= ask; 221 227 222 228 resv = xfs_perag_resv(pag, type); 223 229 resv->ar_asked = ask;

+2 -15

fs/xfs/libxfs/xfs_bmap.c

··· 49 49 #include "xfs_rmap.h" 50 50 #include "xfs_ag_resv.h" 51 51 #include "xfs_refcount.h" 52 - #include "xfs_rmap_btree.h" 53 52 #include "xfs_icache.h" 54 53 55 54 ··· 191 192 int maxrecs; /* maximum record count at this level */ 192 193 xfs_mount_t *mp; /* mount structure */ 193 194 xfs_filblks_t rval; /* return value */ 194 - xfs_filblks_t orig_len; 195 195 196 196 mp = ip->i_mount; 197 - 198 - /* Calculate the worst-case size of the bmbt. */ 199 - orig_len = len; 200 197 maxrecs = mp->m_bmap_dmxr[0]; 201 198 for (level = 0, rval = 0; 202 199 level < XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK); ··· 200 205 len += maxrecs - 1; 201 206 do_div(len, maxrecs); 202 207 rval += len; 203 - if (len == 1) { 204 - rval += XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 208 + if (len == 1) 209 + return rval + XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) - 205 210 level - 1; 206 - break; 207 - } 208 211 if (level == 0) 209 212 maxrecs = mp->m_bmap_dmxr[1]; 210 213 } 211 - 212 - /* Calculate the worst-case size of the rmapbt. */ 213 - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) 214 - rval += 1 + xfs_rmapbt_calc_size(mp, orig_len) + 215 - mp->m_rmap_maxlevels; 216 - 217 214 return rval; 218 215 } 219 216

+2 -1

fs/xfs/xfs_aops.c

··· 343 343 error = xfs_reflink_end_cow(ip, offset, size); 344 344 break; 345 345 case XFS_IO_UNWRITTEN: 346 - error = xfs_iomap_write_unwritten(ip, offset, size); 346 + /* writeback should never update isize */ 347 + error = xfs_iomap_write_unwritten(ip, offset, size, false); 347 348 break; 348 349 default: 349 350 ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);

+13 -1

fs/xfs/xfs_bmap_util.c

··· 1459 1459 return error; 1460 1460 1461 1461 /* 1462 - * The extent shiting code works on extent granularity. So, if 1462 + * Clean out anything hanging around in the cow fork now that 1463 + * we've flushed all the dirty data out to disk to avoid having 1464 + * CoW extents at the wrong offsets. 1465 + */ 1466 + if (xfs_is_reflink_inode(ip)) { 1467 + error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF, 1468 + true); 1469 + if (error) 1470 + return error; 1471 + } 1472 + 1473 + /* 1474 + * The extent shifting code works on extent granularity. So, if 1463 1475 * stop_fsb is not the starting block of extent, we need to split 1464 1476 * the extent at stop_fsb. 1465 1477 */

-2

fs/xfs/xfs_buf.c

··· 1258 1258 int size; 1259 1259 int offset; 1260 1260 1261 - total_nr_pages = bp->b_page_count; 1262 - 1263 1261 /* skip the pages in the buffer before the start offset */ 1264 1262 page_index = 0; 1265 1263 offset = *buf_offset;

+1 -1

fs/xfs/xfs_error.c

··· 347 347 { 348 348 struct xfs_mount *mp = bp->b_target->bt_mount; 349 349 350 - xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx", 350 + xfs_alert(mp, "Metadata %s detected at %pS, %s block 0x%llx", 351 351 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", 352 352 __return_address, bp->b_ops->name, bp->b_bn); 353 353

+21 -18

fs/xfs/xfs_file.c

··· 58 58 xfs_off_t count, 59 59 bool *did_zero) 60 60 { 61 - return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops); 61 + return iomap_zero_range(VFS_I(ip), pos, count, did_zero, &xfs_iomap_ops); 62 62 } 63 63 64 64 int ··· 377 377 */ 378 378 spin_lock(&ip->i_flags_lock); 379 379 if (iocb->ki_pos > i_size_read(inode)) { 380 - bool zero = false; 381 - 382 380 spin_unlock(&ip->i_flags_lock); 383 381 if (!drained_dio) { 384 382 if (*iolock == XFS_IOLOCK_SHARED) { ··· 397 399 drained_dio = true; 398 400 goto restart; 399 401 } 400 - error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); 402 + error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), NULL); 401 403 if (error) 402 404 return error; 403 405 } else ··· 434 436 struct inode *inode = file_inode(iocb->ki_filp); 435 437 struct xfs_inode *ip = XFS_I(inode); 436 438 loff_t offset = iocb->ki_pos; 437 - bool update_size = false; 438 439 int error = 0; 439 440 440 441 trace_xfs_end_io_direct_write(ip, offset, size); ··· 443 446 444 447 if (size <= 0) 445 448 return size; 449 + 450 + if (flags & IOMAP_DIO_COW) { 451 + error = xfs_reflink_end_cow(ip, offset, size); 452 + if (error) 453 + return error; 454 + } 455 + 456 + /* 457 + * Unwritten conversion updates the in-core isize after extent 458 + * conversion but before updating the on-disk size. Updating isize any 459 + * earlier allows a racing dio read to find unwritten extents before 460 + * they are converted. 
461 + */ 462 + if (flags & IOMAP_DIO_UNWRITTEN) 463 + return xfs_iomap_write_unwritten(ip, offset, size, true); 446 464 447 465 /* 448 466 * We need to update the in-core inode size here so that we don't end up ··· 473 461 spin_lock(&ip->i_flags_lock); 474 462 if (offset + size > i_size_read(inode)) { 475 463 i_size_write(inode, offset + size); 476 - update_size = true; 477 - } 478 - spin_unlock(&ip->i_flags_lock); 479 - 480 - if (flags & IOMAP_DIO_COW) { 481 - error = xfs_reflink_end_cow(ip, offset, size); 482 - if (error) 483 - return error; 484 - } 485 - 486 - if (flags & IOMAP_DIO_UNWRITTEN) 487 - error = xfs_iomap_write_unwritten(ip, offset, size); 488 - else if (update_size) 464 + spin_unlock(&ip->i_flags_lock); 489 465 error = xfs_setfilesize(ip, offset, size); 466 + } else { 467 + spin_unlock(&ip->i_flags_lock); 468 + } 490 469 491 470 return error; 492 471 }

+5 -3

fs/xfs/xfs_inode.c

··· 1624 1624 goto out; 1625 1625 1626 1626 /* 1627 - * Clear the reflink flag if we truncated everything. 1627 + * Clear the reflink flag if there are no data fork blocks and 1628 + * there are no extents staged in the cow fork. 1628 1629 */ 1629 - if (ip->i_d.di_nblocks == 0 && xfs_is_reflink_inode(ip)) { 1630 - ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1630 + if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) { 1631 + if (ip->i_d.di_nblocks == 0) 1632 + ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK; 1631 1633 xfs_inode_clear_cowblocks_tag(ip); 1632 1634 } 1633 1635

+1 -1

fs/xfs/xfs_inode_item.c

··· 745 745 */ 746 746 iip = INODE_ITEM(blip); 747 747 if ((iip->ili_logged && blip->li_lsn == iip->ili_flush_lsn) || 748 - lip->li_flags & XFS_LI_FAILED) 748 + (blip->li_flags & XFS_LI_FAILED)) 749 749 need_ail++; 750 750 751 751 blip = next;

+2 -1

fs/xfs/xfs_ioctl.c

··· 1088 1088 int *join_flags) 1089 1089 { 1090 1090 struct inode *inode = VFS_I(ip); 1091 + struct super_block *sb = inode->i_sb; 1091 1092 int error; 1092 1093 1093 1094 *join_flags = 0; ··· 1101 1100 if (fa->fsx_xflags & FS_XFLAG_DAX) { 1102 1101 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) 1103 1102 return -EINVAL; 1104 - if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE) 1103 + if (bdev_dax_supported(sb, sb->s_blocksize) < 0) 1105 1104 return -EINVAL; 1106 1105 } 1107 1106

+5 -2

fs/xfs/xfs_iomap.c

··· 829 829 xfs_iomap_write_unwritten( 830 830 xfs_inode_t *ip, 831 831 xfs_off_t offset, 832 - xfs_off_t count) 832 + xfs_off_t count, 833 + bool update_isize) 833 834 { 834 835 xfs_mount_t *mp = ip->i_mount; 835 836 xfs_fileoff_t offset_fsb; ··· 841 840 xfs_trans_t *tp; 842 841 xfs_bmbt_irec_t imap; 843 842 struct xfs_defer_ops dfops; 843 + struct inode *inode = VFS_I(ip); 844 844 xfs_fsize_t i_size; 845 845 uint resblks; 846 846 int error; ··· 901 899 i_size = XFS_FSB_TO_B(mp, offset_fsb + count_fsb); 902 900 if (i_size > offset + count) 903 901 i_size = offset + count; 904 - 902 + if (update_isize && i_size > i_size_read(inode)) 903 + i_size_write(inode, i_size); 905 904 i_size = xfs_new_eof(ip, i_size); 906 905 if (i_size) { 907 906 ip->i_d.di_size = i_size;

+1 -1

fs/xfs/xfs_iomap.h

··· 27 27 struct xfs_bmbt_irec *, int); 28 28 int xfs_iomap_write_allocate(struct xfs_inode *, int, xfs_off_t, 29 29 struct xfs_bmbt_irec *); 30 - int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t); 30 + int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); 31 31 32 32 void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, 33 33 struct xfs_bmbt_irec *);

+1 -1

fs/xfs/xfs_pnfs.c

··· 274 274 (end - 1) >> PAGE_SHIFT); 275 275 WARN_ON_ONCE(error); 276 276 277 - error = xfs_iomap_write_unwritten(ip, start, length); 277 + error = xfs_iomap_write_unwritten(ip, start, length, false); 278 278 if (error) 279 279 goto out_drop_iolock; 280 280 }

+10

fs/xfs/xfs_super.c

··· 1654 1654 "DAX and reflink have not been tested together!"); 1655 1655 } 1656 1656 1657 + if (mp->m_flags & XFS_MOUNT_DISCARD) { 1658 + struct request_queue *q = bdev_get_queue(sb->s_bdev); 1659 + 1660 + if (!blk_queue_discard(q)) { 1661 + xfs_warn(mp, "mounting with \"discard\" option, but " 1662 + "the device does not support discard"); 1663 + mp->m_flags &= ~XFS_MOUNT_DISCARD; 1664 + } 1665 + } 1666 + 1657 1667 if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { 1658 1668 if (mp->m_sb.sb_rblocks) { 1659 1669 xfs_alert(mp,

+1

include/linux/blkdev.h

··· 551 551 int node; 552 552 #ifdef CONFIG_BLK_DEV_IO_TRACE 553 553 struct blk_trace *blk_trace; 554 + struct mutex blk_trace_mutex; 554 555 #endif 555 556 /* 556 557 * for flush operations

+20 -1

include/linux/cpuhotplug.h

··· 3 3 4 4 #include <linux/types.h> 5 5 6 + /* 7 + * CPU-up CPU-down 8 + * 9 + * BP AP BP AP 10 + * 11 + * OFFLINE OFFLINE 12 + * | ^ 13 + * v | 14 + * BRINGUP_CPU->AP_OFFLINE BRINGUP_CPU <- AP_IDLE_DEAD (idle thread/play_dead) 15 + * | AP_OFFLINE 16 + * v (IRQ-off) ,---------------^ 17 + * AP_ONLINE | (stop_machine) 18 + * | TEARDOWN_CPU <- AP_ONLINE_IDLE 19 + * | ^ 20 + * v | 21 + * AP_ACTIVE AP_ACTIVE 22 + */ 23 + 6 24 enum cpuhp_state { 7 - CPUHP_OFFLINE, 25 + CPUHP_INVALID = -1, 26 + CPUHP_OFFLINE = 0, 8 27 CPUHP_CREATE_THREADS, 9 28 CPUHP_PERF_PREPARE, 10 29 CPUHP_PERF_X86_PREPARE,

+1 -1

include/linux/iommu.h

··· 167 167 * @map: map a physically contiguous memory region to an iommu domain 168 168 * @unmap: unmap a physically contiguous memory region from an iommu domain 169 169 * @map_sg: map a scatter-gather list of physically contiguous memory chunks 170 + * to an iommu domain 170 171 * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain 171 172 * @tlb_range_add: Add a given iova range to the flush queue for this domain 172 173 * @tlb_sync: Flush all queued ranges from the hardware TLBs and empty flush 173 174 * queue 174 - * to an iommu domain 175 175 * @iova_to_phys: translate iova to physical address 176 176 * @add_device: add device to iommu grouping 177 177 * @remove_device: remove device from iommu grouping

+2

include/linux/key.h

··· 187 187 #define KEY_FLAG_BUILTIN 8 /* set if key is built in to the kernel */ 188 188 #define KEY_FLAG_ROOT_CAN_INVAL 9 /* set if key can be invalidated by root without permission */ 189 189 #define KEY_FLAG_KEEP 10 /* set if key should not be removed */ 190 + #define KEY_FLAG_UID_KEYRING 11 /* set if key is a user or user session keyring */ 190 191 191 192 /* the key type and key description string 192 193 * - the desc is used to match a key against search criteria ··· 244 243 #define KEY_ALLOC_NOT_IN_QUOTA 0x0002 /* not in quota */ 245 244 #define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ 246 245 #define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ 246 + #define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */ 247 247 248 248 extern void key_revoke(struct key *key); 249 249 extern void key_invalidate(struct key *key);

+8 -5

include/linux/nvme-fc-driver.h

··· 346 346 * indicating an FC transport Aborted status. 347 347 * Entrypoint is Mandatory. 348 348 * 349 - * @defer_rcv: Called by the transport to signal the LLLD that it has 350 - * begun processing of a previously received NVME CMD IU. The LLDD 351 - * is now free to re-use the rcv buffer associated with the 352 - * nvmefc_tgt_fcp_req. 353 - * 354 349 * @max_hw_queues: indicates the maximum number of hw queues the LLDD 355 350 * supports for cpu affinitization. 356 351 * Value is Mandatory. Must be at least 1. ··· 801 806 * outstanding operation (if there was one) to complete, then will 802 807 * call the fcp_req_release() callback to return the command's 803 808 * exchange context back to the LLDD. 809 + * Entrypoint is Mandatory. 804 810 * 805 811 * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req 806 812 * to the LLDD after all operations on the fcp operation are complete. 807 813 * This may be due to the command completing or upon completion of 808 814 * abort cleanup. 815 + * Entrypoint is Mandatory. 816 + * 817 + * @defer_rcv: Called by the transport to signal the LLDD that it has 818 + * begun processing of a previously received NVME CMD IU. The LLDD 819 + * is now free to re-use the rcv buffer associated with the 820 + * nvmefc_tgt_fcp_req. 821 + * Entrypoint is Optional. 809 822 * 810 823 * @max_hw_queues: indicates the maximum number of hw queues the LLDD 811 824 * supports for cpu affinitization.

+6 -13

include/linux/nvme.h

··· 471 471 * 472 472 * @NVME_SGL_FMT_ADDRESS: absolute address of the data block 473 473 * @NVME_SGL_FMT_OFFSET: relative offset of the in-capsule data block 474 + * @NVME_SGL_FMT_TRANSPORT_A: transport defined format, value 0xA 474 475 * @NVME_SGL_FMT_INVALIDATE: RDMA transport specific remote invalidation 475 476 * request subtype 476 477 */ 477 478 enum { 478 479 NVME_SGL_FMT_ADDRESS = 0x00, 479 480 NVME_SGL_FMT_OFFSET = 0x01, 481 + NVME_SGL_FMT_TRANSPORT_A = 0x0A, 480 482 NVME_SGL_FMT_INVALIDATE = 0x0f, 481 483 }; 482 484 ··· 492 490 * 493 491 * For struct nvme_keyed_sgl_desc: 494 492 * @NVME_KEY_SGL_FMT_DATA_DESC: keyed data block descriptor 493 + * 494 + * Transport-specific SGL types: 495 + * @NVME_TRANSPORT_SGL_DATA_DESC: Transport SGL data block descriptor 495 496 */ 496 497 enum { 497 498 NVME_SGL_FMT_DATA_DESC = 0x00, 498 499 NVME_SGL_FMT_SEG_DESC = 0x02, 499 500 NVME_SGL_FMT_LAST_SEG_DESC = 0x03, 500 501 NVME_KEY_SGL_FMT_DATA_DESC = 0x04, 502 + NVME_TRANSPORT_SGL_DATA_DESC = 0x05, 501 503 }; 502 504 503 505 struct nvme_sgl_desc { ··· 1133 1127 NVME_SC_UNWRITTEN_BLOCK = 0x287, 1134 1128 1135 1129 NVME_SC_DNR = 0x4000, 1136 - 1137 - 1138 - /* 1139 - * FC Transport-specific error status values for NVME commands 1140 - * 1141 - * Transport-specific status code values must be in the range 0xB0..0xBF 1142 - */ 1143 - 1144 - /* Generic FC failure - catchall */ 1145 - NVME_SC_FC_TRANSPORT_ERROR = 0x00B0, 1146 - 1147 - /* I/O failure due to FC ABTS'd */ 1148 - NVME_SC_FC_TRANSPORT_ABORTED = 0x00B1, 1149 1130 }; 1150 1131 1151 1132 struct nvme_completion {

+2

include/linux/pci.h

··· 1685 1685 1686 1686 #define dev_is_pci(d) (false) 1687 1687 #define dev_is_pf(d) (false) 1688 + static inline bool pci_acs_enabled(struct pci_dev *pdev, u16 acs_flags) 1689 + { return false; } 1688 1690 #endif /* CONFIG_PCI */ 1689 1691 1690 1692 /* Include architecture-dependent settings and functions */

+40 -24

include/linux/sched.h

··· 65 65 */ 66 66 67 67 /* Used in tsk->state: */ 68 - #define TASK_RUNNING 0 69 - #define TASK_INTERRUPTIBLE 1 70 - #define TASK_UNINTERRUPTIBLE 2 71 - #define __TASK_STOPPED 4 72 - #define __TASK_TRACED 8 68 + #define TASK_RUNNING 0x0000 69 + #define TASK_INTERRUPTIBLE 0x0001 70 + #define TASK_UNINTERRUPTIBLE 0x0002 71 + #define __TASK_STOPPED 0x0004 72 + #define __TASK_TRACED 0x0008 73 73 /* Used in tsk->exit_state: */ 74 - #define EXIT_DEAD 16 75 - #define EXIT_ZOMBIE 32 74 + #define EXIT_DEAD 0x0010 75 + #define EXIT_ZOMBIE 0x0020 76 76 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) 77 77 /* Used in tsk->state again: */ 78 - #define TASK_DEAD 64 79 - #define TASK_WAKEKILL 128 80 - #define TASK_WAKING 256 81 - #define TASK_PARKED 512 82 - #define TASK_NOLOAD 1024 83 - #define TASK_NEW 2048 84 - #define TASK_STATE_MAX 4096 85 - 86 - #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn" 78 + #define TASK_PARKED 0x0040 79 + #define TASK_DEAD 0x0080 80 + #define TASK_WAKEKILL 0x0100 81 + #define TASK_WAKING 0x0200 82 + #define TASK_NOLOAD 0x0400 83 + #define TASK_NEW 0x0800 84 + #define TASK_STATE_MAX 0x1000 87 85 88 86 /* Convenience macros for the sake of set_current_state: */ 89 87 #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) ··· 97 99 /* get_task_state(): */ 98 100 #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ 99 101 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ 100 - __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD) 102 + __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ 103 + TASK_PARKED) 101 104 102 105 #define task_is_traced(task) ((task->state & __TASK_TRACED) != 0) 103 106 ··· 1242 1243 return task_pgrp_nr_ns(tsk, &init_pid_ns); 1243 1244 } 1244 1245 1245 - static inline char task_state_to_char(struct task_struct *task) 1246 + #define TASK_REPORT_IDLE (TASK_REPORT + 1) 1247 + #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) 1248 + 1249 + static inline unsigned int __get_task_state(struct task_struct *tsk) 1246 1250 { 1247 - const char stat_nam[] = 
TASK_STATE_TO_CHAR_STR; 1248 - unsigned long state = task->state; 1251 + unsigned int tsk_state = READ_ONCE(tsk->state); 1252 + unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; 1249 1253 1250 - state = state ? __ffs(state) + 1 : 0; 1254 + BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX); 1251 1255 1252 - /* Make sure the string lines up properly with the number of task states: */ 1253 - BUILD_BUG_ON(sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1); 1256 + if (tsk_state == TASK_IDLE) 1257 + state = TASK_REPORT_IDLE; 1254 1258 1255 - return state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'; 1259 + return fls(state); 1260 + } 1261 + 1262 + static inline char __task_state_to_char(unsigned int state) 1263 + { 1264 + static const char state_char[] = "RSDTtXZPI"; 1265 + 1266 + BUILD_BUG_ON(1 + ilog2(TASK_REPORT_MAX) != sizeof(state_char) - 1); 1267 + 1268 + return state_char[state]; 1269 + } 1270 + 1271 + static inline char task_state_to_char(struct task_struct *tsk) 1272 + { 1273 + return __task_state_to_char(__get_task_state(tsk)); 1256 1274 } 1257 1275 1258 1276 /**

+14

include/linux/timer.h

··· 168 168 #define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \ 169 169 __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) 170 170 171 + #define TIMER_DATA_TYPE unsigned long 172 + #define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) 173 + 174 + static inline void timer_setup(struct timer_list *timer, 175 + void (*callback)(struct timer_list *), 176 + unsigned int flags) 177 + { 178 + __setup_timer(timer, (TIMER_FUNC_TYPE)callback, 179 + (TIMER_DATA_TYPE)timer, flags); 180 + } 181 + 182 + #define from_timer(var, callback_timer, timer_fieldname) \ 183 + container_of(callback_timer, typeof(*var), timer_fieldname) 184 + 171 185 /** 172 186 * timer_pending - is a timer pending? 173 187 * @timer: the timer in question

+3 -3

include/rdma/ib_verbs.h

··· 285 285 IB_TM_CAP_RC = 1 << 0, 286 286 }; 287 287 288 - struct ib_xrq_caps { 288 + struct ib_tm_caps { 289 289 /* Max size of RNDV header */ 290 290 u32 max_rndv_hdr_size; 291 291 /* Max number of entries in tag matching list */ ··· 358 358 struct ib_rss_caps rss_caps; 359 359 u32 max_wq_type_rq; 360 360 u32 raw_packet_caps; /* Use ib_raw_packet_caps enum */ 361 - struct ib_xrq_caps xrq_caps; 361 + struct ib_tm_caps tm_caps; 362 362 }; 363 363 364 364 enum ib_mtu { ··· 1739 1739 u32 lkey; 1740 1740 u32 rkey; 1741 1741 u64 iova; 1742 - u32 length; 1742 + u64 length; 1743 1743 unsigned int page_size; 1744 1744 bool need_inval; 1745 1745 union {

+12 -7

include/trace/events/sched.h

··· 114 114 * Preemption ignores task state, therefore preempted tasks are always 115 115 * RUNNING (we will not have dequeued if state != RUNNING). 116 116 */ 117 - return preempt ? TASK_RUNNING | TASK_STATE_MAX : p->state; 117 + if (preempt) 118 + return TASK_STATE_MAX; 119 + 120 + return __get_task_state(p); 118 121 } 119 122 #endif /* CREATE_TRACE_POINTS */ 120 123 ··· 155 152 156 153 TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d", 157 154 __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, 158 - __entry->prev_state & (TASK_STATE_MAX-1) ? 159 - __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|", 160 - { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" }, 161 - { 16, "Z" }, { 32, "X" }, { 64, "x" }, 162 - { 128, "K" }, { 256, "W" }, { 512, "P" }, 163 - { 1024, "N" }) : "R", 155 + 156 + (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? 157 + __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", 158 + { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, 159 + { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }, 160 + { 0x40, "P" }, { 0x80, "I" }) : 161 + "R", 162 + 164 163 __entry->prev_state & TASK_STATE_MAX ? "+" : "", 165 164 __entry->next_comm, __entry->next_pid, __entry->next_prio) 166 165 );

+1 -1

include/uapi/rdma/ib_user_verbs.h

··· 261 261 struct ib_uverbs_rss_caps rss_caps; 262 262 __u32 max_wq_type_rq; 263 263 __u32 raw_packet_caps; 264 - struct ib_uverbs_tm_caps xrq_caps; 264 + struct ib_uverbs_tm_caps tm_caps; 265 265 }; 266 266 267 267 struct ib_uverbs_query_port {

+1 -1

ipc/shm.c

··· 1154 1154 info.shm_swp = ip->shm_swp; 1155 1155 info.swap_attempts = ip->swap_attempts; 1156 1156 info.swap_successes = ip->swap_successes; 1157 - return copy_to_user(up, &info, sizeof(info)); 1157 + return copy_to_user(uip, &info, sizeof(info)); 1158 1158 } 1159 1159 1160 1160 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,

+367 -145

kernel/cpu.c

··· 46 46 * @bringup: Single callback bringup or teardown selector 47 47 * @cb_state: The state for a single callback (install/uninstall) 48 48 * @result: Result of the operation 49 - * @done: Signal completion to the issuer of the task 49 + * @done_up: Signal completion to the issuer of the task for cpu-up 50 + * @done_down: Signal completion to the issuer of the task for cpu-down 50 51 */ 51 52 struct cpuhp_cpu_state { 52 53 enum cpuhp_state state; 53 54 enum cpuhp_state target; 55 + enum cpuhp_state fail; 54 56 #ifdef CONFIG_SMP 55 57 struct task_struct *thread; 56 58 bool should_run; ··· 60 58 bool single; 61 59 bool bringup; 62 60 struct hlist_node *node; 61 + struct hlist_node *last; 63 62 enum cpuhp_state cb_state; 64 63 int result; 65 - struct completion done; 64 + struct completion done_up; 65 + struct completion done_down; 66 66 #endif 67 67 }; 68 68 69 - static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state); 69 + static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = { 70 + .fail = CPUHP_INVALID, 71 + }; 70 72 71 73 #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) 72 - static struct lock_class_key cpuhp_state_key; 73 - static struct lockdep_map cpuhp_state_lock_map = 74 - STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key); 74 + static struct lockdep_map cpuhp_state_up_map = 75 + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); 76 + static struct lockdep_map cpuhp_state_down_map = 77 + STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); 78 + 79 + 80 + static void inline cpuhp_lock_acquire(bool bringup) 81 + { 82 + lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); 83 + } 84 + 85 + static void inline cpuhp_lock_release(bool bringup) 86 + { 87 + lock_map_release(bringup ? 
&cpuhp_state_up_map : &cpuhp_state_down_map); 88 + } 89 + #else 90 + 91 + static void inline cpuhp_lock_acquire(bool bringup) { } 92 + static void inline cpuhp_lock_release(bool bringup) { } 93 + 75 94 #endif 76 95 77 96 /** ··· 146 123 /** 147 124 * cpuhp_invoke_callback _ Invoke the callbacks for a given state 148 125 * @cpu: The cpu for which the callback should be invoked 149 - * @step: The step in the state machine 126 + * @state: The state to do callbacks for 150 127 * @bringup: True if the bringup callback should be invoked 128 + * @node: For multi-instance, do a single entry callback for install/remove 129 + * @lastp: For multi-instance rollback, remember how far we got 151 130 * 152 131 * Called from cpu hotplug and from the state register machinery. 153 132 */ 154 133 static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, 155 - bool bringup, struct hlist_node *node) 134 + bool bringup, struct hlist_node *node, 135 + struct hlist_node **lastp) 156 136 { 157 137 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 158 138 struct cpuhp_step *step = cpuhp_get_step(state); ··· 163 137 int (*cb)(unsigned int cpu); 164 138 int ret, cnt; 165 139 140 + if (st->fail == state) { 141 + st->fail = CPUHP_INVALID; 142 + 143 + if (!(bringup ? step->startup.single : step->teardown.single)) 144 + return 0; 145 + 146 + return -EAGAIN; 147 + } 148 + 166 149 if (!step->multi_instance) { 150 + WARN_ON_ONCE(lastp && *lastp); 167 151 cb = bringup ? step->startup.single : step->teardown.single; 168 152 if (!cb) 169 153 return 0; ··· 188 152 189 153 /* Single invocation for instance add/remove */ 190 154 if (node) { 155 + WARN_ON_ONCE(lastp && *lastp); 191 156 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); 192 157 ret = cbm(cpu, node); 193 158 trace_cpuhp_exit(cpu, st->state, state, ret); ··· 198 161 /* State transition. 
Invoke on all instances */ 199 162 cnt = 0; 200 163 hlist_for_each(node, &step->list) { 164 + if (lastp && node == *lastp) 165 + break; 166 + 201 167 trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); 202 168 ret = cbm(cpu, node); 203 169 trace_cpuhp_exit(cpu, st->state, state, ret); 204 - if (ret) 205 - goto err; 170 + if (ret) { 171 + if (!lastp) 172 + goto err; 173 + 174 + *lastp = node; 175 + return ret; 176 + } 206 177 cnt++; 207 178 } 179 + if (lastp) 180 + *lastp = NULL; 208 181 return 0; 209 182 err: 210 183 /* Rollback the instances if one failed */ ··· 225 178 hlist_for_each(node, &step->list) { 226 179 if (!cnt--) 227 180 break; 228 - cbm(cpu, node); 181 + 182 + trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); 183 + ret = cbm(cpu, node); 184 + trace_cpuhp_exit(cpu, st->state, state, ret); 185 + /* 186 + * Rollback must not fail, 187 + */ 188 + WARN_ON_ONCE(ret); 229 189 } 230 190 return ret; 231 191 } 232 192 233 193 #ifdef CONFIG_SMP 194 + static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup) 195 + { 196 + struct completion *done = bringup ? &st->done_up : &st->done_down; 197 + wait_for_completion(done); 198 + } 199 + 200 + static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup) 201 + { 202 + struct completion *done = bringup ? &st->done_up : &st->done_down; 203 + complete(done); 204 + } 205 + 206 + /* 207 + * The former STARTING/DYING states, ran with IRQs disabled and must not fail. 
208 + */ 209 + static bool cpuhp_is_atomic_state(enum cpuhp_state state) 210 + { 211 + return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; 212 + } 213 + 234 214 /* Serializes the updates to cpu_online_mask, cpu_present_mask */ 235 215 static DEFINE_MUTEX(cpu_add_remove_lock); 236 216 bool cpuhp_tasks_frozen; ··· 345 271 EXPORT_SYMBOL_GPL(cpu_hotplug_enable); 346 272 #endif /* CONFIG_HOTPLUG_CPU */ 347 273 348 - static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st); 274 + static inline enum cpuhp_state 275 + cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) 276 + { 277 + enum cpuhp_state prev_state = st->state; 278 + 279 + st->rollback = false; 280 + st->last = NULL; 281 + 282 + st->target = target; 283 + st->single = false; 284 + st->bringup = st->state < target; 285 + 286 + return prev_state; 287 + } 288 + 289 + static inline void 290 + cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) 291 + { 292 + st->rollback = true; 293 + 294 + /* 295 + * If we have st->last we need to undo partial multi_instance of this 296 + * state first. Otherwise start undo at the previous state. 297 + */ 298 + if (!st->last) { 299 + if (st->bringup) 300 + st->state--; 301 + else 302 + st->state++; 303 + } 304 + 305 + st->target = prev_state; 306 + st->bringup = !st->bringup; 307 + } 308 + 309 + /* Regular hotplug invocation of the AP hotplug thread */ 310 + static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) 311 + { 312 + if (!st->single && st->state == st->target) 313 + return; 314 + 315 + st->result = 0; 316 + /* 317 + * Make sure the above stores are visible before should_run becomes 318 + * true. 
Paired with the mb() above in cpuhp_thread_fun() 319 + */ 320 + smp_mb(); 321 + st->should_run = true; 322 + wake_up_process(st->thread); 323 + wait_for_ap_thread(st, st->bringup); 324 + } 325 + 326 + static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target) 327 + { 328 + enum cpuhp_state prev_state; 329 + int ret; 330 + 331 + prev_state = cpuhp_set_state(st, target); 332 + __cpuhp_kick_ap(st); 333 + if ((ret = st->result)) { 334 + cpuhp_reset_state(st, prev_state); 335 + __cpuhp_kick_ap(st); 336 + } 337 + 338 + return ret; 339 + } 349 340 350 341 static int bringup_wait_for_ap(unsigned int cpu) 351 342 { 352 343 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 353 344 354 345 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ 355 - wait_for_completion(&st->done); 346 + wait_for_ap_thread(st, true); 356 347 if (WARN_ON_ONCE((!cpu_online(cpu)))) 357 348 return -ECANCELED; 358 349 ··· 425 286 stop_machine_unpark(cpu); 426 287 kthread_unpark(st->thread); 427 288 428 - /* Should we go further up ? 
*/ 429 - if (st->target > CPUHP_AP_ONLINE_IDLE) { 430 - __cpuhp_kick_ap_work(st); 431 - wait_for_completion(&st->done); 432 - } 433 - return st->result; 289 + if (st->target <= CPUHP_AP_ONLINE_IDLE) 290 + return 0; 291 + 292 + return cpuhp_kick_ap(st, st->target); 434 293 } 435 294 436 295 static int bringup_cpu(unsigned int cpu) ··· 454 317 /* 455 318 * Hotplug state machine related functions 456 319 */ 457 - static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st) 458 - { 459 - for (st->state++; st->state < st->target; st->state++) { 460 - struct cpuhp_step *step = cpuhp_get_step(st->state); 461 - 462 - if (!step->skip_onerr) 463 - cpuhp_invoke_callback(cpu, st->state, true, NULL); 464 - } 465 - } 466 - 467 - static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, 468 - enum cpuhp_state target) 469 - { 470 - enum cpuhp_state prev_state = st->state; 471 - int ret = 0; 472 - 473 - for (; st->state > target; st->state--) { 474 - ret = cpuhp_invoke_callback(cpu, st->state, false, NULL); 475 - if (ret) { 476 - st->target = prev_state; 477 - undo_cpu_down(cpu, st); 478 - break; 479 - } 480 - } 481 - return ret; 482 - } 483 320 484 321 static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) 485 322 { ··· 461 350 struct cpuhp_step *step = cpuhp_get_step(st->state); 462 351 463 352 if (!step->skip_onerr) 464 - cpuhp_invoke_callback(cpu, st->state, false, NULL); 353 + cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); 465 354 } 466 355 } 467 356 ··· 473 362 474 363 while (st->state < target) { 475 364 st->state++; 476 - ret = cpuhp_invoke_callback(cpu, st->state, true, NULL); 365 + ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); 477 366 if (ret) { 478 367 st->target = prev_state; 479 368 undo_cpu_up(cpu, st); ··· 490 379 { 491 380 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 492 381 493 - init_completion(&st->done); 382 + init_completion(&st->done_up); 383 + 
init_completion(&st->done_down); 494 384 } 495 385 496 386 static int cpuhp_should_run(unsigned int cpu) ··· 501 389 return st->should_run; 502 390 } 503 391 504 - /* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */ 505 - static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st) 506 - { 507 - enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU); 508 - 509 - return cpuhp_down_callbacks(cpu, st, target); 510 - } 511 - 512 - /* Execute the online startup callbacks. Used to be CPU_ONLINE */ 513 - static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st) 514 - { 515 - return cpuhp_up_callbacks(cpu, st, st->target); 516 - } 517 - 518 392 /* 519 393 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke 520 394 * callbacks when a state gets [un]installed at runtime. 395 + * 396 + * Each invocation of this function by the smpboot thread does a single AP 397 + * state callback. 398 + * 399 + * It has 3 modes of operation: 400 + * - single: runs st->cb_state 401 + * - up: runs ++st->state, while st->state < st->target 402 + * - down: runs st->state--, while st->state > st->target 403 + * 404 + * When complete or on error, should_run is cleared and the completion is fired. 521 405 */ 522 406 static void cpuhp_thread_fun(unsigned int cpu) 523 407 { 524 408 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); 525 - int ret = 0; 409 + bool bringup = st->bringup; 410 + enum cpuhp_state state; 526 411 527 412 /* 528 - * Paired with the mb() in cpuhp_kick_ap_work and 529 - * cpuhp_invoke_ap_callback, so the work set is consistent visible. 413 + * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures 414 + * that if we see ->should_run we also see the rest of the state. 
530 415 */ 531 416 smp_mb(); 532 - if (!st->should_run) 417 + 418 + if (WARN_ON_ONCE(!st->should_run)) 533 419 return; 534 420 535 - st->should_run = false; 421 + cpuhp_lock_acquire(bringup); 536 422 537 - lock_map_acquire(&cpuhp_state_lock_map); 538 - /* Single callback invocation for [un]install ? */ 539 423 if (st->single) { 540 - if (st->cb_state < CPUHP_AP_ONLINE) { 541 - local_irq_disable(); 542 - ret = cpuhp_invoke_callback(cpu, st->cb_state, 543 - st->bringup, st->node); 544 - local_irq_enable(); 545 - } else { 546 - ret = cpuhp_invoke_callback(cpu, st->cb_state, 547 - st->bringup, st->node); 548 - } 549 - } else if (st->rollback) { 550 - BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); 551 - 552 - undo_cpu_down(cpu, st); 553 - st->rollback = false; 424 + state = st->cb_state; 425 + st->should_run = false; 554 426 } else { 555 - /* Cannot happen .... */ 556 - BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); 557 - 558 - /* Regular hotplug work */ 559 - if (st->state < st->target) 560 - ret = cpuhp_ap_online(cpu, st); 561 - else if (st->state > st->target) 562 - ret = cpuhp_ap_offline(cpu, st); 427 + if (bringup) { 428 + st->state++; 429 + state = st->state; 430 + st->should_run = (st->state < st->target); 431 + WARN_ON_ONCE(st->state > st->target); 432 + } else { 433 + state = st->state; 434 + st->state--; 435 + st->should_run = (st->state > st->target); 436 + WARN_ON_ONCE(st->state < st->target); 437 + } 563 438 } 564 - lock_map_release(&cpuhp_state_lock_map); 565 - st->result = ret; 566 - complete(&st->done); 439 + 440 + WARN_ON_ONCE(!cpuhp_is_ap_state(state)); 441 + 442 + if (st->rollback) { 443 + struct cpuhp_step *step = cpuhp_get_step(state); 444 + if (step->skip_onerr) 445 + goto next; 446 + } 447 + 448 + if (cpuhp_is_atomic_state(state)) { 449 + local_irq_disable(); 450 + st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); 451 + local_irq_enable(); 452 + 453 + /* 454 + * STARTING/DYING must not fail! 
455 + */ 456 + WARN_ON_ONCE(st->result); 457 + } else { 458 + st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); 459 + } 460 + 461 + if (st->result) { 462 + /* 463 + * If we fail on a rollback, we're up a creek without no 464 + * paddle, no way forward, no way back. We loose, thanks for 465 + * playing. 466 + */ 467 + WARN_ON_ONCE(st->rollback); 468 + st->should_run = false; 469 + } 470 + 471 + next: 472 + cpuhp_lock_release(bringup); 473 + 474 + if (!st->should_run) 475 + complete_ap_thread(st, bringup); 567 476 } 568 477 569 478 /* Invoke a single callback on a remote cpu */ ··· 593 460 struct hlist_node *node) 594 461 { 595 462 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 463 + int ret; 596 464 597 465 if (!cpu_online(cpu)) 598 466 return 0; 599 467 600 - lock_map_acquire(&cpuhp_state_lock_map); 601 - lock_map_release(&cpuhp_state_lock_map); 468 + cpuhp_lock_acquire(false); 469 + cpuhp_lock_release(false); 470 + 471 + cpuhp_lock_acquire(true); 472 + cpuhp_lock_release(true); 602 473 603 474 /* 604 475 * If we are up and running, use the hotplug thread. For early calls 605 476 * we invoke the thread function directly. 606 477 */ 607 478 if (!st->thread) 608 - return cpuhp_invoke_callback(cpu, state, bringup, node); 479 + return cpuhp_invoke_callback(cpu, state, bringup, node, NULL); 609 480 481 + st->rollback = false; 482 + st->last = NULL; 483 + 484 + st->node = node; 485 + st->bringup = bringup; 610 486 st->cb_state = state; 611 487 st->single = true; 612 - st->bringup = bringup; 613 - st->node = node; 488 + 489 + __cpuhp_kick_ap(st); 614 490 615 491 /* 616 - * Make sure the above stores are visible before should_run becomes 617 - * true. Paired with the mb() above in cpuhp_thread_fun() 492 + * If we failed and did a partial, do a rollback. 
618 493 */ 619 - smp_mb(); 620 - st->should_run = true; 621 - wake_up_process(st->thread); 622 - wait_for_completion(&st->done); 623 - return st->result; 624 - } 494 + if ((ret = st->result) && st->last) { 495 + st->rollback = true; 496 + st->bringup = !bringup; 625 497 626 - /* Regular hotplug invocation of the AP hotplug thread */ 627 - static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st) 628 - { 629 - st->result = 0; 630 - st->single = false; 631 - /* 632 - * Make sure the above stores are visible before should_run becomes 633 - * true. Paired with the mb() above in cpuhp_thread_fun() 634 - */ 635 - smp_mb(); 636 - st->should_run = true; 637 - wake_up_process(st->thread); 498 + __cpuhp_kick_ap(st); 499 + } 500 + 501 + return ret; 638 502 } 639 503 640 504 static int cpuhp_kick_ap_work(unsigned int cpu) 641 505 { 642 506 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 643 - enum cpuhp_state state = st->state; 507 + enum cpuhp_state prev_state = st->state; 508 + int ret; 644 509 645 - trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work); 646 - lock_map_acquire(&cpuhp_state_lock_map); 647 - lock_map_release(&cpuhp_state_lock_map); 648 - __cpuhp_kick_ap_work(st); 649 - wait_for_completion(&st->done); 650 - trace_cpuhp_exit(cpu, st->state, state, st->result); 651 - return st->result; 510 + cpuhp_lock_acquire(false); 511 + cpuhp_lock_release(false); 512 + 513 + cpuhp_lock_acquire(true); 514 + cpuhp_lock_release(true); 515 + 516 + trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); 517 + ret = cpuhp_kick_ap(st, st->target); 518 + trace_cpuhp_exit(cpu, st->state, prev_state, ret); 519 + 520 + return ret; 652 521 } 653 522 654 523 static struct smp_hotplug_thread cpuhp_threads = { ··· 716 581 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); 717 582 enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); 718 583 int err, cpu = smp_processor_id(); 584 + int ret; 719 585 720 586 /* Ensure this CPU doesn't 
handle any more interrupts. */ 721 587 err = __cpu_disable(); ··· 730 594 WARN_ON(st->state != CPUHP_TEARDOWN_CPU); 731 595 st->state--; 732 596 /* Invoke the former CPU_DYING callbacks */ 733 - for (; st->state > target; st->state--) 734 - cpuhp_invoke_callback(cpu, st->state, false, NULL); 597 + for (; st->state > target; st->state--) { 598 + ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); 599 + /* 600 + * DYING must not fail! 601 + */ 602 + WARN_ON_ONCE(ret); 603 + } 735 604 736 605 /* Give up timekeeping duties */ 737 606 tick_handover_do_timer(); ··· 780 639 * 781 640 * Wait for the stop thread to go away. 782 641 */ 783 - wait_for_completion(&st->done); 642 + wait_for_ap_thread(st, false); 784 643 BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); 785 644 786 645 /* Interrupts are moved away from the dying cpu, reenable alloc/free */ ··· 799 658 { 800 659 struct cpuhp_cpu_state *st = arg; 801 660 802 - complete(&st->done); 661 + complete_ap_thread(st, false); 803 662 } 804 663 805 664 void cpuhp_report_idle_dead(void) ··· 817 676 cpuhp_complete_idle_dead, st, 0); 818 677 } 819 678 820 - #else 821 - #define takedown_cpu NULL 822 - #endif 679 + static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st) 680 + { 681 + for (st->state++; st->state < st->target; st->state++) { 682 + struct cpuhp_step *step = cpuhp_get_step(st->state); 823 683 824 - #ifdef CONFIG_HOTPLUG_CPU 684 + if (!step->skip_onerr) 685 + cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); 686 + } 687 + } 688 + 689 + static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, 690 + enum cpuhp_state target) 691 + { 692 + enum cpuhp_state prev_state = st->state; 693 + int ret = 0; 694 + 695 + for (; st->state > target; st->state--) { 696 + ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); 697 + if (ret) { 698 + st->target = prev_state; 699 + undo_cpu_down(cpu, st); 700 + break; 701 + } 702 + } 703 + return ret; 704 + } 825 705 826 706 /* 
Requires cpu_add_remove_lock to be held */ 827 707 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, ··· 861 699 862 700 cpuhp_tasks_frozen = tasks_frozen; 863 701 864 - prev_state = st->state; 865 - st->target = target; 702 + prev_state = cpuhp_set_state(st, target); 866 703 /* 867 704 * If the current CPU state is in the range of the AP hotplug thread, 868 705 * then we need to kick the thread. 869 706 */ 870 707 if (st->state > CPUHP_TEARDOWN_CPU) { 708 + st->target = max((int)target, CPUHP_TEARDOWN_CPU); 871 709 ret = cpuhp_kick_ap_work(cpu); 872 710 /* 873 711 * The AP side has done the error rollback already. Just ··· 882 720 */ 883 721 if (st->state > CPUHP_TEARDOWN_CPU) 884 722 goto out; 723 + 724 + st->target = target; 885 725 } 886 726 /* 887 727 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need ··· 891 727 */ 892 728 ret = cpuhp_down_callbacks(cpu, st, target); 893 729 if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) { 894 - st->target = prev_state; 895 - st->rollback = true; 896 - cpuhp_kick_ap_work(cpu); 730 + cpuhp_reset_state(st, prev_state); 731 + __cpuhp_kick_ap(st); 897 732 } 898 733 899 734 out: ··· 917 754 cpu_maps_update_done(); 918 755 return err; 919 756 } 757 + 920 758 int cpu_down(unsigned int cpu) 921 759 { 922 760 return do_cpu_down(cpu, CPUHP_OFFLINE); 923 761 } 924 762 EXPORT_SYMBOL(cpu_down); 763 + 764 + #else 765 + #define takedown_cpu NULL 925 766 #endif /*CONFIG_HOTPLUG_CPU*/ 926 767 927 768 /** ··· 939 772 { 940 773 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 941 774 enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); 775 + int ret; 942 776 943 777 rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ 944 778 while (st->state < target) { 945 779 st->state++; 946 - cpuhp_invoke_callback(cpu, st->state, true, NULL); 780 + ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); 781 + /* 782 + * STARTING must not fail! 
783 + */ 784 + WARN_ON_ONCE(ret); 947 785 } 948 786 } 949 787 ··· 966 794 return; 967 795 968 796 st->state = CPUHP_AP_ONLINE_IDLE; 969 - complete(&st->done); 797 + complete_ap_thread(st, true); 970 798 } 971 799 972 800 /* Requires cpu_add_remove_lock to be held */ ··· 1001 829 1002 830 cpuhp_tasks_frozen = tasks_frozen; 1003 831 1004 - st->target = target; 832 + cpuhp_set_state(st, target); 1005 833 /* 1006 834 * If the current CPU state is in the range of the AP hotplug thread, 1007 835 * then we need to kick the thread once more. ··· 1468 1296 struct cpuhp_step *sp = cpuhp_get_step(state); 1469 1297 int ret; 1470 1298 1299 + /* 1300 + * If there's nothing to do, we done. 1301 + * Relies on the union for multi_instance. 1302 + */ 1471 1303 if ((bringup && !sp->startup.single) || 1472 1304 (!bringup && !sp->teardown.single)) 1473 1305 return 0; ··· 1483 1307 if (cpuhp_is_ap_state(state)) 1484 1308 ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node); 1485 1309 else 1486 - ret = cpuhp_invoke_callback(cpu, state, bringup, node); 1310 + ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); 1487 1311 #else 1488 - ret = cpuhp_invoke_callback(cpu, state, bringup, node); 1312 + ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); 1489 1313 #endif 1490 1314 BUG_ON(ret && !bringup); 1491 1315 return ret; ··· 1817 1641 } 1818 1642 static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target); 1819 1643 1644 + 1645 + static ssize_t write_cpuhp_fail(struct device *dev, 1646 + struct device_attribute *attr, 1647 + const char *buf, size_t count) 1648 + { 1649 + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); 1650 + struct cpuhp_step *sp; 1651 + int fail, ret; 1652 + 1653 + ret = kstrtoint(buf, 10, &fail); 1654 + if (ret) 1655 + return ret; 1656 + 1657 + /* 1658 + * Cannot fail STARTING/DYING callbacks. 
1659 + */ 1660 + if (cpuhp_is_atomic_state(fail)) 1661 + return -EINVAL; 1662 + 1663 + /* 1664 + * Cannot fail anything that doesn't have callbacks. 1665 + */ 1666 + mutex_lock(&cpuhp_state_mutex); 1667 + sp = cpuhp_get_step(fail); 1668 + if (!sp->startup.single && !sp->teardown.single) 1669 + ret = -EINVAL; 1670 + mutex_unlock(&cpuhp_state_mutex); 1671 + if (ret) 1672 + return ret; 1673 + 1674 + st->fail = fail; 1675 + 1676 + return count; 1677 + } 1678 + 1679 + static ssize_t show_cpuhp_fail(struct device *dev, 1680 + struct device_attribute *attr, char *buf) 1681 + { 1682 + struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); 1683 + 1684 + return sprintf(buf, "%d\n", st->fail); 1685 + } 1686 + 1687 + static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail); 1688 + 1820 1689 static struct attribute *cpuhp_cpu_attrs[] = { 1821 1690 &dev_attr_state.attr, 1822 1691 &dev_attr_target.attr, 1692 + &dev_attr_fail.attr, 1823 1693 NULL 1824 1694 }; 1825 1695

+15 -5

kernel/events/ring_buffer.c

··· 412 412 return NULL; 413 413 } 414 414 415 + static bool __always_inline rb_need_aux_wakeup(struct ring_buffer *rb) 416 + { 417 + if (rb->aux_overwrite) 418 + return false; 419 + 420 + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { 421 + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); 422 + return true; 423 + } 424 + 425 + return false; 426 + } 427 + 415 428 /* 416 429 * Commit the data written by hardware into the ring buffer by adjusting 417 430 * aux_head and posting a PERF_RECORD_AUX into the perf buffer. It is the ··· 464 451 } 465 452 466 453 rb->user_page->aux_head = rb->aux_head; 467 - if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { 454 + if (rb_need_aux_wakeup(rb)) 468 455 wakeup = true; 469 - rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); 470 - } 471 456 472 457 if (wakeup) { 473 458 if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) ··· 495 484 rb->aux_head += size; 496 485 497 486 rb->user_page->aux_head = rb->aux_head; 498 - if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { 487 + if (rb_need_aux_wakeup(rb)) { 499 488 perf_output_wakeup(handle); 500 - rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); 501 489 handle->wakeup = rb->aux_wakeup + rb->aux_watermark; 502 490 } 503 491

+10 -13

kernel/exit.c

··· 1600 1600 struct waitid_info info = {.status = 0}; 1601 1601 long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL); 1602 1602 int signo = 0; 1603 + 1603 1604 if (err > 0) { 1604 1605 signo = SIGCHLD; 1605 1606 err = 0; 1606 - } 1607 - 1608 - if (!err) { 1609 1607 if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) 1610 1608 return -EFAULT; 1611 1609 } ··· 1721 1723 if (err > 0) { 1722 1724 signo = SIGCHLD; 1723 1725 err = 0; 1724 - } 1725 - 1726 - if (!err && uru) { 1727 - /* kernel_waitid() overwrites everything in ru */ 1728 - if (COMPAT_USE_64BIT_TIME) 1729 - err = copy_to_user(uru, &ru, sizeof(ru)); 1730 - else 1731 - err = put_compat_rusage(&ru, uru); 1732 - if (err) 1733 - return -EFAULT; 1726 + if (uru) { 1727 + /* kernel_waitid() overwrites everything in ru */ 1728 + if (COMPAT_USE_64BIT_TIME) 1729 + err = copy_to_user(uru, &ru, sizeof(ru)); 1730 + else 1731 + err = put_compat_rusage(&ru, uru); 1732 + if (err) 1733 + return -EFAULT; 1734 + } 1734 1735 } 1735 1736 1736 1737 if (!infop)

+31 -14

kernel/extable.c

··· 102 102 103 103 int __kernel_text_address(unsigned long addr) 104 104 { 105 - if (core_kernel_text(addr)) 106 - return 1; 107 - if (is_module_text_address(addr)) 108 - return 1; 109 - if (is_ftrace_trampoline(addr)) 110 - return 1; 111 - if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) 112 - return 1; 113 - if (is_bpf_text_address(addr)) 105 + if (kernel_text_address(addr)) 114 106 return 1; 115 107 /* 116 108 * There might be init symbols in saved stacktraces. ··· 119 127 120 128 int kernel_text_address(unsigned long addr) 121 129 { 130 + bool no_rcu; 131 + int ret = 1; 132 + 122 133 if (core_kernel_text(addr)) 123 134 return 1; 135 + 136 + /* 137 + * If a stack dump happens while RCU is not watching, then 138 + * RCU needs to be notified that it requires to start 139 + * watching again. This can happen either by tracing that 140 + * triggers a stack trace, or a WARN() that happens during 141 + * coming back from idle, or cpu on or offlining. 142 + * 143 + * is_module_text_address() as well as the kprobe slots 144 + * and is_bpf_text_address() require RCU to be watching. 145 + */ 146 + no_rcu = !rcu_is_watching(); 147 + 148 + /* Treat this like an NMI as it can happen anywhere */ 149 + if (no_rcu) 150 + rcu_nmi_enter(); 151 + 124 152 if (is_module_text_address(addr)) 125 - return 1; 153 + goto out; 126 154 if (is_ftrace_trampoline(addr)) 127 - return 1; 155 + goto out; 128 156 if (is_kprobe_optinsn_slot(addr) || is_kprobe_insn_slot(addr)) 129 - return 1; 157 + goto out; 130 158 if (is_bpf_text_address(addr)) 131 - return 1; 132 - return 0; 159 + goto out; 160 + ret = 0; 161 + out: 162 + if (no_rcu) 163 + rcu_nmi_exit(); 164 + 165 + return ret; 133 166 } 134 167 135 168 /*

+22 -11

kernel/futex.c

··· 821 821 /* 822 822 * Drops a reference to the pi_state object and frees or caches it 823 823 * when the last reference is gone. 824 - * 825 - * Must be called with the hb lock held. 826 824 */ 827 825 static void put_pi_state(struct futex_pi_state *pi_state) 828 826 { ··· 835 837 * and has cleaned up the pi_state already 836 838 */ 837 839 if (pi_state->owner) { 838 - raw_spin_lock_irq(&pi_state->owner->pi_lock); 839 - list_del_init(&pi_state->list); 840 - raw_spin_unlock_irq(&pi_state->owner->pi_lock); 840 + struct task_struct *owner; 841 841 842 - rt_mutex_proxy_unlock(&pi_state->pi_mutex, pi_state->owner); 842 + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); 843 + owner = pi_state->owner; 844 + if (owner) { 845 + raw_spin_lock(&owner->pi_lock); 846 + list_del_init(&pi_state->list); 847 + raw_spin_unlock(&owner->pi_lock); 848 + } 849 + rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner); 850 + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); 843 851 } 844 852 845 - if (current->pi_state_cache) 853 + if (current->pi_state_cache) { 846 854 kfree(pi_state); 847 - else { 855 + } else { 848 856 /* 849 857 * pi_state->list is already empty. 850 858 * clear pi_state->owner. 
··· 911 907 raw_spin_unlock_irq(&curr->pi_lock); 912 908 913 909 spin_lock(&hb->lock); 914 - 915 - raw_spin_lock_irq(&curr->pi_lock); 910 + raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); 911 + raw_spin_lock(&curr->pi_lock); 916 912 /* 917 913 * We dropped the pi-lock, so re-check whether this 918 914 * task still owns the PI-state: 919 915 */ 920 916 if (head->next != next) { 917 + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); 921 918 spin_unlock(&hb->lock); 922 919 continue; 923 920 } ··· 927 922 WARN_ON(list_empty(&pi_state->list)); 928 923 list_del_init(&pi_state->list); 929 924 pi_state->owner = NULL; 930 - raw_spin_unlock_irq(&curr->pi_lock); 925 + raw_spin_unlock(&curr->pi_lock); 931 926 932 927 get_pi_state(pi_state); 928 + raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); 933 929 spin_unlock(&hb->lock); 934 930 935 931 rt_mutex_futex_unlock(&pi_state->pi_mutex); ··· 1214 1208 1215 1209 WARN_ON(!list_empty(&pi_state->list)); 1216 1210 list_add(&pi_state->list, &p->pi_state_list); 1211 + /* 1212 + * Assignment without holding pi_state->pi_mutex.wait_lock is safe 1213 + * because there is no concurrency as the object is not published yet. 1214 + */ 1217 1215 pi_state->owner = p; 1218 1216 raw_spin_unlock_irq(&p->pi_lock); 1219 1217 ··· 2888 2878 raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock); 2889 2879 spin_unlock(&hb->lock); 2890 2880 2881 + /* drops pi_state->pi_mutex.wait_lock */ 2891 2882 ret = wake_futex_pi(uaddr, uval, pi_state); 2892 2883 2893 2884 put_pi_state(pi_state);

-1

kernel/irq/generic-chip.c

··· 322 322 /* Calc pointer to the next generic chip */ 323 323 tmp += sizeof(*gc) + num_ct * sizeof(struct irq_chip_type); 324 324 } 325 - d->name = name; 326 325 return 0; 327 326 } 328 327 EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips);

+27

kernel/locking/rwsem-xadd.c

··· 613 613 DEFINE_WAKE_Q(wake_q); 614 614 615 615 /* 616 + * __rwsem_down_write_failed_common(sem) 617 + * rwsem_optimistic_spin(sem) 618 + * osq_unlock(sem->osq) 619 + * ... 620 + * atomic_long_add_return(&sem->count) 621 + * 622 + * - VS - 623 + * 624 + * __up_write() 625 + * if (atomic_long_sub_return_release(&sem->count) < 0) 626 + * rwsem_wake(sem) 627 + * osq_is_locked(&sem->osq) 628 + * 629 + * And __up_write() must observe !osq_is_locked() when it observes the 630 + * atomic_long_add_return() in order to not miss a wakeup. 631 + * 632 + * This boils down to: 633 + * 634 + * [S.rel] X = 1 [RmW] r0 = (Y += 0) 635 + * MB RMB 636 + * [RmW] Y += 1 [L] r1 = X 637 + * 638 + * exists (r0=1 /\ r1=0) 639 + */ 640 + smp_rmb(); 641 + 642 + /* 616 643 * If a spinner is present, it is not necessary to do the wakeup. 617 644 * Try to do wakeup only if the trylock succeeds to minimize 618 645 * spinlock contention which may introduce too much delay in the

+10

kernel/rcu/tree.c

··· 882 882 883 883 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_exit() invoked with irqs enabled!!!"); 884 884 rdtp = this_cpu_ptr(&rcu_dynticks); 885 + 886 + /* Page faults can happen in NMI handlers, so check... */ 887 + if (READ_ONCE(rdtp->dynticks_nmi_nesting)) 888 + return; 889 + 885 890 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && 886 891 rdtp->dynticks_nesting < 1); 887 892 if (rdtp->dynticks_nesting <= 1) { ··· 1020 1015 1021 1016 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_irq_enter() invoked with irqs enabled!!!"); 1022 1017 rdtp = this_cpu_ptr(&rcu_dynticks); 1018 + 1019 + /* Page faults can happen in NMI handlers, so check... */ 1020 + if (READ_ONCE(rdtp->dynticks_nmi_nesting)) 1021 + return; 1022 + 1023 1023 oldval = rdtp->dynticks_nesting; 1024 1024 rdtp->dynticks_nesting++; 1025 1025 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&

+23 -1

kernel/sched/core.c

··· 5166 5166 put_task_stack(p); 5167 5167 } 5168 5168 5169 + static inline bool 5170 + state_filter_match(unsigned long state_filter, struct task_struct *p) 5171 + { 5172 + /* no filter, everything matches */ 5173 + if (!state_filter) 5174 + return true; 5175 + 5176 + /* filter, but doesn't match */ 5177 + if (!(p->state & state_filter)) 5178 + return false; 5179 + 5180 + /* 5181 + * When looking for TASK_UNINTERRUPTIBLE skip TASK_IDLE (allows 5182 + * TASK_KILLABLE). 5183 + */ 5184 + if (state_filter == TASK_UNINTERRUPTIBLE && p->state == TASK_IDLE) 5185 + return false; 5186 + 5187 + return true; 5188 + } 5189 + 5190 + 5169 5191 void show_state_filter(unsigned long state_filter) 5170 5192 { 5171 5193 struct task_struct *g, *p; ··· 5210 5188 */ 5211 5189 touch_nmi_watchdog(); 5212 5190 touch_all_softlockup_watchdogs(); 5213 - if (!state_filter || (p->state & state_filter)) 5191 + if (state_filter_match(state_filter, p)) 5214 5192 sched_show_task(p); 5215 5193 } 5216 5194

-2

kernel/sched/debug.c

··· 466 466 } 467 467 #endif 468 468 469 - static const char stat_nam[] = TASK_STATE_TO_CHAR_STR; 470 - 471 469 static void 472 470 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 473 471 {

+16 -7

kernel/seccomp.c

··· 473 473 return 0; 474 474 } 475 475 476 + void __get_seccomp_filter(struct seccomp_filter *filter) 477 + { 478 + /* Reference count is bounded by the number of total processes. */ 479 + refcount_inc(&filter->usage); 480 + } 481 + 476 482 /* get_seccomp_filter - increments the reference count of the filter on @tsk */ 477 483 void get_seccomp_filter(struct task_struct *tsk) 478 484 { 479 485 struct seccomp_filter *orig = tsk->seccomp.filter; 480 486 if (!orig) 481 487 return; 482 - /* Reference count is bounded by the number of total processes. */ 483 - refcount_inc(&orig->usage); 488 + __get_seccomp_filter(orig); 484 489 } 485 490 486 491 static inline void seccomp_filter_free(struct seccomp_filter *filter) ··· 496 491 } 497 492 } 498 493 499 - /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ 500 - void put_seccomp_filter(struct task_struct *tsk) 494 + static void __put_seccomp_filter(struct seccomp_filter *orig) 501 495 { 502 - struct seccomp_filter *orig = tsk->seccomp.filter; 503 496 /* Clean up single-reference branches iteratively. */ 504 497 while (orig && refcount_dec_and_test(&orig->usage)) { 505 498 struct seccomp_filter *freeme = orig; 506 499 orig = orig->prev; 507 500 seccomp_filter_free(freeme); 508 501 } 502 + } 503 + 504 + /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */ 505 + void put_seccomp_filter(struct task_struct *tsk) 506 + { 507 + __put_seccomp_filter(tsk->seccomp.filter); 509 508 } 510 509 511 510 static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason) ··· 1034 1025 if (!data) 1035 1026 goto out; 1036 1027 1037 - get_seccomp_filter(task); 1028 + __get_seccomp_filter(filter); 1038 1029 spin_unlock_irq(&task->sighand->siglock); 1039 1030 1040 1031 if (copy_to_user(data, fprog->filter, bpf_classic_proglen(fprog))) 1041 1032 ret = -EFAULT; 1042 1033 1043 - put_seccomp_filter(task); 1034 + __put_seccomp_filter(filter); 1044 1035 return ret; 1045 1036 1046 1037 out:

+2 -1

kernel/sysctl.c

··· 367 367 .data = &sysctl_sched_time_avg, 368 368 .maxlen = sizeof(unsigned int), 369 369 .mode = 0644, 370 - .proc_handler = proc_dointvec, 370 + .proc_handler = proc_dointvec_minmax, 371 + .extra1 = &one, 371 372 }, 372 373 #ifdef CONFIG_SCHEDSTATS 373 374 {

+12 -6

kernel/trace/blktrace.c

··· 648 648 } 649 649 EXPORT_SYMBOL_GPL(blk_trace_startstop); 650 650 651 + /* 652 + * When reading or writing the blktrace sysfs files, the references to the 653 + * opened sysfs or device files should prevent the underlying block device 654 + * from being removed. So no further delete protection is really needed. 655 + */ 656 + 651 657 /** 652 658 * blk_trace_ioctl: - handle the ioctls associated with tracing 653 659 * @bdev: the block device ··· 671 665 if (!q) 672 666 return -ENXIO; 673 667 674 - mutex_lock(&bdev->bd_mutex); 668 + mutex_lock(&q->blk_trace_mutex); 675 669 676 670 switch (cmd) { 677 671 case BLKTRACESETUP: ··· 697 691 break; 698 692 } 699 693 700 - mutex_unlock(&bdev->bd_mutex); 694 + mutex_unlock(&q->blk_trace_mutex); 701 695 return ret; 702 696 } 703 697 ··· 1733 1727 if (q == NULL) 1734 1728 goto out_bdput; 1735 1729 1736 - mutex_lock(&bdev->bd_mutex); 1730 + mutex_lock(&q->blk_trace_mutex); 1737 1731 1738 1732 if (attr == &dev_attr_enable) { 1739 1733 ret = sprintf(buf, "%u\n", !!q->blk_trace); ··· 1752 1746 ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); 1753 1747 1754 1748 out_unlock_bdev: 1755 - mutex_unlock(&bdev->bd_mutex); 1749 + mutex_unlock(&q->blk_trace_mutex); 1756 1750 out_bdput: 1757 1751 bdput(bdev); 1758 1752 out: ··· 1794 1788 if (q == NULL) 1795 1789 goto out_bdput; 1796 1790 1797 - mutex_lock(&bdev->bd_mutex); 1791 + mutex_lock(&q->blk_trace_mutex); 1798 1792 1799 1793 if (attr == &dev_attr_enable) { 1800 1794 if (value) ··· 1820 1814 } 1821 1815 1822 1816 out_unlock_bdev: 1823 - mutex_unlock(&bdev->bd_mutex); 1817 + mutex_unlock(&q->blk_trace_mutex); 1824 1818 out_bdput: 1825 1819 bdput(bdev); 1826 1820 out:

+6 -15

kernel/trace/trace_output.c

··· 656 656 return !trace_seq_has_overflowed(s); 657 657 } 658 658 659 - static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; 660 - 661 - static int task_state_char(unsigned long state) 662 - { 663 - int bit = state ? __ffs(state) + 1 : 0; 664 - 665 - return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; 666 - } 667 - 668 659 /** 669 660 * ftrace_find_event - find a registered event 670 661 * @type: the type of event to look for ··· 921 930 922 931 trace_assign_type(field, iter->ent); 923 932 924 - T = task_state_char(field->next_state); 925 - S = task_state_char(field->prev_state); 933 + T = __task_state_to_char(field->next_state); 934 + S = __task_state_to_char(field->prev_state); 926 935 trace_find_cmdline(field->next_pid, comm); 927 936 trace_seq_printf(&iter->seq, 928 937 " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", ··· 957 966 trace_assign_type(field, iter->ent); 958 967 959 968 if (!S) 960 - S = task_state_char(field->prev_state); 961 - T = task_state_char(field->next_state); 969 + S = __task_state_to_char(field->prev_state); 970 + T = __task_state_to_char(field->next_state); 962 971 trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", 963 972 field->prev_pid, 964 973 field->prev_prio, ··· 993 1002 trace_assign_type(field, iter->ent); 994 1003 995 1004 if (!S) 996 - S = task_state_char(field->prev_state); 997 - T = task_state_char(field->next_state); 1005 + S = __task_state_to_char(field->prev_state); 1006 + T = __task_state_to_char(field->next_state); 998 1007 999 1008 SEQ_PUT_HEX_FIELD(s, field->prev_pid); 1000 1009 SEQ_PUT_HEX_FIELD(s, field->prev_prio);

+4 -4

kernel/trace/trace_sched_wakeup.c

··· 397 397 entry = ring_buffer_event_data(event); 398 398 entry->prev_pid = prev->pid; 399 399 entry->prev_prio = prev->prio; 400 - entry->prev_state = prev->state; 400 + entry->prev_state = __get_task_state(prev); 401 401 entry->next_pid = next->pid; 402 402 entry->next_prio = next->prio; 403 - entry->next_state = next->state; 403 + entry->next_state = __get_task_state(next); 404 404 entry->next_cpu = task_cpu(next); 405 405 406 406 if (!call_filter_check_discard(call, entry, buffer, event)) ··· 425 425 entry = ring_buffer_event_data(event); 426 426 entry->prev_pid = curr->pid; 427 427 entry->prev_prio = curr->prio; 428 - entry->prev_state = curr->state; 428 + entry->prev_state = __get_task_state(curr); 429 429 entry->next_pid = wakee->pid; 430 430 entry->next_prio = wakee->prio; 431 - entry->next_state = wakee->state; 431 + entry->next_state = __get_task_state(wakee); 432 432 entry->next_cpu = task_cpu(wakee); 433 433 434 434 if (!call_filter_check_discard(call, entry, buffer, event))

-15

kernel/trace/trace_stack.c

··· 96 96 if (in_nmi()) 97 97 return; 98 98 99 - /* 100 - * There's a slight chance that we are tracing inside the 101 - * RCU infrastructure, and rcu_irq_enter() will not work 102 - * as expected. 103 - */ 104 - if (unlikely(rcu_irq_enter_disabled())) 105 - return; 106 - 107 99 local_irq_save(flags); 108 100 arch_spin_lock(&stack_trace_max_lock); 109 - 110 - /* 111 - * RCU may not be watching, make it see us. 112 - * The stack trace code uses rcu_sched. 113 - */ 114 - rcu_irq_enter(); 115 101 116 102 /* In case another CPU set the tracer_frame on us */ 117 103 if (unlikely(!frame_size)) ··· 191 205 } 192 206 193 207 out: 194 - rcu_irq_exit(); 195 208 arch_spin_unlock(&stack_trace_max_lock); 196 209 local_irq_restore(flags); 197 210 }

+8 -2

mm/filemap.c

··· 2926 2926 * we're writing. Either one is a pretty crazy thing to do, 2927 2927 * so we don't support it 100%. If this invalidation 2928 2928 * fails, tough, the write still worked... 2929 + * 2930 + * Most of the time we do not need this since dio_complete() will do 2931 + * the invalidation for us. However there are some file systems that 2932 + * do not end up with dio_complete() being called, so let's not break 2933 + * them by removing it completely 2929 2934 */ 2930 - invalidate_inode_pages2_range(mapping, 2931 - pos >> PAGE_SHIFT, end); 2935 + if (mapping->nrpages) 2936 + invalidate_inode_pages2_range(mapping, 2937 + pos >> PAGE_SHIFT, end); 2932 2938 2933 2939 if (written > 0) { 2934 2940 pos += written;

-10

net/bluetooth/Kconfig

··· 126 126 Provide extensive information about internal Bluetooth states 127 127 in debugfs. 128 128 129 - config BT_LEGACY_IOCTL 130 - bool "Enable legacy ioctl interfaces" 131 - depends on BT && BT_BREDR 132 - default y 133 - help 134 - Enable support for legacy ioctl interfaces. This is only needed 135 - for old and deprecated applications using direct ioctl calls for 136 - controller management. Since Linux 3.4 all configuration and 137 - setup is done via mgmt interface and this is no longer needed. 138 - 139 129 source "drivers/bluetooth/Kconfig"

-6

net/bluetooth/hci_sock.c

··· 878 878 return 0; 879 879 } 880 880 881 - #ifdef CONFIG_BT_LEGACY_IOCTL 882 881 static int hci_sock_blacklist_add(struct hci_dev *hdev, void __user *arg) 883 882 { 884 883 bdaddr_t bdaddr; ··· 1049 1050 release_sock(sk); 1050 1051 return err; 1051 1052 } 1052 - #endif 1053 1053 1054 1054 static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, 1055 1055 int addr_len) ··· 1969 1971 .getname = hci_sock_getname, 1970 1972 .sendmsg = hci_sock_sendmsg, 1971 1973 .recvmsg = hci_sock_recvmsg, 1972 - #ifdef CONFIG_BT_LEGACY_IOCTL 1973 1974 .ioctl = hci_sock_ioctl, 1974 - #else 1975 - .ioctl = sock_no_ioctl, 1976 - #endif 1977 1975 .poll = datagram_poll, 1978 1976 .listen = sock_no_listen, 1979 1977 .shutdown = sock_no_shutdown,

+1 -1

net/sunrpc/xprtrdma/frwr_ops.c

··· 401 401 if (unlikely(n != mw->mw_nents)) 402 402 goto out_mapmr_err; 403 403 404 - dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", 404 + dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", 405 405 __func__, frmr, mw->mw_nents, mr->length); 406 406 407 407 key = (u8)(mr->rkey & 0x000000FF);

+2

scripts/Makefile.build

··· 265 265 endif 266 266 ifdef CONFIG_GCOV_KERNEL 267 267 objtool_args += --no-unreachable 268 + else 269 + objtool_args += $(call cc-ifversion, -lt, 0405, --no-unreachable) 268 270 endif 269 271 270 272 # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory

+1 -3

security/keys/Kconfig

··· 45 45 bool "Large payload keys" 46 46 depends on KEYS 47 47 depends on TMPFS 48 - depends on (CRYPTO_ANSI_CPRNG = y || CRYPTO_DRBG = y) 49 48 select CRYPTO_AES 50 - select CRYPTO_ECB 51 - select CRYPTO_RNG 49 + select CRYPTO_GCM 52 50 help 53 51 This option provides support for holding large keys within the kernel 54 52 (for example Kerberos ticket caches). The data may be stored out to

+64 -73

security/keys/big_key.c

··· 1 1 /* Large capacity key type 2 2 * 3 + * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 3 4 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 4 5 * Written by David Howells (dhowells@redhat.com) 5 6 * ··· 17 16 #include <linux/shmem_fs.h> 18 17 #include <linux/err.h> 19 18 #include <linux/scatterlist.h> 19 + #include <linux/random.h> 20 20 #include <keys/user-type.h> 21 21 #include <keys/big_key-type.h> 22 - #include <crypto/rng.h> 23 - #include <crypto/skcipher.h> 22 + #include <crypto/aead.h> 24 23 25 24 /* 26 25 * Layout of key payload words. ··· 50 49 /* 51 50 * Key size for big_key data encryption 52 51 */ 53 - #define ENC_KEY_SIZE 16 52 + #define ENC_KEY_SIZE 32 53 + 54 + /* 55 + * Authentication tag length 56 + */ 57 + #define ENC_AUTHTAG_SIZE 16 54 58 55 59 /* 56 60 * big_key defined keys take an arbitrary string as the description and an ··· 70 64 .destroy = big_key_destroy, 71 65 .describe = big_key_describe, 72 66 .read = big_key_read, 67 + /* no ->update(); don't add it without changing big_key_crypt() nonce */ 73 68 }; 74 69 75 70 /* 76 - * Crypto names for big_key data encryption 71 + * Crypto names for big_key data authenticated encryption 77 72 */ 78 - static const char big_key_rng_name[] = "stdrng"; 79 - static const char big_key_alg_name[] = "ecb(aes)"; 73 + static const char big_key_alg_name[] = "gcm(aes)"; 80 74 81 75 /* 82 - * Crypto algorithms for big_key data encryption 76 + * Crypto algorithms for big_key data authenticated encryption 83 77 */ 84 - static struct crypto_rng *big_key_rng; 85 - static struct crypto_skcipher *big_key_skcipher; 78 + static struct crypto_aead *big_key_aead; 86 79 87 80 /* 88 - * Generate random key to encrypt big_key data 81 + * Since changing the key affects the entire object, we need a mutex. 
89 82 */ 90 - static inline int big_key_gen_enckey(u8 *key) 91 - { 92 - return crypto_rng_get_bytes(big_key_rng, key, ENC_KEY_SIZE); 93 - } 83 + static DEFINE_MUTEX(big_key_aead_lock); 94 84 95 85 /* 96 86 * Encrypt/decrypt big_key data 97 87 */ 98 88 static int big_key_crypt(enum big_key_op op, u8 *data, size_t datalen, u8 *key) 99 89 { 100 - int ret = -EINVAL; 90 + int ret; 101 91 struct scatterlist sgio; 102 - SKCIPHER_REQUEST_ON_STACK(req, big_key_skcipher); 92 + struct aead_request *aead_req; 93 + /* We always use a zero nonce. The reason we can get away with this is 94 + * because we're using a different randomly generated key for every 95 + * different encryption. Notably, too, key_type_big_key doesn't define 96 + * an .update function, so there's no chance we'll wind up reusing the 97 + * key to encrypt updated data. Simply put: one key, one encryption. 98 + */ 99 + u8 zero_nonce[crypto_aead_ivsize(big_key_aead)]; 103 100 104 - if (crypto_skcipher_setkey(big_key_skcipher, key, ENC_KEY_SIZE)) { 101 + aead_req = aead_request_alloc(big_key_aead, GFP_KERNEL); 102 + if (!aead_req) 103 + return -ENOMEM; 104 + 105 + memset(zero_nonce, 0, sizeof(zero_nonce)); 106 + sg_init_one(&sgio, data, datalen + (op == BIG_KEY_ENC ? 
ENC_AUTHTAG_SIZE : 0)); 107 + aead_request_set_crypt(aead_req, &sgio, &sgio, datalen, zero_nonce); 108 + aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); 109 + aead_request_set_ad(aead_req, 0); 110 + 111 + mutex_lock(&big_key_aead_lock); 112 + if (crypto_aead_setkey(big_key_aead, key, ENC_KEY_SIZE)) { 105 113 ret = -EAGAIN; 106 114 goto error; 107 115 } 108 - 109 - skcipher_request_set_tfm(req, big_key_skcipher); 110 - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, 111 - NULL, NULL); 112 - 113 - sg_init_one(&sgio, data, datalen); 114 - skcipher_request_set_crypt(req, &sgio, &sgio, datalen, NULL); 115 - 116 116 if (op == BIG_KEY_ENC) 117 - ret = crypto_skcipher_encrypt(req); 117 + ret = crypto_aead_encrypt(aead_req); 118 118 else 119 - ret = crypto_skcipher_decrypt(req); 120 - 121 - skcipher_request_zero(req); 122 - 119 + ret = crypto_aead_decrypt(aead_req); 123 120 error: 121 + mutex_unlock(&big_key_aead_lock); 122 + aead_request_free(aead_req); 124 123 return ret; 125 124 } 126 125 ··· 157 146 * 158 147 * File content is stored encrypted with randomly generated key. 
159 148 */ 160 - size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); 149 + size_t enclen = datalen + ENC_AUTHTAG_SIZE; 161 150 loff_t pos = 0; 162 151 163 - /* prepare aligned data to encrypt */ 164 152 data = kmalloc(enclen, GFP_KERNEL); 165 153 if (!data) 166 154 return -ENOMEM; 167 - 168 155 memcpy(data, prep->data, datalen); 169 - memset(data + datalen, 0x00, enclen - datalen); 170 156 171 157 /* generate random key */ 172 158 enckey = kmalloc(ENC_KEY_SIZE, GFP_KERNEL); ··· 171 163 ret = -ENOMEM; 172 164 goto error; 173 165 } 174 - 175 - ret = big_key_gen_enckey(enckey); 176 - if (ret) 166 + ret = get_random_bytes_wait(enckey, ENC_KEY_SIZE); 167 + if (unlikely(ret)) 177 168 goto err_enckey; 178 169 179 170 /* encrypt aligned data */ 180 - ret = big_key_crypt(BIG_KEY_ENC, data, enclen, enckey); 171 + ret = big_key_crypt(BIG_KEY_ENC, data, datalen, enckey); 181 172 if (ret) 182 173 goto err_enckey; 183 174 ··· 202 195 *path = file->f_path; 203 196 path_get(path); 204 197 fput(file); 205 - kfree(data); 198 + kzfree(data); 206 199 } else { 207 200 /* Just store the data in a buffer */ 208 201 void *data = kmalloc(datalen, GFP_KERNEL); ··· 218 211 err_fput: 219 212 fput(file); 220 213 err_enckey: 221 - kfree(enckey); 214 + kzfree(enckey); 222 215 error: 223 - kfree(data); 216 + kzfree(data); 224 217 return ret; 225 218 } 226 219 ··· 234 227 235 228 path_put(path); 236 229 } 237 - kfree(prep->payload.data[big_key_data]); 230 + kzfree(prep->payload.data[big_key_data]); 238 231 } 239 232 240 233 /* ··· 266 259 path->mnt = NULL; 267 260 path->dentry = NULL; 268 261 } 269 - kfree(key->payload.data[big_key_data]); 262 + kzfree(key->payload.data[big_key_data]); 270 263 key->payload.data[big_key_data] = NULL; 271 264 } 272 265 ··· 302 295 struct file *file; 303 296 u8 *data; 304 297 u8 *enckey = (u8 *)key->payload.data[big_key_data]; 305 - size_t enclen = ALIGN(datalen, crypto_skcipher_blocksize(big_key_skcipher)); 298 + size_t enclen = datalen + 
ENC_AUTHTAG_SIZE; 306 299 loff_t pos = 0; 307 300 308 301 data = kmalloc(enclen, GFP_KERNEL); ··· 335 328 err_fput: 336 329 fput(file); 337 330 error: 338 - kfree(data); 331 + kzfree(data); 339 332 } else { 340 333 ret = datalen; 341 334 if (copy_to_user(buffer, key->payload.data[big_key_data], ··· 351 344 */ 352 345 static int __init big_key_init(void) 353 346 { 354 - struct crypto_skcipher *cipher; 355 - struct crypto_rng *rng; 356 347 int ret; 357 348 358 - rng = crypto_alloc_rng(big_key_rng_name, 0, 0); 359 - if (IS_ERR(rng)) { 360 - pr_err("Can't alloc rng: %ld\n", PTR_ERR(rng)); 361 - return PTR_ERR(rng); 362 - } 363 - 364 - big_key_rng = rng; 365 - 366 - /* seed RNG */ 367 - ret = crypto_rng_reset(rng, NULL, crypto_rng_seedsize(rng)); 368 - if (ret) { 369 - pr_err("Can't reset rng: %d\n", ret); 370 - goto error_rng; 371 - } 372 - 373 349 /* init block cipher */ 374 - cipher = crypto_alloc_skcipher(big_key_alg_name, 0, CRYPTO_ALG_ASYNC); 375 - if (IS_ERR(cipher)) { 376 - ret = PTR_ERR(cipher); 350 + big_key_aead = crypto_alloc_aead(big_key_alg_name, 0, CRYPTO_ALG_ASYNC); 351 + if (IS_ERR(big_key_aead)) { 352 + ret = PTR_ERR(big_key_aead); 377 353 pr_err("Can't alloc crypto: %d\n", ret); 378 - goto error_rng; 354 + return ret; 379 355 } 380 - 381 - big_key_skcipher = cipher; 356 + ret = crypto_aead_setauthsize(big_key_aead, ENC_AUTHTAG_SIZE); 357 + if (ret < 0) { 358 + pr_err("Can't set crypto auth tag len: %d\n", ret); 359 + goto free_aead; 360 + } 382 361 383 362 ret = register_key_type(&key_type_big_key); 384 363 if (ret < 0) { 385 364 pr_err("Can't register type: %d\n", ret); 386 - goto error_cipher; 365 + goto free_aead; 387 366 } 388 367 389 368 return 0; 390 369 391 - error_cipher: 392 - crypto_free_skcipher(big_key_skcipher); 393 - error_rng: 394 - crypto_free_rng(big_key_rng); 370 + free_aead: 371 + crypto_free_aead(big_key_aead); 395 372 return ret; 396 373 } 397 374

+1 -1

security/keys/internal.h

··· 141 141 extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx); 142 142 extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx); 143 143 144 - extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); 144 + extern struct key *find_keyring_by_name(const char *name, bool uid_keyring); 145 145 146 146 extern int install_user_keyrings(void); 147 147 extern int install_thread_keyring_to_cred(struct cred *);

+4 -2

security/keys/key.c

··· 54 54 struct key_user *key_user_lookup(kuid_t uid) 55 55 { 56 56 struct key_user *candidate = NULL, *user; 57 - struct rb_node *parent = NULL; 58 - struct rb_node **p; 57 + struct rb_node *parent, **p; 59 58 60 59 try_again: 60 + parent = NULL; 61 61 p = &key_user_tree.rb_node; 62 62 spin_lock(&key_user_lock); 63 63 ··· 302 302 key->flags |= 1 << KEY_FLAG_IN_QUOTA; 303 303 if (flags & KEY_ALLOC_BUILT_IN) 304 304 key->flags |= 1 << KEY_FLAG_BUILTIN; 305 + if (flags & KEY_ALLOC_UID_KEYRING) 306 + key->flags |= 1 << KEY_FLAG_UID_KEYRING; 305 307 306 308 #ifdef KEY_DEBUGGING 307 309 key->magic = KEY_DEBUG_MAGIC;

+8 -5

security/keys/keyctl.c

··· 766 766 767 767 key = key_ref_to_ptr(key_ref); 768 768 769 + if (test_bit(KEY_FLAG_NEGATIVE, &key->flags)) { 770 + ret = -ENOKEY; 771 + goto error2; 772 + } 773 + 769 774 /* see if we can read it directly */ 770 775 ret = key_permission(key_ref, KEY_NEED_READ); 771 776 if (ret == 0) 772 777 goto can_read_key; 773 778 if (ret != -EACCES) 774 - goto error; 779 + goto error2; 775 780 776 781 /* we can't; see if it's searchable from this process's keyrings 777 782 * - we automatically take account of the fact that it may be ··· 1411 1406 } 1412 1407 1413 1408 ret = keyctl_change_reqkey_auth(authkey); 1414 - if (ret < 0) 1415 - goto error; 1409 + if (ret == 0) 1410 + ret = authkey->serial; 1416 1411 key_put(authkey); 1417 - 1418 - ret = authkey->serial; 1419 1412 error: 1420 1413 return ret; 1421 1414 }

+19 -18

security/keys/keyring.c

··· 423 423 } 424 424 425 425 struct keyring_read_iterator_context { 426 - size_t qty; 426 + size_t buflen; 427 427 size_t count; 428 428 key_serial_t __user *buffer; 429 429 }; ··· 435 435 int ret; 436 436 437 437 kenter("{%s,%d},,{%zu/%zu}", 438 - key->type->name, key->serial, ctx->count, ctx->qty); 438 + key->type->name, key->serial, ctx->count, ctx->buflen); 439 439 440 - if (ctx->count >= ctx->qty) 440 + if (ctx->count >= ctx->buflen) 441 441 return 1; 442 442 443 443 ret = put_user(key->serial, ctx->buffer); ··· 472 472 return 0; 473 473 474 474 /* Calculate how much data we could return */ 475 - ctx.qty = nr_keys * sizeof(key_serial_t); 476 - 477 475 if (!buffer || !buflen) 478 - return ctx.qty; 479 - 480 - if (buflen > ctx.qty) 481 - ctx.qty = buflen; 476 + return nr_keys * sizeof(key_serial_t); 482 477 483 478 /* Copy the IDs of the subscribed keys into the buffer */ 484 479 ctx.buffer = (key_serial_t __user *)buffer; 480 + ctx.buflen = buflen; 485 481 ctx.count = 0; 486 482 ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); 487 483 if (ret < 0) { ··· 1097 1101 /* 1098 1102 * Find a keyring with the specified name. 1099 1103 * 1100 - * All named keyrings in the current user namespace are searched, provided they 1101 - * grant Search permission directly to the caller (unless this check is 1102 - * skipped). Keyrings whose usage points have reached zero or who have been 1103 - * revoked are skipped. 1104 + * Only keyrings that have nonzero refcount, are not revoked, and are owned by a 1105 + * user in the current user namespace are considered. If @uid_keyring is %true, 1106 + * the keyring additionally must have been allocated as a user or user session 1107 + * keyring; otherwise, it must grant Search permission directly to the caller. 1104 1108 * 1105 1109 * Returns a pointer to the keyring with the keyring's refcount having being 1106 1110 * incremented on success. -ENOKEY is returned if a key could not be found. 
1107 1111 */ 1108 - struct key *find_keyring_by_name(const char *name, bool skip_perm_check) 1112 + struct key *find_keyring_by_name(const char *name, bool uid_keyring) 1109 1113 { 1110 1114 struct key *keyring; 1111 1115 int bucket; ··· 1133 1137 if (strcmp(keyring->description, name) != 0) 1134 1138 continue; 1135 1139 1136 - if (!skip_perm_check && 1137 - key_permission(make_key_ref(keyring, 0), 1138 - KEY_NEED_SEARCH) < 0) 1139 - continue; 1140 + if (uid_keyring) { 1141 + if (!test_bit(KEY_FLAG_UID_KEYRING, 1142 + &keyring->flags)) 1143 + continue; 1144 + } else { 1145 + if (key_permission(make_key_ref(keyring, 0), 1146 + KEY_NEED_SEARCH) < 0) 1147 + continue; 1148 + } 1140 1149 1141 1150 /* we've got a match but we might end up racing with 1142 1151 * key_cleanup() if the keyring is currently 'dead'

+2 -6

security/keys/proc.c

··· 187 187 struct keyring_search_context ctx = { 188 188 .index_key.type = key->type, 189 189 .index_key.description = key->description, 190 - .cred = current_cred(), 190 + .cred = m->file->f_cred, 191 191 .match_data.cmp = lookup_user_key_possessed, 192 192 .match_data.raw_data = key, 193 193 .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, ··· 207 207 } 208 208 } 209 209 210 - /* check whether the current task is allowed to view the key (assuming 211 - * non-possession) 212 - * - the caller holds a spinlock, and thus the RCU read lock, making our 213 - * access to __current_cred() safe 214 - */ 210 + /* check whether the current task is allowed to view the key */ 215 211 rc = key_task_permission(key_ref, ctx.cred, KEY_NEED_VIEW); 216 212 if (rc < 0) 217 213 return 0;

+4 -2

security/keys/process_keys.c

··· 77 77 if (IS_ERR(uid_keyring)) { 78 78 uid_keyring = keyring_alloc(buf, user->uid, INVALID_GID, 79 79 cred, user_keyring_perm, 80 - KEY_ALLOC_IN_QUOTA, 80 + KEY_ALLOC_UID_KEYRING | 81 + KEY_ALLOC_IN_QUOTA, 81 82 NULL, NULL); 82 83 if (IS_ERR(uid_keyring)) { 83 84 ret = PTR_ERR(uid_keyring); ··· 95 94 session_keyring = 96 95 keyring_alloc(buf, user->uid, INVALID_GID, 97 96 cred, user_keyring_perm, 98 - KEY_ALLOC_IN_QUOTA, 97 + KEY_ALLOC_UID_KEYRING | 98 + KEY_ALLOC_IN_QUOTA, 99 99 NULL, NULL); 100 100 if (IS_ERR(session_keyring)) { 101 101 ret = PTR_ERR(session_keyring);

+33 -41

security/keys/request_key_auth.c

··· 120 120 } 121 121 } 122 122 123 + static void free_request_key_auth(struct request_key_auth *rka) 124 + { 125 + if (!rka) 126 + return; 127 + key_put(rka->target_key); 128 + key_put(rka->dest_keyring); 129 + if (rka->cred) 130 + put_cred(rka->cred); 131 + kfree(rka->callout_info); 132 + kfree(rka); 133 + } 134 + 123 135 /* 124 136 * Destroy an instantiation authorisation token key. 125 137 */ ··· 141 129 142 130 kenter("{%d}", key->serial); 143 131 144 - if (rka->cred) { 145 - put_cred(rka->cred); 146 - rka->cred = NULL; 147 - } 148 - 149 - key_put(rka->target_key); 150 - key_put(rka->dest_keyring); 151 - kfree(rka->callout_info); 152 - kfree(rka); 132 + free_request_key_auth(rka); 153 133 } 154 134 155 135 /* ··· 155 151 const struct cred *cred = current->cred; 156 152 struct key *authkey = NULL; 157 153 char desc[20]; 158 - int ret; 154 + int ret = -ENOMEM; 159 155 160 156 kenter("%d,", target->serial); 161 157 162 158 /* allocate a auth record */ 163 - rka = kmalloc(sizeof(*rka), GFP_KERNEL); 164 - if (!rka) { 165 - kleave(" = -ENOMEM"); 166 - return ERR_PTR(-ENOMEM); 167 - } 168 - rka->callout_info = kmalloc(callout_len, GFP_KERNEL); 169 - if (!rka->callout_info) { 170 - kleave(" = -ENOMEM"); 171 - kfree(rka); 172 - return ERR_PTR(-ENOMEM); 173 - } 159 + rka = kzalloc(sizeof(*rka), GFP_KERNEL); 160 + if (!rka) 161 + goto error; 162 + rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL); 163 + if (!rka->callout_info) 164 + goto error_free_rka; 165 + rka->callout_len = callout_len; 174 166 175 167 /* see if the calling process is already servicing the key request of 176 168 * another process */ ··· 176 176 177 177 /* if the auth key has been revoked, then the key we're 178 178 * servicing is already instantiated */ 179 - if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) 180 - goto auth_key_revoked; 179 + if (test_bit(KEY_FLAG_REVOKED, 180 + &cred->request_key_auth->flags)) { 181 + up_read(&cred->request_key_auth->sem); 182 + ret = 
-EKEYREVOKED; 183 + goto error_free_rka; 184 + } 181 185 182 186 irka = cred->request_key_auth->payload.data[0]; 183 187 rka->cred = get_cred(irka->cred); ··· 197 193 198 194 rka->target_key = key_get(target); 199 195 rka->dest_keyring = key_get(dest_keyring); 200 - memcpy(rka->callout_info, callout_info, callout_len); 201 - rka->callout_len = callout_len; 202 196 203 197 /* allocate the auth key */ 204 198 sprintf(desc, "%x", target->serial); ··· 207 205 KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL); 208 206 if (IS_ERR(authkey)) { 209 207 ret = PTR_ERR(authkey); 210 - goto error_alloc; 208 + goto error_free_rka; 211 209 } 212 210 213 211 /* construct the auth key */ 214 212 ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); 215 213 if (ret < 0) 216 - goto error_inst; 214 + goto error_put_authkey; 217 215 218 216 kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage)); 219 217 return authkey; 220 218 221 - auth_key_revoked: 222 - up_read(&cred->request_key_auth->sem); 223 - kfree(rka->callout_info); 224 - kfree(rka); 225 - kleave("= -EKEYREVOKED"); 226 - return ERR_PTR(-EKEYREVOKED); 227 - 228 - error_inst: 229 - key_revoke(authkey); 219 + error_put_authkey: 230 220 key_put(authkey); 231 - error_alloc: 232 - key_put(rka->target_key); 233 - key_put(rka->dest_keyring); 234 - kfree(rka->callout_info); 235 - kfree(rka); 221 + error_free_rka: 222 + free_request_key_auth(rka); 223 + error: 236 224 kleave("= %d", ret); 237 225 return ERR_PTR(ret); 238 226 }

+6

tools/arch/s390/include/uapi/asm/kvm.h

··· 88 88 /* kvm attributes for KVM_S390_VM_TOD */ 89 89 #define KVM_S390_VM_TOD_LOW 0 90 90 #define KVM_S390_VM_TOD_HIGH 1 91 + #define KVM_S390_VM_TOD_EXT 2 92 + 93 + struct kvm_s390_vm_tod_clock { 94 + __u8 epoch_idx; 95 + __u64 tod; 96 + }; 91 97 92 98 /* kvm attributes for KVM_S390_VM_CPU_MODEL */ 93 99 /* processor related attributes are r/w */

+2

tools/arch/x86/include/asm/cpufeatures.h

··· 196 196 197 197 #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ 198 198 #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ 199 + #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ 199 200 200 201 #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ 201 202 #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ ··· 288 287 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ 289 288 #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ 290 289 #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ 290 + #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ 291 291 292 292 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ 293 293 #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/

+3 -1

tools/arch/x86/include/asm/disabled-features.h

··· 21 21 # define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31)) 22 22 # define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31)) 23 23 # define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31)) 24 + # define DISABLE_PCID 0 24 25 #else 25 26 # define DISABLE_VME 0 26 27 # define DISABLE_K6_MTRR 0 27 28 # define DISABLE_CYRIX_ARR 0 28 29 # define DISABLE_CENTAUR_MCR 0 30 + # define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31)) 29 31 #endif /* CONFIG_X86_64 */ 30 32 31 33 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS ··· 51 49 #define DISABLED_MASK1 0 52 50 #define DISABLED_MASK2 0 53 51 #define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR) 54 - #define DISABLED_MASK4 0 52 + #define DISABLED_MASK4 (DISABLE_PCID) 55 53 #define DISABLED_MASK5 0 56 54 #define DISABLED_MASK6 0 57 55 #define DISABLED_MASK7 0

+34

tools/include/asm-generic/hugetlb_encode.h

··· 1 + #ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_ 2 + #define _ASM_GENERIC_HUGETLB_ENCODE_H_ 3 + 4 + /* 5 + * Several system calls take a flag to request "hugetlb" huge pages. 6 + * Without further specification, these system calls will use the 7 + * system's default huge page size. If a system supports multiple 8 + * huge page sizes, the desired huge page size can be specified in 9 + * bits [26:31] of the flag arguments. The value in these 6 bits 10 + * will encode the log2 of the huge page size. 11 + * 12 + * The following definitions are associated with this huge page size 13 + * encoding in flag arguments. System call specific header files 14 + * that use this encoding should include this file. They can then 15 + * provide definitions based on these with their own specific prefix. 16 + * for example: 17 + * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT 18 + */ 19 + 20 + #define HUGETLB_FLAG_ENCODE_SHIFT 26 21 + #define HUGETLB_FLAG_ENCODE_MASK 0x3f 22 + 23 + #define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) 24 + #define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) 25 + #define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) 26 + #define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) 27 + #define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) 28 + #define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) 29 + #define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) 30 + #define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) 31 + #define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) 32 + #define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) 33 + 34 + #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */

+3 -11

tools/include/uapi/asm-generic/mman-common.h

··· 58 58 overrides the coredump filter bits */ 59 59 #define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */ 60 60 61 + #define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */ 62 + #define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */ 63 + 61 64 /* compatibility flags */ 62 65 #define MAP_FILE 0 63 - 64 - /* 65 - * When MAP_HUGETLB is set bits [26:31] encode the log2 of the huge page size. 66 - * This gives us 6 bits, which is enough until someone invents 128 bit address 67 - * spaces. 68 - * 69 - * Assume these are all power of twos. 70 - * When 0 use the default page size. 71 - */ 72 - #define MAP_HUGE_SHIFT 26 73 - #define MAP_HUGE_MASK 0x3f 74 66 75 67 #define PKEY_DISABLE_ACCESS 0x1 76 68 #define PKEY_DISABLE_WRITE 0x2

+22

tools/include/uapi/drm/drm.h

··· 700 700 701 701 struct drm_syncobj_create { 702 702 __u32 handle; 703 + #define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0) 703 704 __u32 flags; 704 705 }; 705 706 ··· 716 715 __u32 flags; 717 716 718 717 __s32 fd; 718 + __u32 pad; 719 + }; 720 + 721 + #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0) 722 + #define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1) 723 + struct drm_syncobj_wait { 724 + __u64 handles; 725 + /* absolute timeout */ 726 + __s64 timeout_nsec; 727 + __u32 count_handles; 728 + __u32 flags; 729 + __u32 first_signaled; /* only valid when not waiting all */ 730 + __u32 pad; 731 + }; 732 + 733 + struct drm_syncobj_array { 734 + __u64 handles; 735 + __u32 count_handles; 719 736 __u32 pad; 720 737 }; 721 738 ··· 859 840 #define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy) 860 841 #define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle) 861 842 #define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle) 843 + #define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait) 844 + #define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array) 845 + #define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array) 862 846 863 847 /** 864 848 * Device specific ioctls should only be in their respective headers

+49 -2

tools/include/uapi/drm/i915_drm.h

··· 260 260 #define DRM_I915_GEM_CONTEXT_GETPARAM 0x34 261 261 #define DRM_I915_GEM_CONTEXT_SETPARAM 0x35 262 262 #define DRM_I915_PERF_OPEN 0x36 263 + #define DRM_I915_PERF_ADD_CONFIG 0x37 264 + #define DRM_I915_PERF_REMOVE_CONFIG 0x38 263 265 264 266 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) 265 267 #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) ··· 317 315 #define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param) 318 316 #define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param) 319 317 #define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param) 318 + #define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config) 319 + #define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64) 320 320 321 321 /* Allow drivers to submit batchbuffers directly to hardware, relying 322 322 * on the security mechanisms provided by hardware. ··· 434 430 * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. 435 431 */ 436 432 #define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 433 + 434 + /* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of 435 + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. 436 + */ 437 + #define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 437 438 438 439 typedef struct drm_i915_getparam { 439 440 __s32 param; ··· 821 812 __u64 rsvd2; 822 813 }; 823 814 815 + struct drm_i915_gem_exec_fence { 816 + /** 817 + * User's handle for a drm_syncobj to wait on or signal. 
818 + */ 819 + __u32 handle; 820 + 821 + #define I915_EXEC_FENCE_WAIT (1<<0) 822 + #define I915_EXEC_FENCE_SIGNAL (1<<1) 823 + __u32 flags; 824 + }; 825 + 824 826 struct drm_i915_gem_execbuffer2 { 825 827 /** 826 828 * List of gem_exec_object2 structs ··· 846 826 __u32 DR1; 847 827 __u32 DR4; 848 828 __u32 num_cliprects; 849 - /** This is a struct drm_clip_rect *cliprects */ 829 + /** 830 + * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY 831 + * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a 832 + * struct drm_i915_gem_exec_fence *fences. 833 + */ 850 834 __u64 cliprects_ptr; 851 835 #define I915_EXEC_RING_MASK (7<<0) 852 836 #define I915_EXEC_DEFAULT (0<<0) ··· 951 927 * element). 952 928 */ 953 929 #define I915_EXEC_BATCH_FIRST (1<<18) 954 - #define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1)) 930 + 931 + /* Setting I915_EXEC_FENCE_ARRAY implies that num_cliprects and cliprects_ptr 932 + * define an array of drm_i915_gem_exec_fence structures which specify a set of 933 + * dma fences to wait upon or signal. 934 + */ 935 + #define I915_EXEC_FENCE_ARRAY (1<<19) 936 + 937 + #define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1)) 955 938 956 939 #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) 957 940 #define i915_execbuffer2_set_context_id(eb2, context) \ ··· 1496 1465 DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3, 1497 1466 1498 1467 DRM_I915_PERF_RECORD_MAX /* non-ABI */ 1468 + }; 1469 + 1470 + /** 1471 + * Structure to upload perf dynamic configuration into the kernel. 1472 + */ 1473 + struct drm_i915_perf_oa_config { 1474 + /** String formatted like "%08x-%04x-%04x-%04x-%012x" */ 1475 + char uuid[36]; 1476 + 1477 + __u32 n_mux_regs; 1478 + __u32 n_boolean_regs; 1479 + __u32 n_flex_regs; 1480 + 1481 + __u64 __user mux_regs_ptr; 1482 + __u64 __user boolean_regs_ptr; 1483 + __u64 __user flex_regs_ptr; 1499 1484 }; 1500 1485 1501 1486 #if defined(__cplusplus)

+20 -12

tools/include/uapi/linux/bpf.h

··· 143 143 144 144 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE 145 145 146 - enum bpf_sockmap_flags { 147 - BPF_SOCKMAP_UNSPEC, 148 - BPF_SOCKMAP_STRPARSER, 149 - __MAX_BPF_SOCKMAP_FLAG 150 - }; 151 - 152 146 /* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command 153 147 * to the given target_fd cgroup the descendent cgroup will be able to 154 148 * override effective bpf program that was inherited from this cgroup ··· 362 368 * int bpf_redirect(ifindex, flags) 363 369 * redirect to another netdev 364 370 * @ifindex: ifindex of the net device 365 - * @flags: bit 0 - if set, redirect to ingress instead of egress 366 - * other bits - reserved 367 - * Return: TC_ACT_REDIRECT 371 + * @flags: 372 + * cls_bpf: 373 + * bit 0 - if set, redirect to ingress instead of egress 374 + * other bits - reserved 375 + * xdp_bpf: 376 + * all bits - reserved 377 + * Return: cls_bpf: TC_ACT_REDIRECT on success or TC_ACT_SHOT on error 378 + * xdp_bpf: XDP_REDIRECT on success or XDP_ABORTED on error 379 + * int bpf_redirect_map(map, key, flags) 380 + * redirect to endpoint in map 381 + * @map: pointer to dev map 382 + * @key: index in map to lookup 383 + * @flags: -- 384 + * Return: XDP_REDIRECT on success or XDP_ABORTED on error 368 385 * 369 386 * u32 bpf_get_route_realm(skb) 370 387 * retrieve a dst's tclassid ··· 637 632 FN(skb_adjust_room), \ 638 633 FN(redirect_map), \ 639 634 FN(sk_redirect_map), \ 640 - FN(sock_map_update), 635 + FN(sock_map_update), \ 641 636 642 637 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 643 638 * function eBPF program intends to call ··· 758 753 __u32 family; 759 754 __u32 type; 760 755 __u32 protocol; 756 + __u32 mark; 757 + __u32 priority; 761 758 }; 762 759 763 760 #define XDP_PACKET_HEADROOM 256 764 761 765 762 /* User return codes for XDP prog type. 766 763 * A valid XDP program must return one of these defined values. All other 767 - * return codes are reserved for future use. 
Unknown return codes will result 768 - * in packet drop. 764 + * return codes are reserved for future use. Unknown return codes will 765 + * result in packet drops and a warning via bpf_warn_invalid_xdp_action(). 769 766 */ 770 767 enum xdp_action { 771 768 XDP_ABORTED = 0, 772 769 XDP_DROP, 773 770 XDP_PASS, 774 771 XDP_TX, 772 + XDP_REDIRECT, 775 773 }; 776 774 777 775 /* user accessible metadata for XDP packet hook

+2 -1

tools/include/uapi/linux/kvm.h

··· 711 711 struct kvm_ppc_smmu_info { 712 712 __u64 flags; 713 713 __u32 slb_size; 714 - __u32 pad; 714 + __u16 data_keys; /* # storage keys supported for data */ 715 + __u16 instr_keys; /* # storage keys supported for instructions */ 715 716 struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; 716 717 }; 717 718

+23 -1

tools/include/uapi/linux/mman.h

··· 1 1 #ifndef _UAPI_LINUX_MMAN_H 2 2 #define _UAPI_LINUX_MMAN_H 3 3 4 - #include <uapi/asm/mman.h> 4 + #include <asm/mman.h> 5 + #include <asm-generic/hugetlb_encode.h> 5 6 6 7 #define MREMAP_MAYMOVE 1 7 8 #define MREMAP_FIXED 2 ··· 10 9 #define OVERCOMMIT_GUESS 0 11 10 #define OVERCOMMIT_ALWAYS 1 12 11 #define OVERCOMMIT_NEVER 2 12 + 13 + /* 14 + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page 15 + * size other than the default is desired. See hugetlb_encode.h. 16 + * All known huge page size encodings are provided here. It is the 17 + * responsibility of the application to know which sizes are supported on 18 + * the running system. See mmap(2) man page for details. 19 + */ 20 + #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT 21 + #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK 22 + 23 + #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB 24 + #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB 25 + #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB 26 + #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB 27 + #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB 28 + #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB 29 + #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB 30 + #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB 31 + #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB 32 + #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB 13 33 14 34 #endif /* _UAPI_LINUX_MMAN_H */

+8 -3

tools/objtool/arch/x86/decode.c

··· 284 284 case 0x8d: 285 285 if (sib == 0x24 && rex_w && !rex_b && !rex_x) { 286 286 287 - /* lea disp(%rsp), reg */ 288 287 *type = INSN_STACK; 289 - op->src.type = OP_SRC_ADD; 288 + if (!insn.displacement.value) { 289 + /* lea (%rsp), reg */ 290 + op->src.type = OP_SRC_REG; 291 + } else { 292 + /* lea disp(%rsp), reg */ 293 + op->src.type = OP_SRC_ADD; 294 + op->src.offset = insn.displacement.value; 295 + } 290 296 op->src.reg = CFI_SP; 291 - op->src.offset = insn.displacement.value; 292 297 op->dest.type = OP_DEST_REG; 293 298 op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r]; 294 299

+2 -85

tools/perf/MANIFEST

··· 1 1 tools/perf 2 - tools/arch/alpha/include/asm/barrier.h 3 - tools/arch/arm/include/asm/barrier.h 4 - tools/arch/arm64/include/asm/barrier.h 5 - tools/arch/ia64/include/asm/barrier.h 6 - tools/arch/mips/include/asm/barrier.h 7 - tools/arch/powerpc/include/asm/barrier.h 8 - tools/arch/s390/include/asm/barrier.h 9 - tools/arch/sh/include/asm/barrier.h 10 - tools/arch/sparc/include/asm/barrier.h 11 - tools/arch/sparc/include/asm/barrier_32.h 12 - tools/arch/sparc/include/asm/barrier_64.h 13 - tools/arch/tile/include/asm/barrier.h 14 - tools/arch/x86/include/asm/barrier.h 15 - tools/arch/x86/include/asm/cmpxchg.h 16 - tools/arch/x86/include/asm/cpufeatures.h 17 - tools/arch/x86/include/asm/disabled-features.h 18 - tools/arch/x86/include/asm/required-features.h 19 - tools/arch/x86/include/uapi/asm/svm.h 20 - tools/arch/x86/include/uapi/asm/vmx.h 21 - tools/arch/x86/include/uapi/asm/kvm.h 22 - tools/arch/x86/include/uapi/asm/kvm_perf.h 23 - tools/arch/x86/lib/memcpy_64.S 24 - tools/arch/x86/lib/memset_64.S 25 - tools/arch/s390/include/uapi/asm/kvm_perf.h 26 - tools/arch/s390/include/uapi/asm/sie.h 27 - tools/arch/xtensa/include/asm/barrier.h 2 + tools/arch 28 3 tools/scripts 29 4 tools/build 30 - tools/arch/x86/include/asm/atomic.h 31 - tools/arch/x86/include/asm/rmwcc.h 5 + tools/include 32 6 tools/lib/traceevent 33 7 tools/lib/api 34 8 tools/lib/bpf ··· 16 42 tools/lib/bitmap.c 17 43 tools/lib/str_error_r.c 18 44 tools/lib/vsprintf.c 19 - tools/include/asm/alternative-asm.h 20 - tools/include/asm/atomic.h 21 - tools/include/asm/barrier.h 22 - tools/include/asm/bug.h 23 - tools/include/asm-generic/atomic-gcc.h 24 - tools/include/asm-generic/barrier.h 25 - tools/include/asm-generic/bitops/arch_hweight.h 26 - tools/include/asm-generic/bitops/atomic.h 27 - tools/include/asm-generic/bitops/const_hweight.h 28 - tools/include/asm-generic/bitops/__ffs.h 29 - tools/include/asm-generic/bitops/__ffz.h 30 - tools/include/asm-generic/bitops/__fls.h 31 - 
tools/include/asm-generic/bitops/find.h 32 - tools/include/asm-generic/bitops/fls64.h 33 - tools/include/asm-generic/bitops/fls.h 34 - tools/include/asm-generic/bitops/hweight.h 35 - tools/include/asm-generic/bitops.h 36 - tools/include/linux/atomic.h 37 - tools/include/linux/bitops.h 38 - tools/include/linux/compiler.h 39 - tools/include/linux/compiler-gcc.h 40 - tools/include/linux/coresight-pmu.h 41 - tools/include/linux/bug.h 42 - tools/include/linux/filter.h 43 - tools/include/linux/hash.h 44 - tools/include/linux/kernel.h 45 - tools/include/linux/list.h 46 - tools/include/linux/log2.h 47 - tools/include/uapi/asm-generic/fcntl.h 48 - tools/include/uapi/asm-generic/ioctls.h 49 - tools/include/uapi/asm-generic/mman-common.h 50 - tools/include/uapi/asm-generic/mman.h 51 - tools/include/uapi/drm/drm.h 52 - tools/include/uapi/drm/i915_drm.h 53 - tools/include/uapi/linux/bpf.h 54 - tools/include/uapi/linux/bpf_common.h 55 - tools/include/uapi/linux/fcntl.h 56 - tools/include/uapi/linux/hw_breakpoint.h 57 - tools/include/uapi/linux/kvm.h 58 - tools/include/uapi/linux/mman.h 59 - tools/include/uapi/linux/perf_event.h 60 - tools/include/uapi/linux/sched.h 61 - tools/include/uapi/linux/stat.h 62 - tools/include/uapi/linux/vhost.h 63 - tools/include/uapi/sound/asound.h 64 - tools/include/linux/poison.h 65 - tools/include/linux/rbtree.h 66 - tools/include/linux/rbtree_augmented.h 67 - tools/include/linux/refcount.h 68 - tools/include/linux/string.h 69 - tools/include/linux/stringify.h 70 - tools/include/linux/types.h 71 - tools/include/linux/err.h 72 - tools/include/linux/bitmap.h 73 - tools/include/linux/time64.h 74 - tools/arch/*/include/uapi/asm/mman.h 75 - tools/arch/*/include/uapi/asm/perf_regs.h

-1

tools/perf/arch/s390/util/Build

··· 1 1 libperf-y += header.o 2 - libperf-y += sym-handling.o 3 2 libperf-y += kvm-stat.o 4 3 5 4 libperf-$(CONFIG_DWARF) += dwarf-regs.o

-29

tools/perf/arch/s390/util/sym-handling.c

··· 1 - /* 2 - * Architecture specific ELF symbol handling and relocation mapping. 3 - * 4 - * Copyright 2017 IBM Corp. 5 - * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com> 6 - * 7 - * This program is free software; you can redistribute it and/or modify 8 - * it under the terms of the GNU General Public License (version 2 only) 9 - * as published by the Free Software Foundation. 10 - */ 11 - 12 - #include "symbol.h" 13 - 14 - #ifdef HAVE_LIBELF_SUPPORT 15 - bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) 16 - { 17 - if (ehdr.e_type == ET_EXEC) 18 - return false; 19 - return ehdr.e_type == ET_REL || ehdr.e_type == ET_DYN; 20 - } 21 - 22 - void arch__adjust_sym_map_offset(GElf_Sym *sym, 23 - GElf_Shdr *shdr __maybe_unused, 24 - struct map *map) 25 - { 26 - if (map->type == MAP__FUNCTION) 27 - sym->st_value += map->start; 28 - } 29 - #endif

+21 -14

tools/perf/util/callchain.c

··· 65 65 callchain_param.mode = CHAIN_FOLDED; 66 66 return 0; 67 67 } 68 - 69 - pr_err("Invalid callchain mode: %s\n", value); 70 68 return -1; 71 69 } 72 70 ··· 80 82 callchain_param.order_set = true; 81 83 return 0; 82 84 } 83 - 84 - pr_err("Invalid callchain order: %s\n", value); 85 85 return -1; 86 86 } 87 87 ··· 101 105 callchain_param.branch_callstack = 1; 102 106 return 0; 103 107 } 104 - 105 - pr_err("Invalid callchain sort key: %s\n", value); 106 108 return -1; 107 109 } 108 110 ··· 118 124 callchain_param.value = CCVAL_COUNT; 119 125 return 0; 120 126 } 121 - 122 - pr_err("Invalid callchain config key: %s\n", value); 123 127 return -1; 124 128 } 125 129 ··· 311 319 312 320 return ret; 313 321 } 314 - if (!strcmp(var, "print-type")) 315 - return parse_callchain_mode(value); 316 - if (!strcmp(var, "order")) 317 - return parse_callchain_order(value); 318 - if (!strcmp(var, "sort-key")) 319 - return parse_callchain_sort_key(value); 322 + if (!strcmp(var, "print-type")){ 323 + int ret; 324 + ret = parse_callchain_mode(value); 325 + if (ret == -1) 326 + pr_err("Invalid callchain mode: %s\n", value); 327 + return ret; 328 + } 329 + if (!strcmp(var, "order")){ 330 + int ret; 331 + ret = parse_callchain_order(value); 332 + if (ret == -1) 333 + pr_err("Invalid callchain order: %s\n", value); 334 + return ret; 335 + } 336 + if (!strcmp(var, "sort-key")){ 337 + int ret; 338 + ret = parse_callchain_sort_key(value); 339 + if (ret == -1) 340 + pr_err("Invalid callchain sort key: %s\n", value); 341 + return ret; 342 + } 320 343 if (!strcmp(var, "threshold")) { 321 344 callchain_param.min_percent = strtod(value, &endptr); 322 345 if (value == endptr) {

+6 -1

tools/perf/util/evsel.c

··· 271 271 return evsel; 272 272 } 273 273 274 + static bool perf_event_can_profile_kernel(void) 275 + { 276 + return geteuid() == 0 || perf_event_paranoid() == -1; 277 + } 278 + 274 279 struct perf_evsel *perf_evsel__new_cycles(bool precise) 275 280 { 276 281 struct perf_event_attr attr = { 277 282 .type = PERF_TYPE_HARDWARE, 278 283 .config = PERF_COUNT_HW_CPU_CYCLES, 279 - .exclude_kernel = geteuid() != 0, 284 + .exclude_kernel = !perf_event_can_profile_kernel(), 280 285 }; 281 286 struct perf_evsel *evsel; 282 287

+1 -7

tools/perf/util/symbol-elf.c

··· 810 810 void __weak arch__sym_update(struct symbol *s __maybe_unused, 811 811 GElf_Sym *sym __maybe_unused) { } 812 812 813 - void __weak arch__adjust_sym_map_offset(GElf_Sym *sym, GElf_Shdr *shdr, 814 - struct map *map __maybe_unused) 815 - { 816 - sym->st_value -= shdr->sh_addr - shdr->sh_offset; 817 - } 818 - 819 813 int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, 820 814 struct symsrc *runtime_ss, int kmodule) 821 815 { ··· 990 996 991 997 /* Adjust symbol to map to file offset */ 992 998 if (adjust_kernel_syms) 993 - arch__adjust_sym_map_offset(&sym, &shdr, map); 999 + sym.st_value -= shdr.sh_addr - shdr.sh_offset; 994 1000 995 1001 if (strcmp(section_name, 996 1002 (curr_dso->short_name +

-3

tools/perf/util/symbol.h

··· 344 344 #ifdef HAVE_LIBELF_SUPPORT 345 345 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); 346 346 void arch__sym_update(struct symbol *s, GElf_Sym *sym); 347 - void arch__adjust_sym_map_offset(GElf_Sym *sym, 348 - GElf_Shdr *shdr __maybe_unused, 349 - struct map *map __maybe_unused); 350 347 #endif 351 348 352 349 #define SYMBOL_A 0

+1 -1

tools/perf/util/syscalltbl.c

··· 15 15 16 16 #include "syscalltbl.h" 17 17 #include <stdlib.h> 18 + #include <linux/compiler.h> 18 19 19 20 #ifdef HAVE_SYSCALL_TABLE 20 - #include <linux/compiler.h> 21 21 #include <string.h> 22 22 #include "string2.h" 23 23 #include "util.h"

+11 -7

tools/testing/selftests/Makefile

··· 52 52 override MAKEFLAGS = 53 53 endif 54 54 55 + ifneq ($(KBUILD_SRC),) 56 + override LDFLAGS = 57 + endif 58 + 55 59 BUILD := $(O) 56 60 ifndef BUILD 57 61 BUILD := $(KBUILD_OUTPUT) ··· 66 62 67 63 export BUILD 68 64 all: 69 - for TARGET in $(TARGETS); do \ 65 + @for TARGET in $(TARGETS); do \ 70 66 BUILD_TARGET=$$BUILD/$$TARGET; \ 71 67 mkdir $$BUILD_TARGET -p; \ 72 68 make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ 73 69 done; 74 70 75 71 run_tests: all 76 - for TARGET in $(TARGETS); do \ 72 + @for TARGET in $(TARGETS); do \ 77 73 BUILD_TARGET=$$BUILD/$$TARGET; \ 78 74 make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\ 79 75 done; 80 76 81 77 hotplug: 82 - for TARGET in $(TARGETS_HOTPLUG); do \ 78 + @for TARGET in $(TARGETS_HOTPLUG); do \ 83 79 BUILD_TARGET=$$BUILD/$$TARGET; \ 84 80 make OUTPUT=$$BUILD_TARGET -C $$TARGET;\ 85 81 done; 86 82 87 83 run_hotplug: hotplug 88 - for TARGET in $(TARGETS_HOTPLUG); do \ 84 + @for TARGET in $(TARGETS_HOTPLUG); do \ 89 85 BUILD_TARGET=$$BUILD/$$TARGET; \ 90 86 make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\ 91 87 done; 92 88 93 89 clean_hotplug: 94 - for TARGET in $(TARGETS_HOTPLUG); do \ 90 + @for TARGET in $(TARGETS_HOTPLUG); do \ 95 91 BUILD_TARGET=$$BUILD/$$TARGET; \ 96 92 make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ 97 93 done; ··· 107 103 ifdef INSTALL_PATH 108 104 @# Ask all targets to install their files 109 105 mkdir -p $(INSTALL_PATH) 110 - for TARGET in $(TARGETS); do \ 106 + @for TARGET in $(TARGETS); do \ 111 107 BUILD_TARGET=$$BUILD/$$TARGET; \ 112 108 make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ 113 109 done; ··· 132 128 endif 133 129 134 130 clean: 135 - for TARGET in $(TARGETS); do \ 131 + @for TARGET in $(TARGETS); do \ 136 132 BUILD_TARGET=$$BUILD/$$TARGET; \ 137 133 make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ 138 134 done;

+9 -8

tools/testing/selftests/bpf/bpf_util.h

··· 12 12 unsigned int start, end, possible_cpus = 0; 13 13 char buff[128]; 14 14 FILE *fp; 15 + int n; 15 16 16 17 fp = fopen(fcpu, "r"); 17 18 if (!fp) { ··· 21 20 } 22 21 23 22 while (fgets(buff, sizeof(buff), fp)) { 24 - if (sscanf(buff, "%u-%u", &start, &end) == 2) { 25 - possible_cpus = start == 0 ? end + 1 : 0; 26 - break; 23 + n = sscanf(buff, "%u-%u", &start, &end); 24 + if (n == 0) { 25 + printf("Failed to retrieve # possible CPUs!\n"); 26 + exit(1); 27 + } else if (n == 1) { 28 + end = start; 27 29 } 30 + possible_cpus = start == 0 ? end + 1 : 0; 31 + break; 28 32 } 29 - 30 33 fclose(fp); 31 - if (!possible_cpus) { 32 - printf("Failed to retrieve # possible CPUs!\n"); 33 - exit(1); 34 - } 35 34 36 35 return possible_cpus; 37 36 }

+4 -4

tools/testing/selftests/breakpoints/Makefile

··· 2 2 uname_M := $(shell uname -m 2>/dev/null || echo not) 3 3 ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) 4 4 5 + TEST_GEN_PROGS := step_after_suspend_test 6 + 5 7 ifeq ($(ARCH),x86) 6 - TEST_GEN_PROGS := breakpoint_test 8 + TEST_GEN_PROGS += breakpoint_test 7 9 endif 8 10 ifneq (,$(filter $(ARCH),aarch64 arm64)) 9 - TEST_GEN_PROGS := breakpoint_test_arm64 11 + TEST_GEN_PROGS += breakpoint_test_arm64 10 12 endif 11 - 12 - TEST_GEN_PROGS += step_after_suspend_test 13 13 14 14 include ../lib.mk 15 15

+2

tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc

··· 1 1 #!/bin/sh 2 2 # description: Register/unregister many kprobe events 3 3 4 + [ -f kprobe_events ] || exit_unsupported # this is configurable 5 + 4 6 # ftrace fentry skip size depends on the machine architecture. 5 7 # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le 6 8 case `uname -m` in

+6 -3

tools/testing/selftests/futex/Makefile

··· 7 7 include ../lib.mk 8 8 9 9 all: 10 - for DIR in $(SUBDIRS); do \ 10 + @for DIR in $(SUBDIRS); do \ 11 11 BUILD_TARGET=$(OUTPUT)/$$DIR; \ 12 12 mkdir $$BUILD_TARGET -p; \ 13 13 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ 14 + if [ -e $$DIR/$(TEST_PROGS) ]; then 15 + rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; 16 + fi 14 17 done 15 18 16 19 override define RUN_TESTS 17 - $(OUTPUT)/run.sh 20 + @cd $(OUTPUT); ./run.sh 18 21 endef 19 22 20 23 override define INSTALL_RULE ··· 36 33 endef 37 34 38 35 override define CLEAN 39 - for DIR in $(SUBDIRS); do \ 36 + @for DIR in $(SUBDIRS); do \ 40 37 BUILD_TARGET=$(OUTPUT)/$$DIR; \ 41 38 mkdir $$BUILD_TARGET -p; \ 42 39 make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\

+2

tools/testing/selftests/intel_pstate/Makefile

··· 1 1 CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE 2 2 LDLIBS := $(LDLIBS) -lm 3 3 4 + ifeq (,$(filter $(ARCH),x86)) 4 5 TEST_GEN_FILES := msr aperf 6 + endif 5 7 6 8 TEST_PROGS := run.sh 7 9

+5 -6

tools/testing/selftests/intel_pstate/run.sh

··· 29 29 30 30 EVALUATE_ONLY=0 31 31 32 - max_cpus=$(($(nproc)-1)) 32 + if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then 33 + echo "$0 # Skipped: Test can only run on x86 architectures." 34 + exit 0 35 + fi 33 36 34 - # compile programs 35 - gcc aperf.c -Wall -D_GNU_SOURCE -o aperf -lm 36 - [ $? -ne 0 ] && echo "Problem compiling aperf.c." && exit 1 37 - gcc -o msr msr.c -lm 38 - [ $? -ne 0 ] && echo "Problem compiling msr.c." && exit 1 37 + max_cpus=$(($(nproc)-1)) 39 38 40 39 function run_test () { 41 40

+40 -8

tools/testing/selftests/lib.mk

··· 6 6 OUTPUT := $(shell pwd) 7 7 endif 8 8 9 + # The following are built by lib.mk common compile rules. 10 + # TEST_CUSTOM_PROGS should be used by tests that require 11 + # custom build rule and prevent common build rule use. 12 + # TEST_PROGS are for test shell scripts. 13 + # TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests 14 + # and install targets. Common clean doesn't touch them. 9 15 TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) 16 + TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) 10 17 TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) 11 18 12 19 all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) ··· 27 20 test_num=`echo $$test_num+1 | bc`; \ 28 21 echo "selftests: $$BASENAME_TEST"; \ 29 22 echo "========================================"; \ 30 - if [ ! -x $$BASENAME_TEST ]; then \ 23 + if [ ! -x $$TEST ]; then \ 31 24 echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\ 32 25 echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \ 33 26 else \ 34 - cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ 27 + cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ 35 28 fi; \ 36 29 done; 37 30 endef 38 31 39 32 run_tests: all 40 - $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_PROGS)) 33 + ifneq ($(KBUILD_SRC),) 34 + @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then 35 + @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT) 36 + fi 37 + @if [ "X$(TEST_PROGS)" != "X" ]; then 38 + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) 39 + else 40 + 
$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)) 41 + fi 42 + else 43 + $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) 44 + endif 41 45 42 46 define INSTALL_RULE 43 47 @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ ··· 56 38 echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ 57 39 rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ 58 40 fi 59 - @if [ "X$(TEST_GEN_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ 41 + @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ 60 42 mkdir -p ${INSTALL_PATH}; \ 61 - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ 62 - rsync -a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ 43 + echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ 44 + rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ 63 45 fi 64 46 endef 65 47 ··· 71 53 endif 72 54 73 55 define EMIT_TESTS 74 - @for TEST in $(TEST_GEN_PROGS) $(TEST_PROGS); do \ 56 + @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ 75 57 BASENAME_TEST=`basename $$TEST`; \ 76 - echo "(./$$BASENAME_TEST && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ 58 + echo "(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ 77 59 done; 78 60 endef 79 61 80 62 emit_tests: 81 63 $(EMIT_TESTS) 64 + 65 + # define if isn't already. It is undefined in make O= case. 
66 + ifeq ($(RM),) 67 + RM := rm -f 68 + endif 82 69 83 70 define CLEAN 84 71 $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) ··· 91 68 92 69 clean: 93 70 $(CLEAN) 71 + 72 + # When make O= with kselftest target from main level 73 + # the following aren't defined. 74 + # 75 + ifneq ($(KBUILD_SRC),) 76 + LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) 77 + COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c 78 + LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) 79 + endif 94 80 95 81 $(OUTPUT)/%:%.c 96 82 $(LINK.c) $^ $(LDLIBS) -o $@

tools/testing/selftests/memfd/run_tests.sh

+2 -2

tools/testing/selftests/mqueue/Makefile

··· 5 5 include ../lib.mk 6 6 7 7 override define RUN_TESTS 8 - @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" 9 - @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" 8 + $(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" 9 + $(OUTPUT)//mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" 10 10 endef 11 11 12 12 override define EMIT_TESTS

+1

tools/testing/selftests/net/.gitignore

··· 6 6 reuseport_bpf_cpu 7 7 reuseport_bpf_numa 8 8 reuseport_dualstack 9 + reuseaddr_conflict

+3 -3

tools/testing/selftests/net/Makefile

··· 5 5 6 6 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh 7 7 TEST_GEN_FILES = socket 8 - TEST_GEN_FILES += psock_fanout psock_tpacket 9 - TEST_GEN_FILES += reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa 10 - TEST_GEN_FILES += reuseport_dualstack msg_zerocopy 8 + TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy 9 + TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa 10 + TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict 11 11 12 12 include ../lib.mk 13 13

+1 -1

tools/testing/selftests/net/msg_zerocopy.c

··· 55 55 #include <unistd.h> 56 56 57 57 #ifndef SO_EE_ORIGIN_ZEROCOPY 58 - #define SO_EE_ORIGIN_ZEROCOPY SO_EE_ORIGIN_UPAGE 58 + #define SO_EE_ORIGIN_ZEROCOPY 5 59 59 #endif 60 60 61 61 #ifndef SO_ZEROCOPY

+1 -1

tools/testing/selftests/net/netdevice.sh

··· 178 178 exit 0 179 179 fi 180 180 181 - ip -Version 2>/dev/null >/dev/null 181 + ip link show 2>/dev/null >/dev/null 182 182 if [ $? -ne 0 ];then 183 183 echo "SKIP: Could not run test without the ip tool" 184 184 exit 0

+114

tools/testing/selftests/net/reuseaddr_conflict.c

··· 1 + /* 2 + * Test for the regression introduced by 3 + * 4 + * b9470c27607b ("inet: kill smallest_size and smallest_port") 5 + * 6 + * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb 7 + * when we open the ipv6 counterpart, which is what was happening previously. 8 + */ 9 + #include <errno.h> 10 + #include <error.h> 11 + #include <arpa/inet.h> 12 + #include <netinet/in.h> 13 + #include <stdbool.h> 14 + #include <stdio.h> 15 + #include <sys/socket.h> 16 + #include <sys/types.h> 17 + #include <unistd.h> 18 + 19 + #define PORT 9999 20 + 21 + int open_port(int ipv6, int any) 22 + { 23 + int fd = -1; 24 + int reuseaddr = 1; 25 + int v6only = 1; 26 + int addrlen; 27 + int ret = -1; 28 + struct sockaddr *addr; 29 + int family = ipv6 ? AF_INET6 : AF_INET; 30 + 31 + struct sockaddr_in6 addr6 = { 32 + .sin6_family = AF_INET6, 33 + .sin6_port = htons(PORT), 34 + .sin6_addr = in6addr_any 35 + }; 36 + struct sockaddr_in addr4 = { 37 + .sin_family = AF_INET, 38 + .sin_port = htons(PORT), 39 + .sin_addr.s_addr = any ? 
htonl(INADDR_ANY) : inet_addr("127.0.0.1"), 40 + }; 41 + 42 + 43 + if (ipv6) { 44 + addr = (struct sockaddr*)&addr6; 45 + addrlen = sizeof(addr6); 46 + } else { 47 + addr = (struct sockaddr*)&addr4; 48 + addrlen = sizeof(addr4); 49 + } 50 + 51 + if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) { 52 + perror("socket"); 53 + goto out; 54 + } 55 + 56 + if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only, 57 + sizeof(v6only)) < 0) { 58 + perror("setsockopt IPV6_V6ONLY"); 59 + goto out; 60 + } 61 + 62 + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, 63 + sizeof(reuseaddr)) < 0) { 64 + perror("setsockopt SO_REUSEADDR"); 65 + goto out; 66 + } 67 + 68 + if (bind(fd, addr, addrlen) < 0) { 69 + perror("bind"); 70 + goto out; 71 + } 72 + 73 + if (any) 74 + return fd; 75 + 76 + if (listen(fd, 1) < 0) { 77 + perror("listen"); 78 + goto out; 79 + } 80 + return fd; 81 + out: 82 + close(fd); 83 + return ret; 84 + } 85 + 86 + int main(void) 87 + { 88 + int listenfd; 89 + int fd1, fd2; 90 + 91 + fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT); 92 + listenfd = open_port(0, 0); 93 + if (listenfd < 0) 94 + error(1, errno, "Couldn't open listen socket"); 95 + fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT); 96 + fd1 = open_port(0, 1); 97 + if (fd1 >= 0) 98 + error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket"); 99 + fprintf(stderr, "Opening in6addr_any:%d\n", PORT); 100 + fd1 = open_port(1, 1); 101 + if (fd1 < 0) 102 + error(1, errno, "Couldn't open ipv6 reuseport"); 103 + fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT); 104 + fd2 = open_port(0, 1); 105 + if (fd2 >= 0) 106 + error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket"); 107 + close(fd1); 108 + fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT); 109 + fd1 = open_port(0, 1); 110 + if (fd1 >= 0) 111 + error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport 
socket with no ipv6"); 112 + fprintf(stderr, "Success"); 113 + return 0; 114 + }

+13 -5

tools/testing/selftests/seccomp/seccomp_bpf.c

··· 6 6 */ 7 7 8 8 #include <sys/types.h> 9 - #include <asm/siginfo.h> 10 - #define __have_siginfo_t 1 11 - #define __have_sigval_t 1 12 - #define __have_sigevent_t 1 9 + 10 + /* 11 + * glibc 2.26 and later have SIGSYS in siginfo_t. Before that, 12 + * we need to use the kernel's siginfo.h file and trick glibc 13 + * into accepting it. 14 + */ 15 + #if !__GLIBC_PREREQ(2, 26) 16 + # include <asm/siginfo.h> 17 + # define __have_siginfo_t 1 18 + # define __have_sigval_t 1 19 + # define __have_sigevent_t 1 20 + #endif 13 21 14 22 #include <errno.h> 15 23 #include <linux/filter.h> ··· 892 884 syscall(__NR_getpid); 893 885 } 894 886 895 - static struct siginfo TRAP_info; 887 + static siginfo_t TRAP_info; 896 888 static volatile int TRAP_nr; 897 889 static void TRAP_action(int nr, siginfo_t *info, void *void_context) 898 890 {

+4

tools/testing/selftests/sigaltstack/sas.c

··· 39 39 stack_t stk; 40 40 struct stk_data *p; 41 41 42 + #if __s390x__ 43 + register unsigned long sp asm("%15"); 44 + #else 42 45 register unsigned long sp asm("sp"); 46 + #endif 43 47 44 48 if (sp < (unsigned long)sstack || 45 49 sp >= (unsigned long)sstack + SIGSTKSZ) {

+19 -5

tools/testing/selftests/sync/Makefile

··· 2 2 CFLAGS += -I../../../../usr/include/ 3 3 LDFLAGS += -pthread 4 4 5 - TEST_PROGS = sync_test 6 - 7 - all: $(TEST_PROGS) 5 + .PHONY: all clean 8 6 9 7 include ../lib.mk 8 + 9 + # lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special 10 + # build rules. lib.mk will run and install them. 11 + 12 + TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test 13 + all: $(TEST_CUSTOM_PROGS) 10 14 11 15 OBJS = sync_test.o sync.o 12 16 ··· 22 18 TESTS += sync_stress_consumer.o 23 19 TESTS += sync_stress_merge.o 24 20 25 - sync_test: $(OBJS) $(TESTS) 21 + OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS)) 22 + TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS)) 26 23 27 - EXTRA_CLEAN := sync_test $(OBJS) $(TESTS) 24 + $(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS) 25 + $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS) 26 + 27 + $(OBJS): $(OUTPUT)/%.o: %.c 28 + $(CC) -c $^ -o $@ 29 + 30 + $(TESTS): $(OUTPUT)/%.o: %.c 31 + $(CC) -c $^ -o $@ 32 + 33 + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)

+7 -6

tools/testing/selftests/timers/set-timer-lat.c

··· 143 143 printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n", 144 144 clockstring(clock_id), 145 145 flags ? "ABSTIME":"RELTIME"); 146 - return 0; 146 + /* Indicate timer isn't set, so caller doesn't wait */ 147 + return 1; 147 148 } 148 149 printf("%s - timer_create() failed\n", clockstring(clock_id)); 149 150 return -1; ··· 214 213 int err; 215 214 216 215 err = setup_timer(clock_id, flags, interval, &tm1); 216 + /* Unsupported case - return 0 to not fail the test */ 217 217 if (err) 218 - return err; 218 + return err == 1 ? 0 : err; 219 219 220 220 while (alarmcount < 5) 221 221 sleep(1); ··· 230 228 timer_t tm1; 231 229 const int interval = 0; 232 230 struct timeval timeout; 233 - fd_set fds; 234 231 int err; 235 232 236 233 err = setup_timer(clock_id, flags, interval, &tm1); 234 + /* Unsupported case - return 0 to not fail the test */ 237 235 if (err) 238 - return err; 236 + return err == 1 ? 0 : err; 239 237 240 238 memset(&timeout, 0, sizeof(timeout)); 241 239 timeout.tv_sec = 5; 242 - FD_ZERO(&fds); 243 240 do { 244 - err = select(FD_SETSIZE, &fds, NULL, NULL, &timeout); 241 + err = select(0, NULL, NULL, NULL, &timeout); 245 242 } while (err == -1 && errno == EINTR); 246 243 247 244 timer_delete(tm1);

+1 -6

tools/testing/selftests/watchdog/Makefile

··· 1 - TEST_PROGS := watchdog-test 2 - 3 - all: $(TEST_PROGS) 1 + TEST_GEN_PROGS := watchdog-test 4 2 5 3 include ../lib.mk 6 - 7 - clean: 8 - rm -fr $(TEST_PROGS)