···413413396 common pkey_free sys_pkey_free414414397 common statx sys_statx415415398 common rseq sys_rseq416416+399 common io_pgetevents sys_io_pgetevents
+10
arch/powerpc/kvm/book3s_64_mmu_radix.c
···646646 */647647 local_irq_disable();648648 ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift);649649+ /*650650+ * If the PTE disappeared temporarily due to a THP651651+ * collapse, just return and let the guest try again.652652+ */653653+ if (!ptep) {654654+ local_irq_enable();655655+ if (page)656656+ put_page(page);657657+ return RESUME_GUEST;658658+ }649659 pte = *ptep;650660 local_irq_enable();651661
···36363737static int num_counters_llc;3838static int num_counters_nb;3939+static bool l3_mask;39404041static HLIST_HEAD(uncore_unused_list);4142···209208 /* and we do not enable counter overflow interrupts */210209 hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB;211210 hwc->idx = -1;211211+212212+ /*213213+ * SliceMask and ThreadMask need to be set for certain L3 events in214214+ * Family 17h. For other events, the two fields do not affect the count.215215+ */216216+ if (l3_mask)217217+ hwc->config |= (AMD64_L3_SLICE_MASK | AMD64_L3_THREAD_MASK);212218213219 if (event->cpu < 0)214220 return -EINVAL;···533525 amd_llc_pmu.name = "amd_l3";534526 format_attr_event_df.show = &event_show_df;535527 format_attr_event_l3.show = &event_show_l3;528528+ l3_mask = true;536529 } else {537530 num_counters_nb = NUM_COUNTERS_NB;538531 num_counters_llc = NUM_COUNTERS_L2;···541532 amd_llc_pmu.name = "amd_l2";542533 format_attr_event_df = format_attr_event;543534 format_attr_event_l3 = format_attr_event;535535+ l3_mask = false;544536 }545537546538 amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df;
···922922static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)923923{924924 /* AMD errata T13 (order #21922) */925925- if ((c->x86 == 6)) {925925+ if (c->x86 == 6) {926926 /* Duron Rev A0 */927927 if (c->x86_model == 3 && c->x86_stepping == 0)928928 size = 64;
+4
arch/x86/kernel/tsc.c
···2626#include <asm/apic.h>2727#include <asm/intel-family.h>2828#include <asm/i8259.h>2929+#include <asm/uv/uv.h>29303031unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */3132EXPORT_SYMBOL(cpu_khz);···14331432void __init tsc_early_init(void)14341433{14351434 if (!boot_cpu_has(X86_FEATURE_TSC))14351435+ return;14361436+ /* Don't change UV TSC multi-chassis synchronization */14371437+ if (is_early_uv_system())14361438 return;14371439 if (!determine_cpu_tsc_frequencies(true))14381440 return;
+20-4
arch/x86/kvm/mmu.c
···249249 */250250static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;251251252252+/*253253+ * In some cases, we need to preserve the GFN of a non-present or reserved254254+ * SPTE when we usurp the upper five bits of the physical address space to255255+ * defend against L1TF, e.g. for MMIO SPTEs. To preserve the GFN, we'll256256+ * shift bits of the GFN that overlap with shadow_nonpresent_or_rsvd_mask257257+ * left into the reserved bits, i.e. the GFN in the SPTE will be split into258258+ * high and low parts. This mask covers the lower bits of the GFN.259259+ */260260+static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;261261+262262+252263static void mmu_spte_set(u64 *sptep, u64 spte);253264static union kvm_mmu_page_role254265kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);···368357369358static gfn_t get_mmio_spte_gfn(u64 spte)370359{371371- u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |372372- shadow_nonpresent_or_rsvd_mask;373373- u64 gpa = spte & ~mask;360360+ u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;374361375362 gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)376363 & shadow_nonpresent_or_rsvd_mask;···432423433424static void kvm_mmu_reset_all_pte_masks(void)434425{426426+ u8 low_phys_bits;427427+435428 shadow_user_mask = 0;436429 shadow_accessed_mask = 0;437430 shadow_dirty_mask = 0;···448437 * appropriate mask to guard against L1TF attacks. Otherwise, it is449438 * assumed that the CPU is not vulnerable to L1TF.450439 */440440+ low_phys_bits = boot_cpu_data.x86_phys_bits;451441 if (boot_cpu_data.x86_phys_bits <452452- 52 - shadow_nonpresent_or_rsvd_mask_len)442442+ 52 - shadow_nonpresent_or_rsvd_mask_len) {453443 shadow_nonpresent_or_rsvd_mask =454444 rsvd_bits(boot_cpu_data.x86_phys_bits -455445 shadow_nonpresent_or_rsvd_mask_len,456446 boot_cpu_data.x86_phys_bits - 1);447447+ low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;448448+ }449449+ shadow_nonpresent_or_rsvd_lower_gfn_mask =450450+ GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);457451}458452459453static int is_cpuid_PSE36(void)
+75-60
arch/x86/kvm/vmx.c
···121121122122#define MSR_BITMAP_MODE_X2APIC 1123123#define MSR_BITMAP_MODE_X2APIC_APICV 2124124-#define MSR_BITMAP_MODE_LM 4125124126125#define KVM_VMX_TSC_MULTIPLIER_MAX 0xffffffffffffffffULL127126···856857857858 /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */858859 u64 vmcs01_debugctl;860860+ u64 vmcs01_guest_bndcfgs;859861860862 u16 vpid02;861863 u16 last_vpid;···28992899 vmx->msr_host_kernel_gs_base = read_msr(MSR_KERNEL_GS_BASE);29002900 }2901290129022902- if (is_long_mode(&vmx->vcpu))29032903- wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);29022902+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);29042903#else29052904 savesegment(fs, fs_sel);29062905 savesegment(gs, gs_sel);···29502951 vmx->loaded_cpu_state = NULL;2951295229522953#ifdef CONFIG_X86_6429532953- if (is_long_mode(&vmx->vcpu))29542954- rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);29542954+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);29552955#endif29562956 if (host_state->ldt_sel || (host_state->gs_sel & 7)) {29572957 kvm_load_ldt(host_state->ldt_sel);···29782980#ifdef CONFIG_X86_6429792981static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)29802982{29812981- if (is_long_mode(&vmx->vcpu)) {29822982- preempt_disable();29832983- if (vmx->loaded_cpu_state)29842984- rdmsrl(MSR_KERNEL_GS_BASE,29852985- vmx->msr_guest_kernel_gs_base);29862986- preempt_enable();29872987- }29832983+ preempt_disable();29842984+ if (vmx->loaded_cpu_state)29852985+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);29862986+ preempt_enable();29882987 return vmx->msr_guest_kernel_gs_base;29892988}2990298929912990static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)29922991{29932993- if (is_long_mode(&vmx->vcpu)) {29942994- preempt_disable();29952995- if (vmx->loaded_cpu_state)29962996- wrmsrl(MSR_KERNEL_GS_BASE, data);29972997- preempt_enable();29982998- }29922992+ preempt_disable();29932993+ if (vmx->loaded_cpu_state)29942994+ wrmsrl(MSR_KERNEL_GS_BASE, data);29952995+ preempt_enable();29992996 vmx->msr_guest_kernel_gs_base = data;30002997}30012998#endif···35263533 VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |35273534 VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;3528353535293529- if (kvm_mpx_supported())35303530- msrs->exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;35313531-35323536 /* We support free control of debug control saving. */35333537 msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;35343538···35423552 VM_ENTRY_LOAD_IA32_PAT;35433553 msrs->entry_ctls_high |=35443554 (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);35453545- if (kvm_mpx_supported())35463546- msrs->entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;3547355535483556 /* We support free control of debug control loading. */35493557 msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;···35893601 msrs->secondary_ctls_high);35903602 msrs->secondary_ctls_low = 0;35913603 msrs->secondary_ctls_high &=35923592- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |35933604 SECONDARY_EXEC_DESC |35943605 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |35953606 SECONDARY_EXEC_APIC_REGISTER_VIRT |35963607 SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |35973608 SECONDARY_EXEC_WBINVD_EXITING;36093609+35983610 /*35993611 * We can emulate "VMCS shadowing," even if the hardware36003612 * doesn't support it.···36503662 if (enable_unrestricted_guest)36513663 msrs->secondary_ctls_high |=36523664 SECONDARY_EXEC_UNRESTRICTED_GUEST;36653665+36663666+ if (flexpriority_enabled)36673667+ msrs->secondary_ctls_high |=36683668+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;3653366936543670 /* miscellaneous data */36553671 rdmsr(MSR_IA32_VMX_MISC,···50655073 if (!msr)50665074 return;5067507550685068- /*50695069- * MSR_KERNEL_GS_BASE is not intercepted when the guest is in50705070- * 64-bit mode as a 64-bit kernel may frequently access the50715071- * MSR. This means we need to manually save/restore the MSR50725072- * when switching between guest and host state, but only if50735073- * the guest is in 64-bit mode. Sync our cached value if the50745074- * guest is transitioning to 32-bit mode and the CPU contains50755075- * guest state, i.e. the cache is stale.50765076- */50775077-#ifdef CONFIG_X86_6450785078- if (!(efer & EFER_LMA))50795079- (void)vmx_read_guest_kernel_gs_base(vmx);50805080-#endif50815076 vcpu->arch.efer = efer;50825077 if (efer & EFER_LMA) {50835078 vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);···60576078 mode |= MSR_BITMAP_MODE_X2APIC_APICV;60586079 }6059608060606060- if (is_long_mode(vcpu))60616061- mode |= MSR_BITMAP_MODE_LM;60626062-60636081 return mode;60646082}60656083···6096612060976121 if (!changed)60986122 return;60996099-61006100- vmx_set_intercept_for_msr(msr_bitmap, MSR_KERNEL_GS_BASE, MSR_TYPE_RW,61016101- !(mode & MSR_BITMAP_MODE_LM));6102612361036124 if (changed & (MSR_BITMAP_MODE_X2APIC | MSR_BITMAP_MODE_X2APIC_APICV))61046125 vmx_update_msr_bitmap_x2apic(msr_bitmap, mode);···61626189 nested_mark_vmcs12_pages_dirty(vcpu);61636190}6164619161926192+static u8 vmx_get_rvi(void)61936193+{61946194+ return vmcs_read16(GUEST_INTR_STATUS) & 0xff;61956195+}61966196+61656197static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)61666198{61676199 struct vcpu_vmx *vmx = to_vmx(vcpu);···61796201 WARN_ON_ONCE(!vmx->nested.virtual_apic_page))61806202 return false;6181620361826182- rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;62046204+ rvi = vmx_get_rvi();6183620561846206 vapic_page = kmap(vmx->nested.virtual_apic_page);61856207 vppr = *((u32 *)(vapic_page + APIC_PROCPRI));···1022310245 if (!lapic_in_kernel(vcpu))1022410246 return;10225102471024810248+ if (!flexpriority_enabled &&1024910249+ !cpu_has_vmx_virtualize_x2apic_mode())1025010250+ return;1025110251+1022610252 /* Postpone execution until vmcs01 is the current VMCS. */1022710253 if (is_guest_mode(vcpu)) {1022810254 to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;1022910255 return;1023010256 }1023110231-1023210232- if (!cpu_need_tpr_shadow(vcpu))1023310233- return;10234102571023510258 sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);1023610259 sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |···1035210373 }1035310374 vmx_hwapic_irr_update(vcpu, max_irr);1035410375 return max_irr;1037610376+}1037710377+1037810378+static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)1037910379+{1038010380+ u8 rvi = vmx_get_rvi();1038110381+ u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);1038210382+1038310383+ return ((rvi & 0xf0) > (vppr & 0xf0));1035510384}10356103851035710386static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)···1125111264#undef cr4_fixed1_update1125211265}11253112661126711267+static void nested_vmx_entry_exit_ctls_update(struct kvm_vcpu *vcpu)1126811268+{1126911269+ struct vcpu_vmx *vmx = to_vmx(vcpu);1127011270+1127111271+ if (kvm_mpx_supported()) {1127211272+ bool mpx_enabled = guest_cpuid_has(vcpu, X86_FEATURE_MPX);1127311273+1127411274+ if (mpx_enabled) {1127511275+ vmx->nested.msrs.entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;1127611276+ vmx->nested.msrs.exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;1127711277+ } else {1127811278+ vmx->nested.msrs.entry_ctls_high &= ~VM_ENTRY_LOAD_BNDCFGS;1127911279+ vmx->nested.msrs.exit_ctls_high &= ~VM_EXIT_CLEAR_BNDCFGS;1128011280+ }1128111281+ }1128211282+}1128311283+1125411284static void vmx_cpuid_update(struct kvm_vcpu *vcpu)1125511285{1125611286 struct vcpu_vmx *vmx = to_vmx(vcpu);···1128411280 to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=1128511281 ~FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;11286112821128711287- if (nested_vmx_allowed(vcpu))1128311283+ if (nested_vmx_allowed(vcpu)) {1128811284 nested_vmx_cr_fixed1_bits_update(vcpu);1128511285+ nested_vmx_entry_exit_ctls_update(vcpu);1128611286+ }1128911287}11290112881129111289static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)···12055120491205612050 set_cr4_guest_host_mask(vmx);12057120511205812058- if (vmx_mpx_supported())1205912059- vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);1205212052+ if (kvm_mpx_supported()) {1205312053+ if (vmx->nested.nested_run_pending &&1205412054+ (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))1205512055+ vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);1205612056+ else1205712057+ vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);1205812058+ }12060120591206112060 if (enable_vpid) {1206212061 if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)···1260612595 struct vmcs12 *vmcs12 = get_vmcs12(vcpu);1260712596 bool from_vmentry = !!exit_qual;1260812597 u32 dummy_exit_qual;1260912609- u32 vmcs01_cpu_exec_ctrl;1259812598+ bool evaluate_pending_interrupts;1261012599 int r = 0;12611126001261212612- vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);1260112601+ evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &1260212602+ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);1260312603+ if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))1260412604+ evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);12613126051261412606 enter_guest_mode(vcpu);12615126071261612608 if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))1261712609 vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);1261012610+ if (kvm_mpx_supported() &&1261112611+ !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))1261212612+ vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);12618126131261912614 vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);1262012615 vmx_segment_cache_clear(vmx);···1266012643 * to L1 or delivered directly to L2 (e.g. In case L1 don't1266112644 * intercept EXTERNAL_INTERRUPT).1266212645 *1266312663- * Usually this would be handled by L0 requesting a1266412664- * IRQ/NMI window by setting VMCS accordingly. However,1266512665- * this setting was done on VMCS01 and now VMCS02 is active1266612666- * instead. Thus, we force L0 to perform pending event1266712667- * evaluation by requesting a KVM_REQ_EVENT.1264612646+ * Usually this would be handled by the processor noticing an1264712647+ * IRQ/NMI window request, or checking RVI during evaluation of1264812648+ * pending virtual interrupts. However, this setting was done1264912649+ * on VMCS01 and now VMCS02 is active instead. Thus, we force L01265012650+ * to perform pending event evaluation by requesting a KVM_REQ_EVENT.1266812651 */1266912669- if (vmcs01_cpu_exec_ctrl &1267012670- (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {1265212652+ if (unlikely(evaluate_pending_interrupts))1267112653 kvm_make_request(KVM_REQ_EVENT, vcpu);1267212672- }12673126541267412655 /*1267512656 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
···358358 struct queue *q,359359 struct qcm_process_device *qpd)360360{361361- int retval;362361 struct mqd_manager *mqd_mgr;362362+ int retval;363363364364 mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);365365 if (!mqd_mgr)···387387 if (!q->properties.is_active)388388 return 0;389389390390- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,391391- &q->properties, q->process->mm);390390+ if (WARN(q->process->mm != current->mm,391391+ "should only run in user thread"))392392+ retval = -EFAULT;393393+ else394394+ retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,395395+ &q->properties, current->mm);392396 if (retval)393397 goto out_uninit_mqd;394398···549545 retval = map_queues_cpsch(dqm);550546 else if (q->properties.is_active &&551547 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||552552- q->properties.type == KFD_QUEUE_TYPE_SDMA))553553- retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,554554- &q->properties, q->process->mm);548548+ q->properties.type == KFD_QUEUE_TYPE_SDMA)) {549549+ if (WARN(q->process->mm != current->mm,550550+ "should only run in user thread"))551551+ retval = -EFAULT;552552+ else553553+ retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,554554+ q->pipe, q->queue,555555+ &q->properties, current->mm);556556+ }555557556558out_unlock:557559 dqm_unlock(dqm);···663653static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,664654 struct qcm_process_device *qpd)665655{656656+ struct mm_struct *mm = NULL;666657 struct queue *q;667658 struct mqd_manager *mqd_mgr;668659 struct kfd_process_device *pdd;···697686 kfd_flush_tlb(pdd);698687 }699688689689+ /* Take a safe reference to the mm_struct, which may otherwise690690+ * disappear even while the kfd_process is still referenced.691691+ */692692+ mm = get_task_mm(pdd->process->lead_thread);693693+ if (!mm) {694694+ retval = -EFAULT;695695+ goto out;696696+ }697697+700698 /* activate all active queues on the qpd */701699 list_for_each_entry(q, &qpd->queues_list, list) {702700 if (!q->properties.is_evicted)···720700 q->properties.is_evicted = false;721701 q->properties.is_active = true;722702 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,723723- q->queue, &q->properties,724724- q->process->mm);703703+ q->queue, &q->properties, mm);725704 if (retval)726705 goto out;727706 dqm->queue_count++;728707 }729708 qpd->evicted = 0;730709out:710710+ if (mm)711711+ mmput(mm);731712 dqm_unlock(dqm);732713 return retval;733714}
+8-2
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
···46334633 }46344634 spin_unlock_irqrestore(&adev->ddev->event_lock, flags);4635463546364636- /* Signal HW programming completion */46374637- drm_atomic_helper_commit_hw_done(state);4638463646394637 if (wait_for_vblank)46404638 drm_atomic_helper_wait_for_flip_done(dev, state);46394639+46404640+ /*46414641+ * FIXME:46424642+ * Delay hw_done() until flip_done() is signaled. This is to block46434643+ * another commit from freeing the CRTC state while we're still46444644+ * waiting on flip_done.46454645+ */46464646+ drm_atomic_helper_commit_hw_done(state);4641464746424648 drm_atomic_helper_cleanup_planes(dev, state);46434649
+26-9
drivers/gpu/drm/drm_client.c
···6363EXPORT_SYMBOL(drm_client_close);64646565/**6666- * drm_client_new - Create a DRM client6666+ * drm_client_init - Initialise a DRM client6767 * @dev: DRM device6868 * @client: DRM client6969 * @name: Client name7070 * @funcs: DRM client functions (optional)7171 *7272+ * This initialises the client and opens a &drm_file. Use drm_client_add() to complete the process.7273 * The caller needs to hold a reference on @dev before calling this function.7374 * The client is freed when the &drm_device is unregistered. See drm_client_release().7475 *7576 * Returns:7677 * Zero on success or negative error code on failure.7778 */7878-int drm_client_new(struct drm_device *dev, struct drm_client_dev *client,7979- const char *name, const struct drm_client_funcs *funcs)7979+int drm_client_init(struct drm_device *dev, struct drm_client_dev *client,8080+ const char *name, const struct drm_client_funcs *funcs)8081{8182 int ret;8283···9695 if (ret)9796 goto err_put_module;98979999- mutex_lock(&dev->clientlist_mutex);100100- list_add(&client->list, &dev->clientlist);101101- mutex_unlock(&dev->clientlist_mutex);102102-10398 drm_dev_get(dev);10499105100 return 0;···106109107110 return ret;108111}109109-EXPORT_SYMBOL(drm_client_new);112112+EXPORT_SYMBOL(drm_client_init);113113+114114+/**115115+ * drm_client_add - Add client to the device list116116+ * @client: DRM client117117+ *118118+ * Add the client to the &drm_device client list to activate its callbacks.119119+ * @client must be initialized by a call to drm_client_init(). After120120+ * drm_client_add() it is no longer permissible to call drm_client_release()121121+ * directly (outside the unregister callback), instead cleanup will happen122122+ * automatically on driver unload.123123+ */124124+void drm_client_add(struct drm_client_dev *client)125125+{126126+ struct drm_device *dev = client->dev;127127+128128+ mutex_lock(&dev->clientlist_mutex);129129+ list_add(&client->list, &dev->clientlist);130130+ mutex_unlock(&dev->clientlist_mutex);131131+}132132+EXPORT_SYMBOL(drm_client_add);110133111134/**112135 * drm_client_release - Release DRM client resources113136 * @client: DRM client114137 *115115- * Releases resources by closing the &drm_file that was opened by drm_client_new().138138+ * Releases resources by closing the &drm_file that was opened by drm_client_init().116139 * It is called automatically if the &drm_client_funcs.unregister callback is _not_ set.117140 *118141 * This function should only be called from the unregister callback. An exception
+3-1
drivers/gpu/drm/drm_fb_cma_helper.c
···160160161161 fb_helper = &fbdev_cma->fb_helper;162162163163- ret = drm_client_new(dev, &fb_helper->client, "fbdev", NULL);163163+ ret = drm_client_init(dev, &fb_helper->client, "fbdev", NULL);164164 if (ret)165165 goto err_free;166166···168168 preferred_bpp, max_conn_count);169169 if (ret)170170 goto err_client_put;171171+172172+ drm_client_add(&fb_helper->client);171173172174 return fbdev_cma;173175
+3-1
drivers/gpu/drm/drm_fb_helper.c
···32183218 if (!fb_helper)32193219 return -ENOMEM;3220322032213221- ret = drm_client_new(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);32213221+ ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);32223222 if (ret) {32233223 kfree(fb_helper);32243224 return ret;32253225 }32263226+32273227+ drm_client_add(&fb_helper->client);3226322832273229 fb_helper->preferred_bpp = preferred_bpp;32283230
+3-3
drivers/gpu/drm/drm_lease.c
···566566 lessee_priv->is_master = 1;567567 lessee_priv->authenticated = 1;568568569569- /* Hook up the fd */570570- fd_install(fd, lessee_file);571571-572569 /* Pass fd back to userspace */573570 DRM_DEBUG_LEASE("Returning fd %d id %d\n", fd, lessee->lessee_id);574571 cl->fd = fd;575572 cl->lessee_id = lessee->lessee_id;573573+574574+ /* Hook up the fd */575575+ fd_install(fd, lessee_file);576576577577 DRM_DEBUG_LEASE("drm_mode_create_lease_ioctl succeeded\n");578578 return 0;
+6-28
drivers/gpu/drm/exynos/exynos_drm_iommu.h
···5555static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv,5656 unsigned long start, unsigned long size)5757{5858- struct iommu_domain *domain;5959- int ret;6060-6161- domain = iommu_domain_alloc(priv->dma_dev->bus);6262- if (!domain)6363- return -ENOMEM;6464-6565- ret = iommu_get_dma_cookie(domain);6666- if (ret)6767- goto free_domain;6868-6969- ret = iommu_dma_init_domain(domain, start, size, NULL);7070- if (ret)7171- goto put_cookie;7272-7373- priv->mapping = domain;5858+ priv->mapping = iommu_get_domain_for_dev(priv->dma_dev);7459 return 0;7575-7676-put_cookie:7777- iommu_put_dma_cookie(domain);7878-free_domain:7979- iommu_domain_free(domain);8080- return ret;8160}82618362static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv)8463{8585- struct iommu_domain *domain = priv->mapping;8686-8787- iommu_put_dma_cookie(domain);8888- iommu_domain_free(domain);8964 priv->mapping = NULL;9065}9166···6994{7095 struct iommu_domain *domain = priv->mapping;71967272- return iommu_attach_device(domain, dev);9797+ if (dev != priv->dma_dev)9898+ return iommu_attach_device(domain, dev);9999+ return 0;73100}7410175102static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,···79102{80103 struct iommu_domain *domain = priv->mapping;811048282- iommu_detach_device(domain, dev);105105+ if (dev != priv->dma_dev)106106+ iommu_detach_device(domain, dev);83107}84108#else85109#error Unsupported architecture and IOMMU/DMA-mapping glue code
+3-2
drivers/gpu/drm/i2c/tda9950.c
···191191 break;192192 }193193 /* TDA9950 executes all retries for us */194194- tx_status |= CEC_TX_STATUS_MAX_RETRIES;194194+ if (tx_status != CEC_TX_STATUS_OK)195195+ tx_status |= CEC_TX_STATUS_MAX_RETRIES;195196 cec_transmit_done(priv->adap, tx_status, arb_lost_cnt,196197 nack_cnt, 0, err_cnt);197198 break;···311310 /* Wait up to .5s for it to signal non-busy */312311 do {313312 csr = tda9950_read(client, REG_CSR);314314- if (!(csr & CSR_BUSY) || --timeout)313313+ if (!(csr & CSR_BUSY) || !--timeout)315314 break;316315 msleep(10);317316 } while (1);
+63-25
drivers/gpu/drm/i915/i915_gpu_error.c
···232232 return true;233233}234234235235+static void *compress_next_page(struct drm_i915_error_object *dst)236236+{237237+ unsigned long page;238238+239239+ if (dst->page_count >= dst->num_pages)240240+ return ERR_PTR(-ENOSPC);241241+242242+ page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);243243+ if (!page)244244+ return ERR_PTR(-ENOMEM);245245+246246+ return dst->pages[dst->page_count++] = (void *)page;247247+}248248+235249static int compress_page(struct compress *c,236250 void *src,237251 struct drm_i915_error_object *dst)···259245260246 do {261247 if (zstream->avail_out == 0) {262262- unsigned long page;248248+ zstream->next_out = compress_next_page(dst);249249+ if (IS_ERR(zstream->next_out))250250+ return PTR_ERR(zstream->next_out);263251264264- page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);265265- if (!page)266266- return -ENOMEM;267267-268268- dst->pages[dst->page_count++] = (void *)page;269269-270270- zstream->next_out = (void *)page;271252 zstream->avail_out = PAGE_SIZE;272253 }273254274274- if (zlib_deflate(zstream, Z_SYNC_FLUSH) != Z_OK)255255+ if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)275256 return -EIO;276257 } while (zstream->avail_in);277258···277268 return 0;278269}279270271271+static int compress_flush(struct compress *c,272272+ struct drm_i915_error_object *dst)273273+{274274+ struct z_stream_s *zstream = &c->zstream;275275+276276+ do {277277+ switch (zlib_deflate(zstream, Z_FINISH)) {278278+ case Z_OK: /* more space requested */279279+ zstream->next_out = compress_next_page(dst);280280+ if (IS_ERR(zstream->next_out))281281+ return PTR_ERR(zstream->next_out);282282+283283+ zstream->avail_out = PAGE_SIZE;284284+ break;285285+286286+ case Z_STREAM_END:287287+ goto end;288288+289289+ default: /* any error */290290+ return -EIO;291291+ }292292+ } while (1);293293+294294+end:295295+ memset(zstream->next_out, 0, zstream->avail_out);296296+ dst->unused = zstream->avail_out;297297+ return 0;298298+}299299+280300static void compress_fini(struct compress *c,281301 struct drm_i915_error_object *dst)282302{283303 struct z_stream_s *zstream = &c->zstream;284304285285- if (dst) {286286- zlib_deflate(zstream, Z_FINISH);287287- dst->unused = zstream->avail_out;288288- }289289-290305 zlib_deflateEnd(zstream);291306 kfree(zstream->workspace);292292-293307 if (c->tmp)294308 free_page((unsigned long)c->tmp);295309}···348316 memcpy(ptr, src, PAGE_SIZE);349317 dst->pages[dst->page_count++] = ptr;350318319319+ return 0;320320+}321321+322322+static int compress_flush(struct compress *c,323323+ struct drm_i915_error_object *dst)324324+{351325 return 0;352326}353327···955917 unsigned long num_pages;956918 struct sgt_iter iter;957919 dma_addr_t dma;920920+ int ret;958921959922 if (!vma)960923 return NULL;···969930970931 dst->gtt_offset = vma->node.start;971932 dst->gtt_size = vma->node.size;933933+ dst->num_pages = num_pages;972934 dst->page_count = 0;973935 dst->unused = 0;974936···978938 return NULL;979939 }980940941941+ ret = -EINVAL;981942 for_each_sgt_dma(dma, iter, vma->pages) {982943 void __iomem *s;983983- int ret;984944985945 ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);986946987947 s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);988948 ret = compress_page(&compress, (void __force *)s, dst);989949 io_mapping_unmap_atomic(s);990990-991950 if (ret)992992- goto unwind;951951+ break;993952 }994994- goto out;995953996996-unwind:997997- while (dst->page_count--)998998- free_page((unsigned long)dst->pages[dst->page_count]);999999- kfree(dst);10001000- dst = NULL;954954+ if (ret || compress_flush(&compress, dst)) {955955+ while (dst->page_count--)956956+ free_page((unsigned long)dst->pages[dst->page_count]);957957+ kfree(dst);958958+ dst = NULL;959959+ }100196010021002-out:1003961 compress_fini(&compress, dst);1004962 ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);1005963 return dst;
+1
drivers/gpu/drm/i915/i915_gpu_error.h
···135135 struct drm_i915_error_object {136136 u64 gtt_offset;137137 u64 gtt_size;138138+ int num_pages;138139 int page_count;139140 int unused;140141 u32 *pages[0];
···14551455 if (hints_valid) {14561456 r = dm_array_cursor_next(&cmd->hint_cursor);14571457 if (r) {14581458- DMERR("dm_array_cursor_next for hint failed");14591459- goto out;14581458+ dm_array_cursor_end(&cmd->hint_cursor);14591459+ hints_valid = false;14601460 }14611461 }14621462
+7-2
drivers/md/dm-cache-target.c
···3009300930103010static bool can_resize(struct cache *cache, dm_cblock_t new_size)30113011{30123012- if (from_cblock(new_size) > from_cblock(cache->cache_size))30133013- return true;30123012+ if (from_cblock(new_size) > from_cblock(cache->cache_size)) {30133013+ if (cache->sized) {30143014+ DMERR("%s: unable to extend cache due to missing cache table reload",30153015+ cache_device_name(cache));30163016+ return false;30173017+ }30183018+ }3014301930153020 /*30163021 * We can't drop a dirty block when shrinking the cache.
+8-6
drivers/md/dm-mpath.c
···806806}807807808808static int setup_scsi_dh(struct block_device *bdev, struct multipath *m,809809- const char *attached_handler_name, char **error)809809+ const char **attached_handler_name, char **error)810810{811811 struct request_queue *q = bdev_get_queue(bdev);812812 int r;813813814814 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {815815retain:816816- if (attached_handler_name) {816816+ if (*attached_handler_name) {817817 /*818818 * Clear any hw_handler_params associated with a819819 * handler that isn't already attached.820820 */821821- if (m->hw_handler_name && strcmp(attached_handler_name, m->hw_handler_name)) {821821+ if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) {822822 kfree(m->hw_handler_params);823823 m->hw_handler_params = NULL;824824 }···830830 * handler instead of the original table passed in.831831 */832832 kfree(m->hw_handler_name);833833- m->hw_handler_name = attached_handler_name;833833+ m->hw_handler_name = *attached_handler_name;834834+ *attached_handler_name = NULL;834835 }835836 }836837···868867 struct pgpath *p;869868 struct multipath *m = ti->private;870869 struct request_queue *q;871871- const char *attached_handler_name;870870+ const char *attached_handler_name = NULL;872871873872 /* we need at least a path arg */874873 if (as->argc < 1) {···891890 attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);892891 if (attached_handler_name || m->hw_handler_name) {893892 INIT_DELAYED_WORK(&p->activate_path, activate_path_work);894894- r = setup_scsi_dh(p->path.dev->bdev, m, attached_handler_name, &ti->error);893893+ r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error);895894 if (r) {896895 dm_put_device(ti, p->path.dev);897896 goto bad;···906905907906 return p;908907 bad:908908+ kfree(attached_handler_name);909909 free_pgpath(p);910910 return ERR_PTR(r);911911}
+1-1
drivers/md/dm-raid.c
···33533353};3354335433553355/* Return enum sync_state for @mddev derived from @recovery flags */33563356-static const enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)33563356+static enum sync_state decipher_sync_action(struct mddev *mddev, unsigned long recovery)33573357{33583358 if (test_bit(MD_RECOVERY_FROZEN, &recovery))33593359 return st_frozen;
+2-4
drivers/md/dm-thin-metadata.c
···832832 if (r) {833833 DMERR("could not get size of metadata device");834834 pmd->metadata_reserve = max_blocks;835835- } else {836836- sector_div(total, 10);837837- pmd->metadata_reserve = min(max_blocks, total);838838- }835835+ } else836836+ pmd->metadata_reserve = min(max_blocks, div_u64(total, 10));839837}840838841839struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
···17551755}1756175617571757/* Set Tx descriptors fields relevant for CSUM calculation */17581758-static u32 mvpp2_txq_desc_csum(int l3_offs, int l3_proto,17581758+static u32 mvpp2_txq_desc_csum(int l3_offs, __be16 l3_proto,17591759 int ip_hdr_len, int l4_proto)17601760{17611761 u32 command;···26452645 if (skb->ip_summed == CHECKSUM_PARTIAL) {26462646 int ip_hdr_len = 0;26472647 u8 l4_proto;26482648+ __be16 l3_proto = vlan_get_protocol(skb);2648264926492649- if (skb->protocol == htons(ETH_P_IP)) {26502650+ if (l3_proto == htons(ETH_P_IP)) {26502651 struct iphdr *ip4h = ip_hdr(skb);2651265226522653 /* Calculate IPv4 checksum and L4 checksum */26532654 ip_hdr_len = ip4h->ihl;26542655 l4_proto = ip4h->protocol;26552655- } else if (skb->protocol == htons(ETH_P_IPV6)) {26562656+ } else if (l3_proto == htons(ETH_P_IPV6)) {26562657 struct ipv6hdr *ip6h = ipv6_hdr(skb);2657265826582659 /* Read l4_protocol from one of IPv6 extra headers */···26652664 }2666266526672666 return mvpp2_txq_desc_csum(skb_network_offset(skb),26682668- skb->protocol, ip_hdr_len, l4_proto);26672667+ l3_proto, ip_hdr_len, l4_proto);26692668 }2670266926712670 return MVPP2_TXD_L4_CSUM_NOT | MVPP2_TXD_IP_CSUM_DISABLE;
+7-4
drivers/net/ethernet/mellanox/mlxsw/pci.c
···718718 memset(&active_cqns, 0, sizeof(active_cqns));719719720720 while ((eqe = mlxsw_pci_eq_sw_eqe_get(q))) {721721- u8 event_type = mlxsw_pci_eqe_event_type_get(eqe);722721723723- switch (event_type) {724724- case MLXSW_PCI_EQE_EVENT_TYPE_CMD:722722+ /* Command interface completion events are always received on723723+ * queue MLXSW_PCI_EQ_ASYNC_NUM (EQ0) and completion events724724+ * are mapped to queue MLXSW_PCI_EQ_COMP_NUM (EQ1).725725+ */726726+ switch (q->num) {727727+ case MLXSW_PCI_EQ_ASYNC_NUM:725728 mlxsw_pci_eq_cmd_event(mlxsw_pci, eqe);726729 q->u.eq.ev_cmd_count++;727730 break;728728- case MLXSW_PCI_EQE_EVENT_TYPE_COMP:731731+ case MLXSW_PCI_EQ_COMP_NUM:729732 cqn = mlxsw_pci_eqe_cqn_get(eqe);730733 set_bit(cqn, active_cqns);731734 cq_handle = true;
+2
drivers/net/ethernet/mellanox/mlxsw/spectrum.c
···48554855 upper_dev = info->upper_dev;48564856 if (info->linking)48574857 break;48584858+ if (is_vlan_dev(upper_dev))48594859+ mlxsw_sp_rif_destroy_by_dev(mlxsw_sp, upper_dev);48584860 if (netif_is_macvlan(upper_dev))48594861 mlxsw_sp_rif_macvlan_del(mlxsw_sp, upper_dev);48604862 break;
+4
drivers/net/hamradio/yam.c
···966966 sizeof(struct yamdrv_ioctl_mcs));967967 if (IS_ERR(ym))968968 return PTR_ERR(ym);969969+ if (ym->cmd != SIOCYAMSMCS)970970+ return -EINVAL;969971 if (ym->bitrate > YAM_MAXBITRATE) {970972 kfree(ym);971973 return -EINVAL;···983981 if (copy_from_user(&yi, ifr->ifr_data, sizeof(struct yamdrv_ioctl_cfg)))984982 return -EFAULT;985983984984+ if (yi.cmd != SIOCYAMSCFG)985985+ return -EINVAL;986986 if ((yi.cfg.mask & YAM_IOBASE) && netif_running(dev))987987 return -EINVAL; /* Cannot change this parameter when up */988988 if ((yi.cfg.mask & YAM_IRQ) && netif_running(dev))
+28-20
drivers/net/phy/phylink.c
···690690 return 0;691691}692692693693+static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy,694694+ phy_interface_t interface)695695+{696696+ int ret;697697+698698+ if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||699699+ (pl->link_an_mode == MLO_AN_INBAND &&700700+ phy_interface_mode_is_8023z(interface))))701701+ return -EINVAL;702702+703703+ if (pl->phydev)704704+ return -EBUSY;705705+706706+ ret = phy_attach_direct(pl->netdev, phy, 0, interface);707707+ if (ret)708708+ return ret;709709+710710+ ret = phylink_bringup_phy(pl, phy);711711+ if (ret)712712+ phy_detach(phy);713713+714714+ return ret;715715+}716716+693717/**694718 * phylink_connect_phy() - connect a PHY to the phylink instance695719 * @pl: a pointer to a &struct phylink returned from phylink_create()···731707 */732708int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)733709{734734- int ret;735735-736736- if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||737737- (pl->link_an_mode == MLO_AN_INBAND &&738738- phy_interface_mode_is_8023z(pl->link_interface))))739739- return -EINVAL;740740-741741- if (pl->phydev)742742- return -EBUSY;743743-744710 /* Use PHY device/driver interface */745711 if (pl->link_interface == PHY_INTERFACE_MODE_NA) {746712 pl->link_interface = phy->interface;747713 pl->link_config.interface = pl->link_interface;748714 }749715750750- ret = phy_attach_direct(pl->netdev, phy, 0, pl->link_interface);751751- if (ret)752752- return ret;753753-754754- ret = phylink_bringup_phy(pl, phy);755755- if (ret)756756- phy_detach(phy);757757-758758- return ret;716716+ return __phylink_connect_phy(pl, phy, pl->link_interface);759717}760718EXPORT_SYMBOL_GPL(phylink_connect_phy);761719···1654164816551649static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)16561650{16571657- return phylink_connect_phy(upstream, phy);16511651+ struct phylink *pl = upstream;16521652+16531653+ return __phylink_connect_phy(upstream, phy, pl->link_config.interface);16581654}1659165516601656static void phylink_sfp_disconnect_phy(void *upstream)
+6
drivers/net/team/team.c
···11671167 return -EBUSY;11681168 }1169116911701170+ if (dev == port_dev) {11711171+ NL_SET_ERR_MSG(extack, "Cannot enslave team device to itself");11721172+ netdev_err(dev, "Cannot enslave team device to itself\n");11731173+ return -EINVAL;11741174+ }11751175+11701176 if (port_dev->features & NETIF_F_VLAN_CHALLENGED &&11711177 vlan_uses_dev(dev)) {11721178 NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up");
···11451145{11461146 struct device *dev = &pcie->pdev->dev;11471147 struct device_node *np = dev->of_node;11481148- unsigned int i;11491148 int ret;1150114911511150 INIT_LIST_HEAD(&pcie->resources);···11781179 resource_size(&pcie->io) - 1);11791180 pcie->realio.name = "PCI I/O";1180118111811181- for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K)11821182- pci_ioremap_io(i, pcie->io.start + i);11831183-11841182 pci_add_resource(&pcie->resources, &pcie->realio);11851183 }1186118411871185 return devm_request_pci_bus_resources(dev, &pcie->resources);11861186+}11871187+11881188+/*11891189+ * This is a copy of pci_host_probe(), except that it does the I/O11901190+ * remap as the last step, once we are sure we won't fail.11911191+ *11921192+ * It should be removed once the I/O remap error handling issue has11931193+ * been sorted out.11941194+ */11951195+static int mvebu_pci_host_probe(struct pci_host_bridge *bridge)11961196+{11971197+ struct mvebu_pcie *pcie;11981198+ struct pci_bus *bus, *child;11991199+ int ret;12001200+12011201+ ret = pci_scan_root_bus_bridge(bridge);12021202+ if (ret < 0) {12031203+ dev_err(bridge->dev.parent, "Scanning root bridge failed");12041204+ return ret;12051205+ }12061206+12071207+ pcie = pci_host_bridge_priv(bridge);12081208+ if (resource_size(&pcie->io) != 0) {12091209+ unsigned int i;12101210+12111211+ for (i = 0; i < resource_size(&pcie->realio); i += SZ_64K)12121212+ pci_ioremap_io(i, pcie->io.start + i);12131213+ }12141214+12151215+ bus = bridge->bus;12161216+12171217+ /*12181218+ * We insert PCI resources into the iomem_resource and12191219+ * ioport_resource trees in either pci_bus_claim_resources()12201220+ * or pci_bus_assign_resources().12211221+ */12221222+ if (pci_has_flag(PCI_PROBE_ONLY)) {12231223+ pci_bus_claim_resources(bus);12241224+ } else {12251225+ pci_bus_size_bridges(bus);12261226+ pci_bus_assign_resources(bus);12271227+12281228+ list_for_each_entry(child, &bus->children, node)12291229+ pcie_bus_configure_settings(child);12301230+ }12311231+12321232+ pci_bus_add_devices(bus);12331233+ return 0;11881234}1189123511901236static int mvebu_pcie_probe(struct platform_device *pdev)···13121268 bridge->align_resource = mvebu_pcie_align_resource;13131269 bridge->msi = pcie->msi;1314127013151315- return pci_host_probe(bridge);12711271+ return mvebu_pci_host_probe(bridge);13161272}1317127313181274static const struct of_device_id mvebu_pcie_of_match_table[] = {
+19-8
drivers/pci/pci.c
···12891289EXPORT_SYMBOL(pci_save_state);1290129012911291static void pci_restore_config_dword(struct pci_dev *pdev, int offset,12921292- u32 saved_val, int retry)12921292+ u32 saved_val, int retry, bool force)12931293{12941294 u32 val;1295129512961296 pci_read_config_dword(pdev, offset, &val);12971297- if (val == saved_val)12971297+ if (!force && val == saved_val)12981298 return;1299129913001300 for (;;) {···13131313}1314131413151315static void pci_restore_config_space_range(struct pci_dev *pdev,13161316- int start, int end, int retry)13161316+ int start, int end, int retry,13171317+ bool force)13171318{13181319 int index;1319132013201321 for (index = end; index >= start; index--)13211322 pci_restore_config_dword(pdev, 4 * index,13221323 pdev->saved_config_space[index],13231323- retry);13241324+ retry, force);13241325}1325132613261327static void pci_restore_config_space(struct pci_dev *pdev)13271328{13281329 if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL) {13291329- pci_restore_config_space_range(pdev, 10, 15, 0);13301330+ pci_restore_config_space_range(pdev, 10, 15, 0, false);13301331 /* Restore BARs before the command register. */13311331- pci_restore_config_space_range(pdev, 4, 9, 10);13321332- pci_restore_config_space_range(pdev, 0, 3, 0);13321332+ pci_restore_config_space_range(pdev, 4, 9, 10, false);13331333+ pci_restore_config_space_range(pdev, 0, 3, 0, false);13341334+ } else if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {13351335+ pci_restore_config_space_range(pdev, 12, 15, 0, false);13361336+13371337+ /*13381338+ * Force rewriting of prefetch registers to avoid S3 resume13391339+ * issues on Intel PCI bridges that occur when these13401340+ * registers are not explicitly written.13411341+ */13421342+ pci_restore_config_space_range(pdev, 9, 11, 0, true);13431343+ pci_restore_config_space_range(pdev, 0, 8, 0, false);13331344 } else {13341334- pci_restore_config_space_range(pdev, 0, 15, 0);13451345+ pci_restore_config_space_range(pdev, 0, 15, 0, false);13351346 }13361347}13371348
+1
fs/cifs/cifsglob.h
···1553155315541554/* Flags */15551555#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */15561556+#define MID_DELETED 2 /* Mid has been dequeued/deleted */1556155715571558/* Types of response buffer returned from SendReceive2 */15581559#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
+10-3
fs/cifs/connect.c
···659659 mid->mid_state = MID_RESPONSE_RECEIVED;660660 else661661 mid->mid_state = MID_RESPONSE_MALFORMED;662662- list_del_init(&mid->qhead);662662+ /*663663+ * Trying to handle/dequeue a mid after the send_recv()664664+ * function has finished processing it is a bug.665665+ */666666+ if (mid->mid_flags & MID_DELETED)667667+ printk_once(KERN_WARNING668668+ "trying to dequeue a deleted mid\n");669669+ else670670+ list_del_init(&mid->qhead);663671 spin_unlock(&GlobalMid_Lock);664672}665673···946938 } else {947939 mids[0] = server->ops->find_mid(server, buf);948940 bufs[0] = buf;949949- if (mids[0])950950- num_mids = 1;941941+ num_mids = 1;951942952943 if (!mids[0] || !mids[0]->receive)953944 length = standard_receive3(server, mids[0]);
···142142cifs_delete_mid(struct mid_q_entry *mid)143143{144144 spin_lock(&GlobalMid_Lock);145145- list_del(&mid->qhead);145145+ list_del_init(&mid->qhead);146146+ mid->mid_flags |= MID_DELETED;146147 spin_unlock(&GlobalMid_Lock);147148148149 DeleteMidQEntry(mid);···773772 return mid;774773}775774775775+static void776776+cifs_noop_callback(struct mid_q_entry *mid)777777+{778778+}779779+776780int777781compound_send_recv(const unsigned int xid, struct cifs_ses *ses,778782 const int flags, const int num_rqst, struct smb_rqst *rqst,···832826 }833827834828 midQ[i]->mid_state = MID_REQUEST_SUBMITTED;829829+ /*830830+ * We don't invoke the callback compounds unless it is the last831831+ * request.832832+ */833833+ if (i < num_rqst - 1)834834+ midQ[i]->callback = cifs_noop_callback;835835 }836836-837836 cifs_in_send_inc(ses->server);838837 rc = smb_send_rqst(ses->server, num_rqst, rqst, flags);839838 cifs_in_send_dec(ses->server);···919908 midQ[i]->resp_buf = NULL;920909 }921910out:911911+ /*912912+ * This will dequeue all mids. After this it is important that the913913+ * demultiplex_thread will not process any of these mids any futher.914914+ * This is prevented above by using a noop callback that will not915915+ * wake this thread except for the very last PDU.916916+ */922917 for (i = 0; i < num_rqst; i++)923918 cifs_delete_mid(midQ[i]);924919 add_credits(ses->server, credits, optype);
+1-1
fs/ioctl.c
···230230 ret = -EXDEV;231231 if (src_file.file->f_path.mnt != dst_file->f_path.mnt)232232 goto fdput;233233- ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen);233233+ ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);234234fdput:235235 fdput(src_file);236236 return ret;
···29462946 if (map_end & (PAGE_SIZE - 1))29472947 to = map_end & (PAGE_SIZE - 1);2948294829492949+retry:29492950 page = find_or_create_page(mapping, page_index, GFP_NOFS);29502951 if (!page) {29512952 ret = -ENOMEM;···29552954 }2956295529572956 /*29582958- * In case PAGE_SIZE <= CLUSTER_SIZE, This page29592959- * can't be dirtied before we CoW it out.29572957+ * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty29582958+ * page, so write it back.29602959 */29612961- if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize)29622962- BUG_ON(PageDirty(page));29602960+ if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {29612961+ if (PageDirty(page)) {29622962+ /*29632963+ * write_on_page will unlock the page on return29642964+ */29652965+ ret = write_one_page(page);29662966+ goto retry;29672967+ }29682968+ }2963296929642970 if (!PageUptodate(page)) {29652971 ret = block_read_full_page(page, ocfs2_get_block);
+1-1
fs/overlayfs/copy_up.c
···141141 }142142143143 /* Try to use clone_file_range to clone up within the same fs */144144- error = vfs_clone_file_range(old_file, 0, new_file, 0, len);144144+ error = do_clone_file_range(old_file, 0, new_file, 0, len);145145 if (!error)146146 goto out;147147 /* Couldn't clone, so now we try to copy the data */
···407407 unsigned long *entries;408408 int err;409409410410+ /*411411+ * The ability to racily run the kernel stack unwinder on a running task412412+ * and then observe the unwinder output is scary; while it is useful for413413+ * debugging kernel issues, it can also allow an attacker to leak kernel414414+ * stack contents.415415+ * Doing this in a manner that is at least safe from races would require416416+ * some work to ensure that the remote task can not be scheduled; and417417+ * even then, this would still expose the unwinder as local attack418418+ * surface.419419+ * Therefore, this interface is restricted to root.420420+ */421421+ if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))422422+ return -EACCES;423423+410424 entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),411425 GFP_KERNEL);412426 if (!entries)
···673673 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS);674674675675 /*676676- * Make space in the inode incore.676676+ * Make space in the inode incore. This needs to be undone if we fail677677+ * to expand the root.677678 */678679 xfs_iroot_realloc(ip, 1, whichfork);679680 ifp->if_flags |= XFS_IFBROOT;···712711 args.minlen = args.maxlen = args.prod = 1;713712 args.wasdel = wasdel;714713 *logflagsp = 0;715715- if ((error = xfs_alloc_vextent(&args))) {716716- ASSERT(ifp->if_broot == NULL);717717- goto err1;718718- }714714+ error = xfs_alloc_vextent(&args);715715+ if (error)716716+ goto out_root_realloc;719717720718 if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) {721721- ASSERT(ifp->if_broot == NULL);722719 error = -ENOSPC;723723- goto err1;720720+ goto out_root_realloc;724721 }722722+725723 /*726724 * Allocation can't fail, the space was reserved.727725 */···732732 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);733733 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);734734 if (!abp) {735735- error = -ENOSPC;736736- goto err2;735735+ error = -EFSCORRUPTED;736736+ goto out_unreserve_dquot;737737 }738738+738739 /*739740 * Fill in the child block.740741 */···776775 *logflagsp = XFS_ILOG_CORE | xfs_ilog_fbroot(whichfork);777776 return 0;778777779779-err2:778778+out_unreserve_dquot:780779 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);781781-err1:780780+out_root_realloc:782781 xfs_iroot_realloc(ip, -1, whichfork);783782 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);783783+ ASSERT(ifp->if_broot == NULL);784784 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);785785786786 return error;
+2
fs/xfs/libxfs/xfs_format.h
···10161016#define XFS_DIFLAG_EXTSZINHERIT_BIT 12 /* inherit inode extent size */10171017#define XFS_DIFLAG_NODEFRAG_BIT 13 /* do not reorganize/defragment */10181018#define XFS_DIFLAG_FILESTREAM_BIT 14 /* use filestream allocator */10191019+/* Do not use bit 15, di_flags is legacy and unchanging now */10201020+10191021#define XFS_DIFLAG_REALTIME (1 << XFS_DIFLAG_REALTIME_BIT)10201022#define XFS_DIFLAG_PREALLOC (1 << XFS_DIFLAG_PREALLOC_BIT)10211023#define XFS_DIFLAG_NEWRTBM (1 << XFS_DIFLAG_NEWRTBM_BIT)
+30
fs/xfs/libxfs/xfs_inode_buf.c
···415415 return NULL;416416}417417418418+static xfs_failaddr_t419419+xfs_dinode_verify_forkoff(420420+ struct xfs_dinode *dip,421421+ struct xfs_mount *mp)422422+{423423+ if (!XFS_DFORK_Q(dip))424424+ return NULL;425425+426426+ switch (dip->di_format) {427427+ case XFS_DINODE_FMT_DEV:428428+ if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))429429+ return __this_address;430430+ break;431431+ case XFS_DINODE_FMT_LOCAL: /* fall through ... */432432+ case XFS_DINODE_FMT_EXTENTS: /* fall through ... */433433+ case XFS_DINODE_FMT_BTREE:434434+ if (dip->di_forkoff >= (XFS_LITINO(mp, dip->di_version) >> 3))435435+ return __this_address;436436+ break;437437+ default:438438+ return __this_address;439439+ }440440+ return NULL;441441+}442442+418443xfs_failaddr_t419444xfs_dinode_verify(420445 struct xfs_mount *mp,···494469495470 if (mode && (flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp)496471 return __this_address;472472+473473+ /* check for illegal values of forkoff */474474+ fa = xfs_dinode_verify_forkoff(dip, mp);475475+ if (fa)476476+ return fa;497477498478 /* Do we have appropriate data fork formats for the mode? */499479 switch (mode & S_IFMT) {
···126126{127127 struct xfs_mount *mp = sc->mp;128128129129+ /* di_flags are all taken, last bit cannot be used */129130 if (flags & ~XFS_DIFLAG_ANY)130131 goto bad;131132···173172{174173 struct xfs_mount *mp = sc->mp;175174175175+ /* Unknown di_flags2 could be from a future kernel */176176 if (flags2 & ~XFS_DIFLAG2_ANY)177177- goto bad;177177+ xchk_ino_set_warning(sc, ino);178178179179 /* reflink flag requires reflink feature */180180 if ((flags2 & XFS_DIFLAG2_REFLINK) &&
+7-13
fs/xfs/xfs_bmap_util.c
···702702 struct xfs_iext_cursor icur;703703 int error = 0;704704705705- xfs_ilock(ip, XFS_ILOCK_EXCL);706706- if (!(ifp->if_flags & XFS_IFEXTENTS)) {707707- error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);708708- if (error)709709- goto out_unlock;710710- }705705+ ASSERT(ifp->if_flags & XFS_IFEXTENTS);711706707707+ xfs_ilock(ip, XFS_ILOCK_EXCL);712708 if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))713709 goto out_unlock;714710···15801584 tirec.br_blockcount, &irec,15811585 &nimaps, 0);15821586 if (error)15831583- goto out_defer;15871587+ goto out;15841588 ASSERT(nimaps == 1);15851589 ASSERT(tirec.br_startoff == irec.br_startoff);15861590 trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);···15951599 /* Remove the mapping from the donor file. */15961600 error = xfs_bmap_unmap_extent(tp, tip, &uirec);15971601 if (error)15981598- goto out_defer;16021602+ goto out;1599160316001604 /* Remove the mapping from the source file. */16011605 error = xfs_bmap_unmap_extent(tp, ip, &irec);16021606 if (error)16031603- goto out_defer;16071607+ goto out;1604160816051609 /* Map the donor file's blocks into the source file. */16061610 error = xfs_bmap_map_extent(tp, ip, &uirec);16071611 if (error)16081608- goto out_defer;16121612+ goto out;1609161316101614 /* Map the source file's blocks into the donor file. */16111615 error = xfs_bmap_map_extent(tp, tip, &irec);16121616 if (error)16131613- goto out_defer;16171617+ goto out;1614161816151619 error = xfs_defer_finish(tpp);16161620 tp = *tpp;···16321636 tip->i_d.di_flags2 = tip_flags2;16331637 return 0;1634163816351635-out_defer:16361636- xfs_defer_cancel(tp);16371639out:16381640 trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);16391641 tip->i_d.di_flags2 = tip_flags2;
+64-55
fs/xfs/xfs_buf_item.c
···532532}533533534534/*535535+ * Drop the buffer log item refcount and take appropriate action. This helper536536+ * determines whether the bli must be freed or not, since a decrement to zero537537+ * does not necessarily mean the bli is unused.538538+ *539539+ * Return true if the bli is freed, false otherwise.540540+ */541541+bool542542+xfs_buf_item_put(543543+ struct xfs_buf_log_item *bip)544544+{545545+ struct xfs_log_item *lip = &bip->bli_item;546546+ bool aborted;547547+ bool dirty;548548+549549+ /* drop the bli ref and return if it wasn't the last one */550550+ if (!atomic_dec_and_test(&bip->bli_refcount))551551+ return false;552552+553553+ /*554554+ * We dropped the last ref and must free the item if clean or aborted.555555+ * If the bli is dirty and non-aborted, the buffer was clean in the556556+ * transaction but still awaiting writeback from previous changes. In557557+ * that case, the bli is freed on buffer writeback completion.558558+ */559559+ aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) ||560560+ XFS_FORCED_SHUTDOWN(lip->li_mountp);561561+ dirty = bip->bli_flags & XFS_BLI_DIRTY;562562+ if (dirty && !aborted)563563+ return false;564564+565565+ /*566566+ * The bli is aborted or clean. An aborted item may be in the AIL567567+ * regardless of dirty state. For example, consider an aborted568568+ * transaction that invalidated a dirty bli and cleared the dirty569569+ * state.570570+ */571571+ if (aborted)572572+ xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);573573+ xfs_buf_item_relse(bip->bli_buf);574574+ return true;575575+}576576+577577+/*535578 * Release the buffer associated with the buf log item. If there is no dirty536579 * logged data associated with the buffer recorded in the buf log item, then537580 * free the buf log item and remove the reference to it in the buffer.···599556{600557 struct xfs_buf_log_item *bip = BUF_ITEM(lip);601558 struct xfs_buf *bp = bip->bli_buf;602602- bool aborted;603603- bool hold = !!(bip->bli_flags & XFS_BLI_HOLD);604604- bool dirty = !!(bip->bli_flags & XFS_BLI_DIRTY);559559+ bool released;560560+ bool hold = bip->bli_flags & XFS_BLI_HOLD;561561+ bool stale = bip->bli_flags & XFS_BLI_STALE;605562#if defined(DEBUG) || defined(XFS_WARN)606606- bool ordered = !!(bip->bli_flags & XFS_BLI_ORDERED);563563+ bool ordered = bip->bli_flags & XFS_BLI_ORDERED;564564+ bool dirty = bip->bli_flags & XFS_BLI_DIRTY;607565#endif608608-609609- aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags);610610-611611- /* Clear the buffer's association with this transaction. */612612- bp->b_transp = NULL;613613-614614- /*615615- * The per-transaction state has been copied above so clear it from the616616- * bli.617617- */618618- bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);619619-620620- /*621621- * If the buf item is marked stale, then don't do anything. We'll622622- * unlock the buffer and free the buf item when the buffer is unpinned623623- * for the last time.624624- */625625- if (bip->bli_flags & XFS_BLI_STALE) {626626- trace_xfs_buf_item_unlock_stale(bip);627627- ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);628628- if (!aborted) {629629- atomic_dec(&bip->bli_refcount);630630- return;631631- }632632- }633566634567 trace_xfs_buf_item_unlock(bip);635568636569 /*637637- * If the buf item isn't tracking any data, free it, otherwise drop the638638- * reference we hold to it. If we are aborting the transaction, this may639639- * be the only reference to the buf item, so we free it anyway640640- * regardless of whether it is dirty or not. A dirty abort implies a641641- * shutdown, anyway.642642- *643570 * The bli dirty state should match whether the blf has logged segments644571 * except for ordered buffers, where only the bli should be dirty.645572 */646573 ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) ||647574 (ordered && dirty && !xfs_buf_item_dirty_format(bip)));575575+ ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL));648576649577 /*650650- * Clean buffers, by definition, cannot be in the AIL. However, aborted651651- * buffers may be in the AIL regardless of dirty state. An aborted652652- * transaction that invalidates a buffer already in the AIL may have653653- * marked it stale and cleared the dirty state, for example.654654- *655655- * Therefore if we are aborting a buffer and we've just taken the last656656- * reference away, we have to check if it is in the AIL before freeing657657- * it. We need to free it in this case, because an aborted transaction658658- * has already shut the filesystem down and this is the last chance we659659- * will have to do so.578578+ * Clear the buffer's association with this transaction and579579+ * per-transaction state from the bli, which has been copied above.660580 */661661- if (atomic_dec_and_test(&bip->bli_refcount)) {662662- if (aborted) {663663- ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));664664- xfs_trans_ail_remove(lip, SHUTDOWN_LOG_IO_ERROR);665665- xfs_buf_item_relse(bp);666666- } else if (!dirty)667667- xfs_buf_item_relse(bp);668668- }581581+ bp->b_transp = NULL;582582+ bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);669583670670- if (!hold)671671- xfs_buf_relse(bp);584584+ /*585585+ * Unref the item and unlock the buffer unless held or stale. Stale586586+ * buffers remain locked until final unpin unless the bli is freed by587587+ * the unref call. The latter implies shutdown because buffer588588+ * invalidation dirties the bli and transaction.589589+ */590590+ released = xfs_buf_item_put(bip);591591+ if (hold || (stale && !released))592592+ return;593593+ ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags));594594+ xfs_buf_relse(bp);672595}673596674597/*
···15631563 error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,15641564 XFS_ITRUNC_MAX_EXTENTS, &done);15651565 if (error)15661566- goto out_bmap_cancel;15661566+ goto out;1567156715681568 /*15691569 * Duplicate the transaction that has the permanent···15991599out:16001600 *tpp = tp;16011601 return error;16021602-out_bmap_cancel:16031603- /*16041604- * If the bunmapi call encounters an error, return to the caller where16051605- * the transaction can be properly aborted. We just need to make sure16061606- * we're not holding any resources that we were not when we came in.16071607- */16081608- xfs_defer_cancel(tp);16091609- goto out;16101602}1611160316121604int
+11-1
fs/xfs/xfs_iops.c
···471471 struct inode *inode,472472 struct delayed_call *done)473473{474474+ char *link;475475+474476 ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);475475- return XFS_I(inode)->i_df.if_u1.if_data;477477+478478+ /*479479+ * The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if480480+ * if_data is junk.481481+ */482482+ link = XFS_I(inode)->i_df.if_u1.if_data;483483+ if (!link)484484+ return ERR_PTR(-EFSCORRUPTED);485485+ return link;476486}477487478488STATIC int
-10
fs/xfs/xfs_log_recover.c
···15701570 if (last_cycle != 0) { /* log completely written to */15711571 xlog_put_bp(bp);15721572 return 0;15731573- } else if (first_cycle != 1) {15741574- /*15751575- * If the cycle of the last block is zero, the cycle of15761576- * the first block must be 1. If it's not, maybe we're15771577- * not looking at a log... Bail out.15781578- */15791579- xfs_warn(log->l_mp,15801580- "Log inconsistent or not a log (last==0, first!=1)");15811581- error = -EINVAL;15821582- goto bp_err;15831573 }1584157415851575 /* we have a partially zeroed log */
+84-59
fs/xfs/xfs_reflink.c
···352352 return error;353353}354354355355+/*356356+ * Find the extent that maps the given range in the COW fork. Even if the extent357357+ * is not shared we might have a preallocation for it in the COW fork. If so we358358+ * use it that rather than trigger a new allocation.359359+ */360360+static int361361+xfs_find_trim_cow_extent(362362+ struct xfs_inode *ip,363363+ struct xfs_bmbt_irec *imap,364364+ bool *shared,365365+ bool *found)366366+{367367+ xfs_fileoff_t offset_fsb = imap->br_startoff;368368+ xfs_filblks_t count_fsb = imap->br_blockcount;369369+ struct xfs_iext_cursor icur;370370+ struct xfs_bmbt_irec got;371371+ bool trimmed;372372+373373+ *found = false;374374+375375+ /*376376+ * If we don't find an overlapping extent, trim the range we need to377377+ * allocate to fit the hole we found.378378+ */379379+ if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) ||380380+ got.br_startoff > offset_fsb)381381+ return xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);382382+383383+ *shared = true;384384+ if (isnullstartblock(got.br_startblock)) {385385+ xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);386386+ return 0;387387+ }388388+389389+ /* real extent found - no need to allocate */390390+ xfs_trim_extent(&got, offset_fsb, count_fsb);391391+ *imap = got;392392+ *found = true;393393+ return 0;394394+}395395+355396/* Allocate all CoW reservations covering a range of blocks in a file. */356397int357398xfs_reflink_allocate_cow(···404363 struct xfs_mount *mp = ip->i_mount;405364 xfs_fileoff_t offset_fsb = imap->br_startoff;406365 xfs_filblks_t count_fsb = imap->br_blockcount;407407- struct xfs_bmbt_irec got;408408- struct xfs_trans *tp = NULL;366366+ struct xfs_trans *tp;409367 int nimaps, error = 0;410410- bool trimmed;368368+ bool found;411369 xfs_filblks_t resaligned;412370 xfs_extlen_t resblks = 0;413413- struct xfs_iext_cursor icur;414371415415-retry:416416- ASSERT(xfs_is_reflink_inode(ip));417372 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));373373+ ASSERT(xfs_is_reflink_inode(ip));374374+375375+ error = xfs_find_trim_cow_extent(ip, imap, shared, &found);376376+ if (error || !*shared)377377+ return error;378378+ if (found)379379+ goto convert;380380+381381+ resaligned = xfs_aligned_fsb_count(imap->br_startoff,382382+ imap->br_blockcount, xfs_get_cowextsz_hint(ip));383383+ resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);384384+385385+ xfs_iunlock(ip, *lockmode);386386+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);387387+ *lockmode = XFS_ILOCK_EXCL;388388+ xfs_ilock(ip, *lockmode);389389+390390+ if (error)391391+ return error;392392+393393+ error = xfs_qm_dqattach_locked(ip, false);394394+ if (error)395395+ goto out_trans_cancel;418396419397 /*420420- * Even if the extent is not shared we might have a preallocation for421421- * it in the COW fork. If so use it.398398+ * Check for an overlapping extent again now that we dropped the ilock.422399 */423423- if (xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got) &&424424- got.br_startoff <= offset_fsb) {425425- *shared = true;426426-427427- /* If we have a real allocation in the COW fork we're done. */428428- if (!isnullstartblock(got.br_startblock)) {429429- xfs_trim_extent(&got, offset_fsb, count_fsb);430430- *imap = got;431431- goto convert;432432- }433433-434434- xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);435435- } else {436436- error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);437437- if (error || !*shared)438438- goto out;439439- }440440-441441- if (!tp) {442442- resaligned = xfs_aligned_fsb_count(imap->br_startoff,443443- imap->br_blockcount, xfs_get_cowextsz_hint(ip));444444- resblks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);445445-446446- xfs_iunlock(ip, *lockmode);447447- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);448448- *lockmode = XFS_ILOCK_EXCL;449449- xfs_ilock(ip, *lockmode);450450-451451- if (error)452452- return error;453453-454454- error = xfs_qm_dqattach_locked(ip, false);455455- if (error)456456- goto out;457457- goto retry;400400+ error = xfs_find_trim_cow_extent(ip, imap, shared, &found);401401+ if (error || !*shared)402402+ goto out_trans_cancel;403403+ if (found) {404404+ xfs_trans_cancel(tp);405405+ goto convert;458406 }459407460408 error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,461409 XFS_QMOPT_RES_REGBLKS);462410 if (error)463463- goto out;411411+ goto out_trans_cancel;464412465413 xfs_trans_ijoin(tp, ip, 0);466414467467- nimaps = 1;468468-469415 /* Allocate the entire reservation as unwritten blocks. */416416+ nimaps = 1;470417 error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,471418 XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,472419 resblks, imap, &nimaps);473420 if (error)474474- goto out_trans_cancel;421421+ goto out_unreserve;475422476423 xfs_inode_set_cowblocks_tag(ip);477477-478478- /* Finish up. */479424 error = xfs_trans_commit(tp);480425 if (error)481426 return error;···474447 return -ENOSPC;475448convert:476449 return xfs_reflink_convert_cow_extent(ip, imap, offset_fsb, count_fsb);477477-out_trans_cancel:450450+451451+out_unreserve:478452 xfs_trans_unreserve_quota_nblks(tp, ip, (long)resblks, 0,479453 XFS_QMOPT_RES_REGBLKS);480480-out:481481- if (tp)482482- xfs_trans_cancel(tp);454454+out_trans_cancel:455455+ xfs_trans_cancel(tp);483456 return error;484457}485458···693666 if (!del.br_blockcount)694667 goto prev_extent;695668696696- ASSERT(!isnullstartblock(got.br_startblock));697697-698669 /*699699- * Don't remap unwritten extents; these are700700- * speculatively preallocated CoW extents that have been701701- * allocated but have not yet been involved in a write.670670+ * Only remap real extent that contain data. With AIO671671+ * speculatively preallocations can leak into the range we672672+ * are called upon, and we need to skip them.702673 */703703- if (got.br_state == XFS_EXT_UNWRITTEN)674674+ if (!xfs_bmap_is_real_extent(&got))704675 goto prev_extent;705676706677 /* Unmap the old blocks in the data fork. */
···259259 struct xfs_trans *tp;260260 int error;261261262262+ /*263263+ * Allocate the handle before we do our freeze accounting and setting up264264+ * GFP_NOFS allocation context so that we avoid lockdep false positives265265+ * by doing GFP_KERNEL allocations inside sb_start_intwrite().266266+ */267267+ tp = kmem_zone_zalloc(xfs_trans_zone,268268+ (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);269269+262270 if (!(flags & XFS_TRANS_NO_WRITECOUNT))263271 sb_start_intwrite(mp->m_super);264272···278270 mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);279271 atomic_inc(&mp->m_active_trans);280272281281- tp = kmem_zone_zalloc(xfs_trans_zone,282282- (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);283273 tp->t_magic = XFS_TRANS_HEADER_MAGIC;284274 tp->t_flags = flags;285275 tp->t_mountp = mp;
+25-76
fs/xfs/xfs_trans_buf.c
···322322}323323324324/*325325- * Release the buffer bp which was previously acquired with one of the326326- * xfs_trans_... buffer allocation routines if the buffer has not327327- * been modified within this transaction. If the buffer is modified328328- * within this transaction, do decrement the recursion count but do329329- * not release the buffer even if the count goes to 0. If the buffer is not330330- * modified within the transaction, decrement the recursion count and331331- * release the buffer if the recursion count goes to 0.325325+ * Release a buffer previously joined to the transaction. If the buffer is326326+ * modified within this transaction, decrement the recursion count but do not327327+ * release the buffer even if the count goes to 0. If the buffer is not modified328328+ * within the transaction, decrement the recursion count and release the buffer329329+ * if the recursion count goes to 0.332330 *333333- * If the buffer is to be released and it was not modified before334334- * this transaction began, then free the buf_log_item associated with it.331331+ * If the buffer is to be released and it was not already dirty before this332332+ * transaction began, then also free the buf_log_item associated with it.335333 *336336- * If the transaction pointer is NULL, make this just a normal337337- * brelse() call.334334+ * If the transaction pointer is NULL, this is a normal xfs_buf_relse() call.338335 */339336void340337xfs_trans_brelse(341341- xfs_trans_t *tp,342342- xfs_buf_t *bp)338338+ struct xfs_trans *tp,339339+ struct xfs_buf *bp)343340{344344- struct xfs_buf_log_item *bip;345345- int freed;341341+ struct xfs_buf_log_item *bip = bp->b_log_item;346342347347- /*348348- * Default to a normal brelse() call if the tp is NULL.349349- */350350- if (tp == NULL) {351351- ASSERT(bp->b_transp == NULL);343343+ ASSERT(bp->b_transp == tp);344344+345345+ if (!tp) {352346 xfs_buf_relse(bp);353347 return;354348 }355349356356- ASSERT(bp->b_transp == tp);357357- bip = bp->b_log_item;350350+ trace_xfs_trans_brelse(bip);358351 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);359359- ASSERT(!(bip->bli_flags & XFS_BLI_STALE));360360- ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));361352 ASSERT(atomic_read(&bip->bli_refcount) > 0);362353363363- trace_xfs_trans_brelse(bip);364364-365354 /*366366- * If the release is just for a recursive lock,367367- * then decrement the count and return.355355+ * If the release is for a recursive lookup, then decrement the count356356+ * and return.368357 */369358 if (bip->bli_recur > 0) {370359 bip->bli_recur--;···361372 }362373363374 /*364364- * If the buffer is dirty within this transaction, we can't375375+ * If the buffer is invalidated or dirty in this transaction, we can't365376 * release it until we commit.366377 */367378 if (test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags))368379 return;369369-370370- /*371371- * If the buffer has been invalidated, then we can't release372372- * it until the transaction commits to disk unless it is re-dirtied373373- * as part of this transaction. This prevents us from pulling374374- * the item from the AIL before we should.375375- */376380 if (bip->bli_flags & XFS_BLI_STALE)377381 return;378382383383+ /*384384+ * Unlink the log item from the transaction and clear the hold flag, if385385+ * set. We wouldn't want the next user of the buffer to get confused.386386+ */379387 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));380380-381381- /*382382- * Free up the log item descriptor tracking the released item.383383- */384388 xfs_trans_del_item(&bip->bli_item);389389+ bip->bli_flags &= ~XFS_BLI_HOLD;385390386386- /*387387- * Clear the hold flag in the buf log item if it is set.388388- * We wouldn't want the next user of the buffer to389389- * get confused.390390- */391391- if (bip->bli_flags & XFS_BLI_HOLD) {392392- bip->bli_flags &= ~XFS_BLI_HOLD;393393- }394394-395395- /*396396- * Drop our reference to the buf log item.397397- */398398- freed = atomic_dec_and_test(&bip->bli_refcount);399399-400400- /*401401- * If the buf item is not tracking data in the log, then we must free it402402- * before releasing the buffer back to the free pool.403403- *404404- * If the fs has shutdown and we dropped the last reference, it may fall405405- * on us to release a (possibly dirty) bli if it never made it to the406406- * AIL (e.g., the aborted unpin already happened and didn't release it407407- * due to our reference). Since we're already shutdown and need408408- * ail_lock, just force remove from the AIL and release the bli here.409409- */410410- if (XFS_FORCED_SHUTDOWN(tp->t_mountp) && freed) {411411- xfs_trans_ail_remove(&bip->bli_item, SHUTDOWN_LOG_IO_ERROR);412412- xfs_buf_item_relse(bp);413413- } else if (!(bip->bli_flags & XFS_BLI_DIRTY)) {414414-/***415415- ASSERT(bp->b_pincount == 0);416416-***/417417- ASSERT(atomic_read(&bip->bli_refcount) == 0);418418- ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));419419- ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));420420- xfs_buf_item_relse(bp);421421- }391391+ /* drop the reference to the bli */392392+ xfs_buf_item_put(bip);422393423394 bp->b_transp = NULL;424395 xfs_buf_relse(bp);
···140140pte_t *huge_pte_offset(struct mm_struct *mm,141141 unsigned long addr, unsigned long sz);142142int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);143143+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,144144+ unsigned long *start, unsigned long *end);143145struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,144146 int write);145147struct page *follow_huge_pd(struct vm_area_struct *vma,···170168static inline unsigned long hugetlb_total_pages(void)171169{172170 return 0;171171+}172172+173173+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,174174+ pte_t *ptep)175175+{176176+ return 0;177177+}178178+179179+static inline void adjust_range_if_pmd_sharing_possible(180180+ struct vm_area_struct *vma,181181+ unsigned long *start, unsigned long *end)182182+{173183}174184175185#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
+6
include/linux/mm.h
···24552455 return vma;24562456}2457245724582458+static inline bool range_in_vma(struct vm_area_struct *vma,24592459+ unsigned long start, unsigned long end)24602460+{24612461+ return (vma && vma->vm_start <= start && end <= vma->vm_end);24622462+}24632463+24582464#ifdef CONFIG_MMU24592465pgprot_t vm_get_page_prot(unsigned long vm_flags);24602466void vma_set_page_prot(struct vm_area_struct *vma);
-6
include/linux/mmzone.h
···671671#ifdef CONFIG_NUMA_BALANCING672672 /* Lock serializing the migrate rate limiting window */673673 spinlock_t numabalancing_migrate_lock;674674-675675- /* Rate limiting time interval */676676- unsigned long numabalancing_migrate_next_window;677677-678678- /* Number of pages migrated during the rate limiting time interval */679679- unsigned long numabalancing_migrate_nr_pages;680674#endif681675 /*682676 * This is a per-node reserve of pages that are not available
+18
include/linux/virtio_net.h
···55#include <linux/if_vlan.h>66#include <uapi/linux/virtio_net.h>7788+static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,99+ const struct virtio_net_hdr *hdr)1010+{1111+ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {1212+ case VIRTIO_NET_HDR_GSO_TCPV4:1313+ case VIRTIO_NET_HDR_GSO_UDP:1414+ skb->protocol = cpu_to_be16(ETH_P_IP);1515+ break;1616+ case VIRTIO_NET_HDR_GSO_TCPV6:1717+ skb->protocol = cpu_to_be16(ETH_P_IPV6);1818+ break;1919+ default:2020+ return -EINVAL;2121+ }2222+2323+ return 0;2424+}2525+826static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,927 const struct virtio_net_hdr *hdr,1028 bool little_endian)
···206206 * Callers of shm_lock() must validate the status of the returned ipc207207 * object pointer and error out as appropriate.208208 */209209- return (void *)ipcp;209209+ return ERR_CAST(ipcp);210210}211211212212static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
+4-1
kernel/bpf/local_storage.c
···129129 struct bpf_cgroup_storage *storage;130130 struct bpf_storage_buffer *new;131131132132- if (flags & BPF_NOEXIST)132132+ if (flags != BPF_ANY && flags != BPF_EXIST)133133 return -EINVAL;134134135135 storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,···193193 struct bpf_cgroup_storage_map *map;194194195195 if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))196196+ return ERR_PTR(-EINVAL);197197+198198+ if (attr->value_size == 0)196199 return ERR_PTR(-EINVAL);197200198201 if (attr->value_size > PAGE_SIZE)
+9-1
kernel/bpf/verifier.c
···28422842 u64 umin_val, umax_val;28432843 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;2844284428452845+ if (insn_bitness == 32) {28462846+ /* Relevant for 32-bit RSH: Information can propagate towards28472847+ * LSB, so it isn't sufficient to only truncate the output to28482848+ * 32 bits.28492849+ */28502850+ coerce_reg_to_size(dst_reg, 4);28512851+ coerce_reg_to_size(&src_reg, 4);28522852+ }28532853+28452854 smin_val = src_reg.smin_value;28462855 smax_val = src_reg.smax_value;28472856 umin_val = src_reg.umin_value;···30863077 if (BPF_CLASS(insn->code) != BPF_ALU64) {30873078 /* 32-bit ALU ops are (32,32)->32 */30883079 coerce_reg_to_size(dst_reg, 4);30893089- coerce_reg_to_size(&src_reg, 4);30903080 }3091308130923082 __reg_deduce_bounds(dst_reg);
+4-7
kernel/events/core.c
···83148314 goto unlock;8315831583168316 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {83178317+ if (event->cpu != smp_processor_id())83188318+ continue;83178319 if (event->attr.type != PERF_TYPE_TRACEPOINT)83188320 continue;83198321 if (event->attr.config != entry->type)···94339431 if (pmu->task_ctx_nr > perf_invalid_context)94349432 return;9435943394369436- mutex_lock(&pmus_lock);94379434 free_percpu(pmu->pmu_cpu_context);94389438- mutex_unlock(&pmus_lock);94399435}9440943694419437/*···9689968996909690void perf_pmu_unregister(struct pmu *pmu)96919691{96929692- int remove_device;96939693-96949692 mutex_lock(&pmus_lock);96959695- remove_device = pmu_bus_running;96969693 list_del_rcu(&pmu->entry);96979697- mutex_unlock(&pmus_lock);9698969496999695 /*97009696 * We dereference the pmu list under both SRCU and regular RCU, so···97029706 free_percpu(pmu->pmu_disable_count);97039707 if (pmu->type >= PERF_TYPE_MAX)97049708 idr_remove(&pmu_idr, pmu->type);97059705- if (remove_device) {97099709+ if (pmu_bus_running) {97069710 if (pmu->nr_addr_filters)97079711 device_remove_file(pmu->dev, &dev_attr_nr_addr_filters);97089712 device_del(pmu->dev);97099713 put_device(pmu->dev);97109714 }97119715 free_pmu_context(pmu);97169716+ mutex_unlock(&pmus_lock);97129717}97139718EXPORT_SYMBOL_GPL(perf_pmu_unregister);97149719
+6-4
kernel/locking/test-ww_mutex.c
···260260{261261 struct test_cycle *cycle = container_of(work, typeof(*cycle), work);262262 struct ww_acquire_ctx ctx;263263- int err;263263+ int err, erra = 0;264264265265 ww_acquire_init(&ctx, &ww_class);266266 ww_mutex_lock(&cycle->a_mutex, &ctx);···270270271271 err = ww_mutex_lock(cycle->b_mutex, &ctx);272272 if (err == -EDEADLK) {273273+ err = 0;273274 ww_mutex_unlock(&cycle->a_mutex);274275 ww_mutex_lock_slow(cycle->b_mutex, &ctx);275275- err = ww_mutex_lock(&cycle->a_mutex, &ctx);276276+ erra = ww_mutex_lock(&cycle->a_mutex, &ctx);276277 }277278278279 if (!err)279280 ww_mutex_unlock(cycle->b_mutex);280280- ww_mutex_unlock(&cycle->a_mutex);281281+ if (!erra)282282+ ww_mutex_unlock(&cycle->a_mutex);281283 ww_acquire_fini(&ctx);282284283283- cycle->result = err;285285+ cycle->result = err ?: erra;284286}285287286288static int __test_cycle(unsigned int nthreads)
+1-1
kernel/sched/core.c
···1167116711681168 if (task_cpu(p) != new_cpu) {11691169 if (p->sched_class->migrate_task_rq)11701170- p->sched_class->migrate_task_rq(p);11701170+ p->sched_class->migrate_task_rq(p, new_cpu);11711171 p->se.nr_migrations++;11721172 rseq_migrate(p);11731173 perf_event_task_migrate(p);
···13921392 int last_cpupid, this_cpupid;1393139313941394 this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);13951395+ last_cpupid = page_cpupid_xchg_last(page, this_cpupid);13961396+13971397+ /*13981398+ * Allow first faults or private faults to migrate immediately early in13991399+ * the lifetime of a task. The magic number 4 is based on waiting for14001400+ * two full passes of the "multi-stage node selection" test that is14011401+ * executed below.14021402+ */14031403+ if ((p->numa_preferred_nid == -1 || p->numa_scan_seq <= 4) &&14041404+ (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))14051405+ return true;1395140613961407 /*13971408 * Multi-stage node selection is used in conjunction with a periodic···14211410 * This quadric squishes small probabilities, making it less likely we14221411 * act on an unlikely task<->page relation.14231412 */14241424- last_cpupid = page_cpupid_xchg_last(page, this_cpupid);14251413 if (!cpupid_pid_unset(last_cpupid) &&14261414 cpupid_to_nid(last_cpupid) != dst_nid)14271415 return false;···15241514static void task_numa_assign(struct task_numa_env *env,15251515 struct task_struct *p, long imp)15261516{15171517+ struct rq *rq = cpu_rq(env->dst_cpu);15181518+15191519+ /* Bail out if run-queue part of active NUMA balance. */15201520+ if (xchg(&rq->numa_migrate_on, 1))15211521+ return;15221522+15231523+ /*15241524+ * Clear previous best_cpu/rq numa-migrate flag, since task now15251525+ * found a better CPU to move/swap.15261526+ */15271527+ if (env->best_cpu != -1) {15281528+ rq = cpu_rq(env->best_cpu);15291529+ WRITE_ONCE(rq->numa_migrate_on, 0);15301530+ }15311531+15271532 if (env->best_task)15281533 put_task_struct(env->best_task);15291534 if (p)···15781553}1579155415801555/*15561556+ * Maximum NUMA importance can be 1998 (2*999);15571557+ * SMALLIMP @ 30 would be close to 1998/64.15581558+ * Used to deter task migration.15591559+ */15601560+#define SMALLIMP 3015611561+15621562+/*15811563 * This checks if the overall compute and NUMA accesses of the system would15821564 * be improved if the source tasks was migrated to the target dst_cpu taking15831565 * into account that it might be best if task running on the dst_cpu should···16011569 long moveimp = imp;16021570 int dist = env->dist;1603157115721572+ if (READ_ONCE(dst_rq->numa_migrate_on))15731573+ return;15741574+16041575 rcu_read_lock();16051576 cur = task_rcu_dereference(&dst_rq->curr);16061577 if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))···16171582 goto unlock;1618158316191584 if (!cur) {16201620- if (maymove || imp > env->best_imp)15851585+ if (maymove && moveimp >= env->best_imp)16211586 goto assign;16221587 else16231588 goto unlock;···16601625 task_weight(cur, env->dst_nid, dist);16611626 }1662162716631663- if (imp <= env->best_imp)16641664- goto unlock;16651665-16661628 if (maymove && moveimp > imp && moveimp > env->best_imp) {16671667- imp = moveimp - 1;16291629+ imp = moveimp;16681630 cur = NULL;16691631 goto assign;16701632 }16331633+16341634+ /*16351635+ * If the NUMA importance is less than SMALLIMP,16361636+ * task migration might only result in ping pong16371637+ * of tasks and also hurt performance due to cache16381638+ * misses.16391639+ */16401640+ if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2)16411641+ goto unlock;1671164216721643 /*16731644 * In the overloaded case, try and keep the load balanced.···17511710 .best_cpu = -1,17521711 };17531712 struct sched_domain *sd;17131713+ struct rq *best_rq;17541714 unsigned long taskweight, groupweight;17551715 int nid, ret, dist;17561716 long taskimp, groupimp;···18471805 if (env.best_cpu == -1)18481806 return -EAGAIN;1849180718501850- /*18511851- * Reset the scan period if the task is being rescheduled on an18521852- * alternative node to recheck if the tasks is now properly placed.18531853- */18541854- p->numa_scan_period = task_scan_start(p);18551855-18081808+ best_rq = cpu_rq(env.best_cpu);18561809 if (env.best_task == NULL) {18571810 ret = migrate_task_to(p, env.best_cpu);18111811+ WRITE_ONCE(best_rq->numa_migrate_on, 0);18581812 if (ret != 0)18591813 trace_sched_stick_numa(p, env.src_cpu, env.best_cpu);18601814 return ret;18611815 }1862181618631817 ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);18181818+ WRITE_ONCE(best_rq->numa_migrate_on, 0);1864181918651820 if (ret != 0)18661821 trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));···26352596 }26362597}2637259825992599+static void update_scan_period(struct task_struct *p, int new_cpu)26002600+{26012601+ int src_nid = cpu_to_node(task_cpu(p));26022602+ int dst_nid = cpu_to_node(new_cpu);26032603+26042604+ if (!static_branch_likely(&sched_numa_balancing))26052605+ return;26062606+26072607+ if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING))26082608+ return;26092609+26102610+ if (src_nid == dst_nid)26112611+ return;26122612+26132613+ /*26142614+ * Allow resets if faults have been trapped before one scan26152615+ * has completed. This is most likely due to a new task that26162616+ * is pulled cross-node due to wakeups or load balancing.26172617+ */26182618+ if (p->numa_scan_seq) {26192619+ /*26202620+ * Avoid scan adjustments if moving to the preferred26212621+ * node or if the task was not previously running on26222622+ * the preferred node.26232623+ */26242624+ if (dst_nid == p->numa_preferred_nid ||26252625+ (p->numa_preferred_nid != -1 && src_nid != p->numa_preferred_nid))26262626+ return;26272627+ }26282628+26292629+ p->numa_scan_period = task_scan_start(p);26302630+}26312631+26382632#else26392633static void task_tick_numa(struct rq *rq, struct task_struct *curr)26402634{···26782606}2679260726802608static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)26092609+{26102610+}26112611+26122612+static inline void update_scan_period(struct task_struct *p, int new_cpu)26812613{26822614}26832615···63516275 * cfs_rq_of(p) references at time of call are still valid and identify the63526276 * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.63536277 */63546354-static void migrate_task_rq_fair(struct task_struct *p)62786278+static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)63556279{63566280 /*63576281 * As blocked tasks retain absolute vruntime the migration needs to···6404632864056329 /* We have migrated, no longer consider this task hot */64066330 p->se.exec_start = 0;63316331+63326332+ update_scan_period(p, new_cpu);64076333}6408633464096335static void task_dead_fair(struct task_struct *p)
+2-1
kernel/sched/sched.h
···783783#ifdef CONFIG_NUMA_BALANCING784784 unsigned int nr_numa_running;785785 unsigned int nr_preferred_running;786786+ unsigned int numa_migrate_on;786787#endif787788 #define CPU_LOAD_IDX_MAX 5788789 unsigned long cpu_load[CPU_LOAD_IDX_MAX];···1524152315251524#ifdef CONFIG_SMP15261525 int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);15271527- void (*migrate_task_rq)(struct task_struct *p);15261526+ void (*migrate_task_rq)(struct task_struct *p, int new_cpu);1528152715291528 void (*task_woken)(struct rq *this_rq, struct task_struct *task);15301529
+2-1
mm/gup_benchmark.c
···1919 struct gup_benchmark *gup)2020{2121 ktime_t start_time, end_time;2222- unsigned long i, nr, nr_pages, addr, next;2222+ unsigned long i, nr_pages, addr, next;2323+ int nr;2324 struct page **pages;24252526 nr_pages = gup->size / PAGE_SIZE;
···33263326 struct page *page;33273327 struct hstate *h = hstate_vma(vma);33283328 unsigned long sz = huge_page_size(h);33293329- const unsigned long mmun_start = start; /* For mmu_notifiers */33303330- const unsigned long mmun_end = end; /* For mmu_notifiers */33293329+ unsigned long mmun_start = start; /* For mmu_notifiers */33303330+ unsigned long mmun_end = end; /* For mmu_notifiers */3331333133323332 WARN_ON(!is_vm_hugetlb_page(vma));33333333 BUG_ON(start & ~huge_page_mask(h));···33393339 */33403340 tlb_remove_check_page_size_change(tlb, sz);33413341 tlb_start_vma(tlb, vma);33423342+33433343+ /*33443344+ * If sharing possible, alert mmu notifiers of worst case.33453345+ */33463346+ adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);33423347 mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);33433348 address = start;33443349 for (; address < end; address += sz) {···33543349 ptl = huge_pte_lock(h, mm, ptep);33553350 if (huge_pmd_unshare(mm, &address, ptep)) {33563351 spin_unlock(ptl);33523352+ /*33533353+ * We just unmapped a page of PMDs by clearing a PUD.33543354+ * The caller's TLB flush range should cover this area.33553355+ */33573356 continue;33583357 }33593358···34403431{34413432 struct mm_struct *mm;34423433 struct mmu_gather tlb;34343434+ unsigned long tlb_start = start;34353435+ unsigned long tlb_end = end;34363436+34373437+ /*34383438+ * If shared PMDs were possibly used within this vma range, adjust34393439+ * start/end for worst case tlb flushing.34403440+ * Note that we can not be sure if PMDs are shared until we try to34413441+ * unmap pages. However, we want to make sure TLB flushing covers34423442+ * the largest possible range.34433443+ */34443444+ adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);3443344534443446 mm = vma->vm_mm;3445344734463446- tlb_gather_mmu(&tlb, mm, start, end);34483448+ tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);34473449 __unmap_hugepage_range(&tlb, vma, start, end, ref_page);34483448- tlb_finish_mmu(&tlb, start, end);34503450+ tlb_finish_mmu(&tlb, tlb_start, tlb_end);34493451}3450345234513453/*···43184298 pte_t pte;43194299 struct hstate *h = hstate_vma(vma);43204300 unsigned long pages = 0;43014301+ unsigned long f_start = start;43024302+ unsigned long f_end = end;43034303+ bool shared_pmd = false;43044304+43054305+ /*43064306+ * In the case of shared PMDs, the area to flush could be beyond43074307+ * start/end. Set f_start/f_end to cover the maximum possible43084308+ * range if PMD sharing is possible.43094309+ */43104310+ adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);4321431143224312 BUG_ON(address >= end);43234323- flush_cache_range(vma, address, end);43134313+ flush_cache_range(vma, f_start, f_end);4324431443254325- mmu_notifier_invalidate_range_start(mm, start, end);43154315+ mmu_notifier_invalidate_range_start(mm, f_start, f_end);43264316 i_mmap_lock_write(vma->vm_file->f_mapping);43274317 for (; address < end; address += huge_page_size(h)) {43284318 spinlock_t *ptl;···43434313 if (huge_pmd_unshare(mm, &address, ptep)) {43444314 pages++;43454315 spin_unlock(ptl);43164316+ shared_pmd = true;43464317 continue;43474318 }43484319 pte = huge_ptep_get(ptep);···43794348 * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare43804349 * may have cleared our pud entry and done put_page on the page table:43814350 * once we release i_mmap_rwsem, another task can do the final put_page43824382- * and that page table be reused and filled with junk.43514351+ * and that page table be reused and filled with junk. If we actually43524352+ * did unshare a page of pmds, flush the range corresponding to the pud.43834353 */43844384- flush_hugetlb_tlb_range(vma, start, end);43544354+ if (shared_pmd)43554355+ flush_hugetlb_tlb_range(vma, f_start, f_end);43564356+ else43574357+ flush_hugetlb_tlb_range(vma, start, end);43854358 /*43864359 * No need to call mmu_notifier_invalidate_range() we are downgrading43874360 * page table protection not changing it to point to a new page.···43934358 * See Documentation/vm/mmu_notifier.rst43944359 */43954360 i_mmap_unlock_write(vma->vm_file->f_mapping);43964396- mmu_notifier_invalidate_range_end(mm, start, end);43614361+ mmu_notifier_invalidate_range_end(mm, f_start, f_end);4397436243984363 return pages << h->order;43994364}···45804545 /*45814546 * check on proper vm_flags and page table alignment45824547 */45834583- if (vma->vm_flags & VM_MAYSHARE &&45844584- vma->vm_start <= base && end <= vma->vm_end)45484548+ if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))45854549 return true;45864550 return false;45514551+}45524552+45534553+/*45544554+ * Determine if start,end range within vma could be mapped by shared pmd.45554555+ * If yes, adjust start and end to cover range associated with possible45564556+ * shared pmd mappings.45574557+ */45584558+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,45594559+ unsigned long *start, unsigned long *end)45604560+{45614561+ unsigned long check_addr = *start;45624562+45634563+ if (!(vma->vm_flags & VM_MAYSHARE))45644564+ return;45654565+45664566+ for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {45674567+ unsigned long a_start = check_addr & PUD_MASK;45684568+ unsigned long a_end = a_start + PUD_SIZE;45694569+45704570+ /*45714571+ * If sharing is possible, adjust start/end if necessary.45724572+ */45734573+ if (range_in_vma(vma, a_start, a_end)) {45744574+ if (a_start < *start)45754575+ *start = a_start;45764576+ if (a_end > *end)45774577+ *end = a_end;45784578+ }45794579+ }45874580}4588458145894582/*···47104647int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)47114648{47124649 return 0;46504650+}46514651+46524652+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,46534653+ unsigned long *start, unsigned long *end)46544654+{47134655}47144656#define want_pmd_share() (0)47154657#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
+1-1
mm/madvise.c
···9696 new_flags |= VM_DONTDUMP;9797 break;9898 case MADV_DODUMP:9999- if (new_flags & VM_SPECIAL) {9999+ if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {100100 error = -EINVAL;101101 goto out;102102 }
+4-58
mm/migrate.c
···275275 if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))276276 mlock_vma_page(new);277277278278+ if (PageTransHuge(page) && PageMlocked(page))279279+ clear_page_mlock(page);280280+278281 /* No need to invalidate - it was non-present before */279282 update_mmu_cache(vma, pvmw.address, pvmw.pte);280283 }···14141411 * we encounter them after the rest of the list14151412 * is processed.14161413 */14171417- if (PageTransHuge(page)) {14141414+ if (PageTransHuge(page) && !PageHuge(page)) {14181415 lock_page(page);14191416 rc = split_huge_page_to_list(page, from);14201417 unlock_page(page);···18581855 return newpage;18591856}1860185718611861-/*18621862- * page migration rate limiting control.18631863- * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs18641864- * window of time. Default here says do not migrate more than 1280M per second.18651865- */18661866-static unsigned int migrate_interval_millisecs __read_mostly = 100;18671867-static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);18681868-18691869-/* Returns true if the node is migrate rate-limited after the update */18701870-static bool numamigrate_update_ratelimit(pg_data_t *pgdat,18711871- unsigned long nr_pages)18721872-{18731873- /*18741874- * Rate-limit the amount of data that is being migrated to a node.18751875- * Optimal placement is no good if the memory bus is saturated and18761876- * all the time is being spent migrating!18771877- */18781878- if (time_after(jiffies, pgdat->numabalancing_migrate_next_window)) {18791879- spin_lock(&pgdat->numabalancing_migrate_lock);18801880- pgdat->numabalancing_migrate_nr_pages = 0;18811881- pgdat->numabalancing_migrate_next_window = jiffies +18821882- msecs_to_jiffies(migrate_interval_millisecs);18831883- spin_unlock(&pgdat->numabalancing_migrate_lock);18841884- }18851885- if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {18861886- trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,18871887- nr_pages);18881888- return true;18891889- }18901890-18911891- /*18921892- * This is an unlocked non-atomic update so errors are possible.18931893- * The consequences are failing to migrate when we potentiall should18941894- * have which is not severe enough to warrant locking. If it is ever18951895- * a problem, it can be converted to a per-cpu counter.18961896- */18971897- pgdat->numabalancing_migrate_nr_pages += nr_pages;18981898- return false;18991899-}19001900-19011858static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)19021859{19031860 int page_lru;···19301967 if (page_is_file_cache(page) && PageDirty(page))19311968 goto out;1932196919331933- /*19341934- * Rate-limit the amount of data that is being migrated to a node.19351935- * Optimal placement is no good if the memory bus is saturated and19361936- * all the time is being spent migrating!19371937- */19381938- if (numamigrate_update_ratelimit(pgdat, 1))19391939- goto out;19401940-19411970 isolated = numamigrate_isolate_page(pgdat, page);19421971 if (!isolated)19431972 goto out;···19752020 int page_lru = page_is_file_cache(page);19762021 unsigned long mmun_start = address & HPAGE_PMD_MASK;19772022 unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;19781978-19791979- /*19801980- * Rate-limit the amount of data that is being migrated to a node.19811981- * Optimal placement is no good if the memory bus is saturated and19821982- * all the time is being spent migrating!19831983- */19841984- if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))19851985- goto out_dropref;1986202319872024 new_page = alloc_pages_node(node,19882025 (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),···2072212520732126out_fail:20742127 count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);20752075-out_dropref:20762128 ptl = pmd_lock(mm, pmd);20772129 if (pmd_same(*pmd, entry)) {20782130 entry = pmd_modify(entry, vma->vm_page_prot);
···13621362 }1363136313641364 /*13651365- * We have to assume the worse case ie pmd for invalidation. Note that13661366- * the page can not be free in this function as call of try_to_unmap()13671367- * must hold a reference on the page.13651365+ * For THP, we have to assume the worse case ie pmd for invalidation.13661366+ * For hugetlb, it could be much worse if we need to do pud13671367+ * invalidation in the case of pmd sharing.13681368+ *13691369+ * Note that the page can not be free in this function as call of13701370+ * try_to_unmap() must hold a reference on the page.13681371 */13691372 end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));13731373+ if (PageHuge(page)) {13741374+ /*13751375+ * If sharing is possible, start and end will be adjusted13761376+ * accordingly.13771377+ */13781378+ adjust_range_if_pmd_sharing_possible(vma, &start, &end);13791379+ }13701380 mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);1371138113721382 while (page_vma_mapped_walk(&pvmw)) {···14191409 subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);14201410 address = pvmw.address;1421141114121412+ if (PageHuge(page)) {14131413+ if (huge_pmd_unshare(mm, &address, pvmw.pte)) {14141414+ /*14151415+ * huge_pmd_unshare unmapped an entire PMD14161416+ * page. There is no way of knowing exactly14171417+ * which PMDs may be cached for this mm, so14181418+ * we must flush them all. start/end were14191419+ * already adjusted above to cover this range.14201420+ */14211421+ flush_cache_range(vma, start, end);14221422+ flush_tlb_range(vma, start, end);14231423+ mmu_notifier_invalidate_range(mm, start, end);14241424+14251425+ /*14261426+ * The ref count of the PMD page was dropped14271427+ * which is part of the way map counting14281428+ * is done for shared PMDs. Return 'true'14291429+ * here. When there is no other sharing,14301430+ * huge_pmd_unshare returns false and we will14311431+ * unmap the actual page and drop map count14321432+ * to zero.14331433+ */14341434+ page_vma_mapped_walk_done(&pvmw);14351435+ break;14361436+ }14371437+ }1422143814231439 if (IS_ENABLED(CONFIG_MIGRATION) &&14241440 (flags & TTU_MIGRATION) &&
+3-4
mm/vmscan.c
···580580 struct mem_cgroup *memcg, int priority)581581{582582 struct memcg_shrinker_map *map;583583- unsigned long freed = 0;584584- int ret, i;583583+ unsigned long ret, freed = 0;584584+ int i;585585586586 if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))587587 return 0;···677677 struct mem_cgroup *memcg,678678 int priority)679679{680680+ unsigned long ret, freed = 0;680681 struct shrinker *shrinker;681681- unsigned long freed = 0;682682- int ret;683682684683 if (!mem_cgroup_is_root(memcg))685684 return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
···5959 req.is_set = is_set;6060 req.pid = current->pid;6161 req.cmd = optname;6262- req.addr = (long)optval;6262+ req.addr = (long __force __user)optval;6363 req.len = optlen;6464 mutex_lock(&bpfilter_lock);6565 if (!info.pid)···9898 pr_info("Loaded bpfilter_umh pid %d\n", info.pid);9999100100 /* health check that usermode process started correctly */101101- if (__bpfilter_process_sockopt(NULL, 0, 0, 0, 0) != 0) {101101+ if (__bpfilter_process_sockopt(NULL, 0, NULL, 0, 0) != 0) {102102 stop_umh();103103 return -EFAULT;104104 }
+20-9
net/core/rtnetlink.c
···37633763 int err = 0;37643764 int fidx = 0;3765376537663766- err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,37673767- IFLA_MAX, ifla_policy, NULL);37683768- if (err < 0) {37693769- return -EINVAL;37703770- } else if (err == 0) {37713771- if (tb[IFLA_MASTER])37723772- br_idx = nla_get_u32(tb[IFLA_MASTER]);37733773- }37663766+ /* A hack to preserve kernel<->userspace interface.37673767+ * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0.37683768+ * However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails.37693769+ * So, check for ndmsg with an optional u32 attribute (not used here).37703770+ * Fortunately these sizes don't conflict with the size of ifinfomsg37713771+ * with an optional attribute.37723772+ */37733773+ if (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) &&37743774+ (nlmsg_len(cb->nlh) != sizeof(struct ndmsg) +37753775+ nla_attr_size(sizeof(u32)))) {37763776+ err = nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb,37773777+ IFLA_MAX, ifla_policy, NULL);37783778+ if (err < 0) {37793779+ return -EINVAL;37803780+ } else if (err == 0) {37813781+ if (tb[IFLA_MASTER])37823782+ br_idx = nla_get_u32(tb[IFLA_MASTER]);37833783+ }3774378437753775- brport_idx = ifm->ifi_index;37853785+ brport_idx = ifm->ifi_index;37863786+ }3776378737773788 if (br_idx) {37783789 br_dev = __dev_get_by_index(net, br_idx);
+20-9
net/ipv6/raw.c
···651651 skb->priority = sk->sk_priority;652652 skb->mark = sk->sk_mark;653653 skb->tstamp = sockc->transmit_time;654654- skb_dst_set(skb, &rt->dst);655655- *dstp = NULL;656654657655 skb_put(skb, length);658656 skb_reset_network_header(skb);···663665664666 skb->transport_header = skb->network_header;665667 err = memcpy_from_msg(iph, msg, length);666666- if (err)667667- goto error_fault;668668+ if (err) {669669+ err = -EFAULT;670670+ kfree_skb(skb);671671+ goto error;672672+ }673673+674674+ skb_dst_set(skb, &rt->dst);675675+ *dstp = NULL;668676669677 /* if egress device is enslaved to an L3 master device pass the670678 * skb to its handler for processing···679675 if (unlikely(!skb))680676 return 0;681677678678+ /* Acquire rcu_read_lock() in case we need to use rt->rt6i_idev679679+ * in the error path. Since skb has been freed, the dst could680680+ * have been queued for deletion.681681+ */682682+ rcu_read_lock();682683 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);683684 err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,684685 NULL, rt->dst.dev, dst_output);685686 if (err > 0)686687 err = net_xmit_errno(err);687687- if (err)688688- goto error;688688+ if (err) {689689+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);690690+ rcu_read_unlock();691691+ goto error_check;692692+ }693693+ rcu_read_unlock();689694out:690695 return 0;691696692692-error_fault:693693- err = -EFAULT;694694- kfree_skb(skb);695697error:696698 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);699699+error_check:697700 if (err == -ENOBUFS && !np->recverr)698701 err = 0;699702 return err;
+1-1
net/mac80211/cfg.c
···425425 case NL80211_IFTYPE_AP:426426 case NL80211_IFTYPE_AP_VLAN:427427 /* Keys without a station are used for TX only */428428- if (key->sta && test_sta_flag(key->sta, WLAN_STA_MFP))428428+ if (sta && test_sta_flag(sta, WLAN_STA_MFP))429429 key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT;430430 break;431431 case NL80211_IFTYPE_ADHOC: