···526526 /sys/devices/system/cpu/vulnerabilities/srbds527527 /sys/devices/system/cpu/vulnerabilities/tsx_async_abort528528 /sys/devices/system/cpu/vulnerabilities/itlb_multihit529529+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data529530Date: January 2018530531Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>531532Description: Information about CPU vulnerabilities
···11+=========================================22+Processor MMIO Stale Data Vulnerabilities33+=========================================44+55+Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O66+(MMIO) vulnerabilities that can expose data. The sequences of operations for77+exposing data range from simple to very complex. Because most of the88+vulnerabilities require the attacker to have access to MMIO, many environments99+are not affected. System environments using virtualization where MMIO access is1010+provided to untrusted guests may need mitigation. These vulnerabilities are1111+not transient execution attacks. However, these vulnerabilities may propagate1212+stale data into core fill buffers where the data can subsequently be inferred1313+by an unmitigated transient execution attack. Mitigation for these1414+vulnerabilities includes a combination of microcode update and software1515+changes, depending on the platform and usage model. Some of these mitigations1616+are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or1717+those used to mitigate Special Register Buffer Data Sampling (SRBDS).1818+1919+Data Propagators2020+================2121+Propagators are operations that result in stale data being copied or moved from2222+one microarchitectural buffer or register to another. Processor MMIO Stale Data2323+Vulnerabilities are operations that may result in stale data being directly2424+read into an architectural, software-visible state or sampled from a buffer or2525+register.2626+2727+Fill Buffer Stale Data Propagator (FBSDP)2828+-----------------------------------------2929+Stale data may propagate from fill buffers (FB) into the non-coherent portion3030+of the uncore on some non-coherent writes. Fill buffer propagation by itself3131+does not make stale data architecturally visible. 
Stale data must be propagated3232+to a location where it is subject to reading or sampling.3333+3434+Sideband Stale Data Propagator (SSDP)3535+-------------------------------------3636+The sideband stale data propagator (SSDP) is limited to the client (including3737+Intel Xeon server E3) uncore implementation. The sideband response buffer is3838+shared by all client cores. For non-coherent reads that go to sideband3939+destinations, the uncore logic returns 64 bytes of data to the core, including4040+both requested data and unrequested stale data, from a transaction buffer and4141+the sideband response buffer. As a result, stale data from the sideband4242+response and transaction buffers may now reside in a core fill buffer.4343+4444+Primary Stale Data Propagator (PSDP)4545+------------------------------------4646+The primary stale data propagator (PSDP) is limited to the client (including4747+Intel Xeon server E3) uncore implementation. Similar to the sideband response4848+buffer, the primary response buffer is shared by all client cores. For some4949+processors, MMIO primary reads will return 64 bytes of data to the core fill5050+buffer including both requested data and unrequested stale data. This is5151+similar to the sideband stale data propagator.5252+5353+Vulnerabilities5454+===============5555+Device Register Partial Write (DRPW) (CVE-2022-21166)5656+-----------------------------------------------------5757+Some endpoint MMIO registers incorrectly handle writes that are smaller than5858+the register size. Instead of aborting the write or only copying the correct5959+subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than6060+specified by the write transaction may be written to the register. 
On6161+processors affected by FBSDP, this may expose stale data from the fill buffers6262+of the core that created the write transaction.6363+6464+Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)6565+----------------------------------------------------6666+After propagators may have moved data around the uncore and copied stale data6767+into client core fill buffers, processors affected by MFBDS can leak data from6868+the fill buffer. It is limited to the client (including Intel Xeon server E3)6969+uncore implementation.7070+7171+Shared Buffers Data Read (SBDR) (CVE-2022-21123)7272+------------------------------------------------7373+It is similar to Shared Buffer Data Sampling (SBDS) except that the data is7474+directly read into the architectural software-visible state. It is limited to7575+the client (including Intel Xeon server E3) uncore implementation.7676+7777+Affected Processors7878+===================7979+Not all the CPUs are affected by all the variants. For instance, most8080+processors for the server market (excluding Intel Xeon E3 processors) are8181+impacted by only Device Register Partial Write (DRPW).8282+8383+Below is the list of affected Intel processors [#f1]_:8484+8585+ =================== ============ =========8686+ Common name Family_Model Steppings8787+ =================== ============ =========8888+ HASWELL_X 06_3FH 2,48989+ SKYLAKE_L 06_4EH 39090+ BROADWELL_X 06_4FH All9191+ SKYLAKE_X 06_55H 3,4,6,7,119292+ BROADWELL_D 06_56H 3,4,59393+ SKYLAKE 06_5EH 39494+ ICELAKE_X 06_6AH 4,5,69595+ ICELAKE_D 06_6CH 19696+ ICELAKE_L 06_7EH 59797+ ATOM_TREMONT_D 06_86H All9898+ LAKEFIELD 06_8AH 19999+ KABYLAKE_L 06_8EH 9 to 12100100+ ATOM_TREMONT 06_96H 1101101+ ATOM_TREMONT_L 06_9CH 0102102+ KABYLAKE 06_9EH 9 to 13103103+ COMETLAKE 06_A5H 2,3,5104104+ COMETLAKE_L 06_A6H 0,1105105+ ROCKETLAKE 06_A7H 1106106+ =================== ============ =========107107+108108+If a CPU is in the affected processor list, but not affected by a variant, it109109+is 
indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later110110+section, mitigation largely remains the same for all the variants, i.e. to111111+clear the CPU fill buffers via VERW instruction.112112+113113+New bits in MSRs114114+================115115+Newer processors and microcode update on existing affected processors added new116116+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate117117+specific variants of Processor MMIO Stale Data vulnerabilities and mitigation118118+capability.119119+120120+MSR IA32_ARCH_CAPABILITIES121121+--------------------------122122+Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the123123+ Shared Buffers Data Read (SBDR) vulnerability or the sideband stale124124+ data propagator (SSDP).125125+Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer126126+ Stale Data Propagator (FBSDP).127127+Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data128128+ Propagator (PSDP).129129+Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer130130+ values as part of MD_CLEAR operations. Processors that do not131131+ enumerate MDS_NO (meaning they are affected by MDS) but that do132132+ enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate133133+ FB_CLEAR as part of their MD_CLEAR support.134134+Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR135135+ IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS136136+ bit can be set to cause the VERW instruction to not perform the137137+ FB_CLEAR action. Not all processors that support FB_CLEAR will support138138+ FB_CLEAR_CTRL.139139+140140+MSR IA32_MCU_OPT_CTRL141141+---------------------142142+Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR143143+action. 
This may be useful to reduce the performance impact of FB_CLEAR in144144+cases where system software deems it warranted (for example, when performance145145+is more critical, or the untrusted software has no MMIO access). Note that146146+FB_CLEAR_DIS has no impact on enumeration (for example, it does not change147147+FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors148148+that enumerate FB_CLEAR.149149+150150+Mitigation151151+==========152152+Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the153153+same mitigation strategy to force the CPU to clear the affected buffers before154154+an attacker can extract the secrets.155155+156156+This is achieved by using the otherwise unused and obsolete VERW instruction in157157+combination with a microcode update. The microcode clears the affected CPU158158+buffers when the VERW instruction is executed.159159+160160+Kernel reuses the MDS function to invoke the buffer clearing:161161+162162+ mds_clear_cpu_buffers()163163+164164+On MDS affected CPUs, the kernel already invokes CPU buffer clear on165165+kernel/userspace, hypervisor/guest and C-state (idle) transitions. No166166+additional mitigation is needed on such CPUs.167167+168168+For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker169169+with MMIO capability. Therefore, VERW is not required for kernel/userspace. For170170+virtualization case, VERW is only needed at VMENTER for a guest with MMIO171171+capability.172172+173173+Mitigation points174174+-----------------175175+Return to user space176176+^^^^^^^^^^^^^^^^^^^^177177+Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation178178+needed.179179+180180+C-State transition181181+^^^^^^^^^^^^^^^^^^182182+Control register writes by CPU during C-state transition can propagate data183183+from fill buffer to uncore buffers. 
Execute VERW before C-state transition to184184+clear CPU fill buffers.185185+186186+Guest entry point187187+^^^^^^^^^^^^^^^^^188188+Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise189189+execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by190190+MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO191191+Stale Data vulnerabilities, so there is no need to execute VERW for such guests.192192+193193+Mitigation control on the kernel command line194194+---------------------------------------------195195+The kernel command line allows to control the Processor MMIO Stale Data196196+mitigations at boot time with the option "mmio_stale_data=". The valid197197+arguments for this option are:198198+199199+ ========== =================================================================200200+ full If the CPU is vulnerable, enable mitigation; CPU buffer clearing201201+ on exit to userspace and when entering a VM. Idle transitions are202202+ protected as well. It does not automatically disable SMT.203203+ full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the204204+ complete mitigation.205205+ off Disables mitigation completely.206206+ ========== =================================================================207207+208208+If the CPU is affected and mmio_stale_data=off is not supplied on the kernel209209+command line, then the kernel selects the appropriate mitigation.210210+211211+Mitigation status information212212+-----------------------------213213+The Linux kernel provides a sysfs interface to enumerate the current214214+vulnerability status of the system: whether the system is vulnerable, and215215+which mitigations are active. The relevant sysfs file is:216216+217217+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data218218+219219+The possible values in this file are:220220+221221+ .. 
list-table::222222+223223+ * - 'Not affected'224224+ - The processor is not vulnerable225225+ * - 'Vulnerable'226226+ - The processor is vulnerable, but no mitigation enabled227227+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'228228+ - The processor is vulnerable, but microcode is not updated. The229229+ mitigation is enabled on a best effort basis.230230+ * - 'Mitigation: Clear CPU buffers'231231+ - The processor is vulnerable and the CPU buffer clearing mitigation is232232+ enabled.233233+234234+If the processor is vulnerable then the following information is appended to235235+the above information:236236+237237+ ======================== ===========================================238238+ 'SMT vulnerable' SMT is enabled239239+ 'SMT disabled' SMT is disabled240240+ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown241241+ ======================== ===========================================242242+243243+References244244+----------245245+.. [#f1] Affected Processors246246+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
+36-1
Documentation/admin-guide/kernel-parameters.txt
···2469246924702470 protected: nVHE-based mode with support for guests whose24712471 state is kept private from the host.24722472- Not valid if the kernel is running in EL2.2473247224742473 Defaults to VHE/nVHE based on hardware support. Setting24752474 mode to "protected" will disable kexec and hibernation···31753176 srbds=off [X86,INTEL]31763177 no_entry_flush [PPC]31773178 no_uaccess_flush [PPC]31793179+ mmio_stale_data=off [X86]3178318031793181 Exceptions:31803182 This does not have any effect on···31973197 Equivalent to: l1tf=flush,nosmt [X86]31983198 mds=full,nosmt [X86]31993199 tsx_async_abort=full,nosmt [X86]32003200+ mmio_stale_data=full,nosmt [X86]3200320132013202 mminit_loglevel=32023203 [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this···32063205 of 0 disables mminit logging and a level of 4 will32073206 log everything. Information is printed at KERN_DEBUG32083207 so loglevel=8 may also need to be specified.32083208+32093209+ mmio_stale_data=32103210+ [X86,INTEL] Control mitigation for the Processor32113211+ MMIO Stale Data vulnerabilities.32123212+32133213+ Processor MMIO Stale Data is a class of32143214+ vulnerabilities that may expose data after an MMIO32153215+ operation. Exposed data could originate or end in32163216+ the same CPU buffers as affected by MDS and TAA.32173217+ Therefore, similar to MDS and TAA, the mitigation32183218+ is to clear the affected CPU buffers.32193219+32203220+ This parameter controls the mitigation. 
The32213221+ options are:32223222+32233223+ full - Enable mitigation on vulnerable CPUs32243224+32253225+ full,nosmt - Enable mitigation and disable SMT on32263226+ vulnerable CPUs.32273227+32283228+ off - Unconditionally disable mitigation32293229+32303230+ On MDS or TAA affected machines,32313231+ mmio_stale_data=off can be prevented by an active32323232+ MDS or TAA mitigation as these vulnerabilities are32333233+ mitigated with the same mechanism so in order to32343234+ disable this mitigation, you need to specify32353235+ mds=off and tsx_async_abort=off too.32363236+32373237+ Not specifying this option is equivalent to32383238+ mmio_stale_data=full.32393239+32403240+ For details see:32413241+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst3209324232103243 module.sig_enforce32113244 [KNL] When CONFIG_MODULE_SIG is set, this means that
+17-16
Documentation/filesystems/netfs_library.rst
···7979provided. Firstly, a function to perform basic initialisation on a context and8080set the operations table pointer::81818282- void netfs_inode_init(struct inode *inode,8282+ void netfs_inode_init(struct netfs_inode *ctx,8383 const struct netfs_request_ops *ops);84848585then a function to cast from the VFS inode structure to the netfs context::···8989and finally, a function to get the cache cookie pointer from the context9090attached to an inode (or NULL if fscache is disabled)::91919292- struct fscache_cookie *netfs_i_cookie(struct inode *inode);9292+ struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx);939394949595Buffered Read Helpers···136136137137 void netfs_readahead(struct readahead_control *ractl);138138 int netfs_read_folio(struct file *file,139139- struct folio *folio);140140- int netfs_write_begin(struct file *file,139139+ struct folio *folio);140140+ int netfs_write_begin(struct netfs_inode *ctx,141141+ struct file *file,141142 struct address_space *mapping,142143 loff_t pos,143144 unsigned int len,···158157through the supplied table of operations. Waits will be performed as159158necessary before returning for helpers that are meant to be synchronous.160159161161-If an error occurs and netfs_priv is non-NULL, ops->cleanup() will be called to162162-deal with it. If some parts of the request are in progress when an error163163-occurs, the request will get partially completed if sufficient data is read.160160+If an error occurs, the ->free_request() will be called to clean up the161161+netfs_io_request struct allocated. If some parts of the request are in162162+progress when an error occurs, the request will get partially completed if163163+sufficient data is read.164164165165Additionally, there is::166166···209207 * ``netfs_priv``210208211209 The network filesystem's private data. The value for this can be passed in212212- to the helper functions or set during the request. 
The ->cleanup() op will213213- be called if this is non-NULL at the end.210210+ to the helper functions or set during the request.214211215212 * ``start``216213 * ``len``···294293295294 struct netfs_request_ops {296295 void (*init_request)(struct netfs_io_request *rreq, struct file *file);296296+ void (*free_request)(struct netfs_io_request *rreq);297297 int (*begin_cache_operation)(struct netfs_io_request *rreq);298298 void (*expand_readahead)(struct netfs_io_request *rreq);299299 bool (*clamp_length)(struct netfs_io_subrequest *subreq);···303301 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,304302 struct folio *folio, void **_fsdata);305303 void (*done)(struct netfs_io_request *rreq);306306- void (*cleanup)(struct address_space *mapping, void *netfs_priv);307304 };308305309306The operations are as follows:···310309 * ``init_request()``311310312311 [Optional] This is called to initialise the request structure. It is given313313- the file for reference and can modify the ->netfs_priv value.312312+ the file for reference.313313+314314+ * ``free_request()``315315+316316+ [Optional] This is called as the request is being deallocated so that the317317+ filesystem can clean up any state it has attached there.314318315319 * ``begin_cache_operation()``316320···388382389383 [Optional] This is called after the folios in the request have all been390384 unlocked (and marked uptodate if applicable).391391-392392- * ``cleanup``393393-394394- [Optional] This is called as the request is being deallocated so that the395395- filesystem can clean up ->netfs_priv.396385397386398387
+37
Documentation/networking/ip-sysctl.rst
···2925292529262926 Default: 02927292729282928+reconf_enable - BOOLEAN29292929+ Enable or disable extension of Stream Reconfiguration functionality29302930+ specified in RFC6525. This extension provides the ability to "reset"29312931+ a stream, and it includes the Parameters of "Outgoing/Incoming SSN29322932+ Reset", "SSN/TSN Reset" and "Add Outgoing/Incoming Streams".29332933+29342934+ - 1: Enable extension.29352935+ - 0: Disable extension.29362936+29372937+ Default: 029382938+29392939+intl_enable - BOOLEAN29402940+ Enable or disable extension of User Message Interleaving functionality29412941+ specified in RFC8260. This extension allows the interleaving of user29422942+ messages sent on different streams. With this feature enabled, I-DATA29432943+ chunk will replace DATA chunk to carry user messages if also supported29442944+ by the peer. Note that to use this feature, one needs to set this option29452945+ to 1 and also needs to set socket options SCTP_FRAGMENT_INTERLEAVE to 229462946+ and SCTP_INTERLEAVING_SUPPORTED to 1.29472947+29482948+ - 1: Enable extension.29492949+ - 0: Disable extension.29502950+29512951+ Default: 029522952+29532953+ecn_enable - BOOLEAN29542954+ Control use of Explicit Congestion Notification (ECN) by SCTP.29552955+ Like in TCP, ECN is used only when both ends of the SCTP connection29562956+ indicate support for it. This feature is useful in avoiding losses29572957+ due to congestion by allowing supporting routers to signal congestion29582958+ before having to drop packets.29592959+29602960+ 1: Enable ecn.29612961+ 0: Disable ecn.29622962+29632963+ Default: 129642964+2928296529292966``/proc/sys/net/core/*``29302967========================
+1-1
Documentation/networking/phy.rst
···104104105105* PHY device drivers in PHYLIB being reusable by nature, being able to106106 configure correctly a specified delay enables more designs with similar delay107107- requirements to be operate correctly107107+ requirements to be operated correctly108108109109For cases where the PHY is not capable of providing this delay, but the110110Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
+12
Documentation/process/changes.rst
···3232GNU C 5.1 gcc --version3333Clang/LLVM (optional) 11.0.0 clang --version3434GNU make 3.81 make --version3535+bash 4.2 bash --version3536binutils 2.23 ld -v3637flex 2.5.35 flex --version3738bison 2.0 bison --version···8483----85848685You will need GNU make 3.81 or later to build the kernel.8686+8787+Bash8888+----8989+9090+Some bash scripts are used for the kernel build.9191+Bash 4.2 or newer is needed.87928893Binutils8994--------···368361----369362370363- <ftp://ftp.gnu.org/gnu/make/>364364+365365+Bash366366+----367367+368368+- <ftp://ftp.gnu.org/gnu/bash/>371369372370Binutils373371--------
+3-1
MAINTAINERS
···7653765376547654FILE LOCKING (flock() and fcntl()/lockf())76557655M: Jeff Layton <jlayton@kernel.org>76567656+M: Chuck Lever <chuck.lever@oracle.com>76567657L: linux-fsdevel@vger.kernel.org76577658S: Maintained76587659F: fs/fcntl.c···10746107451074710746KERNEL NFSD, SUNRPC, AND LOCKD SERVERS1074810747M: Chuck Lever <chuck.lever@oracle.com>1074810748+M: Jeff Layton <jlayton@kernel.org>1074910749L: linux-nfs@vger.kernel.org1075010750S: Supported1075110751W: http://nfs.sourceforge.net/···1087110869F: arch/riscv/include/uapi/asm/kvm*1087210870F: arch/riscv/kvm/1087310871F: tools/testing/selftests/kvm/*/riscv/1087410874-F: tools/testing/selftests/kvm/riscv/10875108721087610873KERNEL VIRTUAL MACHINE for s390 (KVM/s390)1087710874M: Christian Borntraeger <borntraeger@linux.ibm.com>···1379913798F: Documentation/devicetree/bindings/net/1380013799F: drivers/connector/1380113800F: drivers/net/1380113801+F: include/dt-bindings/net/1380213802F: include/linux/etherdevice.h1380313803F: include/linux/fcdevice.h1380413804F: include/linux/fddidevice.h
···363363 struct kvm_pmu pmu;364364365365 /*366366- * Anything that is not used directly from assembly code goes367367- * here.368368- */369369-370370- /*371366 * Guest registers we preserve during guest debugging.372367 *373368 * These shadow registers are updated by the kvm_handle_sys_reg
+3
arch/arm64/include/asm/virt.h
···113113 /*114114 * Code only run in VHE/NVHE hyp context can assume VHE is present or115115 * absent. Otherwise fall back to caps.116116+ * This allows the compiler to discard VHE-specific code from the117117+ * nVHE object, reducing the number of external symbol references118118+ * needed to link.116119 */117120 if (is_vhe_hyp_code())118121 return true;
+1-9
arch/arm64/kernel/cpufeature.c
···19741974#ifdef CONFIG_KVM19751975static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)19761976{19771977- if (kvm_get_mode() != KVM_MODE_PROTECTED)19781978- return false;19791979-19801980- if (is_kernel_in_hyp_mode()) {19811981- pr_warn("Protected KVM not available with VHE\n");19821982- return false;19831983- }19841984-19851985- return true;19771977+ return kvm_get_mode() == KVM_MODE_PROTECTED;19861978}19871979#endif /* CONFIG_KVM */19881980
+3
arch/arm64/kvm/arch_timer.c
···12301230 struct kvm_vcpu *vcpu = kvm_get_running_vcpu();12311231 struct arch_timer_context *timer;1232123212331233+ if (WARN(!vcpu, "No vcpu context!\n"))12341234+ return false;12351235+12331236 if (vintid == vcpu_vtimer(vcpu)->irq.irq)12341237 timer = vcpu_vtimer(vcpu);12351238 else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
+8-2
arch/arm64/kvm/arm.c
···150150 if (ret)151151 goto out_free_stage2_pgd;152152153153- if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))153153+ if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {154154+ ret = -ENOMEM;154155 goto out_free_stage2_pgd;156156+ }155157 cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);156158157159 kvm_vgic_early_init(kvm);···22732271 return -EINVAL;2274227222752273 if (strcmp(arg, "protected") == 0) {22762276- kvm_mode = KVM_MODE_PROTECTED;22742274+ if (!is_kernel_in_hyp_mode())22752275+ kvm_mode = KVM_MODE_PROTECTED;22762276+ else22772277+ pr_warn_once("Protected KVM not available with VHE\n");22782278+22772279 return 0;22782280 }22792281
+2
arch/arm64/kvm/fpsimd.c
···8080 vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;8181 vcpu->arch.flags |= KVM_ARM64_FP_HOST;82828383+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;8384 if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)8485 vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;8586···9493 * operations. Do this for ZA as well for now for simplicity.9594 */9695 if (system_supports_sme()) {9696+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;9797 if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)9898 vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;9999
···353353 return 0;354354}355355356356-static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,357357- gpa_t addr, unsigned int len)358358-{359359- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);360360- u32 value = 0;361361- int i;362362-363363- /*364364- * pending state of interrupt is latched in pending_latch variable.365365- * Userspace will save and restore pending state and line_level366366- * separately.367367- * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst368368- * for handling of ISPENDR and ICPENDR.369369- */370370- for (i = 0; i < len * 8; i++) {371371- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);372372- bool state = irq->pending_latch;373373-374374- if (irq->hw && vgic_irq_is_sgi(irq->intid)) {375375- int err;376376-377377- err = irq_get_irqchip_state(irq->host_irq,378378- IRQCHIP_STATE_PENDING,379379- &state);380380- WARN_ON(err);381381- }382382-383383- if (state)384384- value |= (1U << i);385385-386386- vgic_put_irq(vcpu->kvm, irq);387387- }388388-389389- return value;390390-}391391-392356static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,393357 gpa_t addr, unsigned int len,394358 unsigned long val)···630666 VGIC_ACCESS_32bit),631667 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,632668 vgic_mmio_read_pending, vgic_mmio_write_spending,633633- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,669669+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,634670 VGIC_ACCESS_32bit),635671 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,636672 vgic_mmio_read_pending, vgic_mmio_write_cpending,···714750 VGIC_ACCESS_32bit),715751 REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,716752 vgic_mmio_read_pending, vgic_mmio_write_spending,717717- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,753753+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,718754 VGIC_ACCESS_32bit),719755 REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,720756 
vgic_mmio_read_pending, vgic_mmio_write_cpending,
+36-4
arch/arm64/kvm/vgic/vgic-mmio.c
···226226 return 0;227227}228228229229-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,230230- gpa_t addr, unsigned int len)229229+static unsigned long __read_pending(struct kvm_vcpu *vcpu,230230+ gpa_t addr, unsigned int len,231231+ bool is_user)231232{232233 u32 intid = VGIC_ADDR_TO_INTID(addr, 1);233234 u32 value = 0;···240239 unsigned long flags;241240 bool val;242241242242+ /*243243+ * When used from userspace with a GICv3 model:244244+ *245245+ * Pending state of interrupt is latched in pending_latch246246+ * variable. Userspace will save and restore pending state247247+ * and line_level separately.248248+ * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst249249+ * for handling of ISPENDR and ICPENDR.250250+ */243251 raw_spin_lock_irqsave(&irq->irq_lock, flags);244252 if (irq->hw && vgic_irq_is_sgi(irq->intid)) {245253 int err;···258248 IRQCHIP_STATE_PENDING,259249 &val);260250 WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);261261- } else if (vgic_irq_is_mapped_level(irq)) {251251+ } else if (!is_user && vgic_irq_is_mapped_level(irq)) {262252 val = vgic_get_phys_line_level(irq);263253 } else {264264- val = irq_is_pending(irq);254254+ switch (vcpu->kvm->arch.vgic.vgic_model) {255255+ case KVM_DEV_TYPE_ARM_VGIC_V3:256256+ if (is_user) {257257+ val = irq->pending_latch;258258+ break;259259+ }260260+ fallthrough;261261+ default:262262+ val = irq_is_pending(irq);263263+ break;264264+ }265265 }266266267267 value |= ((u32)val << i);···281261 }282262283263 return value;264264+}265265+266266+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,267267+ gpa_t addr, unsigned int len)268268+{269269+ return __read_pending(vcpu, addr, len, false);270270+}271271+272272+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,273273+ gpa_t addr, unsigned int len)274274+{275275+ return __read_pending(vcpu, addr, len, true);284276}285277286278static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+3
arch/arm64/kvm/vgic/vgic-mmio.h
···149149unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,150150 gpa_t addr, unsigned int len);151151152152+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,153153+ gpa_t addr, unsigned int len);154154+152155void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,153156 gpa_t addr, unsigned int len,154157 unsigned long val);
+1-1
arch/arm64/kvm/vmid.c
···6666 * the next context-switch, we broadcast TLB flush + I-cache6767 * invalidation over the inner shareable domain on rollover.6868 */6969- kvm_call_hyp(__kvm_flush_vm_context);6969+ kvm_call_hyp(__kvm_flush_vm_context);7070}71717272static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
+1
arch/loongarch/Kconfig
···343343344344config NUMA345345 bool "NUMA Support"346346+ select SMP346347 select ACPI_NUMA if ACPI347348 help348349 Say Y to compile the kernel with NUMA (Non-Uniform Memory Access)
+1-1
arch/loongarch/include/asm/hardirq.h
···1919 unsigned int __softirq_pending;2020} ____cacheline_aligned irq_cpustat_t;21212222-DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);2222+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);23232424#define __ARCH_IRQ_STAT2525
+1
arch/loongarch/include/asm/percpu.h
···66#define __ASM_PERCPU_H7788#include <asm/cmpxchg.h>99+#include <asm/loongarch.h>9101011/* Use r21 for fast access */1112register unsigned long __my_cpu_offset __asm__("$r21");
+7-16
arch/loongarch/include/asm/smp.h
···99#include <linux/atomic.h>1010#include <linux/bitops.h>1111#include <linux/linkage.h>1212-#include <linux/smp.h>1312#include <linux/threads.h>1413#include <linux/cpumask.h>1414+1515+extern int smp_num_siblings;1616+extern int num_processors;1717+extern int disabled_cpus;1818+extern cpumask_t cpu_sibling_map[];1919+extern cpumask_t cpu_core_map[];2020+extern cpumask_t cpu_foreign_map[];15211622void loongson3_smp_setup(void);1723void loongson3_prepare_cpus(unsigned int max_cpus);···3125void loongson3_cpu_die(unsigned int cpu);3226#endif33273434-#ifdef CONFIG_SMP3535-3628static inline void plat_smp_setup(void)3729{3830 loongson3_smp_setup();3931}4040-4141-#else /* !CONFIG_SMP */4242-4343-static inline void plat_smp_setup(void) { }4444-4545-#endif /* !CONFIG_SMP */4646-4747-extern int smp_num_siblings;4848-extern int num_processors;4949-extern int disabled_cpus;5050-extern cpumask_t cpu_sibling_map[];5151-extern cpumask_t cpu_core_map[];5252-extern cpumask_t cpu_foreign_map[];53325433static inline int raw_smp_processor_id(void)5534{
-7
arch/loongarch/include/asm/timex.h
···1212#include <asm/cpu.h>1313#include <asm/cpu-features.h>14141515-/*1616- * Standard way to access the cycle counter.1717- * Currently only used on SMP for scheduling.1818- *1919- * We know that all SMP capable CPUs have cycle counters.2020- */2121-2215typedef unsigned long cycles_t;23162417#define get_cycles get_cycles
···44 *55 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited66 */77+#include <asm/cpu-info.h>78#include <linux/cacheinfo.h>89910/* Populates leaf and increments to next leaf */
+6-1
arch/loongarch/kernel/irq.c
···2222#include <asm/setup.h>23232424DEFINE_PER_CPU(unsigned long, irq_stack);2525+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);2626+EXPORT_PER_CPU_SYMBOL(irq_stat);25272628struct irq_domain *cpu_domain;2729struct irq_domain *liointc_domain;···58565957void __init init_IRQ(void)6058{6161- int i, r, ipi_irq;5959+ int i;6060+#ifdef CONFIG_SMP6161+ int r, ipi_irq;6262 static int ipi_dummy_dev;6363+#endif6364 unsigned int order = get_order(IRQ_STACK_SIZE);6465 struct page *page;6566
···9797 * We ran out of VMIDs so we increment vmid_version and9898 * start assigning VMIDs from 1.9999 *100100- * This also means existing VMIDs assignement to all Guest100100+ * This also means existing VMIDs assignment to all Guest101101 * instances is invalid and we have to force VMID re-assignment102102 * for all Guest instances. The Guest instances that were not103103 * running will automatically pick-up new VMIDs because will
···446446#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */447447#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */448448#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */449449+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */449450450451#endif /* _ASM_X86_CPUFEATURES_H */
+68-5
arch/x86/include/asm/kvm_host.h
···10471047};1048104810491049enum kvm_apicv_inhibit {10501050+10511051+ /********************************************************************/10521052+ /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */10531053+ /********************************************************************/10541054+10551055+ /*10561056+ * APIC acceleration is disabled by a module parameter10571057+ * and/or not supported in hardware.10581058+ */10501059 APICV_INHIBIT_REASON_DISABLE,10601060+10611061+ /*10621062+ * APIC acceleration is inhibited because AutoEOI feature is10631063+ * being used by a HyperV guest.10641064+ */10511065 APICV_INHIBIT_REASON_HYPERV,10521052- APICV_INHIBIT_REASON_NESTED,10531053- APICV_INHIBIT_REASON_IRQWIN,10541054- APICV_INHIBIT_REASON_PIT_REINJ,10551055- APICV_INHIBIT_REASON_X2APIC,10561056- APICV_INHIBIT_REASON_BLOCKIRQ,10661066+10671067+ /*10681068+ * APIC acceleration is inhibited because the userspace didn't yet10691069+ * enable the kernel/split irqchip.10701070+ */10571071 APICV_INHIBIT_REASON_ABSENT,10721072+10731073+ /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ10741074+ * (out of band, debug measure of blocking all interrupts on this vCPU)10751075+ * was enabled, to avoid AVIC/APICv bypassing it.10761076+ */10771077+ APICV_INHIBIT_REASON_BLOCKIRQ,10781078+10791079+ /*10801080+ * For simplicity, the APIC acceleration is inhibited10811081+ * first time either APIC ID or APIC base are changed by the guest10821082+ * from their reset values.10831083+ */10841084+ APICV_INHIBIT_REASON_APIC_ID_MODIFIED,10851085+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,10861086+10871087+ /******************************************************/10881088+ /* INHIBITs that are relevant only to the AMD's AVIC. 
*/10891089+ /******************************************************/10901090+10911091+ /*10921092+ * AVIC is inhibited on a vCPU because it runs a nested guest.10931093+ *10941094+ * This is needed because unlike APICv, the peers of this vCPU10951095+ * cannot use the doorbell mechanism to signal interrupts via AVIC when10961096+ * a vCPU runs nested.10971097+ */10981098+ APICV_INHIBIT_REASON_NESTED,10991099+11001100+ /*11011101+ * On SVM, the wait for the IRQ window is implemented with pending vIRQ,11021102+ * which cannot be injected when the AVIC is enabled, thus AVIC11031103+ * is inhibited while KVM waits for IRQ window.11041104+ */11051105+ APICV_INHIBIT_REASON_IRQWIN,11061106+11071107+ /*11081108+ * PIT (i8254) 're-inject' mode, relies on EOI intercept,11091109+ * which AVIC doesn't support for edge triggered interrupts.11101110+ */11111111+ APICV_INHIBIT_REASON_PIT_REINJ,11121112+11131113+ /*11141114+ * AVIC is inhibited because the guest has x2apic in its CPUID.11151115+ */11161116+ APICV_INHIBIT_REASON_X2APIC,11171117+11181118+ /*11191119+ * AVIC is disabled because SEV doesn't support it.11201120+ */10581121 APICV_INHIBIT_REASON_SEV,10591122};10601123
+25
arch/x86/include/asm/msr-index.h
···116116 * Not susceptible to117117 * TSX Async Abort (TAA) vulnerabilities.118118 */119119+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*120120+ * Not susceptible to SBDR and SSDP121121+ * variants of Processor MMIO stale data122122+ * vulnerabilities.123123+ */124124+#define ARCH_CAP_FBSDP_NO BIT(14) /*125125+ * Not susceptible to FBSDP variant of126126+ * Processor MMIO stale data127127+ * vulnerabilities.128128+ */129129+#define ARCH_CAP_PSDP_NO BIT(15) /*130130+ * Not susceptible to PSDP variant of131131+ * Processor MMIO stale data132132+ * vulnerabilities.133133+ */134134+#define ARCH_CAP_FB_CLEAR BIT(17) /*135135+ * VERW clears CPU fill buffer136136+ * even on MDS_NO CPUs.137137+ */138138+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*139139+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]140140+ * bit available to control VERW141141+ * behavior.142142+ */119143120144#define MSR_IA32_FLUSH_CMD 0x0000010b121145#define L1D_FLUSH BIT(0) /*···157133#define MSR_IA32_MCU_OPT_CTRL 0x00000123158134#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */159135#define RTM_ALLOW BIT(1) /* TSX development mode */136136+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */160137161138#define MSR_IA32_SYSENTER_CS 0x00000174162139#define MSR_IA32_SYSENTER_ESP 0x00000175
···4141static void __init ssb_select_mitigation(void);4242static void __init l1tf_select_mitigation(void);4343static void __init mds_select_mitigation(void);4444-static void __init mds_print_mitigation(void);4444+static void __init md_clear_update_mitigation(void);4545+static void __init md_clear_select_mitigation(void);4546static void __init taa_select_mitigation(void);4747+static void __init mmio_select_mitigation(void);4648static void __init srbds_select_mitigation(void);4749static void __init l1d_flush_select_mitigation(void);4850···8785 */8886DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);89878888+/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */8989+DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);9090+EXPORT_SYMBOL_GPL(mmio_stale_data_clear);9191+9092void __init check_bugs(void)9193{9294 identify_boot_cpu();···123117 spectre_v2_select_mitigation();124118 ssb_select_mitigation();125119 l1tf_select_mitigation();126126- mds_select_mitigation();127127- taa_select_mitigation();120120+ md_clear_select_mitigation();128121 srbds_select_mitigation();129122 l1d_flush_select_mitigation();130130-131131- /*132132- * As MDS and TAA mitigations are inter-related, print MDS133133- * mitigation until after TAA mitigation selection is done.134134- */135135- mds_print_mitigation();136123137124 arch_smt_update();138125···266267 }267268}268269269269-static void __init mds_print_mitigation(void)270270-{271271- if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())272272- return;273273-274274- pr_info("%s\n", mds_strings[mds_mitigation]);275275-}276276-277270static int __init mds_cmdline(char *str)278271{279272 if (!boot_cpu_has_bug(X86_BUG_MDS))···320329 /* TSX previously disabled by tsx=off */321330 if (!boot_cpu_has(X86_FEATURE_RTM)) {322331 taa_mitigation = TAA_MITIGATION_TSX_DISABLED;323323- goto out;332332+ return;324333 }325334326335 if (cpu_mitigations_off()) {···334343 */335344 if (taa_mitigation == TAA_MITIGATION_OFF &&336345 mds_mitigation == 
MDS_MITIGATION_OFF)337337- goto out;346346+ return;338347339348 if (boot_cpu_has(X86_FEATURE_MD_CLEAR))340349 taa_mitigation = TAA_MITIGATION_VERW;···366375367376 if (taa_nosmt || cpu_mitigations_auto_nosmt())368377 cpu_smt_disable(false);369369-370370- /*371371- * Update MDS mitigation, if necessary, as the mds_user_clear is372372- * now enabled for TAA mitigation.373373- */374374- if (mds_mitigation == MDS_MITIGATION_OFF &&375375- boot_cpu_has_bug(X86_BUG_MDS)) {376376- mds_mitigation = MDS_MITIGATION_FULL;377377- mds_select_mitigation();378378- }379379-out:380380- pr_info("%s\n", taa_strings[taa_mitigation]);381378}382379383380static int __init tsx_async_abort_parse_cmdline(char *str)···388409 return 0;389410}390411early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);412412+413413+#undef pr_fmt414414+#define pr_fmt(fmt) "MMIO Stale Data: " fmt415415+416416+enum mmio_mitigations {417417+ MMIO_MITIGATION_OFF,418418+ MMIO_MITIGATION_UCODE_NEEDED,419419+ MMIO_MITIGATION_VERW,420420+};421421+422422+/* Default mitigation for Processor MMIO Stale Data vulnerabilities */423423+static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW;424424+static bool mmio_nosmt __ro_after_init = false;425425+426426+static const char * const mmio_strings[] = {427427+ [MMIO_MITIGATION_OFF] = "Vulnerable",428428+ [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",429429+ [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",430430+};431431+432432+static void __init mmio_select_mitigation(void)433433+{434434+ u64 ia32_cap;435435+436436+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||437437+ cpu_mitigations_off()) {438438+ mmio_mitigation = MMIO_MITIGATION_OFF;439439+ return;440440+ }441441+442442+ if (mmio_mitigation == MMIO_MITIGATION_OFF)443443+ return;444444+445445+ ia32_cap = x86_read_arch_cap_msr();446446+447447+ /*448448+ * Enable CPU buffer clear mitigation for host and VMM, if also affected449449+ * by 
MDS or TAA. Otherwise, enable mitigation for VMM only.450450+ */451451+ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&452452+ boot_cpu_has(X86_FEATURE_RTM)))453453+ static_branch_enable(&mds_user_clear);454454+ else455455+ static_branch_enable(&mmio_stale_data_clear);456456+457457+ /*458458+ * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can459459+ * be propagated to uncore buffers, clearing the Fill buffers on idle460460+ * is required irrespective of SMT state.461461+ */462462+ if (!(ia32_cap & ARCH_CAP_FBSDP_NO))463463+ static_branch_enable(&mds_idle_clear);464464+465465+ /*466466+ * Check if the system has the right microcode.467467+ *468468+ * CPU Fill buffer clear mitigation is enumerated by either an explicit469469+ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS470470+ * affected systems.471471+ */472472+ if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||473473+ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&474474+ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&475475+ !(ia32_cap & ARCH_CAP_MDS_NO)))476476+ mmio_mitigation = MMIO_MITIGATION_VERW;477477+ else478478+ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;479479+480480+ if (mmio_nosmt || cpu_mitigations_auto_nosmt())481481+ cpu_smt_disable(false);482482+}483483+484484+static int __init mmio_stale_data_parse_cmdline(char *str)485485+{486486+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))487487+ return 0;488488+489489+ if (!str)490490+ return -EINVAL;491491+492492+ if (!strcmp(str, "off")) {493493+ mmio_mitigation = MMIO_MITIGATION_OFF;494494+ } else if (!strcmp(str, "full")) {495495+ mmio_mitigation = MMIO_MITIGATION_VERW;496496+ } else if (!strcmp(str, "full,nosmt")) {497497+ mmio_mitigation = MMIO_MITIGATION_VERW;498498+ mmio_nosmt = true;499499+ }500500+501501+ return 0;502502+}503503+early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);504504+505505+#undef pr_fmt506506+#define pr_fmt(fmt) "" fmt507507+508508+static void __init 
md_clear_update_mitigation(void)509509+{510510+ if (cpu_mitigations_off())511511+ return;512512+513513+ if (!static_key_enabled(&mds_user_clear))514514+ goto out;515515+516516+ /*517517+ * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data518518+ * mitigation, if necessary.519519+ */520520+ if (mds_mitigation == MDS_MITIGATION_OFF &&521521+ boot_cpu_has_bug(X86_BUG_MDS)) {522522+ mds_mitigation = MDS_MITIGATION_FULL;523523+ mds_select_mitigation();524524+ }525525+ if (taa_mitigation == TAA_MITIGATION_OFF &&526526+ boot_cpu_has_bug(X86_BUG_TAA)) {527527+ taa_mitigation = TAA_MITIGATION_VERW;528528+ taa_select_mitigation();529529+ }530530+ if (mmio_mitigation == MMIO_MITIGATION_OFF &&531531+ boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {532532+ mmio_mitigation = MMIO_MITIGATION_VERW;533533+ mmio_select_mitigation();534534+ }535535+out:536536+ if (boot_cpu_has_bug(X86_BUG_MDS))537537+ pr_info("MDS: %s\n", mds_strings[mds_mitigation]);538538+ if (boot_cpu_has_bug(X86_BUG_TAA))539539+ pr_info("TAA: %s\n", taa_strings[taa_mitigation]);540540+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))541541+ pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);542542+}543543+544544+static void __init md_clear_select_mitigation(void)545545+{546546+ mds_select_mitigation();547547+ taa_select_mitigation();548548+ mmio_select_mitigation();549549+550550+ /*551551+ * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update552552+ * and print their mitigation after MDS, TAA and MMIO Stale Data553553+ * mitigation selection is done.554554+ */555555+ md_clear_update_mitigation();556556+}391557392558#undef pr_fmt393559#define pr_fmt(fmt) "SRBDS: " fmt···602478 return;603479604480 /*605605- * Check to see if this is one of the MDS_NO systems supporting606606- * TSX that are only exposed to SRBDS when TSX is enabled.481481+ * Check to see if this is one of the MDS_NO systems supporting TSX that482482+ * are only exposed to SRBDS when TSX is enabled or when 
CPU is affected483483+ * by Processor MMIO Stale Data vulnerability.607484 */608485 ia32_cap = x86_read_arch_cap_msr();609609- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))486486+ if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&487487+ !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))610488 srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;611489 else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))612490 srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;···12421116/* Update the static key controlling the MDS CPU buffer clear in idle */12431117static void update_mds_branch_idle(void)12441118{11191119+ u64 ia32_cap = x86_read_arch_cap_msr();11201120+12451121 /*12461122 * Enable the idle clearing if SMT is active on CPUs which are12471123 * affected only by MSBDS and not any other MDS variant.···12551127 if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))12561128 return;1257112912581258- if (sched_smt_active())11301130+ if (sched_smt_active()) {12591131 static_branch_enable(&mds_idle_clear);12601260- else11321132+ } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||11331133+ (ia32_cap & ARCH_CAP_FBSDP_NO)) {12611134 static_branch_disable(&mds_idle_clear);11351135+ }12621136}1263113712641138#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"12651139#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"11401140+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"1266114112671142void cpu_bugs_smt_update(void)12681143{···13071176 break;13081177 case TAA_MITIGATION_TSX_DISABLED:13091178 case TAA_MITIGATION_OFF:11791179+ break;11801180+ }11811181+11821182+ switch (mmio_mitigation) {11831183+ case MMIO_MITIGATION_VERW:11841184+ case MMIO_MITIGATION_UCODE_NEEDED:11851185+ if (sched_smt_active())11861186+ pr_warn_once(MMIO_MSG_SMT);11871187+ break;11881188+ case MMIO_MITIGATION_OFF:13101189 break;13111190 }13121191···19221781 sched_smt_active() ? "vulnerable" : "disabled");19231782}1924178317841784+static ssize_t mmio_stale_data_show_state(char *buf)17851785+{17861786+ if (mmio_mitigation == MMIO_MITIGATION_OFF)17871787+ return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);17881788+17891789+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {17901790+ return sysfs_emit(buf, "%s; SMT Host state unknown\n",17911791+ mmio_strings[mmio_mitigation]);17921792+ }17931793+17941794+ return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation],17951795+ sched_smt_active() ? "vulnerable" : "disabled");17961796+}17971797+19251798static char *stibp_state(void)19261799{19271800 if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))···20361881 case X86_BUG_SRBDS:20371882 return srbds_show_state(buf);2038188318841884+ case X86_BUG_MMIO_STALE_DATA:18851885+ return mmio_stale_data_show_state(buf);18861886+20391887 default:20401888 break;20411889 }···20891931ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf)20901932{20911933 return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);19341934+}19351935+19361936+ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)19371937+{19381938+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);20921939}20931940#endif
+49-3
arch/x86/kernel/cpu/common.c
···12111211 X86_FEATURE_ANY, issues)1212121212131213#define SRBDS BIT(0)12141214+/* CPU is affected by X86_BUG_MMIO_STALE_DATA */12151215+#define MMIO BIT(1)12161216+/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */12171217+#define MMIO_SBDS BIT(2)1214121812151219static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {12161220 VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),12171221 VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),12181222 VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),12191223 VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),12241224+ VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO),12251225+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO),12201226 VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),12271227+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),12211228 VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),12291229+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),12221230 VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),12311231+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) |12321232+ BIT(7) | BIT(0xB), MMIO),12331233+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO),12231234 VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),12241224- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),12251225- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),12351235+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO),12361236+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS),12371237+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO),12381238+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS),12391239+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS),12401240+ 
VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO),12411241+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO),12421242+ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS),12431243+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),12441244+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO),12451245+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),12461246+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO),12471247+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS),12481248+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),12491249+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS),12261250 {}12271251};12281252···12651241 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);1266124212671243 return ia32_cap;12441244+}12451245+12461246+static bool arch_cap_mmio_immune(u64 ia32_cap)12471247+{12481248+ return (ia32_cap & ARCH_CAP_FBSDP_NO &&12491249+ ia32_cap & ARCH_CAP_PSDP_NO &&12501250+ ia32_cap & ARCH_CAP_SBDR_SSDP_NO);12681251}1269125212701253static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)···13271296 /*13281297 * SRBDS affects CPUs which support RDRAND or RDSEED and are listed13291298 * in the vulnerability blacklist.12991299+ *13001300+ * Some of the implications and mitigation of Shared Buffers Data13011301+ * Sampling (SBDS) are similar to SRBDS. 
Give SBDS same treatment as13021302+ * SRBDS.13301303 */13311304 if ((cpu_has(c, X86_FEATURE_RDRAND) ||13321305 cpu_has(c, X86_FEATURE_RDSEED)) &&13331333- cpu_matches(cpu_vuln_blacklist, SRBDS))13061306+ cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))13341307 setup_force_cpu_bug(X86_BUG_SRBDS);13081308+13091309+ /*13101310+ * Processor MMIO Stale Data bug enumeration13111311+ *13121312+ * Affected CPU list is generally enough to enumerate the vulnerability,13131313+ * but for virtualization case check for ARCH_CAP MSR bits also, VMM may13141314+ * not want the guest to enumerate the bug.13151315+ */13161316+ if (cpu_matches(cpu_vuln_blacklist, MMIO) &&13171317+ !arch_cap_mmio_immune(ia32_cap))13181318+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);1335131913361320 if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))13371321 return;
+23-4
arch/x86/kvm/lapic.c
···20392039 }20402040}2041204120422042+static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)20432043+{20442044+ struct kvm *kvm = apic->vcpu->kvm;20452045+20462046+ if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))20472047+ return;20482048+20492049+ if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)20502050+ return;20512051+20522052+ kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);20532053+}20542054+20422055static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)20432056{20442057 int ret = 0;···2060204720612048 switch (reg) {20622049 case APIC_ID: /* Local APIC ID */20632063- if (!apic_x2apic_mode(apic))20502050+ if (!apic_x2apic_mode(apic)) {20642051 kvm_apic_set_xapic_id(apic, val >> 24);20652065- else20522052+ kvm_lapic_xapic_id_updated(apic);20532053+ } else {20662054 ret = 1;20552055+ }20672056 break;2068205720692058 case APIC_TASKPRI:···23512336 MSR_IA32_APICBASE_BASE;2352233723532338 if ((value & MSR_IA32_APICBASE_ENABLE) &&23542354- apic->base_address != APIC_DEFAULT_PHYS_BASE)23552355- pr_warn_once("APIC base relocation is unsupported by KVM");23392339+ apic->base_address != APIC_DEFAULT_PHYS_BASE) {23402340+ kvm_set_apicv_inhibit(apic->vcpu->kvm,23412341+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);23422342+ }23562343}2357234423582345void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)···26652648 icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);26662649 __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);26672650 }26512651+ } else {26522652+ kvm_lapic_xapic_id_updated(vcpu->arch.apic);26682653 }2669265426702655 return 0;
···17471747}17481748EXPORT_SYMBOL(bioset_init);1749174917501750-/*17511751- * Initialize and setup a new bio_set, based on the settings from17521752- * another bio_set.17531753- */17541754-int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)17551755-{17561756- int flags;17571757-17581758- flags = 0;17591759- if (src->bvec_pool.min_nr)17601760- flags |= BIOSET_NEED_BVECS;17611761- if (src->rescue_workqueue)17621762- flags |= BIOSET_NEED_RESCUER;17631763- if (src->cache)17641764- flags |= BIOSET_PERCPU_CACHE;17651765-17661766- return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);17671767-}17681768-EXPORT_SYMBOL(bioset_init_from_src);17691769-17701750static int __init init_bio(void)17711751{17721752 int i;
···429429 driver include crash and makedumpfile.430430431431config RANDOM_TRUST_CPU432432- bool "Trust the CPU manufacturer to initialize Linux's CRNG"432432+ bool "Initialize RNG using CPU RNG instructions"433433+ default y433434 depends on ARCH_RANDOM434434- default n435435 help436436- Assume that CPU manufacturer (e.g., Intel or AMD for RDSEED or437437- RDRAND, IBM for the S390 and Power PC architectures) is trustworthy438438- for the purposes of initializing Linux's CRNG. Since this is not439439- something that can be independently audited, this amounts to trusting440440- that CPU manufacturer (perhaps with the insistence or mandate441441- of a Nation State's intelligence or law enforcement agencies)442442- has not installed a hidden back door to compromise the CPU's443443- random number generation facilities. This can also be configured444444- at boot with "random.trust_cpu=on/off".436436+ Initialize the RNG using random numbers supplied by the CPU's437437+ RNG instructions (e.g. RDRAND), if supported and available. These438438+ random numbers are never used directly, but are rather hashed into439439+ the main input pool, and this happens regardless of whether or not440440+ this option is enabled. Instead, this option controls whether441441+ they are credited and hence can initialize the RNG. Additionally,442442+ other sources of randomness are always used, regardless of this443443+ setting. Enabling this implies trusting that the CPU can supply high444444+ quality and non-backdoored random numbers.445445+446446+ Say Y here unless you have reason to mistrust your CPU or believe447447+ its RNG facilities may be faulty. 
This may also be configured at448448+ boot time with "random.trust_cpu=on/off".445449446450config RANDOM_TRUST_BOOTLOADER447447- bool "Trust the bootloader to initialize Linux's CRNG"451451+ bool "Initialize RNG using bootloader-supplied seed"452452+ default y448453 help449449- Some bootloaders can provide entropy to increase the kernel's initial450450- device randomness. Say Y here to assume the entropy provided by the451451- booloader is trustworthy so it will be added to the kernel's entropy452452- pool. Otherwise, say N here so it will be regarded as device input that453453- only mixes the entropy pool. This can also be configured at boot with454454- "random.trust_bootloader=on/off".454454+ Initialize the RNG using a seed supplied by the bootloader or boot455455+ environment (e.g. EFI or a bootloader-generated device tree). This456456+ seed is not used directly, but is rather hashed into the main input457457+ pool, and this happens regardless of whether or not this option is458458+ enabled. Instead, this option controls whether the seed is credited459459+ and hence can initialize the RNG. Additionally, other sources of460460+ randomness are always used, regardless of this setting. Enabling461461+ this implies trusting that the bootloader can supply high quality and462462+ non-backdoored seeds.463463+464464+ Say Y here unless you have reason to mistrust your bootloader or465465+ believe its RNG facilities may be faulty. This may also be configured466466+ at boot time with "random.trust_bootloader=on/off".455467456468endmenu
···650650651651 if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {652652 crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */653653- execute_in_process_context(crng_set_ready, &set_ready);653653+ if (static_key_initialized)654654+ execute_in_process_context(crng_set_ready, &set_ready);654655 wake_up_interruptible(&crng_init_wait);655656 kill_fasync(&fasync, SIGIO, POLL_IN);656657 pr_notice("crng init done\n");···725724 *726725 **********************************************************************/727726728728-static bool used_arch_random;729729-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);730730-static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);727727+static bool trust_cpu __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);728728+static bool trust_bootloader __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);731729static int __init parse_trust_cpu(char *arg)732730{733731 return kstrtobool(arg, &trust_cpu);···776776int __init random_init(const char *command_line)777777{778778 ktime_t now = ktime_get_real();779779- unsigned int i, arch_bytes;779779+ unsigned int i, arch_bits;780780 unsigned long entropy;781781782782#if defined(LATENT_ENTROPY_PLUGIN)···784784 _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed));785785#endif786786787787- for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE;787787+ for (i = 0, arch_bits = BLAKE2S_BLOCK_SIZE * 8;788788 i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) {789789 if (!arch_get_random_seed_long_early(&entropy) &&790790 !arch_get_random_long_early(&entropy)) {791791 entropy = random_get_entropy();792792- arch_bytes -= sizeof(entropy);792792+ arch_bits -= sizeof(entropy) * 8;793793 }794794 _mix_pool_bytes(&entropy, sizeof(entropy));795795 }···798798 _mix_pool_bytes(command_line, strlen(command_line));799799 add_latent_entropy();800800801801+ /*802802+ * If we were initialized by the bootloader before jump labels are803803+ * initialized, 
then we should enable the static branch here, where804804+ * it's guaranteed that jump labels have been initialized.805805+ */806806+ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)807807+ crng_set_ready(NULL);808808+801809 if (crng_ready())802810 crng_reseed();803811 else if (trust_cpu)804804- credit_init_bits(arch_bytes * 8);805805- used_arch_random = arch_bytes * 8 >= POOL_READY_BITS;812812+ _credit_init_bits(arch_bits);806813807814 WARN_ON(register_pm_notifier(&pm_notifier));808815809816 WARN(!random_get_entropy(), "Missing cycle counter and fallback timer; RNG "810817 "entropy collection will consequently suffer.");811818 return 0;812812-}813813-814814-/*815815- * Returns whether arch randomness has been mixed into the initial816816- * state of the RNG, regardless of whether or not that randomness817817- * was credited. Knowing this is only good for a very limited set818818- * of uses, such as early init printk pointer obfuscation.819819- */820820-bool rng_has_arch_random(void)821821-{822822- return used_arch_random;823819}824820825821/*···861865 * Handle random seed passed by bootloader, and credit it if862866 * CONFIG_RANDOM_TRUST_BOOTLOADER is set.863867 */864864-void __cold add_bootloader_randomness(const void *buf, size_t len)868868+void __init add_bootloader_randomness(const void *buf, size_t len)865869{866870 mix_pool_bytes(buf, len);867871 if (trust_bootloader)868872 credit_init_bits(len * 8);869873}870870-EXPORT_SYMBOL_GPL(add_bootloader_randomness);871874872875#if IS_ENABLED(CONFIG_VMGENID)873876static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
···136136 return latch;137137}138138139139-/*140140- * For mempools pre-allocation at the table loading time.141141- */142142-struct dm_md_mempools {143143- struct bio_set bs;144144- struct bio_set io_bs;145145-};146146-147139struct table_device {148140 struct list_head list;149141 refcount_t count;···573581 struct dm_target_io *tio;574582 struct bio *clone;575583576576- clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->io_bs);584584+ clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);577585 /* Set default bdev, but target must bio_set_dev() before issuing IO */578586 clone->bi_bdev = md->disk->part0;579587···620628 } else {621629 struct mapped_device *md = ci->io->md;622630623623- clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, &md->bs);631631+ clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,632632+ &md->mempools->bs);624633 if (!clone)625634 return NULL;626635 /* Set default bdev, but target must bio_set_dev() before issuing IO */···10161023 struct dm_io *io = tio->io;10171024 struct mapped_device *md = io->md;1018102510191019- if (likely(bio->bi_bdev != md->disk->part0)) {10201020- struct request_queue *q = bdev_get_queue(bio->bi_bdev);10211021-10221022- if (unlikely(error == BLK_STS_TARGET)) {10231023- if (bio_op(bio) == REQ_OP_DISCARD &&10241024- !bdev_max_discard_sectors(bio->bi_bdev))10251025- disable_discard(md);10261026- else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&10271027- !q->limits.max_write_zeroes_sectors)10281028- disable_write_zeroes(md);10291029- }10301030-10311031- if (static_branch_unlikely(&zoned_enabled) &&10321032- unlikely(blk_queue_is_zoned(q)))10331033- dm_zone_endio(io, bio);10261026+ if (unlikely(error == BLK_STS_TARGET)) {10271027+ if (bio_op(bio) == REQ_OP_DISCARD &&10281028+ !bdev_max_discard_sectors(bio->bi_bdev))10291029+ disable_discard(md);10301030+ else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&10311031+ !bdev_write_zeroes_sectors(bio->bi_bdev))10321032+ disable_write_zeroes(md);10341033 }10341034+10351035+ if 
(static_branch_unlikely(&zoned_enabled) &&10361036+ unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev))))10371037+ dm_zone_endio(io, bio);1035103810361039 if (endio) {10371040 int r = endio(ti, bio, &error);···18651876{18661877 if (md->wq)18671878 destroy_workqueue(md->wq);18681868- bioset_exit(&md->bs);18691869- bioset_exit(&md->io_bs);18791879+ dm_free_md_mempools(md->mempools);1870188018711881 if (md->dax_dev) {18721882 dax_remove_host(md->disk);···20372049 kvfree(md);20382050}2039205120402040-static int __bind_mempools(struct mapped_device *md, struct dm_table *t)20412041-{20422042- struct dm_md_mempools *p = dm_table_get_md_mempools(t);20432043- int ret = 0;20442044-20452045- if (dm_table_bio_based(t)) {20462046- /*20472047- * The md may already have mempools that need changing.20482048- * If so, reload bioset because front_pad may have changed20492049- * because a different table was loaded.20502050- */20512051- bioset_exit(&md->bs);20522052- bioset_exit(&md->io_bs);20532053-20542054- } else if (bioset_initialized(&md->bs)) {20552055- /*20562056- * There's no need to reload with request-based dm20572057- * because the size of front_pad doesn't change.20582058- * Note for future: If you are to reload bioset,20592059- * prep-ed requests in the queue may refer20602060- * to bio from the old bioset, so you must walk20612061- * through the queue to unprep.20622062- */20632063- goto out;20642064- }20652065-20662066- BUG_ON(!p ||20672067- bioset_initialized(&md->bs) ||20682068- bioset_initialized(&md->io_bs));20692069-20702070- ret = bioset_init_from_src(&md->bs, &p->bs);20712071- if (ret)20722072- goto out;20732073- ret = bioset_init_from_src(&md->io_bs, &p->io_bs);20742074- if (ret)20752075- bioset_exit(&md->bs);20762076-out:20772077- /* mempool bind completed, no longer need any mempools in the table */20782078- dm_table_free_md_mempools(t);20792079- return ret;20802080-}20812081-20822052/*20832053 * Bind a table to the device.20842054 */···20902144 * 
immutable singletons - used to optimize dm_mq_queue_rq.20912145 */20922146 md->immutable_target = dm_table_get_immutable_target(t);20932093- }2094214720952095- ret = __bind_mempools(md, t);20962096- if (ret) {20972097- old_map = ERR_PTR(ret);20982098- goto out;21482148+ /*21492149+ * There is no need to reload with request-based dm because the21502150+ * size of front_pad doesn't change.21512151+ *21522152+ * Note for future: If you are to reload bioset, prep-ed21532153+ * requests in the queue may refer to bio from the old bioset,21542154+ * so you must walk through the queue to unprep.21552155+ */21562156+ if (!md->mempools) {21572157+ md->mempools = t->mempools;21582158+ t->mempools = NULL;21592159+ }21602160+ } else {21612161+ /*21622162+ * The md may already have mempools that need changing.21632163+ * If so, reload bioset because front_pad may have changed21642164+ * because a different table was loaded.21652165+ */21662166+ dm_free_md_mempools(md->mempools);21672167+ md->mempools = t->mempools;21682168+ t->mempools = NULL;20992169 }2100217021012171 ret = dm_table_set_restrictions(t, md->queue, limits);
···769769 u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */770770 u16 tqp_count[HNAE3_MAX_TC];771771 u16 tqp_offset[HNAE3_MAX_TC];772772+ u8 max_tc; /* Total number of TCs */772773 u8 num_tc; /* Total number of enabled TCs */773774 bool mqprio_active;774775};
···32683268static int hclge_update_port_info(struct hclge_dev *hdev)32693269{32703270 struct hclge_mac *mac = &hdev->hw.mac;32713271- int speed = HCLGE_MAC_SPEED_UNKNOWN;32713271+ int speed;32723272 int ret;3273327332743274 /* get the port info from SFP cmd if not copper port */···32793279 if (!hdev->support_sfp_query)32803280 return 0;3281328132823282- if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)32823282+ if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {32833283+ speed = mac->speed;32833284 ret = hclge_get_sfp_info(hdev, mac);32843284- else32853285+ } else {32863286+ speed = HCLGE_MAC_SPEED_UNKNOWN;32853287 ret = hclge_get_sfp_speed(hdev, &speed);32883288+ }3286328932873290 if (ret == -EOPNOTSUPP) {32883291 hdev->support_sfp_query = false;···32973294 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {32983295 if (mac->speed_type == QUERY_ACTIVE_SPEED) {32993296 hclge_update_port_capability(hdev, mac);32973297+ if (mac->speed != speed)32983298+ (void)hclge_tm_port_shaper_cfg(hdev);33003299 return 0;33013300 }33023301 return hclge_cfg_mac_speed_dup(hdev, mac->speed,···3380337533813376 link_state_old = vport->vf_info.link_state;33823377 vport->vf_info.link_state = link_state;33783378+33793379+ /* return success directly if the VF is unalive, VF will33803380+ * query link state itself when it starts work.33813381+ */33823382+ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))33833383+ return 0;3383338433843385 ret = hclge_push_vf_link_status(vport);33853386 if (ret) {···1012810117 if (ret)1012910118 return ret;10130101191012010120+ vport->port_base_vlan_cfg.tbl_sta = false;1013110121 /* remove old VLAN tag */1013210122 if (old_info->vlan_tag == 0)1013310123 ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
···22822282 }2283228322842284 if (vf->adq_enabled) {22852285- for (i = 0; i < I40E_MAX_VF_VSI; i++)22852285+ for (i = 0; i < vf->num_tc; i++)22862286 num_qps_all += vf->ch[i].num_qps;22872287 if (num_qps_all != qci->num_queue_pairs) {22882288 aq_ret = I40E_ERR_PARAM;
···57635763ice_fix_features(struct net_device *netdev, netdev_features_t features)57645764{57655765 struct ice_netdev_priv *np = netdev_priv(netdev);57665766- netdev_features_t supported_vlan_filtering;57675767- netdev_features_t requested_vlan_filtering;57685768- struct ice_vsi *vsi = np->vsi;57665766+ netdev_features_t req_vlan_fltr, cur_vlan_fltr;57675767+ bool cur_ctag, cur_stag, req_ctag, req_stag;5769576857705770- requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES;57695769+ cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;57705770+ cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;57715771+ cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;5771577257725772- /* make sure supported_vlan_filtering works for both SVM and DVM */57735773- supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER;57745774- if (ice_is_dvm_ena(&vsi->back->hw))57755775- supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER;57735773+ req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;57745774+ req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;57755775+ req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;5776577657775777- if (requested_vlan_filtering &&57785778- requested_vlan_filtering != supported_vlan_filtering) {57795779- if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) {57805780- netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n");57815781- features |= supported_vlan_filtering;57775777+ if (req_vlan_fltr != cur_vlan_fltr) {57785778+ if (ice_is_dvm_ena(&np->vsi->back->hw)) {57795779+ if (req_ctag && req_stag) {57805780+ features |= NETIF_VLAN_FILTERING_FEATURES;57815781+ } else if (!req_ctag && !req_stag) {57825782+ features &= ~NETIF_VLAN_FILTERING_FEATURES;57835783+ } else if ((!cur_ctag && req_ctag && !cur_stag) ||57845784+ (!cur_stag && req_stag && !cur_ctag)) {57855785+ features |= NETIF_VLAN_FILTERING_FEATURES;57865786+ netdev_warn(netdev, 
"802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");57875787+ } else if ((cur_ctag && !req_ctag && cur_stag) ||57885788+ (cur_stag && !req_stag && cur_ctag)) {57895789+ features &= ~NETIF_VLAN_FILTERING_FEATURES;57905790+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");57915791+ }57825792 } else {57835783- netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n");57845784- features &= ~supported_vlan_filtering;57935793+ if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)57945794+ netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");57955795+57965796+ if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)57975797+ features |= NETIF_F_HW_VLAN_CTAG_FILTER;57855798 }57865799 }57875800
···4949 * To allow multiple ports to access the shared register block independently,5050 * the blocks are split up so that indexes are assigned to each port based on5151 * hardware logical port number.5252+ *5353+ * The timestamp blocks are handled differently for E810- and E822-based5454+ * devices. In E810 devices, each port has its own block of timestamps, while in5555+ * E822 there is a need to logically break the block of registers into smaller5656+ * chunks based on the port number to avoid collisions.5757+ *5858+ * Example for port 5 in E810:5959+ * +--------+--------+--------+--------+--------+--------+--------+--------+6060+ * |register|register|register|register|register|register|register|register|6161+ * | block | block | block | block | block | block | block | block |6262+ * | for | for | for | for | for | for | for | for |6363+ * | port 0 | port 1 | port 2 | port 3 | port 4 | port 5 | port 6 | port 7 |6464+ * +--------+--------+--------+--------+--------+--------+--------+--------+6565+ * ^^6666+ * ||6767+ * |--- quad offset is always 06868+ * ---- quad number6969+ *7070+ * Example for port 5 in E822:7171+ * +-----------------------------+-----------------------------+7272+ * | register block for quad 0 | register block for quad 1 |7373+ * |+------+------+------+------+|+------+------+------+------+|7474+ * ||port 0|port 1|port 2|port 3|||port 0|port 1|port 2|port 3||7575+ * |+------+------+------+------+|+------+------+------+------+|7676+ * +-----------------------------+-------^---------------------+7777+ * ^ |7878+ * | --- quad offset*7979+ * ---- quad number8080+ *8181+ * * PHY port 5 is port 1 in quad 18282+ *5283 */53845485/**
+5
drivers/net/ethernet/intel/ice/ice_vf_lib.c
···504504 }505505506506 if (ice_is_vf_disabled(vf)) {507507+ vsi = ice_get_vf_vsi(vf);508508+ if (WARN_ON(!vsi))509509+ return -EINVAL;510510+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);511511+ ice_vsi_stop_all_rx_rings(vsi);507512 dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",508513 vf->vf_id);509514 return 0;
+27-28
drivers/net/ethernet/intel/ice/ice_virtchnl.c
···15921592 */15931593static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)15941594{15951595- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;15961595 struct virtchnl_vsi_queue_config_info *qci =15971596 (struct virtchnl_vsi_queue_config_info *)msg;15981597 struct virtchnl_queue_pair_info *qpi;15991598 struct ice_pf *pf = vf->pf;16001599 struct ice_vsi *vsi;16011601- int i, q_idx;16001600+ int i = -1, q_idx;1602160116031603- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {16041604- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16021602+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))16051603 goto error_param;16061606- }1607160416081608- if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) {16091609- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16051605+ if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))16101606 goto error_param;16111611- }1612160716131608 vsi = ice_get_vf_vsi(vf);16141614- if (!vsi) {16151615- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16091609+ if (!vsi)16161610 goto error_param;16171617- }1618161116191612 if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||16201613 qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {16211614 dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",16221615 vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));16231623- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16241616 goto error_param;16251617 }16261618···16251633 !ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||16261634 !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||16271635 !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {16281628- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16291636 goto error_param;16301637 }16311638···16341643 * for selected "vsi"16351644 */16361645 if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {16371637- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16381646 goto error_param;16391647 }16401648···16431653 vsi->tx_rings[i]->count = qpi->txq.ring_len;1644165416451655 /* Disable any existing queue first */16461646- if 
(ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) {16471647- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16561656+ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))16481657 goto error_param;16491649- }1650165816511659 /* Configure a queue with the requested settings */16521660 if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {16531653- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16611661+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",16621662+ vf->vf_id, i);16541663 goto error_param;16551664 }16561665 }···1663167416641675 if (qpi->rxq.databuffer_size != 0 &&16651676 (qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||16661666- qpi->rxq.databuffer_size < 1024)) {16671667- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16771677+ qpi->rxq.databuffer_size < 1024))16681678 goto error_param;16691669- }16701679 vsi->rx_buf_len = qpi->rxq.databuffer_size;16711680 vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;16721681 if (qpi->rxq.max_pkt_size > max_frame_size ||16731673- qpi->rxq.max_pkt_size < 64) {16741674- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16821682+ qpi->rxq.max_pkt_size < 64)16751683 goto error_param;16761676- }1677168416781685 vsi->max_frame = qpi->rxq.max_pkt_size;16791686 /* add space for the port VLAN since the VF driver is···16801695 vsi->max_frame += VLAN_HLEN;1681169616821697 if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {16831683- v_ret = VIRTCHNL_STATUS_ERR_PARAM;16981698+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",16991699+ vf->vf_id, i);16841700 goto error_param;16851701 }16861702 }16871703 }1688170416891689-error_param:16901705 /* send the response to the VF */16911691- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,16921692- NULL, 0);17061706+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,17071707+ VIRTCHNL_STATUS_SUCCESS, NULL, 0);17081708+error_param:17091709+ /* disable whatever we can */17101710+ for (; i >= 0; i--) {17111711+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))17121712+ 
dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",17131713+ vf->vf_id, i);17141714+ if (ice_vf_vsi_dis_single_txq(vf, vsi, i))17151715+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",17161716+ vf->vf_id, i);17171717+ }17181718+17191719+ /* send the response to the VF */17201720+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,17211721+ VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);16931722}1694172316951724/**
···547547 iowrite32(value, lp->regs + offset);548548}549549550550+/**551551+ * axienet_dma_out32 - Memory mapped Axi DMA register write.552552+ * @lp: Pointer to axienet local structure553553+ * @reg: Address offset from the base address of the Axi DMA core554554+ * @value: Value to be written into the Axi DMA register555555+ *556556+ * This function writes the desired value into the corresponding Axi DMA557557+ * register.558558+ */559559+560560+static inline void axienet_dma_out32(struct axienet_local *lp,561561+ off_t reg, u32 value)562562+{563563+ iowrite32(value, lp->dma_regs + reg);564564+}565565+566566+#if defined(CONFIG_64BIT) && defined(iowrite64)567567+/**568568+ * axienet_dma_out64 - Memory mapped Axi DMA register write.569569+ * @lp: Pointer to axienet local structure570570+ * @reg: Address offset from the base address of the Axi DMA core571571+ * @value: Value to be written into the Axi DMA register572572+ *573573+ * This function writes the desired value into the corresponding Axi DMA574574+ * register.575575+ */576576+static inline void axienet_dma_out64(struct axienet_local *lp,577577+ off_t reg, u64 value)578578+{579579+ iowrite64(value, lp->dma_regs + reg);580580+}581581+582582+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,583583+ dma_addr_t addr)584584+{585585+ if (lp->features & XAE_FEATURE_DMA_64BIT)586586+ axienet_dma_out64(lp, reg, addr);587587+ else588588+ axienet_dma_out32(lp, reg, lower_32_bits(addr));589589+}590590+591591+#else /* CONFIG_64BIT */592592+593593+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,594594+ dma_addr_t addr)595595+{596596+ axienet_dma_out32(lp, reg, lower_32_bits(addr));597597+}598598+599599+#endif /* CONFIG_64BIT */600600+550601/* Function prototypes visible in xilinx_axienet_mdio.c for other files */551602int axienet_mdio_enable(struct axienet_local *lp);552603void axienet_mdio_disable(struct axienet_local *lp);
+5-24
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
···133133 return ioread32(lp->dma_regs + reg);134134}135135136136-/**137137- * axienet_dma_out32 - Memory mapped Axi DMA register write.138138- * @lp: Pointer to axienet local structure139139- * @reg: Address offset from the base address of the Axi DMA core140140- * @value: Value to be written into the Axi DMA register141141- *142142- * This function writes the desired value into the corresponding Axi DMA143143- * register.144144- */145145-static inline void axienet_dma_out32(struct axienet_local *lp,146146- off_t reg, u32 value)147147-{148148- iowrite32(value, lp->dma_regs + reg);149149-}150150-151151-static void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,152152- dma_addr_t addr)153153-{154154- axienet_dma_out32(lp, reg, lower_32_bits(addr));155155-156156- if (lp->features & XAE_FEATURE_DMA_64BIT)157157- axienet_dma_out32(lp, reg + 4, upper_32_bits(addr));158158-}159159-160136static void desc_set_phys_addr(struct axienet_local *lp, dma_addr_t addr,161137 struct axidma_bd *desc)162138{···20362060 }20372061 iowrite32(0x0, desc);20382062 }20632063+ }20642064+ if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) {20652065+ dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n");20662066+ ret = -EINVAL;20672067+ goto cleanup_clk;20392068 }2040206920412070 ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
···8585 depends on I2C8686 depends on REGMAP_I2C8787 help8888- This driver provides support for the Nvidia SN2201 platfom.8888+ This driver provides support for the Nvidia SN2201 platform.8989 The SN2201 is a highly integrated for one rack unit system with9090 L3 management switches. It has 48 x 1Gbps RJ45 + 4 x 100G QSFP289191 ports in a compact 1RU form factor. The system also including a
···405405static int __init p50_module_init(void)406406{407407 struct resource res = DEFINE_RES_IO(P50_GPIO_IO_PORT_BASE, P50_PORT_CMD + 1);408408+ int ret;408409409410 if (!dmi_first_match(dmi_ids))410411 return -ENODEV;411412412412- platform_driver_register(&p50_gpio_driver);413413+ ret = platform_driver_register(&p50_gpio_driver);414414+ if (ret)415415+ return ret;413416414417 gpio_pdev = platform_device_register_simple(DRIVER_NAME, PLATFORM_DEVID_NONE, &res, 1);415418 if (IS_ERR(gpio_pdev)) {
···29982998 ndlp->nlp_DID, ulp_status,29992999 ulp_word4);3000300030013001- /* Call NLP_EVT_DEVICE_RM if link is down or LOGO is aborted */30023001 if (lpfc_error_lost_link(ulp_status, ulp_word4)) {30033003- lpfc_disc_state_machine(vport, ndlp, cmdiocb,30043004- NLP_EVT_DEVICE_RM);30053002 skip_recovery = 1;30063003 goto out;30073004 }···30183021 spin_unlock_irq(&ndlp->lock);30193022 lpfc_disc_state_machine(vport, ndlp, cmdiocb,30203023 NLP_EVT_DEVICE_RM);30213021- lpfc_els_free_iocb(phba, cmdiocb);30223022- lpfc_nlp_put(ndlp);30233023-30243024- /* Presume the node was released. */30253025- return;30243024+ goto out_rsrc_free;30263025 }3027302630283027out:30293029- /* Driver is done with the IO. */30303030- lpfc_els_free_iocb(phba, cmdiocb);30313031- lpfc_nlp_put(ndlp);30323032-30333028 /* At this point, the LOGO processing is complete. NOTE: For a30343029 * pt2pt topology, we are assuming the NPortID will only change30353030 * on link up processing. For a LOGO / PLOGI initiated by the···30483059 ndlp->nlp_DID, ulp_status,30493060 ulp_word4, tmo,30503061 vport->num_disc_nodes);30623062+30633063+ lpfc_els_free_iocb(phba, cmdiocb);30643064+ lpfc_nlp_put(ndlp);30653065+30513066 lpfc_disc_start(vport);30523067 return;30533068 }···30683075 lpfc_disc_state_machine(vport, ndlp, cmdiocb,30693076 NLP_EVT_DEVICE_RM);30703077 }30783078+out_rsrc_free:30793079+ /* Driver is done with the I/O. */30803080+ lpfc_els_free_iocb(phba, cmdiocb);30813081+ lpfc_nlp_put(ndlp);30713082}3072308330733084/**
···834834 lpfc_nvmet_invalidate_host(phba, ndlp);835835836836 if (ndlp->nlp_DID == Fabric_DID) {837837- if (vport->port_state <= LPFC_FDISC)837837+ if (vport->port_state <= LPFC_FDISC ||838838+ vport->fc_flag & FC_PT2PT)838839 goto out;839840 lpfc_linkdown_port(vport);840841 spin_lock_irq(shost->host_lock);
+40-12
drivers/scsi/lpfc/lpfc_nvme.c
···10651065 nCmd->rcv_rsplen = wcqe->parameter;10661066 nCmd->status = 0;1067106710681068+ /* Get the NVME cmd details for this unique error. */10691069+ cp = (struct nvme_fc_cmd_iu *)nCmd->cmdaddr;10701070+ ep = (struct nvme_fc_ersp_iu *)nCmd->rspaddr;10711071+10681072 /* Check if this is really an ERSP */10691073 if (nCmd->rcv_rsplen == LPFC_NVME_ERSP_LEN) {10701074 lpfc_ncmd->status = IOSTAT_SUCCESS;10711075 lpfc_ncmd->result = 0;1072107610731077 lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,10741074- "6084 NVME Completion ERSP: "10751075- "xri %x placed x%x\n",10761076- lpfc_ncmd->cur_iocbq.sli4_xritag,10771077- wcqe->total_data_placed);10781078+ "6084 NVME FCP_ERR ERSP: "10791079+ "xri %x placed x%x opcode x%x cmd_id "10801080+ "x%x cqe_status x%x\n",10811081+ lpfc_ncmd->cur_iocbq.sli4_xritag,10821082+ wcqe->total_data_placed,10831083+ cp->sqe.common.opcode,10841084+ cp->sqe.common.command_id,10851085+ ep->cqe.status);10781086 break;10791087 }10801088 lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,10811089 "6081 NVME Completion Protocol Error: "10821090 "xri %x status x%x result x%x "10831083- "placed x%x\n",10911091+ "placed x%x opcode x%x cmd_id x%x, "10921092+ "cqe_status x%x\n",10841093 lpfc_ncmd->cur_iocbq.sli4_xritag,10851094 lpfc_ncmd->status, lpfc_ncmd->result,10861086- wcqe->total_data_placed);10951095+ wcqe->total_data_placed,10961096+ cp->sqe.common.opcode,10971097+ cp->sqe.common.command_id,10981098+ ep->cqe.status);10871099 break;10881100 case IOSTAT_LOCAL_REJECT:10891101 /* Let fall through to set command final state. 
*/···12071195{12081196 struct lpfc_hba *phba = vport->phba;12091197 struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;12101210- struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq);11981198+ struct nvme_common_command *sqe;11991199+ struct lpfc_iocbq *pwqeq = &lpfc_ncmd->cur_iocbq;12111200 union lpfc_wqe128 *wqe = &pwqeq->wqe;12121201 uint32_t req_len;12131202···12651252 cstat->control_requests++;12661253 }1267125412681268- if (pnode->nlp_nvme_info & NLP_NVME_NSLER)12551255+ if (pnode->nlp_nvme_info & NLP_NVME_NSLER) {12691256 bf_set(wqe_erp, &wqe->generic.wqe_com, 1);12571257+ sqe = &((struct nvme_fc_cmd_iu *)12581258+ nCmd->cmdaddr)->sqe.common;12591259+ if (sqe->opcode == nvme_admin_async_event)12601260+ bf_set(wqe_ffrq, &wqe->generic.wqe_com, 1);12611261+ }12621262+12701263 /*12711264 * Finish initializing those WQE fields that are independent12721265 * of the nvme_cmnd request_buffer···18061787 * lpfc_nvme_abort_fcreq_cmpl - Complete an NVME FCP abort request.18071788 * @phba: Pointer to HBA context object18081789 * @cmdiocb: Pointer to command iocb object.18091809- * @abts_cmpl: Pointer to wcqe complete object.17901790+ * @rspiocb: Pointer to response iocb object.18101791 *18111792 * This is the callback function for any NVME FCP IO that was aborted.18121793 *···18151796 **/18161797void18171798lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,18181818- struct lpfc_wcqe_complete *abts_cmpl)17991799+ struct lpfc_iocbq *rspiocb)18191800{18011801+ struct lpfc_wcqe_complete *abts_cmpl = &rspiocb->wcqe_cmpl;18021802+18201803 lpfc_printf_log(phba, KERN_INFO, LOG_NVME,18211804 "6145 ABORT_XRI_CN completing on rpi x%x "18221805 "original iotag x%x, abort cmd iotag x%x "···18611840 struct lpfc_nvme_fcpreq_priv *freqpriv;18621841 unsigned long flags;18631842 int ret_val;18431843+ struct nvme_fc_cmd_iu *cp;1864184418651845 /* Validate pointers. 
LLDD fault handling with transport does18661846 * have timing races.···19851963 return;19861964 }1987196519661966+ /*19671967+ * Get Command Id from cmd to plug into response. This19681968+ * code is not needed in the next NVME Transport drop.19691969+ */19701970+ cp = (struct nvme_fc_cmd_iu *)lpfc_nbuf->nvmeCmd->cmdaddr;19881971 lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,19891972 "6138 Transport Abort NVME Request Issued for "19901990- "ox_id x%x\n",19911991- nvmereq_wqe->sli4_xritag);19731973+ "ox_id x%x nvme opcode x%x nvme cmd_id x%x\n",19741974+ nvmereq_wqe->sli4_xritag, cp->sqe.common.opcode,19751975+ cp->sqe.common.command_id);19921976 return;1993197719941978out_unlock:
+6
drivers/scsi/lpfc/lpfc_scsi.c
···60626062 int status;60636063 u32 logit = LOG_FCP;6064606460656065+ if (!rport)60666066+ return FAILED;60676067+60656068 rdata = rport->dd_data;60666069 if (!rdata || !rdata->pnode) {60676070 lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,···61426139 u32 dev_loss_tmo = vport->cfg_devloss_tmo;61436140 unsigned long flags;61446141 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);61426142+61436143+ if (!rport)61446144+ return FAILED;6145614561466146 rdata = rport->dd_data;61476147 if (!rdata || !rdata->pnode) {
+12-13
drivers/scsi/lpfc/lpfc_sli.c
···19301930 sync_buf = __lpfc_sli_get_iocbq(phba);19311931 if (!sync_buf) {19321932 lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,19331933- "6213 No available WQEs for CMF_SYNC_WQE\n");19331933+ "6244 No available WQEs for CMF_SYNC_WQE\n");19341934 ret_val = ENOMEM;19351935 goto out_unlock;19361936 }···38053805 set_job_ulpword4(cmdiocbp,38063806 IOERR_ABORT_REQUESTED);38073807 /*38083808- * For SLI4, irsiocb contains38083808+ * For SLI4, irspiocb contains38093809 * NO_XRI in sli_xritag, it38103810 * shall not affect releasing38113811 * sgl (xri) process.···38233823 }38243824 }38253825 }38263826- (cmdiocbp->cmd_cmpl) (phba, cmdiocbp, saveq);38263826+ cmdiocbp->cmd_cmpl(phba, cmdiocbp, saveq);38273827 } else38283828 lpfc_sli_release_iocbq(phba, cmdiocbp);38293829 } else {···40634063 cmdiocbq->cmd_flag &= ~LPFC_DRIVER_ABORTED;40644064 if (cmdiocbq->cmd_cmpl) {40654065 spin_unlock_irqrestore(&phba->hbalock, iflag);40664066- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq,40674067- &rspiocbq);40664066+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, &rspiocbq);40684067 spin_lock_irqsave(&phba->hbalock, iflag);40694068 }40704069 break;···1028710288 * @flag: Flag indicating if this command can be put into txq.1028810289 *1028910290 * __lpfc_sli_issue_fcp_io_s3 is wrapper function to invoke lockless func to1029010290- * send an iocb command to an HBA with SLI-4 interface spec.1029110291+ * send an iocb command to an HBA with SLI-3 interface spec.1029110292 *1029210293 * This function takes the hbalock before invoking the lockless version.1029310294 * The function will return success after it successfully submit the wqe to···1273912740 cmdiocbq->cmd_cmpl = cmdiocbq->wait_cmd_cmpl;1274012741 cmdiocbq->wait_cmd_cmpl = NULL;1274112742 if (cmdiocbq->cmd_cmpl)1274212742- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, NULL);1274312743+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, NULL);1274312744 else1274412745 lpfc_sli_release_iocbq(phba, cmdiocbq);1274512746 return;···12753127541275412755 /* Set the exchange busy flag for 
task management commands */1275512756 if ((cmdiocbq->cmd_flag & LPFC_IO_FCP) &&1275612756- !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) {1275712757+ !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) {1275712758 lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf,1275812758- cur_iocbq);1275912759+ cur_iocbq);1275912760 if (rspiocbq && (rspiocbq->cmd_flag & LPFC_EXCHANGE_BUSY))1276012761 lpfc_cmd->flags |= LPFC_SBUF_XBUSY;1276112762 else···1389513896 * @irspiocbq: Pointer to work-queue completion queue entry.1389613897 *1389713898 * This routine handles an ELS work-queue completion event and construct1389813898- * a pseudo response ELS IODBQ from the SLI4 ELS WCQE for the common1389913899+ * a pseudo response ELS IOCBQ from the SLI4 ELS WCQE for the common1389913900 * discovery engine to handle.1390013901 *1390113902 * Return: Pointer to the receive IOCBQ, NULL otherwise.···13939139401394013941 if (bf_get(lpfc_wcqe_c_xb, wcqe)) {1394113942 spin_lock_irqsave(&phba->hbalock, iflags);1394213942- cmdiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY;1394313943+ irspiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY;1394313944 spin_unlock_irqrestore(&phba->hbalock, iflags);1394413945 }1394513946···1479814799 /* Pass the cmd_iocb and the wcqe to the upper layer */1479914800 memcpy(&cmdiocbq->wcqe_cmpl, wcqe,1480014801 sizeof(struct lpfc_wcqe_complete));1480114801- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, cmdiocbq);1480214802+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, cmdiocbq);1480214803 } else {1480314804 lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,1480414805 "0375 FCP cmdiocb not callback function "···18955189561895618957 /* Free iocb created in lpfc_prep_seq */1895718958 list_for_each_entry_safe(curr_iocb, next_iocb,1895818958- &iocbq->list, list) {1895918959+ &iocbq->list, list) {1895918960 list_del_init(&curr_iocb->list);1896018961 lpfc_sli_release_iocbq(phba, curr_iocb);1896118962 }
+1-1
drivers/scsi/lpfc/lpfc_version.h
···2020 * included with this package. *2121 *******************************************************************/22222323-#define LPFC_DRIVER_VERSION "14.2.0.3"2323+#define LPFC_DRIVER_VERSION "14.2.0.4"2424#define LPFC_DRIVER_NAME "lpfc"25252626/* Used for SLI 2/3 */
···107107108108 /* Resources for implementing the notification channel from the device109109 * to the driver. fwqp is the firmware end of an RC connection; the110110- * other end is vqqp used by the driver. cq is is where completions are110110+ * other end is vqqp used by the driver. cq is where completions are111111 * reported.112112 */113113 struct mlx5_vdpa_cq cq;···1814181418151815 id = mlx5vdpa16_to_cpu(mvdev, vlan);18161816 mac_vlan_del(ndev, ndev->config.mac, id, true);18171817+ status = VIRTIO_NET_OK;18171818 break;18181819 default:18191819- break;18201820-}18201820+ break;18211821+ }1821182218221822-return status;18231823+ return status;18231824}1824182518251826static void mlx5_cvq_kick_handler(struct work_struct *work)
···255255256256 /*257257 * Per memory-barriers.txt, wmb() is not needed to guarantee258258- * that the the cache coherent memory writes have completed258258+ * that the cache coherent memory writes have completed259259 * before writing to the MMIO region.260260 */261261 writel(status, vm_dev->base + VIRTIO_MMIO_STATUS);···701701 if (!vm_cmdline_parent_registered) {702702 err = device_register(&vm_cmdline_parent);703703 if (err) {704704+ put_device(&vm_cmdline_parent);704705 pr_err("Failed to register parent device!\n");705706 return err;706707 }
+1-1
drivers/virtio/virtio_pci_modern_dev.c
···469469470470 /*471471 * Per memory-barriers.txt, wmb() is not needed to guarantee472472- * that the the cache coherent memory writes have completed472472+ * that the cache coherent memory writes have completed473473 * before writing to the MMIO region.474474 */475475 vp_iowrite8(status, &cfg->device_status);
···6666}67676868/**6969- * v9fs_req_cleanup - Cleanup request initialized by v9fs_init_request7070- * @mapping: unused mapping of request to cleanup7171- * @priv: private data to cleanup, a fid, guaranted non-null.6969+ * v9fs_free_request - Cleanup request initialized by v9fs_init_rreq7070+ * @rreq: The I/O request to clean up7271 */7373-static void v9fs_req_cleanup(struct address_space *mapping, void *priv)7272+static void v9fs_free_request(struct netfs_io_request *rreq)7473{7575- struct p9_fid *fid = priv;7474+ struct p9_fid *fid = rreq->netfs_priv;76757776 p9_client_clunk(fid);7877}···93949495const struct netfs_request_ops v9fs_req_ops = {9596 .init_request = v9fs_init_request,9797+ .free_request = v9fs_free_request,9698 .begin_cache_operation = v9fs_begin_cache_operation,9799 .issue_read = v9fs_issue_read,9898- .cleanup = v9fs_req_cleanup,99100};100101101102/**···273274 * file. We need to do this before we get a lock on the page in case274275 * there's more than one writer competing for the same cache block.275276 */276276- retval = netfs_write_begin(filp, mapping, pos, len, &folio, fsdata);277277+ retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata);277278 if (retval < 0)278279 return retval;279280
···99#include <linux/slab.h>1010#include "internal.h"11111212-unsigned __read_mostly afs_volume_gc_delay = 10;1313-unsigned __read_mostly afs_volume_record_life = 60 * 60;1212+static unsigned __read_mostly afs_volume_record_life = 60 * 60;14131514/*1615 * Insert a volume into a cell. If there's an existing volume record, that is
+1-1
fs/afs/write.c
···6060 * file. We need to do this before we get a lock on the page in case6161 * there's more than one writer competing for the same cache block.6262 */6363- ret = netfs_write_begin(file, mapping, pos, len, &folio, fsdata);6363+ ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);6464 if (ret < 0)6565 return ret;6666
+20-6
fs/attr.c
···6161 const struct inode *inode, kgid_t gid)6262{6363 kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);6464- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&6565- (in_group_p(gid) || gid_eq(gid, inode->i_gid)))6666- return true;6464+ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {6565+ kgid_t mapped_gid;6666+6767+ if (gid_eq(gid, inode->i_gid))6868+ return true;6969+ mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);7070+ if (in_group_p(mapped_gid))7171+ return true;7272+ }6773 if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))6874 return true;6975 if (gid_eq(kgid, INVALID_GID) &&···129123130124 /* Make sure a caller can chmod. */131125 if (ia_valid & ATTR_MODE) {126126+ kgid_t mapped_gid;127127+132128 if (!inode_owner_or_capable(mnt_userns, inode))133129 return -EPERM;130130+131131+ if (ia_valid & ATTR_GID)132132+ mapped_gid = mapped_kgid_fs(mnt_userns,133133+ i_user_ns(inode), attr->ia_gid);134134+ else135135+ mapped_gid = i_gid_into_mnt(mnt_userns, inode);136136+134137 /* Also check the setgid bit! */135135- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :136136- i_gid_into_mnt(mnt_userns, inode)) &&137137- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))138138+ if (!in_group_p(mapped_gid) &&139139+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))138140 attr->ia_mode &= ~S_ISGID;139141 }140142
···9797 if (!server->hostname)9898 return -EINVAL;9999100100+ /* if server hostname isn't populated, there's nothing to do here */101101+ if (server->hostname[0] == '\0')102102+ return 0;103103+100104 len = strlen(server->hostname) + 3;101105102106 unc = kmalloc(len, GFP_KERNEL);
···297297298298/**299299 * netfs_write_begin - Helper to prepare for writing300300+ * @ctx: The netfs context300301 * @file: The file to read from301302 * @mapping: The mapping to read from302303 * @pos: File position at which the write will begin···327326 *328327 * This is usable whether or not caching is enabled.329328 */330330-int netfs_write_begin(struct file *file, struct address_space *mapping,329329+int netfs_write_begin(struct netfs_inode *ctx,330330+ struct file *file, struct address_space *mapping,331331 loff_t pos, unsigned int len, struct folio **_folio,332332 void **_fsdata)333333{334334 struct netfs_io_request *rreq;335335- struct netfs_inode *ctx = netfs_inode(file_inode(file ));336335 struct folio *folio;337336 unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;338337 pgoff_t index = pos >> PAGE_SHIFT;
+3-3
fs/netfs/objects.c
···7575 struct netfs_io_request *rreq =7676 container_of(work, struct netfs_io_request, work);77777878- netfs_clear_subrequests(rreq, false);7979- if (rreq->netfs_priv)8080- rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);8178 trace_netfs_rreq(rreq, netfs_rreq_trace_free);7979+ netfs_clear_subrequests(rreq, false);8080+ if (rreq->netfs_ops->free_request)8181+ rreq->netfs_ops->free_request(rreq);8282 if (rreq->cache_resources.ops)8383 rreq->cache_resources.ops->end_operation(&rreq->cache_resources);8484 kfree(rreq);
+5-4
fs/nfsd/filecache.c
···309309 if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {310310 nfsd_file_flush(nf);311311 nfsd_file_put_noref(nf);312312- } else {312312+ } else if (nf->nf_file) {313313 nfsd_file_put_noref(nf);314314- if (nf->nf_file)315315- nfsd_file_schedule_laundrette();316316- }314314+ nfsd_file_schedule_laundrette();315315+ } else316316+ nfsd_file_put_noref(nf);317317+317318 if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)318319 nfsd_file_gc();319320}
-1
include/linux/bio.h
···403403extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);404404extern void bioset_exit(struct bio_set *);405405extern int biovec_init_pool(mempool_t *pool, int pool_entries);406406-extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);407406408407struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,409408 unsigned int opf, gfp_t gfp_mask,
···227227 * struct folio - Represents a contiguous set of bytes.228228 * @flags: Identical to the page flags.229229 * @lru: Least Recently Used list; tracks how recently this folio was used.230230+ * @mlock_count: Number of times this folio has been pinned by mlock().230231 * @mapping: The file this page belongs to, or refers to the anon_vma for231232 * anonymous memory.232233 * @index: Offset within the file, in units of pages. For anonymous memory,···256255 unsigned long flags;257256 union {258257 struct list_head lru;258258+ /* private: avoid cluttering the output */259259 struct {260260 void *__filler;261261+ /* public: */261262 unsigned int mlock_count;263263+ /* private: */262264 };265265+ /* public: */263266 };264267 struct address_space *mapping;265268 pgoff_t index;
+11-14
include/linux/netfs.h
···206206 */207207struct netfs_request_ops {208208 int (*init_request)(struct netfs_io_request *rreq, struct file *file);209209+ void (*free_request)(struct netfs_io_request *rreq);209210 int (*begin_cache_operation)(struct netfs_io_request *rreq);211211+210212 void (*expand_readahead)(struct netfs_io_request *rreq);211213 bool (*clamp_length)(struct netfs_io_subrequest *subreq);212214 void (*issue_read)(struct netfs_io_subrequest *subreq);···216214 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,217215 struct folio *folio, void **_fsdata);218216 void (*done)(struct netfs_io_request *rreq);219219- void (*cleanup)(struct address_space *mapping, void *netfs_priv);220217};221218222219/*···278277struct readahead_control;279278extern void netfs_readahead(struct readahead_control *);280279int netfs_read_folio(struct file *, struct folio *);281281-extern int netfs_write_begin(struct file *, struct address_space *,280280+extern int netfs_write_begin(struct netfs_inode *,281281+ struct file *, struct address_space *,282282 loff_t, unsigned int, struct folio **,283283 void **);284284···304302305303/**306304 * netfs_inode_init - Initialise a netfslib inode context307307- * @inode: The inode with which the context is associated305305+ * @ctx: The netfs inode to initialise308306 * @ops: The netfs's operations list309307 *310308 * Initialise the netfs library context struct. 
This is expected to follow on311309 * directly from the VFS inode struct.312310 */313313-static inline void netfs_inode_init(struct inode *inode,311311+static inline void netfs_inode_init(struct netfs_inode *ctx,314312 const struct netfs_request_ops *ops)315313{316316- struct netfs_inode *ctx = netfs_inode(inode);317317-318314 ctx->ops = ops;319319- ctx->remote_i_size = i_size_read(inode);315315+ ctx->remote_i_size = i_size_read(&ctx->inode);320316#if IS_ENABLED(CONFIG_FSCACHE)321317 ctx->cache = NULL;322318#endif···322322323323/**324324 * netfs_resize_file - Note that a file got resized325325- * @inode: The inode being resized325325+ * @ctx: The netfs inode being resized326326 * @new_i_size: The new file size327327 *328328 * Inform the netfs lib that a file got resized so that it can adjust its state.329329 */330330-static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size)330330+static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)331331{332332- struct netfs_inode *ctx = netfs_inode(inode);333333-334332 ctx->remote_i_size = new_i_size;335333}336334337335/**338336 * netfs_i_cookie - Get the cache cookie from the inode339339- * @inode: The inode to query337337+ * @ctx: The netfs inode to query340338 *341339 * Get the caching cookie (if enabled) from the network filesystem's inode.342340 */343343-static inline struct fscache_cookie *netfs_i_cookie(struct inode *inode)341341+static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)344342{345343#if IS_ENABLED(CONFIG_FSCACHE)346346- struct netfs_inode *ctx = netfs_inode(inode);347344 return ctx->cache;348345#else349346 return NULL;
+1-2
include/linux/random.h
···1313struct notifier_block;14141515void add_device_randomness(const void *buf, size_t len);1616-void add_bootloader_randomness(const void *buf, size_t len);1616+void __init add_bootloader_randomness(const void *buf, size_t len);1717void add_input_randomness(unsigned int type, unsigned int code,1818 unsigned int value) __latent_entropy;1919void add_interrupt_randomness(int irq) __latent_entropy;···74747575int __init random_init(const char *command_line);7676bool rng_is_initialized(void);7777-bool rng_has_arch_random(void);7877int wait_for_random_bytes(void);79788079/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
+15-1
include/linux/sunrpc/xdr.h
···243243extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);244244extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,245245 size_t nbytes);246246-extern void xdr_commit_encode(struct xdr_stream *xdr);246246+extern void __xdr_commit_encode(struct xdr_stream *xdr);247247extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);248248extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);249249extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,···304304xdr_reset_scratch_buffer(struct xdr_stream *xdr)305305{306306 xdr_set_scratch_buffer(xdr, NULL, 0);307307+}308308+309309+/**310310+ * xdr_commit_encode - Ensure all data is written to xdr->buf311311+ * @xdr: pointer to xdr_stream312312+ *313313+ * Handle encoding across page boundaries by giving the caller a314314+ * temporary location to write to, then later copying the data into315315+ * place. __xdr_commit_encode() does that copying.316316+ */317317+static inline void xdr_commit_encode(struct xdr_stream *xdr)318318+{319319+ if (unlikely(xdr->scratch.iov_len))320320+ __xdr_commit_encode(xdr);307321}308322309323/**
+3-2
include/linux/vdpa.h
···178178 * for the device179179 * @vdev: vdpa device180180 * Returns virtqueue algin requirement181181- * @get_vq_group: Get the group id for a specific virtqueue181181+ * @get_vq_group: Get the group id for a specific182182+ * virtqueue (optional)182183 * @vdev: vdpa device183184 * @idx: virtqueue index184185 * Returns u32: group id for this virtqueue···244243 * Returns the iova range supported by245244 * the device.246245 * @set_group_asid: Set address space identifier for a247247- * virtqueue group246246+ * virtqueue group (optional)248247 * @vdev: vdpa device249248 * @group: virtqueue group250249 * @asid: address space id for this group
···406406 * alloc_ordered_workqueue - allocate an ordered workqueue407407 * @fmt: printf format for the name of the workqueue408408 * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)409409- * @args...: args for @fmt409409+ * @args: args for @fmt410410 *411411 * Allocate an ordered workqueue. An ordered workqueue executes at412412 * most one work item at any given time in the queued order. They are···445445 struct delayed_work *dwork, unsigned long delay);446446extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork);447447448448-extern void flush_workqueue(struct workqueue_struct *wq);448448+extern void __flush_workqueue(struct workqueue_struct *wq);449449extern void drain_workqueue(struct workqueue_struct *wq);450450451451extern int schedule_on_each_cpu(work_func_t func);···563563 return queue_work(system_wq, work);564564}565565566566+/*567567+ * Detect attempt to flush system-wide workqueues at compile time when possible.568568+ *569569+ * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp570570+ * for reasons and steps for converting system-wide workqueues into local workqueues.571571+ */572572+extern void __warn_flushing_systemwide_wq(void)573573+ __compiletime_warning("Please avoid flushing system-wide workqueues.");574574+566575/**567576 * flush_scheduled_work - ensure that any scheduled work has run to completion.568577 *569578 * Forces execution of the kernel-global workqueue and blocks until its570579 * completion.571580 *572572- * Think twice before calling this function! It's very easy to get into573573- * trouble if you don't take great care. 
Either of the following situations574574- * will lead to deadlock:581581+ * It's very easy to get into trouble if you don't take great care.582582+ * Either of the following situations will lead to deadlock:575583 *576584 * One of the work items currently on the workqueue needs to acquire577585 * a lock held by your code or its caller.···594586 * need to know that a particular work item isn't queued and isn't running.595587 * In such cases you should use cancel_delayed_work_sync() or596588 * cancel_work_sync() instead.589589+ *590590+ * Please stop calling this function! A conversion to stop flushing system-wide591591+ * workqueues is in progress. This function will be removed after all in-tree592592+ * users stopped calling this function.597593 */598598-static inline void flush_scheduled_work(void)599599-{600600- flush_workqueue(system_wq);601601-}594594+/*595595+ * The background of commit 771c035372a036f8 ("deprecate the596596+ * '__deprecated' attribute warnings entirely and for good") is that,597597+ * since Linus builds all modules between every single pull he does,598598+ * the standard kernel build needs to be _clean_ in order to be able to599599+ * notice when new problems happen. 
Therefore, don't emit warning while600600+ * there are in-tree users.601601+ */602602+#define flush_scheduled_work() \603603+({ \604604+ if (0) \605605+ __warn_flushing_systemwide_wq(); \606606+ __flush_workqueue(system_wq); \607607+})608608+609609+/*610610+ * Although there is no longer in-tree caller, for now just emit warning611611+ * in order to give out-of-tree callers time to update.612612+ */613613+#define flush_workqueue(wq) \614614+({ \615615+ struct workqueue_struct *_wq = (wq); \616616+ \617617+ if ((__builtin_constant_p(_wq == system_wq) && \618618+ _wq == system_wq) || \619619+ (__builtin_constant_p(_wq == system_highpri_wq) && \620620+ _wq == system_highpri_wq) || \621621+ (__builtin_constant_p(_wq == system_long_wq) && \622622+ _wq == system_long_wq) || \623623+ (__builtin_constant_p(_wq == system_unbound_wq) && \624624+ _wq == system_unbound_wq) || \625625+ (__builtin_constant_p(_wq == system_freezable_wq) && \626626+ _wq == system_freezable_wq) || \627627+ (__builtin_constant_p(_wq == system_power_efficient_wq) && \628628+ _wq == system_power_efficient_wq) || \629629+ (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \630630+ _wq == system_freezable_power_efficient_wq)) \631631+ __warn_flushing_systemwide_wq(); \632632+ __flush_workqueue(_wq); \633633+})602634603635/**604636 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
···2525#undef INET_CSK_CLEAR_TIMERS26262727struct inet_bind_bucket;2828-struct inet_bind2_bucket;2928struct tcp_congestion_ops;30293130/*···5758 *5859 * @icsk_accept_queue: FIFO of established children5960 * @icsk_bind_hash: Bind node6060- * @icsk_bind2_hash: Bind node in the bhash2 table6161 * @icsk_timeout: Timeout6262 * @icsk_retransmit_timer: Resend (no ack)6363 * @icsk_rto: Retransmit timeout···8385 struct inet_sock icsk_inet;8486 struct request_sock_queue icsk_accept_queue;8587 struct inet_bind_bucket *icsk_bind_hash;8686- struct inet_bind2_bucket *icsk_bind2_hash;8788 unsigned long icsk_timeout;8889 struct timer_list icsk_retransmit_timer;8990 struct timer_list icsk_delack_timer;
+1-67
include/net/inet_hashtables.h
···9090 struct hlist_head owners;9191};92929393-struct inet_bind2_bucket {9494- possible_net_t ib_net;9595- int l3mdev;9696- unsigned short port;9797- union {9898-#if IS_ENABLED(CONFIG_IPV6)9999- struct in6_addr v6_rcv_saddr;100100-#endif101101- __be32 rcv_saddr;102102- };103103- /* Node in the inet2_bind_hashbucket chain */104104- struct hlist_node node;105105- /* List of sockets hashed to this bucket */106106- struct hlist_head owners;107107-};108108-10993static inline struct net *ib_net(struct inet_bind_bucket *ib)110110-{111111- return read_pnet(&ib->ib_net);112112-}113113-114114-static inline struct net *ib2_net(struct inet_bind2_bucket *ib)11594{11695 return read_pnet(&ib->ib_net);11796}···100121101122struct inet_bind_hashbucket {102123 spinlock_t lock;103103- struct hlist_head chain;104104-};105105-106106-/* This is synchronized using the inet_bind_hashbucket's spinlock.107107- * Instead of having separate spinlocks, the inet_bind2_hashbucket can share108108- * the inet_bind_hashbucket's given that in every case where the bhash2 table109109- * is useful, a lookup in the bhash table also occurs.110110- */111111-struct inet_bind2_hashbucket {112124 struct hlist_head chain;113125};114126···134164 */135165 struct kmem_cache *bind_bucket_cachep;136166 struct inet_bind_hashbucket *bhash;137137- /* The 2nd binding table hashed by port and address.138138- * This is used primarily for expediting the resolution of bind139139- * conflicts.140140- */141141- struct kmem_cache *bind2_bucket_cachep;142142- struct inet_bind2_hashbucket *bhash2;143167 unsigned int bhash_size;144168145169 /* The 2nd listener table hashed by local port and address */···193229void inet_bind_bucket_destroy(struct kmem_cache *cachep,194230 struct inet_bind_bucket *tb);195231196196-static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb,197197- struct net *net,198198- const unsigned short port,199199- int l3mdev)200200-{201201- return net_eq(ib_net(tb), net) && tb->port == port 
&&202202- tb->l3mdev == l3mdev;203203-}204204-205205-struct inet_bind2_bucket *206206-inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,207207- struct inet_bind2_hashbucket *head,208208- const unsigned short port, int l3mdev,209209- const struct sock *sk);210210-211211-void inet_bind2_bucket_destroy(struct kmem_cache *cachep,212212- struct inet_bind2_bucket *tb);213213-214214-struct inet_bind2_bucket *215215-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,216216- const unsigned short port, int l3mdev,217217- struct sock *sk,218218- struct inet_bind2_hashbucket **head);219219-220220-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,221221- struct net *net,222222- const unsigned short port,223223- int l3mdev,224224- const struct sock *sk);225225-226232static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,227233 const u32 bhash_size)228234{···200266}201267202268void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,203203- struct inet_bind2_bucket *tb2, const unsigned short snum);269269+ const unsigned short snum);204270205271/* Caller must disable local BH processing. */206272int __inet_inherit_port(const struct sock *sk, struct sock *child);
-14
include/net/sock.h
···348348 * @sk_txtime_report_errors: set report errors mode for SO_TXTIME349349 * @sk_txtime_unused: unused txtime flags350350 * @ns_tracker: tracker for netns reference351351- * @sk_bind2_node: bind node in the bhash2 table352351 */353352struct sock {354353 /*···537538#endif538539 struct rcu_head sk_rcu;539540 netns_tracker ns_tracker;540540- struct hlist_node sk_bind2_node;541541};542542543543enum sk_pacing {···817819 hlist_add_head(&sk->sk_bind_node, list);818820}819821820820-static inline void __sk_del_bind2_node(struct sock *sk)821821-{822822- __hlist_del(&sk->sk_bind2_node);823823-}824824-825825-static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)826826-{827827- hlist_add_head(&sk->sk_bind2_node, list);828828-}829829-830822#define sk_for_each(__sk, list) \831823 hlist_for_each_entry(__sk, list, sk_node)832824#define sk_for_each_rcu(__sk, list) \···834846 hlist_for_each_entry_safe(__sk, tmp, list, sk_node)835847#define sk_for_each_bound(__sk, list) \836848 hlist_for_each_entry(__sk, list, sk_bind_node)837837-#define sk_for_each_bound_bhash2(__sk, list) \838838- hlist_for_each_entry(__sk, list, sk_bind2_node)839849840850/**841851 * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
···281281static inline cfi_check_fn find_check_fn(unsigned long ptr)282282{283283 cfi_check_fn fn = NULL;284284+ unsigned long flags;285285+ bool rcu_idle;284286285287 if (is_kernel_text(ptr))286288 return __cfi_check;···292290 * the shadow and __module_address use RCU, so we need to wake it293291 * up if necessary.294292 */295295- RCU_NONIDLE({296296- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))297297- fn = find_shadow_check_fn(ptr);293293+ rcu_idle = !rcu_is_watching();294294+ if (rcu_idle) {295295+ local_irq_save(flags);296296+ rcu_irq_enter();297297+ }298298299299- if (!fn)300300- fn = find_module_check_fn(ptr);301301- });299299+ if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))300300+ fn = find_shadow_check_fn(ptr);301301+ if (!fn)302302+ fn = find_module_check_fn(ptr);303303+304304+ if (rcu_idle) {305305+ rcu_irq_exit();306306+ local_irq_restore(flags);307307+ }302308303309 return fn;304310}
+12-4
kernel/workqueue.c
···27882788}2789278927902790/**27912791- * flush_workqueue - ensure that any scheduled work has run to completion.27912791+ * __flush_workqueue - ensure that any scheduled work has run to completion.27922792 * @wq: workqueue to flush27932793 *27942794 * This function sleeps until all work items which were queued on entry27952795 * have finished execution, but it is not livelocked by new incoming ones.27962796 */27972797-void flush_workqueue(struct workqueue_struct *wq)27972797+void __flush_workqueue(struct workqueue_struct *wq)27982798{27992799 struct wq_flusher this_flusher = {28002800 .list = LIST_HEAD_INIT(this_flusher.list),···29432943out_unlock:29442944 mutex_unlock(&wq->mutex);29452945}29462946-EXPORT_SYMBOL(flush_workqueue);29462946+EXPORT_SYMBOL(__flush_workqueue);2947294729482948/**29492949 * drain_workqueue - drain a workqueue···29712971 wq->flags |= __WQ_DRAINING;29722972 mutex_unlock(&wq->mutex);29732973reflush:29742974- flush_workqueue(wq);29742974+ __flush_workqueue(wq);2975297529762976 mutex_lock(&wq->mutex);29772977···61116111 wq_online = true;61126112 wq_watchdog_init();61136113}61146114+61156115+/*61166116+ * Despite the naming, this is a no-op function which is here only for avoiding61176117+ * link error. Since compile-time warning may fail to catch, we will need to61186118+ * emit run-time warning from __flush_workqueue().61196119+ */61206120+void __warn_flushing_systemwide_wq(void) { }61216121+EXPORT_SYMBOL(__warn_flushing_systemwide_wq);
···14341434{14351435 unsigned nr, offset;14361436 pgoff_t index, count;14371437- size_t size = maxsize, actual;14371437+ size_t size = maxsize;14381438 loff_t pos;1439143914401440 if (!size || !maxpages)···14611461 if (nr == 0)14621462 return 0;1463146314641464- actual = PAGE_SIZE * nr;14651465- actual -= offset;14661466- if (nr == count && size > 0) {14671467- unsigned last_offset = (nr > 1) ? 0 : offset;14681468- actual -= PAGE_SIZE - (last_offset + size);14691469- }14701470- return actual;14641464+ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);14711465}1472146614731467/* must be done on non-empty ITER_IOVEC one */···15961602 struct page **p;15971603 unsigned nr, offset;15981604 pgoff_t index, count;15991599- size_t size = maxsize, actual;16051605+ size_t size = maxsize;16001606 loff_t pos;1601160716021608 if (!size)···16251631 if (nr == 0)16261632 return 0;1627163316281628- actual = PAGE_SIZE * nr;16291629- actual -= offset;16301630- if (nr == count && size > 0) {16311631- unsigned last_offset = (nr > 1) ? 0 : offset;16321632- actual -= PAGE_SIZE - (last_offset + size);16331633- }16341634- return actual;16341634+ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);16351635}1636163616371637ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+1-2
lib/vsprintf.c
···769769 static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);770770 unsigned long flags;771771772772- if (!system_unbound_wq ||773773- (!rng_is_initialized() && !rng_has_arch_random()) ||772772+ if (!system_unbound_wq || !rng_is_initialized() ||774773 !spin_trylock_irqsave(&filling, flags))775774 return -EAGAIN;776775
+3-2
lib/xarray.c
···264264 * xas_destroy() - Free any resources allocated during the XArray operation.265265 * @xas: XArray operation state.266266 *267267- * This function is now internal-only.267267+ * Most users will not need to call this function; it is called for you268268+ * by xas_nomem().268269 */269269-static void xas_destroy(struct xa_state *xas)270270+void xas_destroy(struct xa_state *xas)270271{271272 struct xa_node *next, *node = xas->xa_alloc;272273
+5-4
mm/filemap.c
···29912991 struct address_space *mapping = file->f_mapping;29922992 DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);29932993 struct file *fpin = NULL;29942994+ unsigned long vm_flags = vmf->vma->vm_flags;29942995 unsigned int mmap_miss;2995299629962997#ifdef CONFIG_TRANSPARENT_HUGEPAGE29972998 /* Use the readahead code, even if readahead is disabled */29982998- if (vmf->vma->vm_flags & VM_HUGEPAGE) {29992999+ if (vm_flags & VM_HUGEPAGE) {29993000 fpin = maybe_unlock_mmap_for_io(vmf, fpin);30003001 ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);30013002 ra->size = HPAGE_PMD_NR;···30043003 * Fetch two PMD folios, so we get the chance to actually30053004 * readahead, unless we've been told not to.30063005 */30073007- if (!(vmf->vma->vm_flags & VM_RAND_READ))30063006+ if (!(vm_flags & VM_RAND_READ))30083007 ra->size *= 2;30093008 ra->async_size = HPAGE_PMD_NR;30103009 page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);···30133012#endif3014301330153014 /* If we don't want any read-ahead, don't bother */30163016- if (vmf->vma->vm_flags & VM_RAND_READ)30153015+ if (vm_flags & VM_RAND_READ)30173016 return fpin;30183017 if (!ra->ra_pages)30193018 return fpin;3020301930213021- if (vmf->vma->vm_flags & VM_SEQ_READ) {30203020+ if (vm_flags & VM_SEQ_READ) {30223021 fpin = maybe_unlock_mmap_for_io(vmf, fpin);30233022 page_cache_sync_ra(&ractl, ra->ra_pages);30243023 return fpin;
+1-2
mm/huge_memory.c
···26722672 if (mapping)26732673 i_mmap_unlock_read(mapping);26742674out:26752675- /* Free any memory we didn't use */26762676- xas_nomem(&xas, 0);26752675+ xas_destroy(&xas);26772676 count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);26782677 return ret;26792678}
+2
mm/readahead.c
···164164 while ((folio = readahead_folio(rac)) != NULL) {165165 unsigned long nr = folio_nr_pages(folio);166166167167+ folio_get(folio);167168 rac->ra->size -= nr;168169 if (rac->ra->async_size >= nr) {169170 rac->ra->async_size -= nr;170171 filemap_remove_folio(folio);171172 }172173 folio_unlock(folio);174174+ folio_put(folio);173175 }174176 } else {175177 while ((folio = readahead_folio(rac)) != NULL)
+12-14
mm/usercopy.c
···161161static inline void check_heap_object(const void *ptr, unsigned long n,162162 bool to_user)163163{164164+ uintptr_t addr = (uintptr_t)ptr;165165+ unsigned long offset;164166 struct folio *folio;165167166168 if (is_kmap_addr(ptr)) {167167- unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1);168168-169169- if ((unsigned long)ptr + n - 1 > page_end)170170- usercopy_abort("kmap", NULL, to_user,171171- offset_in_page(ptr), n);169169+ offset = offset_in_page(ptr);170170+ if (n > PAGE_SIZE - offset)171171+ usercopy_abort("kmap", NULL, to_user, offset, n);172172 return;173173 }174174175175 if (is_vmalloc_addr(ptr)) {176176- struct vm_struct *area = find_vm_area(ptr);177177- unsigned long offset;176176+ struct vmap_area *area = find_vmap_area(addr);178177179179- if (!area) {178178+ if (!area)180179 usercopy_abort("vmalloc", "no area", to_user, 0, n);181181- return;182182- }183180184184- offset = ptr - area->addr;185185- if (offset + n > get_vm_area_size(area))181181+ if (n > area->va_end - addr) {182182+ offset = addr - area->va_start;186183 usercopy_abort("vmalloc", NULL, to_user, offset, n);184184+ }187185 return;188186 }189187···194196 /* Check slab allocator for flags and size. */195197 __check_heap_object(ptr, n, folio_slab(folio), to_user);196198 } else if (folio_test_large(folio)) {197197- unsigned long offset = ptr - folio_address(folio);198198- if (offset + n > folio_size(folio))199199+ offset = ptr - folio_address(folio);200200+ if (n > folio_size(folio) - offset)199201 usercopy_abort("page alloc", NULL, to_user, offset, n);200202 }201203}
+1-1
mm/vmalloc.c
···17981798 free_vmap_area_noflush(va);17991799}1800180018011801-static struct vmap_area *find_vmap_area(unsigned long addr)18011801+struct vmap_area *find_vmap_area(unsigned long addr)18021802{18031803 struct vmap_area *va;18041804
+28-5
net/ax25/af_ax25.c
···16621662 int flags)16631663{16641664 struct sock *sk = sock->sk;16651665- struct sk_buff *skb;16651665+ struct sk_buff *skb, *last;16661666+ struct sk_buff_head *sk_queue;16661667 int copied;16671668 int err = 0;16691669+ int off = 0;16701670+ long timeo;1668167116691672 lock_sock(sk);16701673 /*···16791676 goto out;16801677 }1681167816821682- /* Now we can treat all alike */16831683- skb = skb_recv_datagram(sk, flags, &err);16841684- if (skb == NULL)16851685- goto out;16791679+ /* We need support for non-blocking reads. */16801680+ sk_queue = &sk->sk_receive_queue;16811681+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);16821682+ /* If no packet is available, release_sock(sk) and try again. */16831683+ if (!skb) {16841684+ if (err != -EAGAIN)16851685+ goto out;16861686+ release_sock(sk);16871687+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);16881688+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,16891689+ &timeo, last)) {16901690+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,16911691+ &err, &last);16921692+ if (skb)16931693+ break;16941694+16951695+ if (err != -EAGAIN)16961696+ goto done;16971697+ }16981698+ if (!skb)16991699+ goto done;17001700+ lock_sock(sk);17011701+ }1686170216871703 if (!sk_to_ax25(sk)->pidincl)16881704 skb_pull(skb, 1); /* Remove PID */···17481726out:17491727 release_sock(sk);1750172817291729+done:17511730 return err;17521731}17531732
+5-28
net/dccp/proto.c
···11201120 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);11211121 if (!dccp_hashinfo.bind_bucket_cachep)11221122 goto out_free_hashinfo2;11231123- dccp_hashinfo.bind2_bucket_cachep =11241124- kmem_cache_create("dccp_bind2_bucket",11251125- sizeof(struct inet_bind2_bucket), 0,11261126- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);11271127- if (!dccp_hashinfo.bind2_bucket_cachep)11281128- goto out_free_bind_bucket_cachep;1129112311301124 /*11311125 * Size and allocate the main established and bind bucket···1150115611511157 if (!dccp_hashinfo.ehash) {11521158 DCCP_CRIT("Failed to allocate DCCP established hash table");11531153- goto out_free_bind2_bucket_cachep;11591159+ goto out_free_bind_bucket_cachep;11541160 }1155116111561162 for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)···11761182 goto out_free_dccp_locks;11771183 }1178118411791179- dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)11801180- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);11811181-11821182- if (!dccp_hashinfo.bhash2) {11831183- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");11841184- goto out_free_dccp_bhash;11851185- }11861186-11871185 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {11881186 spin_lock_init(&dccp_hashinfo.bhash[i].lock);11891187 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);11901190- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);11911188 }1192118911931190 rc = dccp_mib_init();11941191 if (rc)11951195- goto out_free_dccp_bhash2;11921192+ goto out_free_dccp_bhash;1196119311971194 rc = dccp_ackvec_init();11981195 if (rc)···12071222 dccp_ackvec_exit();12081223out_free_dccp_mib:12091224 dccp_mib_exit();12101210-out_free_dccp_bhash2:12111211- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);12121225out_free_dccp_bhash:12131226 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);12141227out_free_dccp_locks:12151228 inet_ehash_locks_free(&dccp_hashinfo);12161229out_free_dccp_ehash:12171230 free_pages((unsigned long)dccp_hashinfo.ehash, 
ehash_order);12181218-out_free_bind2_bucket_cachep:12191219- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);12201231out_free_bind_bucket_cachep:12211232 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);12221233out_free_hashinfo2:12231234 inet_hashinfo2_free_mod(&dccp_hashinfo);12241235out_fail:12251236 dccp_hashinfo.bhash = NULL;12261226- dccp_hashinfo.bhash2 = NULL;12271237 dccp_hashinfo.ehash = NULL;12281238 dccp_hashinfo.bind_bucket_cachep = NULL;12291229- dccp_hashinfo.bind2_bucket_cachep = NULL;12301239 return rc;12311240}1232124112331242static void __exit dccp_fini(void)12341243{12351235- int bhash_order = get_order(dccp_hashinfo.bhash_size *12361236- sizeof(struct inet_bind_hashbucket));12371237-12381244 ccid_cleanup_builtins();12391245 dccp_mib_exit();12401240- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);12411241- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);12461246+ free_pages((unsigned long)dccp_hashinfo.bhash,12471247+ get_order(dccp_hashinfo.bhash_size *12481248+ sizeof(struct inet_bind_hashbucket)));12421249 free_pages((unsigned long)dccp_hashinfo.ehash,12431250 get_order((dccp_hashinfo.ehash_mask + 1) *12441251 sizeof(struct inet_ehash_bucket)));
+64-183
net/ipv4/inet_connection_sock.c
···117117 return !sk->sk_rcv_saddr;118118}119119120120-static bool use_bhash2_on_bind(const struct sock *sk)121121-{122122-#if IS_ENABLED(CONFIG_IPV6)123123- int addr_type;124124-125125- if (sk->sk_family == AF_INET6) {126126- addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);127127- return addr_type != IPV6_ADDR_ANY &&128128- addr_type != IPV6_ADDR_MAPPED;129129- }130130-#endif131131- return sk->sk_rcv_saddr != htonl(INADDR_ANY);132132-}133133-134134-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,135135- int port)136136-{137137-#if IS_ENABLED(CONFIG_IPV6)138138- struct in6_addr nulladdr = {};139139-140140- if (sk->sk_family == AF_INET6)141141- return ipv6_portaddr_hash(net, &nulladdr, port);142142-#endif143143- return ipv4_portaddr_hash(net, 0, port);144144-}145145-146120void inet_get_local_port_range(struct net *net, int *low, int *high)147121{148122 unsigned int seq;···130156}131157EXPORT_SYMBOL(inet_get_local_port_range);132158133133-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,134134- kuid_t sk_uid, bool relax,135135- bool reuseport_cb_ok, bool reuseport_ok)136136-{137137- int bound_dev_if2;138138-139139- if (sk == sk2)140140- return false;141141-142142- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);143143-144144- if (!sk->sk_bound_dev_if || !bound_dev_if2 ||145145- sk->sk_bound_dev_if == bound_dev_if2) {146146- if (sk->sk_reuse && sk2->sk_reuse &&147147- sk2->sk_state != TCP_LISTEN) {148148- if (!relax || (!reuseport_ok && sk->sk_reuseport &&149149- sk2->sk_reuseport && reuseport_cb_ok &&150150- (sk2->sk_state == TCP_TIME_WAIT ||151151- uid_eq(sk_uid, sock_i_uid(sk2)))))152152- return true;153153- } else if (!reuseport_ok || !sk->sk_reuseport ||154154- !sk2->sk_reuseport || !reuseport_cb_ok ||155155- (sk2->sk_state != TCP_TIME_WAIT &&156156- !uid_eq(sk_uid, sock_i_uid(sk2)))) {157157- return true;158158- }159159- }160160- return false;161161-}162162-163163-static bool check_bhash2_conflict(const struct 
sock *sk,164164- struct inet_bind2_bucket *tb2, kuid_t sk_uid,165165- bool relax, bool reuseport_cb_ok,166166- bool reuseport_ok)167167-{168168- struct sock *sk2;169169-170170- sk_for_each_bound_bhash2(sk2, &tb2->owners) {171171- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))172172- continue;173173-174174- if (bind_conflict_exist(sk, sk2, sk_uid, relax,175175- reuseport_cb_ok, reuseport_ok))176176- return true;177177- }178178- return false;179179-}180180-181181-/* This should be called only when the corresponding inet_bind_bucket spinlock182182- * is held183183- */184184-static int inet_csk_bind_conflict(const struct sock *sk, int port,185185- struct inet_bind_bucket *tb,186186- struct inet_bind2_bucket *tb2, /* may be null */159159+static int inet_csk_bind_conflict(const struct sock *sk,160160+ const struct inet_bind_bucket *tb,187161 bool relax, bool reuseport_ok)188162{189189- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;190190- kuid_t uid = sock_i_uid((struct sock *)sk);191191- struct sock_reuseport *reuseport_cb;192192- struct inet_bind2_hashbucket *head2;193193- bool reuseport_cb_ok;194163 struct sock *sk2;195195- struct net *net;196196- int l3mdev;197197- u32 hash;164164+ bool reuseport_cb_ok;165165+ bool reuse = sk->sk_reuse;166166+ bool reuseport = !!sk->sk_reuseport;167167+ struct sock_reuseport *reuseport_cb;168168+ kuid_t uid = sock_i_uid((struct sock *)sk);198169199170 rcu_read_lock();200171 reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);···150231 /*151232 * Unlike other sk lookup places we do not check152233 * for sk_net here, since _all_ the socks listed153153- * in tb->owners and tb2->owners list belong154154- * to the same net234234+ * in tb->owners list belong to the same net - the235235+ * one this bucket belongs to.155236 */156237157157- if (!use_bhash2_on_bind(sk)) {158158- sk_for_each_bound(sk2, &tb->owners)159159- if (bind_conflict_exist(sk, sk2, uid, relax,160160- reuseport_cb_ok, reuseport_ok) &&161161- 
inet_rcv_saddr_equal(sk, sk2, true))162162- return true;238238+ sk_for_each_bound(sk2, &tb->owners) {239239+ int bound_dev_if2;163240164164- return false;241241+ if (sk == sk2)242242+ continue;243243+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);244244+ if ((!sk->sk_bound_dev_if ||245245+ !bound_dev_if2 ||246246+ sk->sk_bound_dev_if == bound_dev_if2)) {247247+ if (reuse && sk2->sk_reuse &&248248+ sk2->sk_state != TCP_LISTEN) {249249+ if ((!relax ||250250+ (!reuseport_ok &&251251+ reuseport && sk2->sk_reuseport &&252252+ reuseport_cb_ok &&253253+ (sk2->sk_state == TCP_TIME_WAIT ||254254+ uid_eq(uid, sock_i_uid(sk2))))) &&255255+ inet_rcv_saddr_equal(sk, sk2, true))256256+ break;257257+ } else if (!reuseport_ok ||258258+ !reuseport || !sk2->sk_reuseport ||259259+ !reuseport_cb_ok ||260260+ (sk2->sk_state != TCP_TIME_WAIT &&261261+ !uid_eq(uid, sock_i_uid(sk2)))) {262262+ if (inet_rcv_saddr_equal(sk, sk2, true))263263+ break;264264+ }265265+ }165266 }166166-167167- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,168168- reuseport_ok))169169- return true;170170-171171- net = sock_net(sk);172172-173173- /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or174174- * INADDR_ANY (if ipv4) socket.175175- */176176- hash = get_bhash2_nulladdr_hash(sk, net, port);177177- head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];178178-179179- l3mdev = inet_sk_bound_l3mdev(sk);180180- inet_bind_bucket_for_each(tb2, &head2->chain)181181- if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))182182- break;183183-184184- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,185185- reuseport_ok))186186- return true;187187-188188- return false;267267+ return sk2 != NULL;189268}190269191270/*···191274 * inet_bind_hashbucket lock held.192275 */193276static struct inet_bind_hashbucket *194194-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,195195- struct inet_bind2_bucket **tb2_ret,196196- 
struct inet_bind2_hashbucket **head2_ret, int *port_ret)277277+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)197278{198279 struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;199199- struct inet_bind2_hashbucket *head2;280280+ int port = 0;200281 struct inet_bind_hashbucket *head;201282 struct net *net = sock_net(sk);283283+ bool relax = false;202284 int i, low, high, attempt_half;203203- struct inet_bind2_bucket *tb2;204285 struct inet_bind_bucket *tb;205286 u32 remaining, offset;206206- bool relax = false;207207- int port = 0;208287 int l3mdev;209288210289 l3mdev = inet_sk_bound_l3mdev(sk);···239326 head = &hinfo->bhash[inet_bhashfn(net, port,240327 hinfo->bhash_size)];241328 spin_lock_bh(&head->lock);242242- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,243243- &head2);244329 inet_bind_bucket_for_each(tb, &head->chain)245245- if (check_bind_bucket_match(tb, net, port, l3mdev)) {246246- if (!inet_csk_bind_conflict(sk, port, tb, tb2,247247- relax, false))330330+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&331331+ tb->port == port) {332332+ if (!inet_csk_bind_conflict(sk, tb, relax, false))248333 goto success;249334 goto next_port;250335 }···272361success:273362 *port_ret = port;274363 *tb_ret = tb;275275- *tb2_ret = tb2;276276- *head2_ret = head2;277364 return head;278365}279366···367458{368459 bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;369460 struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;370370- bool bhash_created = false, bhash2_created = false;371371- struct inet_bind2_bucket *tb2 = NULL;372372- struct inet_bind2_hashbucket *head2;373373- struct inet_bind_bucket *tb = NULL;461461+ int ret = 1, port = snum;374462 struct inet_bind_hashbucket *head;375463 struct net *net = sock_net(sk);376376- int ret = 1, port = snum;377377- bool found_port = false;464464+ struct inet_bind_bucket *tb = NULL;378465 int l3mdev;379466380467 l3mdev = inet_sk_bound_l3mdev(sk);381468382469 if 
(!port) {383383- head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);470470+ head = inet_csk_find_open_port(sk, &tb, &port);384471 if (!head)385472 return ret;386386- if (tb && tb2)387387- goto success;388388- found_port = true;389389- } else {390390- head = &hinfo->bhash[inet_bhashfn(net, port,391391- hinfo->bhash_size)];392392- spin_lock_bh(&head->lock);393393- inet_bind_bucket_for_each(tb, &head->chain)394394- if (check_bind_bucket_match(tb, net, port, l3mdev))395395- break;396396-397397- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,398398- &head2);399399- }400400-401401- if (!tb) {402402- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,403403- head, port, l3mdev);404473 if (!tb)405405- goto fail_unlock;406406- bhash_created = true;474474+ goto tb_not_found;475475+ goto success;407476 }408408-409409- if (!tb2) {410410- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,411411- net, head2, port, l3mdev, sk);412412- if (!tb2)413413- goto fail_unlock;414414- bhash2_created = true;415415- }416416-417417- /* If we had to find an open port, we already checked for conflicts */418418- if (!found_port && !hlist_empty(&tb->owners)) {477477+ head = &hinfo->bhash[inet_bhashfn(net, port,478478+ hinfo->bhash_size)];479479+ spin_lock_bh(&head->lock);480480+ inet_bind_bucket_for_each(tb, &head->chain)481481+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&482482+ tb->port == port)483483+ goto tb_found;484484+tb_not_found:485485+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,486486+ net, head, port, l3mdev);487487+ if (!tb)488488+ goto fail_unlock;489489+tb_found:490490+ if (!hlist_empty(&tb->owners)) {419491 if (sk->sk_reuse == SK_FORCE_REUSE)420492 goto success;421493422494 if ((tb->fastreuse > 0 && reuse) ||423495 sk_reuseport_match(tb, sk))424496 goto success;425425- if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))497497+ if (inet_csk_bind_conflict(sk, tb, true, true))426498 goto fail_unlock;427499 
}428500success:429501 inet_csk_update_fastreuse(tb, sk);430502431503 if (!inet_csk(sk)->icsk_bind_hash)432432- inet_bind_hash(sk, tb, tb2, port);504504+ inet_bind_hash(sk, tb, port);433505 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);434434- WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);435506 ret = 0;436507437508fail_unlock:438438- if (ret) {439439- if (bhash_created)440440- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);441441- if (bhash2_created)442442- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,443443- tb2);444444- }445509 spin_unlock_bh(&head->lock);446510 return ret;447511}···96110799621080 inet_sk_set_state(newsk, TCP_SYN_RECV);9631081 newicsk->icsk_bind_hash = NULL;964964- newicsk->icsk_bind2_hash = NULL;96510829661083 inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;9671084 inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
+11-182
net/ipv4/inet_hashtables.c
···8181 return tb;8282}83838484-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,8585- struct net *net,8686- struct inet_bind2_hashbucket *head,8787- const unsigned short port,8888- int l3mdev,8989- const struct sock *sk)9090-{9191- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);9292-9393- if (tb) {9494- write_pnet(&tb->ib_net, net);9595- tb->l3mdev = l3mdev;9696- tb->port = port;9797-#if IS_ENABLED(CONFIG_IPV6)9898- if (sk->sk_family == AF_INET6)9999- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;100100- else101101-#endif102102- tb->rcv_saddr = sk->sk_rcv_saddr;103103- INIT_HLIST_HEAD(&tb->owners);104104- hlist_add_head(&tb->node, &head->chain);105105- }106106- return tb;107107-}108108-109109-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)110110-{111111-#if IS_ENABLED(CONFIG_IPV6)112112- if (sk->sk_family == AF_INET6)113113- return ipv6_addr_equal(&tb2->v6_rcv_saddr,114114- &sk->sk_v6_rcv_saddr);115115-#endif116116- return tb2->rcv_saddr == sk->sk_rcv_saddr;117117-}118118-11984/*12085 * Caller must hold hashbucket lock for this tb with local BH disabled12186 */···92127 }93128}941299595-/* Caller must hold the lock for the corresponding hashbucket in the bhash table9696- * with local BH disabled9797- */9898-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)9999-{100100- if (hlist_empty(&tb->owners)) {101101- __hlist_del(&tb->node);102102- kmem_cache_free(cachep, tb);103103- }104104-}105105-106130void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,107107- struct inet_bind2_bucket *tb2, const unsigned short snum)131131+ const unsigned short snum)108132{109133 inet_sk(sk)->inet_num = snum;110134 sk_add_bind_node(sk, &tb->owners);111135 inet_csk(sk)->icsk_bind_hash = tb;112112- sk_add_bind2_node(sk, &tb2->owners);113113- inet_csk(sk)->icsk_bind2_hash = tb2;114136}115137116138/*···109157 const int bhash = inet_bhashfn(sock_net(sk), 
inet_sk(sk)->inet_num,110158 hashinfo->bhash_size);111159 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];112112- struct inet_bind2_bucket *tb2;113160 struct inet_bind_bucket *tb;114161115162 spin_lock(&head->lock);···117166 inet_csk(sk)->icsk_bind_hash = NULL;118167 inet_sk(sk)->inet_num = 0;119168 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);120120-121121- if (inet_csk(sk)->icsk_bind2_hash) {122122- tb2 = inet_csk(sk)->icsk_bind2_hash;123123- __sk_del_bind2_node(sk);124124- inet_csk(sk)->icsk_bind2_hash = NULL;125125- inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);126126- }127169 spin_unlock(&head->lock);128170}129171···133189 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;134190 unsigned short port = inet_sk(child)->inet_num;135191 const int bhash = inet_bhashfn(sock_net(sk), port,136136- table->bhash_size);192192+ table->bhash_size);137193 struct inet_bind_hashbucket *head = &table->bhash[bhash];138138- struct inet_bind2_hashbucket *head_bhash2;139139- bool created_inet_bind_bucket = false;140140- struct net *net = sock_net(sk);141141- struct inet_bind2_bucket *tb2;142194 struct inet_bind_bucket *tb;143195 int l3mdev;144196145197 spin_lock(&head->lock);146198 tb = inet_csk(sk)->icsk_bind_hash;147147- tb2 = inet_csk(sk)->icsk_bind2_hash;148148- if (unlikely(!tb || !tb2)) {199199+ if (unlikely(!tb)) {149200 spin_unlock(&head->lock);150201 return -ENOENT;151202 }···153214 * as that of the child socket. We have to look up or154215 * create a new bind bucket for the child here. 
*/155216 inet_bind_bucket_for_each(tb, &head->chain) {156156- if (check_bind_bucket_match(tb, net, port, l3mdev))217217+ if (net_eq(ib_net(tb), sock_net(sk)) &&218218+ tb->l3mdev == l3mdev && tb->port == port)157219 break;158220 }159221 if (!tb) {160222 tb = inet_bind_bucket_create(table->bind_bucket_cachep,161161- net, head, port, l3mdev);223223+ sock_net(sk), head, port,224224+ l3mdev);162225 if (!tb) {163226 spin_unlock(&head->lock);164227 return -ENOMEM;165228 }166166- created_inet_bind_bucket = true;167229 }168230 inet_csk_update_fastreuse(tb, child);169169-170170- goto bhash2_find;171171- } else if (!bind2_bucket_addr_match(tb2, child)) {172172- l3mdev = inet_sk_bound_l3mdev(sk);173173-174174-bhash2_find:175175- tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,176176- &head_bhash2);177177- if (!tb2) {178178- tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,179179- net, head_bhash2, port,180180- l3mdev, child);181181- if (!tb2)182182- goto error;183183- }184231 }185185- inet_bind_hash(child, tb, tb2, port);232232+ inet_bind_hash(child, tb, port);186233 spin_unlock(&head->lock);187234188235 return 0;189189-190190-error:191191- if (created_inet_bind_bucket)192192- inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);193193- spin_unlock(&head->lock);194194- return -ENOMEM;195236}196237EXPORT_SYMBOL_GPL(__inet_inherit_port);197238···675756}676757EXPORT_SYMBOL_GPL(inet_unhash);677758678678-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,679679- struct net *net, unsigned short port,680680- int l3mdev, struct sock *sk)681681-{682682-#if IS_ENABLED(CONFIG_IPV6)683683- if (sk->sk_family == AF_INET6)684684- return net_eq(ib2_net(tb), net) && tb->port == port &&685685- tb->l3mdev == l3mdev &&686686- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);687687- else688688-#endif689689- return net_eq(ib2_net(tb), net) && tb->port == port &&690690- tb->l3mdev == l3mdev && tb->rcv_saddr == 
sk->sk_rcv_saddr;691691-}692692-693693-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,694694- struct net *net, const unsigned short port,695695- int l3mdev, const struct sock *sk)696696-{697697-#if IS_ENABLED(CONFIG_IPV6)698698- struct in6_addr nulladdr = {};699699-700700- if (sk->sk_family == AF_INET6)701701- return net_eq(ib2_net(tb), net) && tb->port == port &&702702- tb->l3mdev == l3mdev &&703703- ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);704704- else705705-#endif706706- return net_eq(ib2_net(tb), net) && tb->port == port &&707707- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;708708-}709709-710710-static struct inet_bind2_hashbucket *711711-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,712712- const struct net *net, unsigned short port)713713-{714714- u32 hash;715715-716716-#if IS_ENABLED(CONFIG_IPV6)717717- if (sk->sk_family == AF_INET6)718718- hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);719719- else720720-#endif721721- hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);722722- return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];723723-}724724-725725-/* This should only be called when the spinlock for the socket's corresponding726726- * bind_hashbucket is held727727- */728728-struct inet_bind2_bucket *729729-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,730730- const unsigned short port, int l3mdev, struct sock *sk,731731- struct inet_bind2_hashbucket **head)732732-{733733- struct inet_bind2_bucket *bhash2 = NULL;734734- struct inet_bind2_hashbucket *h;735735-736736- h = inet_bhashfn_portaddr(hinfo, sk, net, port);737737- inet_bind_bucket_for_each(bhash2, &h->chain) {738738- if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))739739- break;740740- }741741-742742- if (head)743743- *head = h;744744-745745- return bhash2;746746-}747747-748759/* RFC 6056 3.3.4. 
Algorithm 4: Double-Hash Port Selection Algorithm749760 * Note that we use 32bit integers (vs RFC 'short integers')750761 * because 2^16 is not a multiple of num_ephemeral and this···695846{696847 struct inet_hashinfo *hinfo = death_row->hashinfo;697848 struct inet_timewait_sock *tw = NULL;698698- struct inet_bind2_hashbucket *head2;699849 struct inet_bind_hashbucket *head;700850 int port = inet_sk(sk)->inet_num;701851 struct net *net = sock_net(sk);702702- struct inet_bind2_bucket *tb2;703852 struct inet_bind_bucket *tb;704704- bool tb_created = false;705853 u32 remaining, offset;706854 int ret, i, low, high;707855 int l3mdev;···755909 * the established check is already unique enough.756910 */757911 inet_bind_bucket_for_each(tb, &head->chain) {758758- if (check_bind_bucket_match(tb, net, port, l3mdev)) {912912+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&913913+ tb->port == port) {759914 if (tb->fastreuse >= 0 ||760915 tb->fastreuseport >= 0)761916 goto next_port;···774927 spin_unlock_bh(&head->lock);775928 return -ENOMEM;776929 }777777- tb_created = true;778930 tb->fastreuse = -1;779931 tb->fastreuseport = -1;780932 goto ok;···789943 return -EADDRNOTAVAIL;790944791945ok:792792- /* Find the corresponding tb2 bucket since we need to793793- * add the socket to the bhash2 table as well794794- */795795- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);796796- if (!tb2) {797797- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,798798- head2, port, l3mdev, sk);799799- if (!tb2)800800- goto error;801801- }802802-803946 /* Here we want to add a little bit of randomness to the next source804947 * port that will be chosen. 
We use a max() with a random here so that805948 * on low contention the randomness is maximal and on high contention···798963 WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);799964800965 /* Head lock still held and bh's disabled */801801- inet_bind_hash(sk, tb, tb2, port);966966+ inet_bind_hash(sk, tb, port);802967 if (sk_unhashed(sk)) {803968 inet_sk(sk)->inet_sport = htons(port);804969 inet_ehash_nolisten(sk, (struct sock *)tw, NULL);···810975 inet_twsk_deschedule_put(tw);811976 local_bh_enable();812977 return 0;813813-814814-error:815815- if (tb_created)816816- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);817817- spin_unlock_bh(&head->lock);818818- return -ENOMEM;819978}820979821980/*
+2-12
net/ipv4/tcp.c
···45994599 SLAB_HWCACHE_ALIGN | SLAB_PANIC |46004600 SLAB_ACCOUNT,46014601 NULL);46024602- tcp_hashinfo.bind2_bucket_cachep =46034603- kmem_cache_create("tcp_bind2_bucket",46044604- sizeof(struct inet_bind2_bucket), 0,46054605- SLAB_HWCACHE_ALIGN | SLAB_PANIC |46064606- SLAB_ACCOUNT,46074607- NULL);4608460246094603 /* Size and allocate the main established and bind bucket46104604 * hash tables.···46214627 if (inet_ehash_locks_alloc(&tcp_hashinfo))46224628 panic("TCP: failed to alloc ehash_locks");46234629 tcp_hashinfo.bhash =46244624- alloc_large_system_hash("TCP bind bhash tables",46254625- sizeof(struct inet_bind_hashbucket) +46264626- sizeof(struct inet_bind2_hashbucket),46304630+ alloc_large_system_hash("TCP bind",46314631+ sizeof(struct inet_bind_hashbucket),46274632 tcp_hashinfo.ehash_mask + 1,46284633 17, /* one slot per 128 KB of memory */46294634 0,···46314638 0,46324639 64 * 1024);46334640 tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;46344634- tcp_hashinfo.bhash2 =46354635- (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);46364641 for (i = 0; i < tcp_hashinfo.bhash_size; i++) {46374642 spin_lock_init(&tcp_hashinfo.bhash[i].lock);46384643 INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);46394639- INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);46404644 }4641464546424646
+23-14
net/sunrpc/xdr.c
···919919EXPORT_SYMBOL_GPL(xdr_init_encode);920920921921/**922922- * xdr_commit_encode - Ensure all data is written to buffer922922+ * __xdr_commit_encode - Ensure all data is written to buffer923923 * @xdr: pointer to xdr_stream924924 *925925 * We handle encoding across page boundaries by giving the caller a···931931 * required at the end of encoding, or any other time when the xdr_buf932932 * data might be read.933933 */934934-inline void xdr_commit_encode(struct xdr_stream *xdr)934934+void __xdr_commit_encode(struct xdr_stream *xdr)935935{936936- int shift = xdr->scratch.iov_len;936936+ size_t shift = xdr->scratch.iov_len;937937 void *page;938938939939- if (shift == 0)940940- return;941939 page = page_address(*xdr->page_ptr);942940 memcpy(xdr->scratch.iov_base, page, shift);943941 memmove(page, page + shift, (void *)xdr->p - page);944942 xdr_reset_scratch_buffer(xdr);945943}946946-EXPORT_SYMBOL_GPL(xdr_commit_encode);944944+EXPORT_SYMBOL_GPL(__xdr_commit_encode);947945948948-static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,949949- size_t nbytes)946946+/*947947+ * The buffer space to be reserved crosses the boundary between948948+ * xdr->buf->head and xdr->buf->pages, or between two pages949949+ * in xdr->buf->pages.950950+ */951951+static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,952952+ size_t nbytes)950953{951951- __be32 *p;952954 int space_left;953955 int frag1bytes, frag2bytes;956956+ void *p;954957955958 if (nbytes > PAGE_SIZE)956959 goto out_overflow; /* Bigger buffers require special handling */···967964 xdr->buf->page_len += frag1bytes;968965 xdr->page_ptr++;969966 xdr->iov = NULL;967967+970968 /*971969 * If the last encode didn't end exactly on a page boundary, the972970 * next one will straddle boundaries. 
Encode into the next···976972 * space at the end of the previous buffer:977973 */978974 xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);979979- p = page_address(*xdr->page_ptr);975975+980976 /*981981- * Note this is where the next encode will start after we've982982- * shifted this one back:977977+ * xdr->p is where the next encode will start after978978+ * xdr_commit_encode() has shifted this one back:983979 */984984- xdr->p = (void *)p + frag2bytes;980980+ p = page_address(*xdr->page_ptr);981981+ xdr->p = p + frag2bytes;985982 space_left = xdr->buf->buflen - xdr->buf->len;986986- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);983983+ if (space_left - nbytes >= PAGE_SIZE)984984+ xdr->end = p + PAGE_SIZE;985985+ else986986+ xdr->end = p + space_left - frag1bytes;987987+987988 xdr->buf->page_len += frag2bytes;988989 xdr->buf->len += nbytes;989990 return p;
+2-2
net/sunrpc/xprtrdma/svc_rdma_rw.c
···478478 unsigned int write_len;479479 u64 offset;480480481481- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];482482- if (!seg)481481+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)483482 goto out_overflow;484483484484+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];485485 write_len = min(remaining, seg->rs_length - info->wi_seg_off);486486 if (!write_len)487487 goto out_overflow;
+2-2
scripts/Makefile.build
···251251252252# To make this rule robust against "Argument list too long" error,253253# ensure to add $(obj)/ prefix by a shell command.254254-cmd_mod = echo $(call real-search, $*.o, .o, -objs -y -m) | \255255- $(AWK) -v RS='( |\n)' '!x[$$0]++ { print("$(obj)/"$$0) }' > $@254254+cmd_mod = printf '%s\n' $(call real-search, $*.o, .o, -objs -y -m) | \255255+ $(AWK) '!x[$$0]++ { print("$(obj)/"$$0) }' > $@256256257257$(obj)/%.mod: FORCE258258 $(call if_changed,mod)
+21-15
scripts/check-local-export
···8899set -e10101111+# catch errors from ${NM}1212+set -o pipefail1313+1414+# Run the last element of a pipeline in the current shell.1515+# Without this, the while-loop would be executed in a subshell, and1616+# the changes made to 'symbol_types' and 'export_symbols' would be lost.1717+shopt -s lastpipe1818+1119declare -A symbol_types1220declare -a export_symbols13211422exit_code=015232424+# If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm) shows2525+# 'no symbols' diagnostic (but exits with 0). It is harmless and hidden by2626+# '2>/dev/null'. However, it suppresses real error messages as well. Add a2727+# hand-crafted error message here.2828+#2929+# TODO:3030+# Use --quiet instead of 2>/dev/null when we upgrade the minimum version of3131+# binutils to 2.37, llvm to 13.0.0.3232+# Then, the following line will be really simple:3333+# ${NM} --quiet ${1} |3434+3535+{ ${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } } |1636while read value type name1737do1838 # Skip the line if the number of fields is less than 3.···5737 if [[ ${name} == __ksymtab_* ]]; then5838 export_symbols+=(${name#__ksymtab_})5939 fi6060-6161- # If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm)6262- # shows 'no symbols' diagnostic (but exits with 0). It is harmless and6363- # hidden by '2>/dev/null'. However, it suppresses real error messages6464- # as well. Add a hand-crafted error message here.6565- #6666- # Use --quiet instead of 2>/dev/null when we upgrade the minimum version6767- # of binutils to 2.37, llvm to 13.0.0.6868- #6969- # Then, the following line will be really simple:7070- # done < <(${NM} --quiet ${1})7171-done < <(${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } )7272-7373-# Catch error in the process substitution7474-wait $!4040+done75417642for name in "${export_symbols[@]}"7743do
···3434 local mod=${1%.ko:}3535 shift3636 local namespaces="$*"3737- local mod_source_files="`cat $mod.mod | sed -n 1p \3838- | sed -e 's/\.o/\.c/g' \3939- | sed "s|[^ ]* *|${src_prefix}&|g"`"3737+ local mod_source_files=$(sed "s|^\(.*\)\.o$|${src_prefix}\1.c|" $mod.mod)3838+4039 for ns in $namespaces; do4140 echo "Adding namespace $ns to module $mod.ko."4241 generate_deps_for_ns $ns "$mod_source_files"
+1
tools/arch/x86/include/asm/cpufeatures.h
···443443#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */444444#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */445445#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */446446+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */446447447448#endif /* _ASM_X86_CPUFEATURES_H */
+25
tools/arch/x86/include/asm/msr-index.h
···116116 * Not susceptible to117117 * TSX Async Abort (TAA) vulnerabilities.118118 */119119+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*120120+ * Not susceptible to SBDR and SSDP121121+ * variants of Processor MMIO stale data122122+ * vulnerabilities.123123+ */124124+#define ARCH_CAP_FBSDP_NO BIT(14) /*125125+ * Not susceptible to FBSDP variant of126126+ * Processor MMIO stale data127127+ * vulnerabilities.128128+ */129129+#define ARCH_CAP_PSDP_NO BIT(15) /*130130+ * Not susceptible to PSDP variant of131131+ * Processor MMIO stale data132132+ * vulnerabilities.133133+ */134134+#define ARCH_CAP_FB_CLEAR BIT(17) /*135135+ * VERW clears CPU fill buffer136136+ * even on MDS_NO CPUs.137137+ */138138+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*139139+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]140140+ * bit available to control VERW141141+ * behavior.142142+ */119143120144#define MSR_IA32_FLUSH_CMD 0x0000010b121145#define L1D_FLUSH BIT(0) /*···157133#define MSR_IA32_MCU_OPT_CTRL 0x00000123158134#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */159135#define RTM_ALLOW BIT(1) /* TSX development mode */136136+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */160137161138#define MSR_IA32_SYSENTER_CS 0x00000174162139#define MSR_IA32_SYSENTER_ESP 0x00000175
···30303131struct perf_test_args {3232 struct kvm_vm *vm;3333+ /* The starting address and size of the guest test region. */3334 uint64_t gpa;3535+ uint64_t size;3436 uint64_t guest_page_size;3537 int wr_fract;3838+3939+ /* Run vCPUs in L2 instead of L1, if the architecture supports it. */4040+ bool nested;36413742 struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];3843};···54495550void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));5651void perf_test_join_vcpu_threads(int vcpus);5252+void perf_test_guest_code(uint32_t vcpu_id);5353+5454+uint64_t perf_test_nested_pages(int nr_vcpus);5555+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);57565857#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
···4040 * Continuously write to the first 8 bytes of each page in the4141 * specified region.4242 */4343-static void guest_code(uint32_t vcpu_id)4343+void perf_test_guest_code(uint32_t vcpu_id)4444{4545 struct perf_test_args *pta = &perf_test_args;4646 struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];···108108{109109 struct perf_test_args *pta = &perf_test_args;110110 struct kvm_vm *vm;111111- uint64_t guest_num_pages;111111+ uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;112112 uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);113113+ uint64_t region_end_gfn;113114 int i;114115115116 pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));···136135 slots);137136138137 /*138138+ * If using nested, allocate extra pages for the nested page tables and139139+ * in-memory data structures.140140+ */141141+ if (pta->nested)142142+ slot0_pages += perf_test_nested_pages(vcpus);143143+144144+ /*139145 * Pass guest_num_pages to populate the page tables for test memory.140146 * The memory is also added to memslot 0, but that's a benign side141147 * effect as KVM allows aliasing HVAs in meslots.142148 */143143- vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,144144- guest_num_pages, 0, guest_code, NULL);149149+ vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,150150+ perf_test_guest_code, NULL);145151146152 pta->vm = vm;147153154154+ /* Put the test region at the top guest physical memory. 
*/155155+ region_end_gfn = vm_get_max_gfn(vm) + 1;156156+157157+#ifdef __x86_64__158158+ /*159159+ * When running vCPUs in L2, restrict the test region to 48 bits to160160+ * avoid needing 5-level page tables to identity map L2.161161+ */162162+ if (pta->nested)163163+ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);164164+#endif148165 /*149166 * If there should be more memory in the guest test region than there150167 * can be pages in the guest, it will definitely cause problems.151168 */152152- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),169169+ TEST_ASSERT(guest_num_pages < region_end_gfn,153170 "Requested more guest memory than address space allows.\n"154171 " guest pages: %" PRIx64 " max gfn: %" PRIx64155172 " vcpus: %d wss: %" PRIx64 "]\n",156156- guest_num_pages, vm_get_max_gfn(vm), vcpus,173173+ guest_num_pages, region_end_gfn - 1, vcpus,157174 vcpu_memory_bytes);158175159159- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;176176+ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;160177 pta->gpa = align_down(pta->gpa, backing_src_pagesz);161178#ifdef __s390x__162179 /* Align to 1M (segment size) */163180 pta->gpa = align_down(pta->gpa, 1 << 20);164181#endif165165- pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);182182+ pta->size = guest_num_pages * pta->guest_page_size;183183+ pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",184184+ pta->gpa, pta->gpa + pta->size);166185167186 /* Add extra memory slots for testing */168187 for (i = 0; i < slots; i++) {···198177 virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages);199178200179 perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);180180+181181+ if (pta->nested) {182182+ pr_info("Configuring vCPUs to run in L2 (nested).\n");183183+ perf_test_setup_nested(vm, vcpus);184184+ }201185202186 ucall_init(vm, NULL);203187···222196{223197 perf_test_args.wr_fract = wr_fract;224198 
sync_global_to_guest(vm, perf_test_args);199199+}200200+201201+uint64_t __weak perf_test_nested_pages(int nr_vcpus)202202+{203203+ return 0;204204+}205205+206206+void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)207207+{208208+ pr_info("%s() not support on this architecture, skipping.\n", __func__);209209+ exit(KSFT_SKIP);225210}226211227212static void *vcpu_thread_main(void *data)
···244244#ifdef __x86_64__245245 /* Identity map memory in the guest using 1gb pages. */246246 for (i = 0; i < slot_size; i += size_1gb)247247- __virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);247247+ __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);248248#else249249 for (i = 0; i < slot_size; i += vm_get_page_size(vm))250250 virt_pg_map(vm, gpa + i, gpa + i);
// SPDX-License-Identifier: GPL-2.0
/*
 * This times how long it takes to bind to a port when the port already
 * has multiple sockets in its bhash table.
 *
 * In the setup(), we populate the port's bhash table with
 * MAX_THREADS * MAX_CONNECTIONS number of entries.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE	/* getaddrinfo() & co. are POSIX, not ISO C */
#endif

#include <netdb.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#define MAX_THREADS 600
#define MAX_CONNECTIONS 40

static const char *bind_addr = "::1";
static const char *port;

/* One row per setup thread; a slot holds a bound socket fd or -1 if unused. */
static int fd_array[MAX_THREADS][MAX_CONNECTIONS];

/*
 * Create an IPv6 TCP socket and bind it to @addr on the global @port.
 *
 * @opt:  a single SOL_SOCKET boolean option to enable before binding
 *        (e.g. SO_REUSEPORT), or 0 to bind without setting any option.
 * @addr: IPv6 address (or resolvable name) to bind to.
 *
 * Returns the bound socket fd on success. On failure a diagnostic is
 * printed, the socket and the address list are released, and -1 is
 * returned.
 */
static int bind_socket(int opt, const char *addr)
{
	struct addrinfo hint = {
		.ai_family = AF_INET6,
		.ai_socktype = SOCK_STREAM,
	};
	struct addrinfo *res = NULL;
	int sock_fd, reuse = 1, err;

	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (sock_fd < 0) {
		perror("socket fd err");
		return -1;
	}

	err = getaddrinfo(addr, port, &hint, &res);
	if (err) {
		/* getaddrinfo() reports failure via its return value, not errno. */
		fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(err));
		goto err_close;
	}

	if (opt) {
		err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
		if (err) {
			perror("setsockopt failed");
			goto err_free;
		}
	}

	err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
	if (err) {
		perror("failed to bind to port");
		goto err_free;
	}

	freeaddrinfo(res);
	return sock_fd;

err_free:
	freeaddrinfo(res);
err_close:
	close(sock_fd);
	return -1;
}

/*
 * Thread body: bind up to MAX_CONNECTIONS sockets to bind_addr:port and
 * record them in the row of fd_array passed via @arg. Stops at the first
 * failure, leaving the remaining slots of the row untouched.
 *
 * Socket option names are plain integers, not bit flags, so they must not
 * be OR-ed together. With the asm-generic values (SO_REUSEADDR == 2,
 * SO_REUSEPORT == 15) the former "SO_REUSEADDR | SO_REUSEPORT" evaluated to
 * SO_REUSEPORT, which is what is requested explicitly here.
 */
static void *setup(void *arg)
{
	int *array = arg;
	int i;

	for (i = 0; i < MAX_CONNECTIONS; i++) {
		int sock_fd = bind_socket(SO_REUSEPORT, bind_addr);

		if (sock_fd < 0)
			break;
		array[i] = sock_fd;
	}

	return NULL;
}

/*
 * Usage: <prog> <port>
 *
 * Populates the bhash entry of <port> with many SO_REUSEPORT sockets bound
 * to bind_addr, then reports the clock() time consumed by one additional
 * bind() to the same port on a different address.
 *
 * Returns 0 once the measurement has been printed, -1 on usage or listener
 * setup errors.
 */
int main(int argc, const char *argv[])
{
	int listener_fd, sock_fd, nr_threads, err, i, j;
	pthread_t tid[MAX_THREADS];
	clock_t begin, end;

	if (argc != 2) {
		printf("Usage: listener <port>\n");
		return -1;
	}

	port = argv[1];

	/*
	 * Mark every slot unused. Static storage is zero-filled and 0 is a
	 * valid fd (stdin), which cleanup must not close by accident.
	 */
	for (i = 0; i < MAX_THREADS; i++) {
		for (j = 0; j < MAX_CONNECTIONS; j++)
			fd_array[i][j] = -1;
	}

	listener_fd = bind_socket(SO_REUSEPORT, bind_addr);
	if (listener_fd < 0)
		return -1;

	if (listen(listener_fd, 100) < 0) {
		perror("listen failed");
		close(listener_fd);
		return -1;
	}

	/* Set up threads to populate the bhash table entry for the port */
	for (nr_threads = 0; nr_threads < MAX_THREADS; nr_threads++) {
		err = pthread_create(&tid[nr_threads], NULL, setup,
				     fd_array[nr_threads]);
		if (err) {
			/* pthread_create() returns the error number directly. */
			fprintf(stderr, "pthread_create failed: %s\n",
				strerror(err));
			break;
		}
	}

	/* Only join threads that were actually started. */
	for (i = 0; i < nr_threads; i++)
		pthread_join(tid[i], NULL);

	begin = clock();

	/* Bind to the same port on a different address */
	sock_fd = bind_socket(0, "2001:0db8:0:f101::1");

	end = clock();

	printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);

	/* clean up */
	if (sock_fd >= 0)
		close(sock_fd);
	close(listener_fd);
	for (i = 0; i < MAX_THREADS; i++) {
		for (j = 0; j < MAX_CONNECTIONS; j++) {
			if (fd_array[i][j] >= 0)
				close(fd_array[i][j]);
		}
	}

	return 0;
}