Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

+1

Documentation/ABI/testing/sysfs-devices-system-cpu

··· 526 526 /sys/devices/system/cpu/vulnerabilities/srbds 527 527 /sys/devices/system/cpu/vulnerabilities/tsx_async_abort 528 528 /sys/devices/system/cpu/vulnerabilities/itlb_multihit 529 + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data 529 530 Date: January 2018 530 531 Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org> 531 532 Description: Information about CPU vulnerabilities

+1

Documentation/admin-guide/hw-vuln/index.rst

··· 17 17 special-register-buffer-data-sampling.rst 18 18 core-scheduling.rst 19 19 l1d_flush.rst 20 + processor_mmio_stale_data.rst

+246

Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst

··· 1 + ========================================= 2 + Processor MMIO Stale Data Vulnerabilities 3 + ========================================= 4 + 5 + Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O 6 + (MMIO) vulnerabilities that can expose data. The sequences of operations for 7 + exposing data range from simple to very complex. Because most of the 8 + vulnerabilities require the attacker to have access to MMIO, many environments 9 + are not affected. System environments using virtualization where MMIO access is 10 + provided to untrusted guests may need mitigation. These vulnerabilities are 11 + not transient execution attacks. However, these vulnerabilities may propagate 12 + stale data into core fill buffers where the data can subsequently be inferred 13 + by an unmitigated transient execution attack. Mitigation for these 14 + vulnerabilities includes a combination of microcode update and software 15 + changes, depending on the platform and usage model. Some of these mitigations 16 + are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or 17 + those used to mitigate Special Register Buffer Data Sampling (SRBDS). 18 + 19 + Data Propagators 20 + ================ 21 + Propagators are operations that result in stale data being copied or moved from 22 + one microarchitectural buffer or register to another. Processor MMIO Stale Data 23 + Vulnerabilities are operations that may result in stale data being directly 24 + read into an architectural, software-visible state or sampled from a buffer or 25 + register. 26 + 27 + Fill Buffer Stale Data Propagator (FBSDP) 28 + ----------------------------------------- 29 + Stale data may propagate from fill buffers (FB) into the non-coherent portion 30 + of the uncore on some non-coherent writes. Fill buffer propagation by itself 31 + does not make stale data architecturally visible. Stale data must be propagated 32 + to a location where it is subject to reading or sampling. 
33 + 34 + Sideband Stale Data Propagator (SSDP) 35 + ------------------------------------- 36 + The sideband stale data propagator (SSDP) is limited to the client (including 37 + Intel Xeon server E3) uncore implementation. The sideband response buffer is 38 + shared by all client cores. For non-coherent reads that go to sideband 39 + destinations, the uncore logic returns 64 bytes of data to the core, including 40 + both requested data and unrequested stale data, from a transaction buffer and 41 + the sideband response buffer. As a result, stale data from the sideband 42 + response and transaction buffers may now reside in a core fill buffer. 43 + 44 + Primary Stale Data Propagator (PSDP) 45 + ------------------------------------ 46 + The primary stale data propagator (PSDP) is limited to the client (including 47 + Intel Xeon server E3) uncore implementation. Similar to the sideband response 48 + buffer, the primary response buffer is shared by all client cores. For some 49 + processors, MMIO primary reads will return 64 bytes of data to the core fill 50 + buffer including both requested data and unrequested stale data. This is 51 + similar to the sideband stale data propagator. 52 + 53 + Vulnerabilities 54 + =============== 55 + Device Register Partial Write (DRPW) (CVE-2022-21166) 56 + ----------------------------------------------------- 57 + Some endpoint MMIO registers incorrectly handle writes that are smaller than 58 + the register size. Instead of aborting the write or only copying the correct 59 + subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than 60 + specified by the write transaction may be written to the register. On 61 + processors affected by FBSDP, this may expose stale data from the fill buffers 62 + of the core that created the write transaction. 
63 + 64 + Shared Buffers Data Sampling (SBDS) (CVE-2022-21125) 65 + ---------------------------------------------------- 66 + After propagators may have moved data around the uncore and copied stale data 67 + into client core fill buffers, processors affected by MFBDS can leak data from 68 + the fill buffer. It is limited to the client (including Intel Xeon server E3) 69 + uncore implementation. 70 + 71 + Shared Buffers Data Read (SBDR) (CVE-2022-21123) 72 + ------------------------------------------------ 73 + It is similar to Shared Buffer Data Sampling (SBDS) except that the data is 74 + directly read into the architectural software-visible state. It is limited to 75 + the client (including Intel Xeon server E3) uncore implementation. 76 + 77 + Affected Processors 78 + =================== 79 + Not all the CPUs are affected by all the variants. For instance, most 80 + processors for the server market (excluding Intel Xeon E3 processors) are 81 + impacted by only Device Register Partial Write (DRPW). 82 + 83 + Below is the list of affected Intel processors [#f1]_: 84 + 85 + =================== ============ ========= 86 + Common name Family_Model Steppings 87 + =================== ============ ========= 88 + HASWELL_X 06_3FH 2,4 89 + SKYLAKE_L 06_4EH 3 90 + BROADWELL_X 06_4FH All 91 + SKYLAKE_X 06_55H 3,4,6,7,11 92 + BROADWELL_D 06_56H 3,4,5 93 + SKYLAKE 06_5EH 3 94 + ICELAKE_X 06_6AH 4,5,6 95 + ICELAKE_D 06_6CH 1 96 + ICELAKE_L 06_7EH 5 97 + ATOM_TREMONT_D 06_86H All 98 + LAKEFIELD 06_8AH 1 99 + KABYLAKE_L 06_8EH 9 to 12 100 + ATOM_TREMONT 06_96H 1 101 + ATOM_TREMONT_L 06_9CH 0 102 + KABYLAKE 06_9EH 9 to 13 103 + COMETLAKE 06_A5H 2,3,5 104 + COMETLAKE_L 06_A6H 0,1 105 + ROCKETLAKE 06_A7H 1 106 + =================== ============ ========= 107 + 108 + If a CPU is in the affected processor list, but not affected by a variant, it 109 + is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. 
As described in a later 110 + section, mitigation largely remains the same for all the variants, i.e. to 111 + clear the CPU fill buffers via VERW instruction. 112 + 113 + New bits in MSRs 114 + ================ 115 + Newer processors and microcode update on existing affected processors added new 116 + bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate 117 + specific variants of Processor MMIO Stale Data vulnerabilities and mitigation 118 + capability. 119 + 120 + MSR IA32_ARCH_CAPABILITIES 121 + -------------------------- 122 + Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the 123 + Shared Buffers Data Read (SBDR) vulnerability or the sideband stale 124 + data propagator (SSDP). 125 + Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer 126 + Stale Data Propagator (FBSDP). 127 + Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data 128 + Propagator (PSDP). 129 + Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer 130 + values as part of MD_CLEAR operations. Processors that do not 131 + enumerate MDS_NO (meaning they are affected by MDS) but that do 132 + enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate 133 + FB_CLEAR as part of their MD_CLEAR support. 134 + Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR 135 + IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS 136 + bit can be set to cause the VERW instruction to not perform the 137 + FB_CLEAR action. Not all processors that support FB_CLEAR will support 138 + FB_CLEAR_CTRL. 139 + 140 + MSR IA32_MCU_OPT_CTRL 141 + --------------------- 142 + Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR 143 + action. 
This may be useful to reduce the performance impact of FB_CLEAR in 144 + cases where system software deems it warranted (for example, when performance 145 + is more critical, or the untrusted software has no MMIO access). Note that 146 + FB_CLEAR_DIS has no impact on enumeration (for example, it does not change 147 + FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors 148 + that enumerate FB_CLEAR. 149 + 150 + Mitigation 151 + ========== 152 + Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the 153 + same mitigation strategy to force the CPU to clear the affected buffers before 154 + an attacker can extract the secrets. 155 + 156 + This is achieved by using the otherwise unused and obsolete VERW instruction in 157 + combination with a microcode update. The microcode clears the affected CPU 158 + buffers when the VERW instruction is executed. 159 + 160 + Kernel reuses the MDS function to invoke the buffer clearing: 161 + 162 + mds_clear_cpu_buffers() 163 + 164 + On MDS affected CPUs, the kernel already invokes CPU buffer clear on 165 + kernel/userspace, hypervisor/guest and C-state (idle) transitions. No 166 + additional mitigation is needed on such CPUs. 167 + 168 + For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker 169 + with MMIO capability. Therefore, VERW is not required for kernel/userspace. For 170 + virtualization case, VERW is only needed at VMENTER for a guest with MMIO 171 + capability. 172 + 173 + Mitigation points 174 + ----------------- 175 + Return to user space 176 + ^^^^^^^^^^^^^^^^^^^^ 177 + Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation 178 + needed. 179 + 180 + C-State transition 181 + ^^^^^^^^^^^^^^^^^^ 182 + Control register writes by CPU during C-state transition can propagate data 183 + from fill buffer to uncore buffers. Execute VERW before C-state transition to 184 + clear CPU fill buffers. 
185 + 186 + Guest entry point 187 + ^^^^^^^^^^^^^^^^^ 188 + Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise 189 + execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by 190 + MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO 191 + Stale Data vulnerabilities, so there is no need to execute VERW for such guests. 192 + 193 + Mitigation control on the kernel command line 194 + --------------------------------------------- 195 + The kernel command line allows to control the Processor MMIO Stale Data 196 + mitigations at boot time with the option "mmio_stale_data=". The valid 197 + arguments for this option are: 198 + 199 + ========== ================================================================= 200 + full If the CPU is vulnerable, enable mitigation; CPU buffer clearing 201 + on exit to userspace and when entering a VM. Idle transitions are 202 + protected as well. It does not automatically disable SMT. 203 + full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the 204 + complete mitigation. 205 + off Disables mitigation completely. 206 + ========== ================================================================= 207 + 208 + If the CPU is affected and mmio_stale_data=off is not supplied on the kernel 209 + command line, then the kernel selects the appropriate mitigation. 210 + 211 + Mitigation status information 212 + ----------------------------- 213 + The Linux kernel provides a sysfs interface to enumerate the current 214 + vulnerability status of the system: whether the system is vulnerable, and 215 + which mitigations are active. The relevant sysfs file is: 216 + 217 + /sys/devices/system/cpu/vulnerabilities/mmio_stale_data 218 + 219 + The possible values in this file are: 220 + 221 + .. 
list-table:: 222 + 223 + * - 'Not affected' 224 + - The processor is not vulnerable 225 + * - 'Vulnerable' 226 + - The processor is vulnerable, but no mitigation is enabled 227 + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' 228 + - The processor is vulnerable, but microcode is not updated. The 229 + mitigation is enabled on a best-effort basis. 230 + * - 'Mitigation: Clear CPU buffers' 231 + - The processor is vulnerable and the CPU buffer clearing mitigation is 232 + enabled. 233 + 234 + If the processor is vulnerable then the following information is appended to 235 + the above information: 236 + 237 + ======================== =========================================== 238 + 'SMT vulnerable' SMT is enabled 239 + 'SMT disabled' SMT is disabled 240 + 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown 241 + ======================== =========================================== 242 + 243 + References 244 + ---------- 245 + .. [#f1] Affected Processors 246 + https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html

+36 -1

Documentation/admin-guide/kernel-parameters.txt

··· 2469 2469 2470 2470 protected: nVHE-based mode with support for guests whose 2471 2471 state is kept private from the host. 2472 - Not valid if the kernel is running in EL2. 2473 2472 2474 2473 Defaults to VHE/nVHE based on hardware support. Setting 2475 2474 mode to "protected" will disable kexec and hibernation ··· 3175 3176 srbds=off [X86,INTEL] 3176 3177 no_entry_flush [PPC] 3177 3178 no_uaccess_flush [PPC] 3179 + mmio_stale_data=off [X86] 3178 3180 3179 3181 Exceptions: 3180 3182 This does not have any effect on ··· 3197 3197 Equivalent to: l1tf=flush,nosmt [X86] 3198 3198 mds=full,nosmt [X86] 3199 3199 tsx_async_abort=full,nosmt [X86] 3200 + mmio_stale_data=full,nosmt [X86] 3200 3201 3201 3202 mminit_loglevel= 3202 3203 [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this ··· 3206 3205 of 0 disables mminit logging and a level of 4 will 3207 3206 log everything. Information is printed at KERN_DEBUG 3208 3207 so loglevel=8 may also need to be specified. 3208 + 3209 + mmio_stale_data= 3210 + [X86,INTEL] Control mitigation for the Processor 3211 + MMIO Stale Data vulnerabilities. 3212 + 3213 + Processor MMIO Stale Data is a class of 3214 + vulnerabilities that may expose data after an MMIO 3215 + operation. Exposed data could originate or end in 3216 + the same CPU buffers as affected by MDS and TAA. 3217 + Therefore, similar to MDS and TAA, the mitigation 3218 + is to clear the affected CPU buffers. 3219 + 3220 + This parameter controls the mitigation. The 3221 + options are: 3222 + 3223 + full - Enable mitigation on vulnerable CPUs 3224 + 3225 + full,nosmt - Enable mitigation and disable SMT on 3226 + vulnerable CPUs. 
3227 + 3228 + off - Unconditionally disable mitigation 3229 + 3230 + On MDS or TAA affected machines, 3231 + mmio_stale_data=off can be prevented by an active 3232 + MDS or TAA mitigation as these vulnerabilities are 3233 + mitigated with the same mechanism, so in order to 3234 + disable this mitigation, you need to specify 3235 + mds=off and tsx_async_abort=off too. 3236 + 3237 + Not specifying this option is equivalent to 3238 + mmio_stale_data=full. 3239 + 3240 + For details see: 3241 + Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst 3209 3242 3210 3243 module.sig_enforce 3211 3244 [KNL] When CONFIG_MODULE_SIG is set, this means that

+17 -16

Documentation/filesystems/netfs_library.rst

··· 79 79 provided. Firstly, a function to perform basic initialisation on a context and 80 80 set the operations table pointer:: 81 81 82 - void netfs_inode_init(struct inode *inode, 82 + void netfs_inode_init(struct netfs_inode *ctx, 83 83 const struct netfs_request_ops *ops); 84 84 85 85 then a function to cast from the VFS inode structure to the netfs context:: ··· 89 89 and finally, a function to get the cache cookie pointer from the context 90 90 attached to an inode (or NULL if fscache is disabled):: 91 91 92 - struct fscache_cookie *netfs_i_cookie(struct inode *inode); 92 + struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx); 93 93 94 94 95 95 Buffered Read Helpers ··· 136 136 137 137 void netfs_readahead(struct readahead_control *ractl); 138 138 int netfs_read_folio(struct file *file, 139 - struct folio *folio); 140 - int netfs_write_begin(struct file *file, 139 + struct folio *folio); 140 + int netfs_write_begin(struct netfs_inode *ctx, 141 + struct file *file, 141 142 struct address_space *mapping, 142 143 loff_t pos, 143 144 unsigned int len, ··· 158 157 through the supplied table of operations. Waits will be performed as 159 158 necessary before returning for helpers that are meant to be synchronous. 160 159 161 - If an error occurs and netfs_priv is non-NULL, ops->cleanup() will be called to 162 - deal with it. If some parts of the request are in progress when an error 163 - occurs, the request will get partially completed if sufficient data is read. 160 + If an error occurs, the ->free_request() will be called to clean up the 161 + netfs_io_request struct allocated. If some parts of the request are in 162 + progress when an error occurs, the request will get partially completed if 163 + sufficient data is read. 164 164 165 165 Additionally, there is:: 166 166 ··· 209 207 * ``netfs_priv`` 210 208 211 209 The network filesystem's private data. The value for this can be passed in 212 - to the helper functions or set during the request. 
The ->cleanup() op will 213 - be called if this is non-NULL at the end. 210 + to the helper functions or set during the request. 214 211 215 212 * ``start`` 216 213 * ``len`` ··· 294 293 295 294 struct netfs_request_ops { 296 295 void (*init_request)(struct netfs_io_request *rreq, struct file *file); 296 + void (*free_request)(struct netfs_io_request *rreq); 297 297 int (*begin_cache_operation)(struct netfs_io_request *rreq); 298 298 void (*expand_readahead)(struct netfs_io_request *rreq); 299 299 bool (*clamp_length)(struct netfs_io_subrequest *subreq); ··· 303 301 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, 304 302 struct folio *folio, void **_fsdata); 305 303 void (*done)(struct netfs_io_request *rreq); 306 - void (*cleanup)(struct address_space *mapping, void *netfs_priv); 307 304 }; 308 305 309 306 The operations are as follows: ··· 310 309 * ``init_request()`` 311 310 312 311 [Optional] This is called to initialise the request structure. It is given 313 - the file for reference and can modify the ->netfs_priv value. 312 + the file for reference. 313 + 314 + * ``free_request()`` 315 + 316 + [Optional] This is called as the request is being deallocated so that the 317 + filesystem can clean up any state it has attached there. 314 318 315 319 * ``begin_cache_operation()`` 316 320 ··· 388 382 389 383 [Optional] This is called after the folios in the request have all been 390 384 unlocked (and marked uptodate if applicable). 391 - 392 - * ``cleanup`` 393 - 394 - [Optional] This is called as the request is being deallocated so that the 395 - filesystem can clean up ->netfs_priv. 396 385 397 386 398 387

+37

Documentation/networking/ip-sysctl.rst

··· 2925 2925 2926 2926 Default: 0 2927 2927 2928 + reconf_enable - BOOLEAN 2929 + Enable or disable extension of Stream Reconfiguration functionality 2930 + specified in RFC6525. This extension provides the ability to "reset" 2931 + a stream, and it includes the Parameters of "Outgoing/Incoming SSN 2932 + Reset", "SSN/TSN Reset" and "Add Outgoing/Incoming Streams". 2933 + 2934 + - 1: Enable extension. 2935 + - 0: Disable extension. 2936 + 2937 + Default: 0 2938 + 2939 + intl_enable - BOOLEAN 2940 + Enable or disable extension of User Message Interleaving functionality 2941 + specified in RFC8260. This extension allows the interleaving of user 2942 + messages sent on different streams. With this feature enabled, I-DATA 2943 + chunk will replace DATA chunk to carry user messages if also supported 2944 + by the peer. Note that to use this feature, one needs to set this option 2945 + to 1 and also needs to set socket options SCTP_FRAGMENT_INTERLEAVE to 2 2946 + and SCTP_INTERLEAVING_SUPPORTED to 1. 2947 + 2948 + - 1: Enable extension. 2949 + - 0: Disable extension. 2950 + 2951 + Default: 0 2952 + 2953 + ecn_enable - BOOLEAN 2954 + Control use of Explicit Congestion Notification (ECN) by SCTP. 2955 + Like in TCP, ECN is used only when both ends of the SCTP connection 2956 + indicate support for it. This feature is useful in avoiding losses 2957 + due to congestion by allowing supporting routers to signal congestion 2958 + before having to drop packets. 2959 + 2960 + - 1: Enable ECN. 2961 + - 0: Disable ECN. 2962 + 2963 + Default: 1 2964 + 2928 2965 2929 2966 ``/proc/sys/net/core/*`` 2930 2967 ========================

+1 -1

Documentation/networking/phy.rst

··· 104 104 105 105 * PHY device drivers in PHYLIB being reusable by nature, being able to 106 106 configure correctly a specified delay enables more designs with similar delay 107 - requirements to be operate correctly 107 + requirements to be operated correctly 108 108 109 109 For cases where the PHY is not capable of providing this delay, but the 110 110 Ethernet MAC driver is capable of doing so, the correct phy_interface_t value

+12

Documentation/process/changes.rst

··· 32 32 GNU C 5.1 gcc --version 33 33 Clang/LLVM (optional) 11.0.0 clang --version 34 34 GNU make 3.81 make --version 35 + bash 4.2 bash --version 35 36 binutils 2.23 ld -v 36 37 flex 2.5.35 flex --version 37 38 bison 2.0 bison --version ··· 84 83 ---- 85 84 86 85 You will need GNU make 3.81 or later to build the kernel. 86 + 87 + Bash 88 + ---- 89 + 90 + Some bash scripts are used for the kernel build. 91 + Bash 4.2 or newer is needed. 87 92 88 93 Binutils 89 94 -------- ··· 368 361 ---- 369 362 370 363 - <ftp://ftp.gnu.org/gnu/make/> 364 + 365 + Bash 366 + ---- 367 + 368 + - <ftp://ftp.gnu.org/gnu/bash/> 371 369 372 370 Binutils 373 371 --------

+3 -1

MAINTAINERS

··· 7653 7653 7654 7654 FILE LOCKING (flock() and fcntl()/lockf()) 7655 7655 M: Jeff Layton <jlayton@kernel.org> 7656 + M: Chuck Lever <chuck.lever@oracle.com> 7656 7657 L: linux-fsdevel@vger.kernel.org 7657 7658 S: Maintained 7658 7659 F: fs/fcntl.c ··· 10746 10745 10747 10746 KERNEL NFSD, SUNRPC, AND LOCKD SERVERS 10748 10747 M: Chuck Lever <chuck.lever@oracle.com> 10748 + M: Jeff Layton <jlayton@kernel.org> 10749 10749 L: linux-nfs@vger.kernel.org 10750 10750 S: Supported 10751 10751 W: http://nfs.sourceforge.net/ ··· 10871 10869 F: arch/riscv/include/uapi/asm/kvm* 10872 10870 F: arch/riscv/kvm/ 10873 10871 F: tools/testing/selftests/kvm/*/riscv/ 10874 - F: tools/testing/selftests/kvm/riscv/ 10875 10872 10876 10873 KERNEL VIRTUAL MACHINE for s390 (KVM/s390) 10877 10874 M: Christian Borntraeger <borntraeger@linux.ibm.com> ··· 13799 13798 F: Documentation/devicetree/bindings/net/ 13800 13799 F: drivers/connector/ 13801 13800 F: drivers/net/ 13801 + F: include/dt-bindings/net/ 13802 13802 F: include/linux/etherdevice.h 13803 13803 F: include/linux/fcdevice.h 13804 13804 F: include/linux/fddidevice.h

+1 -1

Makefile

··· 2 2 VERSION = 5 3 3 PATCHLEVEL = 19 4 4 SUBLEVEL = 0 5 - EXTRAVERSION = -rc1 5 + EXTRAVERSION = -rc2 6 6 NAME = Superb Owl 7 7 8 8 # *DOCUMENTATION*

+5

arch/arm/boot/dts/at91-sama5d3_ksz9477_evb.dts

··· 120 120 port@0 { 121 121 reg = <0>; 122 122 label = "lan1"; 123 + phy-mode = "internal"; 123 124 }; 124 125 125 126 port@1 { 126 127 reg = <1>; 127 128 label = "lan2"; 129 + phy-mode = "internal"; 128 130 }; 129 131 130 132 port@2 { 131 133 reg = <2>; 132 134 label = "lan3"; 135 + phy-mode = "internal"; 133 136 }; 134 137 135 138 port@3 { 136 139 reg = <3>; 137 140 label = "lan4"; 141 + phy-mode = "internal"; 138 142 }; 139 143 140 144 port@4 { 141 145 reg = <4>; 142 146 label = "lan5"; 147 + phy-mode = "internal"; 143 148 }; 144 149 145 150 port@5 {

-5

arch/arm64/include/asm/kvm_host.h

··· 363 363 struct kvm_pmu pmu; 364 364 365 365 /* 366 - * Anything that is not used directly from assembly code goes 367 - * here. 368 - */ 369 - 370 - /* 371 366 * Guest registers we preserve during guest debugging. 372 367 * 373 368 * These shadow registers are updated by the kvm_handle_sys_reg

+3

arch/arm64/include/asm/virt.h

··· 113 113 /* 114 114 * Code only run in VHE/NVHE hyp context can assume VHE is present or 115 115 * absent. Otherwise fall back to caps. 116 + * This allows the compiler to discard VHE-specific code from the 117 + * nVHE object, reducing the number of external symbol references 118 + * needed to link. 116 119 */ 117 120 if (is_vhe_hyp_code()) 118 121 return true;

+1 -9

arch/arm64/kernel/cpufeature.c

··· 1974 1974 #ifdef CONFIG_KVM 1975 1975 static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) 1976 1976 { 1977 - if (kvm_get_mode() != KVM_MODE_PROTECTED) 1978 - return false; 1979 - 1980 - if (is_kernel_in_hyp_mode()) { 1981 - pr_warn("Protected KVM not available with VHE\n"); 1982 - return false; 1983 - } 1984 - 1985 - return true; 1977 + return kvm_get_mode() == KVM_MODE_PROTECTED; 1986 1978 } 1987 1979 #endif /* CONFIG_KVM */ 1988 1980

+3

arch/arm64/kvm/arch_timer.c

··· 1230 1230 struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); 1231 1231 struct arch_timer_context *timer; 1232 1232 1233 + if (WARN(!vcpu, "No vcpu context!\n")) 1234 + return false; 1235 + 1233 1236 if (vintid == vcpu_vtimer(vcpu)->irq.irq) 1234 1237 timer = vcpu_vtimer(vcpu); 1235 1238 else if (vintid == vcpu_ptimer(vcpu)->irq.irq)

+8 -2

arch/arm64/kvm/arm.c

··· 150 150 if (ret) 151 151 goto out_free_stage2_pgd; 152 152 153 - if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) 153 + if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { 154 + ret = -ENOMEM; 154 155 goto out_free_stage2_pgd; 156 + } 155 157 cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); 156 158 157 159 kvm_vgic_early_init(kvm); ··· 2273 2271 return -EINVAL; 2274 2272 2275 2273 if (strcmp(arg, "protected") == 0) { 2276 - kvm_mode = KVM_MODE_PROTECTED; 2274 + if (!is_kernel_in_hyp_mode()) 2275 + kvm_mode = KVM_MODE_PROTECTED; 2276 + else 2277 + pr_warn_once("Protected KVM not available with VHE\n"); 2278 + 2277 2279 return 0; 2278 2280 } 2279 2281

+2

arch/arm64/kvm/fpsimd.c

··· 80 80 vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; 81 81 vcpu->arch.flags |= KVM_ARM64_FP_HOST; 82 82 83 + vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED; 83 84 if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) 84 85 vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; 85 86 ··· 94 93 * operations. Do this for ZA as well for now for simplicity. 95 94 */ 96 95 if (system_supports_sme()) { 96 + vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED; 97 97 if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN) 98 98 vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED; 99 99

-4

arch/arm64/kvm/hyp/nvhe/mem_protect.c

··· 314 314 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, 315 315 enum kvm_pgtable_prot prot) 316 316 { 317 - hyp_assert_lock_held(&host_kvm.lock); 318 - 319 317 return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); 320 318 } 321 319 322 320 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) 323 321 { 324 - hyp_assert_lock_held(&host_kvm.lock); 325 - 326 322 return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, 327 323 addr, size, &host_s2_pool, owner_id); 328 324 }

+34 -8

arch/arm64/kvm/hyp/nvhe/sys_regs.c

··· 243 243 case SYS_ID_AA64MMFR2_EL1: 244 244 return get_pvm_id_aa64mmfr2(vcpu); 245 245 default: 246 - /* 247 - * Should never happen because all cases are covered in 248 - * pvm_sys_reg_descs[]. 249 - */ 250 - WARN_ON(1); 251 - break; 246 + /* Unhandled ID register, RAZ */ 247 + return 0; 252 248 } 253 - 254 - return 0; 255 249 } 256 250 257 251 static u64 read_id_reg(const struct kvm_vcpu *vcpu, ··· 326 332 /* Mark the specified system register as an AArch64 feature id register. */ 327 333 #define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } 328 334 335 + /* 336 + * sys_reg_desc initialiser for architecturally unallocated cpufeature ID 337 + * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 338 + * (1 <= crm < 8, 0 <= Op2 < 8). 339 + */ 340 + #define ID_UNALLOCATED(crm, op2) { \ 341 + Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ 342 + .access = pvm_access_id_aarch64, \ 343 + } 344 + 329 345 /* Mark the specified system register as Read-As-Zero/Write-Ignored */ 330 346 #define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } 331 347 ··· 379 375 AARCH32(SYS_MVFR0_EL1), 380 376 AARCH32(SYS_MVFR1_EL1), 381 377 AARCH32(SYS_MVFR2_EL1), 378 + ID_UNALLOCATED(3,3), 382 379 AARCH32(SYS_ID_PFR2_EL1), 383 380 AARCH32(SYS_ID_DFR1_EL1), 384 381 AARCH32(SYS_ID_MMFR5_EL1), 382 + ID_UNALLOCATED(3,7), 385 383 386 384 /* AArch64 ID registers */ 387 385 /* CRm=4 */ 388 386 AARCH64(SYS_ID_AA64PFR0_EL1), 389 387 AARCH64(SYS_ID_AA64PFR1_EL1), 388 + ID_UNALLOCATED(4,2), 389 + ID_UNALLOCATED(4,3), 390 390 AARCH64(SYS_ID_AA64ZFR0_EL1), 391 + ID_UNALLOCATED(4,5), 392 + ID_UNALLOCATED(4,6), 393 + ID_UNALLOCATED(4,7), 391 394 AARCH64(SYS_ID_AA64DFR0_EL1), 392 395 AARCH64(SYS_ID_AA64DFR1_EL1), 396 + ID_UNALLOCATED(5,2), 397 + ID_UNALLOCATED(5,3), 393 398 AARCH64(SYS_ID_AA64AFR0_EL1), 394 399 AARCH64(SYS_ID_AA64AFR1_EL1), 400 + ID_UNALLOCATED(5,6), 401 + ID_UNALLOCATED(5,7), 395 402 AARCH64(SYS_ID_AA64ISAR0_EL1), 396 403 
AARCH64(SYS_ID_AA64ISAR1_EL1), 404 + AARCH64(SYS_ID_AA64ISAR2_EL1), 405 + ID_UNALLOCATED(6,3), 406 + ID_UNALLOCATED(6,4), 407 + ID_UNALLOCATED(6,5), 408 + ID_UNALLOCATED(6,6), 409 + ID_UNALLOCATED(6,7), 397 410 AARCH64(SYS_ID_AA64MMFR0_EL1), 398 411 AARCH64(SYS_ID_AA64MMFR1_EL1), 399 412 AARCH64(SYS_ID_AA64MMFR2_EL1), 413 + ID_UNALLOCATED(7,3), 414 + ID_UNALLOCATED(7,4), 415 + ID_UNALLOCATED(7,5), 416 + ID_UNALLOCATED(7,6), 417 + ID_UNALLOCATED(7,7), 400 418 401 419 /* Scalable Vector Registers are restricted. */ 402 420

+2 -2

arch/arm64/kvm/vgic/vgic-mmio-v2.c

··· 429 429 VGIC_ACCESS_32bit), 430 430 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, 431 431 vgic_mmio_read_pending, vgic_mmio_write_spending, 432 - NULL, vgic_uaccess_write_spending, 1, 432 + vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1, 433 433 VGIC_ACCESS_32bit), 434 434 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, 435 435 vgic_mmio_read_pending, vgic_mmio_write_cpending, 436 - NULL, vgic_uaccess_write_cpending, 1, 436 + vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1, 437 437 VGIC_ACCESS_32bit), 438 438 REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, 439 439 vgic_mmio_read_active, vgic_mmio_write_sactive,

+2 -38

arch/arm64/kvm/vgic/vgic-mmio-v3.c

··· 353 353 return 0; 354 354 } 355 355 356 - static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, 357 - gpa_t addr, unsigned int len) 358 - { 359 - u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 360 - u32 value = 0; 361 - int i; 362 - 363 - /* 364 - * pending state of interrupt is latched in pending_latch variable. 365 - * Userspace will save and restore pending state and line_level 366 - * separately. 367 - * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst 368 - * for handling of ISPENDR and ICPENDR. 369 - */ 370 - for (i = 0; i < len * 8; i++) { 371 - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); 372 - bool state = irq->pending_latch; 373 - 374 - if (irq->hw && vgic_irq_is_sgi(irq->intid)) { 375 - int err; 376 - 377 - err = irq_get_irqchip_state(irq->host_irq, 378 - IRQCHIP_STATE_PENDING, 379 - &state); 380 - WARN_ON(err); 381 - } 382 - 383 - if (state) 384 - value |= (1U << i); 385 - 386 - vgic_put_irq(vcpu->kvm, irq); 387 - } 388 - 389 - return value; 390 - } 391 - 392 356 static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, 393 357 gpa_t addr, unsigned int len, 394 358 unsigned long val) ··· 630 666 VGIC_ACCESS_32bit), 631 667 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, 632 668 vgic_mmio_read_pending, vgic_mmio_write_spending, 633 - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, 669 + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, 634 670 VGIC_ACCESS_32bit), 635 671 REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, 636 672 vgic_mmio_read_pending, vgic_mmio_write_cpending, ··· 714 750 VGIC_ACCESS_32bit), 715 751 REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, 716 752 vgic_mmio_read_pending, vgic_mmio_write_spending, 717 - vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, 753 + vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, 718 754 VGIC_ACCESS_32bit), 719 755 REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, 720 756 
vgic_mmio_read_pending, vgic_mmio_write_cpending,

+36 -4

arch/arm64/kvm/vgic/vgic-mmio.c

··· 226 226 return 0; 227 227 } 228 228 229 - unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, 230 - gpa_t addr, unsigned int len) 229 + static unsigned long __read_pending(struct kvm_vcpu *vcpu, 230 + gpa_t addr, unsigned int len, 231 + bool is_user) 231 232 { 232 233 u32 intid = VGIC_ADDR_TO_INTID(addr, 1); 233 234 u32 value = 0; ··· 240 239 unsigned long flags; 241 240 bool val; 242 241 242 + /* 243 + * When used from userspace with a GICv3 model: 244 + * 245 + * Pending state of interrupt is latched in pending_latch 246 + * variable. Userspace will save and restore pending state 247 + * and line_level separately. 248 + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst 249 + * for handling of ISPENDR and ICPENDR. 250 + */ 243 251 raw_spin_lock_irqsave(&irq->irq_lock, flags); 244 252 if (irq->hw && vgic_irq_is_sgi(irq->intid)) { 245 253 int err; ··· 258 248 IRQCHIP_STATE_PENDING, 259 249 &val); 260 250 WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); 261 - } else if (vgic_irq_is_mapped_level(irq)) { 251 + } else if (!is_user && vgic_irq_is_mapped_level(irq)) { 262 252 val = vgic_get_phys_line_level(irq); 263 253 } else { 264 - val = irq_is_pending(irq); 254 + switch (vcpu->kvm->arch.vgic.vgic_model) { 255 + case KVM_DEV_TYPE_ARM_VGIC_V3: 256 + if (is_user) { 257 + val = irq->pending_latch; 258 + break; 259 + } 260 + fallthrough; 261 + default: 262 + val = irq_is_pending(irq); 263 + break; 264 + } 265 265 } 266 266 267 267 value |= ((u32)val << i); ··· 281 261 } 282 262 283 263 return value; 264 + } 265 + 266 + unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, 267 + gpa_t addr, unsigned int len) 268 + { 269 + return __read_pending(vcpu, addr, len, false); 270 + } 271 + 272 + unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, 273 + gpa_t addr, unsigned int len) 274 + { 275 + return __read_pending(vcpu, addr, len, true); 284 276 } 285 277 286 278 static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)

+3

arch/arm64/kvm/vgic/vgic-mmio.h

··· 149 149 unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, 150 150 gpa_t addr, unsigned int len); 151 151 152 + unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu, 153 + gpa_t addr, unsigned int len); 154 + 152 155 void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, 153 156 gpa_t addr, unsigned int len, 154 157 unsigned long val);

+1 -1

arch/arm64/kvm/vmid.c

··· 66 66 * the next context-switch, we broadcast TLB flush + I-cache 67 67 * invalidation over the inner shareable domain on rollover. 68 68 */ 69 - kvm_call_hyp(__kvm_flush_vm_context); 69 + kvm_call_hyp(__kvm_flush_vm_context); 70 70 } 71 71 72 72 static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)

+1

arch/loongarch/Kconfig

··· 343 343 344 344 config NUMA 345 345 bool "NUMA Support" 346 + select SMP 346 347 select ACPI_NUMA if ACPI 347 348 help 348 349 Say Y to compile the kernel with NUMA (Non-Uniform Memory Access)

+1 -1

arch/loongarch/include/asm/hardirq.h

··· 19 19 unsigned int __softirq_pending; 20 20 } ____cacheline_aligned irq_cpustat_t; 21 21 22 - DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat); 22 + DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 23 23 24 24 #define __ARCH_IRQ_STAT 25 25

+1

arch/loongarch/include/asm/percpu.h

··· 6 6 #define __ASM_PERCPU_H 7 7 8 8 #include <asm/cmpxchg.h> 9 + #include <asm/loongarch.h> 9 10 10 11 /* Use r21 for fast access */ 11 12 register unsigned long __my_cpu_offset __asm__("$r21");

+7 -16

arch/loongarch/include/asm/smp.h

··· 9 9 #include <linux/atomic.h> 10 10 #include <linux/bitops.h> 11 11 #include <linux/linkage.h> 12 - #include <linux/smp.h> 13 12 #include <linux/threads.h> 14 13 #include <linux/cpumask.h> 14 + 15 + extern int smp_num_siblings; 16 + extern int num_processors; 17 + extern int disabled_cpus; 18 + extern cpumask_t cpu_sibling_map[]; 19 + extern cpumask_t cpu_core_map[]; 20 + extern cpumask_t cpu_foreign_map[]; 15 21 16 22 void loongson3_smp_setup(void); 17 23 void loongson3_prepare_cpus(unsigned int max_cpus); ··· 31 25 void loongson3_cpu_die(unsigned int cpu); 32 26 #endif 33 27 34 - #ifdef CONFIG_SMP 35 - 36 28 static inline void plat_smp_setup(void) 37 29 { 38 30 loongson3_smp_setup(); 39 31 } 40 - 41 - #else /* !CONFIG_SMP */ 42 - 43 - static inline void plat_smp_setup(void) { } 44 - 45 - #endif /* !CONFIG_SMP */ 46 - 47 - extern int smp_num_siblings; 48 - extern int num_processors; 49 - extern int disabled_cpus; 50 - extern cpumask_t cpu_sibling_map[]; 51 - extern cpumask_t cpu_core_map[]; 52 - extern cpumask_t cpu_foreign_map[]; 53 32 54 33 static inline int raw_smp_processor_id(void) 55 34 {

-7

arch/loongarch/include/asm/timex.h

··· 12 12 #include <asm/cpu.h> 13 13 #include <asm/cpu-features.h> 14 14 15 - /* 16 - * Standard way to access the cycle counter. 17 - * Currently only used on SMP for scheduling. 18 - * 19 - * We know that all SMP capable CPUs have cycle counters. 20 - */ 21 - 22 15 typedef unsigned long cycles_t; 23 16 24 17 #define get_cycles get_cycles

+4

arch/loongarch/kernel/acpi.c

··· 138 138 } 139 139 } 140 140 141 + #ifdef CONFIG_SMP 141 142 static int set_processor_mask(u32 id, u32 flags) 142 143 { 143 144 ··· 167 166 168 167 return cpu; 169 168 } 169 + #endif 170 170 171 171 static void __init acpi_process_madt(void) 172 172 { 173 + #ifdef CONFIG_SMP 173 174 int i; 174 175 175 176 for (i = 0; i < NR_CPUS; i++) { 176 177 __cpu_number_map[i] = -1; 177 178 __cpu_logical_map[i] = -1; 178 179 } 180 + #endif 179 181 180 182 loongson_sysconf.nr_cpus = num_processors; 181 183 }

+1

arch/loongarch/kernel/cacheinfo.c

··· 4 4 * 5 5 * Copyright (C) 2020-2022 Loongson Technology Corporation Limited 6 6 */ 7 + #include <asm/cpu-info.h> 7 8 #include <linux/cacheinfo.h> 8 9 9 10 /* Populates leaf and increments to next leaf */

+6 -1

arch/loongarch/kernel/irq.c

··· 22 22 #include <asm/setup.h> 23 23 24 24 DEFINE_PER_CPU(unsigned long, irq_stack); 25 + DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 26 + EXPORT_PER_CPU_SYMBOL(irq_stat); 25 27 26 28 struct irq_domain *cpu_domain; 27 29 struct irq_domain *liointc_domain; ··· 58 56 59 57 void __init init_IRQ(void) 60 58 { 61 - int i, r, ipi_irq; 59 + int i; 60 + #ifdef CONFIG_SMP 61 + int r, ipi_irq; 62 62 static int ipi_dummy_dev; 63 + #endif 63 64 unsigned int order = get_order(IRQ_STACK_SIZE); 64 65 struct page *page; 65 66

+8 -6

arch/loongarch/kernel/process.c

··· 120 120 /* 121 121 * Copy architecture-specific thread state 122 122 */ 123 - int copy_thread(unsigned long clone_flags, unsigned long usp, 124 - unsigned long kthread_arg, struct task_struct *p, unsigned long tls) 123 + int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) 125 124 { 126 125 unsigned long childksp; 126 + unsigned long tls = args->tls; 127 + unsigned long usp = args->stack; 128 + unsigned long clone_flags = args->flags; 127 129 struct pt_regs *childregs, *regs = current_pt_regs(); 128 130 129 131 childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32; ··· 138 136 p->thread.csr_crmd = csr_read32(LOONGARCH_CSR_CRMD); 139 137 p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD); 140 138 p->thread.csr_ecfg = csr_read32(LOONGARCH_CSR_ECFG); 141 - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) { 139 + if (unlikely(args->fn)) { 142 140 /* kernel thread */ 143 - p->thread.reg23 = usp; /* fn */ 144 - p->thread.reg24 = kthread_arg; 145 141 p->thread.reg03 = childksp; 146 - p->thread.reg01 = (unsigned long) ret_from_kernel_thread; 142 + p->thread.reg23 = (unsigned long)args->fn; 143 + p->thread.reg24 = (unsigned long)args->fn_arg; 144 + p->thread.reg01 = (unsigned long)ret_from_kernel_thread; 147 145 memset(childregs, 0, sizeof(struct pt_regs)); 148 146 childregs->csr_euen = p->thread.csr_euen; 149 147 childregs->csr_crmd = p->thread.csr_crmd;

+2 -3

arch/loongarch/kernel/setup.c

··· 39 39 #include <asm/pgalloc.h> 40 40 #include <asm/sections.h> 41 41 #include <asm/setup.h> 42 - #include <asm/smp.h> 43 42 #include <asm/time.h> 44 43 45 44 #define SMBIOS_BIOSSIZE_OFFSET 0x09 ··· 348 349 349 350 nr_cpu_ids = possible; 350 351 } 351 - #else 352 - static inline void prefill_possible_map(void) {} 353 352 #endif 354 353 355 354 void __init setup_arch(char **cmdline_p) ··· 364 367 arch_mem_init(cmdline_p); 365 368 366 369 resource_init(); 370 + #ifdef CONFIG_SMP 367 371 plat_smp_setup(); 368 372 prefill_possible_map(); 373 + #endif 369 374 370 375 paging_init(); 371 376 }

-2

arch/loongarch/kernel/smp.c

··· 66 66 67 67 struct secondary_data cpuboot_data; 68 68 static DEFINE_PER_CPU(int, cpu_state); 69 - DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 70 - EXPORT_PER_CPU_SYMBOL(irq_stat); 71 69 72 70 enum ipi_msg_type { 73 71 IPI_RESCHEDULE,

+1 -1

arch/riscv/kvm/vmid.c

··· 97 97 * We ran out of VMIDs so we increment vmid_version and 98 98 * start assigning VMIDs from 1. 99 99 * 100 - * This also means existing VMIDs assignement to all Guest 100 + * This also means existing VMIDs assignment to all Guest 101 101 * instances is invalid and we have to force VMID re-assignment 102 102 * for all Guest instances. The Guest instances that were not 103 103 * running will automatically pick-up new VMIDs because will

+6 -1

arch/um/drivers/virt-pci.c

··· 544 544 dev->cmd_vq = vqs[0]; 545 545 dev->irq_vq = vqs[1]; 546 546 547 + virtio_device_ready(dev->vdev); 548 + 547 549 for (i = 0; i < NUM_IRQ_MSGS; i++) { 548 550 void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL); 549 551 ··· 589 587 dev->irq = irq_alloc_desc(numa_node_id()); 590 588 if (dev->irq < 0) { 591 589 err = dev->irq; 592 - goto error; 590 + goto err_reset; 593 591 } 594 592 um_pci_devices[free].dev = dev; 595 593 vdev->priv = dev; ··· 606 604 607 605 um_pci_rescan(); 608 606 return 0; 607 + err_reset: 608 + virtio_reset_device(vdev); 609 + vdev->config->del_vqs(vdev); 609 610 error: 610 611 mutex_unlock(&um_pci_mtx); 611 612 kfree(dev);

+1

arch/x86/include/asm/cpufeatures.h

··· 446 446 #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ 447 447 #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ 448 448 #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ 449 + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ 449 450 450 451 #endif /* _ASM_X86_CPUFEATURES_H */

+68 -5

arch/x86/include/asm/kvm_host.h

··· 1047 1047 }; 1048 1048 1049 1049 enum kvm_apicv_inhibit { 1050 + 1051 + /********************************************************************/ 1052 + /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ 1053 + /********************************************************************/ 1054 + 1055 + /* 1056 + * APIC acceleration is disabled by a module parameter 1057 + * and/or not supported in hardware. 1058 + */ 1050 1059 APICV_INHIBIT_REASON_DISABLE, 1060 + 1061 + /* 1062 + * APIC acceleration is inhibited because AutoEOI feature is 1063 + * being used by a HyperV guest. 1064 + */ 1051 1065 APICV_INHIBIT_REASON_HYPERV, 1052 - APICV_INHIBIT_REASON_NESTED, 1053 - APICV_INHIBIT_REASON_IRQWIN, 1054 - APICV_INHIBIT_REASON_PIT_REINJ, 1055 - APICV_INHIBIT_REASON_X2APIC, 1056 - APICV_INHIBIT_REASON_BLOCKIRQ, 1066 + 1067 + /* 1068 + * APIC acceleration is inhibited because the userspace didn't yet 1069 + * enable the kernel/split irqchip. 1070 + */ 1057 1071 APICV_INHIBIT_REASON_ABSENT, 1072 + 1073 + /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ 1074 + * (out of band, debug measure of blocking all interrupts on this vCPU) 1075 + * was enabled, to avoid AVIC/APICv bypassing it. 1076 + */ 1077 + APICV_INHIBIT_REASON_BLOCKIRQ, 1078 + 1079 + /* 1080 + * For simplicity, the APIC acceleration is inhibited 1081 + * first time either APIC ID or APIC base are changed by the guest 1082 + * from their reset values. 1083 + */ 1084 + APICV_INHIBIT_REASON_APIC_ID_MODIFIED, 1085 + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, 1086 + 1087 + /******************************************************/ 1088 + /* INHIBITs that are relevant only to the AMD's AVIC. */ 1089 + /******************************************************/ 1090 + 1091 + /* 1092 + * AVIC is inhibited on a vCPU because it runs a nested guest. 
1093 + * 1094 + * This is needed because unlike APICv, the peers of this vCPU 1095 + * cannot use the doorbell mechanism to signal interrupts via AVIC when 1096 + * a vCPU runs nested. 1097 + */ 1098 + APICV_INHIBIT_REASON_NESTED, 1099 + 1100 + /* 1101 + * On SVM, the wait for the IRQ window is implemented with pending vIRQ, 1102 + * which cannot be injected when the AVIC is enabled, thus AVIC 1103 + * is inhibited while KVM waits for IRQ window. 1104 + */ 1105 + APICV_INHIBIT_REASON_IRQWIN, 1106 + 1107 + /* 1108 + * PIT (i8254) 're-inject' mode, relies on EOI intercept, 1109 + * which AVIC doesn't support for edge triggered interrupts. 1110 + */ 1111 + APICV_INHIBIT_REASON_PIT_REINJ, 1112 + 1113 + /* 1114 + * AVIC is inhibited because the guest has x2apic in its CPUID. 1115 + */ 1116 + APICV_INHIBIT_REASON_X2APIC, 1117 + 1118 + /* 1119 + * AVIC is disabled because SEV doesn't support it. 1120 + */ 1058 1121 APICV_INHIBIT_REASON_SEV, 1059 1122 }; 1060 1123

+25

arch/x86/include/asm/msr-index.h

··· 116 116 * Not susceptible to 117 117 * TSX Async Abort (TAA) vulnerabilities. 118 118 */ 119 + #define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* 120 + * Not susceptible to SBDR and SSDP 121 + * variants of Processor MMIO stale data 122 + * vulnerabilities. 123 + */ 124 + #define ARCH_CAP_FBSDP_NO BIT(14) /* 125 + * Not susceptible to FBSDP variant of 126 + * Processor MMIO stale data 127 + * vulnerabilities. 128 + */ 129 + #define ARCH_CAP_PSDP_NO BIT(15) /* 130 + * Not susceptible to PSDP variant of 131 + * Processor MMIO stale data 132 + * vulnerabilities. 133 + */ 134 + #define ARCH_CAP_FB_CLEAR BIT(17) /* 135 + * VERW clears CPU fill buffer 136 + * even on MDS_NO CPUs. 137 + */ 138 + #define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* 139 + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] 140 + * bit available to control VERW 141 + * behavior. 142 + */ 119 143 120 144 #define MSR_IA32_FLUSH_CMD 0x0000010b 121 145 #define L1D_FLUSH BIT(0) /* ··· 157 133 #define MSR_IA32_MCU_OPT_CTRL 0x00000123 158 134 #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ 159 135 #define RTM_ALLOW BIT(1) /* TSX development mode */ 136 + #define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ 160 137 161 138 #define MSR_IA32_SYSENTER_CS 0x00000174 162 139 #define MSR_IA32_SYSENTER_ESP 0x00000175

+2

arch/x86/include/asm/nospec-branch.h

··· 269 269 270 270 DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); 271 271 272 + DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear); 273 + 272 274 #include <asm/segment.h> 273 275 274 276 /**

+199 -36

arch/x86/kernel/cpu/bugs.c

··· 41 41 static void __init ssb_select_mitigation(void); 42 42 static void __init l1tf_select_mitigation(void); 43 43 static void __init mds_select_mitigation(void); 44 - static void __init mds_print_mitigation(void); 44 + static void __init md_clear_update_mitigation(void); 45 + static void __init md_clear_select_mitigation(void); 45 46 static void __init taa_select_mitigation(void); 47 + static void __init mmio_select_mitigation(void); 46 48 static void __init srbds_select_mitigation(void); 47 49 static void __init l1d_flush_select_mitigation(void); 48 50 ··· 87 85 */ 88 86 DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush); 89 87 88 + /* Controls CPU Fill buffer clear before KVM guest MMIO accesses */ 89 + DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear); 90 + EXPORT_SYMBOL_GPL(mmio_stale_data_clear); 91 + 90 92 void __init check_bugs(void) 91 93 { 92 94 identify_boot_cpu(); ··· 123 117 spectre_v2_select_mitigation(); 124 118 ssb_select_mitigation(); 125 119 l1tf_select_mitigation(); 126 - mds_select_mitigation(); 127 - taa_select_mitigation(); 120 + md_clear_select_mitigation(); 128 121 srbds_select_mitigation(); 129 122 l1d_flush_select_mitigation(); 130 - 131 - /* 132 - * As MDS and TAA mitigations are inter-related, print MDS 133 - * mitigation until after TAA mitigation selection is done. 
134 - */ 135 - mds_print_mitigation(); 136 123 137 124 arch_smt_update(); 138 125 ··· 266 267 } 267 268 } 268 269 269 - static void __init mds_print_mitigation(void) 270 - { 271 - if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) 272 - return; 273 - 274 - pr_info("%s\n", mds_strings[mds_mitigation]); 275 - } 276 - 277 270 static int __init mds_cmdline(char *str) 278 271 { 279 272 if (!boot_cpu_has_bug(X86_BUG_MDS)) ··· 320 329 /* TSX previously disabled by tsx=off */ 321 330 if (!boot_cpu_has(X86_FEATURE_RTM)) { 322 331 taa_mitigation = TAA_MITIGATION_TSX_DISABLED; 323 - goto out; 332 + return; 324 333 } 325 334 326 335 if (cpu_mitigations_off()) { ··· 334 343 */ 335 344 if (taa_mitigation == TAA_MITIGATION_OFF && 336 345 mds_mitigation == MDS_MITIGATION_OFF) 337 - goto out; 346 + return; 338 347 339 348 if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) 340 349 taa_mitigation = TAA_MITIGATION_VERW; ··· 366 375 367 376 if (taa_nosmt || cpu_mitigations_auto_nosmt()) 368 377 cpu_smt_disable(false); 369 - 370 - /* 371 - * Update MDS mitigation, if necessary, as the mds_user_clear is 372 - * now enabled for TAA mitigation. 
373 - */ 374 - if (mds_mitigation == MDS_MITIGATION_OFF && 375 - boot_cpu_has_bug(X86_BUG_MDS)) { 376 - mds_mitigation = MDS_MITIGATION_FULL; 377 - mds_select_mitigation(); 378 - } 379 - out: 380 - pr_info("%s\n", taa_strings[taa_mitigation]); 381 378 } 382 379 383 380 static int __init tsx_async_abort_parse_cmdline(char *str) ··· 388 409 return 0; 389 410 } 390 411 early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); 412 + 413 + #undef pr_fmt 414 + #define pr_fmt(fmt) "MMIO Stale Data: " fmt 415 + 416 + enum mmio_mitigations { 417 + MMIO_MITIGATION_OFF, 418 + MMIO_MITIGATION_UCODE_NEEDED, 419 + MMIO_MITIGATION_VERW, 420 + }; 421 + 422 + /* Default mitigation for Processor MMIO Stale Data vulnerabilities */ 423 + static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW; 424 + static bool mmio_nosmt __ro_after_init = false; 425 + 426 + static const char * const mmio_strings[] = { 427 + [MMIO_MITIGATION_OFF] = "Vulnerable", 428 + [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", 429 + [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", 430 + }; 431 + 432 + static void __init mmio_select_mitigation(void) 433 + { 434 + u64 ia32_cap; 435 + 436 + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || 437 + cpu_mitigations_off()) { 438 + mmio_mitigation = MMIO_MITIGATION_OFF; 439 + return; 440 + } 441 + 442 + if (mmio_mitigation == MMIO_MITIGATION_OFF) 443 + return; 444 + 445 + ia32_cap = x86_read_arch_cap_msr(); 446 + 447 + /* 448 + * Enable CPU buffer clear mitigation for host and VMM, if also affected 449 + * by MDS or TAA. Otherwise, enable mitigation for VMM only. 
450 + */ 451 + if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) && 452 + boot_cpu_has(X86_FEATURE_RTM))) 453 + static_branch_enable(&mds_user_clear); 454 + else 455 + static_branch_enable(&mmio_stale_data_clear); 456 + 457 + /* 458 + * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can 459 + * be propagated to uncore buffers, clearing the Fill buffers on idle 460 + * is required irrespective of SMT state. 461 + */ 462 + if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) 463 + static_branch_enable(&mds_idle_clear); 464 + 465 + /* 466 + * Check if the system has the right microcode. 467 + * 468 + * CPU Fill buffer clear mitigation is enumerated by either an explicit 469 + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS 470 + * affected systems. 471 + */ 472 + if ((ia32_cap & ARCH_CAP_FB_CLEAR) || 473 + (boot_cpu_has(X86_FEATURE_MD_CLEAR) && 474 + boot_cpu_has(X86_FEATURE_FLUSH_L1D) && 475 + !(ia32_cap & ARCH_CAP_MDS_NO))) 476 + mmio_mitigation = MMIO_MITIGATION_VERW; 477 + else 478 + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; 479 + 480 + if (mmio_nosmt || cpu_mitigations_auto_nosmt()) 481 + cpu_smt_disable(false); 482 + } 483 + 484 + static int __init mmio_stale_data_parse_cmdline(char *str) 485 + { 486 + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) 487 + return 0; 488 + 489 + if (!str) 490 + return -EINVAL; 491 + 492 + if (!strcmp(str, "off")) { 493 + mmio_mitigation = MMIO_MITIGATION_OFF; 494 + } else if (!strcmp(str, "full")) { 495 + mmio_mitigation = MMIO_MITIGATION_VERW; 496 + } else if (!strcmp(str, "full,nosmt")) { 497 + mmio_mitigation = MMIO_MITIGATION_VERW; 498 + mmio_nosmt = true; 499 + } 500 + 501 + return 0; 502 + } 503 + early_param("mmio_stale_data", mmio_stale_data_parse_cmdline); 504 + 505 + #undef pr_fmt 506 + #define pr_fmt(fmt) "" fmt 507 + 508 + static void __init md_clear_update_mitigation(void) 509 + { 510 + if (cpu_mitigations_off()) 511 + return; 512 + 513 + if 
(!static_key_enabled(&mds_user_clear)) 514 + goto out; 515 + 516 + /* 517 + * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data 518 + * mitigation, if necessary. 519 + */ 520 + if (mds_mitigation == MDS_MITIGATION_OFF && 521 + boot_cpu_has_bug(X86_BUG_MDS)) { 522 + mds_mitigation = MDS_MITIGATION_FULL; 523 + mds_select_mitigation(); 524 + } 525 + if (taa_mitigation == TAA_MITIGATION_OFF && 526 + boot_cpu_has_bug(X86_BUG_TAA)) { 527 + taa_mitigation = TAA_MITIGATION_VERW; 528 + taa_select_mitigation(); 529 + } 530 + if (mmio_mitigation == MMIO_MITIGATION_OFF && 531 + boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) { 532 + mmio_mitigation = MMIO_MITIGATION_VERW; 533 + mmio_select_mitigation(); 534 + } 535 + out: 536 + if (boot_cpu_has_bug(X86_BUG_MDS)) 537 + pr_info("MDS: %s\n", mds_strings[mds_mitigation]); 538 + if (boot_cpu_has_bug(X86_BUG_TAA)) 539 + pr_info("TAA: %s\n", taa_strings[taa_mitigation]); 540 + if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) 541 + pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); 542 + } 543 + 544 + static void __init md_clear_select_mitigation(void) 545 + { 546 + mds_select_mitigation(); 547 + taa_select_mitigation(); 548 + mmio_select_mitigation(); 549 + 550 + /* 551 + * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update 552 + * and print their mitigation after MDS, TAA and MMIO Stale Data 553 + * mitigation selection is done. 554 + */ 555 + md_clear_update_mitigation(); 556 + } 391 557 392 558 #undef pr_fmt 393 559 #define pr_fmt(fmt) "SRBDS: " fmt ··· 602 478 return; 603 479 604 480 /* 605 - * Check to see if this is one of the MDS_NO systems supporting 606 - * TSX that are only exposed to SRBDS when TSX is enabled. 481 + * Check to see if this is one of the MDS_NO systems supporting TSX that 482 + * are only exposed to SRBDS when TSX is enabled or when CPU is affected 483 + * by Processor MMIO Stale Data vulnerability. 
607 484 */ 608 485 ia32_cap = x86_read_arch_cap_msr(); 609 - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM)) 486 + if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && 487 + !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) 610 488 srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; 611 489 else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) 612 490 srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR; ··· 1242 1116 /* Update the static key controlling the MDS CPU buffer clear in idle */ 1243 1117 static void update_mds_branch_idle(void) 1244 1118 { 1119 + u64 ia32_cap = x86_read_arch_cap_msr(); 1120 + 1245 1121 /* 1246 1122 * Enable the idle clearing if SMT is active on CPUs which are 1247 1123 * affected only by MSBDS and not any other MDS variant. ··· 1255 1127 if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY)) 1256 1128 return; 1257 1129 1258 - if (sched_smt_active()) 1130 + if (sched_smt_active()) { 1259 1131 static_branch_enable(&mds_idle_clear); 1260 - else 1132 + } else if (mmio_mitigation == MMIO_MITIGATION_OFF || 1133 + (ia32_cap & ARCH_CAP_FBSDP_NO)) { 1261 1134 static_branch_disable(&mds_idle_clear); 1135 + } 1262 1136 } 1263 1137 1264 1138 #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" 1265 1139 #define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" 1140 + #define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. 
See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n" 1266 1141 1267 1142 void cpu_bugs_smt_update(void) 1268 1143 { ··· 1307 1176 break; 1308 1177 case TAA_MITIGATION_TSX_DISABLED: 1309 1178 case TAA_MITIGATION_OFF: 1179 + break; 1180 + } 1181 + 1182 + switch (mmio_mitigation) { 1183 + case MMIO_MITIGATION_VERW: 1184 + case MMIO_MITIGATION_UCODE_NEEDED: 1185 + if (sched_smt_active()) 1186 + pr_warn_once(MMIO_MSG_SMT); 1187 + break; 1188 + case MMIO_MITIGATION_OFF: 1310 1189 break; 1311 1190 } 1312 1191 ··· 1922 1781 sched_smt_active() ? "vulnerable" : "disabled"); 1923 1782 } 1924 1783 1784 + static ssize_t mmio_stale_data_show_state(char *buf) 1785 + { 1786 + if (mmio_mitigation == MMIO_MITIGATION_OFF) 1787 + return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); 1788 + 1789 + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { 1790 + return sysfs_emit(buf, "%s; SMT Host state unknown\n", 1791 + mmio_strings[mmio_mitigation]); 1792 + } 1793 + 1794 + return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation], 1795 + sched_smt_active() ? "vulnerable" : "disabled"); 1796 + } 1797 + 1925 1798 static char *stibp_state(void) 1926 1799 { 1927 1800 if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ··· 2036 1881 case X86_BUG_SRBDS: 2037 1882 return srbds_show_state(buf); 2038 1883 1884 + case X86_BUG_MMIO_STALE_DATA: 1885 + return mmio_stale_data_show_state(buf); 1886 + 2039 1887 default: 2040 1888 break; 2041 1889 } ··· 2089 1931 ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf) 2090 1932 { 2091 1933 return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS); 1934 + } 1935 + 1936 + ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) 1937 + { 1938 + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); 2092 1939 } 2093 1940 #endif

+49 -3

arch/x86/kernel/cpu/common.c

··· 1211 1211 X86_FEATURE_ANY, issues) 1212 1212 1213 1213 #define SRBDS BIT(0) 1214 + /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ 1215 + #define MMIO BIT(1) 1216 + /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ 1217 + #define MMIO_SBDS BIT(2) 1214 1218 1215 1219 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { 1216 1220 VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), 1217 1221 VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), 1218 1222 VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), 1219 1223 VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), 1224 + VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), 1225 + VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), 1220 1226 VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), 1227 + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), 1221 1228 VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), 1229 + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), 1222 1230 VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), 1231 + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | 1232 + BIT(7) | BIT(0xB), MMIO), 1233 + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), 1223 1234 VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), 1224 - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS), 1225 - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS), 1235 + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), 1236 + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), 1237 + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), 1238 + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), 1239 + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), 1240 + VULNBL_INTEL_STEPPINGS(ICELAKE_D, 
X86_STEPPINGS(0x1, 0x1), MMIO), 1241 + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), 1242 + VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), 1243 + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), 1244 + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), 1245 + VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), 1246 + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), 1247 + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), 1248 + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), 1249 + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), 1226 1250 {} 1227 1251 }; 1228 1252 ··· 1265 1241 rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); 1266 1242 1267 1243 return ia32_cap; 1244 + } 1245 + 1246 + static bool arch_cap_mmio_immune(u64 ia32_cap) 1247 + { 1248 + return (ia32_cap & ARCH_CAP_FBSDP_NO && 1249 + ia32_cap & ARCH_CAP_PSDP_NO && 1250 + ia32_cap & ARCH_CAP_SBDR_SSDP_NO); 1268 1251 } 1269 1252 1270 1253 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ··· 1327 1296 /* 1328 1297 * SRBDS affects CPUs which support RDRAND or RDSEED and are listed 1329 1298 * in the vulnerability blacklist. 1299 + * 1300 + * Some of the implications and mitigation of Shared Buffers Data 1301 + * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as 1302 + * SRBDS. 
1330 1303 */ 1331 1304 if ((cpu_has(c, X86_FEATURE_RDRAND) || 1332 1305 cpu_has(c, X86_FEATURE_RDSEED)) && 1333 - cpu_matches(cpu_vuln_blacklist, SRBDS)) 1306 + cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS)) 1334 1307 setup_force_cpu_bug(X86_BUG_SRBDS); 1308 + 1309 + /* 1310 + * Processor MMIO Stale Data bug enumeration 1311 + * 1312 + * Affected CPU list is generally enough to enumerate the vulnerability, 1313 + * but for virtualization case check for ARCH_CAP MSR bits also, VMM may 1314 + * not want the guest to enumerate the bug. 1315 + */ 1316 + if (cpu_matches(cpu_vuln_blacklist, MMIO) && 1317 + !arch_cap_mmio_immune(ia32_cap)) 1318 + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); 1335 1319 1336 1320 if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) 1337 1321 return;

+23 -4

arch/x86/kvm/lapic.c

··· 2039 2039 } 2040 2040 } 2041 2041 2042 + static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic) 2043 + { 2044 + struct kvm *kvm = apic->vcpu->kvm; 2045 + 2046 + if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm)) 2047 + return; 2048 + 2049 + if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id) 2050 + return; 2051 + 2052 + kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED); 2053 + } 2054 + 2042 2055 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) 2043 2056 { 2044 2057 int ret = 0; ··· 2060 2047 2061 2048 switch (reg) { 2062 2049 case APIC_ID: /* Local APIC ID */ 2063 - if (!apic_x2apic_mode(apic)) 2050 + if (!apic_x2apic_mode(apic)) { 2064 2051 kvm_apic_set_xapic_id(apic, val >> 24); 2065 - else 2052 + kvm_lapic_xapic_id_updated(apic); 2053 + } else { 2066 2054 ret = 1; 2055 + } 2067 2056 break; 2068 2057 2069 2058 case APIC_TASKPRI: ··· 2351 2336 MSR_IA32_APICBASE_BASE; 2352 2337 2353 2338 if ((value & MSR_IA32_APICBASE_ENABLE) && 2354 - apic->base_address != APIC_DEFAULT_PHYS_BASE) 2355 - pr_warn_once("APIC base relocation is unsupported by KVM"); 2339 + apic->base_address != APIC_DEFAULT_PHYS_BASE) { 2340 + kvm_set_apicv_inhibit(apic->vcpu->kvm, 2341 + APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); 2342 + } 2356 2343 } 2357 2344 2358 2345 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) ··· 2665 2648 icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); 2666 2649 __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); 2667 2650 } 2651 + } else { 2652 + kvm_lapic_xapic_id_updated(vcpu->arch.apic); 2668 2653 } 2669 2654 2670 2655 return 0;

+1 -1

arch/x86/kvm/mmu/mmu.c

··· 3411 3411 root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), 3412 3412 i << 30, PT32_ROOT_LEVEL, true); 3413 3413 mmu->pae_root[i] = root | PT_PRESENT_MASK | 3414 - shadow_me_mask; 3414 + shadow_me_value; 3415 3415 } 3416 3416 mmu->root.hpa = __pa(mmu->pae_root); 3417 3417 } else {

+79 -98

arch/x86/kvm/svm/avic.c

··· 291 291 static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source, 292 292 u32 icrl, u32 icrh, u32 index) 293 293 { 294 - u32 dest, apic_id; 295 - struct kvm_vcpu *vcpu; 294 + u32 l1_physical_id, dest; 295 + struct kvm_vcpu *target_vcpu; 296 296 int dest_mode = icrl & APIC_DEST_MASK; 297 297 int shorthand = icrl & APIC_SHORT_MASK; 298 298 struct kvm_svm *kvm_svm = to_kvm_svm(kvm); 299 - u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page); 300 299 301 300 if (shorthand != APIC_DEST_NOSHORT) 302 301 return -EINVAL; 303 302 304 - /* 305 - * The AVIC incomplete IPI #vmexit info provides index into 306 - * the physical APIC ID table, which can be used to derive 307 - * guest physical APIC ID. 308 - */ 309 - if (dest_mode == APIC_DEST_PHYSICAL) { 310 - apic_id = index; 311 - } else { 312 - if (!apic_x2apic_mode(source)) { 313 - /* For xAPIC logical mode, the index is for logical APIC table. */ 314 - apic_id = avic_logical_id_table[index] & 0x1ff; 315 - } else { 316 - return -EINVAL; 317 - } 318 - } 319 - 320 - /* 321 - * Assuming vcpu ID is the same as physical apic ID, 322 - * and use it to retrieve the target vCPU. 323 - */ 324 - vcpu = kvm_get_vcpu_by_id(kvm, apic_id); 325 - if (!vcpu) 326 - return -EINVAL; 327 - 328 - if (apic_x2apic_mode(vcpu->arch.apic)) 303 + if (apic_x2apic_mode(source)) 329 304 dest = icrh; 330 305 else 331 306 dest = GET_APIC_DEST_FIELD(icrh); 332 307 333 - /* 334 - * Try matching the destination APIC ID with the vCPU. 
335 - */ 336 - if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) { 337 - vcpu->arch.apic->irr_pending = true; 338 - svm_complete_interrupt_delivery(vcpu, 339 - icrl & APIC_MODE_MASK, 340 - icrl & APIC_INT_LEVELTRIG, 341 - icrl & APIC_VECTOR_MASK); 342 - return 0; 308 + if (dest_mode == APIC_DEST_PHYSICAL) { 309 + /* broadcast destination, use slow path */ 310 + if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST) 311 + return -EINVAL; 312 + if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST) 313 + return -EINVAL; 314 + 315 + l1_physical_id = dest; 316 + 317 + if (WARN_ON_ONCE(l1_physical_id != index)) 318 + return -EINVAL; 319 + 320 + } else { 321 + u32 bitmap, cluster; 322 + int logid_index; 323 + 324 + if (apic_x2apic_mode(source)) { 325 + /* 16 bit dest mask, 16 bit cluster id */ 326 + bitmap = dest & 0xFFFF0000; 327 + cluster = (dest >> 16) << 4; 328 + } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) { 329 + /* 8 bit dest mask*/ 330 + bitmap = dest; 331 + cluster = 0; 332 + } else { 333 + /* 4 bit dest mask, 4 bit cluster id */ 334 + bitmap = dest & 0xF; 335 + cluster = (dest >> 4) << 2; 336 + } 337 + 338 + if (unlikely(!bitmap)) 339 + /* guest bug: nobody to send the logical interrupt to */ 340 + return 0; 341 + 342 + if (!is_power_of_2(bitmap)) 343 + /* multiple logical destinations, use slow path */ 344 + return -EINVAL; 345 + 346 + logid_index = cluster + __ffs(bitmap); 347 + 348 + if (apic_x2apic_mode(source)) { 349 + l1_physical_id = logid_index; 350 + } else { 351 + u32 *avic_logical_id_table = 352 + page_address(kvm_svm->avic_logical_id_table_page); 353 + 354 + u32 logid_entry = avic_logical_id_table[logid_index]; 355 + 356 + if (WARN_ON_ONCE(index != logid_index)) 357 + return -EINVAL; 358 + 359 + /* guest bug: non existing/reserved logical destination */ 360 + if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK))) 361 + return 0; 362 + 363 + l1_physical_id = logid_entry & 364 + 
AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; 365 + } 343 366 } 344 367 345 - return -EINVAL; 368 + target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id); 369 + if (unlikely(!target_vcpu)) 370 + /* guest bug: non existing vCPU is a target of this IPI*/ 371 + return 0; 372 + 373 + target_vcpu->arch.apic->irr_pending = true; 374 + svm_complete_interrupt_delivery(target_vcpu, 375 + icrl & APIC_MODE_MASK, 376 + icrl & APIC_INT_LEVELTRIG, 377 + icrl & APIC_VECTOR_MASK); 378 + return 0; 346 379 } 347 380 348 381 static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, ··· 541 508 return ret; 542 509 } 543 510 544 - static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu) 545 - { 546 - u64 *old, *new; 547 - struct vcpu_svm *svm = to_svm(vcpu); 548 - u32 id = kvm_xapic_id(vcpu->arch.apic); 549 - 550 - if (vcpu->vcpu_id == id) 551 - return 0; 552 - 553 - old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id); 554 - new = avic_get_physical_id_entry(vcpu, id); 555 - if (!new || !old) 556 - return 1; 557 - 558 - /* We need to move physical_id_entry to new offset */ 559 - *new = *old; 560 - *old = 0ULL; 561 - to_svm(vcpu)->avic_physical_id_cache = new; 562 - 563 - /* 564 - * Also update the guest physical APIC ID in the logical 565 - * APIC ID table entry if already setup the LDR. 
566 - */ 567 - if (svm->ldr_reg) 568 - avic_handle_ldr_update(vcpu); 569 - 570 - return 0; 571 - } 572 - 573 511 static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) 574 512 { 575 513 struct vcpu_svm *svm = to_svm(vcpu); ··· 559 555 AVIC_UNACCEL_ACCESS_OFFSET_MASK; 560 556 561 557 switch (offset) { 562 - case APIC_ID: 563 - if (avic_handle_apic_id_update(vcpu)) 564 - return 0; 565 - break; 566 558 case APIC_LDR: 567 559 if (avic_handle_ldr_update(vcpu)) 568 560 return 0; ··· 650 650 651 651 void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) 652 652 { 653 - if (avic_handle_apic_id_update(vcpu) != 0) 654 - return; 655 653 avic_handle_dfr_update(vcpu); 656 654 avic_handle_ldr_update(vcpu); 657 655 } ··· 908 910 BIT(APICV_INHIBIT_REASON_PIT_REINJ) | 909 911 BIT(APICV_INHIBIT_REASON_X2APIC) | 910 912 BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | 911 - BIT(APICV_INHIBIT_REASON_SEV); 913 + BIT(APICV_INHIBIT_REASON_SEV) | 914 + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | 915 + BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); 912 916 913 917 return supported & BIT(reason); 914 918 } ··· 946 946 return ret; 947 947 } 948 948 949 - void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 949 + void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) 950 950 { 951 951 u64 entry; 952 952 int h_physical_id = kvm_cpu_get_apicid(cpu); ··· 978 978 avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); 979 979 } 980 980 981 - void __avic_vcpu_put(struct kvm_vcpu *vcpu) 981 + void avic_vcpu_put(struct kvm_vcpu *vcpu) 982 982 { 983 983 u64 entry; 984 984 struct vcpu_svm *svm = to_svm(vcpu); ··· 997 997 WRITE_ONCE(*(svm->avic_physical_id_cache), entry); 998 998 } 999 999 1000 - static void avic_vcpu_load(struct kvm_vcpu *vcpu) 1001 - { 1002 - int cpu = get_cpu(); 1003 - 1004 - WARN_ON(cpu != vcpu->cpu); 1005 - 1006 - __avic_vcpu_load(vcpu, cpu); 1007 - 1008 - put_cpu(); 1009 - } 1010 - 1011 - static void avic_vcpu_put(struct kvm_vcpu *vcpu) 1012 - { 1013 - preempt_disable(); 1014 - 1015 
- __avic_vcpu_put(vcpu); 1016 - 1017 - preempt_enable(); 1018 - } 1019 1000 1020 1001 void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) 1021 1002 { ··· 1023 1042 vmcb_mark_dirty(vmcb, VMCB_AVIC); 1024 1043 1025 1044 if (activated) 1026 - avic_vcpu_load(vcpu); 1045 + avic_vcpu_load(vcpu, vcpu->cpu); 1027 1046 else 1028 1047 avic_vcpu_put(vcpu); 1029 1048 ··· 1056 1075 if (!kvm_vcpu_apicv_active(vcpu)) 1057 1076 return; 1058 1077 1059 - avic_vcpu_load(vcpu); 1078 + avic_vcpu_load(vcpu, vcpu->cpu); 1060 1079 }

+21 -18

arch/x86/kvm/svm/nested.c

··· 616 616 struct kvm_vcpu *vcpu = &svm->vcpu; 617 617 struct vmcb *vmcb01 = svm->vmcb01.ptr; 618 618 struct vmcb *vmcb02 = svm->nested.vmcb02.ptr; 619 + u32 pause_count12; 620 + u32 pause_thresh12; 619 621 620 622 /* 621 623 * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, ··· 673 671 if (!nested_vmcb_needs_vls_intercept(svm)) 674 672 vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK; 675 673 674 + pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0; 675 + pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0; 676 676 if (kvm_pause_in_guest(svm->vcpu.kvm)) { 677 - /* use guest values since host doesn't use them */ 678 - vmcb02->control.pause_filter_count = 679 - svm->pause_filter_enabled ? 680 - svm->nested.ctl.pause_filter_count : 0; 677 + /* use guest values since host doesn't intercept PAUSE */ 678 + vmcb02->control.pause_filter_count = pause_count12; 679 + vmcb02->control.pause_filter_thresh = pause_thresh12; 681 680 682 - vmcb02->control.pause_filter_thresh = 683 - svm->pause_threshold_enabled ? 684 - svm->nested.ctl.pause_filter_thresh : 0; 685 - 686 - } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { 687 - /* use host values when guest doesn't use them */ 681 + } else { 682 + /* start from host values otherwise */ 688 683 vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count; 689 684 vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh; 690 - } else { 691 - /* 692 - * Intercept every PAUSE otherwise and 693 - * ignore both host and guest values 694 - */ 695 - vmcb02->control.pause_filter_count = 0; 696 - vmcb02->control.pause_filter_thresh = 0; 685 + 686 + /* ... but ensure filtering is disabled if so requested. 
*/ 687 + if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) { 688 + if (!pause_count12) 689 + vmcb02->control.pause_filter_count = 0; 690 + if (!pause_thresh12) 691 + vmcb02->control.pause_filter_thresh = 0; 692 + } 697 693 } 698 694 699 695 nested_svm_transition_tlb_flush(vcpu); ··· 951 951 vmcb12->control.event_inj = svm->nested.ctl.event_inj; 952 952 vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; 953 953 954 - if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count) 954 + if (!kvm_pause_in_guest(vcpu->kvm)) { 955 955 vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count; 956 + vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS); 957 + 958 + } 956 959 957 960 nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); 958 961

+4 -4

arch/x86/kvm/svm/svm.c

··· 921 921 struct vmcb_control_area *control = &svm->vmcb->control; 922 922 int old = control->pause_filter_count; 923 923 924 - if (kvm_pause_in_guest(vcpu->kvm) || !old) 924 + if (kvm_pause_in_guest(vcpu->kvm)) 925 925 return; 926 926 927 927 control->pause_filter_count = __grow_ple_window(old, ··· 942 942 struct vmcb_control_area *control = &svm->vmcb->control; 943 943 int old = control->pause_filter_count; 944 944 945 - if (kvm_pause_in_guest(vcpu->kvm) || !old) 945 + if (kvm_pause_in_guest(vcpu->kvm)) 946 946 return; 947 947 948 948 control->pause_filter_count = ··· 1400 1400 indirect_branch_prediction_barrier(); 1401 1401 } 1402 1402 if (kvm_vcpu_apicv_active(vcpu)) 1403 - __avic_vcpu_load(vcpu, cpu); 1403 + avic_vcpu_load(vcpu, cpu); 1404 1404 } 1405 1405 1406 1406 static void svm_vcpu_put(struct kvm_vcpu *vcpu) 1407 1407 { 1408 1408 if (kvm_vcpu_apicv_active(vcpu)) 1409 - __avic_vcpu_put(vcpu); 1409 + avic_vcpu_put(vcpu); 1410 1410 1411 1411 svm_prepare_host_switch(vcpu); 1412 1412

+2 -2

arch/x86/kvm/svm/svm.h

··· 610 610 int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); 611 611 int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); 612 612 int avic_init_vcpu(struct vcpu_svm *svm); 613 - void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 614 - void __avic_vcpu_put(struct kvm_vcpu *vcpu); 613 + void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); 614 + void avic_vcpu_put(struct kvm_vcpu *vcpu); 615 615 void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); 616 616 void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu); 617 617 void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);

+75 -1

arch/x86/kvm/vmx/vmx.c

··· 229 229 #define L1D_CACHE_ORDER 4 230 230 static void *vmx_l1d_flush_pages; 231 231 232 + /* Control for disabling CPU Fill buffer clear */ 233 + static bool __read_mostly vmx_fb_clear_ctrl_available; 234 + 232 235 static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf) 233 236 { 234 237 struct page *page; ··· 361 358 return sprintf(s, "???\n"); 362 359 363 360 return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option); 361 + } 362 + 363 + static void vmx_setup_fb_clear_ctrl(void) 364 + { 365 + u64 msr; 366 + 367 + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) && 368 + !boot_cpu_has_bug(X86_BUG_MDS) && 369 + !boot_cpu_has_bug(X86_BUG_TAA)) { 370 + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr); 371 + if (msr & ARCH_CAP_FB_CLEAR_CTRL) 372 + vmx_fb_clear_ctrl_available = true; 373 + } 374 + } 375 + 376 + static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) 377 + { 378 + u64 msr; 379 + 380 + if (!vmx->disable_fb_clear) 381 + return; 382 + 383 + rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); 384 + msr |= FB_CLEAR_DIS; 385 + wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); 386 + /* Cache the MSR value to avoid reading it later */ 387 + vmx->msr_ia32_mcu_opt_ctrl = msr; 388 + } 389 + 390 + static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) 391 + { 392 + if (!vmx->disable_fb_clear) 393 + return; 394 + 395 + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; 396 + wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); 397 + } 398 + 399 + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) 400 + { 401 + vmx->disable_fb_clear = vmx_fb_clear_ctrl_available; 402 + 403 + /* 404 + * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS 405 + * at VMEntry. Skip the MSR read/write when a guest has no use case to 406 + * execute VERW. 
407 + */ 408 + if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) || 409 + ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) && 410 + (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) && 411 + (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) && 412 + (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) && 413 + (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO))) 414 + vmx->disable_fb_clear = false; 364 415 } 365 416 366 417 static const struct kernel_param_ops vmentry_l1d_flush_ops = { ··· 2308 2251 else 2309 2252 ret = kvm_set_msr_common(vcpu, msr_info); 2310 2253 } 2254 + 2255 + /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */ 2256 + if (msr_index == MSR_IA32_ARCH_CAPABILITIES) 2257 + vmx_update_fb_clear_dis(vcpu, vmx); 2311 2258 2312 2259 return ret; 2313 2260 } ··· 4614 4553 kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); 4615 4554 4616 4555 vpid_sync_context(vmx->vpid); 4556 + 4557 + vmx_update_fb_clear_dis(vcpu, vmx); 4617 4558 } 4618 4559 4619 4560 static void vmx_enable_irq_window(struct kvm_vcpu *vcpu) ··· 6835 6772 vmx_l1d_flush(vcpu); 6836 6773 else if (static_branch_unlikely(&mds_user_clear)) 6837 6774 mds_clear_cpu_buffers(); 6775 + else if (static_branch_unlikely(&mmio_stale_data_clear) && 6776 + kvm_arch_has_assigned_device(vcpu->kvm)) 6777 + mds_clear_cpu_buffers(); 6778 + 6779 + vmx_disable_fb_clear(vmx); 6838 6780 6839 6781 if (vcpu->arch.cr2 != native_read_cr2()) 6840 6782 native_write_cr2(vcpu->arch.cr2); ··· 6848 6780 vmx->loaded_vmcs->launched); 6849 6781 6850 6782 vcpu->arch.cr2 = native_read_cr2(); 6783 + 6784 + vmx_enable_fb_clear(vmx); 6851 6785 6852 6786 guest_state_exit_irqoff(); 6853 6787 } ··· 7779 7709 ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | 7780 7710 BIT(APICV_INHIBIT_REASON_ABSENT) | 7781 7711 BIT(APICV_INHIBIT_REASON_HYPERV) | 7782 - BIT(APICV_INHIBIT_REASON_BLOCKIRQ); 7712 + BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | 7713 + BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | 7714 + 
BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED); 7783 7715 7784 7716 return supported & BIT(reason); 7785 7717 } ··· 8283 8211 vmx_exit(); 8284 8212 return r; 8285 8213 } 8214 + 8215 + vmx_setup_fb_clear_ctrl(); 8286 8216 8287 8217 for_each_possible_cpu(cpu) { 8288 8218 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));

+2

arch/x86/kvm/vmx/vmx.h

··· 348 348 u64 msr_ia32_feature_control_valid_bits; 349 349 /* SGX Launch Control public key hash */ 350 350 u64 msr_ia32_sgxlepubkeyhash[4]; 351 + u64 msr_ia32_mcu_opt_ctrl; 352 + bool disable_fb_clear; 351 353 352 354 struct pt_desc pt_desc; 353 355 struct lbr_desc lbr_desc;

+5

arch/x86/kvm/x86.c

··· 1617 1617 */ 1618 1618 } 1619 1619 1620 + /* Guests don't need to know "Fill buffer clear control" exists */ 1621 + data &= ~ARCH_CAP_FB_CLEAR_CTRL; 1622 + 1620 1623 return data; 1621 1624 } 1622 1625 ··· 9853 9850 return; 9854 9851 9855 9852 down_read(&vcpu->kvm->arch.apicv_update_lock); 9853 + preempt_disable(); 9856 9854 9857 9855 activate = kvm_vcpu_apicv_activated(vcpu); 9858 9856 ··· 9874 9870 kvm_make_request(KVM_REQ_EVENT, vcpu); 9875 9871 9876 9872 out: 9873 + preempt_enable(); 9877 9874 up_read(&vcpu->kvm->arch.apicv_update_lock); 9878 9875 } 9879 9876 EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);

-20

block/bio.c

··· 1747 1747 } 1748 1748 EXPORT_SYMBOL(bioset_init); 1749 1749 1750 - /* 1751 - * Initialize and setup a new bio_set, based on the settings from 1752 - * another bio_set. 1753 - */ 1754 - int bioset_init_from_src(struct bio_set *bs, struct bio_set *src) 1755 - { 1756 - int flags; 1757 - 1758 - flags = 0; 1759 - if (src->bvec_pool.min_nr) 1760 - flags |= BIOSET_NEED_BVECS; 1761 - if (src->rescue_workqueue) 1762 - flags |= BIOSET_NEED_RESCUER; 1763 - if (src->cache) 1764 - flags |= BIOSET_PERCPU_CACHE; 1765 - 1766 - return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags); 1767 - } 1768 - EXPORT_SYMBOL(bioset_init_from_src); 1769 - 1770 1750 static int __init init_bio(void) 1771 1751 { 1772 1752 int i;

+1 -1

certs/.gitignore

··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 - /blacklist_hashes_checked 2 + /blacklist_hash_list 3 3 /extract-cert 4 4 /x509_certificate_list 5 5 /x509_revocation_list

+10 -10

certs/Makefile

··· 7 7 obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o 8 8 obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o 9 9 ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),) 10 - quiet_cmd_check_blacklist_hashes = CHECK $(patsubst "%",%,$(2)) 11 - cmd_check_blacklist_hashes = $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(2); touch $@ 12 10 13 - $(eval $(call config_filename,SYSTEM_BLACKLIST_HASH_LIST)) 11 + $(obj)/blacklist_hashes.o: $(obj)/blacklist_hash_list 12 + CFLAGS_blacklist_hashes.o := -I $(obj) 14 13 15 - $(obj)/blacklist_hashes.o: $(obj)/blacklist_hashes_checked 14 + quiet_cmd_check_and_copy_blacklist_hash_list = GEN $@ 15 + cmd_check_and_copy_blacklist_hash_list = \ 16 + $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) >&2; \ 17 + cat $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) > $@ 16 18 17 - CFLAGS_blacklist_hashes.o += -I$(srctree) 18 - 19 - targets += blacklist_hashes_checked 20 - $(obj)/blacklist_hashes_checked: $(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(SYSTEM_BLACKLIST_HASH_LIST_FILENAME) scripts/check-blacklist-hashes.awk FORCE 21 - $(call if_changed,check_blacklist_hashes,$(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(CONFIG_SYSTEM_BLACKLIST_HASH_LIST)) 19 + $(obj)/blacklist_hash_list: $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) FORCE 20 + $(call if_changed,check_and_copy_blacklist_hash_list) 22 21 obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o 23 22 else 24 23 obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o 25 24 endif 25 + targets += blacklist_hash_list 26 26 27 27 quiet_cmd_extract_certs = CERT $@ 28 28 cmd_extract_certs = $(obj)/extract-cert $(extract-cert-in) $@ ··· 33 33 $(obj)/x509_certificate_list: $(CONFIG_SYSTEM_TRUSTED_KEYS) $(obj)/extract-cert FORCE 34 34 $(call if_changed,extract_certs) 35 35 36 - targets += x509_certificate_list blacklist_hashes_checked 36 + targets += x509_certificate_list 37 37 38 38 # If module signing is requested, say by 
allyesconfig, but a key has not been 39 39 # supplied, then one will need to be generated to make sure the build does not

+2 -2

certs/blacklist_hashes.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include "blacklist.h" 3 3 4 - const char __initdata *const blacklist_hashes[] = { 5 - #include CONFIG_SYSTEM_BLACKLIST_HASH_LIST 4 + const char __initconst *const blacklist_hashes[] = { 5 + #include "blacklist_hash_list" 6 6 , NULL 7 7 };

+8

drivers/base/cpu.c

··· 564 564 return sysfs_emit(buf, "Not affected\n"); 565 565 } 566 566 567 + ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, 568 + struct device_attribute *attr, char *buf) 569 + { 570 + return sysfs_emit(buf, "Not affected\n"); 571 + } 572 + 567 573 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); 568 574 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); 569 575 static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); ··· 579 573 static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); 580 574 static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); 581 575 static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); 576 + static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); 582 577 583 578 static struct attribute *cpu_root_vulnerabilities_attrs[] = { 584 579 &dev_attr_meltdown.attr, ··· 591 584 &dev_attr_tsx_async_abort.attr, 592 585 &dev_attr_itlb_multihit.attr, 593 586 &dev_attr_srbds.attr, 587 + &dev_attr_mmio_stale_data.attr, 594 588 NULL 595 589 }; 596 590

+30 -18

drivers/char/Kconfig

··· 429 429 driver include crash and makedumpfile. 430 430 431 431 config RANDOM_TRUST_CPU 432 - bool "Trust the CPU manufacturer to initialize Linux's CRNG" 432 + bool "Initialize RNG using CPU RNG instructions" 433 + default y 433 434 depends on ARCH_RANDOM 434 - default n 435 435 help 436 - Assume that CPU manufacturer (e.g., Intel or AMD for RDSEED or 437 - RDRAND, IBM for the S390 and Power PC architectures) is trustworthy 438 - for the purposes of initializing Linux's CRNG. Since this is not 439 - something that can be independently audited, this amounts to trusting 440 - that CPU manufacturer (perhaps with the insistence or mandate 441 - of a Nation State's intelligence or law enforcement agencies) 442 - has not installed a hidden back door to compromise the CPU's 443 - random number generation facilities. This can also be configured 444 - at boot with "random.trust_cpu=on/off". 436 + Initialize the RNG using random numbers supplied by the CPU's 437 + RNG instructions (e.g. RDRAND), if supported and available. These 438 + random numbers are never used directly, but are rather hashed into 439 + the main input pool, and this happens regardless of whether or not 440 + this option is enabled. Instead, this option controls whether 441 + they are credited and hence can initialize the RNG. Additionally, 442 + other sources of randomness are always used, regardless of this 443 + setting. Enabling this implies trusting that the CPU can supply high 444 + quality and non-backdoored random numbers. 445 + 446 + Say Y here unless you have reason to mistrust your CPU or believe 447 + its RNG facilities may be faulty. This may also be configured at 448 + boot time with "random.trust_cpu=on/off". 
445 449 446 450 config RANDOM_TRUST_BOOTLOADER 447 - bool "Trust the bootloader to initialize Linux's CRNG" 451 + bool "Initialize RNG using bootloader-supplied seed" 452 + default y 448 453 help 449 - Some bootloaders can provide entropy to increase the kernel's initial 450 - device randomness. Say Y here to assume the entropy provided by the 451 - booloader is trustworthy so it will be added to the kernel's entropy 452 - pool. Otherwise, say N here so it will be regarded as device input that 453 - only mixes the entropy pool. This can also be configured at boot with 454 - "random.trust_bootloader=on/off". 454 + Initialize the RNG using a seed supplied by the bootloader or boot 455 + environment (e.g. EFI or a bootloader-generated device tree). This 456 + seed is not used directly, but is rather hashed into the main input 457 + pool, and this happens regardless of whether or not this option is 458 + enabled. Instead, this option controls whether the seed is credited 459 + and hence can initialize the RNG. Additionally, other sources of 460 + randomness are always used, regardless of this setting. Enabling 461 + this implies trusting that the bootloader can supply high quality and 462 + non-backdoored seeds. 463 + 464 + Say Y here unless you have reason to mistrust your bootloader or 465 + believe its RNG facilities may be faulty. This may also be configured 466 + at boot time with "random.trust_bootloader=on/off". 455 467 456 468 endmenu

+2

drivers/char/hw_random/virtio-rng.c

··· 159 159 goto err_find; 160 160 } 161 161 162 + virtio_device_ready(vdev); 163 + 162 164 /* we always have a pending entropy request */ 163 165 request_entropy(vi); 164 166

+17 -22

drivers/char/random.c

··· 650 650 651 651 if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) { 652 652 crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */ 653 - execute_in_process_context(crng_set_ready, &set_ready); 653 + if (static_key_initialized) 654 + execute_in_process_context(crng_set_ready, &set_ready); 654 655 wake_up_interruptible(&crng_init_wait); 655 656 kill_fasync(&fasync, SIGIO, POLL_IN); 656 657 pr_notice("crng init done\n"); ··· 725 724 * 726 725 **********************************************************************/ 727 726 728 - static bool used_arch_random; 729 - static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); 730 - static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); 727 + static bool trust_cpu __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU); 728 + static bool trust_bootloader __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER); 731 729 static int __init parse_trust_cpu(char *arg) 732 730 { 733 731 return kstrtobool(arg, &trust_cpu); ··· 776 776 int __init random_init(const char *command_line) 777 777 { 778 778 ktime_t now = ktime_get_real(); 779 - unsigned int i, arch_bytes; 779 + unsigned int i, arch_bits; 780 780 unsigned long entropy; 781 781 782 782 #if defined(LATENT_ENTROPY_PLUGIN) ··· 784 784 _mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed)); 785 785 #endif 786 786 787 - for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE; 787 + for (i = 0, arch_bits = BLAKE2S_BLOCK_SIZE * 8; 788 788 i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) { 789 789 if (!arch_get_random_seed_long_early(&entropy) && 790 790 !arch_get_random_long_early(&entropy)) { 791 791 entropy = random_get_entropy(); 792 - arch_bytes -= sizeof(entropy); 792 + arch_bits -= sizeof(entropy) * 8; 793 793 } 794 794 _mix_pool_bytes(&entropy, sizeof(entropy)); 795 795 } ··· 798 798 _mix_pool_bytes(command_line, strlen(command_line)); 799 799 add_latent_entropy(); 800 800 801 + /* 802 + * If we were initialized 
by the bootloader before jump labels are 803 + * initialized, then we should enable the static branch here, where 804 + * it's guaranteed that jump labels have been initialized. 805 + */ 806 + if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY) 807 + crng_set_ready(NULL); 808 + 801 809 if (crng_ready()) 802 810 crng_reseed(); 803 811 else if (trust_cpu) 804 - credit_init_bits(arch_bytes * 8); 805 - used_arch_random = arch_bytes * 8 >= POOL_READY_BITS; 812 + _credit_init_bits(arch_bits); 806 813 807 814 WARN_ON(register_pm_notifier(&pm_notifier)); 808 815 809 816 WARN(!random_get_entropy(), "Missing cycle counter and fallback timer; RNG " 810 817 "entropy collection will consequently suffer."); 811 818 return 0; 812 - } 813 - 814 - /* 815 - * Returns whether arch randomness has been mixed into the initial 816 - * state of the RNG, regardless of whether or not that randomness 817 - * was credited. Knowing this is only good for a very limited set 818 - * of uses, such as early init printk pointer obfuscation. 819 - */ 820 - bool rng_has_arch_random(void) 821 - { 822 - return used_arch_random; 823 819 } 824 820 825 821 /* ··· 861 865 * Handle random seed passed by bootloader, and credit it if 862 866 * CONFIG_RANDOM_TRUST_BOOTLOADER is set. 863 867 */ 864 - void __cold add_bootloader_randomness(const void *buf, size_t len) 868 + void __init add_bootloader_randomness(const void *buf, size_t len) 865 869 { 866 870 mix_pool_bytes(buf, len); 867 871 if (trust_bootloader) 868 872 credit_init_bits(len * 8); 869 873 } 870 - EXPORT_SYMBOL_GPL(add_bootloader_randomness); 871 874 872 875 #if IS_ENABLED(CONFIG_VMGENID) 873 876 static BLOCKING_NOTIFIER_HEAD(vmfork_chain);

+37 -33

drivers/gpio/gpio-crystalcove.c

··· 15 15 #include <linux/platform_device.h> 16 16 #include <linux/regmap.h> 17 17 #include <linux/seq_file.h> 18 + #include <linux/types.h> 18 19 19 20 #define CRYSTALCOVE_GPIO_NUM 16 20 21 #define CRYSTALCOVE_VGPIO_NUM 95 ··· 111 110 return reg + gpio % 8; 112 111 } 113 112 114 - static void crystalcove_update_irq_mask(struct crystalcove_gpio *cg, 115 - int gpio) 113 + static void crystalcove_update_irq_mask(struct crystalcove_gpio *cg, int gpio) 116 114 { 117 115 u8 mirqs0 = gpio < 8 ? MGPIO0IRQS0 : MGPIO1IRQS0; 118 116 int mask = BIT(gpio % 8); ··· 140 140 return regmap_write(cg->regmap, reg, CTLO_INPUT_SET); 141 141 } 142 142 143 - static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned int gpio, 144 - int value) 143 + static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned int gpio, int value) 145 144 { 146 145 struct crystalcove_gpio *cg = gpiochip_get_data(chip); 147 146 int reg = to_reg(gpio, CTRL_OUT); ··· 167 168 return val & 0x1; 168 169 } 169 170 170 - static void crystalcove_gpio_set(struct gpio_chip *chip, 171 - unsigned int gpio, int value) 171 + static void crystalcove_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value) 172 172 { 173 173 struct crystalcove_gpio *cg = gpiochip_get_data(chip); 174 174 int reg = to_reg(gpio, CTRL_OUT); ··· 183 185 184 186 static int crystalcove_irq_type(struct irq_data *data, unsigned int type) 185 187 { 186 - struct crystalcove_gpio *cg = 187 - gpiochip_get_data(irq_data_get_irq_chip_data(data)); 188 + struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data)); 189 + irq_hw_number_t hwirq = irqd_to_hwirq(data); 188 190 189 - if (data->hwirq >= CRYSTALCOVE_GPIO_NUM) 191 + if (hwirq >= CRYSTALCOVE_GPIO_NUM) 190 192 return 0; 191 193 192 194 switch (type) { ··· 213 215 214 216 static void crystalcove_bus_lock(struct irq_data *data) 215 217 { 216 - struct crystalcove_gpio *cg = 217 - gpiochip_get_data(irq_data_get_irq_chip_data(data)); 218 + struct 
crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data)); 218 219 219 220 mutex_lock(&cg->buslock); 220 221 } 221 222 222 223 static void crystalcove_bus_sync_unlock(struct irq_data *data) 223 224 { 224 - struct crystalcove_gpio *cg = 225 - gpiochip_get_data(irq_data_get_irq_chip_data(data)); 226 - int gpio = data->hwirq; 225 + struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data)); 226 + irq_hw_number_t hwirq = irqd_to_hwirq(data); 227 227 228 228 if (cg->update & UPDATE_IRQ_TYPE) 229 - crystalcove_update_irq_ctrl(cg, gpio); 229 + crystalcove_update_irq_ctrl(cg, hwirq); 230 230 if (cg->update & UPDATE_IRQ_MASK) 231 - crystalcove_update_irq_mask(cg, gpio); 231 + crystalcove_update_irq_mask(cg, hwirq); 232 232 cg->update = 0; 233 233 234 234 mutex_unlock(&cg->buslock); ··· 234 238 235 239 static void crystalcove_irq_unmask(struct irq_data *data) 236 240 { 237 - struct crystalcove_gpio *cg = 238 - gpiochip_get_data(irq_data_get_irq_chip_data(data)); 241 + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); 242 + struct crystalcove_gpio *cg = gpiochip_get_data(gc); 243 + irq_hw_number_t hwirq = irqd_to_hwirq(data); 239 244 240 - if (data->hwirq < CRYSTALCOVE_GPIO_NUM) { 241 - cg->set_irq_mask = false; 242 - cg->update |= UPDATE_IRQ_MASK; 243 - } 245 + if (hwirq >= CRYSTALCOVE_GPIO_NUM) 246 + return; 247 + 248 + gpiochip_enable_irq(gc, hwirq); 249 + 250 + cg->set_irq_mask = false; 251 + cg->update |= UPDATE_IRQ_MASK; 244 252 } 245 253 246 254 static void crystalcove_irq_mask(struct irq_data *data) 247 255 { 248 - struct crystalcove_gpio *cg = 249 - gpiochip_get_data(irq_data_get_irq_chip_data(data)); 256 + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); 257 + struct crystalcove_gpio *cg = gpiochip_get_data(gc); 258 + irq_hw_number_t hwirq = irqd_to_hwirq(data); 250 259 251 - if (data->hwirq < CRYSTALCOVE_GPIO_NUM) { 252 - cg->set_irq_mask = true; 253 - cg->update |= UPDATE_IRQ_MASK; 254 - } 260 + if (hwirq >= 
CRYSTALCOVE_GPIO_NUM) 261 + return; 262 + 263 + cg->set_irq_mask = true; 264 + cg->update |= UPDATE_IRQ_MASK; 265 + 266 + gpiochip_disable_irq(gc, hwirq); 255 267 } 256 268 257 - static struct irq_chip crystalcove_irqchip = { 269 + static const struct irq_chip crystalcove_irqchip = { 258 270 .name = "Crystal Cove", 259 271 .irq_mask = crystalcove_irq_mask, 260 272 .irq_unmask = crystalcove_irq_unmask, 261 273 .irq_set_type = crystalcove_irq_type, 262 274 .irq_bus_lock = crystalcove_bus_lock, 263 275 .irq_bus_sync_unlock = crystalcove_bus_sync_unlock, 264 - .flags = IRQCHIP_SKIP_SET_WAKE, 276 + .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE, 277 + GPIOCHIP_IRQ_RESOURCE_HELPERS, 265 278 }; 266 279 267 280 static irqreturn_t crystalcove_gpio_irq_handler(int irq, void *data) ··· 298 293 return IRQ_HANDLED; 299 294 } 300 295 301 - static void crystalcove_gpio_dbg_show(struct seq_file *s, 302 - struct gpio_chip *chip) 296 + static void crystalcove_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) 303 297 { 304 298 struct crystalcove_gpio *cg = gpiochip_get_data(chip); 305 299 int gpio, offset; ··· 357 353 cg->regmap = pmic->regmap; 358 354 359 355 girq = &cg->chip.irq; 360 - girq->chip = &crystalcove_irqchip; 356 + gpio_irq_chip_set_chip(girq, &crystalcove_irqchip); 361 357 /* This will let us handle the parent IRQ in the driver */ 362 358 girq->parent_handler = NULL; 363 359 girq->num_parents = 0;

+14 -9

drivers/gpio/gpio-dln2.c

··· 46 46 struct dln2_gpio { 47 47 struct platform_device *pdev; 48 48 struct gpio_chip gpio; 49 - struct irq_chip irqchip; 50 49 51 50 /* 52 51 * Cache pin direction to save us one transfer, since the hardware has ··· 305 306 struct dln2_gpio *dln2 = gpiochip_get_data(gc); 306 307 int pin = irqd_to_hwirq(irqd); 307 308 309 + gpiochip_enable_irq(gc, pin); 308 310 set_bit(pin, dln2->unmasked_irqs); 309 311 } 310 312 ··· 316 316 int pin = irqd_to_hwirq(irqd); 317 317 318 318 clear_bit(pin, dln2->unmasked_irqs); 319 + gpiochip_disable_irq(gc, pin); 319 320 } 320 321 321 322 static int dln2_irq_set_type(struct irq_data *irqd, unsigned type) ··· 384 383 385 384 mutex_unlock(&dln2->irq_lock); 386 385 } 386 + 387 + static const struct irq_chip dln2_irqchip = { 388 + .name = "dln2-irq", 389 + .irq_mask = dln2_irq_mask, 390 + .irq_unmask = dln2_irq_unmask, 391 + .irq_set_type = dln2_irq_set_type, 392 + .irq_bus_lock = dln2_irq_bus_lock, 393 + .irq_bus_sync_unlock = dln2_irq_bus_unlock, 394 + .flags = IRQCHIP_IMMUTABLE, 395 + GPIOCHIP_IRQ_RESOURCE_HELPERS, 396 + }; 387 397 388 398 static void dln2_gpio_event(struct platform_device *pdev, u16 echo, 389 399 const void *data, int len) ··· 477 465 dln2->gpio.direction_output = dln2_gpio_direction_output; 478 466 dln2->gpio.set_config = dln2_gpio_set_config; 479 467 480 - dln2->irqchip.name = "dln2-irq", 481 - dln2->irqchip.irq_mask = dln2_irq_mask, 482 - dln2->irqchip.irq_unmask = dln2_irq_unmask, 483 - dln2->irqchip.irq_set_type = dln2_irq_set_type, 484 - dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock, 485 - dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock, 486 - 487 468 girq = &dln2->gpio.irq; 488 - girq->chip = &dln2->irqchip; 469 + gpio_irq_chip_set_chip(girq, &dln2_irqchip); 489 470 /* The event comes from the outside so no parent handler */ 490 471 girq->parent_handler = NULL; 491 472 girq->num_parents = 0;

+3 -4

drivers/gpio/gpio-dwapb.c

··· 662 662 gpio->clks[1].id = "db"; 663 663 err = devm_clk_bulk_get_optional(gpio->dev, DWAPB_NR_CLOCKS, 664 664 gpio->clks); 665 - if (err) { 666 - dev_err(gpio->dev, "Cannot get APB/Debounce clocks\n"); 667 - return err; 668 - } 665 + if (err) 666 + return dev_err_probe(gpio->dev, err, 667 + "Cannot get APB/Debounce clocks\n"); 669 668 670 669 err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks); 671 670 if (err) {

+15 -7

drivers/gpio/gpio-merrifield.c

··· 220 220 raw_spin_unlock_irqrestore(&priv->lock, flags); 221 221 } 222 222 223 - static void mrfld_irq_unmask_mask(struct irq_data *d, bool unmask) 223 + static void mrfld_irq_unmask_mask(struct mrfld_gpio *priv, u32 gpio, bool unmask) 224 224 { 225 - struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d); 226 - u32 gpio = irqd_to_hwirq(d); 227 225 void __iomem *gimr = gpio_reg(&priv->chip, gpio, GIMR); 228 226 unsigned long flags; 229 227 u32 value; ··· 239 241 240 242 static void mrfld_irq_mask(struct irq_data *d) 241 243 { 242 - mrfld_irq_unmask_mask(d, false); 244 + struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d); 245 + u32 gpio = irqd_to_hwirq(d); 246 + 247 + mrfld_irq_unmask_mask(priv, gpio, false); 248 + gpiochip_disable_irq(&priv->chip, gpio); 243 249 } 244 250 245 251 static void mrfld_irq_unmask(struct irq_data *d) 246 252 { 247 - mrfld_irq_unmask_mask(d, true); 253 + struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d); 254 + u32 gpio = irqd_to_hwirq(d); 255 + 256 + gpiochip_enable_irq(&priv->chip, gpio); 257 + mrfld_irq_unmask_mask(priv, gpio, true); 248 258 } 249 259 250 260 static int mrfld_irq_set_type(struct irq_data *d, unsigned int type) ··· 335 329 return 0; 336 330 } 337 331 338 - static struct irq_chip mrfld_irqchip = { 332 + static const struct irq_chip mrfld_irqchip = { 339 333 .name = "gpio-merrifield", 340 334 .irq_ack = mrfld_irq_ack, 341 335 .irq_mask = mrfld_irq_mask, 342 336 .irq_unmask = mrfld_irq_unmask, 343 337 .irq_set_type = mrfld_irq_set_type, 344 338 .irq_set_wake = mrfld_irq_set_wake, 339 + .flags = IRQCHIP_IMMUTABLE, 340 + GPIOCHIP_IRQ_RESOURCE_HELPERS, 345 341 }; 346 342 347 343 static void mrfld_irq_handler(struct irq_desc *desc) ··· 490 482 return retval; 491 483 492 484 girq = &priv->chip.irq; 493 - girq->chip = &mrfld_irqchip; 485 + gpio_irq_chip_set_chip(girq, &mrfld_irqchip); 494 486 girq->init_hw = mrfld_irq_init_hw; 495 487 girq->parent_handler = mrfld_irq_handler; 496 488 girq->num_parents = 1;

+22 -13

drivers/gpio/gpio-sch.c

··· 38 38 39 39 struct sch_gpio { 40 40 struct gpio_chip chip; 41 - struct irq_chip irqchip; 42 41 spinlock_t lock; 43 42 unsigned short iobase; 44 43 unsigned short resume_base; ··· 217 218 spin_unlock_irqrestore(&sch->lock, flags); 218 219 } 219 220 220 - static void sch_irq_mask_unmask(struct irq_data *d, int val) 221 + static void sch_irq_mask_unmask(struct gpio_chip *gc, irq_hw_number_t gpio_num, int val) 221 222 { 222 - struct gpio_chip *gc = irq_data_get_irq_chip_data(d); 223 223 struct sch_gpio *sch = gpiochip_get_data(gc); 224 - irq_hw_number_t gpio_num = irqd_to_hwirq(d); 225 224 unsigned long flags; 226 225 227 226 spin_lock_irqsave(&sch->lock, flags); ··· 229 232 230 233 static void sch_irq_mask(struct irq_data *d) 231 234 { 232 - sch_irq_mask_unmask(d, 0); 235 + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); 236 + irq_hw_number_t gpio_num = irqd_to_hwirq(d); 237 + 238 + sch_irq_mask_unmask(gc, gpio_num, 0); 239 + gpiochip_disable_irq(gc, gpio_num); 233 240 } 234 241 235 242 static void sch_irq_unmask(struct irq_data *d) 236 243 { 237 - sch_irq_mask_unmask(d, 1); 244 + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); 245 + irq_hw_number_t gpio_num = irqd_to_hwirq(d); 246 + 247 + gpiochip_enable_irq(gc, gpio_num); 248 + sch_irq_mask_unmask(gc, gpio_num, 1); 238 249 } 250 + 251 + static const struct irq_chip sch_irqchip = { 252 + .name = "sch_gpio", 253 + .irq_ack = sch_irq_ack, 254 + .irq_mask = sch_irq_mask, 255 + .irq_unmask = sch_irq_unmask, 256 + .irq_set_type = sch_irq_type, 257 + .flags = IRQCHIP_IMMUTABLE, 258 + GPIOCHIP_IRQ_RESOURCE_HELPERS, 259 + }; 239 260 240 261 static u32 sch_gpio_gpe_handler(acpi_handle gpe_device, u32 gpe, void *context) 241 262 { ··· 382 367 383 368 platform_set_drvdata(pdev, sch); 384 369 385 - sch->irqchip.name = "sch_gpio"; 386 - sch->irqchip.irq_ack = sch_irq_ack; 387 - sch->irqchip.irq_mask = sch_irq_mask; 388 - sch->irqchip.irq_unmask = sch_irq_unmask; 389 - sch->irqchip.irq_set_type = sch_irq_type; 390 
- 391 370 girq = &sch->chip.irq; 392 - girq->chip = &sch->irqchip; 371 + gpio_irq_chip_set_chip(girq, &sch_irqchip); 393 372 girq->num_parents = 0; 394 373 girq->parents = NULL; 395 374 girq->parent_handler = NULL;

+8 -2

drivers/gpio/gpio-wcove.c

··· 299 299 if (gpio >= WCOVE_GPIO_NUM) 300 300 return; 301 301 302 + gpiochip_enable_irq(chip, gpio); 303 + 302 304 wg->set_irq_mask = false; 303 305 wg->update |= UPDATE_IRQ_MASK; 304 306 } ··· 316 314 317 315 wg->set_irq_mask = true; 318 316 wg->update |= UPDATE_IRQ_MASK; 317 + 318 + gpiochip_disable_irq(chip, gpio); 319 319 } 320 320 321 - static struct irq_chip wcove_irqchip = { 321 + static const struct irq_chip wcove_irqchip = { 322 322 .name = "Whiskey Cove", 323 323 .irq_mask = wcove_irq_mask, 324 324 .irq_unmask = wcove_irq_unmask, 325 325 .irq_set_type = wcove_irq_type, 326 326 .irq_bus_lock = wcove_bus_lock, 327 327 .irq_bus_sync_unlock = wcove_bus_sync_unlock, 328 + .flags = IRQCHIP_IMMUTABLE, 329 + GPIOCHIP_IRQ_RESOURCE_HELPERS, 328 330 }; 329 331 330 332 static irqreturn_t wcove_gpio_irq_handler(int irq, void *data) ··· 458 452 } 459 453 460 454 girq = &wg->chip.irq; 461 - girq->chip = &wcove_irqchip; 455 + gpio_irq_chip_set_chip(girq, &wcove_irqchip); 462 456 /* This will let us handle the parent IRQ in the driver */ 463 457 girq->parent_handler = NULL; 464 458 girq->num_parents = 0;

+9 -2

drivers/md/dm-core.h

··· 33 33 * access their members! 34 34 */ 35 35 36 + /* 37 + * For mempools pre-allocation at the table loading time. 38 + */ 39 + struct dm_md_mempools { 40 + struct bio_set bs; 41 + struct bio_set io_bs; 42 + }; 43 + 36 44 struct mapped_device { 37 45 struct mutex suspend_lock; 38 46 ··· 118 110 /* 119 111 * io objects are allocated from here. 120 112 */ 121 - struct bio_set io_bs; 122 - struct bio_set bs; 113 + struct dm_md_mempools *mempools; 123 114 124 115 /* kobject and completion */ 125 116 struct dm_kobject_holder kobj_holder;

+1 -1

drivers/md/dm-rq.c

··· 319 319 { 320 320 int r; 321 321 322 - r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask, 322 + r = blk_rq_prep_clone(clone, rq, &tio->md->mempools->bs, gfp_mask, 323 323 dm_rq_bio_constructor, tio); 324 324 if (r) 325 325 return r;

-11

drivers/md/dm-table.c

··· 1038 1038 return 0; 1039 1039 } 1040 1040 1041 - void dm_table_free_md_mempools(struct dm_table *t) 1042 - { 1043 - dm_free_md_mempools(t->mempools); 1044 - t->mempools = NULL; 1045 - } 1046 - 1047 - struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t) 1048 - { 1049 - return t->mempools; 1050 - } 1051 - 1052 1041 static int setup_indexes(struct dm_table *t) 1053 1042 { 1054 1043 int i;

+36 -74

drivers/md/dm.c

··· 136 136 return latch; 137 137 } 138 138 139 - /* 140 - * For mempools pre-allocation at the table loading time. 141 - */ 142 - struct dm_md_mempools { 143 - struct bio_set bs; 144 - struct bio_set io_bs; 145 - }; 146 - 147 139 struct table_device { 148 140 struct list_head list; 149 141 refcount_t count; ··· 573 581 struct dm_target_io *tio; 574 582 struct bio *clone; 575 583 576 - clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->io_bs); 584 + clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs); 577 585 /* Set default bdev, but target must bio_set_dev() before issuing IO */ 578 586 clone->bi_bdev = md->disk->part0; 579 587 ··· 620 628 } else { 621 629 struct mapped_device *md = ci->io->md; 622 630 623 - clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, &md->bs); 631 + clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, 632 + &md->mempools->bs); 624 633 if (!clone) 625 634 return NULL; 626 635 /* Set default bdev, but target must bio_set_dev() before issuing IO */ ··· 1016 1023 struct dm_io *io = tio->io; 1017 1024 struct mapped_device *md = io->md; 1018 1025 1019 - if (likely(bio->bi_bdev != md->disk->part0)) { 1020 - struct request_queue *q = bdev_get_queue(bio->bi_bdev); 1021 - 1022 - if (unlikely(error == BLK_STS_TARGET)) { 1023 - if (bio_op(bio) == REQ_OP_DISCARD && 1024 - !bdev_max_discard_sectors(bio->bi_bdev)) 1025 - disable_discard(md); 1026 - else if (bio_op(bio) == REQ_OP_WRITE_ZEROES && 1027 - !q->limits.max_write_zeroes_sectors) 1028 - disable_write_zeroes(md); 1029 - } 1030 - 1031 - if (static_branch_unlikely(&zoned_enabled) && 1032 - unlikely(blk_queue_is_zoned(q))) 1033 - dm_zone_endio(io, bio); 1026 + if (unlikely(error == BLK_STS_TARGET)) { 1027 + if (bio_op(bio) == REQ_OP_DISCARD && 1028 + !bdev_max_discard_sectors(bio->bi_bdev)) 1029 + disable_discard(md); 1030 + else if (bio_op(bio) == REQ_OP_WRITE_ZEROES && 1031 + !bdev_write_zeroes_sectors(bio->bi_bdev)) 1032 + disable_write_zeroes(md); 1034 1033 } 1034 + 1035 + if 
(static_branch_unlikely(&zoned_enabled) && 1036 + unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev)))) 1037 + dm_zone_endio(io, bio); 1035 1038 1036 1039 if (endio) { 1037 1040 int r = endio(ti, bio, &error); ··· 1865 1876 { 1866 1877 if (md->wq) 1867 1878 destroy_workqueue(md->wq); 1868 - bioset_exit(&md->bs); 1869 - bioset_exit(&md->io_bs); 1879 + dm_free_md_mempools(md->mempools); 1870 1880 1871 1881 if (md->dax_dev) { 1872 1882 dax_remove_host(md->disk); ··· 2037 2049 kvfree(md); 2038 2050 } 2039 2051 2040 - static int __bind_mempools(struct mapped_device *md, struct dm_table *t) 2041 - { 2042 - struct dm_md_mempools *p = dm_table_get_md_mempools(t); 2043 - int ret = 0; 2044 - 2045 - if (dm_table_bio_based(t)) { 2046 - /* 2047 - * The md may already have mempools that need changing. 2048 - * If so, reload bioset because front_pad may have changed 2049 - * because a different table was loaded. 2050 - */ 2051 - bioset_exit(&md->bs); 2052 - bioset_exit(&md->io_bs); 2053 - 2054 - } else if (bioset_initialized(&md->bs)) { 2055 - /* 2056 - * There's no need to reload with request-based dm 2057 - * because the size of front_pad doesn't change. 2058 - * Note for future: If you are to reload bioset, 2059 - * prep-ed requests in the queue may refer 2060 - * to bio from the old bioset, so you must walk 2061 - * through the queue to unprep. 2062 - */ 2063 - goto out; 2064 - } 2065 - 2066 - BUG_ON(!p || 2067 - bioset_initialized(&md->bs) || 2068 - bioset_initialized(&md->io_bs)); 2069 - 2070 - ret = bioset_init_from_src(&md->bs, &p->bs); 2071 - if (ret) 2072 - goto out; 2073 - ret = bioset_init_from_src(&md->io_bs, &p->io_bs); 2074 - if (ret) 2075 - bioset_exit(&md->bs); 2076 - out: 2077 - /* mempool bind completed, no longer need any mempools in the table */ 2078 - dm_table_free_md_mempools(t); 2079 - return ret; 2080 - } 2081 - 2082 2052 /* 2083 2053 * Bind a table to the device. 2084 2054 */ ··· 2090 2144 * immutable singletons - used to optimize dm_mq_queue_rq. 
2091 2145 */ 2092 2146 md->immutable_target = dm_table_get_immutable_target(t); 2093 - } 2094 2147 2095 - ret = __bind_mempools(md, t); 2096 - if (ret) { 2097 - old_map = ERR_PTR(ret); 2098 - goto out; 2148 + /* 2149 + * There is no need to reload with request-based dm because the 2150 + * size of front_pad doesn't change. 2151 + * 2152 + * Note for future: If you are to reload bioset, prep-ed 2153 + * requests in the queue may refer to bio from the old bioset, 2154 + * so you must walk through the queue to unprep. 2155 + */ 2156 + if (!md->mempools) { 2157 + md->mempools = t->mempools; 2158 + t->mempools = NULL; 2159 + } 2160 + } else { 2161 + /* 2162 + * The md may already have mempools that need changing. 2163 + * If so, reload bioset because front_pad may have changed 2164 + * because a different table was loaded. 2165 + */ 2166 + dm_free_md_mempools(md->mempools); 2167 + md->mempools = t->mempools; 2168 + t->mempools = NULL; 2099 2169 } 2100 2170 2101 2171 ret = dm_table_set_restrictions(t, md->queue, limits);

-2

drivers/md/dm.h

··· 71 71 struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); 72 72 bool dm_table_bio_based(struct dm_table *t); 73 73 bool dm_table_request_based(struct dm_table *t); 74 - void dm_table_free_md_mempools(struct dm_table *t); 75 - struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); 76 74 77 75 void dm_lock_md_type(struct mapped_device *md); 78 76 void dm_unlock_md_type(struct mapped_device *md);

+2 -2

drivers/net/ethernet/amd/xgbe/xgbe-platform.c

··· 338 338 * the PHY resources listed last 339 339 */ 340 340 phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3; 341 - phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1; 341 + phy_irqnum = platform_irq_count(pdev) - 1; 342 342 dma_irqnum = 1; 343 343 dma_irqend = phy_irqnum; 344 344 } else { ··· 348 348 phy_memnum = 0; 349 349 phy_irqnum = 0; 350 350 dma_irqnum = 1; 351 - dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ); 351 + dma_irqend = platform_irq_count(pdev); 352 352 } 353 353 354 354 /* Obtain the mmio areas for the device */

-1

drivers/net/ethernet/broadcom/bgmac-bcma.c

··· 332 332 bcma_mdio_mii_unregister(bgmac->mii_bus); 333 333 bgmac_enet_remove(bgmac); 334 334 bcma_set_drvdata(core, NULL); 335 - kfree(bgmac); 336 335 } 337 336 338 337 static struct bcma_driver bgmac_bcma_driver = {

+1

drivers/net/ethernet/hisilicon/hns3/hnae3.h

··· 769 769 u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */ 770 770 u16 tqp_count[HNAE3_MAX_TC]; 771 771 u16 tqp_offset[HNAE3_MAX_TC]; 772 + u8 max_tc; /* Total number of TCs */ 772 773 u8 num_tc; /* Total number of enabled TCs */ 773 774 bool mqprio_active; 774 775 };

+1 -1

drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c

··· 1129 1129 if (old_ringparam->tx_desc_num == new_ringparam->tx_desc_num && 1130 1130 old_ringparam->rx_desc_num == new_ringparam->rx_desc_num && 1131 1131 old_ringparam->rx_buf_len == new_ringparam->rx_buf_len) { 1132 - netdev_info(ndev, "ringparam not changed\n"); 1132 + netdev_info(ndev, "descriptor number and rx buffer length not changed\n"); 1133 1133 return false; 1134 1134 } 1135 1135

+15 -3

drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c

··· 3268 3268 static int hclge_update_port_info(struct hclge_dev *hdev) 3269 3269 { 3270 3270 struct hclge_mac *mac = &hdev->hw.mac; 3271 - int speed = HCLGE_MAC_SPEED_UNKNOWN; 3271 + int speed; 3272 3272 int ret; 3273 3273 3274 3274 /* get the port info from SFP cmd if not copper port */ ··· 3279 3279 if (!hdev->support_sfp_query) 3280 3280 return 0; 3281 3281 3282 - if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) 3282 + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { 3283 + speed = mac->speed; 3283 3284 ret = hclge_get_sfp_info(hdev, mac); 3284 - else 3285 + } else { 3286 + speed = HCLGE_MAC_SPEED_UNKNOWN; 3285 3287 ret = hclge_get_sfp_speed(hdev, &speed); 3288 + } 3286 3289 3287 3290 if (ret == -EOPNOTSUPP) { 3288 3291 hdev->support_sfp_query = false; ··· 3297 3294 if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) { 3298 3295 if (mac->speed_type == QUERY_ACTIVE_SPEED) { 3299 3296 hclge_update_port_capability(hdev, mac); 3297 + if (mac->speed != speed) 3298 + (void)hclge_tm_port_shaper_cfg(hdev); 3300 3299 return 0; 3301 3300 } 3302 3301 return hclge_cfg_mac_speed_dup(hdev, mac->speed, ··· 3380 3375 3381 3376 link_state_old = vport->vf_info.link_state; 3382 3377 vport->vf_info.link_state = link_state; 3378 + 3379 + /* return success directly if the VF is unalive, VF will 3380 + * query link state itself when it starts work. 3381 + */ 3382 + if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state)) 3383 + return 0; 3383 3384 3384 3385 ret = hclge_push_vf_link_status(vport); 3385 3386 if (ret) { ··· 10128 10117 if (ret) 10129 10118 return ret; 10130 10119 10120 + vport->port_base_vlan_cfg.tbl_sta = false; 10131 10121 /* remove old VLAN tag */ 10132 10122 if (old_info->vlan_tag == 0) 10133 10123 ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,

+68 -33

drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c

··· 282 282 return hclge_cmd_send(&hdev->hw, &desc, 1); 283 283 } 284 284 285 - static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev, 286 - u16 qs_id, u8 pri) 285 + static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev, u16 qs_id, u8 pri, 286 + bool link_vld) 287 287 { 288 288 struct hclge_qs_to_pri_link_cmd *map; 289 289 struct hclge_desc desc; ··· 294 294 295 295 map->qs_id = cpu_to_le16(qs_id); 296 296 map->priority = pri; 297 - map->link_vld = HCLGE_TM_QS_PRI_LINK_VLD_MSK; 297 + map->link_vld = link_vld ? HCLGE_TM_QS_PRI_LINK_VLD_MSK : 0; 298 298 299 299 return hclge_cmd_send(&hdev->hw, &desc, 1); 300 300 } ··· 420 420 return hclge_cmd_send(&hdev->hw, &desc, 1); 421 421 } 422 422 423 - static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) 423 + int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev) 424 424 { 425 425 struct hclge_port_shapping_cmd *shap_cfg_cmd; 426 426 struct hclge_shaper_ir_para ir_para; ··· 642 642 * one tc for VF for simplicity. VF's vport_id is non zero. 643 643 */ 644 644 if (vport->vport_id) { 645 + kinfo->tc_info.max_tc = 1; 645 646 kinfo->tc_info.num_tc = 1; 646 647 vport->qs_offset = HNAE3_MAX_TC + 647 648 vport->vport_id - HCLGE_VF_VPORT_START_NUM; 648 649 vport_max_rss_size = hdev->vf_rss_size_max; 649 650 } else { 651 + kinfo->tc_info.max_tc = hdev->tc_max; 650 652 kinfo->tc_info.num_tc = 651 653 min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc); 652 654 vport->qs_offset = 0; ··· 681 679 kinfo->num_tqps = hclge_vport_get_tqp_num(vport); 682 680 vport->dwrr = 100; /* 100 percent as init */ 683 681 vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit; 684 - hdev->rss_cfg.rss_size = kinfo->rss_size; 682 + 683 + if (vport->vport_id == PF_VPORT_ID) 684 + hdev->rss_cfg.rss_size = kinfo->rss_size; 685 685 686 686 /* when enable mqprio, the tc_info has been updated. 
*/ 687 687 if (kinfo->tc_info.mqprio_active) ··· 718 714 719 715 static void hclge_tm_tc_info_init(struct hclge_dev *hdev) 720 716 { 721 - u8 i; 717 + u8 i, tc_sch_mode; 718 + u32 bw_limit; 722 719 723 - for (i = 0; i < hdev->tm_info.num_tc; i++) { 720 + for (i = 0; i < hdev->tc_max; i++) { 721 + if (i < hdev->tm_info.num_tc) { 722 + tc_sch_mode = HCLGE_SCH_MODE_DWRR; 723 + bw_limit = hdev->tm_info.pg_info[0].bw_limit; 724 + } else { 725 + tc_sch_mode = HCLGE_SCH_MODE_SP; 726 + bw_limit = 0; 727 + } 728 + 724 729 hdev->tm_info.tc_info[i].tc_id = i; 725 - hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR; 730 + hdev->tm_info.tc_info[i].tc_sch_mode = tc_sch_mode; 726 731 hdev->tm_info.tc_info[i].pgid = 0; 727 - hdev->tm_info.tc_info[i].bw_limit = 728 - hdev->tm_info.pg_info[0].bw_limit; 732 + hdev->tm_info.tc_info[i].bw_limit = bw_limit; 729 733 } 730 734 731 735 for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) ··· 938 926 for (k = 0; k < hdev->num_alloc_vport; k++) { 939 927 struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo; 940 928 941 - for (i = 0; i < kinfo->tc_info.num_tc; i++) { 929 + for (i = 0; i < kinfo->tc_info.max_tc; i++) { 930 + u8 pri = i < kinfo->tc_info.num_tc ? 
i : 0; 931 + bool link_vld = i < kinfo->tc_info.num_tc; 932 + 942 933 ret = hclge_tm_qs_to_pri_map_cfg(hdev, 943 934 vport[k].qs_offset + i, 944 - i); 935 + pri, link_vld); 945 936 if (ret) 946 937 return ret; 947 938 } ··· 964 949 for (i = 0; i < HNAE3_MAX_TC; i++) { 965 950 ret = hclge_tm_qs_to_pri_map_cfg(hdev, 966 951 vport[k].qs_offset + i, 967 - k); 952 + k, true); 968 953 if (ret) 969 954 return ret; 970 955 } ··· 1004 989 { 1005 990 u32 max_tm_rate = hdev->ae_dev->dev_specs.max_tm_rate; 1006 991 struct hclge_shaper_ir_para ir_para; 1007 - u32 shaper_para; 992 + u32 shaper_para_c, shaper_para_p; 1008 993 int ret; 1009 994 u32 i; 1010 995 1011 - for (i = 0; i < hdev->tm_info.num_tc; i++) { 996 + for (i = 0; i < hdev->tc_max; i++) { 1012 997 u32 rate = hdev->tm_info.tc_info[i].bw_limit; 1013 998 1014 - ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI, 1015 - &ir_para, max_tm_rate); 1016 - if (ret) 1017 - return ret; 999 + if (rate) { 1000 + ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI, 1001 + &ir_para, max_tm_rate); 1002 + if (ret) 1003 + return ret; 1018 1004 1019 - shaper_para = hclge_tm_get_shapping_para(0, 0, 0, 1020 - HCLGE_SHAPER_BS_U_DEF, 1021 - HCLGE_SHAPER_BS_S_DEF); 1005 + shaper_para_c = hclge_tm_get_shapping_para(0, 0, 0, 1006 + HCLGE_SHAPER_BS_U_DEF, 1007 + HCLGE_SHAPER_BS_S_DEF); 1008 + shaper_para_p = hclge_tm_get_shapping_para(ir_para.ir_b, 1009 + ir_para.ir_u, 1010 + ir_para.ir_s, 1011 + HCLGE_SHAPER_BS_U_DEF, 1012 + HCLGE_SHAPER_BS_S_DEF); 1013 + } else { 1014 + shaper_para_c = 0; 1015 + shaper_para_p = 0; 1016 + } 1017 + 1022 1018 ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i, 1023 - shaper_para, rate); 1019 + shaper_para_c, rate); 1024 1020 if (ret) 1025 1021 return ret; 1026 1022 1027 - shaper_para = hclge_tm_get_shapping_para(ir_para.ir_b, 1028 - ir_para.ir_u, 1029 - ir_para.ir_s, 1030 - HCLGE_SHAPER_BS_U_DEF, 1031 - HCLGE_SHAPER_BS_S_DEF); 1032 1023 ret = hclge_tm_pri_shapping_cfg(hdev, 
HCLGE_TM_SHAP_P_BUCKET, i, 1033 - shaper_para, rate); 1024 + shaper_para_p, rate); 1034 1025 if (ret) 1035 1026 return ret; 1036 1027 } ··· 1146 1125 int ret; 1147 1126 u32 i, k; 1148 1127 1149 - for (i = 0; i < hdev->tm_info.num_tc; i++) { 1128 + for (i = 0; i < hdev->tc_max; i++) { 1150 1129 pg_info = 1151 1130 &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; 1152 1131 dwrr = pg_info->tc_dwrr[i]; ··· 1156 1135 return ret; 1157 1136 1158 1137 for (k = 0; k < hdev->num_alloc_vport; k++) { 1138 + struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo; 1139 + 1140 + if (i >= kinfo->tc_info.max_tc) 1141 + continue; 1142 + 1143 + dwrr = i < kinfo->tc_info.num_tc ? vport[k].dwrr : 0; 1159 1144 ret = hclge_tm_qs_weight_cfg( 1160 1145 hdev, vport[k].qs_offset + i, 1161 - vport[k].dwrr); 1146 + dwrr); 1162 1147 if (ret) 1163 1148 return ret; 1164 1149 } ··· 1330 1303 { 1331 1304 struct hclge_vport *vport = hdev->vport; 1332 1305 int ret; 1306 + u8 mode; 1333 1307 u16 i; 1334 1308 1335 1309 ret = hclge_tm_pri_schd_mode_cfg(hdev, pri_id); ··· 1338 1310 return ret; 1339 1311 1340 1312 for (i = 0; i < hdev->num_alloc_vport; i++) { 1313 + struct hnae3_knic_private_info *kinfo = &vport[i].nic.kinfo; 1314 + 1315 + if (pri_id >= kinfo->tc_info.max_tc) 1316 + continue; 1317 + 1318 + mode = pri_id < kinfo->tc_info.num_tc ? HCLGE_SCH_MODE_DWRR : 1319 + HCLGE_SCH_MODE_SP; 1341 1320 ret = hclge_tm_qs_schd_mode_cfg(hdev, 1342 1321 vport[i].qs_offset + pri_id, 1343 - HCLGE_SCH_MODE_DWRR); 1322 + mode); 1344 1323 if (ret) 1345 1324 return ret; 1346 1325 } ··· 1388 1353 u8 i; 1389 1354 1390 1355 if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) { 1391 - for (i = 0; i < hdev->tm_info.num_tc; i++) { 1356 + for (i = 0; i < hdev->tc_max; i++) { 1392 1357 ret = hclge_tm_schd_mode_tc_base_cfg(hdev, i); 1393 1358 if (ret) 1394 1359 return ret;

+1

drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h

··· 237 237 void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats); 238 238 void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats); 239 239 int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate); 240 + int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev); 240 241 int hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num); 241 242 int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num); 242 243 int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority,

+17 -8

drivers/net/ethernet/intel/i40e/i40e_ethtool.c

··· 2588 2588 2589 2589 set_bit(__I40E_TESTING, pf->state); 2590 2590 2591 + if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || 2592 + test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) { 2593 + dev_warn(&pf->pdev->dev, 2594 + "Cannot start offline testing when PF is in reset state.\n"); 2595 + goto skip_ol_tests; 2596 + } 2597 + 2591 2598 if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) { 2592 2599 dev_warn(&pf->pdev->dev, 2593 2600 "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n"); 2594 - data[I40E_ETH_TEST_REG] = 1; 2595 - data[I40E_ETH_TEST_EEPROM] = 1; 2596 - data[I40E_ETH_TEST_INTR] = 1; 2597 - data[I40E_ETH_TEST_LINK] = 1; 2598 - eth_test->flags |= ETH_TEST_FL_FAILED; 2599 - clear_bit(__I40E_TESTING, pf->state); 2600 2601 goto skip_ol_tests; 2601 2602 } 2602 2603 ··· 2644 2643 data[I40E_ETH_TEST_INTR] = 0; 2645 2644 } 2646 2645 2647 - skip_ol_tests: 2648 - 2649 2646 netif_info(pf, drv, netdev, "testing finished\n"); 2647 + return; 2648 + 2649 + skip_ol_tests: 2650 + data[I40E_ETH_TEST_REG] = 1; 2651 + data[I40E_ETH_TEST_EEPROM] = 1; 2652 + data[I40E_ETH_TEST_INTR] = 1; 2653 + data[I40E_ETH_TEST_LINK] = 1; 2654 + eth_test->flags |= ETH_TEST_FL_FAILED; 2655 + clear_bit(__I40E_TESTING, pf->state); 2656 + netif_info(pf, drv, netdev, "testing failed\n"); 2650 2657 } 2651 2658 2652 2659 static void i40e_get_wol(struct net_device *netdev,

+5

drivers/net/ethernet/intel/i40e/i40e_main.c

··· 8667 8667 return -EOPNOTSUPP; 8668 8668 } 8669 8669 8670 + if (!tc) { 8671 + dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination"); 8672 + return -EINVAL; 8673 + } 8674 + 8670 8675 if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) || 8671 8676 test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) 8672 8677 return -EBUSY;

+1 -1

drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c

··· 2282 2282 } 2283 2283 2284 2284 if (vf->adq_enabled) { 2285 - for (i = 0; i < I40E_MAX_VF_VSI; i++) 2285 + for (i = 0; i < vf->num_tc; i++) 2286 2286 num_qps_all += vf->ch[i].num_qps; 2287 2287 if (num_qps_all != qci->num_queue_pairs) { 2288 2288 aq_ret = I40E_ERR_PARAM;

+1 -1

drivers/net/ethernet/intel/iavf/iavf_main.c

··· 985 985 f->add = true; 986 986 f->add_handled = false; 987 987 f->is_new_mac = true; 988 - f->is_primary = false; 988 + f->is_primary = ether_addr_equal(macaddr, adapter->hw.mac.addr); 989 989 adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER; 990 990 } else { 991 991 f->remove = false;

+28 -15

drivers/net/ethernet/intel/ice/ice_main.c

··· 5763 5763 ice_fix_features(struct net_device *netdev, netdev_features_t features) 5764 5764 { 5765 5765 struct ice_netdev_priv *np = netdev_priv(netdev); 5766 - netdev_features_t supported_vlan_filtering; 5767 - netdev_features_t requested_vlan_filtering; 5768 - struct ice_vsi *vsi = np->vsi; 5766 + netdev_features_t req_vlan_fltr, cur_vlan_fltr; 5767 + bool cur_ctag, cur_stag, req_ctag, req_stag; 5769 5768 5770 - requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES; 5769 + cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES; 5770 + cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER; 5771 + cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER; 5771 5772 5772 - /* make sure supported_vlan_filtering works for both SVM and DVM */ 5773 - supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER; 5774 - if (ice_is_dvm_ena(&vsi->back->hw)) 5775 - supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER; 5773 + req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES; 5774 + req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER; 5775 + req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER; 5776 5776 5777 - if (requested_vlan_filtering && 5778 - requested_vlan_filtering != supported_vlan_filtering) { 5779 - if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) { 5780 - netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n"); 5781 - features |= supported_vlan_filtering; 5777 + if (req_vlan_fltr != cur_vlan_fltr) { 5778 + if (ice_is_dvm_ena(&np->vsi->back->hw)) { 5779 + if (req_ctag && req_stag) { 5780 + features |= NETIF_VLAN_FILTERING_FEATURES; 5781 + } else if (!req_ctag && !req_stag) { 5782 + features &= ~NETIF_VLAN_FILTERING_FEATURES; 5783 + } else if ((!cur_ctag && req_ctag && !cur_stag) || 5784 + (!cur_stag && req_stag && !cur_ctag)) { 5785 + features |= NETIF_VLAN_FILTERING_FEATURES; 5786 + netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either 
both on or both off. VLAN filtering has been enabled for both types.\n"); 5787 + } else if ((cur_ctag && !req_ctag && cur_stag) || 5788 + (cur_stag && !req_stag && cur_ctag)) { 5789 + features &= ~NETIF_VLAN_FILTERING_FEATURES; 5790 + netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n"); 5791 + } 5782 5792 } else { 5783 - netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n"); 5784 - features &= ~supported_vlan_filtering; 5793 + if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER) 5794 + netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n"); 5795 + 5796 + if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER) 5797 + features |= NETIF_F_HW_VLAN_CTAG_FILTER; 5785 5798 } 5786 5799 } 5787 5800

+1 -1

drivers/net/ethernet/intel/ice/ice_ptp.c

··· 2271 2271 ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) 2272 2272 { 2273 2273 tx->quad = port / ICE_PORTS_PER_QUAD; 2274 - tx->quad_offset = tx->quad * INDEX_PER_PORT; 2274 + tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT; 2275 2275 tx->len = INDEX_PER_PORT; 2276 2276 2277 2277 return ice_ptp_alloc_tx_tracker(tx);

+31

drivers/net/ethernet/intel/ice/ice_ptp.h

··· 49 49 * To allow multiple ports to access the shared register block independently, 50 50 * the blocks are split up so that indexes are assigned to each port based on 51 51 * hardware logical port number. 52 + * 53 + * The timestamp blocks are handled differently for E810- and E822-based 54 + * devices. In E810 devices, each port has its own block of timestamps, while in 55 + * E822 there is a need to logically break the block of registers into smaller 56 + * chunks based on the port number to avoid collisions. 57 + * 58 + * Example for port 5 in E810: 59 + * +--------+--------+--------+--------+--------+--------+--------+--------+ 60 + * |register|register|register|register|register|register|register|register| 61 + * | block | block | block | block | block | block | block | block | 62 + * | for | for | for | for | for | for | for | for | 63 + * | port 0 | port 1 | port 2 | port 3 | port 4 | port 5 | port 6 | port 7 | 64 + * +--------+--------+--------+--------+--------+--------+--------+--------+ 65 + * ^^ 66 + * || 67 + * |--- quad offset is always 0 68 + * ---- quad number 69 + * 70 + * Example for port 5 in E822: 71 + * +-----------------------------+-----------------------------+ 72 + * | register block for quad 0 | register block for quad 1 | 73 + * |+------+------+------+------+|+------+------+------+------+| 74 + * ||port 0|port 1|port 2|port 3|||port 0|port 1|port 2|port 3|| 75 + * |+------+------+------+------+|+------+------+------+------+| 76 + * +-----------------------------+-------^---------------------+ 77 + * ^ | 78 + * | --- quad offset* 79 + * ---- quad number 80 + * 81 + * * PHY port 5 is port 1 in quad 1 82 + * 52 83 */ 53 84 54 85 /**

+5

drivers/net/ethernet/intel/ice/ice_vf_lib.c

··· 504 504 } 505 505 506 506 if (ice_is_vf_disabled(vf)) { 507 + vsi = ice_get_vf_vsi(vf); 508 + if (WARN_ON(!vsi)) 509 + return -EINVAL; 510 + ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id); 511 + ice_vsi_stop_all_rx_rings(vsi); 507 512 dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n", 508 513 vf->vf_id); 509 514 return 0;

+27 -28

drivers/net/ethernet/intel/ice/ice_virtchnl.c

··· 1592 1592 */ 1593 1593 static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) 1594 1594 { 1595 - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; 1596 1595 struct virtchnl_vsi_queue_config_info *qci = 1597 1596 (struct virtchnl_vsi_queue_config_info *)msg; 1598 1597 struct virtchnl_queue_pair_info *qpi; 1599 1598 struct ice_pf *pf = vf->pf; 1600 1599 struct ice_vsi *vsi; 1601 - int i, q_idx; 1600 + int i = -1, q_idx; 1602 1601 1603 - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { 1604 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1602 + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) 1605 1603 goto error_param; 1606 - } 1607 1604 1608 - if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) { 1609 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1605 + if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) 1610 1606 goto error_param; 1611 - } 1612 1607 1613 1608 vsi = ice_get_vf_vsi(vf); 1614 - if (!vsi) { 1615 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1609 + if (!vsi) 1616 1610 goto error_param; 1617 - } 1618 1611 1619 1612 if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF || 1620 1613 qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { 1621 1614 dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n", 1622 1615 vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); 1623 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1624 1616 goto error_param; 1625 1617 } 1626 1618 ··· 1625 1633 !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || 1626 1634 !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || 1627 1635 !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) { 1628 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1629 1636 goto error_param; 1630 1637 } 1631 1638 ··· 1634 1643 * for selected "vsi" 1635 1644 */ 1636 1645 if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) { 1637 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1638 1646 goto error_param; 1639 1647 } 1640 1648 ··· 1643 1653 vsi->tx_rings[i]->count = qpi->txq.ring_len; 1644 1654 1645 1655 /* Disable any existing 
queue first */ 1646 - if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) { 1647 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1656 + if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) 1648 1657 goto error_param; 1649 - } 1650 1658 1651 1659 /* Configure a queue with the requested settings */ 1652 1660 if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { 1653 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1661 + dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n", 1662 + vf->vf_id, i); 1654 1663 goto error_param; 1655 1664 } 1656 1665 } ··· 1663 1674 1664 1675 if (qpi->rxq.databuffer_size != 0 && 1665 1676 (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || 1666 - qpi->rxq.databuffer_size < 1024)) { 1667 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1677 + qpi->rxq.databuffer_size < 1024)) 1668 1678 goto error_param; 1669 - } 1670 1679 vsi->rx_buf_len = qpi->rxq.databuffer_size; 1671 1680 vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len; 1672 1681 if (qpi->rxq.max_pkt_size > max_frame_size || 1673 - qpi->rxq.max_pkt_size < 64) { 1674 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1682 + qpi->rxq.max_pkt_size < 64) 1675 1683 goto error_param; 1676 - } 1677 1684 1678 1685 vsi->max_frame = qpi->rxq.max_pkt_size; 1679 1686 /* add space for the port VLAN since the VF driver is ··· 1680 1695 vsi->max_frame += VLAN_HLEN; 1681 1696 1682 1697 if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { 1683 - v_ret = VIRTCHNL_STATUS_ERR_PARAM; 1698 + dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", 1699 + vf->vf_id, i); 1684 1700 goto error_param; 1685 1701 } 1686 1702 } 1687 1703 } 1688 1704 1689 - error_param: 1690 1705 /* send the response to the VF */ 1691 - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret, 1692 - NULL, 0); 1706 + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, 1707 + VIRTCHNL_STATUS_SUCCESS, NULL, 0); 1708 + error_param: 1709 + /* disable whatever we can */ 1710 + for (; i >= 0; i--) { 1711 + if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, 
true)) 1712 + dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n", 1713 + vf->vf_id, i); 1714 + if (ice_vf_vsi_dis_single_txq(vf, vsi, i)) 1715 + dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n", 1716 + vf->vf_id, i); 1717 + } 1718 + 1719 + /* send the response to the VF */ 1720 + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, 1721 + VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); 1693 1722 } 1694 1723 1695 1724 /**

+2 -1

drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c

··· 1390 1390 1391 1391 static const struct ethtool_ops otx2vf_ethtool_ops = { 1392 1392 .supported_coalesce_params = ETHTOOL_COALESCE_USECS | 1393 - ETHTOOL_COALESCE_MAX_FRAMES, 1393 + ETHTOOL_COALESCE_MAX_FRAMES | 1394 + ETHTOOL_COALESCE_USE_ADAPTIVE, 1394 1395 .supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN | 1395 1396 ETHTOOL_RING_USE_CQE_SIZE, 1396 1397 .get_link = otx2_get_link,

+1 -1

drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h

··· 8 8 #include "spectrum.h" 9 9 10 10 enum mlxsw_sp_counter_sub_pool_id { 11 - MLXSW_SP_COUNTER_SUB_POOL_FLOW, 12 11 MLXSW_SP_COUNTER_SUB_POOL_RIF, 12 + MLXSW_SP_COUNTER_SUB_POOL_FLOW, 13 13 }; 14 14 15 15 int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,

+51

drivers/net/ethernet/xilinx/xilinx_axienet.h

··· 547 547 iowrite32(value, lp->regs + offset); 548 548 } 549 549 550 + /** 551 + * axienet_dma_out32 - Memory mapped Axi DMA register write. 552 + * @lp: Pointer to axienet local structure 553 + * @reg: Address offset from the base address of the Axi DMA core 554 + * @value: Value to be written into the Axi DMA register 555 + * 556 + * This function writes the desired value into the corresponding Axi DMA 557 + * register. 558 + */ 559 + 560 + static inline void axienet_dma_out32(struct axienet_local *lp, 561 + off_t reg, u32 value) 562 + { 563 + iowrite32(value, lp->dma_regs + reg); 564 + } 565 + 566 + #if defined(CONFIG_64BIT) && defined(iowrite64) 567 + /** 568 + * axienet_dma_out64 - Memory mapped Axi DMA register write. 569 + * @lp: Pointer to axienet local structure 570 + * @reg: Address offset from the base address of the Axi DMA core 571 + * @value: Value to be written into the Axi DMA register 572 + * 573 + * This function writes the desired value into the corresponding Axi DMA 574 + * register. 575 + */ 576 + static inline void axienet_dma_out64(struct axienet_local *lp, 577 + off_t reg, u64 value) 578 + { 579 + iowrite64(value, lp->dma_regs + reg); 580 + } 581 + 582 + static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg, 583 + dma_addr_t addr) 584 + { 585 + if (lp->features & XAE_FEATURE_DMA_64BIT) 586 + axienet_dma_out64(lp, reg, addr); 587 + else 588 + axienet_dma_out32(lp, reg, lower_32_bits(addr)); 589 + } 590 + 591 + #else /* CONFIG_64BIT */ 592 + 593 + static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg, 594 + dma_addr_t addr) 595 + { 596 + axienet_dma_out32(lp, reg, lower_32_bits(addr)); 597 + } 598 + 599 + #endif /* CONFIG_64BIT */ 600 + 550 601 /* Function prototypes visible in xilinx_axienet_mdio.c for other files */ 551 602 int axienet_mdio_enable(struct axienet_local *lp); 552 603 void axienet_mdio_disable(struct axienet_local *lp);

+5 -24

drivers/net/ethernet/xilinx/xilinx_axienet_main.c

··· 133 133 return ioread32(lp->dma_regs + reg); 134 134 } 135 135 136 - /** 137 - * axienet_dma_out32 - Memory mapped Axi DMA register write. 138 - * @lp: Pointer to axienet local structure 139 - * @reg: Address offset from the base address of the Axi DMA core 140 - * @value: Value to be written into the Axi DMA register 141 - * 142 - * This function writes the desired value into the corresponding Axi DMA 143 - * register. 144 - */ 145 - static inline void axienet_dma_out32(struct axienet_local *lp, 146 - off_t reg, u32 value) 147 - { 148 - iowrite32(value, lp->dma_regs + reg); 149 - } 150 - 151 - static void axienet_dma_out_addr(struct axienet_local *lp, off_t reg, 152 - dma_addr_t addr) 153 - { 154 - axienet_dma_out32(lp, reg, lower_32_bits(addr)); 155 - 156 - if (lp->features & XAE_FEATURE_DMA_64BIT) 157 - axienet_dma_out32(lp, reg + 4, upper_32_bits(addr)); 158 - } 159 - 160 136 static void desc_set_phys_addr(struct axienet_local *lp, dma_addr_t addr, 161 137 struct axidma_bd *desc) 162 138 { ··· 2036 2060 } 2037 2061 iowrite32(0x0, desc); 2038 2062 } 2063 + } 2064 + if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) { 2065 + dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n"); 2066 + ret = -EINVAL; 2067 + goto cleanup_clk; 2039 2068 } 2040 2069 2041 2070 ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));

+13 -13

drivers/net/usb/ax88179_178a.c

··· 1750 1750 .link_reset = ax88179_link_reset, 1751 1751 .reset = ax88179_reset, 1752 1752 .stop = ax88179_stop, 1753 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1753 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1754 1754 .rx_fixup = ax88179_rx_fixup, 1755 1755 .tx_fixup = ax88179_tx_fixup, 1756 1756 }; ··· 1763 1763 .link_reset = ax88179_link_reset, 1764 1764 .reset = ax88179_reset, 1765 1765 .stop = ax88179_stop, 1766 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1766 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1767 1767 .rx_fixup = ax88179_rx_fixup, 1768 1768 .tx_fixup = ax88179_tx_fixup, 1769 1769 }; ··· 1776 1776 .link_reset = ax88179_link_reset, 1777 1777 .reset = ax88179_reset, 1778 1778 .stop = ax88179_stop, 1779 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1779 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1780 1780 .rx_fixup = ax88179_rx_fixup, 1781 1781 .tx_fixup = ax88179_tx_fixup, 1782 1782 }; ··· 1789 1789 .link_reset = ax88179_link_reset, 1790 1790 .reset = ax88179_reset, 1791 1791 .stop = ax88179_stop, 1792 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1792 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1793 1793 .rx_fixup = ax88179_rx_fixup, 1794 1794 .tx_fixup = ax88179_tx_fixup, 1795 1795 }; ··· 1802 1802 .link_reset = ax88179_link_reset, 1803 1803 .reset = ax88179_reset, 1804 1804 .stop = ax88179_stop, 1805 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1805 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1806 1806 .rx_fixup = ax88179_rx_fixup, 1807 1807 .tx_fixup = ax88179_tx_fixup, 1808 1808 }; ··· 1815 1815 .link_reset = ax88179_link_reset, 1816 1816 .reset = ax88179_reset, 1817 1817 .stop = ax88179_stop, 1818 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1818 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1819 1819 .rx_fixup = ax88179_rx_fixup, 1820 1820 .tx_fixup = ax88179_tx_fixup, 1821 1821 }; ··· 1828 1828 .link_reset = ax88179_link_reset, 1829 1829 .reset = ax88179_reset, 1830 1830 .stop = 
ax88179_stop, 1831 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1831 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1832 1832 .rx_fixup = ax88179_rx_fixup, 1833 1833 .tx_fixup = ax88179_tx_fixup, 1834 1834 }; ··· 1841 1841 .link_reset = ax88179_link_reset, 1842 1842 .reset = ax88179_reset, 1843 1843 .stop = ax88179_stop, 1844 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1844 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1845 1845 .rx_fixup = ax88179_rx_fixup, 1846 1846 .tx_fixup = ax88179_tx_fixup, 1847 1847 }; ··· 1854 1854 .link_reset = ax88179_link_reset, 1855 1855 .reset = ax88179_reset, 1856 1856 .stop = ax88179_stop, 1857 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1857 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1858 1858 .rx_fixup = ax88179_rx_fixup, 1859 1859 .tx_fixup = ax88179_tx_fixup, 1860 1860 }; ··· 1867 1867 .link_reset = ax88179_link_reset, 1868 1868 .reset = ax88179_reset, 1869 1869 .stop = ax88179_stop, 1870 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1870 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1871 1871 .rx_fixup = ax88179_rx_fixup, 1872 1872 .tx_fixup = ax88179_tx_fixup, 1873 1873 }; ··· 1880 1880 .link_reset = ax88179_link_reset, 1881 1881 .reset = ax88179_reset, 1882 1882 .stop = ax88179_stop, 1883 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1883 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1884 1884 .rx_fixup = ax88179_rx_fixup, 1885 1885 .tx_fixup = ax88179_tx_fixup, 1886 1886 }; ··· 1893 1893 .link_reset = ax88179_link_reset, 1894 1894 .reset = ax88179_reset, 1895 1895 .stop = ax88179_stop, 1896 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1896 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, 1897 1897 .rx_fixup = ax88179_rx_fixup, 1898 1898 .tx_fixup = ax88179_tx_fixup, 1899 1899 }; ··· 1906 1906 .link_reset = ax88179_link_reset, 1907 1907 .reset = ax88179_reset, 1908 1908 .stop = ax88179_stop, 1909 - .flags = FLAG_ETHER | FLAG_FRAMING_AX, 1909 + .flags = FLAG_ETHER | FLAG_FRAMING_AX | 
FLAG_SEND_ZLP, 1910 1910 .rx_fixup = ax88179_rx_fixup, 1911 1911 .tx_fixup = ax88179_tx_fixup, 1912 1912 };

+1 -1

drivers/platform/mellanox/Kconfig

··· 85 85 depends on I2C 86 86 depends on REGMAP_I2C 87 87 help 88 - This driver provides support for the Nvidia SN2201 platfom. 88 + This driver provides support for the Nvidia SN2201 platform. 89 89 The SN2201 is a highly integrated one rack unit system with 90 90 L3 management switches. It has 48 x 1Gbps RJ45 + 4 x 100G QSFP28 91 91 ports in a compact 1RU form factor. The system also includes a

+1 -1

drivers/platform/mellanox/nvsw-sn2201.c

··· 326 326 }; 327 327 328 328 /* SN2201 I2C platform data. */ 329 - struct mlxreg_core_hotplug_platform_data nvsw_sn2201_i2c_data = { 329 + static struct mlxreg_core_hotplug_platform_data nvsw_sn2201_i2c_data = { 330 330 .irq = NVSW_SN2201_CPLD_SYSIRQ, 331 331 }; 332 332

+4 -1

drivers/platform/x86/barco-p50-gpio.c

··· 405 405 static int __init p50_module_init(void) 406 406 { 407 407 struct resource res = DEFINE_RES_IO(P50_GPIO_IO_PORT_BASE, P50_PORT_CMD + 1); 408 + int ret; 408 409 409 410 if (!dmi_first_match(dmi_ids)) 410 411 return -ENODEV; 411 412 412 - platform_driver_register(&p50_gpio_driver); 413 + ret = platform_driver_register(&p50_gpio_driver); 414 + if (ret) 415 + return ret; 413 416 414 417 gpio_pdev = platform_device_register_simple(DRIVER_NAME, PLATFORM_DEVID_NONE, &res, 1); 415 418 if (IS_ERR(gpio_pdev)) {

+2

drivers/platform/x86/gigabyte-wmi.c

··· 140 140 }} 141 141 142 142 static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { 143 + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H-CF"), 143 144 DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"), 144 145 DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE AX V2"), 145 146 DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), ··· 157 156 DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"), 158 157 DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"), 159 158 DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"), 159 + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"), 160 160 { } 161 161 }; 162 162

+19 -10

drivers/platform/x86/hp-wmi.c

··· 38 38 #define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C" 39 39 #define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4" 40 40 #define HP_OMEN_EC_THERMAL_PROFILE_OFFSET 0x95 41 + #define zero_if_sup(tmp) (zero_insize_support?0:sizeof(tmp)) // use when zero insize is required 41 42 42 43 /* DMI board names of devices that should use the omen specific path for 43 44 * thermal profiles. ··· 221 220 static struct platform_device *hp_wmi_platform_dev; 222 221 static struct platform_profile_handler platform_profile_handler; 223 222 static bool platform_profile_support; 223 + static bool zero_insize_support; 224 224 225 225 static struct rfkill *wifi_rfkill; 226 226 static struct rfkill *bluetooth_rfkill; ··· 292 290 struct bios_return *bios_return; 293 291 union acpi_object *obj = NULL; 294 292 struct bios_args *args = NULL; 295 - int mid, actual_outsize, ret; 293 + int mid, actual_insize, actual_outsize; 296 294 size_t bios_args_size; 295 + int ret; 297 296 298 297 mid = encode_outsize_for_pvsz(outsize); 299 298 if (WARN_ON(mid < 0)) 300 299 return mid; 301 300 302 - bios_args_size = struct_size(args, data, insize); 301 + actual_insize = max(insize, 128); 302 + bios_args_size = struct_size(args, data, actual_insize); 303 303 args = kmalloc(bios_args_size, GFP_KERNEL); 304 304 if (!args) 305 305 return -ENOMEM; ··· 378 374 int val = 0, ret; 379 375 380 376 ret = hp_wmi_perform_query(query, HPWMI_READ, &val, 381 - 0, sizeof(val)); 377 + zero_if_sup(val), sizeof(val)); 382 378 383 379 if (ret) 384 380 return ret < 0 ? 
ret : -EINVAL; ··· 414 410 return -ENODEV; 415 411 416 412 ret = hp_wmi_perform_query(HPWMI_SYSTEM_DEVICE_MODE, HPWMI_READ, 417 - system_device_mode, 0, sizeof(system_device_mode)); 413 + system_device_mode, zero_if_sup(system_device_mode), 414 + sizeof(system_device_mode)); 418 415 if (ret < 0) 419 416 return ret; 420 417 ··· 502 497 int val = 0, ret; 503 498 504 499 ret = hp_wmi_perform_query(HPWMI_FAN_SPEED_MAX_GET_QUERY, HPWMI_GM, 505 - &val, 0, sizeof(val)); 500 + &val, zero_if_sup(val), sizeof(val)); 506 501 507 502 if (ret) 508 503 return ret < 0 ? ret : -EINVAL; ··· 514 509 { 515 510 int state = 0; 516 511 int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, HPWMI_READ, &state, 517 - 0, sizeof(state)); 512 + zero_if_sup(state), sizeof(state)); 518 513 if (!ret) 519 514 return 1; 520 515 ··· 525 520 { 526 521 u8 state[128]; 527 522 int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state, 528 - 0, sizeof(state)); 523 + zero_if_sup(state), sizeof(state)); 529 524 if (!ret) 530 525 return 1; 531 526 ··· 603 598 int err, i; 604 599 605 600 err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, 606 - 0, sizeof(state)); 601 + zero_if_sup(state), sizeof(state)); 607 602 if (err) 608 603 return err; 609 604 ··· 1012 1007 int err, i; 1013 1008 1014 1009 err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, 1015 - 0, sizeof(state)); 1010 + zero_if_sup(state), sizeof(state)); 1016 1011 if (err) 1017 1012 return err < 0 ? 
err : -EINVAL; 1018 1013 ··· 1488 1483 { 1489 1484 int event_capable = wmi_has_guid(HPWMI_EVENT_GUID); 1490 1485 int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID); 1491 - int err; 1486 + int err, tmp = 0; 1492 1487 1493 1488 if (!bios_capable && !event_capable) 1494 1489 return -ENODEV; 1490 + 1491 + if (hp_wmi_perform_query(HPWMI_HARDWARE_QUERY, HPWMI_READ, &tmp, 1492 + sizeof(tmp), sizeof(tmp)) == HPWMI_RET_INVALID_PARAMETERS) 1493 + zero_insize_support = true; 1495 1494 1496 1495 if (event_capable) { 1497 1496 err = hp_wmi_input_setup();

+6

drivers/platform/x86/intel/hid.c

··· 122 122 DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible 15-df0xxx"), 123 123 }, 124 124 }, 125 + { 126 + .matches = { 127 + DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"), 128 + DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go"), 129 + }, 130 + }, 125 131 { } 126 132 }; 127 133

+1

drivers/platform/x86/intel/pmc/core.c

··· 1912 1912 X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &tgl_reg_map), 1913 1913 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &tgl_reg_map), 1914 1914 X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_reg_map), 1915 + X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &tgl_reg_map), 1915 1916 {} 1916 1917 }; 1917 1918

+1 -1

drivers/platform/x86/intel/pmt/crashlog.c

··· 282 282 auxiliary_set_drvdata(auxdev, priv); 283 283 284 284 for (i = 0; i < intel_vsec_dev->num_resources; i++) { 285 - struct intel_pmt_entry *entry = &priv->entry[i].entry; 285 + struct intel_pmt_entry *entry = &priv->entry[priv->num_entries].entry; 286 286 287 287 ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, intel_vsec_dev, i); 288 288 if (ret < 0)

+2 -2

drivers/scsi/ipr.c

··· 9795 9795 GFP_KERNEL); 9796 9796 9797 9797 if (!ioa_cfg->hrrq[i].host_rrq) { 9798 - while (--i > 0) 9798 + while (--i >= 0) 9799 9799 dma_free_coherent(&pdev->dev, 9800 9800 sizeof(u32) * ioa_cfg->hrrq[i].size, 9801 9801 ioa_cfg->hrrq[i].host_rrq, ··· 10068 10068 ioa_cfg->vectors_info[i].desc, 10069 10069 &ioa_cfg->hrrq[i]); 10070 10070 if (rc) { 10071 - while (--i >= 0) 10071 + while (--i > 0) 10072 10072 free_irq(pci_irq_vector(pdev, i), 10073 10073 &ioa_cfg->hrrq[i]); 10074 10074 return rc;

+1 -3

drivers/scsi/lpfc/lpfc_crtn.h

··· 420 420 uint32_t); 421 421 void lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *, struct lpfc_iocbq *, 422 422 struct lpfc_iocbq *); 423 - void lpfc_sli4_abort_fcp_cmpl(struct lpfc_hba *h, struct lpfc_iocbq *i, 424 - struct lpfc_wcqe_complete *w); 425 423 426 424 void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *); 427 425 ··· 628 630 struct lpfc_nodelist *ndlp); 629 631 void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, 630 632 struct lpfc_iocbq *cmdiocb, 631 - struct lpfc_wcqe_complete *abts_cmpl); 633 + struct lpfc_iocbq *rspiocb); 632 634 void lpfc_create_multixri_pools(struct lpfc_hba *phba); 633 635 void lpfc_create_destroy_pools(struct lpfc_hba *phba); 634 636 void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid);

+1 -1

drivers/scsi/lpfc/lpfc_ct.c

··· 197 197 memset(bpl, 0, sizeof(struct ulp_bde64)); 198 198 bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys)); 199 199 bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys)); 200 - bpl->tus.f.bdeFlags = BUFF_TYPE_BLP_64; 200 + bpl->tus.f.bdeFlags = BUFF_TYPE_BDE_64; 201 201 bpl->tus.f.bdeSize = (LPFC_CT_PREAMBLE - 4); 202 202 bpl->tus.w = le32_to_cpu(bpl->tus.w); 203 203

+9 -12

drivers/scsi/lpfc/lpfc_els.c

··· 2998 2998 ndlp->nlp_DID, ulp_status, 2999 2999 ulp_word4); 3000 3000 3001 - /* Call NLP_EVT_DEVICE_RM if link is down or LOGO is aborted */ 3002 3001 if (lpfc_error_lost_link(ulp_status, ulp_word4)) { 3003 - lpfc_disc_state_machine(vport, ndlp, cmdiocb, 3004 - NLP_EVT_DEVICE_RM); 3005 3002 skip_recovery = 1; 3006 3003 goto out; 3007 3004 } ··· 3018 3021 spin_unlock_irq(&ndlp->lock); 3019 3022 lpfc_disc_state_machine(vport, ndlp, cmdiocb, 3020 3023 NLP_EVT_DEVICE_RM); 3021 - lpfc_els_free_iocb(phba, cmdiocb); 3022 - lpfc_nlp_put(ndlp); 3023 - 3024 - /* Presume the node was released. */ 3025 - return; 3024 + goto out_rsrc_free; 3026 3025 } 3027 3026 3028 3027 out: 3029 - /* Driver is done with the IO. */ 3030 - lpfc_els_free_iocb(phba, cmdiocb); 3031 - lpfc_nlp_put(ndlp); 3032 - 3033 3028 /* At this point, the LOGO processing is complete. NOTE: For a 3034 3029 * pt2pt topology, we are assuming the NPortID will only change 3035 3030 * on link up processing. For a LOGO / PLOGI initiated by the ··· 3048 3059 ndlp->nlp_DID, ulp_status, 3049 3060 ulp_word4, tmo, 3050 3061 vport->num_disc_nodes); 3062 + 3063 + lpfc_els_free_iocb(phba, cmdiocb); 3064 + lpfc_nlp_put(ndlp); 3065 + 3051 3066 lpfc_disc_start(vport); 3052 3067 return; 3053 3068 } ··· 3068 3075 lpfc_disc_state_machine(vport, ndlp, cmdiocb, 3069 3076 NLP_EVT_DEVICE_RM); 3070 3077 } 3078 + out_rsrc_free: 3079 + /* Driver is done with the I/O. */ 3080 + lpfc_els_free_iocb(phba, cmdiocb); 3081 + lpfc_nlp_put(ndlp); 3071 3082 } 3072 3083 3073 3084 /**

+3

drivers/scsi/lpfc/lpfc_hw4.h

··· 4487 4487 #define wqe_sup_SHIFT 6 4488 4488 #define wqe_sup_MASK 0x00000001 4489 4489 #define wqe_sup_WORD word11 4490 + #define wqe_ffrq_SHIFT 6 4491 + #define wqe_ffrq_MASK 0x00000001 4492 + #define wqe_ffrq_WORD word11 4490 4493 #define wqe_wqec_SHIFT 7 4491 4494 #define wqe_wqec_MASK 0x00000001 4492 4495 #define wqe_wqec_WORD word11

+1 -1

drivers/scsi/lpfc/lpfc_init.c

··· 12188 12188 rc = pci_enable_msi(phba->pcidev); 12189 12189 if (!rc) 12190 12190 lpfc_printf_log(phba, KERN_INFO, LOG_INIT, 12191 - "0462 PCI enable MSI mode success.\n"); 12191 + "0012 PCI enable MSI mode success.\n"); 12192 12192 else { 12193 12193 lpfc_printf_log(phba, KERN_INFO, LOG_INIT, 12194 12194 "0471 PCI enable MSI mode failed (%d)\n", rc);

+2 -1

drivers/scsi/lpfc/lpfc_nportdisc.c

··· 834 834 lpfc_nvmet_invalidate_host(phba, ndlp); 835 835 836 836 if (ndlp->nlp_DID == Fabric_DID) { 837 - if (vport->port_state <= LPFC_FDISC) 837 + if (vport->port_state <= LPFC_FDISC || 838 + vport->fc_flag & FC_PT2PT) 838 839 goto out; 839 840 lpfc_linkdown_port(vport); 840 841 spin_lock_irq(shost->host_lock);

+40 -12

drivers/scsi/lpfc/lpfc_nvme.c

··· 1065 1065 nCmd->rcv_rsplen = wcqe->parameter; 1066 1066 nCmd->status = 0; 1067 1067 1068 + /* Get the NVME cmd details for this unique error. */ 1069 + cp = (struct nvme_fc_cmd_iu *)nCmd->cmdaddr; 1070 + ep = (struct nvme_fc_ersp_iu *)nCmd->rspaddr; 1071 + 1068 1072 /* Check if this is really an ERSP */ 1069 1073 if (nCmd->rcv_rsplen == LPFC_NVME_ERSP_LEN) { 1070 1074 lpfc_ncmd->status = IOSTAT_SUCCESS; 1071 1075 lpfc_ncmd->result = 0; 1072 1076 1073 1077 lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, 1074 - "6084 NVME Completion ERSP: " 1075 - "xri %x placed x%x\n", 1076 - lpfc_ncmd->cur_iocbq.sli4_xritag, 1077 - wcqe->total_data_placed); 1078 + "6084 NVME FCP_ERR ERSP: " 1079 + "xri %x placed x%x opcode x%x cmd_id " 1080 + "x%x cqe_status x%x\n", 1081 + lpfc_ncmd->cur_iocbq.sli4_xritag, 1082 + wcqe->total_data_placed, 1083 + cp->sqe.common.opcode, 1084 + cp->sqe.common.command_id, 1085 + ep->cqe.status); 1078 1086 break; 1079 1087 } 1080 1088 lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, 1081 1089 "6081 NVME Completion Protocol Error: " 1082 1090 "xri %x status x%x result x%x " 1083 - "placed x%x\n", 1091 + "placed x%x opcode x%x cmd_id x%x, " 1092 + "cqe_status x%x\n", 1084 1093 lpfc_ncmd->cur_iocbq.sli4_xritag, 1085 1094 lpfc_ncmd->status, lpfc_ncmd->result, 1086 - wcqe->total_data_placed); 1095 + wcqe->total_data_placed, 1096 + cp->sqe.common.opcode, 1097 + cp->sqe.common.command_id, 1098 + ep->cqe.status); 1087 1099 break; 1088 1100 case IOSTAT_LOCAL_REJECT: 1089 1101 /* Let fall through to set command final state. 
*/ ··· 1207 1195 { 1208 1196 struct lpfc_hba *phba = vport->phba; 1209 1197 struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd; 1210 - struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq); 1198 + struct nvme_common_command *sqe; 1199 + struct lpfc_iocbq *pwqeq = &lpfc_ncmd->cur_iocbq; 1211 1200 union lpfc_wqe128 *wqe = &pwqeq->wqe; 1212 1201 uint32_t req_len; 1213 1202 ··· 1265 1252 cstat->control_requests++; 1266 1253 } 1267 1254 1268 - if (pnode->nlp_nvme_info & NLP_NVME_NSLER) 1255 + if (pnode->nlp_nvme_info & NLP_NVME_NSLER) { 1269 1256 bf_set(wqe_erp, &wqe->generic.wqe_com, 1); 1257 + sqe = &((struct nvme_fc_cmd_iu *) 1258 + nCmd->cmdaddr)->sqe.common; 1259 + if (sqe->opcode == nvme_admin_async_event) 1260 + bf_set(wqe_ffrq, &wqe->generic.wqe_com, 1); 1261 + } 1262 + 1270 1263 /* 1271 1264 * Finish initializing those WQE fields that are independent 1272 1265 * of the nvme_cmnd request_buffer ··· 1806 1787 * lpfc_nvme_abort_fcreq_cmpl - Complete an NVME FCP abort request. 1807 1788 * @phba: Pointer to HBA context object 1808 1789 * @cmdiocb: Pointer to command iocb object. 1809 - * @abts_cmpl: Pointer to wcqe complete object. 1790 + * @rspiocb: Pointer to response iocb object. 1810 1791 * 1811 1792 * This is the callback function for any NVME FCP IO that was aborted. 1812 1793 * ··· 1815 1796 **/ 1816 1797 void 1817 1798 lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, 1818 - struct lpfc_wcqe_complete *abts_cmpl) 1799 + struct lpfc_iocbq *rspiocb) 1819 1800 { 1801 + struct lpfc_wcqe_complete *abts_cmpl = &rspiocb->wcqe_cmpl; 1802 + 1820 1803 lpfc_printf_log(phba, KERN_INFO, LOG_NVME, 1821 1804 "6145 ABORT_XRI_CN completing on rpi x%x " 1822 1805 "original iotag x%x, abort cmd iotag x%x " ··· 1861 1840 struct lpfc_nvme_fcpreq_priv *freqpriv; 1862 1841 unsigned long flags; 1863 1842 int ret_val; 1843 + struct nvme_fc_cmd_iu *cp; 1864 1844 1865 1845 /* Validate pointers. 
LLDD fault handling with transport does 1866 1846 * have timing races. ··· 1985 1963 return; 1986 1964 } 1987 1965 1966 + /* 1967 + * Get Command Id from cmd to plug into response. This 1968 + * code is not needed in the next NVME Transport drop. 1969 + */ 1970 + cp = (struct nvme_fc_cmd_iu *)lpfc_nbuf->nvmeCmd->cmdaddr; 1988 1971 lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, 1989 1972 "6138 Transport Abort NVME Request Issued for " 1990 - "ox_id x%x\n", 1991 - nvmereq_wqe->sli4_xritag); 1973 + "ox_id x%x nvme opcode x%x nvme cmd_id x%x\n", 1974 + nvmereq_wqe->sli4_xritag, cp->sqe.common.opcode, 1975 + cp->sqe.common.command_id); 1992 1976 return; 1993 1977 1994 1978 out_unlock:

+6

drivers/scsi/lpfc/lpfc_scsi.c

··· 6062 6062 int status; 6063 6063 u32 logit = LOG_FCP; 6064 6064 6065 + if (!rport) 6066 + return FAILED; 6067 + 6065 6068 rdata = rport->dd_data; 6066 6069 if (!rdata || !rdata->pnode) { 6067 6070 lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, ··· 6142 6139 u32 dev_loss_tmo = vport->cfg_devloss_tmo; 6143 6140 unsigned long flags; 6144 6141 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq); 6142 + 6143 + if (!rport) 6144 + return FAILED; 6145 6145 6146 6146 rdata = rport->dd_data; 6147 6147 if (!rdata || !rdata->pnode) {

+12 -13

drivers/scsi/lpfc/lpfc_sli.c

··· 1930 1930 sync_buf = __lpfc_sli_get_iocbq(phba); 1931 1931 if (!sync_buf) { 1932 1932 lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT, 1933 - "6213 No available WQEs for CMF_SYNC_WQE\n"); 1933 + "6244 No available WQEs for CMF_SYNC_WQE\n"); 1934 1934 ret_val = ENOMEM; 1935 1935 goto out_unlock; 1936 1936 } ··· 3805 3805 set_job_ulpword4(cmdiocbp, 3806 3806 IOERR_ABORT_REQUESTED); 3807 3807 /* 3808 - * For SLI4, irsiocb contains 3808 + * For SLI4, irspiocb contains 3809 3809 * NO_XRI in sli_xritag, it 3810 3810 * shall not affect releasing 3811 3811 * sgl (xri) process. ··· 3823 3823 } 3824 3824 } 3825 3825 } 3826 - (cmdiocbp->cmd_cmpl) (phba, cmdiocbp, saveq); 3826 + cmdiocbp->cmd_cmpl(phba, cmdiocbp, saveq); 3827 3827 } else 3828 3828 lpfc_sli_release_iocbq(phba, cmdiocbp); 3829 3829 } else { ··· 4063 4063 cmdiocbq->cmd_flag &= ~LPFC_DRIVER_ABORTED; 4064 4064 if (cmdiocbq->cmd_cmpl) { 4065 4065 spin_unlock_irqrestore(&phba->hbalock, iflag); 4066 - (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, 4067 - &rspiocbq); 4066 + cmdiocbq->cmd_cmpl(phba, cmdiocbq, &rspiocbq); 4068 4067 spin_lock_irqsave(&phba->hbalock, iflag); 4069 4068 } 4070 4069 break; ··· 10287 10288 * @flag: Flag indicating if this command can be put into txq. 10288 10289 * 10289 10290 * __lpfc_sli_issue_fcp_io_s3 is wrapper function to invoke lockless func to 10290 - * send an iocb command to an HBA with SLI-4 interface spec. 10291 + * send an iocb command to an HBA with SLI-3 interface spec. 10291 10292 * 10292 10293 * This function takes the hbalock before invoking the lockless version. 
10293 10294 * The function will return success after it successfully submit the wqe to ··· 12739 12740 cmdiocbq->cmd_cmpl = cmdiocbq->wait_cmd_cmpl; 12740 12741 cmdiocbq->wait_cmd_cmpl = NULL; 12741 12742 if (cmdiocbq->cmd_cmpl) 12742 - (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, NULL); 12743 + cmdiocbq->cmd_cmpl(phba, cmdiocbq, NULL); 12743 12744 else 12744 12745 lpfc_sli_release_iocbq(phba, cmdiocbq); 12745 12746 return; ··· 12753 12754 12754 12755 /* Set the exchange busy flag for task management commands */ 12755 12756 if ((cmdiocbq->cmd_flag & LPFC_IO_FCP) && 12756 - !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) { 12757 + !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) { 12757 12758 lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf, 12758 - cur_iocbq); 12759 + cur_iocbq); 12759 12760 if (rspiocbq && (rspiocbq->cmd_flag & LPFC_EXCHANGE_BUSY)) 12760 12761 lpfc_cmd->flags |= LPFC_SBUF_XBUSY; 12761 12762 else ··· 13895 13896 * @irspiocbq: Pointer to work-queue completion queue entry. 13896 13897 * 13897 13898 * This routine handles an ELS work-queue completion event and construct 13898 - * a pseudo response ELS IODBQ from the SLI4 ELS WCQE for the common 13899 + * a pseudo response ELS IOCBQ from the SLI4 ELS WCQE for the common 13899 13900 * discovery engine to handle. 13900 13901 * 13901 13902 * Return: Pointer to the receive IOCBQ, NULL otherwise. 
··· 13939 13940 13940 13941 if (bf_get(lpfc_wcqe_c_xb, wcqe)) { 13941 13942 spin_lock_irqsave(&phba->hbalock, iflags); 13942 - cmdiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY; 13943 + irspiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY; 13943 13944 spin_unlock_irqrestore(&phba->hbalock, iflags); 13944 13945 } 13945 13946 ··· 14798 14799 /* Pass the cmd_iocb and the wcqe to the upper layer */ 14799 14800 memcpy(&cmdiocbq->wcqe_cmpl, wcqe, 14800 14801 sizeof(struct lpfc_wcqe_complete)); 14801 - (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, cmdiocbq); 14802 + cmdiocbq->cmd_cmpl(phba, cmdiocbq, cmdiocbq); 14802 14803 } else { 14803 14804 lpfc_printf_log(phba, KERN_WARNING, LOG_SLI, 14804 14805 "0375 FCP cmdiocb not callback function " ··· 18955 18956 18956 18957 /* Free iocb created in lpfc_prep_seq */ 18957 18958 list_for_each_entry_safe(curr_iocb, next_iocb, 18958 - &iocbq->list, list) { 18959 + &iocbq->list, list) { 18959 18960 list_del_init(&curr_iocb->list); 18960 18961 lpfc_sli_release_iocbq(phba, curr_iocb); 18961 18962 }

+1 -1

drivers/scsi/lpfc/lpfc_version.h

··· 20 20 * included with this package. * 21 21 *******************************************************************/ 22 22 23 - #define LPFC_DRIVER_VERSION "14.2.0.3" 23 + #define LPFC_DRIVER_VERSION "14.2.0.4" 24 24 #define LPFC_DRIVER_NAME "lpfc" 25 25 26 26 /* Used for SLI 2/3 */

+12 -11

drivers/scsi/mpt3sas/mpt3sas_base.c

··· 5369 5369 Mpi2ConfigReply_t mpi_reply; 5370 5370 Mpi2SasIOUnitPage1_t *sas_iounit_pg1 = NULL; 5371 5371 Mpi26PCIeIOUnitPage1_t pcie_iounit_pg1; 5372 + u16 depth; 5372 5373 int sz; 5373 5374 int rc = 0; 5374 5375 ··· 5381 5380 goto out; 5382 5381 /* sas iounit page 1 */ 5383 5382 sz = offsetof(Mpi2SasIOUnitPage1_t, PhyData); 5384 - sas_iounit_pg1 = kzalloc(sz, GFP_KERNEL); 5383 + sas_iounit_pg1 = kzalloc(sizeof(Mpi2SasIOUnitPage1_t), GFP_KERNEL); 5385 5384 if (!sas_iounit_pg1) { 5386 5385 pr_err("%s: failure at %s:%d/%s()!\n", 5387 5386 ioc->name, __FILE__, __LINE__, __func__); ··· 5394 5393 ioc->name, __FILE__, __LINE__, __func__); 5395 5394 goto out; 5396 5395 } 5397 - ioc->max_wideport_qd = 5398 - (le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth)) ? 5399 - le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth) : 5400 - MPT3SAS_SAS_QUEUE_DEPTH; 5401 - ioc->max_narrowport_qd = 5402 - (le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth)) ? 5403 - le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth) : 5404 - MPT3SAS_SAS_QUEUE_DEPTH; 5405 - ioc->max_sata_qd = (sas_iounit_pg1->SATAMaxQDepth) ? 5406 - sas_iounit_pg1->SATAMaxQDepth : MPT3SAS_SATA_QUEUE_DEPTH; 5396 + 5397 + depth = le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth); 5398 + ioc->max_wideport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH); 5399 + 5400 + depth = le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth); 5401 + ioc->max_narrowport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH); 5402 + 5403 + depth = sas_iounit_pg1->SATAMaxQDepth; 5404 + ioc->max_sata_qd = (depth ? depth : MPT3SAS_SATA_QUEUE_DEPTH); 5405 + 5407 5406 /* pcie iounit page 1 */ 5408 5407 rc = mpt3sas_config_get_pcie_iounit_pg1(ioc, &mpi_reply, 5409 5408 &pcie_iounit_pg1, sizeof(Mpi26PCIeIOUnitPage1_t));

+1 -1

drivers/scsi/pmcraid.c

··· 4031 4031 return 0; 4032 4032 4033 4033 out_unwind: 4034 - while (--i > 0) 4034 + while (--i >= 0) 4035 4035 free_irq(pci_irq_vector(pdev, i), &pinstance->hrrq_vector[i]); 4036 4036 pci_free_irq_vectors(pdev); 4037 4037 return rc;

+1 -1

drivers/scsi/sd.c

··· 3072 3072 goto out; 3073 3073 3074 3074 /* We must have at least a 64B header and one 32B range descriptor */ 3075 - vpd_len = get_unaligned_be16(&buffer[2]) + 3; 3075 + vpd_len = get_unaligned_be16(&buffer[2]) + 4; 3076 3076 if (vpd_len > buf_len || vpd_len < 64 + 32 || (vpd_len & 31)) { 3077 3077 sd_printk(KERN_ERR, sdkp, 3078 3078 "Invalid Concurrent Positioning Ranges VPD page\n");

+2 -2

drivers/scsi/vmw_pvscsi.h

··· 331 331 u8 tag; 332 332 u8 bus; 333 333 u8 target; 334 - u8 vcpuHint; 335 - u8 unused[59]; 334 + u16 vcpuHint; 335 + u8 unused[58]; 336 336 } __packed; 337 337 338 338 /*

+5 -4

drivers/vdpa/mlx5/net/mlx5_vnet.c

··· 107 107 108 108 /* Resources for implementing the notification channel from the device 109 109 * to the driver. fwqp is the firmware end of an RC connection; the 110 - * other end is vqqp used by the driver. cq is is where completions are 110 + * other end is vqqp used by the driver. cq is where completions are 111 111 * reported. 112 112 */ 113 113 struct mlx5_vdpa_cq cq; ··· 1814 1814 1815 1815 id = mlx5vdpa16_to_cpu(mvdev, vlan); 1816 1816 mac_vlan_del(ndev, ndev->config.mac, id, true); 1817 + status = VIRTIO_NET_OK; 1817 1818 break; 1818 1819 default: 1819 - break; 1820 - } 1820 + break; 1821 + } 1821 1822 1822 - return status; 1823 + return status; 1823 1824 } 1824 1825 1825 1826 static void mlx5_cvq_kick_handler(struct work_struct *work)

+3 -4

drivers/vdpa/vdpa_user/vduse_dev.c

··· 1345 1345 1346 1346 dev->minor = ret; 1347 1347 dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT; 1348 - dev->dev = device_create(vduse_class, NULL, 1349 - MKDEV(MAJOR(vduse_major), dev->minor), 1350 - dev, "%s", config->name); 1348 + dev->dev = device_create_with_groups(vduse_class, NULL, 1349 + MKDEV(MAJOR(vduse_major), dev->minor), 1350 + dev, vduse_dev_groups, "%s", config->name); 1351 1351 if (IS_ERR(dev->dev)) { 1352 1352 ret = PTR_ERR(dev->dev); 1353 1353 goto err_dev; ··· 1596 1596 return PTR_ERR(vduse_class); 1597 1597 1598 1598 vduse_class->devnode = vduse_devnode; 1599 - vduse_class->dev_groups = vduse_dev_groups; 1600 1599 1601 1600 ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse"); 1602 1601 if (ret)

+2

drivers/vhost/vdpa.c

··· 499 499 ops->set_vq_ready(vdpa, idx, s.num); 500 500 return 0; 501 501 case VHOST_VDPA_GET_VRING_GROUP: 502 + if (!ops->get_vq_group) 503 + return -EOPNOTSUPP; 502 504 s.index = idx; 503 505 s.num = ops->get_vq_group(vdpa, idx); 504 506 if (s.num >= vdpa->ngroups)

+8 -2

drivers/vhost/vringh.c

··· 292 292 int (*copy)(const struct vringh *vrh, 293 293 void *dst, const void *src, size_t len)) 294 294 { 295 - int err, count = 0, up_next, desc_max; 295 + int err, count = 0, indirect_count = 0, up_next, desc_max; 296 296 struct vring_desc desc, *descs; 297 297 struct vringh_range range = { -1ULL, 0 }, slowrange; 298 298 bool slow = false; ··· 349 349 continue; 350 350 } 351 351 352 - if (count++ == vrh->vring.num) { 352 + if (up_next == -1) 353 + count++; 354 + else 355 + indirect_count++; 356 + 357 + if (count > vrh->vring.num || indirect_count > desc_max) { 353 358 vringh_bad("Descriptor loop in %p", descs); 354 359 err = -ELOOP; 355 360 goto fail; ··· 416 411 i = return_from_indirect(vrh, &up_next, 417 412 &descs, &desc_max); 418 413 slow = false; 414 + indirect_count = 0; 419 415 } else 420 416 break; 421 417 }

+2 -1

drivers/virtio/virtio_mmio.c

··· 255 255 256 256 /* 257 257 * Per memory-barriers.txt, wmb() is not needed to guarantee 258 - * that the the cache coherent memory writes have completed 258 + * that the cache coherent memory writes have completed 259 259 * before writing to the MMIO region. 260 260 */ 261 261 writel(status, vm_dev->base + VIRTIO_MMIO_STATUS); ··· 701 701 if (!vm_cmdline_parent_registered) { 702 702 err = device_register(&vm_cmdline_parent); 703 703 if (err) { 704 + put_device(&vm_cmdline_parent); 704 705 pr_err("Failed to register parent device!\n"); 705 706 return err; 706 707 }

+1 -1

drivers/virtio/virtio_pci_modern_dev.c

··· 469 469 470 470 /* 471 471 * Per memory-barriers.txt, wmb() is not needed to guarantee 472 - * that the the cache coherent memory writes have completed 472 + * that the cache coherent memory writes have completed 473 473 * before writing to the MMIO region. 474 474 */ 475 475 vp_iowrite8(status, &cfg->device_status);

+1 -1

fs/9p/v9fs.h

··· 124 124 static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode) 125 125 { 126 126 #ifdef CONFIG_9P_FSCACHE 127 - return netfs_i_cookie(&v9inode->netfs.inode); 127 + return netfs_i_cookie(&v9inode->netfs); 128 128 #else 129 129 return NULL; 130 130 #endif

+6 -7

fs/9p/vfs_addr.c

··· 66 66 } 67 67 68 68 /** 69 - * v9fs_req_cleanup - Cleanup request initialized by v9fs_init_request 70 - * @mapping: unused mapping of request to cleanup 71 - * @priv: private data to cleanup, a fid, guaranted non-null. 69 + * v9fs_free_request - Cleanup request initialized by v9fs_init_rreq 70 + * @rreq: The I/O request to clean up 72 71 */ 73 - static void v9fs_req_cleanup(struct address_space *mapping, void *priv) 72 + static void v9fs_free_request(struct netfs_io_request *rreq) 74 73 { 75 - struct p9_fid *fid = priv; 74 + struct p9_fid *fid = rreq->netfs_priv; 76 75 77 76 p9_client_clunk(fid); 78 77 } ··· 93 94 94 95 const struct netfs_request_ops v9fs_req_ops = { 95 96 .init_request = v9fs_init_request, 97 + .free_request = v9fs_free_request, 96 98 .begin_cache_operation = v9fs_begin_cache_operation, 97 99 .issue_read = v9fs_issue_read, 98 - .cleanup = v9fs_req_cleanup, 99 100 }; 100 101 101 102 /** ··· 273 274 * file. We need to do this before we get a lock on the page in case 274 275 * there's more than one writer competing for the same cache block. 275 276 */ 276 - retval = netfs_write_begin(filp, mapping, pos, len, &folio, fsdata); 277 + retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata); 277 278 if (retval < 0) 278 279 return retval; 279 280

+2 -1

fs/9p/vfs_inode.c

··· 252 252 */ 253 253 static void v9fs_set_netfs_context(struct inode *inode) 254 254 { 255 - netfs_inode_init(inode, &v9fs_req_ops); 255 + struct v9fs_inode *v9inode = V9FS_I(inode); 256 + netfs_inode_init(&v9inode->netfs, &v9fs_req_ops); 256 257 } 257 258 258 259 int v9fs_init_inode(struct v9fs_session_info *v9ses,

+1 -1

fs/afs/dynroot.c

··· 76 76 /* there shouldn't be an existing inode */ 77 77 BUG_ON(!(inode->i_state & I_NEW)); 78 78 79 - netfs_inode_init(inode, NULL); 79 + netfs_inode_init(&vnode->netfs, NULL); 80 80 inode->i_size = 0; 81 81 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 82 82 if (root) {

+3 -3

fs/afs/file.c

··· 382 382 return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0; 383 383 } 384 384 385 - static void afs_priv_cleanup(struct address_space *mapping, void *netfs_priv) 385 + static void afs_free_request(struct netfs_io_request *rreq) 386 386 { 387 - key_put(netfs_priv); 387 + key_put(rreq->netfs_priv); 388 388 } 389 389 390 390 const struct netfs_request_ops afs_req_ops = { 391 391 .init_request = afs_init_request, 392 + .free_request = afs_free_request, 392 393 .begin_cache_operation = afs_begin_cache_operation, 393 394 .check_write_begin = afs_check_write_begin, 394 395 .issue_read = afs_issue_read, 395 - .cleanup = afs_priv_cleanup, 396 396 }; 397 397 398 398 int afs_write_inode(struct inode *inode, struct writeback_control *wbc)

+1 -1

fs/afs/inode.c

··· 58 58 */ 59 59 static void afs_set_netfs_context(struct afs_vnode *vnode) 60 60 { 61 - netfs_inode_init(&vnode->netfs.inode, &afs_req_ops); 61 + netfs_inode_init(&vnode->netfs, &afs_req_ops); 62 62 } 63 63 64 64 /*

+1 -1

fs/afs/internal.h

··· 670 670 static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode) 671 671 { 672 672 #ifdef CONFIG_AFS_FSCACHE 673 - return netfs_i_cookie(&vnode->netfs.inode); 673 + return netfs_i_cookie(&vnode->netfs); 674 674 #else 675 675 return NULL; 676 676 #endif

+1 -2

fs/afs/volume.c

··· 9 9 #include <linux/slab.h> 10 10 #include "internal.h" 11 11 12 - unsigned __read_mostly afs_volume_gc_delay = 10; 13 - unsigned __read_mostly afs_volume_record_life = 60 * 60; 12 + static unsigned __read_mostly afs_volume_record_life = 60 * 60; 14 13 15 14 /* 16 15 * Insert a volume into a cell. If there's an existing volume record, that is

+1 -1

fs/afs/write.c

··· 60 60 * file. We need to do this before we get a lock on the page in case 61 61 * there's more than one writer competing for the same cache block. 62 62 */ 63 - ret = netfs_write_begin(file, mapping, pos, len, &folio, fsdata); 63 + ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata); 64 64 if (ret < 0) 65 65 return ret; 66 66

+20 -6

fs/attr.c

··· 61 61 const struct inode *inode, kgid_t gid) 62 62 { 63 63 kgid_t kgid = i_gid_into_mnt(mnt_userns, inode); 64 - if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) && 65 - (in_group_p(gid) || gid_eq(gid, inode->i_gid))) 66 - return true; 64 + if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) { 65 + kgid_t mapped_gid; 66 + 67 + if (gid_eq(gid, inode->i_gid)) 68 + return true; 69 + mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid); 70 + if (in_group_p(mapped_gid)) 71 + return true; 72 + } 67 73 if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN)) 68 74 return true; 69 75 if (gid_eq(kgid, INVALID_GID) && ··· 129 123 130 124 /* Make sure a caller can chmod. */ 131 125 if (ia_valid & ATTR_MODE) { 126 + kgid_t mapped_gid; 127 + 132 128 if (!inode_owner_or_capable(mnt_userns, inode)) 133 129 return -EPERM; 130 + 131 + if (ia_valid & ATTR_GID) 132 + mapped_gid = mapped_kgid_fs(mnt_userns, 133 + i_user_ns(inode), attr->ia_gid); 134 + else 135 + mapped_gid = i_gid_into_mnt(mnt_userns, inode); 136 + 134 137 /* Also check the setgid bit! */ 135 - if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : 136 - i_gid_into_mnt(mnt_userns, inode)) && 137 - !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) 138 + if (!in_group_p(mapped_gid) && 139 + !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID)) 138 140 attr->ia_mode &= ~S_ISGID; 139 141 } 140 142

+6 -6

fs/ceph/addr.c

··· 394 394 return 0; 395 395 } 396 396 397 - static void ceph_readahead_cleanup(struct address_space *mapping, void *priv) 397 + static void ceph_netfs_free_request(struct netfs_io_request *rreq) 398 398 { 399 - struct inode *inode = mapping->host; 400 - struct ceph_inode_info *ci = ceph_inode(inode); 401 - int got = (uintptr_t)priv; 399 + struct ceph_inode_info *ci = ceph_inode(rreq->inode); 400 + int got = (uintptr_t)rreq->netfs_priv; 402 401 403 402 if (got) 404 403 ceph_put_cap_refs(ci, got); ··· 405 406 406 407 const struct netfs_request_ops ceph_netfs_ops = { 407 408 .init_request = ceph_init_request, 409 + .free_request = ceph_netfs_free_request, 408 410 .begin_cache_operation = ceph_begin_cache_operation, 409 411 .issue_read = ceph_netfs_issue_read, 410 412 .expand_readahead = ceph_netfs_expand_readahead, 411 413 .clamp_length = ceph_netfs_clamp_length, 412 414 .check_write_begin = ceph_netfs_check_write_begin, 413 - .cleanup = ceph_readahead_cleanup, 414 415 }; 415 416 416 417 #ifdef CONFIG_CEPH_FSCACHE ··· 1321 1322 struct page **pagep, void **fsdata) 1322 1323 { 1323 1324 struct inode *inode = file_inode(file); 1325 + struct ceph_inode_info *ci = ceph_inode(inode); 1324 1326 struct folio *folio = NULL; 1325 1327 int r; 1326 1328 1327 - r = netfs_write_begin(file, inode->i_mapping, pos, len, &folio, NULL); 1329 + r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, &folio, NULL); 1328 1330 if (r == 0) 1329 1331 folio_wait_fscache(folio); 1330 1332 if (r < 0) {

+1 -1

fs/ceph/cache.h

··· 28 28 29 29 static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci) 30 30 { 31 - return netfs_i_cookie(&ci->netfs.inode); 31 + return netfs_i_cookie(&ci->netfs); 32 32 } 33 33 34 34 static inline void ceph_fscache_resize(struct inode *inode, loff_t to)

+1 -1

fs/ceph/inode.c

··· 460 460 dout("alloc_inode %p\n", &ci->netfs.inode); 461 461 462 462 /* Set parameters for the netfs library */ 463 - netfs_inode_init(&ci->netfs.inode, &ceph_netfs_ops); 463 + netfs_inode_init(&ci->netfs, &ceph_netfs_ops); 464 464 465 465 spin_lock_init(&ci->i_ceph_lock); 466 466

+1 -1

fs/cifs/cifsfs.c

··· 1086 1086 }; 1087 1087 MODULE_ALIAS_FS("cifs"); 1088 1088 1089 - static struct file_system_type smb3_fs_type = { 1089 + struct file_system_type smb3_fs_type = { 1090 1090 .owner = THIS_MODULE, 1091 1091 .name = "smb3", 1092 1092 .init_fs_context = smb3_init_fs_context,

+1 -1

fs/cifs/cifsfs.h

··· 38 38 return (unsigned long) dentry->d_fsdata; 39 39 } 40 40 41 - extern struct file_system_type cifs_fs_type; 41 + extern struct file_system_type cifs_fs_type, smb3_fs_type; 42 42 extern const struct address_space_operations cifs_addr_ops; 43 43 extern const struct address_space_operations cifs_addr_ops_smallbuf; 44 44

+4

fs/cifs/connect.c

··· 97 97 if (!server->hostname) 98 98 return -EINVAL; 99 99 100 + /* if server hostname isn't populated, there's nothing to do here */ 101 + if (server->hostname[0] == '\0') 102 + return 0; 103 + 100 104 len = strlen(server->hostname) + 3; 101 105 102 106 unc = kmalloc(len, GFP_KERNEL);

+1 -1

fs/cifs/fscache.h

··· 61 61 62 62 static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) 63 63 { 64 - return netfs_i_cookie(inode); 64 + return netfs_i_cookie(&CIFS_I(inode)->netfs); 65 65 } 66 66 67 67 static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags)

+16 -11

fs/cifs/misc.c

··· 1211 1211 .data = data, 1212 1212 .sb = NULL, 1213 1213 }; 1214 + struct file_system_type **fs_type = (struct file_system_type *[]) { 1215 + &cifs_fs_type, &smb3_fs_type, NULL, 1216 + }; 1214 1217 1215 - iterate_supers_type(&cifs_fs_type, f, &sd); 1216 - 1217 - if (!sd.sb) 1218 - return ERR_PTR(-EINVAL); 1219 - /* 1220 - * Grab an active reference in order to prevent automounts (DFS links) 1221 - * of expiring and then freeing up our cifs superblock pointer while 1222 - * we're doing failover. 1223 - */ 1224 - cifs_sb_active(sd.sb); 1225 - return sd.sb; 1218 + for (; *fs_type; fs_type++) { 1219 + iterate_supers_type(*fs_type, f, &sd); 1220 + if (sd.sb) { 1221 + /* 1222 + * Grab an active reference in order to prevent automounts (DFS links) 1223 + * of expiring and then freeing up our cifs superblock pointer while 1224 + * we're doing failover. 1225 + */ 1226 + cifs_sb_active(sd.sb); 1227 + return sd.sb; 1228 + } 1229 + } 1230 + return ERR_PTR(-EINVAL); 1226 1231 } 1227 1232 1228 1233 static void __cifs_put_super(struct super_block *sb)

+4 -1

fs/cifs/sess.c

··· 301 301 /* Auth */ 302 302 ctx.domainauto = ses->domainAuto; 303 303 ctx.domainname = ses->domainName; 304 - ctx.server_hostname = ses->server->hostname; 304 + 305 + /* no hostname for extra channels */ 306 + ctx.server_hostname = ""; 307 + 305 308 ctx.username = ses->user_name; 306 309 ctx.password = ses->password; 307 310 ctx.sectype = ses->sectype;

+3

fs/cifs/smb2pdu.c

··· 288 288 mutex_unlock(&ses->session_mutex); 289 289 rc = -EHOSTDOWN; 290 290 goto failed; 291 + } else if (rc) { 292 + mutex_unlock(&ses->session_mutex); 293 + goto out; 291 294 } 292 295 } else { 293 296 mutex_unlock(&ses->session_mutex);

+3 -2

fs/netfs/buffered_read.c

··· 297 297 298 298 /** 299 299 * netfs_write_begin - Helper to prepare for writing 300 + * @ctx: The netfs context 300 301 * @file: The file to read from 301 302 * @mapping: The mapping to read from 302 303 * @pos: File position at which the write will begin ··· 327 326 * 328 327 * This is usable whether or not caching is enabled. 329 328 */ 330 - int netfs_write_begin(struct file *file, struct address_space *mapping, 329 + int netfs_write_begin(struct netfs_inode *ctx, 330 + struct file *file, struct address_space *mapping, 331 331 loff_t pos, unsigned int len, struct folio **_folio, 332 332 void **_fsdata) 333 333 { 334 334 struct netfs_io_request *rreq; 335 - struct netfs_inode *ctx = netfs_inode(file_inode(file )); 336 335 struct folio *folio; 337 336 unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; 338 337 pgoff_t index = pos >> PAGE_SHIFT;

+3 -3

fs/netfs/objects.c

··· 75 75 struct netfs_io_request *rreq = 76 76 container_of(work, struct netfs_io_request, work); 77 77 78 - netfs_clear_subrequests(rreq, false); 79 - if (rreq->netfs_priv) 80 - rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv); 81 78 trace_netfs_rreq(rreq, netfs_rreq_trace_free); 79 + netfs_clear_subrequests(rreq, false); 80 + if (rreq->netfs_ops->free_request) 81 + rreq->netfs_ops->free_request(rreq); 82 82 if (rreq->cache_resources.ops) 83 83 rreq->cache_resources.ops->end_operation(&rreq->cache_resources); 84 84 kfree(rreq);

+5 -4

fs/nfsd/filecache.c

··· 309 309 if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) { 310 310 nfsd_file_flush(nf); 311 311 nfsd_file_put_noref(nf); 312 - } else { 312 + } else if (nf->nf_file) { 313 313 nfsd_file_put_noref(nf); 314 - if (nf->nf_file) 315 - nfsd_file_schedule_laundrette(); 316 - } 314 + nfsd_file_schedule_laundrette(); 315 + } else 316 + nfsd_file_put_noref(nf); 317 + 317 318 if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT) 318 319 nfsd_file_gc(); 319 320 }

-1

include/linux/bio.h

··· 403 403 extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags); 404 404 extern void bioset_exit(struct bio_set *); 405 405 extern int biovec_init_pool(mempool_t *pool, int pool_entries); 406 - extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src); 407 406 408 407 struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs, 409 408 unsigned int opf, gfp_t gfp_mask,

+3

include/linux/cpu.h

··· 65 65 extern ssize_t cpu_show_itlb_multihit(struct device *dev, 66 66 struct device_attribute *attr, char *buf); 67 67 extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf); 68 + extern ssize_t cpu_show_mmio_stale_data(struct device *dev, 69 + struct device_attribute *attr, 70 + char *buf); 68 71 69 72 extern __printf(4, 5) 70 73 struct device *cpu_device_create(struct device *parent, void *drvdata,

+1 -1

include/linux/crc-itu-t.h

··· 4 4 * 5 5 * Implements the standard CRC ITU-T V.41: 6 6 * Width 16 7 - * Poly 0x1021 (x^16 + x^12 + x^15 + 1) 7 + * Poly 0x1021 (x^16 + x^12 + x^5 + 1) 8 8 * Init 0 9 9 */ 10 10

+5

include/linux/mm_types.h

··· 227 227 * struct folio - Represents a contiguous set of bytes. 228 228 * @flags: Identical to the page flags. 229 229 * @lru: Least Recently Used list; tracks how recently this folio was used. 230 + * @mlock_count: Number of times this folio has been pinned by mlock(). 230 231 * @mapping: The file this page belongs to, or refers to the anon_vma for 231 232 * anonymous memory. 232 233 * @index: Offset within the file, in units of pages. For anonymous memory, ··· 256 255 unsigned long flags; 257 256 union { 258 257 struct list_head lru; 258 + /* private: avoid cluttering the output */ 259 259 struct { 260 260 void *__filler; 261 + /* public: */ 261 262 unsigned int mlock_count; 263 + /* private: */ 262 264 }; 265 + /* public: */ 263 266 }; 264 267 struct address_space *mapping; 265 268 pgoff_t index;

+11 -14

include/linux/netfs.h

··· 206 206 */ 207 207 struct netfs_request_ops { 208 208 int (*init_request)(struct netfs_io_request *rreq, struct file *file); 209 + void (*free_request)(struct netfs_io_request *rreq); 209 210 int (*begin_cache_operation)(struct netfs_io_request *rreq); 211 + 210 212 void (*expand_readahead)(struct netfs_io_request *rreq); 211 213 bool (*clamp_length)(struct netfs_io_subrequest *subreq); 212 214 void (*issue_read)(struct netfs_io_subrequest *subreq); ··· 216 214 int (*check_write_begin)(struct file *file, loff_t pos, unsigned len, 217 215 struct folio *folio, void **_fsdata); 218 216 void (*done)(struct netfs_io_request *rreq); 219 - void (*cleanup)(struct address_space *mapping, void *netfs_priv); 220 217 }; 221 218 222 219 /* ··· 278 277 struct readahead_control; 279 278 extern void netfs_readahead(struct readahead_control *); 280 279 int netfs_read_folio(struct file *, struct folio *); 281 - extern int netfs_write_begin(struct file *, struct address_space *, 280 + extern int netfs_write_begin(struct netfs_inode *, 281 + struct file *, struct address_space *, 282 282 loff_t, unsigned int, struct folio **, 283 283 void **); 284 284 ··· 304 302 305 303 /** 306 304 * netfs_inode_init - Initialise a netfslib inode context 307 - * @inode: The inode with which the context is associated 305 + * @ctx: The netfs inode to initialise 308 306 * @ops: The netfs's operations list 309 307 * 310 308 * Initialise the netfs library context struct. This is expected to follow on 311 309 * directly from the VFS inode struct. 
312 310 */ 313 - static inline void netfs_inode_init(struct inode *inode, 311 + static inline void netfs_inode_init(struct netfs_inode *ctx, 314 312 const struct netfs_request_ops *ops) 315 313 { 316 - struct netfs_inode *ctx = netfs_inode(inode); 317 - 318 314 ctx->ops = ops; 319 - ctx->remote_i_size = i_size_read(inode); 315 + ctx->remote_i_size = i_size_read(&ctx->inode); 320 316 #if IS_ENABLED(CONFIG_FSCACHE) 321 317 ctx->cache = NULL; 322 318 #endif ··· 322 322 323 323 /** 324 324 * netfs_resize_file - Note that a file got resized 325 - * @inode: The inode being resized 325 + * @ctx: The netfs inode being resized 326 326 * @new_i_size: The new file size 327 327 * 328 328 * Inform the netfs lib that a file got resized so that it can adjust its state. 329 329 */ 330 - static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size) 330 + static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size) 331 331 { 332 - struct netfs_inode *ctx = netfs_inode(inode); 333 - 334 332 ctx->remote_i_size = new_i_size; 335 333 } 336 334 337 335 /** 338 336 * netfs_i_cookie - Get the cache cookie from the inode 339 - * @inode: The inode to query 337 + * @ctx: The netfs inode to query 340 338 * 341 339 * Get the caching cookie (if enabled) from the network filesystem's inode. 342 340 */ 343 - static inline struct fscache_cookie *netfs_i_cookie(struct inode *inode) 341 + static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx) 344 342 { 345 343 #if IS_ENABLED(CONFIG_FSCACHE) 346 - struct netfs_inode *ctx = netfs_inode(inode); 347 344 return ctx->cache; 348 345 #else 349 346 return NULL;

+1 -2

include/linux/random.h

··· 13 13 struct notifier_block; 14 14 15 15 void add_device_randomness(const void *buf, size_t len); 16 - void add_bootloader_randomness(const void *buf, size_t len); 16 + void __init add_bootloader_randomness(const void *buf, size_t len); 17 17 void add_input_randomness(unsigned int type, unsigned int code, 18 18 unsigned int value) __latent_entropy; 19 19 void add_interrupt_randomness(int irq) __latent_entropy; ··· 74 74 75 75 int __init random_init(const char *command_line); 76 76 bool rng_is_initialized(void); 77 - bool rng_has_arch_random(void); 78 77 int wait_for_random_bytes(void); 79 78 80 79 /* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).

+15 -1

include/linux/sunrpc/xdr.h

··· 243 243 extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); 244 244 extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec, 245 245 size_t nbytes); 246 - extern void xdr_commit_encode(struct xdr_stream *xdr); 246 + extern void __xdr_commit_encode(struct xdr_stream *xdr); 247 247 extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len); 248 248 extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen); 249 249 extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, ··· 304 304 xdr_reset_scratch_buffer(struct xdr_stream *xdr) 305 305 { 306 306 xdr_set_scratch_buffer(xdr, NULL, 0); 307 + } 308 + 309 + /** 310 + * xdr_commit_encode - Ensure all data is written to xdr->buf 311 + * @xdr: pointer to xdr_stream 312 + * 313 + * Handle encoding across page boundaries by giving the caller a 314 + * temporary location to write to, then later copying the data into 315 + * place. __xdr_commit_encode() does that copying. 316 + */ 317 + static inline void xdr_commit_encode(struct xdr_stream *xdr) 318 + { 319 + if (unlikely(xdr->scratch.iov_len)) 320 + __xdr_commit_encode(xdr); 307 321 } 308 322 309 323 /**

+3 -2

include/linux/vdpa.h

··· 178 178 * for the device 179 179 * @vdev: vdpa device 180 180 * Returns virtqueue algin requirement 181 - * @get_vq_group: Get the group id for a specific virtqueue 181 + * @get_vq_group: Get the group id for a specific 182 + * virtqueue (optional) 182 183 * @vdev: vdpa device 183 184 * @idx: virtqueue index 184 185 * Returns u32: group id for this virtqueue ··· 244 243 * Returns the iova range supported by 245 244 * the device. 246 245 * @set_group_asid: Set address space identifier for a 247 - * virtqueue group 246 + * virtqueue group (optional) 248 247 * @vdev: vdpa device 249 248 * @group: virtqueue group 250 249 * @asid: address space id for this group

+1

include/linux/vmalloc.h

··· 215 215 void free_vm_area(struct vm_struct *area); 216 216 extern struct vm_struct *remove_vm_area(const void *addr); 217 217 extern struct vm_struct *find_vm_area(const void *addr); 218 + struct vmap_area *find_vmap_area(unsigned long addr); 218 219 219 220 static inline bool is_vm_area_hugepages(const void *addr) 220 221 {

+57 -9

include/linux/workqueue.h

··· 406 406 * alloc_ordered_workqueue - allocate an ordered workqueue 407 407 * @fmt: printf format for the name of the workqueue 408 408 * @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful) 409 - * @args...: args for @fmt 409 + * @args: args for @fmt 410 410 * 411 411 * Allocate an ordered workqueue. An ordered workqueue executes at 412 412 * most one work item at any given time in the queued order. They are ··· 445 445 struct delayed_work *dwork, unsigned long delay); 446 446 extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork); 447 447 448 - extern void flush_workqueue(struct workqueue_struct *wq); 448 + extern void __flush_workqueue(struct workqueue_struct *wq); 449 449 extern void drain_workqueue(struct workqueue_struct *wq); 450 450 451 451 extern int schedule_on_each_cpu(work_func_t func); ··· 563 563 return queue_work(system_wq, work); 564 564 } 565 565 566 + /* 567 + * Detect attempt to flush system-wide workqueues at compile time when possible. 568 + * 569 + * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp 570 + * for reasons and steps for converting system-wide workqueues into local workqueues. 571 + */ 572 + extern void __warn_flushing_systemwide_wq(void) 573 + __compiletime_warning("Please avoid flushing system-wide workqueues."); 574 + 566 575 /** 567 576 * flush_scheduled_work - ensure that any scheduled work has run to completion. 568 577 * 569 578 * Forces execution of the kernel-global workqueue and blocks until its 570 579 * completion. 571 580 * 572 - * Think twice before calling this function! It's very easy to get into 573 - * trouble if you don't take great care. Either of the following situations 574 - * will lead to deadlock: 581 + * It's very easy to get into trouble if you don't take great care. 
582 + * Either of the following situations will lead to deadlock: 575 583 * 576 584 * One of the work items currently on the workqueue needs to acquire 577 585 * a lock held by your code or its caller. ··· 594 586 * need to know that a particular work item isn't queued and isn't running. 595 587 * In such cases you should use cancel_delayed_work_sync() or 596 588 * cancel_work_sync() instead. 589 + * 590 + * Please stop calling this function! A conversion to stop flushing system-wide 591 + * workqueues is in progress. This function will be removed after all in-tree 592 + * users stopped calling this function. 597 593 */ 598 - static inline void flush_scheduled_work(void) 599 - { 600 - flush_workqueue(system_wq); 601 - } 594 + /* 595 + * The background of commit 771c035372a036f8 ("deprecate the 596 + * '__deprecated' attribute warnings entirely and for good") is that, 597 + * since Linus builds all modules between every single pull he does, 598 + * the standard kernel build needs to be _clean_ in order to be able to 599 + * notice when new problems happen. Therefore, don't emit warning while 600 + * there are in-tree users. 601 + */ 602 + #define flush_scheduled_work() \ 603 + ({ \ 604 + if (0) \ 605 + __warn_flushing_systemwide_wq(); \ 606 + __flush_workqueue(system_wq); \ 607 + }) 608 + 609 + /* 610 + * Although there is no longer in-tree caller, for now just emit warning 611 + * in order to give out-of-tree callers time to update. 
612 + */ 613 + #define flush_workqueue(wq) \ 614 + ({ \ 615 + struct workqueue_struct *_wq = (wq); \ 616 + \ 617 + if ((__builtin_constant_p(_wq == system_wq) && \ 618 + _wq == system_wq) || \ 619 + (__builtin_constant_p(_wq == system_highpri_wq) && \ 620 + _wq == system_highpri_wq) || \ 621 + (__builtin_constant_p(_wq == system_long_wq) && \ 622 + _wq == system_long_wq) || \ 623 + (__builtin_constant_p(_wq == system_unbound_wq) && \ 624 + _wq == system_unbound_wq) || \ 625 + (__builtin_constant_p(_wq == system_freezable_wq) && \ 626 + _wq == system_freezable_wq) || \ 627 + (__builtin_constant_p(_wq == system_power_efficient_wq) && \ 628 + _wq == system_power_efficient_wq) || \ 629 + (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \ 630 + _wq == system_freezable_power_efficient_wq)) \ 631 + __warn_flushing_systemwide_wq(); \ 632 + __flush_workqueue(_wq); \ 633 + }) 602 634 603 635 /** 604 636 * schedule_delayed_work_on - queue work in global workqueue on CPU after delay

+1

include/linux/xarray.h

··· 1508 1508 void xas_init_marks(const struct xa_state *); 1509 1509 1510 1510 bool xas_nomem(struct xa_state *, gfp_t); 1511 + void xas_destroy(struct xa_state *); 1511 1512 void xas_pause(struct xa_state *); 1512 1513 1513 1514 void xas_create_range(struct xa_state *);

-3

include/net/inet_connection_sock.h

··· 25 25 #undef INET_CSK_CLEAR_TIMERS 26 26 27 27 struct inet_bind_bucket; 28 - struct inet_bind2_bucket; 29 28 struct tcp_congestion_ops; 30 29 31 30 /* ··· 57 58 * 58 59 * @icsk_accept_queue: FIFO of established children 59 60 * @icsk_bind_hash: Bind node 60 - * @icsk_bind2_hash: Bind node in the bhash2 table 61 61 * @icsk_timeout: Timeout 62 62 * @icsk_retransmit_timer: Resend (no ack) 63 63 * @icsk_rto: Retransmit timeout ··· 83 85 struct inet_sock icsk_inet; 84 86 struct request_sock_queue icsk_accept_queue; 85 87 struct inet_bind_bucket *icsk_bind_hash; 86 - struct inet_bind2_bucket *icsk_bind2_hash; 87 88 unsigned long icsk_timeout; 88 89 struct timer_list icsk_retransmit_timer; 89 90 struct timer_list icsk_delack_timer;

+1 -67

include/net/inet_hashtables.h

··· 90 90 struct hlist_head owners; 91 91 }; 92 92 93 - struct inet_bind2_bucket { 94 - possible_net_t ib_net; 95 - int l3mdev; 96 - unsigned short port; 97 - union { 98 - #if IS_ENABLED(CONFIG_IPV6) 99 - struct in6_addr v6_rcv_saddr; 100 - #endif 101 - __be32 rcv_saddr; 102 - }; 103 - /* Node in the inet2_bind_hashbucket chain */ 104 - struct hlist_node node; 105 - /* List of sockets hashed to this bucket */ 106 - struct hlist_head owners; 107 - }; 108 - 109 93 static inline struct net *ib_net(struct inet_bind_bucket *ib) 110 - { 111 - return read_pnet(&ib->ib_net); 112 - } 113 - 114 - static inline struct net *ib2_net(struct inet_bind2_bucket *ib) 115 94 { 116 95 return read_pnet(&ib->ib_net); 117 96 } ··· 100 121 101 122 struct inet_bind_hashbucket { 102 123 spinlock_t lock; 103 - struct hlist_head chain; 104 - }; 105 - 106 - /* This is synchronized using the inet_bind_hashbucket's spinlock. 107 - * Instead of having separate spinlocks, the inet_bind2_hashbucket can share 108 - * the inet_bind_hashbucket's given that in every case where the bhash2 table 109 - * is useful, a lookup in the bhash table also occurs. 110 - */ 111 - struct inet_bind2_hashbucket { 112 124 struct hlist_head chain; 113 125 }; 114 126 ··· 134 164 */ 135 165 struct kmem_cache *bind_bucket_cachep; 136 166 struct inet_bind_hashbucket *bhash; 137 - /* The 2nd binding table hashed by port and address. 138 - * This is used primarily for expediting the resolution of bind 139 - * conflicts. 
140 - */ 141 - struct kmem_cache *bind2_bucket_cachep; 142 - struct inet_bind2_hashbucket *bhash2; 143 167 unsigned int bhash_size; 144 168 145 169 /* The 2nd listener table hashed by local port and address */ ··· 193 229 void inet_bind_bucket_destroy(struct kmem_cache *cachep, 194 230 struct inet_bind_bucket *tb); 195 231 196 - static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb, 197 - struct net *net, 198 - const unsigned short port, 199 - int l3mdev) 200 - { 201 - return net_eq(ib_net(tb), net) && tb->port == port && 202 - tb->l3mdev == l3mdev; 203 - } 204 - 205 - struct inet_bind2_bucket * 206 - inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net, 207 - struct inet_bind2_hashbucket *head, 208 - const unsigned short port, int l3mdev, 209 - const struct sock *sk); 210 - 211 - void inet_bind2_bucket_destroy(struct kmem_cache *cachep, 212 - struct inet_bind2_bucket *tb); 213 - 214 - struct inet_bind2_bucket * 215 - inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net, 216 - const unsigned short port, int l3mdev, 217 - struct sock *sk, 218 - struct inet_bind2_hashbucket **head); 219 - 220 - bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb, 221 - struct net *net, 222 - const unsigned short port, 223 - int l3mdev, 224 - const struct sock *sk); 225 - 226 232 static inline u32 inet_bhashfn(const struct net *net, const __u16 lport, 227 233 const u32 bhash_size) 228 234 { ··· 200 266 } 201 267 202 268 void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, 203 - struct inet_bind2_bucket *tb2, const unsigned short snum); 269 + const unsigned short snum); 204 270 205 271 /* Caller must disable local BH processing. */ 206 272 int __inet_inherit_port(const struct sock *sk, struct sock *child);

-14

include/net/sock.h

··· 348 348 * @sk_txtime_report_errors: set report errors mode for SO_TXTIME 349 349 * @sk_txtime_unused: unused txtime flags 350 350 * @ns_tracker: tracker for netns reference 351 - * @sk_bind2_node: bind node in the bhash2 table 352 351 */ 353 352 struct sock { 354 353 /* ··· 537 538 #endif 538 539 struct rcu_head sk_rcu; 539 540 netns_tracker ns_tracker; 540 - struct hlist_node sk_bind2_node; 541 541 }; 542 542 543 543 enum sk_pacing { ··· 817 819 hlist_add_head(&sk->sk_bind_node, list); 818 820 } 819 821 820 - static inline void __sk_del_bind2_node(struct sock *sk) 821 - { 822 - __hlist_del(&sk->sk_bind2_node); 823 - } 824 - 825 - static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list) 826 - { 827 - hlist_add_head(&sk->sk_bind2_node, list); 828 - } 829 - 830 822 #define sk_for_each(__sk, list) \ 831 823 hlist_for_each_entry(__sk, list, sk_node) 832 824 #define sk_for_each_rcu(__sk, list) \ ··· 834 846 hlist_for_each_entry_safe(__sk, tmp, list, sk_node) 835 847 #define sk_for_each_bound(__sk, list) \ 836 848 hlist_for_each_entry(__sk, list, sk_bind_node) 837 - #define sk_for_each_bound_bhash2(__sk, list) \ 838 - hlist_for_each_entry(__sk, list, sk_bind2_node) 839 849 840 850 /** 841 851 * sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset

+4 -4

include/trace/events/workqueue.h

··· 22 22 */ 23 23 TRACE_EVENT(workqueue_queue_work, 24 24 25 - TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq, 25 + TP_PROTO(int req_cpu, struct pool_workqueue *pwq, 26 26 struct work_struct *work), 27 27 28 28 TP_ARGS(req_cpu, pwq, work), ··· 31 31 __field( void *, work ) 32 32 __field( void *, function) 33 33 __string( workqueue, pwq->wq->name) 34 - __field( unsigned int, req_cpu ) 35 - __field( unsigned int, cpu ) 34 + __field( int, req_cpu ) 35 + __field( int, cpu ) 36 36 ), 37 37 38 38 TP_fast_assign( ··· 43 43 __entry->cpu = pwq->pool->cpu; 44 44 ), 45 45 46 - TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%u cpu=%u", 46 + TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%d cpu=%d", 47 47 __entry->work, __entry->function, __get_str(workqueue), 48 48 __entry->req_cpu, __entry->cpu) 49 49 );

+16 -6

kernel/cfi.c

··· 281 281 static inline cfi_check_fn find_check_fn(unsigned long ptr) 282 282 { 283 283 cfi_check_fn fn = NULL; 284 + unsigned long flags; 285 + bool rcu_idle; 284 286 285 287 if (is_kernel_text(ptr)) 286 288 return __cfi_check; ··· 292 290 * the shadow and __module_address use RCU, so we need to wake it 293 291 * up if necessary. 294 292 */ 295 - RCU_NONIDLE({ 296 - if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) 297 - fn = find_shadow_check_fn(ptr); 293 + rcu_idle = !rcu_is_watching(); 294 + if (rcu_idle) { 295 + local_irq_save(flags); 296 + rcu_irq_enter(); 297 + } 298 298 299 - if (!fn) 300 - fn = find_module_check_fn(ptr); 301 - }); 299 + if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) 300 + fn = find_shadow_check_fn(ptr); 301 + if (!fn) 302 + fn = find_module_check_fn(ptr); 303 + 304 + if (rcu_idle) { 305 + rcu_irq_exit(); 306 + local_irq_restore(flags); 307 + } 302 308 303 309 return fn; 304 310 }

+12 -4

kernel/workqueue.c

··· 2788 2788 } 2789 2789 2790 2790 /** 2791 - * flush_workqueue - ensure that any scheduled work has run to completion. 2791 + * __flush_workqueue - ensure that any scheduled work has run to completion. 2792 2792 * @wq: workqueue to flush 2793 2793 * 2794 2794 * This function sleeps until all work items which were queued on entry 2795 2795 * have finished execution, but it is not livelocked by new incoming ones. 2796 2796 */ 2797 - void flush_workqueue(struct workqueue_struct *wq) 2797 + void __flush_workqueue(struct workqueue_struct *wq) 2798 2798 { 2799 2799 struct wq_flusher this_flusher = { 2800 2800 .list = LIST_HEAD_INIT(this_flusher.list), ··· 2943 2943 out_unlock: 2944 2944 mutex_unlock(&wq->mutex); 2945 2945 } 2946 - EXPORT_SYMBOL(flush_workqueue); 2946 + EXPORT_SYMBOL(__flush_workqueue); 2947 2947 2948 2948 /** 2949 2949 * drain_workqueue - drain a workqueue ··· 2971 2971 wq->flags |= __WQ_DRAINING; 2972 2972 mutex_unlock(&wq->mutex); 2973 2973 reflush: 2974 - flush_workqueue(wq); 2974 + __flush_workqueue(wq); 2975 2975 2976 2976 mutex_lock(&wq->mutex); 2977 2977 ··· 6111 6111 wq_online = true; 6112 6112 wq_watchdog_init(); 6113 6113 } 6114 + 6115 + /* 6116 + * Despite the naming, this is a no-op function which is here only for avoiding 6117 + * link error. Since compile-time warning may fail to catch, we will need to 6118 + * emit run-time warning from __flush_workqueue(). 6119 + */ 6120 + void __warn_flushing_systemwide_wq(void) { } 6121 + EXPORT_SYMBOL(__warn_flushing_systemwide_wq);

+1 -1

lib/crc-itu-t.c

··· 7 7 #include <linux/module.h> 8 8 #include <linux/crc-itu-t.h> 9 9 10 - /** CRC table for the CRC ITU-T V.41 0x1021 (x^16 + x^12 + x^15 + 1) */ 10 + /* CRC table for the CRC ITU-T V.41 0x1021 (x^16 + x^12 + x^5 + 1) */ 11 11 const u16 crc_itu_t_table[256] = { 12 12 0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7, 13 13 0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,

+4 -16

lib/iov_iter.c

··· 1434 1434 { 1435 1435 unsigned nr, offset; 1436 1436 pgoff_t index, count; 1437 - size_t size = maxsize, actual; 1437 + size_t size = maxsize; 1438 1438 loff_t pos; 1439 1439 1440 1440 if (!size || !maxpages) ··· 1461 1461 if (nr == 0) 1462 1462 return 0; 1463 1463 1464 - actual = PAGE_SIZE * nr; 1465 - actual -= offset; 1466 - if (nr == count && size > 0) { 1467 - unsigned last_offset = (nr > 1) ? 0 : offset; 1468 - actual -= PAGE_SIZE - (last_offset + size); 1469 - } 1470 - return actual; 1464 + return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); 1471 1465 } 1472 1466 1473 1467 /* must be done on non-empty ITER_IOVEC one */ ··· 1596 1602 struct page **p; 1597 1603 unsigned nr, offset; 1598 1604 pgoff_t index, count; 1599 - size_t size = maxsize, actual; 1605 + size_t size = maxsize; 1600 1606 loff_t pos; 1601 1607 1602 1608 if (!size) ··· 1625 1631 if (nr == 0) 1626 1632 return 0; 1627 1633 1628 - actual = PAGE_SIZE * nr; 1629 - actual -= offset; 1630 - if (nr == count && size > 0) { 1631 - unsigned last_offset = (nr > 1) ? 0 : offset; 1632 - actual -= PAGE_SIZE - (last_offset + size); 1633 - } 1634 - return actual; 1634 + return min_t(size_t, nr * PAGE_SIZE - offset, maxsize); 1635 1635 } 1636 1636 1637 1637 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,

+1 -2

lib/vsprintf.c

··· 769 769 static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); 770 770 unsigned long flags; 771 771 772 - if (!system_unbound_wq || 773 - (!rng_is_initialized() && !rng_has_arch_random()) || 772 + if (!system_unbound_wq || !rng_is_initialized() || 774 773 !spin_trylock_irqsave(&filling, flags)) 775 774 return -EAGAIN; 776 775

+3 -2

lib/xarray.c

··· 264 264 * xas_destroy() - Free any resources allocated during the XArray operation. 265 265 * @xas: XArray operation state. 266 266 * 267 - * This function is now internal-only. 267 + * Most users will not need to call this function; it is called for you 268 + * by xas_nomem(). 268 269 */ 269 - static void xas_destroy(struct xa_state *xas) 270 + void xas_destroy(struct xa_state *xas) 270 271 { 271 272 struct xa_node *next, *node = xas->xa_alloc; 272 273

+5 -4

mm/filemap.c

··· 2991 2991 struct address_space *mapping = file->f_mapping; 2992 2992 DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff); 2993 2993 struct file *fpin = NULL; 2994 + unsigned long vm_flags = vmf->vma->vm_flags; 2994 2995 unsigned int mmap_miss; 2995 2996 2996 2997 #ifdef CONFIG_TRANSPARENT_HUGEPAGE 2997 2998 /* Use the readahead code, even if readahead is disabled */ 2998 - if (vmf->vma->vm_flags & VM_HUGEPAGE) { 2999 + if (vm_flags & VM_HUGEPAGE) { 2999 3000 fpin = maybe_unlock_mmap_for_io(vmf, fpin); 3000 3001 ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1); 3001 3002 ra->size = HPAGE_PMD_NR; ··· 3004 3003 * Fetch two PMD folios, so we get the chance to actually 3005 3004 * readahead, unless we've been told not to. 3006 3005 */ 3007 - if (!(vmf->vma->vm_flags & VM_RAND_READ)) 3006 + if (!(vm_flags & VM_RAND_READ)) 3008 3007 ra->size *= 2; 3009 3008 ra->async_size = HPAGE_PMD_NR; 3010 3009 page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER); ··· 3013 3012 #endif 3014 3013 3015 3014 /* If we don't want any read-ahead, don't bother */ 3016 - if (vmf->vma->vm_flags & VM_RAND_READ) 3015 + if (vm_flags & VM_RAND_READ) 3017 3016 return fpin; 3018 3017 if (!ra->ra_pages) 3019 3018 return fpin; 3020 3019 3021 - if (vmf->vma->vm_flags & VM_SEQ_READ) { 3020 + if (vm_flags & VM_SEQ_READ) { 3022 3021 fpin = maybe_unlock_mmap_for_io(vmf, fpin); 3023 3022 page_cache_sync_ra(&ractl, ra->ra_pages); 3024 3023 return fpin;

+1 -2

mm/huge_memory.c

··· 2672 2672 if (mapping) 2673 2673 i_mmap_unlock_read(mapping); 2674 2674 out: 2675 - /* Free any memory we didn't use */ 2676 - xas_nomem(&xas, 0); 2675 + xas_destroy(&xas); 2677 2676 count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED); 2678 2677 return ret; 2679 2678 }

+2

mm/readahead.c

··· 164 164 while ((folio = readahead_folio(rac)) != NULL) { 165 165 unsigned long nr = folio_nr_pages(folio); 166 166 167 + folio_get(folio); 167 168 rac->ra->size -= nr; 168 169 if (rac->ra->async_size >= nr) { 169 170 rac->ra->async_size -= nr; 170 171 filemap_remove_folio(folio); 171 172 } 172 173 folio_unlock(folio); 174 + folio_put(folio); 173 175 } 174 176 } else { 175 177 while ((folio = readahead_folio(rac)) != NULL)

+12 -14

mm/usercopy.c

··· 161 161 static inline void check_heap_object(const void *ptr, unsigned long n, 162 162 bool to_user) 163 163 { 164 + uintptr_t addr = (uintptr_t)ptr; 165 + unsigned long offset; 164 166 struct folio *folio; 165 167 166 168 if (is_kmap_addr(ptr)) { 167 - unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1); 168 - 169 - if ((unsigned long)ptr + n - 1 > page_end) 170 - usercopy_abort("kmap", NULL, to_user, 171 - offset_in_page(ptr), n); 169 + offset = offset_in_page(ptr); 170 + if (n > PAGE_SIZE - offset) 171 + usercopy_abort("kmap", NULL, to_user, offset, n); 172 172 return; 173 173 } 174 174 175 175 if (is_vmalloc_addr(ptr)) { 176 - struct vm_struct *area = find_vm_area(ptr); 177 - unsigned long offset; 176 + struct vmap_area *area = find_vmap_area(addr); 178 177 179 - if (!area) { 178 + if (!area) 180 179 usercopy_abort("vmalloc", "no area", to_user, 0, n); 181 - return; 182 - } 183 180 184 - offset = ptr - area->addr; 185 - if (offset + n > get_vm_area_size(area)) 181 + if (n > area->va_end - addr) { 182 + offset = addr - area->va_start; 186 183 usercopy_abort("vmalloc", NULL, to_user, offset, n); 184 + } 187 185 return; 188 186 } 189 187 ··· 194 196 /* Check slab allocator for flags and size. */ 195 197 __check_heap_object(ptr, n, folio_slab(folio), to_user); 196 198 } else if (folio_test_large(folio)) { 197 - unsigned long offset = ptr - folio_address(folio); 198 - if (offset + n > folio_size(folio)) 199 + offset = ptr - folio_address(folio); 200 + if (n > folio_size(folio) - offset) 199 201 usercopy_abort("page alloc", NULL, to_user, offset, n); 200 202 } 201 203 }

+1 -1

mm/vmalloc.c

··· 1798 1798 free_vmap_area_noflush(va); 1799 1799 } 1800 1800 1801 - static struct vmap_area *find_vmap_area(unsigned long addr) 1801 + struct vmap_area *find_vmap_area(unsigned long addr) 1802 1802 { 1803 1803 struct vmap_area *va; 1804 1804

+28 -5

net/ax25/af_ax25.c

··· 1662 1662 int flags) 1663 1663 { 1664 1664 struct sock *sk = sock->sk; 1665 - struct sk_buff *skb; 1665 + struct sk_buff *skb, *last; 1666 + struct sk_buff_head *sk_queue; 1666 1667 int copied; 1667 1668 int err = 0; 1669 + int off = 0; 1670 + long timeo; 1668 1671 1669 1672 lock_sock(sk); 1670 1673 /* ··· 1679 1676 goto out; 1680 1677 } 1681 1678 1682 - /* Now we can treat all alike */ 1683 - skb = skb_recv_datagram(sk, flags, &err); 1684 - if (skb == NULL) 1685 - goto out; 1679 + /* We need support for non-blocking reads. */ 1680 + sk_queue = &sk->sk_receive_queue; 1681 + skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last); 1682 + /* If no packet is available, release_sock(sk) and try again. */ 1683 + if (!skb) { 1684 + if (err != -EAGAIN) 1685 + goto out; 1686 + release_sock(sk); 1687 + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); 1688 + while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err, 1689 + &timeo, last)) { 1690 + skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, 1691 + &err, &last); 1692 + if (skb) 1693 + break; 1694 + 1695 + if (err != -EAGAIN) 1696 + goto done; 1697 + } 1698 + if (!skb) 1699 + goto done; 1700 + lock_sock(sk); 1701 + } 1686 1702 1687 1703 if (!sk_to_ax25(sk)->pidincl) 1688 1704 skb_pull(skb, 1); /* Remove PID */ ··· 1748 1726 out: 1749 1727 release_sock(sk); 1750 1728 1729 + done: 1751 1730 return err; 1752 1731 } 1753 1732

+5 -28

net/dccp/proto.c

··· 1120 1120 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); 1121 1121 if (!dccp_hashinfo.bind_bucket_cachep) 1122 1122 goto out_free_hashinfo2; 1123 - dccp_hashinfo.bind2_bucket_cachep = 1124 - kmem_cache_create("dccp_bind2_bucket", 1125 - sizeof(struct inet_bind2_bucket), 0, 1126 - SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL); 1127 - if (!dccp_hashinfo.bind2_bucket_cachep) 1128 - goto out_free_bind_bucket_cachep; 1129 1123 1130 1124 /* 1131 1125 * Size and allocate the main established and bind bucket ··· 1150 1156 1151 1157 if (!dccp_hashinfo.ehash) { 1152 1158 DCCP_CRIT("Failed to allocate DCCP established hash table"); 1153 - goto out_free_bind2_bucket_cachep; 1159 + goto out_free_bind_bucket_cachep; 1154 1160 } 1155 1161 1156 1162 for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) ··· 1176 1182 goto out_free_dccp_locks; 1177 1183 } 1178 1184 1179 - dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *) 1180 - __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order); 1181 - 1182 - if (!dccp_hashinfo.bhash2) { 1183 - DCCP_CRIT("Failed to allocate DCCP bind2 hash table"); 1184 - goto out_free_dccp_bhash; 1185 - } 1186 - 1187 1185 for (i = 0; i < dccp_hashinfo.bhash_size; i++) { 1188 1186 spin_lock_init(&dccp_hashinfo.bhash[i].lock); 1189 1187 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); 1190 - INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain); 1191 1188 } 1192 1189 1193 1190 rc = dccp_mib_init(); 1194 1191 if (rc) 1195 - goto out_free_dccp_bhash2; 1192 + goto out_free_dccp_bhash; 1196 1193 1197 1194 rc = dccp_ackvec_init(); 1198 1195 if (rc) ··· 1207 1222 dccp_ackvec_exit(); 1208 1223 out_free_dccp_mib: 1209 1224 dccp_mib_exit(); 1210 - out_free_dccp_bhash2: 1211 - free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); 1212 1225 out_free_dccp_bhash: 1213 1226 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); 1214 1227 out_free_dccp_locks: 1215 1228 inet_ehash_locks_free(&dccp_hashinfo); 1216 1229 out_free_dccp_ehash: 1217 1230 free_pages((unsigned 
long)dccp_hashinfo.ehash, ehash_order); 1218 - out_free_bind2_bucket_cachep: 1219 - kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep); 1220 1231 out_free_bind_bucket_cachep: 1221 1232 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); 1222 1233 out_free_hashinfo2: 1223 1234 inet_hashinfo2_free_mod(&dccp_hashinfo); 1224 1235 out_fail: 1225 1236 dccp_hashinfo.bhash = NULL; 1226 - dccp_hashinfo.bhash2 = NULL; 1227 1237 dccp_hashinfo.ehash = NULL; 1228 1238 dccp_hashinfo.bind_bucket_cachep = NULL; 1229 - dccp_hashinfo.bind2_bucket_cachep = NULL; 1230 1239 return rc; 1231 1240 } 1232 1241 1233 1242 static void __exit dccp_fini(void) 1234 1243 { 1235 - int bhash_order = get_order(dccp_hashinfo.bhash_size * 1236 - sizeof(struct inet_bind_hashbucket)); 1237 - 1238 1244 ccid_cleanup_builtins(); 1239 1245 dccp_mib_exit(); 1240 - free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); 1241 - free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order); 1246 + free_pages((unsigned long)dccp_hashinfo.bhash, 1247 + get_order(dccp_hashinfo.bhash_size * 1248 + sizeof(struct inet_bind_hashbucket))); 1242 1249 free_pages((unsigned long)dccp_hashinfo.ehash, 1243 1250 get_order((dccp_hashinfo.ehash_mask + 1) * 1244 1251 sizeof(struct inet_ehash_bucket)));

+64 -183

net/ipv4/inet_connection_sock.c

··· 117 117 return !sk->sk_rcv_saddr; 118 118 } 119 119 120 - static bool use_bhash2_on_bind(const struct sock *sk) 121 - { 122 - #if IS_ENABLED(CONFIG_IPV6) 123 - int addr_type; 124 - 125 - if (sk->sk_family == AF_INET6) { 126 - addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr); 127 - return addr_type != IPV6_ADDR_ANY && 128 - addr_type != IPV6_ADDR_MAPPED; 129 - } 130 - #endif 131 - return sk->sk_rcv_saddr != htonl(INADDR_ANY); 132 - } 133 - 134 - static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net, 135 - int port) 136 - { 137 - #if IS_ENABLED(CONFIG_IPV6) 138 - struct in6_addr nulladdr = {}; 139 - 140 - if (sk->sk_family == AF_INET6) 141 - return ipv6_portaddr_hash(net, &nulladdr, port); 142 - #endif 143 - return ipv4_portaddr_hash(net, 0, port); 144 - } 145 - 146 120 void inet_get_local_port_range(struct net *net, int *low, int *high) 147 121 { 148 122 unsigned int seq; ··· 130 156 } 131 157 EXPORT_SYMBOL(inet_get_local_port_range); 132 158 133 - static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2, 134 - kuid_t sk_uid, bool relax, 135 - bool reuseport_cb_ok, bool reuseport_ok) 136 - { 137 - int bound_dev_if2; 138 - 139 - if (sk == sk2) 140 - return false; 141 - 142 - bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); 143 - 144 - if (!sk->sk_bound_dev_if || !bound_dev_if2 || 145 - sk->sk_bound_dev_if == bound_dev_if2) { 146 - if (sk->sk_reuse && sk2->sk_reuse && 147 - sk2->sk_state != TCP_LISTEN) { 148 - if (!relax || (!reuseport_ok && sk->sk_reuseport && 149 - sk2->sk_reuseport && reuseport_cb_ok && 150 - (sk2->sk_state == TCP_TIME_WAIT || 151 - uid_eq(sk_uid, sock_i_uid(sk2))))) 152 - return true; 153 - } else if (!reuseport_ok || !sk->sk_reuseport || 154 - !sk2->sk_reuseport || !reuseport_cb_ok || 155 - (sk2->sk_state != TCP_TIME_WAIT && 156 - !uid_eq(sk_uid, sock_i_uid(sk2)))) { 157 - return true; 158 - } 159 - } 160 - return false; 161 - } 162 - 163 - static bool check_bhash2_conflict(const struct sock *sk, 164 - 
struct inet_bind2_bucket *tb2, kuid_t sk_uid, 165 - bool relax, bool reuseport_cb_ok, 166 - bool reuseport_ok) 167 - { 168 - struct sock *sk2; 169 - 170 - sk_for_each_bound_bhash2(sk2, &tb2->owners) { 171 - if (sk->sk_family == AF_INET && ipv6_only_sock(sk2)) 172 - continue; 173 - 174 - if (bind_conflict_exist(sk, sk2, sk_uid, relax, 175 - reuseport_cb_ok, reuseport_ok)) 176 - return true; 177 - } 178 - return false; 179 - } 180 - 181 - /* This should be called only when the corresponding inet_bind_bucket spinlock 182 - * is held 183 - */ 184 - static int inet_csk_bind_conflict(const struct sock *sk, int port, 185 - struct inet_bind_bucket *tb, 186 - struct inet_bind2_bucket *tb2, /* may be null */ 159 + static int inet_csk_bind_conflict(const struct sock *sk, 160 + const struct inet_bind_bucket *tb, 187 161 bool relax, bool reuseport_ok) 188 162 { 189 - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; 190 - kuid_t uid = sock_i_uid((struct sock *)sk); 191 - struct sock_reuseport *reuseport_cb; 192 - struct inet_bind2_hashbucket *head2; 193 - bool reuseport_cb_ok; 194 163 struct sock *sk2; 195 - struct net *net; 196 - int l3mdev; 197 - u32 hash; 164 + bool reuseport_cb_ok; 165 + bool reuse = sk->sk_reuse; 166 + bool reuseport = !!sk->sk_reuseport; 167 + struct sock_reuseport *reuseport_cb; 168 + kuid_t uid = sock_i_uid((struct sock *)sk); 198 169 199 170 rcu_read_lock(); 200 171 reuseport_cb = rcu_dereference(sk->sk_reuseport_cb); ··· 150 231 /* 151 232 * Unlike other sk lookup places we do not check 152 233 * for sk_net here, since _all_ the socks listed 153 - * in tb->owners and tb2->owners list belong 154 - * to the same net 234 + * in tb->owners list belong to the same net - the 235 + * one this bucket belongs to. 
155 236 */ 156 237 157 - if (!use_bhash2_on_bind(sk)) { 158 - sk_for_each_bound(sk2, &tb->owners) 159 - if (bind_conflict_exist(sk, sk2, uid, relax, 160 - reuseport_cb_ok, reuseport_ok) && 161 - inet_rcv_saddr_equal(sk, sk2, true)) 162 - return true; 238 + sk_for_each_bound(sk2, &tb->owners) { 239 + int bound_dev_if2; 163 240 164 - return false; 241 + if (sk == sk2) 242 + continue; 243 + bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if); 244 + if ((!sk->sk_bound_dev_if || 245 + !bound_dev_if2 || 246 + sk->sk_bound_dev_if == bound_dev_if2)) { 247 + if (reuse && sk2->sk_reuse && 248 + sk2->sk_state != TCP_LISTEN) { 249 + if ((!relax || 250 + (!reuseport_ok && 251 + reuseport && sk2->sk_reuseport && 252 + reuseport_cb_ok && 253 + (sk2->sk_state == TCP_TIME_WAIT || 254 + uid_eq(uid, sock_i_uid(sk2))))) && 255 + inet_rcv_saddr_equal(sk, sk2, true)) 256 + break; 257 + } else if (!reuseport_ok || 258 + !reuseport || !sk2->sk_reuseport || 259 + !reuseport_cb_ok || 260 + (sk2->sk_state != TCP_TIME_WAIT && 261 + !uid_eq(uid, sock_i_uid(sk2)))) { 262 + if (inet_rcv_saddr_equal(sk, sk2, true)) 263 + break; 264 + } 265 + } 165 266 } 166 - 167 - if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, 168 - reuseport_ok)) 169 - return true; 170 - 171 - net = sock_net(sk); 172 - 173 - /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or 174 - * INADDR_ANY (if ipv4) socket. 175 - */ 176 - hash = get_bhash2_nulladdr_hash(sk, net, port); 177 - head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; 178 - 179 - l3mdev = inet_sk_bound_l3mdev(sk); 180 - inet_bind_bucket_for_each(tb2, &head2->chain) 181 - if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk)) 182 - break; 183 - 184 - if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, 185 - reuseport_ok)) 186 - return true; 187 - 188 - return false; 267 + return sk2 != NULL; 189 268 } 190 269 191 270 /* ··· 191 274 * inet_bind_hashbucket lock held. 
192 275 */ 193 276 static struct inet_bind_hashbucket * 194 - inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, 195 - struct inet_bind2_bucket **tb2_ret, 196 - struct inet_bind2_hashbucket **head2_ret, int *port_ret) 277 + inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret) 197 278 { 198 279 struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; 199 - struct inet_bind2_hashbucket *head2; 280 + int port = 0; 200 281 struct inet_bind_hashbucket *head; 201 282 struct net *net = sock_net(sk); 283 + bool relax = false; 202 284 int i, low, high, attempt_half; 203 - struct inet_bind2_bucket *tb2; 204 285 struct inet_bind_bucket *tb; 205 286 u32 remaining, offset; 206 - bool relax = false; 207 - int port = 0; 208 287 int l3mdev; 209 288 210 289 l3mdev = inet_sk_bound_l3mdev(sk); ··· 239 326 head = &hinfo->bhash[inet_bhashfn(net, port, 240 327 hinfo->bhash_size)]; 241 328 spin_lock_bh(&head->lock); 242 - tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, 243 - &head2); 244 329 inet_bind_bucket_for_each(tb, &head->chain) 245 - if (check_bind_bucket_match(tb, net, port, l3mdev)) { 246 - if (!inet_csk_bind_conflict(sk, port, tb, tb2, 247 - relax, false)) 330 + if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && 331 + tb->port == port) { 332 + if (!inet_csk_bind_conflict(sk, tb, relax, false)) 248 333 goto success; 249 334 goto next_port; 250 335 } ··· 272 361 success: 273 362 *port_ret = port; 274 363 *tb_ret = tb; 275 - *tb2_ret = tb2; 276 - *head2_ret = head2; 277 364 return head; 278 365 } 279 366 ··· 367 458 { 368 459 bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; 369 460 struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; 370 - bool bhash_created = false, bhash2_created = false; 371 - struct inet_bind2_bucket *tb2 = NULL; 372 - struct inet_bind2_hashbucket *head2; 373 - struct inet_bind_bucket *tb = NULL; 461 + int ret = 1, port = snum; 374 462 struct inet_bind_hashbucket *head; 375 463 
struct net *net = sock_net(sk); 376 - int ret = 1, port = snum; 377 - bool found_port = false; 464 + struct inet_bind_bucket *tb = NULL; 378 465 int l3mdev; 379 466 380 467 l3mdev = inet_sk_bound_l3mdev(sk); 381 468 382 469 if (!port) { 383 - head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port); 470 + head = inet_csk_find_open_port(sk, &tb, &port); 384 471 if (!head) 385 472 return ret; 386 - if (tb && tb2) 387 - goto success; 388 - found_port = true; 389 - } else { 390 - head = &hinfo->bhash[inet_bhashfn(net, port, 391 - hinfo->bhash_size)]; 392 - spin_lock_bh(&head->lock); 393 - inet_bind_bucket_for_each(tb, &head->chain) 394 - if (check_bind_bucket_match(tb, net, port, l3mdev)) 395 - break; 396 - 397 - tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, 398 - &head2); 399 - } 400 - 401 - if (!tb) { 402 - tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net, 403 - head, port, l3mdev); 404 473 if (!tb) 405 - goto fail_unlock; 406 - bhash_created = true; 474 + goto tb_not_found; 475 + goto success; 407 476 } 408 - 409 - if (!tb2) { 410 - tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, 411 - net, head2, port, l3mdev, sk); 412 - if (!tb2) 413 - goto fail_unlock; 414 - bhash2_created = true; 415 - } 416 - 417 - /* If we had to find an open port, we already checked for conflicts */ 418 - if (!found_port && !hlist_empty(&tb->owners)) { 477 + head = &hinfo->bhash[inet_bhashfn(net, port, 478 + hinfo->bhash_size)]; 479 + spin_lock_bh(&head->lock); 480 + inet_bind_bucket_for_each(tb, &head->chain) 481 + if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && 482 + tb->port == port) 483 + goto tb_found; 484 + tb_not_found: 485 + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, 486 + net, head, port, l3mdev); 487 + if (!tb) 488 + goto fail_unlock; 489 + tb_found: 490 + if (!hlist_empty(&tb->owners)) { 419 491 if (sk->sk_reuse == SK_FORCE_REUSE) 420 492 goto success; 421 493 422 494 if ((tb->fastreuse > 0 && reuse) || 423 495 
sk_reuseport_match(tb, sk)) 424 496 goto success; 425 - if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true)) 497 + if (inet_csk_bind_conflict(sk, tb, true, true)) 426 498 goto fail_unlock; 427 499 } 428 500 success: 429 501 inet_csk_update_fastreuse(tb, sk); 430 502 431 503 if (!inet_csk(sk)->icsk_bind_hash) 432 - inet_bind_hash(sk, tb, tb2, port); 504 + inet_bind_hash(sk, tb, port); 433 505 WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); 434 - WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2); 435 506 ret = 0; 436 507 437 508 fail_unlock: 438 - if (ret) { 439 - if (bhash_created) 440 - inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); 441 - if (bhash2_created) 442 - inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, 443 - tb2); 444 - } 445 509 spin_unlock_bh(&head->lock); 446 510 return ret; 447 511 } ··· 961 1079 962 1080 inet_sk_set_state(newsk, TCP_SYN_RECV); 963 1081 newicsk->icsk_bind_hash = NULL; 964 - newicsk->icsk_bind2_hash = NULL; 965 1082 966 1083 inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port; 967 1084 inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;

+11 -182

net/ipv4/inet_hashtables.c

··· 81 81 return tb; 82 82 } 83 83 84 - struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep, 85 - struct net *net, 86 - struct inet_bind2_hashbucket *head, 87 - const unsigned short port, 88 - int l3mdev, 89 - const struct sock *sk) 90 - { 91 - struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); 92 - 93 - if (tb) { 94 - write_pnet(&tb->ib_net, net); 95 - tb->l3mdev = l3mdev; 96 - tb->port = port; 97 - #if IS_ENABLED(CONFIG_IPV6) 98 - if (sk->sk_family == AF_INET6) 99 - tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr; 100 - else 101 - #endif 102 - tb->rcv_saddr = sk->sk_rcv_saddr; 103 - INIT_HLIST_HEAD(&tb->owners); 104 - hlist_add_head(&tb->node, &head->chain); 105 - } 106 - return tb; 107 - } 108 - 109 - static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk) 110 - { 111 - #if IS_ENABLED(CONFIG_IPV6) 112 - if (sk->sk_family == AF_INET6) 113 - return ipv6_addr_equal(&tb2->v6_rcv_saddr, 114 - &sk->sk_v6_rcv_saddr); 115 - #endif 116 - return tb2->rcv_saddr == sk->sk_rcv_saddr; 117 - } 118 - 119 84 /* 120 85 * Caller must hold hashbucket lock for this tb with local BH disabled 121 86 */ ··· 92 127 } 93 128 } 94 129 95 - /* Caller must hold the lock for the corresponding hashbucket in the bhash table 96 - * with local BH disabled 97 - */ 98 - void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb) 99 - { 100 - if (hlist_empty(&tb->owners)) { 101 - __hlist_del(&tb->node); 102 - kmem_cache_free(cachep, tb); 103 - } 104 - } 105 - 106 130 void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, 107 - struct inet_bind2_bucket *tb2, const unsigned short snum) 131 + const unsigned short snum) 108 132 { 109 133 inet_sk(sk)->inet_num = snum; 110 134 sk_add_bind_node(sk, &tb->owners); 111 135 inet_csk(sk)->icsk_bind_hash = tb; 112 - sk_add_bind2_node(sk, &tb2->owners); 113 - inet_csk(sk)->icsk_bind2_hash = tb2; 114 136 } 115 137 116 138 /* ··· 109 157 const int bhash = 
inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, 110 158 hashinfo->bhash_size); 111 159 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 112 - struct inet_bind2_bucket *tb2; 113 160 struct inet_bind_bucket *tb; 114 161 115 162 spin_lock(&head->lock); ··· 117 166 inet_csk(sk)->icsk_bind_hash = NULL; 118 167 inet_sk(sk)->inet_num = 0; 119 168 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 120 - 121 - if (inet_csk(sk)->icsk_bind2_hash) { 122 - tb2 = inet_csk(sk)->icsk_bind2_hash; 123 - __sk_del_bind2_node(sk); 124 - inet_csk(sk)->icsk_bind2_hash = NULL; 125 - inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2); 126 - } 127 169 spin_unlock(&head->lock); 128 170 } 129 171 ··· 133 189 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 134 190 unsigned short port = inet_sk(child)->inet_num; 135 191 const int bhash = inet_bhashfn(sock_net(sk), port, 136 - table->bhash_size); 192 + table->bhash_size); 137 193 struct inet_bind_hashbucket *head = &table->bhash[bhash]; 138 - struct inet_bind2_hashbucket *head_bhash2; 139 - bool created_inet_bind_bucket = false; 140 - struct net *net = sock_net(sk); 141 - struct inet_bind2_bucket *tb2; 142 194 struct inet_bind_bucket *tb; 143 195 int l3mdev; 144 196 145 197 spin_lock(&head->lock); 146 198 tb = inet_csk(sk)->icsk_bind_hash; 147 - tb2 = inet_csk(sk)->icsk_bind2_hash; 148 - if (unlikely(!tb || !tb2)) { 199 + if (unlikely(!tb)) { 149 200 spin_unlock(&head->lock); 150 201 return -ENOENT; 151 202 } ··· 153 214 * as that of the child socket. We have to look up or 154 215 * create a new bind bucket for the child here. 
*/ 155 216 inet_bind_bucket_for_each(tb, &head->chain) { 156 - if (check_bind_bucket_match(tb, net, port, l3mdev)) 217 + if (net_eq(ib_net(tb), sock_net(sk)) && 218 + tb->l3mdev == l3mdev && tb->port == port) 157 219 break; 158 220 } 159 221 if (!tb) { 160 222 tb = inet_bind_bucket_create(table->bind_bucket_cachep, 161 - net, head, port, l3mdev); 223 + sock_net(sk), head, port, 224 + l3mdev); 162 225 if (!tb) { 163 226 spin_unlock(&head->lock); 164 227 return -ENOMEM; 165 228 } 166 - created_inet_bind_bucket = true; 167 229 } 168 230 inet_csk_update_fastreuse(tb, child); 169 - 170 - goto bhash2_find; 171 - } else if (!bind2_bucket_addr_match(tb2, child)) { 172 - l3mdev = inet_sk_bound_l3mdev(sk); 173 - 174 - bhash2_find: 175 - tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child, 176 - &head_bhash2); 177 - if (!tb2) { 178 - tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep, 179 - net, head_bhash2, port, 180 - l3mdev, child); 181 - if (!tb2) 182 - goto error; 183 - } 184 231 } 185 - inet_bind_hash(child, tb, tb2, port); 232 + inet_bind_hash(child, tb, port); 186 233 spin_unlock(&head->lock); 187 234 188 235 return 0; 189 - 190 - error: 191 - if (created_inet_bind_bucket) 192 - inet_bind_bucket_destroy(table->bind_bucket_cachep, tb); 193 - spin_unlock(&head->lock); 194 - return -ENOMEM; 195 236 } 196 237 EXPORT_SYMBOL_GPL(__inet_inherit_port); 197 238 ··· 675 756 } 676 757 EXPORT_SYMBOL_GPL(inet_unhash); 677 758 678 - static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb, 679 - struct net *net, unsigned short port, 680 - int l3mdev, struct sock *sk) 681 - { 682 - #if IS_ENABLED(CONFIG_IPV6) 683 - if (sk->sk_family == AF_INET6) 684 - return net_eq(ib2_net(tb), net) && tb->port == port && 685 - tb->l3mdev == l3mdev && 686 - ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); 687 - else 688 - #endif 689 - return net_eq(ib2_net(tb), net) && tb->port == port && 690 - tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr; 691 - } 692 - 
693 - bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb, 694 - struct net *net, const unsigned short port, 695 - int l3mdev, const struct sock *sk) 696 - { 697 - #if IS_ENABLED(CONFIG_IPV6) 698 - struct in6_addr nulladdr = {}; 699 - 700 - if (sk->sk_family == AF_INET6) 701 - return net_eq(ib2_net(tb), net) && tb->port == port && 702 - tb->l3mdev == l3mdev && 703 - ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr); 704 - else 705 - #endif 706 - return net_eq(ib2_net(tb), net) && tb->port == port && 707 - tb->l3mdev == l3mdev && tb->rcv_saddr == 0; 708 - } 709 - 710 - static struct inet_bind2_hashbucket * 711 - inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk, 712 - const struct net *net, unsigned short port) 713 - { 714 - u32 hash; 715 - 716 - #if IS_ENABLED(CONFIG_IPV6) 717 - if (sk->sk_family == AF_INET6) 718 - hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port); 719 - else 720 - #endif 721 - hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port); 722 - return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)]; 723 - } 724 - 725 - /* This should only be called when the spinlock for the socket's corresponding 726 - * bind_hashbucket is held 727 - */ 728 - struct inet_bind2_bucket * 729 - inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net, 730 - const unsigned short port, int l3mdev, struct sock *sk, 731 - struct inet_bind2_hashbucket **head) 732 - { 733 - struct inet_bind2_bucket *bhash2 = NULL; 734 - struct inet_bind2_hashbucket *h; 735 - 736 - h = inet_bhashfn_portaddr(hinfo, sk, net, port); 737 - inet_bind_bucket_for_each(bhash2, &h->chain) { 738 - if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk)) 739 - break; 740 - } 741 - 742 - if (head) 743 - *head = h; 744 - 745 - return bhash2; 746 - } 747 - 748 759 /* RFC 6056 3.3.4. 
Algorithm 4: Double-Hash Port Selection Algorithm 749 760 * Note that we use 32bit integers (vs RFC 'short integers') 750 761 * because 2^16 is not a multiple of num_ephemeral and this ··· 695 846 { 696 847 struct inet_hashinfo *hinfo = death_row->hashinfo; 697 848 struct inet_timewait_sock *tw = NULL; 698 - struct inet_bind2_hashbucket *head2; 699 849 struct inet_bind_hashbucket *head; 700 850 int port = inet_sk(sk)->inet_num; 701 851 struct net *net = sock_net(sk); 702 - struct inet_bind2_bucket *tb2; 703 852 struct inet_bind_bucket *tb; 704 - bool tb_created = false; 705 853 u32 remaining, offset; 706 854 int ret, i, low, high; 707 855 int l3mdev; ··· 755 909 * the established check is already unique enough. 756 910 */ 757 911 inet_bind_bucket_for_each(tb, &head->chain) { 758 - if (check_bind_bucket_match(tb, net, port, l3mdev)) { 912 + if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev && 913 + tb->port == port) { 759 914 if (tb->fastreuse >= 0 || 760 915 tb->fastreuseport >= 0) 761 916 goto next_port; ··· 774 927 spin_unlock_bh(&head->lock); 775 928 return -ENOMEM; 776 929 } 777 - tb_created = true; 778 930 tb->fastreuse = -1; 779 931 tb->fastreuseport = -1; 780 932 goto ok; ··· 789 943 return -EADDRNOTAVAIL; 790 944 791 945 ok: 792 - /* Find the corresponding tb2 bucket since we need to 793 - * add the socket to the bhash2 table as well 794 - */ 795 - tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2); 796 - if (!tb2) { 797 - tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net, 798 - head2, port, l3mdev, sk); 799 - if (!tb2) 800 - goto error; 801 - } 802 - 803 946 /* Here we want to add a little bit of randomness to the next source 804 947 * port that will be chosen. 
We use a max() with a random here so that 805 948 * on low contention the randomness is maximal and on high contention ··· 798 963 WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); 799 964 800 965 /* Head lock still held and bh's disabled */ 801 - inet_bind_hash(sk, tb, tb2, port); 966 + inet_bind_hash(sk, tb, port); 802 967 if (sk_unhashed(sk)) { 803 968 inet_sk(sk)->inet_sport = htons(port); 804 969 inet_ehash_nolisten(sk, (struct sock *)tw, NULL); ··· 810 975 inet_twsk_deschedule_put(tw); 811 976 local_bh_enable(); 812 977 return 0; 813 - 814 - error: 815 - if (tb_created) 816 - inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb); 817 - spin_unlock_bh(&head->lock); 818 - return -ENOMEM; 819 978 } 820 979 821 980 /*

+2 -12

net/ipv4/tcp.c

··· 4599 4599 SLAB_HWCACHE_ALIGN | SLAB_PANIC | 4600 4600 SLAB_ACCOUNT, 4601 4601 NULL); 4602 - tcp_hashinfo.bind2_bucket_cachep = 4603 - kmem_cache_create("tcp_bind2_bucket", 4604 - sizeof(struct inet_bind2_bucket), 0, 4605 - SLAB_HWCACHE_ALIGN | SLAB_PANIC | 4606 - SLAB_ACCOUNT, 4607 - NULL); 4608 4602 4609 4603 /* Size and allocate the main established and bind bucket 4610 4604 * hash tables. ··· 4621 4627 if (inet_ehash_locks_alloc(&tcp_hashinfo)) 4622 4628 panic("TCP: failed to alloc ehash_locks"); 4623 4629 tcp_hashinfo.bhash = 4624 - alloc_large_system_hash("TCP bind bhash tables", 4625 - sizeof(struct inet_bind_hashbucket) + 4626 - sizeof(struct inet_bind2_hashbucket), 4630 + alloc_large_system_hash("TCP bind", 4631 + sizeof(struct inet_bind_hashbucket), 4627 4632 tcp_hashinfo.ehash_mask + 1, 4628 4633 17, /* one slot per 128 KB of memory */ 4629 4634 0, ··· 4631 4638 0, 4632 4639 64 * 1024); 4633 4640 tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size; 4634 - tcp_hashinfo.bhash2 = 4635 - (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size); 4636 4641 for (i = 0; i < tcp_hashinfo.bhash_size; i++) { 4637 4642 spin_lock_init(&tcp_hashinfo.bhash[i].lock); 4638 4643 INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain); 4639 - INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain); 4640 4644 } 4641 4645 4642 4646

+23 -14

net/sunrpc/xdr.c

··· 919 919 EXPORT_SYMBOL_GPL(xdr_init_encode); 920 920 921 921 /** 922 - * xdr_commit_encode - Ensure all data is written to buffer 922 + * __xdr_commit_encode - Ensure all data is written to buffer 923 923 * @xdr: pointer to xdr_stream 924 924 * 925 925 * We handle encoding across page boundaries by giving the caller a ··· 931 931 * required at the end of encoding, or any other time when the xdr_buf 932 932 * data might be read. 933 933 */ 934 - inline void xdr_commit_encode(struct xdr_stream *xdr) 934 + void __xdr_commit_encode(struct xdr_stream *xdr) 935 935 { 936 - int shift = xdr->scratch.iov_len; 936 + size_t shift = xdr->scratch.iov_len; 937 937 void *page; 938 938 939 - if (shift == 0) 940 - return; 941 939 page = page_address(*xdr->page_ptr); 942 940 memcpy(xdr->scratch.iov_base, page, shift); 943 941 memmove(page, page + shift, (void *)xdr->p - page); 944 942 xdr_reset_scratch_buffer(xdr); 945 943 } 946 - EXPORT_SYMBOL_GPL(xdr_commit_encode); 944 + EXPORT_SYMBOL_GPL(__xdr_commit_encode); 947 945 948 - static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, 949 - size_t nbytes) 946 + /* 947 + * The buffer space to be reserved crosses the boundary between 948 + * xdr->buf->head and xdr->buf->pages, or between two pages 949 + * in xdr->buf->pages. 950 + */ 951 + static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, 952 + size_t nbytes) 950 953 { 951 - __be32 *p; 952 954 int space_left; 953 955 int frag1bytes, frag2bytes; 956 + void *p; 954 957 955 958 if (nbytes > PAGE_SIZE) 956 959 goto out_overflow; /* Bigger buffers require special handling */ ··· 967 964 xdr->buf->page_len += frag1bytes; 968 965 xdr->page_ptr++; 969 966 xdr->iov = NULL; 967 + 970 968 /* 971 969 * If the last encode didn't end exactly on a page boundary, the 972 970 * next one will straddle boundaries. 
Encode into the next ··· 976 972 * space at the end of the previous buffer: 977 973 */ 978 974 xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes); 979 - p = page_address(*xdr->page_ptr); 975 + 980 976 /* 981 - * Note this is where the next encode will start after we've 982 - * shifted this one back: 977 + * xdr->p is where the next encode will start after 978 + * xdr_commit_encode() has shifted this one back: 983 979 */ 984 - xdr->p = (void *)p + frag2bytes; 980 + p = page_address(*xdr->page_ptr); 981 + xdr->p = p + frag2bytes; 985 982 space_left = xdr->buf->buflen - xdr->buf->len; 986 - xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE); 983 + if (space_left - nbytes >= PAGE_SIZE) 984 + xdr->end = p + PAGE_SIZE; 985 + else 986 + xdr->end = p + space_left - frag1bytes; 987 + 987 988 xdr->buf->page_len += frag2bytes; 988 989 xdr->buf->len += nbytes; 989 990 return p;

+2 -2

net/sunrpc/xprtrdma/svc_rdma_rw.c

··· 478 478 unsigned int write_len; 479 479 u64 offset; 480 480 481 - seg = &info->wi_chunk->ch_segments[info->wi_seg_no]; 482 - if (!seg) 481 + if (info->wi_seg_no >= info->wi_chunk->ch_segcount) 483 482 goto out_overflow; 484 483 484 + seg = &info->wi_chunk->ch_segments[info->wi_seg_no]; 485 485 write_len = min(remaining, seg->rs_length - info->wi_seg_off); 486 486 if (!write_len) 487 487 goto out_overflow;

+2 -2

scripts/Makefile.build

··· 251 251 252 252 # To make this rule robust against "Argument list too long" error, 253 253 # ensure to add $(obj)/ prefix by a shell command. 254 - cmd_mod = echo $(call real-search, $*.o, .o, -objs -y -m) | \ 255 - $(AWK) -v RS='( |\n)' '!x[$$0]++ { print("$(obj)/"$$0) }' > $@ 254 + cmd_mod = printf '%s\n' $(call real-search, $*.o, .o, -objs -y -m) | \ 255 + $(AWK) '!x[$$0]++ { print("$(obj)/"$$0) }' > $@ 256 256 257 257 $(obj)/%.mod: FORCE 258 258 $(call if_changed,mod)

+21 -15

scripts/check-local-export

··· 8 8 9 9 set -e 10 10 11 + # catch errors from ${NM} 12 + set -o pipefail 13 + 14 + # Run the last element of a pipeline in the current shell. 15 + # Without this, the while-loop would be executed in a subshell, and 16 + # the changes made to 'symbol_types' and 'export_symbols' would be lost. 17 + shopt -s lastpipe 18 + 11 19 declare -A symbol_types 12 20 declare -a export_symbols 13 21 14 22 exit_code=0 15 23 24 + # If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm) shows 25 + # 'no symbols' diagnostic (but exits with 0). It is harmless and hidden by 26 + # '2>/dev/null'. However, it suppresses real error messages as well. Add a 27 + # hand-crafted error message here. 28 + # 29 + # TODO: 30 + # Use --quiet instead of 2>/dev/null when we upgrade the minimum version of 31 + # binutils to 2.37, llvm to 13.0.0. 32 + # Then, the following line will be really simple: 33 + # ${NM} --quiet ${1} | 34 + 35 + { ${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } } | 16 36 while read value type name 17 37 do 18 38 # Skip the line if the number of fields is less than 3. ··· 57 37 if [[ ${name} == __ksymtab_* ]]; then 58 38 export_symbols+=(${name#__ksymtab_}) 59 39 fi 60 - 61 - # If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm) 62 - # shows 'no symbols' diagnostic (but exits with 0). It is harmless and 63 - # hidden by '2>/dev/null'. However, it suppresses real error messages 64 - # as well. Add a hand-crafted error message here. 65 - # 66 - # Use --quiet instead of 2>/dev/null when we upgrade the minimum version 67 - # of binutils to 2.37, llvm to 13.0.0. 68 - # 69 - # Then, the following line will be really simple: 70 - # done < <(${NM} --quiet ${1}) 71 - done < <(${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } ) 72 - 73 - # Catch error in the process substitution 74 - wait $! 40 + done 75 41 76 42 for name in "${export_symbols[@]}" 77 43 do

+3 -3

scripts/gdb/linux/config.py

··· 24 24 filename = arg 25 25 26 26 try: 27 - py_config_ptr = gdb.parse_and_eval("kernel_config_data + 8") 28 - py_config_size = gdb.parse_and_eval( 29 - "sizeof(kernel_config_data) - 1 - 8 * 2") 27 + py_config_ptr = gdb.parse_and_eval("&kernel_config_data") 28 + py_config_ptr_end = gdb.parse_and_eval("&kernel_config_data_end") 29 + py_config_size = py_config_ptr_end - py_config_ptr 30 30 except gdb.error as e: 31 31 raise gdb.GdbError("Can't find config, enable CONFIG_IKCONFIG?") 32 32

+2 -3

scripts/nsdeps

··· 34 34 local mod=${1%.ko:} 35 35 shift 36 36 local namespaces="$*" 37 - local mod_source_files="`cat $mod.mod | sed -n 1p \ 38 - | sed -e 's/\.o/\.c/g' \ 39 - | sed "s|[^ ]* *|${src_prefix}&|g"`" 37 + local mod_source_files=$(sed "s|^\(.*\)\.o$|${src_prefix}\1.c|" $mod.mod) 38 + 39 40 for ns in $namespaces; do 40 41 echo "Adding namespace $ns to module $mod.ko." 41 42 generate_deps_for_ns $ns "$mod_source_files"

+1

tools/arch/x86/include/asm/cpufeatures.h

··· 443 443 #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ 444 444 #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ 445 445 #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ 446 + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ 446 447 447 448 #endif /* _ASM_X86_CPUFEATURES_H */

+25

tools/arch/x86/include/asm/msr-index.h

··· 116 116 * Not susceptible to 117 117 * TSX Async Abort (TAA) vulnerabilities. 118 118 */ 119 + #define ARCH_CAP_SBDR_SSDP_NO BIT(13) /* 120 + * Not susceptible to SBDR and SSDP 121 + * variants of Processor MMIO stale data 122 + * vulnerabilities. 123 + */ 124 + #define ARCH_CAP_FBSDP_NO BIT(14) /* 125 + * Not susceptible to FBSDP variant of 126 + * Processor MMIO stale data 127 + * vulnerabilities. 128 + */ 129 + #define ARCH_CAP_PSDP_NO BIT(15) /* 130 + * Not susceptible to PSDP variant of 131 + * Processor MMIO stale data 132 + * vulnerabilities. 133 + */ 134 + #define ARCH_CAP_FB_CLEAR BIT(17) /* 135 + * VERW clears CPU fill buffer 136 + * even on MDS_NO CPUs. 137 + */ 138 + #define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /* 139 + * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS] 140 + * bit available to control VERW 141 + * behavior. 142 + */ 119 143 120 144 #define MSR_IA32_FLUSH_CMD 0x0000010b 121 145 #define L1D_FLUSH BIT(0) /* ··· 157 133 #define MSR_IA32_MCU_OPT_CTRL 0x00000123 158 134 #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ 159 135 #define RTM_ALLOW BIT(1) /* TSX development mode */ 136 + #define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ 160 137 161 138 #define MSR_IA32_SYSENTER_CS 0x00000174 162 139 #define MSR_IA32_SYSENTER_ESP 0x00000175

+36 -13

tools/testing/selftests/kvm/Makefile

··· 37 37 UNAME_M := riscv 38 38 endif 39 39 40 - LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c 41 - LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S 42 - LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c 43 - LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c 44 - LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c 40 + LIBKVM += lib/assert.c 41 + LIBKVM += lib/elf.c 42 + LIBKVM += lib/guest_modes.c 43 + LIBKVM += lib/io.c 44 + LIBKVM += lib/kvm_util.c 45 + LIBKVM += lib/perf_test_util.c 46 + LIBKVM += lib/rbtree.c 47 + LIBKVM += lib/sparsebit.c 48 + LIBKVM += lib/test_util.c 49 + 50 + LIBKVM_x86_64 += lib/x86_64/apic.c 51 + LIBKVM_x86_64 += lib/x86_64/handlers.S 52 + LIBKVM_x86_64 += lib/x86_64/perf_test_util.c 53 + LIBKVM_x86_64 += lib/x86_64/processor.c 54 + LIBKVM_x86_64 += lib/x86_64/svm.c 55 + LIBKVM_x86_64 += lib/x86_64/ucall.c 56 + LIBKVM_x86_64 += lib/x86_64/vmx.c 57 + 58 + LIBKVM_aarch64 += lib/aarch64/gic.c 59 + LIBKVM_aarch64 += lib/aarch64/gic_v3.c 60 + LIBKVM_aarch64 += lib/aarch64/handlers.S 61 + LIBKVM_aarch64 += lib/aarch64/processor.c 62 + LIBKVM_aarch64 += lib/aarch64/spinlock.c 63 + LIBKVM_aarch64 += lib/aarch64/ucall.c 64 + LIBKVM_aarch64 += lib/aarch64/vgic.c 65 + 66 + LIBKVM_s390x += lib/s390x/diag318_test_handler.c 67 + LIBKVM_s390x += lib/s390x/processor.c 68 + LIBKVM_s390x += lib/s390x/ucall.c 69 + 70 + LIBKVM_riscv += lib/riscv/processor.c 71 + LIBKVM_riscv += lib/riscv/ucall.c 45 72 46 73 TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test 47 74 TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test ··· 200 173 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/ 201 174 include ../lib.mk 202 175 203 - STATIC_LIBS := 
$(OUTPUT)/libkvm.a 204 176 LIBKVM_C := $(filter %.c,$(LIBKVM)) 205 177 LIBKVM_S := $(filter %.S,$(LIBKVM)) 206 178 LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) 207 179 LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) 208 - EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* 180 + LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) 181 + 182 + EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.* 209 183 210 184 x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) 211 185 $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c ··· 215 187 $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S 216 188 $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ 217 189 218 - LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) 219 - $(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) 220 - $(AR) crs $@ $^ 221 - 222 190 x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) 223 - all: $(STATIC_LIBS) 224 - $(TEST_GEN_PROGS): $(STATIC_LIBS) 191 + $(TEST_GEN_PROGS): $(LIBKVM_OBJS) 225 192 226 193 cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. 227 194 cscope:

+7 -3

tools/testing/selftests/kvm/dirty_log_perf_test.c

··· 336 336 static void help(char *name) 337 337 { 338 338 puts(""); 339 - printf("usage: %s [-h] [-i iterations] [-p offset] [-g]" 340 - "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" 339 + printf("usage: %s [-h] [-i iterations] [-p offset] [-g] " 340 + "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]" 341 341 "[-x memslots]\n", name); 342 342 puts(""); 343 343 printf(" -i: specify iteration counts (default: %"PRIu64")\n", ··· 351 351 printf(" -p: specify guest physical test memory offset\n" 352 352 " Warning: a low offset can conflict with the loaded test code.\n"); 353 353 guest_modes_help(); 354 + printf(" -n: Run the vCPUs in nested mode (L2)\n"); 354 355 printf(" -b: specify the size of the memory region which should be\n" 355 356 " dirtied by each vCPU. e.g. 10M or 3G.\n" 356 357 " (default: 1G)\n"); ··· 388 387 389 388 guest_modes_append_default(); 390 389 391 - while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) { 390 + while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) { 392 391 switch (opt) { 393 392 case 'g': 394 393 dirty_log_manual_caps = 0; ··· 401 400 break; 402 401 case 'm': 403 402 guest_modes_cmdline(optarg); 403 + break; 404 + case 'n': 405 + perf_test_args.nested = true; 404 406 break; 405 407 case 'b': 406 408 guest_percpu_mem_size = parse_size(optarg);

+9

tools/testing/selftests/kvm/include/perf_test_util.h

··· 30 30 31 31 struct perf_test_args { 32 32 struct kvm_vm *vm; 33 + /* The starting address and size of the guest test region. */ 33 34 uint64_t gpa; 35 + uint64_t size; 34 36 uint64_t guest_page_size; 35 37 int wr_fract; 38 + 39 + /* Run vCPUs in L2 instead of L1, if the architecture supports it. */ 40 + bool nested; 36 41 37 42 struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS]; 38 43 }; ··· 54 49 55 50 void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)); 56 51 void perf_test_join_vcpu_threads(int vcpus); 52 + void perf_test_guest_code(uint32_t vcpu_id); 53 + 54 + uint64_t perf_test_nested_pages(int nr_vcpus); 55 + void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus); 57 56 58 57 #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */

+16 -9

tools/testing/selftests/kvm/include/x86_64/processor.h

··· 482 482 struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); 483 483 void vm_xsave_req_perm(int bit); 484 484 485 - enum x86_page_size { 486 - X86_PAGE_SIZE_4K = 0, 487 - X86_PAGE_SIZE_2M, 488 - X86_PAGE_SIZE_1G, 485 + enum pg_level { 486 + PG_LEVEL_NONE, 487 + PG_LEVEL_4K, 488 + PG_LEVEL_2M, 489 + PG_LEVEL_1G, 490 + PG_LEVEL_512G, 491 + PG_LEVEL_NUM 489 492 }; 490 - void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 491 - enum x86_page_size page_size); 493 + 494 + #define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12) 495 + #define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level)) 496 + 497 + #define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K) 498 + #define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M) 499 + #define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G) 500 + 501 + void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level); 492 502 493 503 /* 494 504 * Basic CPU control in CR0 ··· 514 504 #define X86_CR0_NW (1UL<<29) /* Not Write-through */ 515 505 #define X86_CR0_CD (1UL<<30) /* Cache Disable */ 516 506 #define X86_CR0_PG (1UL<<31) /* Paging */ 517 - 518 - /* VMX_EPT_VPID_CAP bits */ 519 - #define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21) 520 507 521 508 #define XSTATE_XTILE_CFG_BIT 17 522 509 #define XSTATE_XTILE_DATA_BIT 18

+6

tools/testing/selftests/kvm/include/x86_64/vmx.h

··· 96 96 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f 97 97 #define VMX_MISC_SAVE_EFER_LMA 0x00000020 98 98 99 + #define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000 100 + #define VMX_EPT_VPID_CAP_AD_BITS 0x00200000 101 + 99 102 #define EXIT_REASON_FAILED_VMENTRY 0x80000000 100 103 #define EXIT_REASON_EXCEPTION_NMI 0 101 104 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 ··· 609 606 610 607 bool nested_vmx_supported(void); 611 608 void nested_vmx_check_supported(void); 609 + bool ept_1g_pages_supported(void); 612 610 613 611 void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, 614 612 uint64_t nested_paddr, uint64_t paddr); ··· 617 613 uint64_t nested_paddr, uint64_t paddr, uint64_t size); 618 614 void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, 619 615 uint32_t memslot); 616 + void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, 617 + uint64_t addr, uint64_t size); 620 618 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, 621 619 uint32_t eptp_memslot); 622 620 void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);

+45 -8

tools/testing/selftests/kvm/lib/perf_test_util.c

··· 40 40 * Continuously write to the first 8 bytes of each page in the 41 41 * specified region. 42 42 */ 43 - static void guest_code(uint32_t vcpu_id) 43 + void perf_test_guest_code(uint32_t vcpu_id) 44 44 { 45 45 struct perf_test_args *pta = &perf_test_args; 46 46 struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id]; ··· 108 108 { 109 109 struct perf_test_args *pta = &perf_test_args; 110 110 struct kvm_vm *vm; 111 - uint64_t guest_num_pages; 111 + uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES; 112 112 uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); 113 + uint64_t region_end_gfn; 113 114 int i; 114 115 115 116 pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); ··· 136 135 slots); 137 136 138 137 /* 138 + * If using nested, allocate extra pages for the nested page tables and 139 + * in-memory data structures. 140 + */ 141 + if (pta->nested) 142 + slot0_pages += perf_test_nested_pages(vcpus); 143 + 144 + /* 139 145 * Pass guest_num_pages to populate the page tables for test memory. 140 146 * The memory is also added to memslot 0, but that's a benign side 141 147 * effect as KVM allows aliasing HVAs in meslots. 142 148 */ 143 - vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES, 144 - guest_num_pages, 0, guest_code, NULL); 149 + vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0, 150 + perf_test_guest_code, NULL); 145 151 146 152 pta->vm = vm; 147 153 154 + /* Put the test region at the top guest physical memory. */ 155 + region_end_gfn = vm_get_max_gfn(vm) + 1; 156 + 157 + #ifdef __x86_64__ 158 + /* 159 + * When running vCPUs in L2, restrict the test region to 48 bits to 160 + * avoid needing 5-level page tables to identity map L2. 
161 + */ 162 + if (pta->nested) 163 + region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size); 164 + #endif 148 165 /* 149 166 * If there should be more memory in the guest test region than there 150 167 * can be pages in the guest, it will definitely cause problems. 151 168 */ 152 - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), 169 + TEST_ASSERT(guest_num_pages < region_end_gfn, 153 170 "Requested more guest memory than address space allows.\n" 154 171 " guest pages: %" PRIx64 " max gfn: %" PRIx64 155 172 " vcpus: %d wss: %" PRIx64 "]\n", 156 - guest_num_pages, vm_get_max_gfn(vm), vcpus, 173 + guest_num_pages, region_end_gfn - 1, vcpus, 157 174 vcpu_memory_bytes); 158 175 159 - pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size; 176 + pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size; 160 177 pta->gpa = align_down(pta->gpa, backing_src_pagesz); 161 178 #ifdef __s390x__ 162 179 /* Align to 1M (segment size) */ 163 180 pta->gpa = align_down(pta->gpa, 1 << 20); 164 181 #endif 165 - pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa); 182 + pta->size = guest_num_pages * pta->guest_page_size; 183 + pr_info("guest physical test memory: [0x%lx, 0x%lx)\n", 184 + pta->gpa, pta->gpa + pta->size); 166 185 167 186 /* Add extra memory slots for testing */ 168 187 for (i = 0; i < slots; i++) { ··· 198 177 virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); 199 178 200 179 perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); 180 + 181 + if (pta->nested) { 182 + pr_info("Configuring vCPUs to run in L2 (nested).\n"); 183 + perf_test_setup_nested(vm, vcpus); 184 + } 201 185 202 186 ucall_init(vm, NULL); 203 187 ··· 222 196 { 223 197 perf_test_args.wr_fract = wr_fract; 224 198 sync_global_to_guest(vm, perf_test_args); 199 + } 200 + 201 + uint64_t __weak perf_test_nested_pages(int nr_vcpus) 202 + { 203 + return 0; 204 + } 205 + 206 + void __weak 
perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus) 207 + { 208 + pr_info("%s() not support on this architecture, skipping.\n", __func__); 209 + exit(KSFT_SKIP); 225 210 } 226 211 227 212 static void *vcpu_thread_main(void *data)

+112

tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * x86_64-specific extensions to perf_test_util.c. 4 + * 5 + * Copyright (C) 2022, Google, Inc. 6 + */ 7 + #include <stdio.h> 8 + #include <stdlib.h> 9 + #include <linux/bitmap.h> 10 + #include <linux/bitops.h> 11 + 12 + #include "test_util.h" 13 + #include "kvm_util.h" 14 + #include "perf_test_util.h" 15 + #include "../kvm_util_internal.h" 16 + #include "processor.h" 17 + #include "vmx.h" 18 + 19 + void perf_test_l2_guest_code(uint64_t vcpu_id) 20 + { 21 + perf_test_guest_code(vcpu_id); 22 + vmcall(); 23 + } 24 + 25 + extern char perf_test_l2_guest_entry[]; 26 + __asm__( 27 + "perf_test_l2_guest_entry:" 28 + " mov (%rsp), %rdi;" 29 + " call perf_test_l2_guest_code;" 30 + " ud2;" 31 + ); 32 + 33 + static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id) 34 + { 35 + #define L2_GUEST_STACK_SIZE 64 36 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 37 + unsigned long *rsp; 38 + 39 + GUEST_ASSERT(vmx->vmcs_gpa); 40 + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); 41 + GUEST_ASSERT(load_vmcs(vmx)); 42 + GUEST_ASSERT(ept_1g_pages_supported()); 43 + 44 + rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1]; 45 + *rsp = vcpu_id; 46 + prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp); 47 + 48 + GUEST_ASSERT(!vmlaunch()); 49 + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 50 + GUEST_DONE(); 51 + } 52 + 53 + uint64_t perf_test_nested_pages(int nr_vcpus) 54 + { 55 + /* 56 + * 513 page tables is enough to identity-map 256 TiB of L2 with 1G 57 + * pages and 4-level paging, plus a few pages per-vCPU for data 58 + * structures such as the VMCS. 
59 + */ 60 + return 513 + 10 * nr_vcpus; 61 + } 62 + 63 + void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) 64 + { 65 + uint64_t start, end; 66 + 67 + prepare_eptp(vmx, vm, 0); 68 + 69 + /* 70 + * Identity map the first 4G and the test region with 1G pages so that 71 + * KVM can shadow the EPT12 with the maximum huge page size supported 72 + * by the backing source. 73 + */ 74 + nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL); 75 + 76 + start = align_down(perf_test_args.gpa, PG_SIZE_1G); 77 + end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G); 78 + nested_identity_map_1g(vmx, vm, start, end - start); 79 + } 80 + 81 + void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus) 82 + { 83 + struct vmx_pages *vmx, *vmx0 = NULL; 84 + struct kvm_regs regs; 85 + vm_vaddr_t vmx_gva; 86 + int vcpu_id; 87 + 88 + nested_vmx_check_supported(); 89 + 90 + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { 91 + vmx = vcpu_alloc_vmx(vm, &vmx_gva); 92 + 93 + if (vcpu_id == 0) { 94 + perf_test_setup_ept(vmx, vm); 95 + vmx0 = vmx; 96 + } else { 97 + /* Share the same EPT table across all vCPUs. */ 98 + vmx->eptp = vmx0->eptp; 99 + vmx->eptp_hva = vmx0->eptp_hva; 100 + vmx->eptp_gpa = vmx0->eptp_gpa; 101 + } 102 + 103 + /* 104 + * Override the vCPU to run perf_test_l1_guest_code() which will 105 + * bounce it into L2 before calling perf_test_guest_code(). 106 + */ 107 + vcpu_regs_get(vm, vcpu_id, &regs); 108 + regs.rip = (unsigned long) perf_test_l1_guest_code; 109 + vcpu_regs_set(vm, vcpu_id, &regs); 110 + vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id); 111 + } 112 + }

+15 -16

tools/testing/selftests/kvm/lib/x86_64/processor.c

··· 158 158 int level) 159 159 { 160 160 uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift); 161 - int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu; 161 + int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; 162 162 163 163 return &page_table[index]; 164 164 } ··· 167 167 uint64_t pt_pfn, 168 168 uint64_t vaddr, 169 169 uint64_t paddr, 170 - int level, 171 - enum x86_page_size page_size) 170 + int current_level, 171 + int target_level) 172 172 { 173 - uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level); 173 + uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); 174 174 175 175 if (!(*pte & PTE_PRESENT_MASK)) { 176 176 *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; 177 - if (level == page_size) 177 + if (current_level == target_level) 178 178 *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); 179 179 else 180 180 *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; ··· 184 184 * a hugepage at this level, and that there isn't a hugepage at 185 185 * this level. 186 186 */ 187 - TEST_ASSERT(level != page_size, 187 + TEST_ASSERT(current_level != target_level, 188 188 "Cannot create hugepage at level: %u, vaddr: 0x%lx\n", 189 - page_size, vaddr); 189 + current_level, vaddr); 190 190 TEST_ASSERT(!(*pte & PTE_LARGE_MASK), 191 191 "Cannot create page table at level: %u, vaddr: 0x%lx\n", 192 - level, vaddr); 192 + current_level, vaddr); 193 193 } 194 194 return pte; 195 195 } 196 196 197 - void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, 198 - enum x86_page_size page_size) 197 + void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) 199 198 { 200 - const uint64_t pg_size = 1ull << ((page_size * 9) + 12); 199 + const uint64_t pg_size = PG_LEVEL_SIZE(level); 201 200 uint64_t *pml4e, *pdpe, *pde; 202 201 uint64_t *pte; 203 202 ··· 221 222 * early if a hugepage was created. 
222 223 */ 223 224 pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift, 224 - vaddr, paddr, 3, page_size); 225 + vaddr, paddr, PG_LEVEL_512G, level); 225 226 if (*pml4e & PTE_LARGE_MASK) 226 227 return; 227 228 228 - pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size); 229 + pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level); 229 230 if (*pdpe & PTE_LARGE_MASK) 230 231 return; 231 232 232 - pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size); 233 + pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level); 233 234 if (*pde & PTE_LARGE_MASK) 234 235 return; 235 236 236 237 /* Fill in page table entry. */ 237 - pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0); 238 + pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); 238 239 TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), 239 240 "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); 240 241 *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); ··· 242 243 243 244 void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) 244 245 { 245 - __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K); 246 + __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); 246 247 } 247 248 248 249 static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,

+94 -57

tools/testing/selftests/kvm/lib/x86_64/vmx.c

··· 198 198 return true; 199 199 } 200 200 201 + static bool ept_vpid_cap_supported(uint64_t mask) 202 + { 203 + return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask; 204 + } 205 + 206 + bool ept_1g_pages_supported(void) 207 + { 208 + return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES); 209 + } 210 + 201 211 /* 202 212 * Initialize the control fields to the most basic settings possible. 203 213 */ ··· 225 215 struct eptPageTablePointer eptp = { 226 216 .memory_type = VMX_BASIC_MEM_TYPE_WB, 227 217 .page_walk_length = 3, /* + 1 */ 228 - .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS), 218 + .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), 229 219 .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, 230 220 }; 231 221 ··· 402 392 } 403 393 } 404 394 405 - void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, 406 - uint64_t nested_paddr, uint64_t paddr) 395 + static void nested_create_pte(struct kvm_vm *vm, 396 + struct eptPageTableEntry *pte, 397 + uint64_t nested_paddr, 398 + uint64_t paddr, 399 + int current_level, 400 + int target_level) 407 401 { 408 - uint16_t index[4]; 409 - struct eptPageTableEntry *pml4e; 402 + if (!pte->readable) { 403 + pte->writable = true; 404 + pte->readable = true; 405 + pte->executable = true; 406 + pte->page_size = (current_level == target_level); 407 + if (pte->page_size) 408 + pte->address = paddr >> vm->page_shift; 409 + else 410 + pte->address = vm_alloc_page_table(vm) >> vm->page_shift; 411 + } else { 412 + /* 413 + * Entry already present. Assert that the caller doesn't want 414 + * a hugepage at this level, and that there isn't a hugepage at 415 + * this level. 
416 + */ 417 + TEST_ASSERT(current_level != target_level, 418 + "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n", 419 + current_level, nested_paddr); 420 + TEST_ASSERT(!pte->page_size, 421 + "Cannot create page table at level: %u, nested_paddr: 0x%lx\n", 422 + current_level, nested_paddr); 423 + } 424 + } 425 + 426 + 427 + void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, 428 + uint64_t nested_paddr, uint64_t paddr, int target_level) 429 + { 430 + const uint64_t page_size = PG_LEVEL_SIZE(target_level); 431 + struct eptPageTableEntry *pt = vmx->eptp_hva, *pte; 432 + uint16_t index; 410 433 411 434 TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " 412 435 "unknown or unsupported guest mode, mode: 0x%x", vm->mode); 413 436 414 - TEST_ASSERT((nested_paddr % vm->page_size) == 0, 437 + TEST_ASSERT((nested_paddr >> 48) == 0, 438 + "Nested physical address 0x%lx requires 5-level paging", 439 + nested_paddr); 440 + TEST_ASSERT((nested_paddr % page_size) == 0, 415 441 "Nested physical address not on page boundary,\n" 416 - " nested_paddr: 0x%lx vm->page_size: 0x%x", 417 - nested_paddr, vm->page_size); 442 + " nested_paddr: 0x%lx page_size: 0x%lx", 443 + nested_paddr, page_size); 418 444 TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn, 419 445 "Physical address beyond beyond maximum supported,\n" 420 446 " nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 421 447 paddr, vm->max_gfn, vm->page_size); 422 - TEST_ASSERT((paddr % vm->page_size) == 0, 448 + TEST_ASSERT((paddr % page_size) == 0, 423 449 "Physical address not on page boundary,\n" 424 - " paddr: 0x%lx vm->page_size: 0x%x", 425 - paddr, vm->page_size); 450 + " paddr: 0x%lx page_size: 0x%lx", 451 + paddr, page_size); 426 452 TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, 427 453 "Physical address beyond beyond maximum supported,\n" 428 454 " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", 429 455 paddr, vm->max_gfn, vm->page_size); 430 456 431 - 
index[0] = (nested_paddr >> 12) & 0x1ffu; 432 - index[1] = (nested_paddr >> 21) & 0x1ffu; 433 - index[2] = (nested_paddr >> 30) & 0x1ffu; 434 - index[3] = (nested_paddr >> 39) & 0x1ffu; 457 + for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) { 458 + index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu; 459 + pte = &pt[index]; 435 460 436 - /* Allocate page directory pointer table if not present. */ 437 - pml4e = vmx->eptp_hva; 438 - if (!pml4e[index[3]].readable) { 439 - pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift; 440 - pml4e[index[3]].writable = true; 441 - pml4e[index[3]].readable = true; 442 - pml4e[index[3]].executable = true; 461 + nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level); 462 + 463 + if (pte->page_size) 464 + break; 465 + 466 + pt = addr_gpa2hva(vm, pte->address * vm->page_size); 443 467 } 444 - 445 - /* Allocate page directory table if not present. */ 446 - struct eptPageTableEntry *pdpe; 447 - pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); 448 - if (!pdpe[index[2]].readable) { 449 - pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift; 450 - pdpe[index[2]].writable = true; 451 - pdpe[index[2]].readable = true; 452 - pdpe[index[2]].executable = true; 453 - } 454 - 455 - /* Allocate page table if not present. */ 456 - struct eptPageTableEntry *pde; 457 - pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); 458 - if (!pde[index[1]].readable) { 459 - pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift; 460 - pde[index[1]].writable = true; 461 - pde[index[1]].readable = true; 462 - pde[index[1]].executable = true; 463 - } 464 - 465 - /* Fill in page table entry. 
*/ 466 - struct eptPageTableEntry *pte; 467 - pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); 468 - pte[index[0]].address = paddr >> vm->page_shift; 469 - pte[index[0]].writable = true; 470 - pte[index[0]].readable = true; 471 - pte[index[0]].executable = true; 472 468 473 469 /* 474 470 * For now mark these as accessed and dirty because the only 475 471 * testcase we have needs that. Can be reconsidered later. 476 472 */ 477 - pte[index[0]].accessed = true; 478 - pte[index[0]].dirty = true; 473 + pte->accessed = true; 474 + pte->dirty = true; 475 + 476 + } 477 + 478 + void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, 479 + uint64_t nested_paddr, uint64_t paddr) 480 + { 481 + __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K); 479 482 } 480 483 481 484 /* ··· 499 476 * nested_paddr - Nested guest physical address to map 500 477 * paddr - VM Physical Address 501 478 * size - The size of the range to map 502 - * eptp_memslot - Memory region slot for new virtual translation tables 479 + * level - The level at which to map the range 503 480 * 504 481 * Output Args: None 505 482 * ··· 508 485 * Within the VM given by vm, creates a nested guest translation for the 509 486 * page range starting at nested_paddr to the page range starting at paddr. 
510 487 */ 511 - void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, 512 - uint64_t nested_paddr, uint64_t paddr, uint64_t size) 488 + void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, 489 + uint64_t nested_paddr, uint64_t paddr, uint64_t size, 490 + int level) 513 491 { 514 - size_t page_size = vm->page_size; 492 + size_t page_size = PG_LEVEL_SIZE(level); 515 493 size_t npages = size / page_size; 516 494 517 495 TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow"); 518 496 TEST_ASSERT(paddr + size > paddr, "Paddr overflow"); 519 497 520 498 while (npages--) { 521 - nested_pg_map(vmx, vm, nested_paddr, paddr); 499 + __nested_pg_map(vmx, vm, nested_paddr, paddr, level); 522 500 nested_paddr += page_size; 523 501 paddr += page_size; 524 502 } 503 + } 504 + 505 + void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm, 506 + uint64_t nested_paddr, uint64_t paddr, uint64_t size) 507 + { 508 + __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K); 525 509 } 526 510 527 511 /* Prepare an identity extended page table that maps all the ··· 553 523 (uint64_t)i << vm->page_shift, 554 524 1 << vm->page_shift); 555 525 } 526 + } 527 + 528 + /* Identity map a region with 1GiB Pages. */ 529 + void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm, 530 + uint64_t addr, uint64_t size) 531 + { 532 + __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G); 556 533 } 557 534 558 535 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,

+1 -1

tools/testing/selftests/kvm/max_guest_memory_test.c

··· 244 244 #ifdef __x86_64__ 245 245 /* Identity map memory in the guest using 1gb pages. */ 246 246 for (i = 0; i < slot_size; i += size_1gb) 247 - __virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G); 247 + __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); 248 248 #else 249 249 for (i = 0; i < slot_size; i += vm_get_page_size(vm)) 250 250 virt_pg_map(vm, gpa + i, gpa + i);

+1 -1

tools/testing/selftests/kvm/x86_64/mmu_role_test.c

··· 35 35 run = vcpu_state(vm, VCPU_ID); 36 36 37 37 /* Map 1gb page without a backing memslot. */ 38 - __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G); 38 + __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G); 39 39 40 40 r = _vcpu_run(vm, VCPU_ID); 41 41

-1

tools/testing/selftests/net/.gitignore

··· 37 37 ioam6_parser 38 38 toeplitz 39 39 cmsg_sender 40 - bind_bhash_test

-2

tools/testing/selftests/net/Makefile

··· 59 59 TEST_GEN_FILES += cmsg_sender 60 60 TEST_GEN_FILES += stress_reuseport_listen 61 61 TEST_PROGS += test_vxlan_vnifiltering.sh 62 - TEST_GEN_FILES += bind_bhash_test 63 62 64 63 TEST_FILES := settings 65 64 ··· 69 70 70 71 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma 71 72 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread 72 - $(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread 73 73 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread

-119

tools/testing/selftests/net/bind_bhash_test.c

··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* 3 - * This times how long it takes to bind to a port when the port already 4 - * has multiple sockets in its bhash table. 5 - * 6 - * In the setup(), we populate the port's bhash table with 7 - * MAX_THREADS * MAX_CONNECTIONS number of entries. 8 - */ 9 - 10 - #include <unistd.h> 11 - #include <stdio.h> 12 - #include <netdb.h> 13 - #include <pthread.h> 14 - 15 - #define MAX_THREADS 600 16 - #define MAX_CONNECTIONS 40 17 - 18 - static const char *bind_addr = "::1"; 19 - static const char *port; 20 - 21 - static int fd_array[MAX_THREADS][MAX_CONNECTIONS]; 22 - 23 - static int bind_socket(int opt, const char *addr) 24 - { 25 - struct addrinfo *res, hint = {}; 26 - int sock_fd, reuse = 1, err; 27 - 28 - sock_fd = socket(AF_INET6, SOCK_STREAM, 0); 29 - if (sock_fd < 0) { 30 - perror("socket fd err"); 31 - return -1; 32 - } 33 - 34 - hint.ai_family = AF_INET6; 35 - hint.ai_socktype = SOCK_STREAM; 36 - 37 - err = getaddrinfo(addr, port, &hint, &res); 38 - if (err) { 39 - perror("getaddrinfo failed"); 40 - return -1; 41 - } 42 - 43 - if (opt) { 44 - err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse)); 45 - if (err) { 46 - perror("setsockopt failed"); 47 - return -1; 48 - } 49 - } 50 - 51 - err = bind(sock_fd, res->ai_addr, res->ai_addrlen); 52 - if (err) { 53 - perror("failed to bind to port"); 54 - return -1; 55 - } 56 - 57 - return sock_fd; 58 - } 59 - 60 - static void *setup(void *arg) 61 - { 62 - int sock_fd, i; 63 - int *array = (int *)arg; 64 - 65 - for (i = 0; i < MAX_CONNECTIONS; i++) { 66 - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); 67 - if (sock_fd < 0) 68 - return NULL; 69 - array[i] = sock_fd; 70 - } 71 - 72 - return NULL; 73 - } 74 - 75 - int main(int argc, const char *argv[]) 76 - { 77 - int listener_fd, sock_fd, i, j; 78 - pthread_t tid[MAX_THREADS]; 79 - clock_t begin, end; 80 - 81 - if (argc != 2) { 82 - printf("Usage: listener <port>\n"); 83 - return -1; 84 - } 85 - 86 - 
port = argv[1]; 87 - 88 - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr); 89 - if (listen(listener_fd, 100) < 0) { 90 - perror("listen failed"); 91 - return -1; 92 - } 93 - 94 - /* Set up threads to populate the bhash table entry for the port */ 95 - for (i = 0; i < MAX_THREADS; i++) 96 - pthread_create(&tid[i], NULL, setup, fd_array[i]); 97 - 98 - for (i = 0; i < MAX_THREADS; i++) 99 - pthread_join(tid[i], NULL); 100 - 101 - begin = clock(); 102 - 103 - /* Bind to the same port on a different address */ 104 - sock_fd = bind_socket(0, "2001:0db8:0:f101::1"); 105 - 106 - end = clock(); 107 - 108 - printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC); 109 - 110 - /* clean up */ 111 - close(sock_fd); 112 - close(listener_fd); 113 - for (i = 0; i < MAX_THREADS; i++) { 114 - for (j = 0; i < MAX_THREADS; i++) 115 - close(fd_array[i][j]); 116 - } 117 - 118 - return 0; 119 - }

+13 -15

tools/testing/selftests/wireguard/qemu/Makefile

··· 64 64 ifeq ($(HOST_ARCH),$(ARCH)) 65 65 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm 66 66 else 67 - QEMU_MACHINE := -cpu cortex-a53 -machine virt 68 - CFLAGS += -march=armv8-a -mtune=cortex-a53 67 + QEMU_MACHINE := -cpu max -machine virt 68 + CFLAGS += -march=armv8-a 69 69 endif 70 70 else ifeq ($(ARCH),aarch64_be) 71 71 CHOST := aarch64_be-linux-musl ··· 76 76 ifeq ($(HOST_ARCH),$(ARCH)) 77 77 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm 78 78 else 79 - QEMU_MACHINE := -cpu cortex-a53 -machine virt 80 - CFLAGS += -march=armv8-a -mtune=cortex-a53 79 + QEMU_MACHINE := -cpu max -machine virt 80 + CFLAGS += -march=armv8-a 81 81 endif 82 82 else ifeq ($(ARCH),arm) 83 83 CHOST := arm-linux-musleabi ··· 88 88 ifeq ($(HOST_ARCH),$(ARCH)) 89 89 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm 90 90 else 91 - QEMU_MACHINE := -cpu cortex-a15 -machine virt 92 - CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux 91 + QEMU_MACHINE := -cpu max -machine virt 92 + CFLAGS += -march=armv7-a -mabi=aapcs-linux 93 93 endif 94 94 else ifeq ($(ARCH),armeb) 95 95 CHOST := armeb-linux-musleabi ··· 100 100 ifeq ($(HOST_ARCH),$(ARCH)) 101 101 QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm 102 102 else 103 - QEMU_MACHINE := -cpu cortex-a15 -machine virt 104 - CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian. 
103 + QEMU_MACHINE := -cpu max -machine virt 104 + CFLAGS += -march=armv7-a -mabi=aapcs-linux 105 105 LDFLAGS += -Wl,--be8 106 106 endif 107 107 else ifeq ($(ARCH),x86_64) ··· 112 112 ifeq ($(HOST_ARCH),$(ARCH)) 113 113 QEMU_MACHINE := -cpu host -machine q35,accel=kvm 114 114 else 115 - QEMU_MACHINE := -cpu Skylake-Server -machine q35 116 - CFLAGS += -march=skylake-avx512 115 + QEMU_MACHINE := -cpu max -machine q35 117 116 endif 118 117 else ifeq ($(ARCH),i686) 119 118 CHOST := i686-linux-musl ··· 122 123 ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) 123 124 QEMU_MACHINE := -cpu host -machine q35,accel=kvm 124 125 else 125 - QEMU_MACHINE := -cpu coreduo -machine q35 126 - CFLAGS += -march=prescott 126 + QEMU_MACHINE := -cpu max -machine q35 127 127 endif 128 128 else ifeq ($(ARCH),mips64) 129 129 CHOST := mips64-linux-musl ··· 180 182 ifeq ($(HOST_ARCH),$(ARCH)) 181 183 QEMU_MACHINE := -cpu host,accel=kvm -machine pseries 182 184 else 183 - QEMU_MACHINE := -machine pseries 185 + QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng 184 186 endif 185 187 else ifeq ($(ARCH),powerpc64le) 186 188 CHOST := powerpc64le-linux-musl ··· 190 192 ifeq ($(HOST_ARCH),$(ARCH)) 191 193 QEMU_MACHINE := -cpu host,accel=kvm -machine pseries 192 194 else 193 - QEMU_MACHINE := -machine pseries 195 + QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng 194 196 endif 195 197 else ifeq ($(ARCH),powerpc) 196 198 CHOST := powerpc-linux-musl ··· 245 247 ifeq ($(HOST_ARCH),$(ARCH)) 246 248 QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) 247 249 else 248 - QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) 250 + QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) 249 251 endif 250 252 else 251 253 $(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, 
riscv32, s390x)

+3

tools/testing/selftests/wireguard/qemu/init.c

··· 21 21 #include <sys/utsname.h> 22 22 #include <sys/sendfile.h> 23 23 #include <sys/sysmacros.h> 24 + #include <sys/random.h> 24 25 #include <linux/random.h> 25 26 #include <linux/version.h> 26 27 ··· 59 58 { 60 59 int bits = 256, fd; 61 60 61 + if (!getrandom(NULL, 0, GRND_NONBLOCK)) 62 + return; 62 63 pretty_message("[+] Fake seeding RNG..."); 63 64 fd = open("/dev/random", O_WRONLY); 64 65 if (fd < 0)

+3

tools/testing/selftests/wireguard/qemu/kernel.config

··· 31 31 CONFIG_BINFMT_ELF=y 32 32 CONFIG_BINFMT_SCRIPT=y 33 33 CONFIG_VDSO=y 34 + CONFIG_STRICT_KERNEL_RWX=y 34 35 CONFIG_VIRTUALIZATION=y 35 36 CONFIG_HYPERVISOR_GUEST=y 36 37 CONFIG_PARAVIRT=y ··· 66 65 CONFIG_PROC_SYSCTL=y 67 66 CONFIG_SYSFS=y 68 67 CONFIG_TMPFS=y 68 + CONFIG_RANDOM_TRUST_CPU=y 69 + CONFIG_RANDOM_TRUST_BOOTLOADER=y 69 70 CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 70 71 CONFIG_LOG_BUF_SHIFT=18 71 72 CONFIG_PRINTK_TIME=y

+6 -2

virt/kvm/kvm_main.c

··· 3328 3328 3329 3329 vcpu->stat.generic.blocking = 1; 3330 3330 3331 + preempt_disable(); 3331 3332 kvm_arch_vcpu_blocking(vcpu); 3332 - 3333 3333 prepare_to_rcuwait(wait); 3334 + preempt_enable(); 3335 + 3334 3336 for (;;) { 3335 3337 set_current_state(TASK_INTERRUPTIBLE); 3336 3338 ··· 3342 3340 waited = true; 3343 3341 schedule(); 3344 3342 } 3345 - finish_rcuwait(wait); 3346 3343 3344 + preempt_disable(); 3345 + finish_rcuwait(wait); 3347 3346 kvm_arch_vcpu_unblocking(vcpu); 3347 + preempt_enable(); 3348 3348 3349 3349 vcpu->stat.generic.blocking = 0; 3350 3350