···
 		/sys/devices/system/cpu/vulnerabilities/spec_store_bypass
 		/sys/devices/system/cpu/vulnerabilities/l1tf
 		/sys/devices/system/cpu/vulnerabilities/mds
+		/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+		/sys/devices/system/cpu/vulnerabilities/itlb_multihit
 Date:		January 2018
 Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:	Information about CPU vulnerabilities
···
+iTLB multihit
+=============
+
+iTLB multihit is an erratum where some processors may incur a machine check
+error, possibly resulting in an unrecoverable CPU lockup, when an
+instruction fetch hits multiple entries in the instruction TLB. This can
+occur when the page size is changed along with either the physical address
+or cache type. A malicious guest running on a virtualized system can
+exploit this erratum to perform a denial of service attack.
+
+
+Affected processors
+-------------------
+
+Variations of this erratum are present on most Intel Core and Xeon processor
+models. The erratum is not present on:
+
+   - non-Intel processors
+
+   - some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont)
+
+   - Intel processors that have the PSCHANGE_MC_NO bit set in the
+     IA32_ARCH_CAPABILITIES MSR.
+
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+   ==============  =================================================
+   CVE-2018-12207  Machine Check Error Avoidance on Page Size Change
+   ==============  =================================================
+
+
+Problem
+-------
+
+Privileged software, including the OS and virtual machine managers (VMMs), is
+in charge of memory management. A key component of memory management is
+control of the page tables. Modern processors use virtual memory, a technique
+that creates the illusion of a very large memory for processors. This virtual
+space is split into pages of a given size. Page tables translate virtual
+addresses to physical addresses.
+
+To reduce the latency of virtual to physical address translation, processors
+include a structure, called the TLB, that caches recent translations. There
+are separate TLBs for instructions (iTLB) and data (dTLB).
+
+Under this erratum, instructions are fetched from a linear address translated
+using a 4 KB translation cached in the iTLB. Privileged software modifies the
+paging structure so that the same linear address is translated using a large
+page size (2 MB, 4 MB, 1 GB) with a different physical address or memory
+type. After the page structure modification, but before the software
+invalidates any iTLB entries for the linear address, a code fetch from the
+same linear address may cause a machine-check error, which can result in a
+system hang or shutdown.
+
+
+Attack scenarios
+----------------
+
+Attacks against the iTLB multihit erratum can be mounted from malicious
+guests in a virtualized system.
+
+
+iTLB multihit system information
+--------------------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current iTLB
+multihit status of the system: whether the system is vulnerable and which
+mitigations are active. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+
+The possible values in this file are:
+
+.. list-table::
+
+   * - Not affected
+     - The processor is not vulnerable.
+   * - KVM: Mitigation: Split huge pages
+     - Software changes mitigate this issue.
+   * - KVM: Vulnerable
+     - The processor is vulnerable, but no mitigation is enabled.
+
+
+Enumeration of the erratum
+--------------------------
+
+A new bit has been allocated in the IA32_ARCH_CAPABILITIES MSR
+(PSCHANGE_MC_NO); it is set on CPUs which are mitigated against this issue.
+
+   =======================================  ===========  ================================
+   IA32_ARCH_CAPABILITIES MSR               Not present  Possibly vulnerable, check model
+   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]   '0'          Likely vulnerable, check model
+   IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO]   '1'          Not vulnerable
+   =======================================  ===========  ================================
+
+
+Mitigation mechanism
+--------------------
+
+This erratum can be mitigated by restricting the use of large page sizes to
+non-executable pages. This forces all iTLB entries to be 4K and removes
+the possibility of multiple hits.
+
+In order to mitigate the vulnerability, KVM initially marks all huge pages
+as non-executable. If the guest attempts to execute in one of those pages,
+the page is broken down into 4K pages, which are then marked executable.
+
+If EPT is disabled or not available on the host, KVM is in control of TLB
+flushes and the problematic situation cannot happen. However, the shadow
+EPT paging mechanism used by nested virtualization is vulnerable, because
+the nested guest can trigger multiple iTLB hits by modifying its own
+(non-nested) page tables. For simplicity, KVM will make large pages
+non-executable in all shadow paging modes.
+
+Mitigation control on the kernel command line and KVM - module parameter
+------------------------------------------------------------------------
+
+The KVM hypervisor mitigation mechanism for marking huge pages as
+non-executable can be controlled with the module parameter "nx_huge_pages=".
+The kernel command line allows controlling the iTLB multihit mitigations at
+boot time with the option "kvm.nx_huge_pages=".
+
+The valid arguments for these options are:
+
+  ==========  ================================================================
+  force       Mitigation is enabled. In this case, the mitigation implements
+              non-executable huge pages in the Linux kernel KVM module. All
+              huge pages in the EPT are marked as non-executable.
+              If a guest attempts to execute in one of those pages, the page
+              is broken down into 4K pages, which are then marked executable.
+
+  off         Mitigation is disabled.
+
+  auto        Enable mitigation only if the platform is affected and the
+              kernel was not booted with the "mitigations=off" command line
+              parameter. This is the default option.
+  ==========  ================================================================
+
+
+Mitigation selection guide
+--------------------------
+
+1. No virtualization in use
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   The system is protected by the kernel unconditionally and no further
+   action is required.
+
+2. Virtualization with trusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   If the guest comes from a trusted source, you may assume that the guest
+   will not attempt to maliciously exploit this erratum and no further
+   action is required.
+
+3. Virtualization with untrusted guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+   If the guest comes from an untrusted source, the host kernel will need
+   to apply the iTLB multihit mitigation via the kernel command line or the
+   kvm module parameter.
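The sysfs interface described above can be checked programmatically. A minimal sketch in Python; the `classify` helper and its category names are illustrative conventions, not part of the kernel interface:

```python
from pathlib import Path

# Path documented above; present on x86 systems with a recent kernel.
SYSFS_FILE = Path("/sys/devices/system/cpu/vulnerabilities/itlb_multihit")

def classify(status: str) -> str:
    """Map an itlb_multihit sysfs string to a coarse category."""
    status = status.strip()
    if status == "Not affected":
        return "safe"
    if status.startswith("KVM: Mitigation"):
        return "mitigated"
    # e.g. "KVM: Vulnerable"
    return "vulnerable"

if SYSFS_FILE.exists():
    print(classify(SYSFS_FILE.read_text()))
```

On a host where the split-huge-pages mitigation is active, the file reads "KVM: Mitigation: Split huge pages" and the sketch would report "mitigated".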
···
+.. SPDX-License-Identifier: GPL-2.0
+
+TAA - TSX Asynchronous Abort
+============================
+
+TAA is a hardware vulnerability that allows unprivileged speculative access to
+data which is available in various CPU internal buffers by using asynchronous
+aborts within an Intel TSX transactional region.
+
+Affected processors
+-------------------
+
+This vulnerability only affects Intel processors that support Intel
+Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8)
+is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit
+(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations
+also mitigate against TAA.
+
+Whether a processor is affected or not can be read out from the TAA
+vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`.
+
+Related CVEs
+------------
+
+The following CVE entry is related to this TAA issue:
+
+   ==============  =====  ===================================================
+   CVE-2019-11135  TAA    TSX Asynchronous Abort (TAA) condition on some
+                          microprocessors utilizing speculative execution may
+                          allow an authenticated user to potentially enable
+                          information disclosure via a side channel with
+                          local access.
+   ==============  =====  ===================================================
+
+Problem
+-------
+
+When performing store, load or L1 refill operations, processors write
+data into temporary microarchitectural structures (buffers). The data in
+those buffers can be forwarded to load operations as an optimization.
+
+Intel TSX is an extension to the x86 instruction set architecture that adds
+hardware transactional memory support to improve the performance of
+multi-threaded software. TSX lets the processor expose and exploit
+concurrency hidden in an application by dynamically avoiding unnecessary
+synchronization.
+
+TSX supports atomic memory transactions that are either committed (success)
+or aborted. During an abort, operations that happened within the
+transactional region are rolled back. An asynchronous abort takes place,
+among other options, when a different thread accesses a cache line that is
+also used within the transactional region when that access might lead to a
+data race.
+
+Immediately after an uncompleted asynchronous abort, certain speculatively
+executed loads may read data from those internal buffers and pass it to
+dependent operations. This can then be used to infer the value via a cache
+side channel attack.
+
+Because the buffers are potentially shared between Hyper-Threads, cross
+Hyper-Thread attacks are possible.
+
+The victim of a malicious actor does not need to make use of TSX. Only the
+attacker needs to begin a TSX transaction and raise an asynchronous abort
+which in turn potentially leaks data stored in the buffers.
+
+More detailed technical information is available in the TAA specific x86
+architecture section: :ref:`Documentation/x86/tsx_async_abort.rst <tsx_async_abort>`.
+
+
+Attack scenarios
+----------------
+
+Attacks against the TAA vulnerability can be implemented from unprivileged
+applications running on hosts or guests.
+
+As for MDS, the attacker has no control over the memory addresses that can
+be leaked. Only the victim is responsible for bringing data to the CPU. As
+a result, the malicious actor has to sample as much data as possible and
+then postprocess it to try to infer any useful information from it.
+
+A potential attacker only has read access to the data. Also, there is no
+direct privilege escalation by using this technique.
+
+
+.. _tsx_async_abort_sys_info:
+
+TAA system information
+----------------------
+
+The Linux kernel provides a sysfs interface to enumerate the current TAA
+status of mitigated systems. The relevant sysfs file is:
+
+/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
+
+The possible values in this file are:
+
+.. list-table::
+
+   * - 'Vulnerable'
+     - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied.
+   * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+     - The system tries to clear the buffers but the microcode might not support the operation.
+   * - 'Mitigation: Clear CPU buffers'
+     - The microcode has been updated to clear the buffers. TSX is still enabled.
+   * - 'Mitigation: TSX disabled'
+     - TSX is disabled.
+   * - 'Not affected'
+     - The CPU is not affected by this issue.
+
+.. _ucode_needed:
+
+Best effort mitigation mode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If the processor is vulnerable, but the availability of the microcode-based
+mitigation mechanism is not advertised via CPUID, the kernel selects a best
+effort mitigation mode. This mode invokes the mitigation instructions
+without a guarantee that they clear the CPU buffers.
+
+This is done to address virtualization scenarios where the host has the
+microcode update applied, but the hypervisor is not yet updated to expose
+the CPUID to the guest. If the host has updated microcode the protection
+takes effect; otherwise a few CPU cycles are wasted pointlessly.
+
+The state in the tsx_async_abort sysfs file reflects this situation
+accordingly.
+
+
+Mitigation mechanism
+--------------------
+
+The kernel detects the affected CPUs and the presence of the microcode which
+is required. If a CPU is affected and the microcode is available, then the
+kernel enables the mitigation by default.
+
+The mitigation can be controlled at boot time via a kernel command line
+option. See :ref:`taa_mitigation_control_command_line`.
+
+.. _virt_mechanism:
+
+Virtualization mitigation
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Affected systems where the host has the TAA microcode and TAA is mitigated
+by having disabled TSX previously are not vulnerable regardless of the
+status of the VMs.
+
+In all other cases, if the host either does not have the TAA microcode or
+the kernel is not mitigated, the system might be vulnerable.
+
+
+.. _taa_mitigation_control_command_line:
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+The kernel command line allows controlling the TAA mitigations at boot time
+with the option "tsx_async_abort=". The valid arguments for this option are:
+
+  ============  =============================================================
+  off           This option disables the TAA mitigation on affected
+                platforms. If the system has TSX enabled (see next parameter)
+                and the CPU is affected, the system is vulnerable.
+
+  full          TAA mitigation is enabled. If TSX is enabled, on an affected
+                system it will clear CPU buffers on ring transitions. On
+                systems which are MDS-affected and deploy MDS mitigation,
+                TAA is also mitigated. Specifying this option on those
+                systems will have no effect.
+
+  full,nosmt    The same as tsx_async_abort=full, with SMT disabled on
+                vulnerable CPUs that have TSX enabled. This is the complete
+                mitigation. When TSX is disabled, SMT is not disabled
+                because the CPU is not vulnerable to cross-thread TAA
+                attacks.
+  ============  =============================================================
+
+Not specifying this option is equivalent to "tsx_async_abort=full".
+
+The kernel command line also allows controlling the TSX feature using the
+parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is
+used to control the TSX feature and the enumeration of the TSX feature bits
+(RTM and HLE) in CPUID.
+
+The valid options are:
+
+  ============  =============================================================
+  off           Disables TSX on the system.
+
+                Note that this option takes effect only on newer CPUs which
+                are not vulnerable to MDS, i.e., have
+                MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get the new
+                IA32_TSX_CTRL MSR through a microcode update. This new MSR
+                allows for the reliable deactivation of the TSX
+                functionality.
+
+  on            Enables TSX.
+
+                Although there are mitigations for all known security
+                vulnerabilities, TSX has been known to be an accelerator for
+                several previous speculation-related CVEs, and so there may
+                be unknown security risks associated with leaving it
+                enabled.
+
+  auto          Disables TSX if X86_BUG_TAA is present, otherwise enables
+                TSX on the system.
+  ============  =============================================================
+
+Not specifying this option is equivalent to "tsx=off".
+
+The following combinations of the "tsx_async_abort" and "tsx" options are
+possible. For affected platforms tsx=auto is equivalent to tsx=off and the
+result will be:
+
+  =========  ==========================  =========================================
+  tsx=on     tsx_async_abort=full        The system will use VERW to clear CPU
+                                         buffers. Cross-thread attacks are still
+                                         possible on SMT machines.
+  tsx=on     tsx_async_abort=full,nosmt  As above, cross-thread attacks on SMT
+                                         mitigated.
+  tsx=on     tsx_async_abort=off         The system is vulnerable.
+  tsx=off    tsx_async_abort=full        TSX might be disabled if microcode
+                                         provides a TSX control MSR. If so,
+                                         the system is not vulnerable.
+  tsx=off    tsx_async_abort=full,nosmt  Ditto
+  tsx=off    tsx_async_abort=off         Ditto
+  =========  ==========================  =========================================
+
+For unaffected platforms "tsx=on" and "tsx_async_abort=full" do not clear
+CPU buffers. For platforms without TSX control
+(MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0) the "tsx" command line argument has no
+effect.
+
+For affected platforms, the table below indicates the mitigation status for
+the combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits
+MDS_NO and TSX_CTRL_MSR.
+
+  =======  =========  =============  ========================================
+  MDS_NO   MD_CLEAR   TSX_CTRL_MSR   Status
+  =======  =========  =============  ========================================
+    0         0            0         Vulnerable (needs microcode)
+    0         1            0         MDS and TAA mitigated via VERW
+    1         1            0         MDS fixed, TAA vulnerable if TSX enabled
+                                     because MD_CLEAR has no meaning and
+                                     VERW is not guaranteed to clear buffers
+    1         X            1         MDS fixed, TAA can be mitigated by
+                                     VERW or TSX_CTRL_MSR
+  =======  =========  =============  ========================================
+
+Mitigation selection guide
+--------------------------
+
+1. Trusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If all user space applications are from a trusted source and do not execute
+untrusted code which is supplied externally, then the mitigation can be
+disabled. The same applies to virtualized environments with trusted guests.
+
+
+2. Untrusted userspace and guests
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If there are untrusted applications or guests on the system, enabling TSX
+might allow a malicious actor to leak data from the host or from other
+processes running on the same physical core.
+
+If the microcode is available and TSX is disabled on the host, attacks
+are prevented in a virtualized environment as well, even if the VMs do not
+explicitly enable the mitigation.
+
+
+.. _taa_default_mitigations:
+
+Default mitigations
+-------------------
+
+The kernel's default action for vulnerable processors is:
+
+  - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off).
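The affected-processor rules above reduce to two bit tests on IA32_ARCH_CAPABILITIES. A small sketch of that logic; the helper names are illustrative and the MSR value would in practice come from a privileged read (e.g. /dev/cpu/*/msr):

```python
# Bit positions documented above for IA32_ARCH_CAPABILITIES.
TAA_NO = 1 << 8   # set => CPU is not affected by TAA
MDS_NO = 1 << 5   # set => CPU is not affected by MDS

def taa_affected(arch_caps: int, cpu_has_tsx: bool) -> bool:
    """Per the text above: only TSX-capable CPUs with TAA_NO clear are affected."""
    return cpu_has_tsx and not (arch_caps & TAA_NO)

def mds_mitigation_covers_taa(arch_caps: int) -> bool:
    """If MDS_NO is clear, the existing MDS mitigations also mitigate TAA."""
    return not (arch_caps & MDS_NO)
```

For example, a TSX-capable CPU reporting arch_caps with both bits clear is TAA-affected but already covered by the MDS mitigation.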
Documentation/admin-guide/kernel-parameters.txt (+92)
···
 			KVM MMU at runtime.
 			Default is 0 (off)
 
+	kvm.nx_huge_pages=
+			[KVM] Controls the software workaround for the
+			X86_BUG_ITLB_MULTIHIT bug.
+			force	: Always deploy workaround.
+			off	: Never deploy workaround.
+			auto	: Deploy workaround based on the presence of
+				  X86_BUG_ITLB_MULTIHIT.
+
+			Default is 'auto'.
+
+			If the software workaround is enabled for the host,
+			guests need not enable it for nested guests.
+
+	kvm.nx_huge_pages_recovery_ratio=
+			[KVM] Controls how many 4KiB pages are periodically
+			zapped back to huge pages. 0 disables the recovery;
+			otherwise, if the value is N, KVM will zap 1/Nth of
+			the 4KiB pages every minute. The default is 60.
+
 	kvm-amd.nested=	[KVM,AMD] Allow nested virtualization in KVM/SVM.
 			Default is 1 (enabled)
···
 				ssbd=force-off [ARM64]
 				l1tf=off [X86]
 				mds=off [X86]
+				tsx_async_abort=off [X86]
+				kvm.nx_huge_pages=off [X86]
+
+				Exceptions:
+					This does not have any effect on
+					kvm.nx_huge_pages when
+					kvm.nx_huge_pages=force.
 
 		auto (default)
 			Mitigate all CPU vulnerabilities, but leave SMT
···
 			be fully mitigated, even if it means losing SMT.
 			Equivalent to: l1tf=flush,nosmt [X86]
 				       mds=full,nosmt [X86]
+				       tsx_async_abort=full,nosmt [X86]
 
 	mminit_loglevel=
 		[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
···
 			in situations with strict latency requirements (where
 			interruptions from clocksource watchdog are not
 			acceptable).
+
+	tsx=		[X86] Control Transactional Synchronization
+			Extensions (TSX) feature in Intel processors that
+			support TSX control.
+
+			This parameter controls the TSX feature. The options
+			are:
+
+			on	- Enable TSX on the system. Although there
+				  are mitigations for all known security
+				  vulnerabilities, TSX has been known to be
+				  an accelerator for several previous
+				  speculation-related CVEs, and so there may
+				  be unknown security risks associated with
+				  leaving it enabled.
+
+			off	- Disable TSX on the system. (Note that this
+				  option takes effect only on newer CPUs
+				  which are not vulnerable to MDS, i.e.,
+				  have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1
+				  and which get the new IA32_TSX_CTRL MSR
+				  through a microcode update. This new MSR
+				  allows for the reliable deactivation of
+				  the TSX functionality.)
+
+			auto	- Disable TSX if X86_BUG_TAA is present,
+				  otherwise enable TSX on the system.
+
+			Not specifying this option is equivalent to tsx=off.
+
+			See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+			for more details.
+
+	tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async
+			Abort (TAA) vulnerability.
+
+			Similar to Micro-architectural Data Sampling (MDS),
+			certain CPUs that support Transactional
+			Synchronization Extensions (TSX) are vulnerable to an
+			exploit against CPU internal buffers which can
+			forward information to a disclosure gadget under
+			certain conditions.
+
+			In vulnerable processors, the speculatively forwarded
+			data can be used in a cache side channel attack, to
+			access data to which the attacker does not have
+			direct access.
+
+			This parameter controls the TAA mitigation. The
+			options are:
+
+			full       - Enable TAA mitigation on vulnerable CPUs
+				     if TSX is enabled.
+
+			full,nosmt - Enable TAA mitigation and disable SMT on
+				     vulnerable CPUs. If TSX is disabled, SMT
+				     is not disabled because the CPU is not
+				     vulnerable to cross-thread TAA attacks.
+			off        - Unconditionally disable TAA mitigation
+
+			Not specifying this option is equivalent to
+			tsx_async_abort=full. On CPUs which are MDS affected
+			and deploy MDS mitigation, TAA mitigation is not
+			required and doesn't provide any additional
+			mitigation.
+
+			For details see:
+			Documentation/admin-guide/hw-vuln/tsx_async_abort.rst
+
 	turbografx.map[2|3]=	[HW,JOY]
 			TurboGraFX parallel port interface
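The valid values documented above can be validated with a small parser. A sketch only; the option tables are taken from this document, while the `check_cmdline` helper itself is hypothetical:

```python
# Recognized mitigation options and their valid values, per the text above.
VALID = {
    "kvm.nx_huge_pages": {"force", "off", "auto"},
    "tsx_async_abort": {"full", "full,nosmt", "off"},
    "tsx": {"on", "off", "auto"},
}

def check_cmdline(cmdline: str) -> dict:
    """Return the recognized, validly-valued mitigation options on a kernel command line."""
    found = {}
    for token in cmdline.split():
        key, sep, val = token.partition("=")
        if sep and key in VALID and val in VALID[key]:
            found[key] = val
    return found
```

For instance, a boot line containing "tsx=off tsx_async_abort=full,nosmt" would be reported with both options recognized, while a malformed value such as "tsx=bogus" would be ignored.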
···
+.. SPDX-License-Identifier: GPL-2.0
+
+TSX Async Abort (TAA) mitigation
+================================
+
+.. _tsx_async_abort:
+
+Overview
+--------
+
+TSX Async Abort (TAA) is a side channel attack on internal buffers in some
+Intel processors similar to Microarchitectural Data Sampling (MDS). In this
+case, certain loads may speculatively pass invalid data to dependent
+operations when an asynchronous abort condition is pending in a
+Transactional Synchronization Extensions (TSX) transaction. This includes
+loads with no fault or assist condition. Such loads may speculatively expose
+stale data from the same uarch data structures as in MDS, with the same
+scope of exposure, i.e. same-thread and cross-thread. This issue affects all
+current processors that support TSX.
+
+Mitigation strategy
+-------------------
+
+a) TSX disable - one of the mitigations is to disable TSX. A new MSR
+   IA32_TSX_CTRL will be available in future and current processors after a
+   microcode update which can be used to disable TSX. In addition, it
+   controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID.
+
+b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates
+   this vulnerability. More details on this approach can be found in
+   :ref:`Documentation/admin-guide/hw-vuln/mds.rst <mds>`.
+
+Kernel internal mitigation modes
+--------------------------------
+
+ =============  ============================================================
+ off            Mitigation is disabled. Either the CPU is not affected or
+                tsx_async_abort=off is supplied on the kernel command line.
+
+ tsx disabled   Mitigation is enabled. TSX feature is disabled by default at
+                bootup on processors that support TSX control.
+
+ verw           Mitigation is enabled. CPU is affected and MD_CLEAR is
+                advertised in CPUID.
+
+ ucode needed   Mitigation is enabled. CPU is affected and MD_CLEAR is not
+                advertised in CPUID. That is mainly for virtualization
+                scenarios where the host has the updated microcode but the
+                hypervisor does not expose MD_CLEAR in CPUID. It's a best
+                effort approach without guarantee.
+ =============  ============================================================
+
+If the CPU is affected and the "tsx_async_abort" kernel command line
+parameter is not provided, then the kernel selects an appropriate mitigation
+depending on the status of the RTM and MD_CLEAR CPUID bits.
+
+The tables below indicate the impact of the tsx=on|off|auto cmdline options
+on the state of the TAA mitigation, VERW behavior and the TSX feature for
+various combinations of MSR_IA32_ARCH_CAPABILITIES bits.
+
+1. "tsx=off"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=off
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+  0          0            0         HW default    Yes             Same as MDS          Same as MDS
+  0          0            1         Invalid case  Invalid case    Invalid case         Invalid case
+  0          1            0         HW default    No              Need ucode update    Need ucode update
+  0          1            1         Disabled      Yes             TSX disabled         TSX disabled
+  1          X            1         Disabled      X               None needed          None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+2. "tsx=on"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=on
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+  0          0            0         HW default    Yes             Same as MDS          Same as MDS
+  0          0            1         Invalid case  Invalid case    Invalid case         Invalid case
+  0          1            0         HW default    No              Need ucode update    Need ucode update
+  0          1            1         Enabled       Yes             None                 Same as MDS
+  1          X            1         Enabled       X               None needed          None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+3. "tsx=auto"
+
+=========  =========  ============  ============  ==============  ===================  ======================
+MSR_IA32_ARCH_CAPABILITIES bits     Result with cmdline tsx=auto
+----------------------------------  -------------------------------------------------------------------------
+TAA_NO     MDS_NO     TSX_CTRL_MSR  TSX state     VERW can clear  TAA mitigation       TAA mitigation
+                                    after bootup  CPU buffers     tsx_async_abort=off  tsx_async_abort=full
+=========  =========  ============  ============  ==============  ===================  ======================
+  0          0            0         HW default    Yes             Same as MDS          Same as MDS
+  0          0            1         Invalid case  Invalid case    Invalid case         Invalid case
+  0          1            0         HW default    No              Need ucode update    Need ucode update
+  0          1            1         Disabled      Yes             TSX disabled         TSX disabled
+  1          X            1         Enabled       X               None needed          None needed
+=========  =========  ============  ============  ==============  ===================  ======================
+
+In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that
+indicates whether MSR_IA32_TSX_CTRL is supported.
+
+There are two control bits in the IA32_TSX_CTRL MSR:
+
+  Bit 0: When set, it disables the Restricted Transactional Memory (RTM)
+         sub-feature of TSX (will force all transactions to abort on the
+         XBEGIN instruction).
+
+  Bit 1: When set, it disables the enumeration of the RTM and HLE features
+         (i.e. it will make CPUID(EAX=7).EBX{bit4} and
+         CPUID(EAX=7).EBX{bit11} read as 0).
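The two IA32_TSX_CTRL control bits described above compose the MSR value the kernel would write for the "tsx=" modes. A minimal sketch; the `tsx_ctrl_for` helper is illustrative, not kernel code:

```python
# IA32_TSX_CTRL control bits, per the description above.
TSX_CTRL_RTM_DISABLE = 1 << 0   # bit 0: force all RTM transactions to abort
TSX_CTRL_CPUID_CLEAR = 1 << 1   # bit 1: hide the RTM/HLE CPUID feature bits

def tsx_ctrl_for(mode: str) -> int:
    """MSR value plausibly written for tsx=on / tsx=off on TSX-control CPUs."""
    if mode == "off":
        # Disable RTM and hide the feature from CPUID enumeration.
        return TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR
    # tsx=on: leave both control bits clear.
    return 0
```

So "tsx=off" corresponds to writing both bits (value 0b11), and "tsx=on" to clearing both.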
MAINTAINERS (-2)
···
 F:	drivers/cpufreq/bmips-cpufreq.c
 
 BROADCOM BMIPS MIPS ARCHITECTURE
-M:	Kevin Cernekee <cernekee@gmail.com>
 M:	Florian Fainelli <f.fainelli@gmail.com>
 L:	bcm-kernel-feedback-list@broadcom.com
 L:	linux-mips@vger.kernel.org
···
 
 CAVIUM THUNDERX2 ARM64 SOC
 M:	Robert Richter <rrichter@cavium.com>
-M:	Jayachandran C <jnair@caviumnetworks.com>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm64/boot/dts/cavium/thunder2-99xx*
Makefile (+4-1)
···
 VERSION = 5
 PATCHLEVEL = 4
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Kleptomaniac Octopus
 
 # *DOCUMENTATION*
···
 ifeq ($(CONFIG_RELR),y)
 LDFLAGS_vmlinux	+= --pack-dyn-relocs=relr
 endif
+
+# make the checker run with the right architecture
+CHECKFLAGS += --arch=$(ARCH)
 
 # insure the checker run with the right endianness
 CHECKFLAGS += $(if $(CONFIG_CPU_BIG_ENDIAN),-mbig-endian,-mlittle-endian)
···
 }
 #define __arch_get_clock_mode __mips_get_clock_mode
 
-static __always_inline
-int __mips_use_vsyscall(struct vdso_data *vdata)
-{
-	return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE);
-}
-#define __arch_use_vsyscall __mips_use_vsyscall
-
 /* The asm-generic header needs to be included after the definitions above */
 #include <asm-generic/vdso/vsyscall.h>
arch/mips/sgi-ip27/Kconfig (-7)
···
 	  Say Y here to enable replicating the kernel text across multiple
 	  nodes in a NUMA cluster.  This trades memory for speed.
 
-config REPLICATE_EXHANDLERS
-	bool "Exception handler replication support"
-	depends on SGI_IP27
-	help
-	  Say Y here to enable replicating the kernel exception handlers
-	  across multiple nodes in a NUMA cluster.  This trades memory for
-	  speed.
+6-15
arch/mips/sgi-ip27/ip27-init.c
···
 	hub_rtc_init(cnode);

-#ifdef CONFIG_REPLICATE_EXHANDLERS
-	/*
-	 * If this is not a headless node initialization,
-	 * copy over the caliased exception handlers.
-	 */
-	if (get_compact_nodeid() == cnode) {
-		extern char except_vec2_generic, except_vec3_generic;
-		extern void build_tlb_refill_handler(void);
-
-		memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80);
-		memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80);
-		build_tlb_refill_handler();
-		memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80);
-		memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100);
+	if (nasid) {
+		/* copy exception handlers from first node to current node */
+		memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0),
+		       (void *)CKSEG0, 0x200);
 		__flush_cache_all();
+		/* switch to node local exception handlers */
+		REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
 	}
-#endif
 }

 void per_cpu_init(void)
-4
arch/mips/sgi-ip27/ip27-memory.c
···
 	 * thinks it is a node 0 address.
 	 */
 	REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1));
-#ifdef CONFIG_REPLICATE_EXHANDLERS
-	REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K);
-#else
 	REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0);
-#endif

 #ifdef LATER
 	/*
+2-2
arch/sparc/vdso/Makefile
···
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
 #
-CFLAGS_REMOVE_vdso-note.o = -pg
 CFLAGS_REMOVE_vclock_gettime.o = -pg
+CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg

 $(obj)/%.so: OBJCOPYFLAGS := -S
 $(obj)/%.so: $(obj)/%.so.dbg FORCE
 	$(call if_changed,objcopy)

-CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
+CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds)
 VDSO_LDFLAGS_vdso32.lds = -m elf32_sparc -soname linux-gate.so.1

 #This makes sure the $(obj) subdirectory exists even though vdso32/
+45
arch/x86/Kconfig
···

 	  If unsure, say y.

+choice
+	prompt "TSX enable mode"
+	depends on CPU_SUP_INTEL
+	default X86_INTEL_TSX_MODE_OFF
+	help
+	  Intel's TSX (Transactional Synchronization Extensions) feature
+	  allows to optimize locking protocols through lock elision which
+	  can lead to a noticeable performance boost.
+
+	  On the other hand it has been shown that TSX can be exploited
+	  to form side channel attacks (e.g. TAA) and chances are there
+	  will be more of those attacks discovered in the future.
+
+	  Therefore TSX is not enabled by default (aka tsx=off). An admin
+	  might override this decision by tsx=on the command line parameter.
+	  Even with TSX enabled, the kernel will attempt to enable the best
+	  possible TAA mitigation setting depending on the microcode available
+	  for the particular machine.
+
+	  This option allows to set the default tsx mode between tsx=on, =off
+	  and =auto. See Documentation/admin-guide/kernel-parameters.txt for more
+	  details.
+
+	  Say off if not sure, auto if TSX is in use but it should be used on safe
+	  platforms or on if TSX is in use and the security aspect of tsx is not
+	  relevant.
+
+config X86_INTEL_TSX_MODE_OFF
+	bool "off"
+	help
+	  TSX is disabled if possible - equals to tsx=off command line parameter.
+
+config X86_INTEL_TSX_MODE_ON
+	bool "on"
+	help
+	  TSX is always enabled on TSX capable HW - equals the tsx=on command
+	  line parameter.
+
+config X86_INTEL_TSX_MODE_AUTO
+	bool "auto"
+	help
+	  TSX is enabled on TSX capable HW that is believed to be safe against
+	  side channel attacks- equals the tsx=auto command line parameter.
+endchoice
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
+2
arch/x86/include/asm/cpufeatures.h
···
 #define X86_BUG_MDS			X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
 #define X86_BUG_MSBDS_ONLY		X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
 #define X86_BUG_SWAPGS			X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA			X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT		X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */

 #endif /* _ASM_X86_CPUFEATURES_H */
+6
arch/x86/include/asm/kvm_host.h
···
 struct kvm_mmu_page {
 	struct list_head link;
 	struct hlist_node hash_link;
+	struct list_head lpage_disallowed_link;
+
 	bool unsync;
 	u8 mmu_valid_gen;
 	bool mmio_cached;
+	bool lpage_disallowed; /* Can't be replaced by an equiv large page */

 	/*
 	 * The following two entries are used to key the shadow page in the
···
 	 */
 	struct list_head active_mmu_pages;
 	struct list_head zapped_obsolete_pages;
+	struct list_head lpage_disallowed_mmu_pages;
 	struct kvm_page_track_notifier_node mmu_sp_tracker;
 	struct kvm_page_track_notifier_head track_notifier_head;
···
 	bool exception_payload_enabled;

 	struct kvm_pmu_event_filter *pmu_event_filter;
+	struct task_struct *nx_lpage_recovery_thread;
 };

 struct kvm_vm_stat {
···
 	ulong mmu_unsync;
 	ulong remote_tlb_flush;
 	ulong lpages;
+	ulong nx_lpage_splits;
 	ulong max_mmu_page_hash_collisions;
 };
+16
arch/x86/include/asm/msr-index.h
···
 						  * Microarchitectural Data
 						  * Sampling (MDS) vulnerabilities.
 						  */
+#define ARCH_CAP_PSCHANGE_MC_NO		BIT(6)	 /*
+						  * The processor is not susceptible to a
+						  * machine check error due to modifying the
+						  * code page size along with either the
+						  * physical address or cache type
+						  * without TLB invalidation.
+						  */
+#define ARCH_CAP_TSX_CTRL_MSR		BIT(7)	/* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO			BIT(8)	/*
+						 * Not susceptible to
+						 * TSX Async Abort (TAA) vulnerabilities.
+						 */

 #define MSR_IA32_FLUSH_CMD		0x0000010b
 #define L1D_FLUSH			BIT(0)	/*
···
 #define MSR_IA32_BBL_CR_CTL		0x00000119
 #define MSR_IA32_BBL_CR_CTL3		0x0000011e
+
+#define MSR_IA32_TSX_CTRL		0x00000122
+#define TSX_CTRL_RTM_DISABLE		BIT(0)	/* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR		BIT(1)	/* Disable TSX enumeration */

 #define MSR_IA32_SYSENTER_CS		0x00000174
 #define MSR_IA32_SYSENTER_ESP		0x00000175
+2-2
arch/x86/include/asm/nospec-branch.h
···
 #include <asm/segment.h>

 /**
- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
  *
  * This uses the otherwise unused and obsolete VERW instruction in
  * combination with microcode which triggers a CPU buffer flush when the
···
 }

 /**
- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability
+ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
  *
  * Clear CPU buffers if the corresponding static key is enabled
  */
···
 {
 	int cpu = smp_processor_id();
 	unsigned int value;
-#ifdef CONFIG_X86_32
-	int logical_apicid, ldr_apicid;
-#endif

 	if (disable_apic) {
 		disable_ioapic_support();
···
 	apic->init_apic_ldr();

 #ifdef CONFIG_X86_32
-	/*
-	 * APIC LDR is initialized.  If logical_apicid mapping was
-	 * initialized during get_smp_config(), make sure it matches the
-	 * actual value.
-	 */
-	logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
-	ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-	WARN_ON(logical_apicid != BAD_APICID && logical_apicid != ldr_apicid);
-	/* always use the value from LDR */
-	early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+	if (apic->dest_logical) {
+		int logical_apicid, ldr_apicid;
+
+		/*
+		 * APIC LDR is initialized.  If logical_apicid mapping was
+		 * initialized during get_smp_config(), make sure it matches
+		 * the actual value.
+		 */
+		logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+		ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+		if (logical_apicid != BAD_APICID)
+			WARN_ON(logical_apicid != ldr_apicid);
+		/* Always use the value from LDR. */
+		early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
+	}
 #endif

 	/*
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Transactional Synchronization Extensions (TSX) control.
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author:
+ *	Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+ */
+
+#include <linux/cpufeature.h>
+
+#include <asm/cmdline.h>
+
+#include "cpu.h"
+
+enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED;
+
+void tsx_disable(void)
+{
+	u64 tsx;
+
+	rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+	/* Force all transactions to immediately abort */
+	tsx |= TSX_CTRL_RTM_DISABLE;
+
+	/*
+	 * Ensure TSX support is not enumerated in CPUID.
+	 * This is visible to userspace and will ensure they
+	 * do not waste resources trying TSX transactions that
+	 * will always abort.
+	 */
+	tsx |= TSX_CTRL_CPUID_CLEAR;
+
+	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+void tsx_enable(void)
+{
+	u64 tsx;
+
+	rdmsrl(MSR_IA32_TSX_CTRL, tsx);
+
+	/* Enable the RTM feature in the cpu */
+	tsx &= ~TSX_CTRL_RTM_DISABLE;
+
+	/*
+	 * Ensure TSX support is enumerated in CPUID.
+	 * This is visible to userspace and will ensure they
+	 * can enumerate and use the TSX feature.
+	 */
+	tsx &= ~TSX_CTRL_CPUID_CLEAR;
+
+	wrmsrl(MSR_IA32_TSX_CTRL, tsx);
+}
+
+static bool __init tsx_ctrl_is_supported(void)
+{
+	u64 ia32_cap = x86_read_arch_cap_msr();
+
+	/*
+	 * TSX is controlled via MSR_IA32_TSX_CTRL.  However, support for this
+	 * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES.
+	 *
+	 * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a
+	 * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES
+	 * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get
+	 * MSR_IA32_TSX_CTRL support even after a microcode update. Thus,
+	 * tsx= cmdline requests will do nothing on CPUs without
+	 * MSR_IA32_TSX_CTRL support.
+	 */
+	return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR);
+}
+
+static enum tsx_ctrl_states x86_get_tsx_auto_mode(void)
+{
+	if (boot_cpu_has_bug(X86_BUG_TAA))
+		return TSX_CTRL_DISABLE;
+
+	return TSX_CTRL_ENABLE;
+}
+
+void __init tsx_init(void)
+{
+	char arg[5] = {};
+	int ret;
+
+	if (!tsx_ctrl_is_supported())
+		return;
+
+	ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg));
+	if (ret >= 0) {
+		if (!strcmp(arg, "on")) {
+			tsx_ctrl_state = TSX_CTRL_ENABLE;
+		} else if (!strcmp(arg, "off")) {
+			tsx_ctrl_state = TSX_CTRL_DISABLE;
+		} else if (!strcmp(arg, "auto")) {
+			tsx_ctrl_state = x86_get_tsx_auto_mode();
+		} else {
+			tsx_ctrl_state = TSX_CTRL_DISABLE;
+			pr_err("tsx: invalid option, defaulting to off\n");
+		}
+	} else {
+		/* tsx= not provided */
+		if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO))
+			tsx_ctrl_state = x86_get_tsx_auto_mode();
+		else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF))
+			tsx_ctrl_state = TSX_CTRL_DISABLE;
+		else
+			tsx_ctrl_state = TSX_CTRL_ENABLE;
+	}
+
+	if (tsx_ctrl_state == TSX_CTRL_DISABLE) {
+		tsx_disable();
+
+		/*
+		 * tsx_disable() will change the state of the
+		 * RTM CPUID bit.  Clear it here since it is now
+		 * expected to be not set.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_RTM);
+	} else if (tsx_ctrl_state == TSX_CTRL_ENABLE) {
+
+		/*
+		 * HW defaults TSX to be enabled at bootup.
+		 * We may still need the TSX enable support
+		 * during init for special cases like
+		 * kexec after TSX is disabled.
+		 */
+		tsx_enable();
+
+		/*
+		 * tsx_enable() will change the state of the
+		 * RTM CPUID bit.  Force it here since it is now
+		 * expected to be set.
+		 */
+		setup_force_cpu_cap(X86_FEATURE_RTM);
+	}
+}
+7
arch/x86/kernel/dumpstack_64.c
···
 	BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);

 	begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
+	/*
+	 * Handle the case where stack trace is collected _before_
+	 * cea_exception_stacks had been initialized.
+	 */
+	if (!begin)
+		return false;
+
 	end = begin + sizeof(struct cea_exception_stacks);
 	/* Bail if @stack is outside the exception stack area. */
 	if (stk < begin || stk >= end)
···
 static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *gw,
 			 int write_fault, int hlevel,
-			 kvm_pfn_t pfn, bool map_writable, bool prefault)
+			 kvm_pfn_t pfn, bool map_writable, bool prefault,
+			 bool lpage_disallowed)
 {
 	struct kvm_mmu_page *sp = NULL;
 	struct kvm_shadow_walk_iterator it;
 	unsigned direct_access, access = gw->pt_access;
 	int top_level, ret;
-	gfn_t base_gfn;
+	gfn_t gfn, base_gfn;

 	direct_access = gw->pte_access;
···
 		link_shadow_page(vcpu, it.sptep, sp);
 	}

-	base_gfn = gw->gfn;
+	/*
+	 * FNAME(page_fault) might have clobbered the bottom bits of
+	 * gw->gfn, restore them from the virtual address.
+	 */
+	gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT);
+	base_gfn = gfn;

 	trace_kvm_mmu_spte_requested(addr, gw->level, pfn);

 	for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
 		clear_sp_write_flooding_count(it.sptep);
-		base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
+
+		/*
+		 * We cannot overwrite existing page tables with an NX
+		 * large page, as the leaf could be executable.
+		 */
+		disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel);
+
+		base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
 		if (it.level == hlevel)
 			break;
···
 			sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
 					      it.level - 1, true, direct_access);
 			link_shadow_page(vcpu, it.sptep, sp);
+			if (lpage_disallowed)
+				account_huge_nx_page(vcpu->kvm, sp);
 		}
 	}
···
 	int r;
 	kvm_pfn_t pfn;
 	int level = PT_PAGE_TABLE_LEVEL;
-	bool force_pt_level = false;
 	unsigned long mmu_seq;
 	bool map_writable, is_self_change_mapping;
+	bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) &&
+				is_nx_huge_page_enabled();
+	bool force_pt_level = lpage_disallowed;

 	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
···
 	if (!force_pt_level)
 		transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
 	r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
-			 level, pfn, map_writable, prefault);
+			 level, pfn, map_writable, prefault, lpage_disallowed);
 	kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);

 out_unlock:
+20-3
arch/x86/kvm/vmx/vmx.c
···
 	if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
 		return;

+	/*
+	 * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+	 * PI.NDST: pi_post_block is the one expected to change PID.NDST and the
+	 * wakeup handler expects the vCPU to be on the blocked_vcpu_list that
+	 * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up
+	 * correctly.
+	 */
+	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) {
+		pi_clear_sn(pi_desc);
+		goto after_clear_sn;
+	}
+
 	/* The full case. */
 	do {
 		old.control = new.control = pi_desc->control;
···
 	} while (cmpxchg64(&pi_desc->control, old.control,
 			   new.control) != old.control);

+after_clear_sn:
+
 	/*
 	 * Clear SN before reading the bitmap.  The VT-d firmware
 	 * writes the bitmap and reads SN atomically (5.2.3 in the
···
 	 */
 	smp_mb__after_atomic();

-	if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
+	if (!pi_is_pir_empty(pi_desc))
 		pi_set_on(pi_desc);
 }
···
 	if (pi_test_on(&vmx->pi_desc)) {
 		pi_clear_on(&vmx->pi_desc);
 		/*
-		 * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+		 * IOMMU can write to PID.ON, so the barrier matters even on UP.
 		 * But on x86 this is just a compiler barrier anyway.
 		 */
 		smp_mb__after_atomic();
···

 static bool vmx_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu)
 {
-	return pi_test_on(vcpu_to_pi_desc(vcpu));
+	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+
+	return pi_test_on(pi_desc) ||
+		(pi_test_sn(pi_desc) && !pi_is_pir_empty(pi_desc));
 }

 static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
+11
arch/x86/kvm/vmx/vmx.h
···
 	return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }

+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+	return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
 static inline void pi_set_sn(struct pi_desc *pi_desc)
 {
 	set_bit(POSTED_INTR_SN,
···
 static inline void pi_clear_on(struct pi_desc *pi_desc)
 {
 	clear_bit(POSTED_INTR_ON,
+		  (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+	clear_bit(POSTED_INTR_SN,
 		  (unsigned long *)&pi_desc->control);
 }
+69-30
arch/x86/kvm/x86.c
···
 	{ "mmu_unsync", VM_STAT(mmu_unsync) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ "largepages", VM_STAT(lpages, .mode = 0444) },
+	{ "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) },
 	{ "max_mmu_page_hash_collisions",
 		VM_STAT(max_mmu_page_hash_collisions) },
 	{ NULL }
···
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
  *
- * This list is modified at module load time to reflect the
+ * The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features)
+ * extract the supported MSRs from the related const lists.
+ * msrs_to_save is selected from the msrs_to_save_all to reflect the
  * capabilities of the host cpu. This capabilities test skips MSRs that are
- * kvm-specific. Those are put in emulated_msrs; filtering of emulated_msrs
+ * kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
  * may depend on host virtualization features rather than host cpu features.
  */

-static u32 msrs_to_save[] = {
+static const u32 msrs_to_save_all[] = {
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 	MSR_STAR,
 #ifdef CONFIG_X86_64
···
 	MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17,
 };

+static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)];
 static unsigned num_msrs_to_save;

-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs_all[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
···
 	 * by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
 	 * We always support the "true" VMX control MSRs, even if the host
 	 * processor does not, so I am putting these registers here rather
-	 * than in msrs_to_save.
+	 * than in msrs_to_save_all.
 	 */
 	MSR_IA32_VMX_BASIC,
 	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
···
 	MSR_KVM_POLL_CONTROL,
 };

+static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)];
 static unsigned num_emulated_msrs;

 /*
  * List of msr numbers which are used to expose MSR-based features that
  * can be used by a hypervisor to validate requested CPU features.
  */
-static u32 msr_based_features[] = {
+static const u32 msr_based_features_all[] = {
 	MSR_IA32_VMX_BASIC,
 	MSR_IA32_VMX_TRUE_PINBASED_CTLS,
 	MSR_IA32_VMX_PINBASED_CTLS,
···
 	MSR_IA32_ARCH_CAPABILITIES,
 };

+static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
 static unsigned int num_msr_based_features;

 static u64 kvm_get_arch_capabilities(void)
···

 	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
 		rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
+
+	/*
+	 * If nx_huge_pages is enabled, KVM's shadow paging will ensure that
+	 * the nested hypervisor runs with NX huge pages.  If it is not,
+	 * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other
+	 * L1 guests, so it need not worry about its own (L2) guests.
+	 */
+	data |= ARCH_CAP_PSCHANGE_MC_NO;

 	/*
 	 * If we're doing cache flushes (either "always" or "cond")
···
 		data |= ARCH_CAP_SSB_NO;
 	if (!boot_cpu_has_bug(X86_BUG_MDS))
 		data |= ARCH_CAP_MDS_NO;
+
+	/*
+	 * On TAA affected systems, export MDS_NO=0 when:
+	 *	- TSX is enabled on the host, i.e. X86_FEATURE_RTM=1.
+	 *	- Updated microcode is present. This is detected by
+	 *	  the presence of ARCH_CAP_TSX_CTRL_MSR and ensures
+	 *	  that VERW clears CPU buffers.
+	 *
+	 * When MDS_NO=0 is exported, guests deploy clear CPU buffer
+	 * mitigation and don't complain:
+	 *
+	 *	"Vulnerable: Clear CPU buffers attempted, no microcode"
+	 *
+	 * If TSX is disabled on the system, guests are also mitigated against
+	 * TAA and clear CPU buffer mitigation is not required for guests.
+	 */
+	if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) &&
+	    (data & ARCH_CAP_TSX_CTRL_MSR))
+		data &= ~ARCH_CAP_MDS_NO;

 	return data;
 }
···
 {
 	struct x86_pmu_capability x86_pmu;
 	u32 dummy[2];
-	unsigned i, j;
+	unsigned i;

 	BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4,
-			 "Please update the fixed PMCs in msrs_to_save[]");
+			 "Please update the fixed PMCs in msrs_to_saved_all[]");

 	perf_get_x86_pmu_capability(&x86_pmu);

-	for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
-		if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
+	num_msrs_to_save = 0;
+	num_emulated_msrs = 0;
+	num_msr_based_features = 0;
+
+	for (i = 0; i < ARRAY_SIZE(msrs_to_save_all); i++) {
+		if (rdmsr_safe(msrs_to_save_all[i], &dummy[0], &dummy[1]) < 0)
 			continue;

 		/*
 		 * Even MSRs that are valid in the host may not be exposed
 		 * to the guests in some cases.
 		 */
-		switch (msrs_to_save[i]) {
+		switch (msrs_to_save_all[i]) {
 		case MSR_IA32_BNDCFGS:
 			if (!kvm_mpx_supported())
 				continue;
···
 			break;
 		case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
 			if (!kvm_x86_ops->pt_supported() ||
-				msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
+				msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >=
 				intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
 				continue;
 			break;
 		case MSR_ARCH_PERFMON_PERFCTR0 ... MSR_ARCH_PERFMON_PERFCTR0 + 17:
-			if (msrs_to_save[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
+			if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_PERFCTR0 >=
 			    min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
 				continue;
 			break;
 		case MSR_ARCH_PERFMON_EVENTSEL0 ... MSR_ARCH_PERFMON_EVENTSEL0 + 17:
-			if (msrs_to_save[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
+			if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >=
 			    min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp))
 				continue;
 		}
···
 			break;
 		}

-		if (j < i)
-			msrs_to_save[j] = msrs_to_save[i];
-		j++;
+		msrs_to_save[num_msrs_to_save++] = msrs_to_save_all[i];
 	}
-	num_msrs_to_save = j;

-	for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
-		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+	for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
+		if (!kvm_x86_ops->has_emulated_msr(emulated_msrs_all[i]))
 			continue;

-		if (j < i)
-			emulated_msrs[j] = emulated_msrs[i];
-		j++;
+		emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
 	}
-	num_emulated_msrs = j;

-	for (i = j = 0; i < ARRAY_SIZE(msr_based_features); i++) {
+	for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) {
 		struct kvm_msr_entry msr;

-		msr.index = msr_based_features[i];
+		msr.index = msr_based_features_all[i];
 		if (kvm_get_msr_feature(&msr))
 			continue;

-		if (j < i)
-			msr_based_features[j] = msr_based_features[i];
-		j++;
+		msr_based_features[num_msr_based_features++] = msr_based_features_all[i];
 	}
-	num_msr_based_features = j;
 }

 static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
···
 	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
+	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 	atomic_set(&kvm->arch.noncoherent_dma_count, 0);
···
 	kvm_mmu_init_vm(kvm);

 	return kvm_x86_ops->vm_init(kvm);
+}
+
+int kvm_arch_post_init_vm(struct kvm *kvm)
+{
+	return kvm_mmu_post_init_vm(kvm);
 }

 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
···
 	return r;
 }
 EXPORT_SYMBOL_GPL(x86_set_memory_region);
+
+void kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+	kvm_mmu_pre_destroy_vm(kvm);
+}

 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+26-6
block/bfq-iosched.c
···
 	}
 }

+
+static
+void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq)
+{
+	/*
+	 * To prevent bfqq's service guarantees from being violated,
+	 * bfqq may be left busy, i.e., queued for service, even if
+	 * empty (see comments in __bfq_bfqq_expire() for
+	 * details). But, if no process will send requests to bfqq any
+	 * longer, then there is no point in keeping bfqq queued for
+	 * service. In addition, keeping bfqq queued for service, but
+	 * with no process ref any longer, may have caused bfqq to be
+	 * freed when dequeued from service. But this is assumed to
+	 * never happen.
+	 */
+	if (bfq_bfqq_busy(bfqq) && RB_EMPTY_ROOT(&bfqq->sort_list) &&
+	    bfqq != bfqd->in_service_queue)
+		bfq_del_bfqq_busy(bfqd, bfqq, false);
+
+	bfq_put_queue(bfqq);
+}
+
 static void
 bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic,
 		struct bfq_queue *bfqq, struct bfq_queue *new_bfqq)
···
 	 */
 	new_bfqq->pid = -1;
 	bfqq->bic = NULL;
-	/* release process reference to bfqq */
-	bfq_put_queue(bfqq);
+	bfq_release_process_ref(bfqd, bfqq);
 }

 static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq,
···

 	bfq_put_cooperator(bfqq);

-	bfq_put_queue(bfqq); /* release process reference */
+	bfq_release_process_ref(bfqd, bfqq);
 }

 static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync)
···

 	bfqq = bic_to_bfqq(bic, false);
 	if (bfqq) {
-		/* release process reference on this queue */
-		bfq_put_queue(bfqq);
+		bfq_release_process_ref(bfqd, bfqq);
 		bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic);
 		bic_set_bfqq(bic, bfqq, false);
 	}
···

 	bfq_put_cooperator(bfqq);

-	bfq_put_queue(bfqq);
+	bfq_release_process_ref(bfqq->bfqd, bfqq);
 	return NULL;
 }
+1-1
block/bio.c
···
 	if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
 		return false;

-	if (bio->bi_vcnt > 0) {
+	if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
 		struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];

 		if (page_is_mergeable(bv, page, len, off, same_page)) {
+6-2
block/blk-iocost.c
···
 	atomic64_set(&iocg->active_period, cur_period);

 	/* already activated or breaking leaf-only constraint? */
-	for (i = iocg->level; i > 0; i--)
-		if (!list_empty(&iocg->active_list))
+	if (!list_empty(&iocg->active_list))
+		goto succeed_unlock;
+	for (i = iocg->level - 1; i > 0; i--)
+		if (!list_empty(&iocg->ancestors[i]->active_list))
 			goto fail_unlock;
+
 	if (iocg->child_active_sum)
 		goto fail_unlock;
···
 		ioc_start_period(ioc, now);
 	}

+succeed_unlock:
 	spin_unlock_irq(&iocg->lock);
 	return true;
···
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
-#include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/hw_random.h>
 #include <linux/kernel.h>
···
 {
 	long rc;

-	set_freezable();
-
-	while (!kthread_freezable_should_stop(NULL)) {
+	while (!kthread_should_stop()) {
 		struct hwrng *rng;

 		rng = get_current_rng();
+1-3
drivers/char/random.c
···
 #include <linux/percpu.h>
 #include <linux/cryptohash.h>
 #include <linux/fips.h>
-#include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/workqueue.h>
 #include <linux/irq.h>
···
 	 * We'll be woken up again once below random_write_wakeup_thresh,
 	 * or when the calling thread is about to terminate.
 	 */
-	wait_event_freezable(random_write_wait,
-			kthread_should_stop() ||
+	wait_event_interruptible(random_write_wait, kthread_should_stop() ||
 			ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
 	mix_pool_bytes(poolp, buffer, count);
 	credit_entropy_bits(poolp, entropy);
+11-5
drivers/clocksource/sh_mtu2.c
···
 	return 0;
 }

+static const unsigned int sh_mtu2_channel_offsets[] = {
+	0x300, 0x380, 0x000,
+};
+
 static int sh_mtu2_setup_channel(struct sh_mtu2_channel *ch, unsigned int index,
 				 struct sh_mtu2_device *mtu)
 {
-	static const unsigned int channel_offsets[] = {
-		0x300, 0x380, 0x000,
-	};
 	char name[6];
 	int irq;
 	int ret;
···
 		return ret;
 	}

-	ch->base = mtu->mapbase + channel_offsets[index];
+	ch->base = mtu->mapbase + sh_mtu2_channel_offsets[index];
 	ch->index = index;

 	return sh_mtu2_register(ch, dev_name(&mtu->pdev->dev));
···
 	}

 	/* Allocate and setup the channels. */
-	mtu->num_channels = 3;
+	ret = platform_irq_count(pdev);
+	if (ret < 0)
+		goto err_unmap;
+
+	mtu->num_channels = min_t(unsigned int, ret,
+				  ARRAY_SIZE(sh_mtu2_channel_offsets));

 	mtu->channels = kcalloc(mtu->num_channels, sizeof(*mtu->channels),
 				GFP_KERNEL);
···
 
     power_domains->initializing = true;
 
+    /* Must happen before power domain init on VLV/CHV */
+    intel_update_rawclk(i915);
+
     if (INTEL_GEN(i915) >= 11) {
         icl_display_core_init(i915, resume);
     } else if (IS_CANNONLAKE(i915)) {
+5
drivers/gpu/drm/i915/gem/i915_gem_context.c
···
     free_engines(rcu_access_pointer(ctx->engines));
     mutex_destroy(&ctx->engines_mutex);
 
+    kfree(ctx->jump_whitelist);
+
     if (ctx->timeline)
         intel_timeline_put(ctx->timeline);
 
···
 
     for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
         ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+
+    ctx->jump_whitelist = NULL;
+    ctx->jump_whitelist_cmds = 0;
 
     return ctx;
 
+7
drivers/gpu/drm/i915/gem/i915_gem_context_types.h
···
      * per vm, which may be one per context or shared with the global GTT)
      */
     struct radix_tree_root handles_vma;
+
+    /** jump_whitelist: Bit array for tracking cmds during cmdparsing
+     *  Guarded by struct_mutex
+     */
+    unsigned long *jump_whitelist;
+    /** jump_whitelist_cmds: No of cmd slots available */
+    u32 jump_whitelist_cmds;
 };
 
 #endif /* __I915_GEM_CONTEXT_TYPES_H__ */
+80 -31
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
···
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
 {
-    return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len;
+    return intel_engine_requires_cmd_parser(eb->engine) ||
+           (intel_engine_using_cmd_parser(eb->engine) &&
+            eb->args->batch_len);
 }
 
 static int eb_create(struct i915_execbuffer *eb)
···
     return 0;
 }
 
-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master)
+static struct i915_vma *
+shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
+{
+    struct drm_i915_private *dev_priv = eb->i915;
+    struct i915_vma * const vma = *eb->vma;
+    struct i915_address_space *vm;
+    u64 flags;
+
+    /*
+     * PPGTT backed shadow buffers must be mapped RO, to prevent
+     * post-scan tampering
+     */
+    if (CMDPARSER_USES_GGTT(dev_priv)) {
+        flags = PIN_GLOBAL;
+        vm = &dev_priv->ggtt.vm;
+    } else if (vma->vm->has_read_only) {
+        flags = PIN_USER;
+        vm = vma->vm;
+        i915_gem_object_set_readonly(obj);
+    } else {
+        DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n");
+        return ERR_PTR(-EINVAL);
+    }
+
+    return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags);
+}
+
+static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
 {
     struct intel_engine_pool_node *pool;
     struct i915_vma *vma;
+    u64 batch_start;
+    u64 shadow_batch_start;
     int err;
 
     pool = intel_engine_pool_get(&eb->engine->pool, eb->batch_len);
     if (IS_ERR(pool))
         return ERR_CAST(pool);
 
-    err = intel_engine_cmd_parser(eb->engine,
+    vma = shadow_batch_pin(eb, pool->obj);
+    if (IS_ERR(vma))
+        goto err;
+
+    batch_start = gen8_canonical_addr(eb->batch->node.start) +
+                  eb->batch_start_offset;
+
+    shadow_batch_start = gen8_canonical_addr(vma->node.start);
+
+    err = intel_engine_cmd_parser(eb->gem_context,
+                                  eb->engine,
                                   eb->batch->obj,
-                                  pool->obj,
+                                  batch_start,
                                   eb->batch_start_offset,
                                   eb->batch_len,
-                                  is_master);
+                                  pool->obj,
+                                  shadow_batch_start);
+
     if (err) {
-        if (err == -EACCES) /* unhandled chained batch */
+        i915_vma_unpin(vma);
+
+        /*
+         * Unsafe GGTT-backed buffers can still be submitted safely
+         * as non-secure.
+         * For PPGTT backing however, we have no choice but to forcibly
+         * reject unsafe buffers
+         */
+        if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES))
+            /* Execute original buffer non-secure */
             vma = NULL;
         else
             vma = ERR_PTR(err);
         goto err;
     }
 
-    vma = i915_gem_object_ggtt_pin(pool->obj, NULL, 0, 0, 0);
-    if (IS_ERR(vma))
-        goto err;
-
     eb->vma[eb->buffer_count] = i915_vma_get(vma);
     eb->flags[eb->buffer_count] =
         __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
     vma->exec_flags = &eb->flags[eb->buffer_count];
     eb->buffer_count++;
+
+    eb->batch_start_offset = 0;
+    eb->batch = vma;
+
+    if (CMDPARSER_USES_GGTT(eb->i915))
+        eb->batch_flags |= I915_DISPATCH_SECURE;
+
+    /* eb->batch_len unchanged */
 
     vma->private = pool;
     return vma;
···
                struct drm_i915_gem_exec_object2 *exec,
                struct drm_syncobj **fences)
 {
+    struct drm_i915_private *i915 = to_i915(dev);
     struct i915_execbuffer eb;
     struct dma_fence *in_fence = NULL;
     struct dma_fence *exec_fence = NULL;
···
     BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
                  ~__EXEC_OBJECT_UNKNOWN_FLAGS);
 
-    eb.i915 = to_i915(dev);
+    eb.i915 = i915;
     eb.file = file;
     eb.args = args;
     if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
···
 
     eb.batch_flags = 0;
     if (args->flags & I915_EXEC_SECURE) {
+        if (INTEL_GEN(i915) >= 11)
+            return -ENODEV;
+
+        /* Return -EPERM to trigger fallback code on old binaries. */
+        if (!HAS_SECURE_BATCHES(i915))
+            return -EPERM;
+
         if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
-            return -EPERM;
+            return -EPERM;
 
         eb.batch_flags |= I915_DISPATCH_SECURE;
     }
···
         goto err_vma;
     }
 
+    if (eb.batch_len == 0)
+        eb.batch_len = eb.batch->size - eb.batch_start_offset;
+
     if (eb_use_cmdparser(&eb)) {
         struct i915_vma *vma;
 
-        vma = eb_parse(&eb, drm_is_current_master(file));
+        vma = eb_parse(&eb);
         if (IS_ERR(vma)) {
             err = PTR_ERR(vma);
             goto err_vma;
         }
-
-        if (vma) {
-            /*
-             * Batch parsed and accepted:
-             *
-             * Set the DISPATCH_SECURE bit to remove the NON_SECURE
-             * bit from MI_BATCH_BUFFER_START commands issued in
-             * the dispatch_execbuffer implementations. We
-             * specifically don't want that set on batches the
-             * command parser has accepted.
-             */
-            eb.batch_flags |= I915_DISPATCH_SECURE;
-            eb.batch_start_offset = 0;
-            eb.batch = vma;
-        }
     }
-
-    if (eb.batch_len == 0)
-        eb.batch_len = eb.batch->size - eb.batch_start_offset;
 
     /*
      * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
···
         value = !!(i915->caps.scheduler & I915_SCHEDULER_CAP_SEMAPHORES);
         break;
     case I915_PARAM_HAS_SECURE_BATCHES:
-        value = capable(CAP_SYS_ADMIN);
+        value = HAS_SECURE_BATCHES(i915) && capable(CAP_SYS_ADMIN);
         break;
     case I915_PARAM_CMD_PARSER_VERSION:
         value = i915_cmd_parser_get_version(i915);
···
      */
     I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) |
                PWM1_GATING_DIS | PWM2_GATING_DIS);
+
+    /*
+     * Lower the display internal timeout.
+     * This is needed to avoid any hard hangs when DSI port PLL
+     * is off and a MMIO access is attempted by any privilege
+     * application, using batch buffers or any other means.
+     */
+    I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
 }
 
 static void glk_init_clock_gating(struct drm_i915_private *dev_priv)
···
     dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
+static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv)
+{
+    return !I915_READ(GEN8_RC6_CTX_INFO);
+}
+
+static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915)
+{
+    if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+        return;
+
+    if (i915_rc6_ctx_corrupted(i915)) {
+        DRM_INFO("RC6 context corrupted, disabling runtime power management\n");
+        i915->gt_pm.rc6.ctx_corrupted = true;
+        i915->gt_pm.rc6.ctx_corrupted_wakeref =
+            intel_runtime_pm_get(&i915->runtime_pm);
+    }
+}
+
+static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915)
+{
+    if (i915->gt_pm.rc6.ctx_corrupted) {
+        intel_runtime_pm_put(&i915->runtime_pm,
+                             i915->gt_pm.rc6.ctx_corrupted_wakeref);
+        i915->gt_pm.rc6.ctx_corrupted = false;
+    }
+}
+
+/**
+ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to clean up the RC6 CTX WA before system suspend.
+ */
+void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915)
+{
+    if (i915->gt_pm.rc6.ctx_corrupted)
+        intel_runtime_pm_put(&i915->runtime_pm,
+                             i915->gt_pm.rc6.ctx_corrupted_wakeref);
+}
+
+/**
+ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA
+ * @i915: i915 device
+ *
+ * Perform any steps needed to re-init the RC6 CTX WA after system resume.
+ */
+void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915)
+{
+    if (!i915->gt_pm.rc6.ctx_corrupted)
+        return;
+
+    if (i915_rc6_ctx_corrupted(i915)) {
+        i915->gt_pm.rc6.ctx_corrupted_wakeref =
+            intel_runtime_pm_get(&i915->runtime_pm);
+        return;
+    }
+
+    DRM_INFO("RC6 context restored, re-enabling runtime power management\n");
+    i915->gt_pm.rc6.ctx_corrupted = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *dev_priv);
+
+/**
+ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption
+ * @i915: i915 device
+ *
+ * Check if an RC6 CTX corruption has happened since the last check and if so
+ * disable RC6 and runtime power management.
+ *
+ * Return false if no context corruption has happened since the last call of
+ * this function, true otherwise.
+ */
+bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915)
+{
+    if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915))
+        return false;
+
+    if (i915->gt_pm.rc6.ctx_corrupted)
+        return false;
+
+    if (!i915_rc6_ctx_corrupted(i915))
+        return false;
+
+    DRM_NOTE("RC6 context corruption, disabling runtime power management\n");
+
+    intel_disable_rc6(i915);
+    i915->gt_pm.rc6.ctx_corrupted = true;
+    i915->gt_pm.rc6.ctx_corrupted_wakeref =
+        intel_runtime_pm_get_noresume(&i915->runtime_pm);
+
+    return true;
+}
+
 void intel_init_gt_powersave(struct drm_i915_private *dev_priv)
 {
     struct intel_rps *rps = &dev_priv->gt_pm.rps;
···
         DRM_INFO("RC6 disabled, disabling runtime PM support\n");
         pm_runtime_get(&dev_priv->drm.pdev->dev);
     }
+
+    i915_rc6_ctx_wa_init(dev_priv);
 
     /* Initialize RPS limits (for userspace) */
     if (IS_CHERRYVIEW(dev_priv))
···
     if (IS_VALLEYVIEW(dev_priv))
         valleyview_cleanup_gt_powersave(dev_priv);
 
+    i915_rc6_ctx_wa_cleanup(dev_priv);
+
     if (!HAS_RC6(dev_priv))
         pm_runtime_put(&dev_priv->drm.pdev->dev);
 }
···
     i915->gt_pm.llc_pstate.enabled = false;
 }
 
-static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+static void __intel_disable_rc6(struct drm_i915_private *dev_priv)
 {
     lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
···
         gen6_disable_rc6(dev_priv);
 
     dev_priv->gt_pm.rc6.enabled = false;
+}
+
+static void intel_disable_rc6(struct drm_i915_private *dev_priv)
+{
+    struct intel_rps *rps = &dev_priv->gt_pm.rps;
+
+    mutex_lock(&rps->lock);
+    __intel_disable_rc6(dev_priv);
+    mutex_unlock(&rps->lock);
 }
 
 static void intel_disable_rps(struct drm_i915_private *dev_priv)
···
 {
     mutex_lock(&dev_priv->gt_pm.rps.lock);
 
-    intel_disable_rc6(dev_priv);
+    __intel_disable_rc6(dev_priv);
     intel_disable_rps(dev_priv);
     if (HAS_LLC(dev_priv))
         intel_disable_llc_pstate(dev_priv);
···
     lockdep_assert_held(&dev_priv->gt_pm.rps.lock);
 
     if (dev_priv->gt_pm.rc6.enabled)
+        return;
+
+    if (dev_priv->gt_pm.rc6.ctx_corrupted)
         return;
 
     if (IS_CHERRYVIEW(dev_priv))
···
  * @name: name of the chip.
  * @reg: register map of the chip.
  * @config: configuration of the chip.
+ * @fifo_size: size of the FIFO in bytes.
  */
 struct inv_mpu6050_hw {
     u8 whoami;
     u8 *name;
     const struct inv_mpu6050_reg_map *reg;
     const struct inv_mpu6050_chip_config *config;
+    size_t fifo_size;
 };
 
 /*
+12 -3
drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
···
             "failed to ack interrupt\n");
         goto flush_fifo;
     }
-    /* handle fifo overflow by reseting fifo */
-    if (int_status & INV_MPU6050_BIT_FIFO_OVERFLOW_INT)
-        goto flush_fifo;
     if (!(int_status & INV_MPU6050_BIT_RAW_DATA_RDY_INT)) {
         dev_warn(regmap_get_device(st->map),
             "spurious interrupt with status 0x%x\n", int_status);
···
     if (result)
         goto end_session;
     fifo_count = get_unaligned_be16(&data[0]);
+
+    /*
+     * Handle fifo overflow by resetting fifo.
+     * Reset if there is only 3 data set free remaining to mitigate
+     * possible delay between reading fifo count and fifo data.
+     */
+    nb = 3 * bytes_per_datum;
+    if (fifo_count >= st->hw->fifo_size - nb) {
+        dev_warn(regmap_get_device(st->map), "fifo overflow reset\n");
+        goto flush_fifo;
+    }
+
     /* compute and process all complete datum */
     nb = fifo_count / bytes_per_datum;
     inv_mpu6050_update_period(st, pf->timestamp, nb);
+15 -14
drivers/iio/proximity/srf04.c
···
     udelay(data->cfg->trigger_pulse_us);
     gpiod_set_value(data->gpiod_trig, 0);
 
-    /* it cannot take more than 20 ms */
+    /* it should not take more than 20 ms until echo is rising */
     ret = wait_for_completion_killable_timeout(&data->rising, HZ/50);
     if (ret < 0) {
         mutex_unlock(&data->lock);
···
         return -ETIMEDOUT;
     }
 
-    ret = wait_for_completion_killable_timeout(&data->falling, HZ/50);
+    /* it cannot take more than 50 ms until echo is falling */
+    ret = wait_for_completion_killable_timeout(&data->falling, HZ/20);
     if (ret < 0) {
         mutex_unlock(&data->lock);
         return ret;
···
 
     dt_ns = ktime_to_ns(ktime_dt);
     /*
-     * measuring more than 3 meters is beyond the capabilities of
-     * the sensor
+     * measuring more than 6,45 meters is beyond the capabilities of
+     * the supported sensors
      * ==> filter out invalid results for not measuring echos of
      *     another us sensor
      *
      * formula:
-     *         distance     3 m
-     * time = ---------- = --------- = 9404389 ns
-     *          speed      319 m/s
+     *         distance     6,45 * 2 m
+     * time = ---------- = ------------ = 40438871 ns
+     *          speed       319 m/s
      *
      * using a minimum speed at -20 °C of 319 m/s
      */
-    if (dt_ns > 9404389)
+    if (dt_ns > 40438871)
         return -EIO;
 
     time_ns = dt_ns;
···
      * with Temp in °C
      * and speed in m/s
      *
-     * use 343 m/s as ultrasonic speed at 20 °C here in absence of the
+     * use 343,5 m/s as ultrasonic speed at 20 °C here in absence of the
      * temperature
      *
      * therefore:
-     *             time     343
-     * distance = ------ * -----
-     *             10^6      2
+     *             time     343,5     time * 106
+     * distance = ------ * ------- = ------------
+     *             10^6       2         617176
      * with time in ns
      * and distance in mm (one way)
      *
-     * because we limit to 3 meters the multiplication with 343 just
+     * because we limit to 6,45 meters the multiplication with 106 just
      * fits into 32 bit
      */
-    distance_mm = time_ns * 343 / 2000000;
+    distance_mm = time_ns * 106 / 617176;
 
     return distance_mm;
 }
···
     /*
      * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
      */
-    if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
+    if (parent &&
+        (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
+         dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
         dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
         dd->link_gen3_capable = 0;
     }
+8 -8
drivers/infiniband/hw/hfi1/rc.c
···
     if (qp->s_flags & RVT_S_WAIT_RNR)
         goto bail_stop;
     rdi = ib_to_rvt(qp->ibqp.device);
-    if (qp->s_rnr_retry == 0 &&
-        !((rdi->post_parms[wqe->wr.opcode].flags &
-           RVT_OPERATION_IGN_RNR_CNT) &&
-          qp->s_rnr_retry_cnt == 0)) {
-        status = IB_WC_RNR_RETRY_EXC_ERR;
-        goto class_b;
+    if (!(rdi->post_parms[wqe->wr.opcode].flags &
+          RVT_OPERATION_IGN_RNR_CNT)) {
+        if (qp->s_rnr_retry == 0) {
+            status = IB_WC_RNR_RETRY_EXC_ERR;
+            goto class_b;
+        }
+        if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
+            qp->s_rnr_retry--;
     }
-    if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
-        qp->s_rnr_retry--;
 
     /*
      * The last valid PSN is the previous PSN. For TID RDMA WRITE
+32 -25
drivers/infiniband/hw/hfi1/tid_rdma.c
···
  *   C - Capcode
  */
 
-static u32 tid_rdma_flow_wt;
-
 static void tid_rdma_trigger_resume(struct work_struct *work);
 static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
 static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
···
                               struct hfi1_ctxtdata *rcd,
                               struct tid_rdma_flow *flow,
                               bool fecn);
+
+static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
+{
+    if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
+        priv->r_tid_ack = priv->r_tid_tail;
+}
+
+static void tid_rdma_schedule_ack(struct rvt_qp *qp)
+{
+    struct hfi1_qp_priv *priv = qp->priv;
+
+    priv->s_flags |= RVT_S_ACK_PENDING;
+    hfi1_schedule_tid_send(qp);
+}
+
+static void tid_rdma_trigger_ack(struct rvt_qp *qp)
+{
+    validate_r_tid_ack(qp->priv);
+    tid_rdma_schedule_ack(qp);
+}
 
 static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
 {
···
         qpriv->s_nak_state = IB_NAK_PSN_ERROR;
         /* We are NAK'ing the next expected PSN */
         qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
-        qpriv->s_flags |= RVT_S_ACK_PENDING;
-        if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
-            qpriv->r_tid_ack = qpriv->r_tid_tail;
-        hfi1_schedule_tid_send(qp);
+        tid_rdma_trigger_ack(qp);
     }
     goto unlock;
 }
···
     return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
 }
 
-void hfi1_compute_tid_rdma_flow_wt(void)
+static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
 {
     /*
      * Heuristic for computing the RNR timeout when waiting on the flow
      * queue. Rather than a computationaly expensive exact estimate of when
      * a flow will be available, we assume that if a QP is at position N in
      * the flow queue it has to wait approximately (N + 1) * (number of
-     * segments between two sync points), assuming PMTU of 4K. The rationale
-     * for this is that flows are released and recycled at each sync point.
+     * segments between two sync points). The rationale for this is that
+     * flows are released and recycled at each sync point.
      */
-    tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
-        TID_RDMA_MAX_SEGMENT_SIZE;
+    return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
 }
 
 static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
···
     if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
         ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
         if (ret) {
-            to_seg = tid_rdma_flow_wt *
+            to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
                      position_in_queue(qpriv,
                                        &rcd->flow_queue);
             break;
···
         /*
          * If overtaking req->acked_tail, send an RNR NAK. Because the
          * QP is not queued in this case, and the issue can only be
-         * caused due a delay in scheduling the second leg which we
+         * caused by a delay in scheduling the second leg which we
          * cannot estimate, we use a rather arbitrary RNR timeout of
          * (MAX_FLOWS / 2) segments
          */
···
                 MAX_FLOWS)) {
             ret = -EAGAIN;
             to_seg = MAX_FLOWS >> 1;
-            qpriv->s_flags |= RVT_S_ACK_PENDING;
-            hfi1_schedule_tid_send(qp);
+            tid_rdma_trigger_ack(qp);
             break;
         }
 
···
     trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
                                       req);
     trace_hfi1_tid_write_rsp_rcv_data(qp);
-    if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-        priv->r_tid_ack = priv->r_tid_tail;
+    validate_r_tid_ack(priv);
 
     if (opcode == TID_OP(WRITE_DATA_LAST)) {
         release_rdma_sge_mr(e);
···
     }
 
 done:
-    priv->s_flags |= RVT_S_ACK_PENDING;
-    hfi1_schedule_tid_send(qp);
+    tid_rdma_schedule_ack(qp);
 exit:
     priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
     if (fecn)
···
     if (!priv->s_nak_state) {
         priv->s_nak_state = IB_NAK_PSN_ERROR;
         priv->s_nak_psn = flow->flow_state.r_next_psn;
-        priv->s_flags |= RVT_S_ACK_PENDING;
-        if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
-            priv->r_tid_ack = priv->r_tid_tail;
-        hfi1_schedule_tid_send(qp);
+        tid_rdma_trigger_ack(qp);
     }
     goto done;
 }
···
     qpriv->resync = true;
     /* RESYNC request always gets a TID RDMA ACK. */
     qpriv->s_nak_state = 0;
-    qpriv->s_flags |= RVT_S_ACK_PENDING;
-    hfi1_schedule_tid_send(qp);
+    tid_rdma_trigger_ack(qp);
 bail:
     if (fecn)
         qp->s_flags |= RVT_S_ECN;
···
 {
     struct ml_device *ml = ff->private;
 
+    /*
+     * Even though we stop all playing effects when tearing down
+     * an input device (via input_device_flush() that calls into
+     * input_ff_flush() that stops and erases all effects), we
+     * do not actually stop the timer, and therefore we should
+     * do it here.
+     */
+    del_timer_sync(&ml->timer);
+
     kfree(ml->private);
 }
 
···
 
     /* get sysinfo */
     md->si = &cd->sysinfo;
-    if (!md->si) {
-        dev_err(dev, "%s: Fail get sysinfo pointer from core p=%p\n",
-            __func__, md->si);
-        goto error_get_sysinfo;
-    }
 
     rc = cyttsp4_setup_input_device(cd);
     if (rc)
···
 
 error_init_input:
     input_free_device(md->input);
-error_get_sysinfo:
-    input_set_drvdata(md->input, NULL);
 error_alloc_failed:
     dev_err(dev, "%s failed.\n", __func__);
     return rc;
+4
drivers/interconnect/core.c
···
     if (!path)
         return;
 
+    mutex_lock(&icc_lock);
+
     for (i = 0; i < path->num_nodes; i++)
         path->reqs[i].tag = tag;
+
+    mutex_unlock(&icc_lock);
 }
 EXPORT_SYMBOL_GPL(icc_set_tag);
 
+2 -1
drivers/interconnect/qcom/qcs404.c
···
     if (!qp)
         return -ENOMEM;
 
-    data = devm_kcalloc(dev, num_nodes, sizeof(*node), GFP_KERNEL);
+    data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
+                        GFP_KERNEL);
     if (!data)
         return -ENOMEM;
 
+2 -1
drivers/interconnect/qcom/sdm845.c
···
     if (!qp)
         return -ENOMEM;
 
-    data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
+    data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes),
+                        GFP_KERNEL);
     if (!data)
         return -ENOMEM;
 
+1 -1
drivers/mmc/host/sdhci-of-at91.c
···
     pm_runtime_use_autosuspend(&pdev->dev);
 
     /* HS200 is broken at this moment */
-    host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200;
+    host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200;
 
     ret = sdhci_add_host(host);
     if (ret)
···
 err_service_reg:
     free_channel(priv, channel);
 err_alloc_ch:
-    if (err == -EPROBE_DEFER)
+    if (err == -EPROBE_DEFER) {
+        for (i = 0; i < priv->num_channels; i++) {
+            channel = priv->channel[i];
+            nctx = &channel->nctx;
+            dpaa2_io_service_deregister(channel->dpio, nctx, dev);
+            free_channel(priv, channel);
+        }
+        priv->num_channels = 0;
         return err;
+    }
 
     if (cpumask_empty(&priv->dpio_cpumask)) {
         dev_err(dev, "No cpu with an affine DPIO/DPCON\n");
···
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 // Copyright (c) 2017 Synopsys, Inc. and/or its affiliates.
 // stmmac Support for 5.xx Ethernet QoS cores
 
+1 -1
drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
···
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 /*
  * Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
  * stmmac XGMAC definitions.
+1 -1
drivers/net/ethernet/stmicro/stmmac/hwif.h
···
-// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
 // Copyright (c) 2018 Synopsys, Inc. and/or its affiliates.
 // stmmac HW Interface Callbacks
 
+4
drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
···
 
     switch (rq->type) {
     case PTP_CLK_REQ_PEROUT:
+        /* Reject requests with unsupported flags */
+        if (rq->perout.flags)
+            return -EOPNOTSUPP;
+
         cfg = &priv->pps[rq->perout.index];
 
         cfg->start.tv_sec = rq->perout.start.sec;
+16
drivers/net/phy/dp83640.c
···
 
     switch (rq->type) {
     case PTP_CLK_REQ_EXTTS:
+        /* Reject requests with unsupported flags */
+        if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
+                                PTP_RISING_EDGE |
+                                PTP_FALLING_EDGE |
+                                PTP_STRICT_FLAGS))
+            return -EOPNOTSUPP;
+
+        /* Reject requests to enable time stamping on both edges. */
+        if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
+            (rq->extts.flags & PTP_ENABLE_FEATURE) &&
+            (rq->extts.flags & PTP_EXTTS_EDGES) == PTP_EXTTS_EDGES)
+            return -EOPNOTSUPP;
+
         index = rq->extts.index;
         if (index >= N_EXT_TS)
             return -EINVAL;
···
         return 0;
 
     case PTP_CLK_REQ_PEROUT:
+        /* Reject requests with unsupported flags */
+        if (rq->perout.flags)
+            return -EOPNOTSUPP;
         if (rq->perout.index >= N_PER_OUT)
             return -EINVAL;
         return periodic_output(clock, rq, on, rq->perout.index);
+6 -5
drivers/net/phy/mdio_bus.c
···
     if (mdiodev->dev.of_node)
         reset = devm_reset_control_get_exclusive(&mdiodev->dev,
                                                  "phy");
-    if (PTR_ERR(reset) == -ENOENT ||
-        PTR_ERR(reset) == -ENOTSUPP)
-        reset = NULL;
-    else if (IS_ERR(reset))
-        return PTR_ERR(reset);
+    if (IS_ERR(reset)) {
+        if (PTR_ERR(reset) == -ENOENT || PTR_ERR(reset) == -ENOSYS)
+            reset = NULL;
+        else
+            return PTR_ERR(reset);
+    }
 
     mdiodev->reset_ctrl = reset;
 
···
 
     /* Get the MAC address */
     ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0);
-    if (ret < 0) {
+    if (ret < ETH_ALEN) {
         netdev_err(dev->net, "Failed to read MAC address: %d\n", ret);
         goto free;
     }
···
     struct ieee80211_hdr *hdr = (void *)skb->data;
     unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
     unsigned int mss = skb_shinfo(skb)->gso_size;
-    u16 length, iv_len, amsdu_pad;
+    u16 length, amsdu_pad;
     u8 *start_hdr;
     struct iwl_tso_hdr_page *hdr_page;
     struct page **page_ptr;
     struct tso_t tso;
-
-    /* if the packet is protected, then it must be CCMP or GCMP */
-    iv_len = ieee80211_has_protected(hdr->frame_control) ?
-        IEEE80211_CCMP_HDR_LEN : 0;
 
     trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
                          &dev_cmd->hdr, start_len, 0);
 
     ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb);
     snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
-    total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
+    total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
     amsdu_pad = 0;
 
     /* total amount of header we may need for this A-MSDU */
     hdr_room = DIV_ROUND_UP(total_len, mss) *
-        (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
+        (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));
 
     /* Our device supports 9 segments at most, it will fit in 1 page */
     hdr_page = get_page_hdr(trans, hdr_room);
···
     start_hdr = hdr_page->pos;
     page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs);
     *page_ptr = hdr_page->page;
-    memcpy(hdr_page->pos, skb->data + hdr_len, iv_len);
-    hdr_page->pos += iv_len;
 
     /*
-     * Pull the ieee80211 header + IV to be able to use TSO core,
+     * Pull the ieee80211 header to be able to use TSO core,
      * we will restore it for the tx_status flow.
      */
-    skb_pull(skb, hdr_len + iv_len);
+    skb_pull(skb, hdr_len);
 
     /*
      * Remove the length of all the headers that we don't actually
···
         }
     }
 
-    /* re -add the WiFi header and IV */
-    skb_push(skb, hdr_len + iv_len);
+    /* re -add the WiFi header */
+    skb_push(skb, hdr_len);
 
     return 0;
 
+4 -2
drivers/nfc/nxp-nci/i2c.c
···
 
     if (r == -EREMOTEIO) {
         phy->hard_fault = r;
-        skb = NULL;
-    } else if (r < 0) {
+        if (info->mode == NXP_NCI_MODE_FW)
+            nxp_nci_fw_recv_frame(phy->ndev, NULL);
+    }
+    if (r < 0) {
         nfc_err(&client->dev, "Read failed with error %d\n", r);
         goto exit_irq_handled;
     }
···
 #define PADCFG0_GPIROUTNMI    BIT(17)
 #define PADCFG0_PMODE_SHIFT   10
 #define PADCFG0_PMODE_MASK    GENMASK(13, 10)
+#define PADCFG0_PMODE_GPIO    0
 #define PADCFG0_GPIORXDIS     BIT(9)
 #define PADCFG0_GPIOTXDIS     BIT(8)
 #define PADCFG0_GPIORXSTATE   BIT(1)
···
     cfg1 = readl(intel_get_padcfg(pctrl, pin, PADCFG1));
 
     mode = (cfg0 & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
-    if (!mode)
+    if (mode == PADCFG0_PMODE_GPIO)
         seq_puts(s, "GPIO ");
     else
         seq_printf(s, "mode %d ", mode);
···
     writel(value, padcfg0);
 }
 
+static int intel_gpio_get_gpio_mode(void __iomem *padcfg0)
+{
+    return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT;
+}
+
 static void intel_gpio_set_gpio_mode(void __iomem *padcfg0)
 {
     u32 value;
···
     }
 
     padcfg0 = intel_get_padcfg(pctrl, pin, PADCFG0);
+
+    /*
+     * If pin is already configured in GPIO mode, we assume that
+     * firmware provides correct settings. In such case we avoid
+     * potential glitches on the pin. Otherwise, for the pin in
+     * alternative mode, consumer has to supply respective flags.
+     */
+    if (intel_gpio_get_gpio_mode(padcfg0) == PADCFG0_PMODE_GPIO) {
+        raw_spin_unlock_irqrestore(&pctrl->lock, flags);
+        return 0;
+    }
+
     intel_gpio_set_gpio_mode(padcfg0);
+
     /* Disable TX buffer and enable RX (this will be input) */
     __intel_gpio_set_direction(padcfg0, true);
 
drivers/pinctrl/pinctrl-stmfx.c | -14

@@ -585 +585 @@
 	return stmfx_function_enable(pctl->stmfx, func);
 }
 
-static int stmfx_pinctrl_gpio_init_valid_mask(struct gpio_chip *gc,
-					      unsigned long *valid_mask,
-					      unsigned int ngpios)
-{
-	struct stmfx_pinctrl *pctl = gpiochip_get_data(gc);
-	u32 n;
-
-	for_each_clear_bit(n, &pctl->gpio_valid_mask, ngpios)
-		clear_bit(n, valid_mask);
-
-	return 0;
-}
-
 static int stmfx_pinctrl_probe(struct platform_device *pdev)
 {
 	struct stmfx *stmfx = dev_get_drvdata(pdev->dev.parent);
@@ -647 +660 @@
 	pctl->gpio_chip.ngpio = pctl->pctl_desc.npins;
 	pctl->gpio_chip.can_sleep = true;
 	pctl->gpio_chip.of_node = np;
-	pctl->gpio_chip.init_valid_mask = stmfx_pinctrl_gpio_init_valid_mask;
 
 	ret = devm_gpiochip_add_data(pctl->dev, &pctl->gpio_chip, pctl);
 	if (ret) {
drivers/ptp/ptp_chardev.c | +15 -5

@@ -149 +149 @@
 			err = -EFAULT;
 			break;
 		}
-		if (((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
-			req.extts.rsv[0] || req.extts.rsv[1]) &&
-			cmd == PTP_EXTTS_REQUEST2) {
-			err = -EINVAL;
-			break;
+		if (cmd == PTP_EXTTS_REQUEST2) {
+			/* Tell the drivers to check the flags carefully. */
+			req.extts.flags |= PTP_STRICT_FLAGS;
+			/* Make sure no reserved bit is set. */
+			if ((req.extts.flags & ~PTP_EXTTS_VALID_FLAGS) ||
+			    req.extts.rsv[0] || req.extts.rsv[1]) {
+				err = -EINVAL;
+				break;
+			}
+			/* Ensure one of the rising/falling edge bits is set. */
+			if ((req.extts.flags & PTP_ENABLE_FEATURE) &&
+			    (req.extts.flags & PTP_EXTTS_EDGES) == 0) {
+				err = -EINVAL;
+				break;
+			}
 		} else if (cmd == PTP_EXTTS_REQUEST) {
 			req.extts.flags &= PTP_EXTTS_V1_VALID_FLAGS;
 			req.extts.rsv[0] = 0;
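The PTP_EXTTS_REQUEST2 path above now rejects reserved flag bits and "enable" requests that select neither edge. A minimal sketch of that validation in plain C; the flag bit values mirror the kernel's uapi `ptp_clock.h` conventions (enable in bit 0, edges in bits 1-2, strict in bit 3), but treat them here as an illustrative assumption, and `-1` stands in for `-EINVAL`:

```c
/* Flag bits, modeled after include/uapi/linux/ptp_clock.h (assumed layout). */
#define PTP_ENABLE_FEATURE (1u << 0)
#define PTP_RISING_EDGE    (1u << 1)
#define PTP_FALLING_EDGE   (1u << 2)
#define PTP_STRICT_FLAGS   (1u << 3)
#define PTP_EXTTS_EDGES       (PTP_RISING_EDGE | PTP_FALLING_EDGE)
#define PTP_EXTTS_VALID_FLAGS (PTP_ENABLE_FEATURE | PTP_EXTTS_EDGES | PTP_STRICT_FLAGS)

/*
 * Validation performed by the v2 ioctl path above:
 *  - no bit outside the valid set may be set;
 *  - enabling external timestamps requires at least one edge bit.
 * Returns 0 on success, -1 (for -EINVAL) on rejection.
 */
int extts2_check_flags(unsigned int flags)
{
	if (flags & ~PTP_EXTTS_VALID_FLAGS)
		return -1;
	if ((flags & PTP_ENABLE_FEATURE) && (flags & PTP_EXTTS_EDGES) == 0)
		return -1;
	return 0;
}
```

Note that a disable request (no `PTP_ENABLE_FEATURE`) is allowed to carry no edge bits, which is why the edge check is gated on the enable bit.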
drivers/reset/core.c | +3 -2

@@ -76 +76 @@
  * of_reset_simple_xlate - translate reset_spec to the reset line number
  * @rcdev: a pointer to the reset controller device
  * @reset_spec: reset line specifier as found in the device tree
- * @flags: a flags pointer to fill in (optional)
  *
  * This simple translation function should be used for reset controllers
  * with 1:1 mapping, where reset lines can be indexed by number without gaps.
@@ -747 +748 @@
 	for (i = 0; i < resets->num_rstcs; i++)
 		__reset_control_put_internal(resets->rstc[i]);
 	mutex_unlock(&reset_list_mutex);
+	kfree(resets);
 }
 
 /**
@@ -825 +825 @@
 }
 EXPORT_SYMBOL_GPL(__device_reset);
 
-/**
+/*
  * APIs to manage an array of reset controls.
  */
+
 /**
  * of_reset_control_get_count - Count number of resets available with a device
  *
drivers/scsi/qla2xxx/qla_mid.c | +5 -3

@@ -76 +76 @@
	 * ensures no active vp_list traversal while the vport is removed
	 * from the queue)
	 */
-	for (i = 0; i < 10 && atomic_read(&vha->vref_count); i++)
-		wait_event_timeout(vha->vref_waitq,
-		    atomic_read(&vha->vref_count), HZ);
+	for (i = 0; i < 10; i++) {
+		if (wait_event_timeout(vha->vref_waitq,
+		    !atomic_read(&vha->vref_count), HZ) > 0)
+			break;
+	}
 
 	spin_lock_irqsave(&ha->vport_slock, flags);
 	if (atomic_read(&vha->vref_count)) {
drivers/scsi/qla2xxx/qla_os.c | +5 -3

@@ -1119 +1119 @@
 
 	qla2x00_mark_all_devices_lost(vha, 0);
 
-	for (i = 0; i < 10; i++)
-		wait_event_timeout(vha->fcport_waitQ, test_fcport_count(vha),
-		    HZ);
+	for (i = 0; i < 10; i++) {
+		if (wait_event_timeout(vha->fcport_waitQ,
+		    test_fcport_count(vha), HZ) > 0)
+			break;
+	}
 
 	flush_workqueue(vha->hw->wq);
 }
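Both qla2xxx hunks rework a bounded wait so the loop checks the return value of `wait_event_timeout()` (which is greater than zero when the condition became true) and breaks out early instead of always burning all ten iterations. A small user-space sketch of that retry shape, with a hypothetical `poll_once()` callback standing in for the kernel wait API:

```c
/*
 * Retry a timed wait up to max_tries times, but stop as soon as the wait
 * reports success - the same shape as the corrected qla2xxx loops, where
 * wait_event_timeout() returning > 0 means the condition was satisfied.
 */
static int bounded_wait(int (*poll_once)(void *), void *arg, int max_tries)
{
	int i;

	for (i = 0; i < max_tries; i++) {
		if (poll_once(arg) > 0)
			return 1;	/* condition met, stop early */
	}
	return 0;		/* gave up after max_tries attempts */
}

/* Example predicate: reports success once a countdown reaches zero. */
static int countdown_poll(void *arg)
{
	int *remaining = arg;

	return (*remaining)-- <= 0;
}

/* Convenience wrapper so the pattern is easy to exercise. */
int bounded_wait_demo(int countdown, int max_tries)
{
	return bounded_wait(countdown_poll, &countdown, max_tries);
}
```

The original loops also spun when the condition was already true (`qla_mid.c`) or ignored the wait result entirely (`qla_os.c`); checking the wait's return value removes both problems.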
@@ -263 +263 @@
 	int result = cmd->result;
 	struct request *rq = cmd->request;
 
-	switch (req_op(rq)) {
-	case REQ_OP_ZONE_RESET:
-	case REQ_OP_ZONE_RESET_ALL:
-
-		if (result &&
-		    sshdr->sense_key == ILLEGAL_REQUEST &&
-		    sshdr->asc == 0x24)
-			/*
-			 * INVALID FIELD IN CDB error: reset of a conventional
-			 * zone was attempted. Nothing to worry about, so be
-			 * quiet about the error.
-			 */
-			rq->rq_flags |= RQF_QUIET;
-		break;
-
-	case REQ_OP_WRITE:
-	case REQ_OP_WRITE_ZEROES:
-	case REQ_OP_WRITE_SAME:
-		break;
+	if (req_op(rq) == REQ_OP_ZONE_RESET &&
+	    result &&
+	    sshdr->sense_key == ILLEGAL_REQUEST &&
+	    sshdr->asc == 0x24) {
+		/*
+		 * INVALID FIELD IN CDB error: reset of a conventional
+		 * zone was attempted. Nothing to worry about, so be
+		 * quiet about the error.
+		 */
+		rq->rq_flags |= RQF_QUIET;
 	}
 }
@@ -5 +6 @@
 
 menuconfig SOUNDWIRE
 	tristate "SoundWire support"
+	depends on ACPI || OF
 	help
 	  SoundWire is a 2-Pin interface with data and clock line ratified
 	  by the MIPI Alliance. SoundWire is used for transporting data
drivers/soundwire/intel.c | +2 -2

@@ -900 +900 @@
 	/* Create PCM DAIs */
 	stream = &cdns->pcm;
 
-	ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, stream->num_in,
+	ret = intel_create_dai(cdns, dais, INTEL_PDI_IN, cdns->pcm.num_in,
 			       off, stream->num_ch_in, true);
 	if (ret)
 		return ret;
@@ -931 +931 @@
 	if (ret)
 		return ret;
 
-	off += cdns->pdm.num_bd;
+	off += cdns->pdm.num_out;
 	ret = intel_create_dai(cdns, dais, INTEL_PDI_BD, cdns->pdm.num_bd,
 			       off, stream->num_ch_bd, false);
 	if (ret)
drivers/soundwire/slave.c | +2 -1

@@ -128 +128 @@
 	struct device_node *node;
 
 	for_each_child_of_node(bus->dev->of_node, node) {
-		int link_id, sdw_version, ret, len;
+		int link_id, ret, len;
+		unsigned int sdw_version;
 		const char *compat = NULL;
 		struct sdw_slave_id id;
 		const __be32 *addr;
drivers/thunderbolt/nhi_ops.c | -1

@@ -80 +80 @@
 {
 	u32 data;
 
-	pci_read_config_dword(nhi->pdev, VS_CAP_19, &data);
 	data = (cmd << VS_CAP_19_CMD_SHIFT) & VS_CAP_19_CMD_MASK;
 	pci_write_config_dword(nhi->pdev, VS_CAP_19, data | VS_CAP_19_VALID);
 }
drivers/thunderbolt/switch.c | +11 -17

@@ -896 +896 @@
  */
 bool tb_dp_port_is_enabled(struct tb_port *port)
 {
-	u32 data;
+	u32 data[2];
 
-	if (tb_port_read(port, &data, TB_CFG_PORT, port->cap_adap, 1))
+	if (tb_port_read(port, data, TB_CFG_PORT, port->cap_adap,
+			 ARRAY_SIZE(data)))
 		return false;
 
-	return !!(data & (TB_DP_VIDEO_EN | TB_DP_AUX_EN));
+	return !!(data[0] & (TB_DP_VIDEO_EN | TB_DP_AUX_EN));
 }
 
 /**
@@ -915 +914 @@
  */
 int tb_dp_port_enable(struct tb_port *port, bool enable)
 {
-	u32 data;
+	u32 data[2];
 	int ret;
 
-	ret = tb_port_read(port, &data, TB_CFG_PORT, port->cap_adap, 1);
+	ret = tb_port_read(port, data, TB_CFG_PORT, port->cap_adap,
+			   ARRAY_SIZE(data));
 	if (ret)
 		return ret;
 
 	if (enable)
-		data |= TB_DP_VIDEO_EN | TB_DP_AUX_EN;
+		data[0] |= TB_DP_VIDEO_EN | TB_DP_AUX_EN;
 	else
-		data &= ~(TB_DP_VIDEO_EN | TB_DP_AUX_EN);
+		data[0] &= ~(TB_DP_VIDEO_EN | TB_DP_AUX_EN);
 
-	return tb_port_write(port, &data, TB_CFG_PORT, port->cap_adap, 1);
+	return tb_port_write(port, data, TB_CFG_PORT, port->cap_adap,
+			     ARRAY_SIZE(data));
 }
 
 /* switch utility functions */
@@ -1034 +1031 @@
 	if (sw->authorized)
 		goto unlock;
 
-	/*
-	 * Make sure there is no PCIe rescan ongoing when a new PCIe
-	 * tunnel is created. Otherwise the PCIe rescan code might find
-	 * the new tunnel too early.
-	 */
-	pci_lock_rescan_remove();
-
 	switch (val) {
 	/* Approve switch */
 	case 1:
@@ -1052 +1056 @@
 	default:
 		break;
 	}
-
-	pci_unlock_rescan_remove();
 
 	if (!ret) {
 		sw->authorized = val;
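The switch.c hunks above widen the DP adapter config access from one dword to two, while the enable flags are still tested only in the first word. A minimal sketch of that access pattern; the bit positions below are hypothetical stand-ins, not the real `TB_DP_VIDEO_EN`/`TB_DP_AUX_EN` values:

```c
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical bit positions for illustration only. */
#define DP_VIDEO_EN (1u << 6)
#define DP_AUX_EN   (1u << 30)

/*
 * Mirrors the corrected tb_dp_port_is_enabled(): the adapter config is two
 * dwords wide, so the full array is read, but the enable flags live in the
 * first dword and only data[0] is tested.
 */
bool dp_port_is_enabled(const uint32_t data[2])
{
	return (data[0] & (DP_VIDEO_EN | DP_AUX_EN)) != 0;
}

/* Wrapper taking the two words separately, for easy exercising. */
bool dp_port_is_enabled_words(uint32_t w0, uint32_t w1)
{
	uint32_t data[2] = { w0, w1 };

	return dp_port_is_enabled(data);
}
```

Reading the full two-dword block while only touching `data[0]` keeps the read size matched to the hardware register layout without changing which bits are interpreted.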
@@ -459 +459 @@
	 */
	how &= ~AUTOFS_EXP_LEAVES;
	found = should_expire(expired, mnt, timeout, how);
-	if (!found || found != expired)
-		/* Something has changed, continue */
+	if (found != expired) { // something has changed, continue
+		dput(found);
 		goto next;
+	}
 
 	if (expired != dentry)
 		dput(dentry);
fs/btrfs/inode.c | +29 -1

@@ -474 +474 @@
 	u64 start = async_chunk->start;
 	u64 end = async_chunk->end;
 	u64 actual_end;
+	u64 i_size;
 	int ret = 0;
 	struct page **pages = NULL;
 	unsigned long nr_pages;
@@ -489 +488 @@
 	inode_should_defrag(BTRFS_I(inode), start, end, end - start + 1,
 			SZ_16K);
 
-	actual_end = min_t(u64, i_size_read(inode), end + 1);
+	/*
+	 * We need to save i_size before now because it could change in between
+	 * us evaluating the size and assigning it. This is because we lock and
+	 * unlock the page in truncate and fallocate, and then modify the i_size
+	 * later on.
+	 *
+	 * The barriers are to emulate READ_ONCE, remove that once i_size_read
+	 * does that for us.
+	 */
+	barrier();
+	i_size = i_size_read(inode);
+	barrier();
+	actual_end = min_t(u64, i_size, end + 1);
 again:
 	will_compress = 0;
 	nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
@@ -9744 +9731 @@
 		commit_transaction = true;
 	}
 	if (commit_transaction) {
+		/*
+		 * We may have set commit_transaction when logging the new name
+		 * in the destination root, in which case we left the source
+		 * root context in the list of log contextes. So make sure we
+		 * remove it to avoid invalid memory accesses, since the context
+		 * was allocated in our stack frame.
+		 */
+		if (sync_log_root) {
+			mutex_lock(&root->log_mutex);
+			list_del_init(&ctx_root.list);
+			mutex_unlock(&root->log_mutex);
+		}
 		ret = btrfs_commit_transaction(trans);
 	} else {
 		int ret2;
@@ -9768 +9743 @@
 	up_read(&fs_info->subvol_sem);
 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
 		up_read(&fs_info->subvol_sem);
+
+	ASSERT(list_empty(&ctx_root.list));
+	ASSERT(list_empty(&ctx_dest.list));
 
 	return ret;
 }
fs/btrfs/ioctl.c | -6

@@ -4195 +4195 @@
 	u64 transid;
 	int ret;
 
-	btrfs_warn(root->fs_info,
-	"START_SYNC ioctl is deprecated and will be removed in kernel 5.7");
-
 	trans = btrfs_attach_transaction_barrier(root);
 	if (IS_ERR(trans)) {
 		if (PTR_ERR(trans) != -ENOENT)
@@ -4221 +4224 @@
 					   void __user *argp)
 {
 	u64 transid;
-
-	btrfs_warn(fs_info,
-	"WAIT_SYNC ioctl is deprecated and will be removed in kernel 5.7");
 
 	if (argp) {
 		if (copy_from_user(&transid, argp, sizeof(transid)))
fs/btrfs/space-info.c | +21

@@ -893 +893 @@
 	while (ticket->bytes > 0 && ticket->error == 0) {
 		ret = prepare_to_wait_event(&ticket->wait, &wait, TASK_KILLABLE);
 		if (ret) {
+			/*
+			 * Delete us from the list. After we unlock the space
+			 * info, we don't want the async reclaim job to reserve
+			 * space for this ticket. If that would happen, then the
+			 * ticket's task would not known that space was reserved
+			 * despite getting an error, resulting in a space leak
+			 * (bytes_may_use counter of our space_info).
+			 */
+			list_del_init(&ticket->list);
 			ticket->error = -EINTR;
 			break;
 		}
@@ -954 +945 @@
 	spin_lock(&space_info->lock);
 	ret = ticket->error;
 	if (ticket->bytes || ticket->error) {
+		/*
+		 * Need to delete here for priority tickets. For regular tickets
+		 * either the async reclaim job deletes the ticket from the list
+		 * or we delete it ourselves at wait_reserve_ticket().
+		 */
 		list_del_init(&ticket->list);
 		if (!ret)
 			ret = -ENOSPC;
 	}
 	spin_unlock(&space_info->lock);
 	ASSERT(list_empty(&ticket->list));
+	/*
+	 * Check that we can't have an error set if the reservation succeeded,
+	 * as that would confuse tasks and lead them to error out without
+	 * releasing reserved space (if an error happens the expectation is that
+	 * space wasn't reserved at all).
+	 */
+	ASSERT(!(ticket->bytes == 0 && ticket->error));
 	return ret;
 }
@@ -101 +101 @@
 	}
 	target_sd->s_links++;
 	spin_unlock(&configfs_dirent_lock);
-	ret = configfs_get_target_path(item, item, body);
+	ret = configfs_get_target_path(parent_item, item, body);
 	if (!ret)
 		ret = configfs_create_link(target_sd, parent_item->ci_dentry,
 					   dentry, body);
fs/ecryptfs/inode.c | +53 -31

@@ -128 +128 @@
 			      struct inode *inode)
 {
 	struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
 	struct dentry *lower_dir_dentry;
+	struct inode *lower_dir_inode;
 	int rc;
 
-	dget(lower_dentry);
-	lower_dir_dentry = lock_parent(lower_dentry);
-	rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
+	lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+	lower_dir_inode = d_inode(lower_dir_dentry);
+	inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+	dget(lower_dentry);	// don't even try to make the lower negative
+	if (lower_dentry->d_parent != lower_dir_dentry)
+		rc = -EINVAL;
+	else if (d_unhashed(lower_dentry))
+		rc = -EINVAL;
+	else
+		rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
 	if (rc) {
 		printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
 		goto out_unlock;
@@ -149 +142 @@
 	fsstack_copy_attr_times(dir, lower_dir_inode);
 	set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
 	inode->i_ctime = dir->i_ctime;
-	d_drop(dentry);
 out_unlock:
-	unlock_dir(lower_dir_dentry);
 	dput(lower_dentry);
+	inode_unlock(lower_dir_inode);
+	if (!rc)
+		d_drop(dentry);
 	return rc;
 }
 
@@ -319 +311 @@
 static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
 				     struct dentry *lower_dentry)
 {
-	struct inode *inode, *lower_inode = d_inode(lower_dentry);
+	struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent);
+	struct inode *inode, *lower_inode;
 	struct ecryptfs_dentry_info *dentry_info;
-	struct vfsmount *lower_mnt;
 	int rc = 0;
 
 	dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
@@ -330 +322 @@
 		return ERR_PTR(-ENOMEM);
 	}
 
-	lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
 	fsstack_copy_attr_atime(d_inode(dentry->d_parent),
-				d_inode(lower_dentry->d_parent));
+				d_inode(path->dentry));
 	BUG_ON(!d_count(lower_dentry));
 
 	ecryptfs_set_dentry_private(dentry, dentry_info);
-	dentry_info->lower_path.mnt = lower_mnt;
+	dentry_info->lower_path.mnt = mntget(path->mnt);
 	dentry_info->lower_path.dentry = lower_dentry;
 
-	if (d_really_is_negative(lower_dentry)) {
+	/*
+	 * negative dentry can go positive under us here - its parent is not
+	 * locked. That's OK and that could happen just as we return from
+	 * ecryptfs_lookup() anyway. Just need to be careful and fetch
+	 * ->d_inode only once - it's not stable here.
+	 */
+	lower_inode = READ_ONCE(lower_dentry->d_inode);
+
+	if (!lower_inode) {
 		/* We want to add because we couldn't find in lower */
 		d_add(dentry, NULL);
 		return NULL;
@@ -527 +512 @@
 {
 	struct dentry *lower_dentry;
 	struct dentry *lower_dir_dentry;
+	struct inode *lower_dir_inode;
 	int rc;
 
 	lower_dentry = ecryptfs_dentry_to_lower(dentry);
-	dget(dentry);
-	lower_dir_dentry = lock_parent(lower_dentry);
-	dget(lower_dentry);
-	rc = vfs_rmdir(d_inode(lower_dir_dentry), lower_dentry);
-	dput(lower_dentry);
-	if (!rc && d_really_is_positive(dentry))
+	lower_dir_dentry = ecryptfs_dentry_to_lower(dentry->d_parent);
+	lower_dir_inode = d_inode(lower_dir_dentry);
+
+	inode_lock_nested(lower_dir_inode, I_MUTEX_PARENT);
+	dget(lower_dentry);	// don't even try to make the lower negative
+	if (lower_dentry->d_parent != lower_dir_dentry)
+		rc = -EINVAL;
+	else if (d_unhashed(lower_dentry))
+		rc = -EINVAL;
+	else
+		rc = vfs_rmdir(lower_dir_inode, lower_dentry);
+	if (!rc) {
 		clear_nlink(d_inode(dentry));
-	fsstack_copy_attr_times(dir, d_inode(lower_dir_dentry));
-	set_nlink(dir, d_inode(lower_dir_dentry)->i_nlink);
-	unlock_dir(lower_dir_dentry);
+		fsstack_copy_attr_times(dir, lower_dir_inode);
+		set_nlink(dir, lower_dir_inode->i_nlink);
+	}
+	dput(lower_dentry);
+	inode_unlock(lower_dir_inode);
 	if (!rc)
 		d_drop(dentry);
-	dput(dentry);
 	return rc;
 }
 
@@ -588 +565 @@
 	struct dentry *lower_new_dentry;
 	struct dentry *lower_old_dir_dentry;
 	struct dentry *lower_new_dir_dentry;
-	struct dentry *trap = NULL;
+	struct dentry *trap;
 	struct inode *target_inode;
 
 	if (flags)
 		return -EINVAL;
 
+	lower_old_dir_dentry = ecryptfs_dentry_to_lower(old_dentry->d_parent);
+	lower_new_dir_dentry = ecryptfs_dentry_to_lower(new_dentry->d_parent);
+
 	lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
 	lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
-	dget(lower_old_dentry);
-	dget(lower_new_dentry);
-	lower_old_dir_dentry = dget_parent(lower_old_dentry);
-	lower_new_dir_dentry = dget_parent(lower_new_dentry);
+
 	target_inode = d_inode(new_dentry);
+
 	trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+	dget(lower_new_dentry);
 	rc = -EINVAL;
 	if (lower_old_dentry->d_parent != lower_old_dir_dentry)
 		goto out_lock;
@@ -631 +606 @@
 	if (new_dir != old_dir)
 		fsstack_copy_attr_all(old_dir, d_inode(lower_old_dir_dentry));
 out_lock:
-	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
-	dput(lower_new_dir_dentry);
-	dput(lower_old_dir_dentry);
 	dput(lower_new_dentry);
-	dput(lower_old_dentry);
+	unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
 	return rc;
 }
fs/exportfs/expfs.c | +19 -12

@@ -519 +519 @@
 		 * inode is actually connected to the parent.
 		 */
 		err = exportfs_get_name(mnt, target_dir, nbuf, result);
-		if (!err) {
-			inode_lock(target_dir->d_inode);
-			nresult = lookup_one_len(nbuf, target_dir,
-						 strlen(nbuf));
-			inode_unlock(target_dir->d_inode);
-			if (!IS_ERR(nresult)) {
-				if (nresult->d_inode) {
-					dput(result);
-					result = nresult;
-				} else
-					dput(nresult);
-			}
+		if (err) {
+			dput(target_dir);
+			goto err_result;
 		}
 
+		inode_lock(target_dir->d_inode);
+		nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf));
+		if (!IS_ERR(nresult)) {
+			if (unlikely(nresult->d_inode != result->d_inode)) {
+				dput(nresult);
+				nresult = ERR_PTR(-ESTALE);
+			}
+		}
+		inode_unlock(target_dir->d_inode);
 		/*
 		 * At this point we are done with the parent, but it's pinned
 		 * by the child dentry anyway.
 		 */
 		dput(target_dir);
+
+		if (IS_ERR(nresult)) {
+			err = PTR_ERR(nresult);
+			goto err_result;
+		}
+		dput(result);
+		result = nresult;
 
 		/*
 		 * And finally make sure the dentry is actually acceptable
fs/io_uring.c | +24 -8

@@ -326 +326 @@
 #define REQ_F_TIMEOUT		1024	/* timeout request */
 #define REQ_F_ISREG		2048	/* regular file */
 #define REQ_F_MUST_PUNT		4096	/* must be punted even for NONBLOCK */
+#define REQ_F_TIMEOUT_NOSEQ	8192	/* no timeout sequence */
 	u64	user_data;
 	u32	result;
 	u32	sequence;
@@ -454 +453 @@
 	struct io_kiocb *req;
 
 	req = list_first_entry_or_null(&ctx->timeout_list, struct io_kiocb, list);
-	if (req && !__io_sequence_defer(ctx, req)) {
-		list_del_init(&req->list);
-		return req;
+	if (req) {
+		if (req->flags & REQ_F_TIMEOUT_NOSEQ)
+			return NULL;
+		if (!__io_sequence_defer(ctx, req)) {
+			list_del_init(&req->list);
+			return req;
+		}
 	}
 
 	return NULL;
@@ -1230 +1225 @@
 		}
 	}
 
-	return 0;
+	return len;
 }
 
 static ssize_t io_import_iovec(struct io_ring_ctx *ctx, int rw,
@@ -1946 +1941 @@
 	if (get_timespec64(&ts, u64_to_user_ptr(sqe->addr)))
 		return -EFAULT;
 
+	req->flags |= REQ_F_TIMEOUT;
+
 	/*
 	 * sqe->off holds how many events that need to occur for this
-	 * timeout event to be satisfied.
+	 * timeout event to be satisfied. If it isn't set, then this is
+	 * a pure timeout request, sequence isn't used.
 	 */
 	count = READ_ONCE(sqe->off);
-	if (!count)
-		count = 1;
+	if (!count) {
+		req->flags |= REQ_F_TIMEOUT_NOSEQ;
+		spin_lock_irq(&ctx->completion_lock);
+		entry = ctx->timeout_list.prev;
+		goto add;
+	}
 
 	req->sequence = ctx->cached_sq_head + count - 1;
 	/* reuse it to store the count */
 	req->submit.sequence = count;
-	req->flags |= REQ_F_TIMEOUT;
 
 	/*
 	 * Insertion sort, ensuring the first entry in the list is always
@@ -1974 +1963 @@
 		struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, list);
 		unsigned nxt_sq_head;
 		long long tmp, tmp_nxt;
+
+		if (nxt->flags & REQ_F_TIMEOUT_NOSEQ)
+			continue;
 
 		/*
 		 * Since cached_sq_head + count - 1 can overflow, use type long
@@ -2004 +1990 @@
 		nxt->sequence++;
 	}
 	req->sequence -= span;
+add:
 	list_add(&req->list, entry);
 	spin_unlock_irq(&ctx->completion_lock);
 
@@ -2298 +2283 @@
 	switch (op) {
 	case IORING_OP_NOP:
 	case IORING_OP_POLL_REMOVE:
+	case IORING_OP_TIMEOUT:
 		return false;
 	default:
 		return true;
fs/namespace.c | +7 -8

@@ -2478 +2478 @@
 
 	time64_to_tm(sb->s_time_max, 0, &tm);
 
-	pr_warn("Mounted %s file system at %s supports timestamps until %04ld (0x%llx)\n",
-		sb->s_type->name, mntpath,
+	pr_warn("%s filesystem being %s at %s supports timestamps until %04ld (0x%llx)\n",
+		sb->s_type->name,
+		is_mounted(mnt) ? "remounted" : "mounted",
+		mntpath,
 		tm.tm_year+1900, (unsigned long long)sb->s_time_max);
 
 	free_page((unsigned long)buf);
@@ -2766 +2764 @@
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
-	error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
-	if (error < 0) {
-		mntput(mnt);
-		return error;
-	}
-
 	mnt_warn_timestamp_expiry(mountpoint, mnt);
 
+	error = do_add_mount(real_mount(mnt), mountpoint, mnt_flags);
+	if (error < 0)
+		mntput(mnt);
 	return error;
 }
@@ -119 +119 @@
 typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
 extern int walk_memory_blocks(unsigned long start, unsigned long size,
 			      void *arg, walk_memory_blocks_func_t func);
+extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
 #define CONFIG_MEM_BLOCK_SIZE	(PAGES_PER_SECTION<<PAGE_SHIFT)
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
include/linux/reset-controller.h | +2 -2

@@ -7 +7 @@
 struct reset_controller_dev;
 
 /**
- * struct reset_control_ops
+ * struct reset_control_ops - reset controller driver callbacks
  *
  * @reset: for self-deasserting resets, does all necessary
  *         things to reset the device
@@ -33 +33 @@
  * @provider: name of the reset controller device controlling this reset line
  * @index: ID of the reset controller in the reset controller device
  * @dev_id: name of the device associated with this reset line
- * @con_id name of the reset line (can be NULL)
+ * @con_id: name of the reset line (can be NULL)
  */
 struct reset_control_lookup {
 	struct list_head list;
include/linux/reset.h | +1 -1

@@ -143 +143 @@
 * If this function is called more than once for the same reset_control it will
 * return -EBUSY.
 *
- * See reset_control_get_shared for details on shared references to
+ * See reset_control_get_shared() for details on shared references to
 * reset-controls.
 *
 * Use of id names is optional.
@@ -2119 +2119 @@
 
 		nsdentry = kernfs_node_dentry(cgrp->kn, sb);
 		dput(fc->root);
-		fc->root = nsdentry;
 		if (IS_ERR(nsdentry)) {
-			ret = PTR_ERR(nsdentry);
 			deactivate_locked_super(sb);
+			ret = PTR_ERR(nsdentry);
+			nsdentry = NULL;
 		}
+		fc->root = nsdentry;
 	}
 
 	if (!ctx->kfc.new_sb_created)
kernel/cpu.c | +26 -1

@@ -2373 +2373 @@
 	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
 }
 
-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
+/*
+ * These are used for a global "mitigations=" cmdline option for toggling
+ * optional CPU mitigations.
+ */
+enum cpu_mitigations {
+	CPU_MITIGATIONS_OFF,
+	CPU_MITIGATIONS_AUTO,
+	CPU_MITIGATIONS_AUTO_NOSMT,
+};
+
+static enum cpu_mitigations cpu_mitigations __ro_after_init =
+	CPU_MITIGATIONS_AUTO;
 
 static int __init mitigations_parse_cmdline(char *arg)
 {
@@ -2401 +2390 @@
 	return 0;
 }
 early_param("mitigations", mitigations_parse_cmdline);
+
+/* mitigations=off */
+bool cpu_mitigations_off(void)
+{
+	return cpu_mitigations == CPU_MITIGATIONS_OFF;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_off);
+
+/* mitigations=auto,nosmt */
+bool cpu_mitigations_auto_nosmt(void)
+{
+	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
+}
+EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
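The kernel/cpu.c hunk above moves the `cpu_mitigations` enum out of a header and adds accessors for the `mitigations=` boot parameter. A minimal user-space sketch of the keyword mapping that parameter implies; the kernel's real `mitigations_parse_cmdline()` accepts the same three keywords, while the fallback-to-auto behavior for unknown strings is a simplification here:

```c
#include <string.h>

/* Mirrors the enum introduced in the hunk above. */
enum cpu_mitigations {
	CPU_MITIGATIONS_OFF,
	CPU_MITIGATIONS_AUTO,
	CPU_MITIGATIONS_AUTO_NOSMT,
};

/*
 * Map a "mitigations=" value to the enum. Unknown values fall back to the
 * default, CPU_MITIGATIONS_AUTO (the kernel additionally logs a warning).
 */
enum cpu_mitigations parse_mitigations(const char *arg)
{
	if (strcmp(arg, "off") == 0)
		return CPU_MITIGATIONS_OFF;
	if (strcmp(arg, "auto,nosmt") == 0)
		return CPU_MITIGATIONS_AUTO_NOSMT;
	return CPU_MITIGATIONS_AUTO;
}
```

Keeping the enum private to cpu.c and exposing only the two boolean accessors (`cpu_mitigations_off()`, `cpu_mitigations_auto_nosmt()`) means callers never compare against the enum directly.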
kernel/events/core.c | +19 -4

@@ -1031 +1031 @@
 {
 }
 
-void
+static inline void
 perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
 {
 }
@@ -10535 +10535 @@
 		goto err_ns;
 	}
 
+	/*
+	 * Disallow uncore-cgroup events, they don't make sense as the cgroup will
+	 * be different on other CPUs in the uncore mask.
+	 */
+	if (pmu->task_ctx_nr == perf_invalid_context && cgroup_fd != -1) {
+		err = -EINVAL;
+		goto err_pmu;
+	}
+
 	if (event->attr.aux_output &&
 	    !(pmu->capabilities & PERF_PMU_CAP_AUX_OUTPUT)) {
 		err = -EOPNOTSUPP;
@@ -11332 +11323 @@
 	int err;
 
 	/*
-	 * Get the target context (task or percpu):
+	 * Grouping is not supported for kernel events, neither is 'AUX',
+	 * make sure the caller's intentions are adjusted.
 	 */
+	if (attr->aux_output)
+		return ERR_PTR(-EINVAL);
 
 	event = perf_event_alloc(attr, cpu, task, NULL, NULL,
 				 overflow_handler, context, -1);
@@ -11348 +11336 @@
 	/* Mark owner so we could distinguish it from user events. */
 	event->owner = TASK_TOMBSTONE;
 
+	/*
+	 * Get the target context (task or percpu):
+	 */
 	ctx = find_get_context(event->pmu, task, event);
 	if (IS_ERR(ctx)) {
 		err = PTR_ERR(ctx);
@@ -11802 +11787 @@
 						   GFP_KERNEL);
 		if (!child_ctx->task_ctx_data) {
 			free_event(child_event);
-			return NULL;
+			return ERR_PTR(-ENOMEM);
 		}
 	}
 
@@ -11905 +11890 @@
 		if (IS_ERR(child_ctr))
 			return PTR_ERR(child_ctr);
 
-		if (sub->aux_event == parent_event &&
+		if (sub->aux_event == parent_event && child_ctr &&
 		    !perf_get_aux_event(child_ctr, leader))
 			return -EINVAL;
 	}
kernel/irq/irqdomain.c | +1 -1

@@ -51 +51 @@
 * @type:	Type of irqchip_fwnode. See linux/irqdomain.h
 * @name:	Optional user provided domain name
 * @id:		Optional user provided id if name != NULL
- * @data:	Optional user-provided data
+ * @pa:		Optional user-provided physical address
 *
 * Allocate a struct irqchip_fwid, and return a poiner to the embedded
 * fwnode_handle (or NULL on failure).
kernel/sched/core.c | +16 -7

@@ -1073 +1073 @@
 	task_rq_unlock(rq, p, &rf);
 }
 
+#ifdef CONFIG_UCLAMP_TASK_GROUP
 static inline void
 uclamp_update_active_tasks(struct cgroup_subsys_state *css,
 			   unsigned int clamps)
@@ -1092 +1091 @@
 	css_task_iter_end(&it);
 }
 
-#ifdef CONFIG_UCLAMP_TASK_GROUP
 static void cpu_util_update_eff(struct cgroup_subsys_state *css);
 static void uclamp_update_root_tg(void)
 {
@@ -3929 +3929 @@
 	}
 
 restart:
+#ifdef CONFIG_SMP
 	/*
-	 * Ensure that we put DL/RT tasks before the pick loop, such that they
-	 * can PULL higher prio tasks when we lower the RQ 'priority'.
+	 * We must do the balancing pass before put_next_task(), such
+	 * that when we release the rq->lock the task is in the same
+	 * state as before we took rq->lock.
+	 *
+	 * We can terminate the balance pass as soon as we know there is
+	 * a runnable task of @class priority or higher.
	 */
-	prev->sched_class->put_prev_task(rq, prev, rf);
-	if (!rq->nr_running)
-		newidle_balance(rq, rf);
+	for_class_range(class, prev->sched_class, &idle_sched_class) {
+		if (class->balance(rq, prev, rf))
+			break;
+	}
+#endif
+
+	put_prev_task(rq, prev);
 
 	for_each_class(class) {
 		p = class->pick_next_task(rq, NULL, NULL);
@@ -6210 +6201 @@
 	for_each_class(class) {
 		next = class->pick_next_task(rq, NULL, NULL);
 		if (next) {
-			next->sched_class->put_prev_task(rq, next, NULL);
+			next->sched_class->put_prev_task(rq, next);
 			return next;
 		}
 	}
kernel/sched/deadline.c | +20 -20

@@ -1691 +1691 @@
 	resched_curr(rq);
 }
 
+static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+	if (!on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet started the picking loop.
+		 */
+		rq_unpin_lock(rq, rf);
+		pull_dl_task(rq);
+		rq_repin_lock(rq, rf);
+	}
+
+	return sched_stop_runnable(rq) || sched_dl_runnable(rq);
+}
 #endif /* CONFIG_SMP */
 
 /*
@@ -1774 +1758 @@
 pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	struct sched_dl_entity *dl_se;
+	struct dl_rq *dl_rq = &rq->dl;
 	struct task_struct *p;
-	struct dl_rq *dl_rq;
 
 	WARN_ON_ONCE(prev || rf);
 
-	dl_rq = &rq->dl;
-
-	if (unlikely(!dl_rq->dl_nr_running))
+	if (!sched_dl_runnable(rq))
 		return NULL;
 
 	dl_se = pick_next_dl_entity(rq, dl_rq);
 	BUG_ON(!dl_se);
-
 	p = dl_task_of(dl_se);
-
 	set_next_task_dl(rq, p);
-
 	return p;
 }
 
-static void put_prev_task_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
 {
 	update_curr_dl(rq);
 
 	update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 1);
 	if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_dl_task(rq, p);
-
-	if (rf && !on_dl_rq(&p->dl) && need_pull_dl_task(rq, p)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we've
-		 * not yet started the picking loop.
-		 */
-		rq_unpin_lock(rq, rf);
-		pull_dl_task(rq);
-		rq_repin_lock(rq, rf);
-	}
 }
 
 /*
@@ -2441 +2442 @@
 	.set_next_task		= set_next_task_dl,
 
 #ifdef CONFIG_SMP
+	.balance		= balance_dl,
 	.select_task_rq		= select_task_rq_dl,
 	.migrate_task_rq	= migrate_task_rq_dl,
 	.set_cpus_allowed	= set_cpus_allowed_dl,
···
 		resched_curr(rq);
 }

+static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+{
+	if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
+		/*
+		 * This is OK, because current is on_cpu, which avoids it being
+		 * picked for load-balance and preemption/IRQs are still
+		 * disabled avoiding further scheduler activity on it and we've
+		 * not yet started the picking loop.
+		 */
+		rq_unpin_lock(rq, rf);
+		pull_rt_task(rq);
+		rq_repin_lock(rq, rf);
+	}
+
+	return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
+}
 #endif /* CONFIG_SMP */

 /*
···
 pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
 	struct task_struct *p;
-	struct rt_rq *rt_rq = &rq->rt;

 	WARN_ON_ONCE(prev || rf);

-	if (!rt_rq->rt_queued)
+	if (!sched_rt_runnable(rq))
 		return NULL;

 	p = _pick_next_task_rt(rq);
-
 	set_next_task_rt(rq, p);
-
 	return p;
 }

-static void put_prev_task_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);

···
 	 */
 	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
-
-	if (rf && !on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
-		/*
-		 * This is OK, because current is on_cpu, which avoids it being
-		 * picked for load-balance and preemption/IRQs are still
-		 * disabled avoiding further scheduler activity on it and we've
-		 * not yet started the picking loop.
-		 */
-		rq_unpin_lock(rq, rf);
-		pull_rt_task(rq);
-		rq_repin_lock(rq, rf);
-	}
 }

 #ifdef CONFIG_SMP
···
 	.set_next_task		= set_next_task_rt,

 #ifdef CONFIG_SMP
+	.balance		= balance_rt,
 	.select_task_rq		= select_task_rq_rt,
-
 	.set_cpus_allowed	= set_cpus_allowed_common,
 	.rq_online		= rq_online_rt,
 	.rq_offline		= rq_offline_rt,
···
 again:
 	rcu_read_lock();
 	h_cg = hugetlb_cgroup_from_task(current);
-	if (!css_tryget_online(&h_cg->css)) {
+	if (!css_tryget(&h_cg->css)) {
 		rcu_read_unlock();
 		goto again;
 	}
+16-12
mm/khugepaged.c
···
 				result = SCAN_FAIL;
 				goto xa_unlocked;
 			}
-		} else if (!PageUptodate(page)) {
-			xas_unlock_irq(&xas);
-			wait_on_page_locked(page);
-			if (!trylock_page(page)) {
-				result = SCAN_PAGE_LOCK;
-				goto xa_unlocked;
-			}
-			get_page(page);
-		} else if (PageDirty(page)) {
-			result = SCAN_FAIL;
-			goto xa_locked;
 		} else if (trylock_page(page)) {
 			get_page(page);
 			xas_unlock_irq(&xas);
···
 		 * without racing with truncate.
 		 */
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
-		VM_BUG_ON_PAGE(!PageUptodate(page), page);
+
+		/* make sure the page is up to date */
+		if (unlikely(!PageUptodate(page))) {
+			result = SCAN_FAIL;
+			goto out_unlock;
+		}

 		/*
 		 * If file was truncated then extended, or hole-punched, before
···

 		if (page_mapping(page) != mapping) {
 			result = SCAN_TRUNCATED;
+			goto out_unlock;
+		}
+
+		if (!is_shmem && PageDirty(page)) {
+			/*
+			 * khugepaged only works on read-only fd, so this
+			 * page is dirty because it hasn't been flushed
+			 * since first write.
+			 */
+			result = SCAN_FAIL;
 			goto out_unlock;
 		}

+12-4
mm/madvise.c
···
 		ClearPageReferenced(page);
 		test_and_clear_page_young(page);
 		if (pageout) {
-			if (!isolate_lru_page(page))
-				list_add(&page->lru, &page_list);
+			if (!isolate_lru_page(page)) {
+				if (PageUnevictable(page))
+					putback_lru_page(page);
+				else
+					list_add(&page->lru, &page_list);
+			}
 		} else
 			deactivate_page(page);
 huge_unlock:
···
 		ClearPageReferenced(page);
 		test_and_clear_page_young(page);
 		if (pageout) {
-			if (!isolate_lru_page(page))
-				list_add(&page->lru, &page_list);
+			if (!isolate_lru_page(page)) {
+				if (PageUnevictable(page))
+					putback_lru_page(page);
+				else
+					list_add(&page->lru, &page_list);
+			}
 		} else
 			deactivate_page(page);
 	}
+1-1
mm/memcontrol.c
···
 			if (unlikely(!memcg))
 				memcg = root_mem_cgroup;
 		}
-	} while (!css_tryget_online(&memcg->css));
+	} while (!css_tryget(&memcg->css));
 	rcu_read_unlock();
 	return memcg;
 }
+28-17
mm/memory_hotplug.c
···
 	return 0;
 }

+static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg)
+{
+	int nid = *(int *)arg;
+
+	/*
+	 * If a memory block belongs to multiple nodes, the stored nid is not
+	 * reliable. However, such blocks are always online (e.g., cannot get
+	 * offlined) and, therefore, are still spanned by the node.
+	 */
+	return mem->nid == nid ? -EEXIST : 0;
+}
+
 /**
  * try_offline_node
  * @nid: the node ID
···
 void try_offline_node(int nid)
 {
 	pg_data_t *pgdat = NODE_DATA(nid);
-	unsigned long start_pfn = pgdat->node_start_pfn;
-	unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
-	unsigned long pfn;
+	int rc;

-	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-		unsigned long section_nr = pfn_to_section_nr(pfn);
-
-		if (!present_section_nr(section_nr))
-			continue;
-
-		if (pfn_to_nid(pfn) != nid)
-			continue;
-
-		/*
-		 * some memory sections of this node are not removed, and we
-		 * can't offline node now.
-		 */
+	/*
+	 * If the node still spans pages (especially ZONE_DEVICE), don't
+	 * offline it. A node spans memory after move_pfn_range_to_zone(),
+	 * e.g., after the memory block was onlined.
+	 */
+	if (pgdat->node_spanned_pages)
 		return;
-	}
+
+	/*
+	 * Especially offline memory blocks might not be spanned by the
+	 * node. They will get spanned by the node once they get onlined.
+	 * However, they link to the node in sysfs and can get onlined later.
+	 */
+	rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb);
+	if (rc)
+		return;

 	if (check_cpu_on_node(pgdat))
 		return;
+9-5
mm/mempolicy.c
···
  * 1 - there is unmovable page, but MPOL_MF_MOVE* & MPOL_MF_STRICT were
  *     specified.
  * 0 - queue pages successfully or no misplaced page.
- * -EIO - there is misplaced page and only MPOL_MF_STRICT was specified.
+ * errno - i.e. misplaced pages with MPOL_MF_STRICT specified (-EIO) or
+ *         memory range specified by nodemask and maxnode points outside
+ *         your accessible address space (-EFAULT)
  */
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
···
 			flags | MPOL_MF_INVERT, &pagelist);

 	if (ret < 0) {
-		err = -EIO;
+		err = ret;
 		goto up_out;
 	}
···

 		if ((ret > 0) || (nr_failed && (flags & MPOL_MF_STRICT)))
 			err = -EIO;
-	} else
-		putback_movable_pages(&pagelist);
-
+	} else {
 up_out:
+		if (!list_empty(&pagelist))
+			putback_movable_pages(&pagelist);
+	}
+
 	up_write(&mm->mmap_sem);
 mpol_out:
 	mpol_put(new);
+3-3
mm/page_io.c
···
 {
 	struct swap_info_struct *sis;
 	struct gendisk *disk;
+	swp_entry_t entry;

 	/*
 	 * There is no guarantee that the page is in swap cache - the software
···
 	 * we again wish to reclaim it.
 	 */
 	disk = sis->bdev->bd_disk;
-	if (disk->fops->swap_slot_free_notify) {
-		swp_entry_t entry;
+	entry.val = page_private(page);
+	if (disk->fops->swap_slot_free_notify && __swap_count(entry) == 1) {
 		unsigned long offset;

-		entry.val = page_private(page);
 		offset = swp_offset(entry);

 		SetPageDirty(page);
+9-30
mm/slub.c
···
 	void *old_tail = *tail ? *tail : *head;
 	int rsize;

-	if (slab_want_init_on_free(s)) {
-		void *p = NULL;
+	/* Head and tail of the reconstructed freelist */
+	*head = NULL;
+	*tail = NULL;

-		do {
-			object = next;
-			next = get_freepointer(s, object);
+	do {
+		object = next;
+		next = get_freepointer(s, object);
+
+		if (slab_want_init_on_free(s)) {
 			/*
 			 * Clear the object and the metadata, but don't touch
 			 * the redzone.
···
 				   : 0;
 			memset((char *)object + s->inuse, 0,
 			       s->size - s->inuse - rsize);
-			set_freepointer(s, object, p);
-			p = object;
-		} while (object != old_tail);
-	}

-/*
- * Compiler cannot detect this function can be removed if slab_free_hook()
- * evaluates to nothing. Thus, catch all relevant config debug options here.
- */
-#if defined(CONFIG_LOCKDEP)	|| \
-	defined(CONFIG_DEBUG_KMEMLEAK) || \
-	defined(CONFIG_DEBUG_OBJECTS_FREE) || \
-	defined(CONFIG_KASAN)
-
-	next = *head;
-
-	/* Head and tail of the reconstructed freelist */
-	*head = NULL;
-	*tail = NULL;
-
-	do {
-		object = next;
-		next = get_freepointer(s, object);
+		}
 		/* If object's reuse doesn't have to be delayed */
 		if (!slab_free_hook(s, object)) {
 			/* Move object to the new freelist */
···
 		*tail = NULL;

 	return *head != NULL;
-#else
-	return true;
-#endif
 }

 static void *setup_object(struct kmem_cache *s, struct page *page,
···
 	slave = dsa_to_port(ds, port)->slave;

 	err = br_vlan_get_pvid(slave, &pvid);
-	if (err < 0)
+	if (!pvid || err < 0)
 		/* There is no pvid on the bridge for this port, which is
 		 * perfectly valid. Nothing to restore, bye-bye!
 		 */
···
 		}

 		prepare_outbound_urb(ep, ctx);
+		/* can be stopped during prepare callback */
+		if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags)))
+			goto exit_clear;
 	} else {
 		retire_inbound_urb(ep, ctx);
 		/* can be stopped during retire callback */
+3-1
sound/usb/mixer.c
···
 	if (cval->min + cval->res < cval->max) {
 		int last_valid_res = cval->res;
 		int saved, test, check;
-		get_cur_mix_raw(cval, minchn, &saved);
+		if (get_cur_mix_raw(cval, minchn, &saved) < 0)
+			goto no_res_check;
 		for (;;) {
 			test = saved;
 			if (test < cval->max)
···
 			snd_usb_set_cur_mix_value(cval, minchn, 0, saved);
 	}

+no_res_check:
 	cval->initialized = 1;
 }
+2-2
sound/usb/quirks.c
···
 					NULL, USB_MS_MIDI_OUT_JACK);
 	if (!injd && !outjd)
 		return -ENODEV;
-	if (!(injd && snd_usb_validate_midi_desc(injd)) ||
-	    !(outjd && snd_usb_validate_midi_desc(outjd)))
+	if ((injd && !snd_usb_validate_midi_desc(injd)) ||
+	    (outjd && !snd_usb_validate_midi_desc(outjd)))
 		return -ENODEV;
 	if (injd && (injd->bLength < 5 ||
 		     (injd->bJackType != USB_MS_EMBEDDED &&
+3-3
sound/usb/validate.c
···
 	switch (v->protocol) {
 	case UAC_VERSION_1:
 	default:
-		/* bNrChannels, wChannelConfig, iChannelNames, bControlSize */
-		len += 1 + 2 + 1 + 1;
-		if (d->bLength < len) /* bControlSize */
+		/* bNrChannels, wChannelConfig, iChannelNames */
+		len += 1 + 2 + 1;
+		if (d->bLength < len + 1) /* bControlSize */
 			return false;
 		m = hdr[len];
 		len += 1 + m + 1; /* bControlSize, bmControls, iProcessing */
···
 	RET=0

 	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add name dummy1 up type dummy

 	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
 		ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
-		dev $swp2 group 239.0.0.1
+		dev dummy1 group 239.0.0.1

 	sanitization_single_dev_test_fail

 	ip link del dev vxlan0
+	ip link del dev dummy1
 	ip link del dev br0

 	log_test "vxlan device with a multicast group"
···
 	RET=0

 	ip link add dev br0 type bridge mcast_snooping 0
+	ip link add name dummy1 up type dummy

 	ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
-		ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev $swp2
+		ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1

 	sanitization_single_dev_test_fail

 	ip link del dev vxlan0
+	ip link del dev dummy1
 	ip link del dev br0

 	log_test "vxlan device with local interface"
···
 }
 #endif

+static void show_flag_test(int rq_index, unsigned int flags, int err)
+{
+	printf("PTP_EXTTS_REQUEST%c flags 0x%08x : (%d) %s\n",
+	       rq_index ? '1' + rq_index : ' ',
+	       flags, err, strerror(errno));
+	/* sigh, uClibc ... */
+	errno = 0;
+}
+
+static void do_flag_test(int fd, unsigned int index)
+{
+	struct ptp_extts_request extts_request;
+	unsigned long request[2] = {
+		PTP_EXTTS_REQUEST,
+		PTP_EXTTS_REQUEST2,
+	};
+	unsigned int enable_flags[5] = {
+		PTP_ENABLE_FEATURE,
+		PTP_ENABLE_FEATURE | PTP_RISING_EDGE,
+		PTP_ENABLE_FEATURE | PTP_FALLING_EDGE,
+		PTP_ENABLE_FEATURE | PTP_RISING_EDGE | PTP_FALLING_EDGE,
+		PTP_ENABLE_FEATURE | (PTP_EXTTS_VALID_FLAGS + 1),
+	};
+	int err, i, j;
+
+	memset(&extts_request, 0, sizeof(extts_request));
+	extts_request.index = index;
+
+	for (i = 0; i < 2; i++) {
+		for (j = 0; j < 5; j++) {
+			extts_request.flags = enable_flags[j];
+			err = ioctl(fd, request[i], &extts_request);
+			show_flag_test(i, extts_request.flags, err);
+
+			extts_request.flags = 0;
+			err = ioctl(fd, request[i], &extts_request);
+		}
+	}
+}
+
 static clockid_t get_clockid(int fd)
 {
 #define CLOCKFD 3
···
 		" -s         set the ptp clock time from the system time\n"
 		" -S         set the system time from the ptp clock time\n"
 		" -t val     shift the ptp clock time by 'val' seconds\n"
-		" -T val     set the ptp clock time to 'val' seconds\n",
+		" -T val     set the ptp clock time to 'val' seconds\n"
+		" -z         test combinations of rising/falling external time stamp flags\n",
 		progname);
 }
···
 	int adjtime = 0;
 	int capabilities = 0;
 	int extts = 0;
+	int flagtest = 0;
 	int gettime = 0;
 	int index = 0;
 	int list_pins = 0;
···

 	progname = strrchr(argv[0], '/');
 	progname = progname ? 1+progname : argv[0];
-	while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+	while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
 		switch (c) {
 		case 'c':
 			capabilities = 1;
···
 		case 'T':
 			settime = 3;
 			seconds = atoi(optarg);
+			break;
+		case 'z':
+			flagtest = 1;
 			break;
 		case 'h':
 			usage(progname);
···
 		if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
 			perror("PTP_EXTTS_REQUEST");
 		}
+	}
+
+	if (flagtest) {
+		do_flag_test(fd, index);
 	}

 	if (list_pins) {
+160-15
virt/kvm/kvm_main.c
···
 #include <linux/bsearch.h>
 #include <linux/io.h>
 #include <linux/lockdep.h>
+#include <linux/kthread.h>

 #include <asm/processor.h>
 #include <asm/ioctl.h>
···
 				  unsigned long arg);
 #define KVM_COMPAT(c)	.compat_ioctl	= (c)
 #else
+/*
+ * For architectures that don't implement a compat infrastructure,
+ * adopt a double line of defense:
+ * - Prevent a compat task from opening /dev/kvm
+ * - If the open has been done by a 64bit task, and the KVM fd
+ *   passed to a compat task, let the ioctls fail.
+ */
 static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
 				unsigned long arg) { return -EINVAL; }
-#define KVM_COMPAT(c)	.compat_ioctl	= kvm_no_compat_ioctl
+
+static int kvm_no_compat_open(struct inode *inode, struct file *file)
+{
+	return is_compat_task() ? -ENODEV : 0;
+}
+#define KVM_COMPAT(c)	.compat_ioctl	= kvm_no_compat_ioctl,	\
+			.open		= kvm_no_compat_open
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
···
 	return 0;
 }

+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+	/*
+	 * The metadata used by is_zone_device_page() to determine whether or
+	 * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+	 * the device has been pinned, e.g. by get_user_pages().  WARN if the
+	 * page_count() is zero to help detect bad usage of this helper.
+	 */
+	if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+		return false;
+
+	return is_zone_device_page(pfn_to_page(pfn));
+}
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
+	/*
+	 * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+	 * perspective they are "normal" pages, albeit with slightly different
+	 * usage rules.
+	 */
 	if (pfn_valid(pfn))
-		return PageReserved(pfn_to_page(pfn));
+		return PageReserved(pfn_to_page(pfn)) &&
+		       !kvm_is_zone_device_pfn(pfn);

 	return true;
 }
···
 	return 0;
 }

+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+	return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
 static struct kvm *kvm_create_vm(unsigned long type)
 {
 	struct kvm *kvm = kvm_arch_alloc_vm();
···

 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);

+	if (init_srcu_struct(&kvm->srcu))
+		goto out_err_no_srcu;
+	if (init_srcu_struct(&kvm->irq_srcu))
+		goto out_err_no_irq_srcu;
+
+	refcount_set(&kvm->users_count, 1);
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
 		struct kvm_memslots *slots = kvm_alloc_memslots();
···
 			goto out_err_no_arch_destroy_vm;
 	}

-	refcount_set(&kvm->users_count, 1);
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
 		goto out_err_no_arch_destroy_vm;
···
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif

-	if (init_srcu_struct(&kvm->srcu))
-		goto out_err_no_srcu;
-	if (init_srcu_struct(&kvm->irq_srcu))
-		goto out_err_no_irq_srcu;
-
 	r = kvm_init_mmu_notifier(kvm);
+	if (r)
+		goto out_err_no_mmu_notifier;
+
+	r = kvm_arch_post_init_vm(kvm);
 	if (r)
 		goto out_err;
···
 	return kvm;

 out_err:
-	cleanup_srcu_struct(&kvm->irq_srcu);
-out_err_no_irq_srcu:
-	cleanup_srcu_struct(&kvm->srcu);
-out_err_no_srcu:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+	if (kvm->mmu_notifier.ops)
+		mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
 	hardware_disable_all();
 out_err_no_disable:
 	kvm_arch_destroy_vm(kvm);
-	WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
 out_err_no_arch_destroy_vm:
+	WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm_get_bus(kvm, i));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
 		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
+	cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
+	cleanup_srcu_struct(&kvm->srcu);
+out_err_no_srcu:
 	kvm_arch_free_vm(kvm);
 	mmdrop(current->mm);
 	return ERR_PTR(r);
···
 	mutex_lock(&kvm_lock);
 	list_del(&kvm->vm_list);
 	mutex_unlock(&kvm_lock);
+	kvm_arch_pre_destroy_vm(kvm);
+
 	kvm_free_irq_routing(kvm);
 	for (i = 0; i < KVM_NR_BUSES; i++) {
 		struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
···

 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
-	if (!kvm_is_reserved_pfn(pfn)) {
+	if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
 		struct page *page = pfn_to_page(pfn);

 		SetPageDirty(page);
···

 void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 {
-	if (!kvm_is_reserved_pfn(pfn))
+	if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
 		mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
···
 	kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+	struct kvm *kvm;
+	struct task_struct *parent;
+	struct completion init_done;
+	kvm_vm_thread_fn_t thread_fn;
+	uintptr_t data;
+	int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+	/*
+	 * The init_context is allocated on the stack of the parent thread, so
+	 * we have to locally copy anything that is needed beyond initialization
+	 */
+	struct kvm_vm_worker_thread_context *init_context = context;
+	struct kvm *kvm = init_context->kvm;
+	kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+	uintptr_t data = init_context->data;
+	int err;
+
+	err = kthread_park(current);
+	/* kthread_park(current) is never supposed to return an error */
+	WARN_ON(err != 0);
+	if (err)
+		goto init_complete;
+
+	err = cgroup_attach_task_all(init_context->parent, current);
+	if (err) {
+		kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+			__func__, err);
+		goto init_complete;
+	}
+
+	set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+	init_context->err = err;
+	complete(&init_context->init_done);
+	init_context = NULL;
+
+	if (err)
+		return err;
+
+	/* Wait to be woken up by the spawner before proceeding. */
+	kthread_parkme();
+
+	if (!kthread_should_stop())
+		err = thread_fn(kvm, data);
+
+	return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+				uintptr_t data, const char *name,
+				struct task_struct **thread_ptr)
+{
+	struct kvm_vm_worker_thread_context init_context = {};
+	struct task_struct *thread;
+
+	*thread_ptr = NULL;
+	init_context.kvm = kvm;
+	init_context.parent = current;
+	init_context.thread_fn = thread_fn;
+	init_context.data = data;
+	init_completion(&init_context.init_done);
+
+	thread = kthread_run(kvm_vm_worker_thread, &init_context,
+			     "%s-%d", name, task_pid_nr(current));
+	if (IS_ERR(thread))
+		return PTR_ERR(thread);
+
+	/* kthread_run is never supposed to return NULL */
+	WARN_ON(thread == NULL);
+
+	wait_for_completion(&init_context.init_done);
+
+	if (!init_context.err)
+		*thread_ptr = thread;
+
+	return init_context.err;
+}