Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * cache.c - Intel VT-d cache invalidation
4 *
5 * Copyright (C) 2024 Intel Corporation
6 *
7 * Author: Lu Baolu <baolu.lu@linux.intel.com>
8 */
9
10#define pr_fmt(fmt) "DMAR: " fmt
11
12#include <linux/dmar.h>
13#include <linux/iommu.h>
14#include <linux/memory.h>
15#include <linux/pci.h>
16#include <linux/spinlock.h>
17
18#include "iommu.h"
19#include "pasid.h"
20#include "trace.h"
21
22/* Check if an existing cache tag can be reused for a new association. */
23static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
24 struct intel_iommu *iommu, struct device *dev,
25 ioasid_t pasid, enum cache_tag_type type)
26{
27 if (tag->type != type)
28 return false;
29
30 if (tag->domain_id != domain_id || tag->pasid != pasid)
31 return false;
32
33 if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
34 return tag->iommu == iommu;
35
36 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
37 return tag->dev == dev;
38
39 return false;
40}
41
42/* Assign a cache tag with specified type to domain. */
43static int cache_tag_assign(struct dmar_domain *domain, u16 did,
44 struct device *dev, ioasid_t pasid,
45 enum cache_tag_type type)
46{
47 struct device_domain_info *info = dev_iommu_priv_get(dev);
48 struct intel_iommu *iommu = info->iommu;
49 struct cache_tag *tag, *temp;
50 unsigned long flags;
51
52 tag = kzalloc(sizeof(*tag), GFP_KERNEL);
53 if (!tag)
54 return -ENOMEM;
55
56 tag->type = type;
57 tag->iommu = iommu;
58 tag->domain_id = did;
59 tag->pasid = pasid;
60 tag->users = 1;
61
62 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
63 tag->dev = dev;
64 else
65 tag->dev = iommu->iommu.dev;
66
67 spin_lock_irqsave(&domain->cache_lock, flags);
68 list_for_each_entry(temp, &domain->cache_tags, node) {
69 if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
70 temp->users++;
71 spin_unlock_irqrestore(&domain->cache_lock, flags);
72 kfree(tag);
73 trace_cache_tag_assign(temp);
74 return 0;
75 }
76 }
77 list_add_tail(&tag->node, &domain->cache_tags);
78 spin_unlock_irqrestore(&domain->cache_lock, flags);
79 trace_cache_tag_assign(tag);
80
81 return 0;
82}
83
84/* Unassign a cache tag with specified type from domain. */
85static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
86 struct device *dev, ioasid_t pasid,
87 enum cache_tag_type type)
88{
89 struct device_domain_info *info = dev_iommu_priv_get(dev);
90 struct intel_iommu *iommu = info->iommu;
91 struct cache_tag *tag;
92 unsigned long flags;
93
94 spin_lock_irqsave(&domain->cache_lock, flags);
95 list_for_each_entry(tag, &domain->cache_tags, node) {
96 if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
97 trace_cache_tag_unassign(tag);
98 if (--tag->users == 0) {
99 list_del(&tag->node);
100 kfree(tag);
101 }
102 break;
103 }
104 }
105 spin_unlock_irqrestore(&domain->cache_lock, flags);
106}
107
108static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
109 struct device *dev, ioasid_t pasid)
110{
111 struct device_domain_info *info = dev_iommu_priv_get(dev);
112 int ret;
113
114 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
115 if (ret || !info->ats_enabled)
116 return ret;
117
118 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
119 if (ret)
120 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
121
122 return ret;
123}
124
125static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
126 struct device *dev, ioasid_t pasid)
127{
128 struct device_domain_info *info = dev_iommu_priv_get(dev);
129
130 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
131
132 if (info->ats_enabled)
133 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
134}
135
136static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
137 struct device *dev, ioasid_t pasid)
138{
139 struct device_domain_info *info = dev_iommu_priv_get(dev);
140 int ret;
141
142 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
143 if (ret || !info->ats_enabled)
144 return ret;
145
146 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
147 if (ret)
148 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
149
150 return ret;
151}
152
153static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
154 struct device *dev, ioasid_t pasid)
155{
156 struct device_domain_info *info = dev_iommu_priv_get(dev);
157
158 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
159
160 if (info->ats_enabled)
161 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
162}
163
164static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
165{
166 struct device_domain_info *info = dev_iommu_priv_get(dev);
167 struct intel_iommu *iommu = info->iommu;
168
169 /*
170 * The driver assigns different domain IDs for all domains except
171 * the SVA type.
172 */
173 if (domain->domain.type == IOMMU_DOMAIN_SVA)
174 return FLPT_DEFAULT_DID;
175
176 return domain_id_iommu(domain, iommu);
177}
178
179/*
180 * Assign cache tags to a domain when it's associated with a device's
181 * PASID using a specific domain ID.
182 *
183 * On success (return value of 0), cache tags are created and added to the
184 * domain's cache tag list. On failure (negative return value), an error
185 * code is returned indicating the reason for the failure.
186 */
187int cache_tag_assign_domain(struct dmar_domain *domain,
188 struct device *dev, ioasid_t pasid)
189{
190 u16 did = domain_get_id_for_dev(domain, dev);
191 int ret;
192
193 ret = __cache_tag_assign_domain(domain, did, dev, pasid);
194 if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
195 return ret;
196
197 ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
198 if (ret)
199 __cache_tag_unassign_domain(domain, did, dev, pasid);
200
201 return ret;
202}
203
204/*
205 * Remove the cache tags associated with a device's PASID when the domain is
206 * detached from the device.
207 *
208 * The cache tags must be previously assigned to the domain by calling the
209 * assign interface.
210 */
211void cache_tag_unassign_domain(struct dmar_domain *domain,
212 struct device *dev, ioasid_t pasid)
213{
214 u16 did = domain_get_id_for_dev(domain, dev);
215
216 __cache_tag_unassign_domain(domain, did, dev, pasid);
217 if (domain->domain.type == IOMMU_DOMAIN_NESTED)
218 __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
219}
220
221static unsigned long calculate_psi_aligned_address(unsigned long start,
222 unsigned long end,
223 unsigned long *_pages,
224 unsigned long *_mask)
225{
226 unsigned long pages = aligned_nrpages(start, end - start + 1);
227 unsigned long aligned_pages = __roundup_pow_of_two(pages);
228 unsigned long bitmask = aligned_pages - 1;
229 unsigned long mask = ilog2(aligned_pages);
230 unsigned long pfn = IOVA_PFN(start);
231
232 /*
233 * PSI masks the low order bits of the base address. If the
234 * address isn't aligned to the mask, then compute a mask value
235 * needed to ensure the target range is flushed.
236 */
237 if (unlikely(bitmask & pfn)) {
238 unsigned long end_pfn = pfn + pages - 1, shared_bits;
239
240 /*
241 * Since end_pfn <= pfn + bitmask, the only way bits
242 * higher than bitmask can differ in pfn and end_pfn is
243 * by carrying. This means after masking out bitmask,
244 * high bits starting with the first set bit in
245 * shared_bits are all equal in both pfn and end_pfn.
246 */
247 shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
248 mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
249 aligned_pages = 1UL << mask;
250 }
251
252 *_pages = aligned_pages;
253 *_mask = mask;
254
255 return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
256}
257
258/*
259 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
260 * when the memory mappings in the target domain have been modified.
261 */
262void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
263 unsigned long end, int ih)
264{
265 unsigned long pages, mask, addr;
266 struct cache_tag *tag;
267 unsigned long flags;
268
269 addr = calculate_psi_aligned_address(start, end, &pages, &mask);
270
271 spin_lock_irqsave(&domain->cache_lock, flags);
272 list_for_each_entry(tag, &domain->cache_tags, node) {
273 struct intel_iommu *iommu = tag->iommu;
274 struct device_domain_info *info;
275 u16 sid;
276
277 switch (tag->type) {
278 case CACHE_TAG_IOTLB:
279 case CACHE_TAG_NESTING_IOTLB:
280 if (domain->use_first_level) {
281 qi_flush_piotlb(iommu, tag->domain_id,
282 tag->pasid, addr, pages, ih);
283 } else {
284 /*
285 * Fallback to domain selective flush if no
286 * PSI support or the size is too big.
287 */
288 if (!cap_pgsel_inv(iommu->cap) ||
289 mask > cap_max_amask_val(iommu->cap))
290 iommu->flush.flush_iotlb(iommu, tag->domain_id,
291 0, 0, DMA_TLB_DSI_FLUSH);
292 else
293 iommu->flush.flush_iotlb(iommu, tag->domain_id,
294 addr | ih, mask,
295 DMA_TLB_PSI_FLUSH);
296 }
297 break;
298 case CACHE_TAG_NESTING_DEVTLB:
299 /*
300 * Address translation cache in device side caches the
301 * result of nested translation. There is no easy way
302 * to identify the exact set of nested translations
303 * affected by a change in S2. So just flush the entire
304 * device cache.
305 */
306 addr = 0;
307 mask = MAX_AGAW_PFN_WIDTH;
308 fallthrough;
309 case CACHE_TAG_DEVTLB:
310 info = dev_iommu_priv_get(tag->dev);
311 sid = PCI_DEVID(info->bus, info->devfn);
312
313 if (tag->pasid == IOMMU_NO_PASID)
314 qi_flush_dev_iotlb(iommu, sid, info->pfsid,
315 info->ats_qdep, addr, mask);
316 else
317 qi_flush_dev_iotlb_pasid(iommu, sid, info->pfsid,
318 tag->pasid, info->ats_qdep,
319 addr, mask);
320
321 quirk_extra_dev_tlb_flush(info, addr, mask, tag->pasid, info->ats_qdep);
322 break;
323 }
324
325 trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
326 }
327 spin_unlock_irqrestore(&domain->cache_lock, flags);
328}
329
330/*
331 * Invalidates all ranges of IOVA when the memory mappings in the target
332 * domain have been modified.
333 */
334void cache_tag_flush_all(struct dmar_domain *domain)
335{
336 struct cache_tag *tag;
337 unsigned long flags;
338
339 spin_lock_irqsave(&domain->cache_lock, flags);
340 list_for_each_entry(tag, &domain->cache_tags, node) {
341 struct intel_iommu *iommu = tag->iommu;
342 struct device_domain_info *info;
343 u16 sid;
344
345 switch (tag->type) {
346 case CACHE_TAG_IOTLB:
347 case CACHE_TAG_NESTING_IOTLB:
348 if (domain->use_first_level)
349 qi_flush_piotlb(iommu, tag->domain_id,
350 tag->pasid, 0, -1, 0);
351 else
352 iommu->flush.flush_iotlb(iommu, tag->domain_id,
353 0, 0, DMA_TLB_DSI_FLUSH);
354 break;
355 case CACHE_TAG_DEVTLB:
356 case CACHE_TAG_NESTING_DEVTLB:
357 info = dev_iommu_priv_get(tag->dev);
358 sid = PCI_DEVID(info->bus, info->devfn);
359
360 qi_flush_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
361 0, MAX_AGAW_PFN_WIDTH);
362 quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH,
363 IOMMU_NO_PASID, info->ats_qdep);
364 break;
365 }
366
367 trace_cache_tag_flush_all(tag);
368 }
369 spin_unlock_irqrestore(&domain->cache_lock, flags);
370}
371
372/*
373 * Invalidate a range of IOVA when new mappings are created in the target
374 * domain.
375 *
376 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
377 * Set, any software updates to remapping structures other than first-
378 * stage mapping requires explicit invalidation of the caches.
379 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
380 * write buffer flushing, software must explicitly perform write-buffer
381 * flushing, if cache invalidation is not required.
382 */
383void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
384 unsigned long end)
385{
386 unsigned long pages, mask, addr;
387 struct cache_tag *tag;
388 unsigned long flags;
389
390 addr = calculate_psi_aligned_address(start, end, &pages, &mask);
391
392 spin_lock_irqsave(&domain->cache_lock, flags);
393 list_for_each_entry(tag, &domain->cache_tags, node) {
394 struct intel_iommu *iommu = tag->iommu;
395
396 if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
397 iommu_flush_write_buffer(iommu);
398 continue;
399 }
400
401 if (tag->type == CACHE_TAG_IOTLB ||
402 tag->type == CACHE_TAG_NESTING_IOTLB) {
403 /*
404 * Fallback to domain selective flush if no
405 * PSI support or the size is too big.
406 */
407 if (!cap_pgsel_inv(iommu->cap) ||
408 mask > cap_max_amask_val(iommu->cap))
409 iommu->flush.flush_iotlb(iommu, tag->domain_id,
410 0, 0, DMA_TLB_DSI_FLUSH);
411 else
412 iommu->flush.flush_iotlb(iommu, tag->domain_id,
413 addr, mask,
414 DMA_TLB_PSI_FLUSH);
415 }
416
417 trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
418 }
419 spin_unlock_irqrestore(&domain->cache_lock, flags);
420}