Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * cache.c - Intel VT-d cache invalidation
4 *
5 * Copyright (C) 2024 Intel Corporation
6 *
7 * Author: Lu Baolu <baolu.lu@linux.intel.com>
8 */
9
10#define pr_fmt(fmt) "DMAR: " fmt
11
12#include <linux/dmar.h>
13#include <linux/iommu.h>
14#include <linux/memory.h>
15#include <linux/pci.h>
16#include <linux/spinlock.h>
17
18#include "iommu.h"
19#include "pasid.h"
20#include "trace.h"
21
22/* Check if an existing cache tag can be reused for a new association. */
23static bool cache_tage_match(struct cache_tag *tag, u16 domain_id,
24 struct intel_iommu *iommu, struct device *dev,
25 ioasid_t pasid, enum cache_tag_type type)
26{
27 if (tag->type != type)
28 return false;
29
30 if (tag->domain_id != domain_id || tag->pasid != pasid)
31 return false;
32
33 if (type == CACHE_TAG_IOTLB || type == CACHE_TAG_NESTING_IOTLB)
34 return tag->iommu == iommu;
35
36 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
37 return tag->dev == dev;
38
39 return false;
40}
41
42/* Assign a cache tag with specified type to domain. */
43int cache_tag_assign(struct dmar_domain *domain, u16 did, struct device *dev,
44 ioasid_t pasid, enum cache_tag_type type)
45{
46 struct device_domain_info *info = dev_iommu_priv_get(dev);
47 struct intel_iommu *iommu = info->iommu;
48 struct cache_tag *tag, *temp;
49 struct list_head *prev;
50 unsigned long flags;
51
52 tag = kzalloc(sizeof(*tag), GFP_KERNEL);
53 if (!tag)
54 return -ENOMEM;
55
56 tag->type = type;
57 tag->iommu = iommu;
58 tag->domain_id = did;
59 tag->pasid = pasid;
60 tag->users = 1;
61
62 if (type == CACHE_TAG_DEVTLB || type == CACHE_TAG_NESTING_DEVTLB)
63 tag->dev = dev;
64 else
65 tag->dev = iommu->iommu.dev;
66
67 spin_lock_irqsave(&domain->cache_lock, flags);
68 prev = &domain->cache_tags;
69 list_for_each_entry(temp, &domain->cache_tags, node) {
70 if (cache_tage_match(temp, did, iommu, dev, pasid, type)) {
71 temp->users++;
72 spin_unlock_irqrestore(&domain->cache_lock, flags);
73 kfree(tag);
74 trace_cache_tag_assign(temp);
75 return 0;
76 }
77 if (temp->iommu == iommu)
78 prev = &temp->node;
79 }
80 /*
81 * Link cache tags of same iommu unit together, so corresponding
82 * flush ops can be batched for iommu unit.
83 */
84 list_add(&tag->node, prev);
85
86 spin_unlock_irqrestore(&domain->cache_lock, flags);
87 trace_cache_tag_assign(tag);
88
89 return 0;
90}
91
92/* Unassign a cache tag with specified type from domain. */
93static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
94 struct device *dev, ioasid_t pasid,
95 enum cache_tag_type type)
96{
97 struct device_domain_info *info = dev_iommu_priv_get(dev);
98 struct intel_iommu *iommu = info->iommu;
99 struct cache_tag *tag;
100 unsigned long flags;
101
102 spin_lock_irqsave(&domain->cache_lock, flags);
103 list_for_each_entry(tag, &domain->cache_tags, node) {
104 if (cache_tage_match(tag, did, iommu, dev, pasid, type)) {
105 trace_cache_tag_unassign(tag);
106 if (--tag->users == 0) {
107 list_del(&tag->node);
108 kfree(tag);
109 }
110 break;
111 }
112 }
113 spin_unlock_irqrestore(&domain->cache_lock, flags);
114}
115
116/* domain->qi_batch will be freed in iommu_free_domain() path. */
117static int domain_qi_batch_alloc(struct dmar_domain *domain)
118{
119 unsigned long flags;
120 int ret = 0;
121
122 spin_lock_irqsave(&domain->cache_lock, flags);
123 if (domain->qi_batch)
124 goto out_unlock;
125
126 domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
127 if (!domain->qi_batch)
128 ret = -ENOMEM;
129out_unlock:
130 spin_unlock_irqrestore(&domain->cache_lock, flags);
131
132 return ret;
133}
134
135static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
136 struct device *dev, ioasid_t pasid)
137{
138 struct device_domain_info *info = dev_iommu_priv_get(dev);
139 int ret;
140
141 ret = domain_qi_batch_alloc(domain);
142 if (ret)
143 return ret;
144
145 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
146 if (ret || !info->ats_enabled)
147 return ret;
148
149 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
150 if (ret)
151 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
152
153 return ret;
154}
155
156static void __cache_tag_unassign_domain(struct dmar_domain *domain, u16 did,
157 struct device *dev, ioasid_t pasid)
158{
159 struct device_domain_info *info = dev_iommu_priv_get(dev);
160
161 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
162
163 if (info->ats_enabled)
164 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_DEVTLB);
165}
166
167static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
168 struct device *dev, ioasid_t pasid)
169{
170 struct device_domain_info *info = dev_iommu_priv_get(dev);
171 int ret;
172
173 ret = domain_qi_batch_alloc(domain);
174 if (ret)
175 return ret;
176
177 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
178 if (ret || !info->ats_enabled)
179 return ret;
180
181 ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
182 if (ret)
183 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
184
185 return ret;
186}
187
188static void __cache_tag_unassign_parent_domain(struct dmar_domain *domain, u16 did,
189 struct device *dev, ioasid_t pasid)
190{
191 struct device_domain_info *info = dev_iommu_priv_get(dev);
192
193 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
194
195 if (info->ats_enabled)
196 cache_tag_unassign(domain, did, dev, pasid, CACHE_TAG_NESTING_DEVTLB);
197}
198
199static u16 domain_get_id_for_dev(struct dmar_domain *domain, struct device *dev)
200{
201 struct device_domain_info *info = dev_iommu_priv_get(dev);
202 struct intel_iommu *iommu = info->iommu;
203
204 /*
205 * The driver assigns different domain IDs for all domains except
206 * the SVA type.
207 */
208 if (domain->domain.type == IOMMU_DOMAIN_SVA)
209 return FLPT_DEFAULT_DID;
210
211 return domain_id_iommu(domain, iommu);
212}
213
214/*
215 * Assign cache tags to a domain when it's associated with a device's
216 * PASID using a specific domain ID.
217 *
218 * On success (return value of 0), cache tags are created and added to the
219 * domain's cache tag list. On failure (negative return value), an error
220 * code is returned indicating the reason for the failure.
221 */
222int cache_tag_assign_domain(struct dmar_domain *domain,
223 struct device *dev, ioasid_t pasid)
224{
225 u16 did = domain_get_id_for_dev(domain, dev);
226 int ret;
227
228 ret = __cache_tag_assign_domain(domain, did, dev, pasid);
229 if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
230 return ret;
231
232 ret = __cache_tag_assign_parent_domain(domain->s2_domain, did, dev, pasid);
233 if (ret)
234 __cache_tag_unassign_domain(domain, did, dev, pasid);
235
236 return ret;
237}
238
239/*
240 * Remove the cache tags associated with a device's PASID when the domain is
241 * detached from the device.
242 *
243 * The cache tags must be previously assigned to the domain by calling the
244 * assign interface.
245 */
246void cache_tag_unassign_domain(struct dmar_domain *domain,
247 struct device *dev, ioasid_t pasid)
248{
249 u16 did = domain_get_id_for_dev(domain, dev);
250
251 __cache_tag_unassign_domain(domain, did, dev, pasid);
252 if (domain->domain.type == IOMMU_DOMAIN_NESTED)
253 __cache_tag_unassign_parent_domain(domain->s2_domain, did, dev, pasid);
254}
255
256static unsigned long calculate_psi_aligned_address(unsigned long start,
257 unsigned long end,
258 unsigned long *_pages,
259 unsigned long *_mask)
260{
261 unsigned long pages = aligned_nrpages(start, end - start + 1);
262 unsigned long aligned_pages = __roundup_pow_of_two(pages);
263 unsigned long bitmask = aligned_pages - 1;
264 unsigned long mask = ilog2(aligned_pages);
265 unsigned long pfn = IOVA_PFN(start);
266
267 /*
268 * PSI masks the low order bits of the base address. If the
269 * address isn't aligned to the mask, then compute a mask value
270 * needed to ensure the target range is flushed.
271 */
272 if (unlikely(bitmask & pfn)) {
273 unsigned long end_pfn = pfn + pages - 1, shared_bits;
274
275 /*
276 * Since end_pfn <= pfn + bitmask, the only way bits
277 * higher than bitmask can differ in pfn and end_pfn is
278 * by carrying. This means after masking out bitmask,
279 * high bits starting with the first set bit in
280 * shared_bits are all equal in both pfn and end_pfn.
281 */
282 shared_bits = ~(pfn ^ end_pfn) & ~bitmask;
283 mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH;
284 aligned_pages = 1UL << mask;
285 }
286
287 *_pages = aligned_pages;
288 *_mask = mask;
289
290 return ALIGN_DOWN(start, VTD_PAGE_SIZE << mask);
291}
292
293static void qi_batch_flush_descs(struct intel_iommu *iommu, struct qi_batch *batch)
294{
295 if (!iommu || !batch->index)
296 return;
297
298 qi_submit_sync(iommu, batch->descs, batch->index, 0);
299
300 /* Reset the index value and clean the whole batch buffer. */
301 memset(batch, 0, sizeof(*batch));
302}
303
304static void qi_batch_increment_index(struct intel_iommu *iommu, struct qi_batch *batch)
305{
306 if (++batch->index == QI_MAX_BATCHED_DESC_COUNT)
307 qi_batch_flush_descs(iommu, batch);
308}
309
310static void qi_batch_add_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
311 unsigned int size_order, u64 type,
312 struct qi_batch *batch)
313{
314 qi_desc_iotlb(iommu, did, addr, size_order, type, &batch->descs[batch->index]);
315 qi_batch_increment_index(iommu, batch);
316}
317
318static void qi_batch_add_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
319 u16 qdep, u64 addr, unsigned int mask,
320 struct qi_batch *batch)
321{
322 /*
323 * According to VT-d spec, software is recommended to not submit any Device-TLB
324 * invalidation requests while address remapping hardware is disabled.
325 */
326 if (!(iommu->gcmd & DMA_GCMD_TE))
327 return;
328
329 qi_desc_dev_iotlb(sid, pfsid, qdep, addr, mask, &batch->descs[batch->index]);
330 qi_batch_increment_index(iommu, batch);
331}
332
333static void qi_batch_add_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid,
334 u64 addr, unsigned long npages, bool ih,
335 struct qi_batch *batch)
336{
337 /*
338 * npages == -1 means a PASID-selective invalidation, otherwise,
339 * a positive value for Page-selective-within-PASID invalidation.
340 * 0 is not a valid input.
341 */
342 if (!npages)
343 return;
344
345 qi_desc_piotlb(did, pasid, addr, npages, ih, &batch->descs[batch->index]);
346 qi_batch_increment_index(iommu, batch);
347}
348
349static void qi_batch_add_pasid_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
350 u32 pasid, u16 qdep, u64 addr,
351 unsigned int size_order, struct qi_batch *batch)
352{
353 /*
354 * According to VT-d spec, software is recommended to not submit any
355 * Device-TLB invalidation requests while address remapping hardware
356 * is disabled.
357 */
358 if (!(iommu->gcmd & DMA_GCMD_TE))
359 return;
360
361 qi_desc_dev_iotlb_pasid(sid, pfsid, pasid, qdep, addr, size_order,
362 &batch->descs[batch->index]);
363 qi_batch_increment_index(iommu, batch);
364}
365
366static void cache_tag_flush_iotlb(struct dmar_domain *domain, struct cache_tag *tag,
367 unsigned long addr, unsigned long pages,
368 unsigned long mask, int ih)
369{
370 struct intel_iommu *iommu = tag->iommu;
371 u64 type = DMA_TLB_PSI_FLUSH;
372
373 if (domain->use_first_level) {
374 qi_batch_add_piotlb(iommu, tag->domain_id, tag->pasid, addr,
375 pages, ih, domain->qi_batch);
376 return;
377 }
378
379 /*
380 * Fallback to domain selective flush if no PSI support or the size
381 * is too big.
382 */
383 if (!cap_pgsel_inv(iommu->cap) ||
384 mask > cap_max_amask_val(iommu->cap) || pages == -1) {
385 addr = 0;
386 mask = 0;
387 ih = 0;
388 type = DMA_TLB_DSI_FLUSH;
389 }
390
391 if (ecap_qis(iommu->ecap))
392 qi_batch_add_iotlb(iommu, tag->domain_id, addr | ih, mask, type,
393 domain->qi_batch);
394 else
395 __iommu_flush_iotlb(iommu, tag->domain_id, addr | ih, mask, type);
396}
397
398static void cache_tag_flush_devtlb_psi(struct dmar_domain *domain, struct cache_tag *tag,
399 unsigned long addr, unsigned long mask)
400{
401 struct intel_iommu *iommu = tag->iommu;
402 struct device_domain_info *info;
403 u16 sid;
404
405 info = dev_iommu_priv_get(tag->dev);
406 sid = PCI_DEVID(info->bus, info->devfn);
407
408 if (tag->pasid == IOMMU_NO_PASID) {
409 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
410 addr, mask, domain->qi_batch);
411 if (info->dtlb_extra_inval)
412 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep,
413 addr, mask, domain->qi_batch);
414 return;
415 }
416
417 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
418 info->ats_qdep, addr, mask, domain->qi_batch);
419 if (info->dtlb_extra_inval)
420 qi_batch_add_pasid_dev_iotlb(iommu, sid, info->pfsid, tag->pasid,
421 info->ats_qdep, addr, mask,
422 domain->qi_batch);
423}
424
425static void cache_tag_flush_devtlb_all(struct dmar_domain *domain, struct cache_tag *tag)
426{
427 struct intel_iommu *iommu = tag->iommu;
428 struct device_domain_info *info;
429 u16 sid;
430
431 info = dev_iommu_priv_get(tag->dev);
432 sid = PCI_DEVID(info->bus, info->devfn);
433
434 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
435 MAX_AGAW_PFN_WIDTH, domain->qi_batch);
436 if (info->dtlb_extra_inval)
437 qi_batch_add_dev_iotlb(iommu, sid, info->pfsid, info->ats_qdep, 0,
438 MAX_AGAW_PFN_WIDTH, domain->qi_batch);
439}
440
441/*
442 * Invalidates a range of IOVA from @start (inclusive) to @end (inclusive)
443 * when the memory mappings in the target domain have been modified.
444 */
445void cache_tag_flush_range(struct dmar_domain *domain, unsigned long start,
446 unsigned long end, int ih)
447{
448 struct intel_iommu *iommu = NULL;
449 unsigned long pages, mask, addr;
450 struct cache_tag *tag;
451 unsigned long flags;
452
453 addr = calculate_psi_aligned_address(start, end, &pages, &mask);
454
455 spin_lock_irqsave(&domain->cache_lock, flags);
456 list_for_each_entry(tag, &domain->cache_tags, node) {
457 if (iommu && iommu != tag->iommu)
458 qi_batch_flush_descs(iommu, domain->qi_batch);
459 iommu = tag->iommu;
460
461 switch (tag->type) {
462 case CACHE_TAG_IOTLB:
463 case CACHE_TAG_NESTING_IOTLB:
464 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, ih);
465 break;
466 case CACHE_TAG_NESTING_DEVTLB:
467 /*
468 * Address translation cache in device side caches the
469 * result of nested translation. There is no easy way
470 * to identify the exact set of nested translations
471 * affected by a change in S2. So just flush the entire
472 * device cache.
473 */
474 addr = 0;
475 mask = MAX_AGAW_PFN_WIDTH;
476 fallthrough;
477 case CACHE_TAG_DEVTLB:
478 cache_tag_flush_devtlb_psi(domain, tag, addr, mask);
479 break;
480 }
481
482 trace_cache_tag_flush_range(tag, start, end, addr, pages, mask);
483 }
484 qi_batch_flush_descs(iommu, domain->qi_batch);
485 spin_unlock_irqrestore(&domain->cache_lock, flags);
486}
487
488/*
489 * Invalidates all ranges of IOVA when the memory mappings in the target
490 * domain have been modified.
491 */
492void cache_tag_flush_all(struct dmar_domain *domain)
493{
494 struct intel_iommu *iommu = NULL;
495 struct cache_tag *tag;
496 unsigned long flags;
497
498 spin_lock_irqsave(&domain->cache_lock, flags);
499 list_for_each_entry(tag, &domain->cache_tags, node) {
500 if (iommu && iommu != tag->iommu)
501 qi_batch_flush_descs(iommu, domain->qi_batch);
502 iommu = tag->iommu;
503
504 switch (tag->type) {
505 case CACHE_TAG_IOTLB:
506 case CACHE_TAG_NESTING_IOTLB:
507 cache_tag_flush_iotlb(domain, tag, 0, -1, 0, 0);
508 break;
509 case CACHE_TAG_DEVTLB:
510 case CACHE_TAG_NESTING_DEVTLB:
511 cache_tag_flush_devtlb_all(domain, tag);
512 break;
513 }
514
515 trace_cache_tag_flush_all(tag);
516 }
517 qi_batch_flush_descs(iommu, domain->qi_batch);
518 spin_unlock_irqrestore(&domain->cache_lock, flags);
519}
520
521/*
522 * Invalidate a range of IOVA when new mappings are created in the target
523 * domain.
524 *
525 * - VT-d spec, Section 6.1 Caching Mode: When the CM field is reported as
526 * Set, any software updates to remapping structures other than first-
527 * stage mapping requires explicit invalidation of the caches.
528 * - VT-d spec, Section 6.8 Write Buffer Flushing: For hardware that requires
529 * write buffer flushing, software must explicitly perform write-buffer
530 * flushing, if cache invalidation is not required.
531 */
532void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start,
533 unsigned long end)
534{
535 struct intel_iommu *iommu = NULL;
536 unsigned long pages, mask, addr;
537 struct cache_tag *tag;
538 unsigned long flags;
539
540 addr = calculate_psi_aligned_address(start, end, &pages, &mask);
541
542 spin_lock_irqsave(&domain->cache_lock, flags);
543 list_for_each_entry(tag, &domain->cache_tags, node) {
544 if (iommu && iommu != tag->iommu)
545 qi_batch_flush_descs(iommu, domain->qi_batch);
546 iommu = tag->iommu;
547
548 if (!cap_caching_mode(iommu->cap) || domain->use_first_level) {
549 iommu_flush_write_buffer(iommu);
550 continue;
551 }
552
553 if (tag->type == CACHE_TAG_IOTLB ||
554 tag->type == CACHE_TAG_NESTING_IOTLB)
555 cache_tag_flush_iotlb(domain, tag, addr, pages, mask, 0);
556
557 trace_cache_tag_flush_range_np(tag, start, end, addr, pages, mask);
558 }
559 qi_batch_flush_descs(iommu, domain->qi_batch);
560 spin_unlock_irqrestore(&domain->cache_lock, flags);
561}