Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
3 */
4#ifndef _UAPI_IOMMUFD_H
5#define _UAPI_IOMMUFD_H
6
7#include <linux/ioctl.h>
8#include <linux/types.h>
9
10#define IOMMUFD_TYPE (';')
11
12/**
13 * DOC: General ioctl format
14 *
15 * The ioctl interface follows a general format to allow for extensibility. Each
16 * ioctl is passed in a structure pointer as the argument providing the size of
17 * the structure in the first u32. The kernel checks that any structure space
18 * beyond what it understands is 0. This allows userspace to use the backward
19 * compatible portion while consistently using the newer, larger, structures.
20 *
21 * ioctls use a standard meaning for common errnos:
22 *
23 * - ENOTTY: The IOCTL number itself is not supported at all
24 * - E2BIG: The IOCTL number is supported, but the provided structure has
25 * non-zero data in a part the kernel does not understand.
26 * - EOPNOTSUPP: The IOCTL number is supported, and the structure is
27 * understood, however a known field has a value the kernel does not
28 * understand or support.
29 * - EINVAL: Everything about the IOCTL was understood, but a field is not
30 * correct.
31 * - ENOENT: An ID or IOVA provided does not exist.
32 * - ENOMEM: Out of memory.
33 * - EOVERFLOW: Mathematics overflowed.
34 *
35 * Specific ioctls may also return additional errnos.
36 */
37enum { /* ioctl command numbers: the nr argument of the _IO(IOMMUFD_TYPE, nr) definitions below */
38 IOMMUFD_CMD_BASE = 0x80,
39 IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
40 IOMMUFD_CMD_IOAS_ALLOC = 0x81,
41 IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
42 IOMMUFD_CMD_IOAS_COPY = 0x83,
43 IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
44 IOMMUFD_CMD_IOAS_MAP = 0x85,
45 IOMMUFD_CMD_IOAS_UNMAP = 0x86,
46 IOMMUFD_CMD_OPTION = 0x87,
47 IOMMUFD_CMD_VFIO_IOAS = 0x88,
48 IOMMUFD_CMD_HWPT_ALLOC = 0x89,
49 IOMMUFD_CMD_GET_HW_INFO = 0x8a,
50 IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
51 IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
52 IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
53 IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
54 IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
55 IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
56 IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
57 IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
58 IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
59 IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94,
60};
61
62/**
63 * struct iommu_destroy - ioctl(IOMMU_DESTROY)
64 * @size: sizeof(struct iommu_destroy)
65 * @id: iommufd object ID to destroy. Can be any destroyable object type.
66 *
67 * Destroy the iommufd object identified by @id.
68 */
69struct iommu_destroy {
70 __u32 size;
71 __u32 id;
72};
73#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
74
75/**
76 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
77 * @size: sizeof(struct iommu_ioas_alloc)
78 * @flags: Must be 0
79 * @out_ioas_id: Output IOAS ID for the allocated object
80 *
81 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
82 * to memory mapping. The ID of the new IOAS is returned in @out_ioas_id.
83 */
84struct iommu_ioas_alloc {
85 __u32 size;
86 __u32 flags;
87 __u32 out_ioas_id;
88};
89#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
90
91/**
92 * struct iommu_iova_range - An interval in IOVA space
93 * @start: First IOVA
94 * @last: Inclusive last IOVA
95 *
96 * Array element type for the allowed_iovas of IOMMU_IOAS_IOVA_RANGES/ALLOW_IOVAS.
97 */
98struct iommu_iova_range {
99 __aligned_u64 start;
100 __aligned_u64 last;
101};
102
103/**
104 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
105 * @size: sizeof(struct iommu_ioas_iova_ranges)
106 * @ioas_id: IOAS ID to read ranges from
107 * @num_iovas: Input length of the @allowed_iovas array, output total number of ranges in the IOAS
108 * @__reserved: Must be 0
109 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
110 * @out_iova_alignment: Minimum alignment required for mapping IOVA
111 *
112 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
113 * is not allowed. num_iovas will be set to the total number of iovas and
114 * the allowed_iovas[] will be filled in as space permits.
115 *
116 * The allowed ranges are dependent on the HW path the DMA operation takes, and
117 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
118 * full range, and each attached device will narrow the ranges based on that
119 * device's HW restrictions. Detaching a device can widen the ranges. Userspace
120 * should query ranges after every attach/detach to know what IOVAs are valid
121 * for mapping.
122 *
123 * On input num_iovas is the length of the allowed_iovas array. On output it is
124 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
125 * num_iovas to the required value if num_iovas is too small. In this case the
126 * caller should allocate a larger output array and re-issue the ioctl.
127 *
128 * out_iova_alignment returns the minimum IOVA alignment that can be given
129 * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
130 *
131 * starting_iova % out_iova_alignment == 0
132 * (starting_iova + length) % out_iova_alignment == 0
133 *
134 * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
135 * be higher than the system PAGE_SIZE.
136 */
137struct iommu_ioas_iova_ranges {
138 __u32 size;
139 __u32 ioas_id;
140 __u32 num_iovas;
141 __u32 __reserved;
142 __aligned_u64 allowed_iovas;
143 __aligned_u64 out_iova_alignment;
144};
145#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
146
147/**
148 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
149 * @size: sizeof(struct iommu_ioas_allow_iovas)
150 * @ioas_id: IOAS ID to allow IOVAs from
151 * @num_iovas: Number of ranges in the @allowed_iovas array
152 * @__reserved: Must be 0
153 * @allowed_iovas: Pointer to array of struct iommu_iova_range
154 *
155 * Ensure a range of IOVAs are always available for allocation. If this call
156 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
157 * that are narrower than the ranges provided here. This call will fail if
158 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
159 *
160 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
161 * devices are attached the IOVA will narrow based on the device restrictions.
162 * When an allowed range is specified any narrowing will be refused, i.e. device
163 * attachment can fail if the device requires limiting within the allowed range.
164 *
165 * Automatic IOVA allocation is also impacted by this call. MAP will only
166 * allocate within the allowed IOVAs if they are present.
167 *
168 * This call replaces the entire allowed list with the given list.
169 */
170struct iommu_ioas_allow_iovas {
171 __u32 size;
172 __u32 ioas_id;
173 __u32 num_iovas;
174 __u32 __reserved;
175 __aligned_u64 allowed_iovas;
176};
177#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
178
179/**
180 * enum iommufd_ioas_map_flags - Flags for map and copy
181 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
182 * IOVA to place the mapping at. If set the caller supplies the IOVA
183 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
184 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
185 */
186enum iommufd_ioas_map_flags {
187 IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
188 IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
189 IOMMU_IOAS_MAP_READABLE = 1 << 2,
190};
191
192/**
193 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
194 * @size: sizeof(struct iommu_ioas_map)
195 * @flags: Combination of enum iommufd_ioas_map_flags
196 * @ioas_id: IOAS ID to change the mapping of
197 * @__reserved: Must be 0
198 * @user_va: Userspace pointer to start mapping from
199 * @length: Number of bytes to map
200 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
201 * then this must be provided as input.
202 *
203 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
204 * mapping will be established at iova, otherwise a suitable location based on
205 * the reserved and allowed lists will be automatically selected and returned in
206 * iova.
207 *
208 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
209 * be unused; an existing IOVA mapping cannot be replaced.
210 */
211struct iommu_ioas_map {
212 __u32 size;
213 __u32 flags;
214 __u32 ioas_id;
215 __u32 __reserved;
216 __aligned_u64 user_va;
217 __aligned_u64 length;
218 __aligned_u64 iova;
219};
220#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
221
222/**
223 * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
224 * @size: sizeof(struct iommu_ioas_map_file)
225 * @flags: same as for iommu_ioas_map
226 * @ioas_id: same as for iommu_ioas_map
227 * @fd: the memfd to map
228 * @start: byte offset from start of file to map from
229 * @length: same as for iommu_ioas_map
230 * @iova: same as for iommu_ioas_map
231 *
232 * Set an IOVA mapping from a memfd file. All other arguments and semantics
233 * match those of IOMMU_IOAS_MAP.
234 */
235struct iommu_ioas_map_file {
236 __u32 size;
237 __u32 flags;
238 __u32 ioas_id;
239 __s32 fd; /* occupies the slot that is __reserved in struct iommu_ioas_map */
240 __aligned_u64 start;
241 __aligned_u64 length;
242 __aligned_u64 iova;
243};
244#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)
245
246/**
247 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
248 * @size: sizeof(struct iommu_ioas_copy)
249 * @flags: Combination of enum iommufd_ioas_map_flags
250 * @dst_ioas_id: IOAS ID to change the mapping of
251 * @src_ioas_id: IOAS ID to copy from
252 * @length: Number of bytes to copy and map
253 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
254 * set then this must be provided as input.
255 * @src_iova: IOVA in @src_ioas_id to start the copy from
256 *
257 * Copy an already existing mapping from src_ioas_id and establish it in
258 * dst_ioas_id. The src iova/length must exactly match a range used with
259 * IOMMU_IOAS_MAP.
260 *
261 * This may be used to efficiently clone a subset of an IOAS to another, or as a
262 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
263 * establishing equivalent new mappings, as internal resources are shared, and
264 * the kernel will pin the user memory only once.
265 */
266struct iommu_ioas_copy {
267 __u32 size;
268 __u32 flags;
269 __u32 dst_ioas_id;
270 __u32 src_ioas_id;
271 __aligned_u64 length;
272 __aligned_u64 dst_iova;
273 __aligned_u64 src_iova;
274};
275#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
276
277/**
278 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
279 * @size: sizeof(struct iommu_ioas_unmap)
280 * @ioas_id: IOAS ID to change the mapping of
281 * @iova: IOVA to start the unmapping at
282 * @length: On input the number of bytes to unmap, on output the number of bytes unmapped
283 *
284 * Unmap an IOVA range. The iova/length must be a superset of a previously
285 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
286 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
287 * everything.
288 */
289struct iommu_ioas_unmap {
290 __u32 size;
291 __u32 ioas_id;
292 __aligned_u64 iova;
293 __aligned_u64 length;
294};
295#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
296
297/**
298 * enum iommufd_option - Option IDs for ioctl(IOMMU_OPTION), used in
299 * struct iommu_option @option_id
300 * @IOMMU_OPTION_RLIMIT_MODE:
301 * Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
302 * to invoke this. Value 0 (default) is user based accounting, 1 uses process
303 * based accounting. Global option, object_id must be 0.
304 * @IOMMU_OPTION_HUGE_PAGES:
305 * Value 1 (default) allows contiguous pages to be combined when generating
306 * iommu mappings. Value 0 disables combining, everything is mapped to
307 * PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
308 * option, the object_id must be the IOAS ID.
309 */
310enum iommufd_option {
311 IOMMU_OPTION_RLIMIT_MODE = 0,
312 IOMMU_OPTION_HUGE_PAGES = 1,
313};
314
315/**
316 * enum iommufd_option_ops - Operations for ioctl(IOMMU_OPTION), used in
317 * struct iommu_option @op
318 * @IOMMU_OPTION_OP_SET: Set the option's value
319 * @IOMMU_OPTION_OP_GET: Get the option's value
320 */
321enum iommufd_option_ops {
322 IOMMU_OPTION_OP_SET = 0,
323 IOMMU_OPTION_OP_GET = 1,
324};
325
326/**
327 * struct iommu_option - iommu option multiplexer
328 * @size: sizeof(struct iommu_option)
329 * @option_id: One of enum iommufd_option
330 * @op: One of enum iommufd_option_ops
331 * @__reserved: Must be 0
332 * @object_id: ID of the object if required
333 * @val64: Option value to set or value returned on get
334 *
335 * Change a simple option value. This multiplexer allows controlling options
336 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
337 * will return the current value.
338 */
339struct iommu_option {
340 __u32 size;
341 __u32 option_id;
342 __u16 op;
343 __u16 __reserved;
344 __u32 object_id;
345 __aligned_u64 val64;
346};
347#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
348
349/**
350 * enum iommufd_vfio_ioas_op - Operations for ioctl(IOMMU_VFIO_IOAS)
351 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
352 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
353 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
354 */
355enum iommufd_vfio_ioas_op {
356 IOMMU_VFIO_IOAS_GET = 0,
357 IOMMU_VFIO_IOAS_SET = 1,
358 IOMMU_VFIO_IOAS_CLEAR = 2,
359};
360
361/**
362 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
363 * @size: sizeof(struct iommu_vfio_ioas)
364 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
365 * For IOMMU_VFIO_IOAS_GET will output the IOAS ID
366 * @op: One of enum iommufd_vfio_ioas_op
367 * @__reserved: Must be 0
368 *
369 * The VFIO compatibility support uses a single ioas because VFIO APIs do not
370 * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
371 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
372 * compatibility ioas, either by taking what is already set, or auto creating
373 * one. From then on VFIO will continue to use that ioas and is not affected by
374 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
375 */
376struct iommu_vfio_ioas {
377 __u32 size;
378 __u32 ioas_id;
379 __u16 op;
380 __u16 __reserved;
381};
382#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
383
384/**
385 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
386 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
387 * the parent HWPT in a nesting configuration.
388 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
389 * enforced on device attachment
390 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
391 * valid.
392 * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
393 * domain can be attached to any PASID on the device.
394 * Any domain attached to the non-PASID part of the
395 * device must also be flagged, otherwise attaching a
396 * PASID will be blocked.
397 * For the user that wants to attach PASID, ioas is
398 * not recommended for both the non-PASID part
399 * and PASID part of the device.
400 * If IOMMU does not support PASID it will return
401 * error (-EOPNOTSUPP).
402 */
403enum iommufd_hwpt_alloc_flags {
404 IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
405 IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
406 IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
407 IOMMU_HWPT_ALLOC_PASID = 1 << 3,
408};
409
410/**
411 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
412 * entry attributes (struct iommu_hwpt_vtd_s1 @flags)
413 * @IOMMU_VTD_S1_SRE: Supervisor request
414 * @IOMMU_VTD_S1_EAFE: Extended access enable
415 * @IOMMU_VTD_S1_WPE: Write protect enable
416 */
417enum iommu_hwpt_vtd_s1_flags {
418 IOMMU_VTD_S1_SRE = 1 << 0,
419 IOMMU_VTD_S1_EAFE = 1 << 1,
420 IOMMU_VTD_S1_WPE = 1 << 2,
421};
422
423/**
424 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
425 * info (IOMMU_HWPT_DATA_VTD_S1)
426 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
427 * @pgtbl_addr: The base address of the stage-1 page table.
428 * @addr_width: The address width of the stage-1 page table.
429 * @__reserved: Must be 0
430 */
431struct iommu_hwpt_vtd_s1 {
432 __aligned_u64 flags;
433 __aligned_u64 pgtbl_addr;
434 __u32 addr_width;
435 __u32 __reserved;
436};
437
438/**
439 * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
440 * (IOMMU_HWPT_DATA_ARM_SMMUV3)
441 *
442 * @ste: The first two double words of the user space Stream Table Entry for
443 * the translation. Must be little-endian.
444 * Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
445 * - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
446 * - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
447 *
448 * -EIO will be returned if @ste is not legal or contains any non-allowed field.
449 * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
450 * nested domain will translate the same as the nesting parent. The S1 will
451 * install a Context Descriptor Table pointing at userspace memory translated
452 * by the nesting parent.
453 *
454 * It's suggested to allocate a vDEVICE object carrying vSID and then re-attach
455 * the nested domain, as soon as the vSID is available at the VMM level:
456 *
457 * - when Cfg=translate, a vDEVICE must be allocated prior to attaching to the
458 * allocated nested domain, as CD/ATS invalidations and vevents need a vSID.
459 * - when Cfg=bypass/abort, a vDEVICE is not enforced during the nested domain
460 * attachment, to support a GBPA case where VM sets CR0.SMMUEN=0. However, if
461 * VM sets CR0.SMMUEN=1 while missing a vDEVICE object, kernel would fail to
462 * report events to the VM. E.g. F_TRANSLATION when guest STE.Cfg=abort.
463 */
464struct iommu_hwpt_arm_smmuv3 {
465 __aligned_le64 ste[2];
466};
467
468/**
469 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
470 * @IOMMU_HWPT_DATA_NONE: no data
471 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table (struct iommu_hwpt_vtd_s1)
472 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table (struct iommu_hwpt_arm_smmuv3)
473 */
474enum iommu_hwpt_data_type {
475 IOMMU_HWPT_DATA_NONE = 0,
476 IOMMU_HWPT_DATA_VTD_S1 = 1,
477 IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
478};
479
480/**
481 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
482 * @size: sizeof(struct iommu_hwpt_alloc)
483 * @flags: Combination of enum iommufd_hwpt_alloc_flags
484 * @dev_id: The device to allocate this HWPT for
485 * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
486 * @out_hwpt_id: The ID of the new HWPT
487 * @__reserved: Must be 0
488 * @data_type: One of enum iommu_hwpt_data_type
489 * @data_len: Length of the type specific data
490 * @data_uptr: User pointer to the type specific data
491 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if
492 * IOMMU_HWPT_FAULT_ID_VALID is set in @flags.
493 * @__reserved2: Padding to 64-bit alignment. Must be 0.
494 *
495 * Explicitly allocate a hardware page table object. This is the same object
496 * type that is returned by iommufd_device_attach() and represents the
497 * underlying iommu driver's iommu_domain kernel object.
498 *
499 * A kernel-managed HWPT will be created with the mappings from the given
500 * IOAS via the @pt_id. The @data_type for this allocation must be set to
501 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
502 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
503 *
504 * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
505 * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
506 * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
507 * case, the @data_type must be set to a pre-defined type corresponding to an
508 * I/O page table type supported by the underlying IOMMU hardware. The device
509 * via @dev_id and the vIOMMU via @pt_id must be associated to the same IOMMU
510 * instance.
511 *
512 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
513 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
514 * must be given.
515 */
516struct iommu_hwpt_alloc {
517 __u32 size;
518 __u32 flags;
519 __u32 dev_id;
520 __u32 pt_id;
521 __u32 out_hwpt_id;
522 __u32 __reserved;
523 __u32 data_type;
524 __u32 data_len;
525 __aligned_u64 data_uptr;
526 __u32 fault_id;
527 __u32 __reserved2;
528};
529#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
530
531/**
532 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info (struct iommu_hw_info_vtd @flags)
533 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
534 * on a nested_parent domain.
535 * https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
536 */
537enum iommu_hw_info_vtd_flags {
538 IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
539};
540
541/**
542 * struct iommu_hw_info_vtd - Intel VT-d hardware information
543 *
544 * @flags: Combination of enum iommu_hw_info_vtd_flags
545 * @__reserved: Must be 0
546 *
547 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
548 * section 11.4.2 Capability Register.
549 * @ecap_reg: Value of Intel VT-d extended capability register defined in VT-d spec
550 * section 11.4.3 Extended Capability Register.
551 *
552 * User needs to understand the Intel VT-d specification to decode the
553 * register value.
554 */
555struct iommu_hw_info_vtd {
556 __u32 flags;
557 __u32 __reserved;
558 __aligned_u64 cap_reg;
559 __aligned_u64 ecap_reg;
560};
561
562/**
563 * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
564 * (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
565 *
566 * @flags: Must be set to 0
567 * @__reserved: Must be 0
568 * @idr: Implemented features for ARM SMMU Non-secure programming interface
569 * @iidr: Information about the implementation and implementer of ARM SMMU,
570 * and architecture version supported
571 * @aidr: ARM SMMU architecture version
572 *
573 * For the details of @idr, @iidr and @aidr, please refer to the chapters
574 * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
575 *
576 * This reports the raw HW capability, and not all bits are meaningful to be
577 * read by userspace. Only the following fields should be used:
578 *
579 * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN, CD2L, ASID16, TTF
580 * idr[1]: SIDSIZE, SSIDSIZE
581 * idr[3]: BBML, RIL
582 * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
583 *
584 * - S1P should be assumed to be true if a NESTED HWPT can be created
585 * - VFIO/iommufd only support platforms with COHACC, it should be assumed to be
586 * true.
587 * - ATS is a per-device property. If the VMM describes any devices as ATS
588 * capable in ACPI/DT it should set the corresponding idr.
589 *
590 * This list may expand in future (e.g. E0PD, AIE, PBHA, D128, DS etc). It is
591 * important that VMMs do not read bits outside the list to allow for
592 * compatibility with future kernels. Several features in the SMMUv3
593 * architecture are not currently supported by the kernel for nesting: HTTU,
594 * BTM, MPAM and others.
595 */
596struct iommu_hw_info_arm_smmuv3 {
597 __u32 flags;
598 __u32 __reserved;
599 __u32 idr[6];
600 __u32 iidr;
601 __u32 aidr;
602};
603
604/**
605 * struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware
606 * Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV)
607 *
608 * @flags: Must be 0
609 * @version: Version number for the CMDQ-V HW for PARAM bits[03:00]
610 * @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04]
611 * @log2vsids: Log2 of the total number of SID replacements for PARAM bits[15:12]
612 * @__reserved: Must be 0
613 *
614 * A VMM can use these fields directly in its emulated global PARAM register. Note
615 * that only one Virtual Interface (VINTF) should be exposed to a VM, i.e. PARAM
616 * bits[11:08] should be set to 0 for log2 of the total number of VINTFs.
617 */
618struct iommu_hw_info_tegra241_cmdqv {
619 __u32 flags;
620 __u8 version;
621 __u8 log2vcmdqs;
622 __u8 log2vsids;
623 __u8 __reserved;
624};
625
626/**
627 * enum iommu_hw_info_type - IOMMU Hardware Info Types
628 * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware
629 * info
630 * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request the default type
631 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
632 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
633 * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
634 * SMMUv3) info type
635 */
636enum iommu_hw_info_type {
637 IOMMU_HW_INFO_TYPE_NONE = 0,
638 IOMMU_HW_INFO_TYPE_DEFAULT = 0, /* same value as NONE: input request vs. output result */
639 IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
640 IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
641 IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3,
642};
643
644/**
645 * enum iommufd_hw_capabilities - Generic IOMMU hardware capability flags
646 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
647 * If available, it means the following APIs
648 * are supported:
649 *
650 * IOMMU_HWPT_GET_DIRTY_BITMAP
651 * IOMMU_HWPT_SET_DIRTY_TRACKING
652 *
653 * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported, user ignores it
654 * when the struct
655 * iommu_hw_info::out_max_pasid_log2 is zero.
656 * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported, user ignores it
657 * when the struct
658 * iommu_hw_info::out_max_pasid_log2 is zero.
659 */
660enum iommufd_hw_capabilities {
661 IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
662 IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
663 IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
664};
665
666/**
667 * enum iommufd_hw_info_flags - Flags for iommu_hw_info
668 * @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type
669 * for user space to request a specific info type
670 */
671enum iommufd_hw_info_flags {
672 IOMMU_HW_INFO_FLAG_INPUT_TYPE = 1 << 0,
673};
674
675/**
676 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
677 * @size: sizeof(struct iommu_hw_info)
678 * @flags: Combination of enum iommufd_hw_info_flags
679 * @dev_id: The device bound to the iommufd
680 * @data_len: Input the length of a user buffer in bytes. Output the length of
681 * data that kernel supports
682 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
683 * the iommu type specific hardware information data
684 * @in_data_type: This shares the same field with @out_data_type, making it be
685 * a bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is
686 * set, an input type carried via this @in_data_type field will
687 * be valid, requesting for the info data to the given type. If
688 * IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will
689 * be seen as IOMMU_HW_INFO_TYPE_DEFAULT
690 * @out_data_type: Output the iommu hardware info type as defined in the enum
691 * iommu_hw_info_type.
692 * @out_capabilities: Output the generic iommu capability info type as defined
693 * in the enum iommufd_hw_capabilities.
694 * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
695 * PCI devices turn to out_capabilities to check if the
696 * specific capabilities is supported or not.
697 * @__reserved: Must be 0
698 *
699 * Query an iommu type specific hardware information data from an iommu behind
700 * a given device that has been bound to iommufd. This hardware info data will
701 * be used to sync capabilities between the virtual iommu and the physical
702 * iommu, e.g. a nested translation setup needs to check the hardware info, so
703 * a guest stage-1 page table can be compatible with the physical iommu.
704 *
705 * To capture an iommu type specific hardware information data, @data_uptr and
706 * its length @data_len must be provided. Trailing bytes will be zeroed if the
707 * user buffer is larger than the data that kernel has. Otherwise, kernel only
708 * fills the buffer using the given length in @data_len. If the ioctl succeeds,
709 * @data_len will be updated to the length that kernel actually supports,
710 * @out_data_type will be filled to decode the data filled in the buffer
711 * pointed by @data_uptr. Input @data_len == zero is allowed.
712 */
713struct iommu_hw_info {
714 __u32 size;
715 __u32 flags;
716 __u32 dev_id;
717 __u32 data_len;
718 __aligned_u64 data_uptr;
719 union {
720 __u32 in_data_type;
721 __u32 out_data_type;
722 };
723 __u8 out_max_pasid_log2;
724 __u8 __reserved[3];
725 __aligned_u64 out_capabilities;
726};
727#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
728
729/**
730 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
731 * tracking
732 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
733 */
734enum iommufd_hwpt_set_dirty_tracking_flags {
735 IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
736};
737
738/**
739 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
740 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
741 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
742 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
743 * @__reserved: Must be 0
744 *
745 * Toggle dirty tracking on the HW pagetable identified by @hwpt_id.
746 */
747struct iommu_hwpt_set_dirty_tracking {
748 __u32 size;
749 __u32 flags;
750 __u32 hwpt_id;
751 __u32 __reserved;
752};
753#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
754 IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
755
756/**
757 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
758 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
759 * any dirty bits metadata. This flag
760 * can be passed when the next
761 * operation is expected to be an unmap
762 * of the same IOVA range.
763 *
764 */
765enum iommufd_hwpt_get_dirty_bitmap_flags {
766 IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
767};
768
769/**
770 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
771 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
772 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
773 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
774 * @__reserved: Must be 0
775 * @iova: base IOVA of the bitmap first bit
776 * @length: IOVA range size
777 * @page_size: page size granularity of each bit in the bitmap
778 * @data: bitmap where to set the dirty bits. Each bit of the bitmap
779 * represents one @page_size unit, indexed as shown below.
780 *
781 * Checking a given IOVA is dirty:
782 *
783 * data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
784 *
785 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
786 * with the dirty IOVAs. In doing so it will also by default clear any
787 * dirty bit metadata set in the IOPTE.
788 */
789struct iommu_hwpt_get_dirty_bitmap {
790 __u32 size;
791 __u32 hwpt_id;
792 __u32 flags;
793 __u32 __reserved;
794 __aligned_u64 iova;
795 __aligned_u64 length;
796 __aligned_u64 page_size;
797 __aligned_u64 data;
798};
799#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
800 IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
801
802/**
803 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
804 * Data Type
805 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1 (struct iommu_hwpt_vtd_s1_invalidate)
806 * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3 (struct iommu_viommu_arm_smmuv3_invalidate)
807 */
808enum iommu_hwpt_invalidate_data_type {
809 IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
810 IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
811};
812
813/**
814 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
815 * stage-1 cache invalidation (struct iommu_hwpt_vtd_s1_invalidate @flags)
816 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
817 * to all-levels page structure cache or just
818 * the leaf PTE cache.
819 */
820enum iommu_hwpt_vtd_s1_invalidate_flags {
821 IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
822};
823
824/**
825 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
826 * (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
827 * @addr: The start address of the range to be invalidated. It needs to
828 * be 4KB aligned.
829 * @npages: Number of contiguous 4K pages to be invalidated.
830 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
831 * @__reserved: Must be 0
832 *
833 * The Intel VT-d specific invalidation data for user-managed stage-1 cache
834 * invalidation in nested translation. Userspace uses this structure to
835 * tell the impacted cache scope after modifying the stage-1 page table.
836 *
837 * All the caches related to the page table can be invalidated by setting @addr
838 * to be 0 and @npages to be U64_MAX.
839 *
840 * The device TLB will be invalidated automatically if ATS is enabled.
841 */
842struct iommu_hwpt_vtd_s1_invalidate {
843 __aligned_u64 addr;
844 __aligned_u64 npages;
845 __u32 flags;
846 __u32 __reserved;
847};
848
/**
 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
 *         (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
 * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
 *       Must be little-endian.
 *
 * Supported command list only when passing in a vIOMMU via @hwpt_id:
 *     CMDQ_OP_TLBI_NSNH_ALL
 *     CMDQ_OP_TLBI_NH_VA
 *     CMDQ_OP_TLBI_NH_VAA
 *     CMDQ_OP_TLBI_NH_ALL
 *     CMDQ_OP_TLBI_NH_ASID
 *     CMDQ_OP_ATC_INV
 *     CMDQ_OP_CFGI_CD
 *     CMDQ_OP_CFGI_CD_ALL
 *
 * -EIO will be returned if the command is not supported.
 */
struct iommu_viommu_arm_smmuv3_invalidate {
	__aligned_le64 cmd[2];
};
870
/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)
 * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
 * @data_uptr: User pointer to an array of driver-specific cache invalidation
 *             data.
 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
 *             type of all the entries in the invalidation request array. It
 *             should be a type supported by the hwpt pointed by @hwpt_id.
 * @entry_len: Length (in bytes) of a request entry in the request array
 * @entry_num: Input the number of cache invalidation requests in the array.
 *             Output the number of requests successfully handled by kernel.
 * @__reserved: Must be 0.
 *
 * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
 * on a user-managed page table should be followed by this operation, if a HWPT
 * is passed in via @hwpt_id. Other caches, such as device cache or descriptor
 * cache can be flushed if a vIOMMU is passed in via the @hwpt_id field.
 *
 * Each ioctl can support one or more cache invalidation requests in the array
 * that has a total size of @entry_len * @entry_num.
 *
 * An empty invalidation request array by setting @entry_num==0 is allowed, and
 * @entry_len and @data_uptr would be ignored in this case. This can be used to
 * check if the given @data_type is supported or not by kernel.
 */
struct iommu_hwpt_invalidate {
	__u32 size;
	__u32 hwpt_id;
	__aligned_u64 data_uptr;
	__u32 data_type;
	__u32 entry_len;
	__u32 entry_num;
	__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
907
/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID		= (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE		= (1 << 1),
};
918
/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ			= (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE		= (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC			= (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV			= (1 << 3),
};
935
/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: id of the originated device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Must be 0.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there's no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};
964
/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};
978
/**
 * struct iommu_hwpt_page_response - IOMMU page fault response
 * @cookie: The kernel-managed cookie reported in the fault message.
 * @code: One of response code in enum iommufd_page_response_code.
 */
struct iommu_hwpt_page_response {
	__u32 cookie;
	__u32 code;
};
988
/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
1005
/**
 * enum iommu_viommu_type - Virtual IOMMU Type
 * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
 * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                    SMMUv3) enabled ARM SMMUv3 type
 */
enum iommu_viommu_type {
	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
	IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,
};
1018
/**
 * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface
 *                                      (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV)
 * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0
 * @out_vintf_mmap_length: mmap length argument for VINTF's page0
 *
 * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by kernel
 * for user space to mmap the VINTF page0 from the host physical address space
 * to the guest physical address space so that a guest kernel can directly R/W
 * access to the VINTF page0 in order to control its virtual command queues.
 */
struct iommu_viommu_tegra241_cmdqv {
	__aligned_u64 out_vintf_mmap_offset;
	__aligned_u64 out_vintf_mmap_length;
};
1034
/**
 * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
 * @size: sizeof(struct iommu_viommu_alloc)
 * @flags: Must be 0
 * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
 * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
 * @hwpt_id: ID of a nesting parent HWPT to associate to
 * @out_viommu_id: Output virtual IOMMU ID for the allocated object
 * @data_len: Length of the type specific data
 * @__reserved: Must be 0
 * @data_uptr: User pointer to a driver-specific virtual IOMMU data
 *
 * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's
 * virtualization support that is a security-isolated slice of the real IOMMU HW
 * that is unique to a specific VM. Operations global to the IOMMU are connected
 * to the vIOMMU, such as:
 * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 * - Non-device-affiliated event reporting, e.g. invalidation queue errors
 * - Access to a sharable nesting parent pagetable across physical IOMMUs
 * - Virtualization of various platforms IDs, e.g. RIDs and others
 * - Delivery of paravirtualized invalidation
 * - Direct assigned invalidation queues
 * - Direct assigned interrupts
 */
struct iommu_viommu_alloc {
	__u32 size;
	__u32 flags;
	__u32 type;
	__u32 dev_id;
	__u32 hwpt_id;
	__u32 out_viommu_id;
	__u32 data_len;
	__u32 __reserved;
	__aligned_u64 data_uptr;
};
#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
1071
/**
 * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
 * @size: sizeof(struct iommu_vdevice_alloc)
 * @viommu_id: vIOMMU ID to associate with the virtual device
 * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
 * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY
 * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
 *           of AMD IOMMU, and vRID of Intel VT-d
 *
 * Allocate a virtual device instance (for a physical device) against a vIOMMU.
 * This instance holds the device's information (related to its vIOMMU) in a VM.
 * User should use IOMMU_DESTROY to destroy the virtual device before
 * destroying the physical device (by closing vfio_cdev fd). Otherwise the
 * virtual device would be forcibly destroyed on physical device destruction,
 * its vdevice_id would be permanently leaked (unremovable & unreusable) until
 * iommu fd closed.
 */
struct iommu_vdevice_alloc {
	__u32 size;
	__u32 viommu_id;
	__u32 dev_id;
	__u32 out_vdevice_id;
	__aligned_u64 virt_id;
};
#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)
1097
/**
 * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
 * @size: sizeof(struct iommu_ioas_change_process)
 * @__reserved: Must be 0
 *
 * This transfers pinned memory counts for every memory map in every IOAS
 * in the context to the current process.  This only supports maps created
 * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
 * If the ioctl returns a failure status, then nothing is changed.
 *
 * This API is useful for transferring operation of a device from one process
 * to another, such as during userland live update.
 */
struct iommu_ioas_change_process {
	__u32 size;
	__u32 __reserved;
};

#define IOMMU_IOAS_CHANGE_PROCESS \
	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)
1118
/**
 * enum iommu_veventq_flag - flag for struct iommufd_vevent_header
 * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs
 */
enum iommu_veventq_flag {
	IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0),
};
1126
/**
 * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status
 * @flags: Combination of enum iommu_veventq_flag
 * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of
 *            [0, INT_MAX] where the following index of INT_MAX is 0
 *
 * Each iommufd_vevent_header reports a sequence index of the following vEVENT:
 *
 * +----------------------+-------+----------------------+-------+---+-------+
 * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN |
 * +----------------------+-------+----------------------+-------+---+-------+
 *
 * And this sequence index is expected to be monotonic to the sequence index of
 * the previous vEVENT. If two adjacent sequence indexes have a delta larger
 * than 1, it means that delta - 1 vEVENTs have been lost, e.g. two lost
 * vEVENTs:
 *
 * +-----+----------------------+-------+----------------------+-------+-----+
 * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... |
 * +-----+----------------------+-------+----------------------+-------+-----+
 *
 * If a vEVENT is lost at the tail of the vEVENTQ and there is no following
 * vEVENT providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS
 * header would be added to the tail, and no data would follow this header:
 *
 * +--+----------------------+-------+-----------------------------------------+
 * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} |
 * +--+----------------------+-------+-----------------------------------------+
 */
struct iommufd_vevent_header {
	__u32 flags;
	__u32 sequence;
};
1159
/**
 * enum iommu_veventq_type - Virtual Event Queue Type
 * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue
 * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ
 */
enum iommu_veventq_type {
	IOMMU_VEVENTQ_TYPE_DEFAULT = 0,
	IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1,
	IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2,
};
1171
/**
 * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event
 *                                  (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3)
 * @evt: 256-bit ARM SMMUv3 Event record, little-endian.
 *       Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW Spec)
 *       - 0x04 C_BAD_STE
 *       - 0x06 F_STREAM_DISABLED
 *       - 0x08 C_BAD_SUBSTREAMID
 *       - 0x0a C_BAD_CD
 *       - 0x10 F_TRANSLATION
 *       - 0x11 F_ADDR_SIZE
 *       - 0x12 F_ACCESS
 *       - 0x13 F_PERMISSION
 *
 * StreamID field reports a virtual device ID. To receive a virtual event for a
 * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC.
 */
struct iommu_vevent_arm_smmuv3 {
	__aligned_le64 evt[4];
};
1192
/**
 * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ
 *                                      (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV)
 * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian.
 *                  (Refer to register LVCMDQ_ERR_MAPs per VINTF)
 *
 * The 128-bit register value from HW exclusively reflects the error bits for a
 * Virtual Interface represented by a vIOMMU object. Read and report directly.
 */
struct iommu_vevent_tegra241_cmdqv {
	__aligned_le64 lvcmdq_err_map[2];
};
1205
/**
 * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC)
 * @size: sizeof(struct iommu_veventq_alloc)
 * @flags: Must be 0
 * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with
 * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type
 * @veventq_depth: Maximum number of events in the vEVENTQ
 * @out_veventq_id: The ID of the new vEVENTQ
 * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the
 *                  successfully returned fd after using it
 * @__reserved: Must be 0
 *
 * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU
 * can have multiple FDs for different types, but is confined to one per @type.
 * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ,
 * if there are vEVENTs available. A vEVENTQ will lose events due to overflow,
 * if the number of the vEVENTs hits @veventq_depth.
 *
 * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by
 * a type-specific data structure, in a normal case:
 *
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | |
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 *
 * unless a trailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to
 * struct iommufd_vevent_header).
 */
struct iommu_veventq_alloc {
	__u32 size;
	__u32 flags;
	__u32 viommu_id;
	__u32 type;
	__u32 veventq_depth;
	__u32 out_veventq_id;
	__u32 out_veventq_fd;
	__u32 __reserved;
};
#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC)
1245
/**
 * enum iommu_hw_queue_type - HW Queue Type
 * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
 *                                      SMMUv3) Virtual Command Queue (VCMDQ)
 */
enum iommu_hw_queue_type {
	IOMMU_HW_QUEUE_TYPE_DEFAULT = 0,
	/*
	 * TEGRA241_CMDQV requirements (otherwise, allocation will fail)
	 * - alloc starts from the lowest @index=0 in ascending order
	 * - destroy starts from the last allocated @index in descending order
	 * - @nesting_parent_iova must be aligned to @length in bytes and mapped
	 *   in IOAS
	 * - @length must be a power of 2, with a minimum 32 bytes and a maximum
	 *   2 ^ idr[1].CMDQS * 16 bytes (use GET_HW_INFO call to read idr[1]
	 *   from struct iommu_hw_info_arm_smmuv3)
	 * - suggest to back the queue memory with contiguous physical pages or
	 *   a single huge page with alignment of the queue size, and limit the
	 *   emulated vSMMU's IDR1.CMDQS to log2(huge page size / 16 bytes)
	 */
	IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1,
};
1268
/**
 * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_hw_queue_alloc)
 * @flags: Must be 0
 * @viommu_id: Virtual IOMMU ID to associate the HW queue with
 * @type: One of enum iommu_hw_queue_type
 * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue
 *         model
 * @out_hw_queue_id: The ID of the new HW queue
 * @nesting_parent_iova: Base address of the queue memory in the guest physical
 *                       address space
 * @length: Length of the queue memory
 *
 * Allocate a HW queue object for a vIOMMU-specific HW-accelerated queue, which
 * allows HW to access a guest queue memory described using @nesting_parent_iova
 * and @length.
 *
 * A vIOMMU can allocate multiple queues, but it must use a different @index per
 * type to separate each allocation, e.g::
 *
 *     Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ...
 */
struct iommu_hw_queue_alloc {
	__u32 size;
	__u32 flags;
	__u32 viommu_id;
	__u32 type;
	__u32 index;
	__u32 out_hw_queue_id;
	__aligned_u64 nesting_parent_iova;
	__aligned_u64 length;
};
#define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC)
1302#endif