Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 */
#ifndef _UAPI_IOMMUFD_H
#define _UAPI_IOMMUFD_H

#include <linux/types.h>
#include <linux/ioctl.h>

/* ioctl type character passed to _IO() by every iommufd command below */
#define IOMMUFD_TYPE (';')

/**
 * DOC: General ioctl format
 *
 * The ioctl interface follows a general format to allow for extensibility. Each
 * ioctl is passed in a structure pointer as the argument providing the size of
 * the structure in the first u32. The kernel checks that any structure space
 * beyond what it understands is 0. This allows userspace to use the backward
 * compatible portion while consistently using the newer, larger, structures.
 *
 * ioctls use a standard meaning for common errnos:
 *
 *  - ENOTTY: The IOCTL number itself is not supported at all
 *  - E2BIG: The IOCTL number is supported, but the provided structure has
 *    non-zero in a part the kernel does not understand.
 *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
 *    understood, however a known field has a value the kernel does not
 *    understand or support.
 *  - EINVAL: Everything about the IOCTL was understood, but a field is not
 *    correct.
 *  - ENOENT: An ID or IOVA provided does not exist.
 *  - ENOMEM: Out of memory.
 *  - EOVERFLOW: Mathematics overflowed.
 *
 * As well as additional errnos, within specific ioctls.
 */
enum {
	IOMMUFD_CMD_BASE = 0x80,
	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
	IOMMUFD_CMD_IOAS_COPY = 0x83,
	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
	IOMMUFD_CMD_IOAS_MAP = 0x85,
	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
	IOMMUFD_CMD_OPTION = 0x87,
	IOMMUFD_CMD_VFIO_IOAS = 0x88,
	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
};

/**
 * struct iommu_destroy - ioctl(IOMMU_DESTROY)
 * @size: sizeof(struct iommu_destroy)
 * @id: iommufd object ID to destroy. Can be any destroyable object type.
 *
 * Destroy any object held within iommufd.
 */
struct iommu_destroy {
	__u32 size;
	__u32 id;
};
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)

/**
 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
 * @size: sizeof(struct iommu_ioas_alloc)
 * @flags: Must be 0
 * @out_ioas_id: Output IOAS ID for the allocated object
 *
 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
 * to memory mapping.
 */
struct iommu_ioas_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_ioas_id;
};
#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)

/**
 * struct iommu_iova_range - An inclusive interval in IOVA space
 * @start: First IOVA
 * @last: Inclusive last IOVA
 *
 * This is not an ioctl argument by itself; it is the element type of the
 * allowed_iovas arrays referenced by struct iommu_ioas_iova_ranges and
 * struct iommu_ioas_allow_iovas.
 */
struct iommu_iova_range {
	__aligned_u64 start;
	__aligned_u64 last;
};

/**
 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
 * @size: sizeof(struct iommu_ioas_iova_ranges)
 * @ioas_id: IOAS ID to read ranges from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
 * @out_iova_alignment: Minimum alignment required for mapping IOVA
 *
 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
 * is not allowed. num_iovas will be set to the total number of iovas and
 * the allowed_iovas[] will be filled in as space permits.
 *
 * The allowed ranges are dependent on the HW path the DMA operation takes, and
 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
 * full range, and each attached device will narrow the ranges based on that
 * device's HW restrictions. Detaching a device can widen the ranges. Userspace
 * should query ranges after every attach/detach to know what IOVAs are valid
 * for mapping.
 *
 * On input num_iovas is the length of the allowed_iovas array. On output it is
 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
 * num_iovas to the required value if num_iovas is too small. In this case the
 * caller should allocate a larger output array and re-issue the ioctl.
 *
 * out_iova_alignment returns the minimum IOVA alignment that can be given
 * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy::
 *
 *   starting_iova % out_iova_alignment == 0
 *   (starting_iova + length) % out_iova_alignment == 0
 *
 * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
 * be higher than the system PAGE_SIZE.
 */
struct iommu_ioas_iova_ranges {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
	__aligned_u64 out_iova_alignment;
};
#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)

/**
 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
 * @size: sizeof(struct iommu_ioas_allow_iovas)
 * @ioas_id: IOAS ID to allow IOVAs from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to array of struct iommu_iova_range
 *
 * Ensure a range of IOVAs are always available for allocation. If this call
 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
 * that are narrower than the ranges provided here. This call will fail if
 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
 *
 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
 * devices are attached the IOVA will narrow based on the device restrictions.
 * When an allowed range is specified any narrowing will be refused, ie device
 * attachment can fail if the device requires limiting within the allowed range.
 *
 * Automatic IOVA allocation is also impacted by this call. MAP will only
 * allocate within the allowed IOVAs if they are present.
 *
 * This call replaces the entire allowed list with the given list.
 */
struct iommu_ioas_allow_iovas {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
};
#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)

/**
 * enum iommufd_ioas_map_flags - Flags for map and copy
 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
 *                             IOVA to place the mapping at
 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
 */
enum iommufd_ioas_map_flags {
	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
	IOMMU_IOAS_MAP_READABLE = 1 << 2,
};

/**
 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
 * @size: sizeof(struct iommu_ioas_map)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @ioas_id: IOAS ID to change the mapping of
 * @__reserved: Must be 0
 * @user_va: Userspace pointer to start mapping from
 * @length: Number of bytes to map
 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
 *        then this must be provided as input.
 *
 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
 * mapping will be established at iova, otherwise a suitable location based on
 * the reserved and allowed lists will be automatically selected and returned in
 * iova.
 *
 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
 * be unused, existing IOVA cannot be replaced.
 */
struct iommu_ioas_map {
	__u32 size;
	__u32 flags;
	__u32 ioas_id;
	__u32 __reserved;
	__aligned_u64 user_va;
	__aligned_u64 length;
	__aligned_u64 iova;
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)

/**
 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
 * @size: sizeof(struct iommu_ioas_copy)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @dst_ioas_id: IOAS ID to change the mapping of
 * @src_ioas_id: IOAS ID to copy from
 * @length: Number of bytes to copy and map
 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
 *            set then this must be provided as input.
 * @src_iova: IOVA to start the copy
 *
 * Copy an already existing mapping from src_ioas_id and establish it in
 * dst_ioas_id. The src iova/length must exactly match a range used with
 * IOMMU_IOAS_MAP.
 *
 * This may be used to efficiently clone a subset of an IOAS to another, or as a
 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
 * establishing equivalent new mappings, as internal resources are shared, and
 * the kernel will pin the user memory only once.
 */
struct iommu_ioas_copy {
	__u32 size;
	__u32 flags;
	__u32 dst_ioas_id;
	__u32 src_ioas_id;
	__aligned_u64 length;
	__aligned_u64 dst_iova;
	__aligned_u64 src_iova;
};
#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)

/**
 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
 * @size: sizeof(struct iommu_ioas_unmap)
 * @ioas_id: IOAS ID to change the mapping of
 * @iova: IOVA to start the unmapping at
 * @length: Number of bytes to unmap, and return back the bytes unmapped
 *
 * Unmap an IOVA range. The iova/length must be a superset of a previously
 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
 * everything.
 */
struct iommu_ioas_unmap {
	__u32 size;
	__u32 ioas_id;
	__aligned_u64 iova;
	__aligned_u64 length;
};
#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)

/**
 * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
 *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
 * @IOMMU_OPTION_RLIMIT_MODE:
 *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
 *    to invoke this. Value 0 (default) is user based accounting, 1 uses process
 *    based accounting. Global option, object_id must be 0
 * @IOMMU_OPTION_HUGE_PAGES:
 *    Value 1 (default) allows contiguous pages to be combined when generating
 *    iommu mappings. Value 0 disables combining, everything is mapped to
 *    PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
 *    option, the object_id must be the IOAS ID.
 */
enum iommufd_option {
	IOMMU_OPTION_RLIMIT_MODE = 0,
	IOMMU_OPTION_HUGE_PAGES = 1,
};

/**
 * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
 *                           ioctl(IOMMU_OPTION_OP_GET)
 * @IOMMU_OPTION_OP_SET: Set the option's value
 * @IOMMU_OPTION_OP_GET: Get the option's value
 */
enum iommufd_option_ops {
	IOMMU_OPTION_OP_SET = 0,
	IOMMU_OPTION_OP_GET = 1,
};

/**
 * struct iommu_option - iommu option multiplexer
 * @size: sizeof(struct iommu_option)
 * @option_id: One of enum iommufd_option
 * @op: One of enum iommufd_option_ops
 * @__reserved: Must be 0
 * @object_id: ID of the object if required
 * @val64: Option value to set or value returned on get
 *
 * Change a simple option value. This multiplexer allows controlling options
 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
 * will return the current value.
 */
struct iommu_option {
	__u32 size;
	__u32 option_id;
	__u16 op;
	__u16 __reserved;
	__u32 object_id;
	__aligned_u64 val64;
};
#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)

/**
 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
 */
enum iommufd_vfio_ioas_op {
	IOMMU_VFIO_IOAS_GET = 0,
	IOMMU_VFIO_IOAS_SET = 1,
	IOMMU_VFIO_IOAS_CLEAR = 2,
};

/**
 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
 * @size: sizeof(struct iommu_vfio_ioas)
 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
 *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
 * @op: One of enum iommufd_vfio_ioas_op
 * @__reserved: Must be 0
 *
 * The VFIO compatibility support uses a single ioas because VFIO APIs do not
 * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
 * compatibility ioas, either by taking what is already set, or auto creating
 * one. From then on VFIO will continue to use that ioas and is not affected by
 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
 */
struct iommu_vfio_ioas {
	__u32 size;
	__u32 ioas_id;
	__u16 op;
	__u16 __reserved;
};
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)

/**
 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
 *                                the parent HWPT in a nesting configuration.
 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
 *                                   enforced on device attachment
 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
 *                             valid.
 */
enum iommufd_hwpt_alloc_flags {
	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};

/**
 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
 *                                entry attributes
 * @IOMMU_VTD_S1_SRE: Supervisor request
 * @IOMMU_VTD_S1_EAFE: Extended access enable
 * @IOMMU_VTD_S1_WPE: Write protect enable
 */
enum iommu_hwpt_vtd_s1_flags {
	IOMMU_VTD_S1_SRE = 1 << 0,
	IOMMU_VTD_S1_EAFE = 1 << 1,
	IOMMU_VTD_S1_WPE = 1 << 2,
};

/**
 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
 *                            info (IOMMU_HWPT_DATA_VTD_S1)
 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
 * @pgtbl_addr: The base address of the stage-1 page table.
 * @addr_width: The address width of the stage-1 page table
 * @__reserved: Must be 0
 */
struct iommu_hwpt_vtd_s1 {
	__aligned_u64 flags;
	__aligned_u64 pgtbl_addr;
	__u32 addr_width;
	__u32 __reserved;
};

/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
 */
enum iommu_hwpt_data_type {
	IOMMU_HWPT_DATA_NONE = 0,
	IOMMU_HWPT_DATA_VTD_S1 = 1,
};

/**
 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
 * @size: sizeof(struct iommu_hwpt_alloc)
 * @flags: Combination of enum iommufd_hwpt_alloc_flags
 * @dev_id: The device to allocate this HWPT for
 * @pt_id: The IOAS or HWPT to connect this HWPT to
 * @out_hwpt_id: The ID of the new HWPT
 * @__reserved: Must be 0
 * @data_type: One of enum iommu_hwpt_data_type
 * @data_len: Length of the type specific data
 * @data_uptr: User pointer to the type specific data
 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
 *            IOMMU_HWPT_FAULT_ID_VALID is set.
 * @__reserved2: Padding to 64-bit alignment. Must be 0.
 *
 * Explicitly allocate a hardware page table object. This is the same object
 * type that is returned by iommufd_device_attach() and represents the
 * underlying iommu driver's iommu_domain kernel object.
 *
 * A kernel-managed HWPT will be created with the mappings from the given
 * IOAS via the @pt_id. The @data_type for this allocation must be set to
 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
 *
 * A user-managed nested HWPT will be created from a given parent HWPT via
 * @pt_id, in which the parent HWPT must be allocated previously via the
 * same ioctl from a given IOAS (@pt_id). In this case, the @data_type
 * must be set to a pre-defined type corresponding to an I/O page table
 * type supported by the underlying IOMMU hardware.
 *
 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
 * must be given.
 */
struct iommu_hwpt_alloc {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 pt_id;
	__u32 out_hwpt_id;
	__u32 __reserved;
	__u32 data_type;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 fault_id;
	__u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)

/**
 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
 *                                         on a nested_parent domain.
 *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
 */
enum iommu_hw_info_vtd_flags {
	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
};

/**
 * struct iommu_hw_info_vtd - Intel VT-d hardware information
 *
 * @flags: Combination of enum iommu_hw_info_vtd_flags
 * @__reserved: Must be 0
 *
 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
 *           section 11.4.2 Capability Register.
 * @ecap_reg: Value of Intel VT-d extended capability register defined in
 *            VT-d spec section 11.4.3 Extended Capability Register.
 *
 * User needs to understand the Intel VT-d specification to decode the
 * register value.
 */
struct iommu_hw_info_vtd {
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 cap_reg;
	__aligned_u64 ecap_reg;
};

/**
 * enum iommu_hw_info_type - IOMMU Hardware Info Types
 * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
 *                           info
 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
 */
enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_NONE = 0,
	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
};

/**
 * enum iommufd_hw_capabilities
 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
 *                               If available, it means the following APIs
 *                               are supported:
 *
 *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
 *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
 *
 */
enum iommufd_hw_capabilities {
	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
};

/**
 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
 * @size: sizeof(struct iommu_hw_info)
 * @flags: Must be 0
 * @dev_id: The device bound to the iommufd
 * @data_len: Input the length of a user buffer in bytes. Output the length of
 *            data that kernel supports
 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
 *             the iommu type specific hardware information data
 * @out_data_type: Output the iommu hardware info type as defined in the enum
 *                 iommu_hw_info_type.
 * @out_capabilities: Output the generic iommu capability info type as defined
 *                    in the enum iommufd_hw_capabilities.
 * @__reserved: Must be 0
 *
 * Query an iommu type specific hardware information data from an iommu behind
 * a given device that has been bound to iommufd. This hardware info data will
 * be used to sync capabilities between the virtual iommu and the physical
 * iommu, e.g. a nested translation setup needs to check the hardware info, so
 * a guest stage-1 page table can be compatible with the physical iommu.
 *
 * To capture an iommu type specific hardware information data, @data_uptr and
 * its length @data_len must be provided. Trailing bytes will be zeroed if the
 * user buffer is larger than the data that kernel has. Otherwise, kernel only
 * fills the buffer using the given length in @data_len. If the ioctl succeeds,
 * @data_len will be updated to the length that kernel actually supports,
 * @out_data_type will be filled to decode the data filled in the buffer
 * pointed by @data_uptr. Input @data_len == zero is allowed.
 */
struct iommu_hw_info {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 out_data_type;
	__u32 __reserved;
	__aligned_u64 out_capabilities;
};
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)

/**
 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
 *                                              tracking
 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
 */
enum iommufd_hwpt_set_dirty_tracking_flags {
	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
};

/**
 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @__reserved: Must be 0
 *
 * Toggle dirty tracking on an HW pagetable.
 */
struct iommu_hwpt_set_dirty_tracking {
	__u32 size;
	__u32 flags;
	__u32 hwpt_id;
	__u32 __reserved;
};
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)

/**
 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
 *                                        any dirty bits metadata. This flag
 *                                        can be passed in the expectation
 *                                        where the next operation is an unmap
 *                                        of the same IOVA range.
 *
 */
enum iommufd_hwpt_get_dirty_bitmap_flags {
	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
};

/**
 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
 * @__reserved: Must be 0
 * @iova: base IOVA of the bitmap first bit
 * @length: IOVA range size
 * @page_size: page size granularity of each bit in the bitmap
 * @data: bitmap where to set the dirty bits. The bitmap bits each
 *        represent a page_size which you deviate from an arbitrary iova.
 *
 * Checking a given IOVA is dirty:
 *
 *  data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 *
 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
 * with the dirty IOVAs. In doing so it will also by default clear any
 * dirty bit metadata set in the IOPTE.
 */
struct iommu_hwpt_get_dirty_bitmap {
	__u32 size;
	__u32 hwpt_id;
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 iova;
	__aligned_u64 length;
	__aligned_u64 page_size;
	__aligned_u64 data;
};
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)

/**
 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
 *                                        Data Type
 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
 */
enum iommu_hwpt_invalidate_data_type {
	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
};

/**
 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
 *                                           stage-1 cache invalidation
 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
 *                            to all-levels page structure cache or just
 *                            the leaf PTE cache.
 */
enum iommu_hwpt_vtd_s1_invalidate_flags {
	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
};

/**
 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
 *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
 * @addr: The start address of the range to be invalidated. It needs to
 *        be 4KB aligned.
 * @npages: Number of contiguous 4K pages to be invalidated.
 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
 * @__reserved: Must be 0
 *
 * The Intel VT-d specific invalidation data for user-managed stage-1 cache
 * invalidation in nested translation. Userspace uses this structure to
 * tell the impacted cache scope after modifying the stage-1 page table.
 *
 * Invalidating all the caches related to the page table by setting @addr
 * to be 0 and @npages to be U64_MAX.
 *
 * The device TLB will be invalidated automatically if ATS is enabled.
 */
struct iommu_hwpt_vtd_s1_invalidate {
	__aligned_u64 addr;
	__aligned_u64 npages;
	__u32 flags;
	__u32 __reserved;
};

/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)
 * @hwpt_id: ID of a nested HWPT for cache invalidation
 * @data_uptr: User pointer to an array of driver-specific cache invalidation
 *             data.
 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
 *             type of all the entries in the invalidation request array. It
 *             should be a type supported by the hwpt pointed by @hwpt_id.
 * @entry_len: Length (in bytes) of a request entry in the request array
 * @entry_num: Input the number of cache invalidation requests in the array.
 *             Output the number of requests successfully handled by kernel.
 * @__reserved: Must be 0.
 *
 * Invalidate the iommu cache for user-managed page table. Modifications on a
 * user-managed page table should be followed by this operation to sync cache.
 * Each ioctl can support one or more cache invalidation requests in the array
 * that has a total size of @entry_len * @entry_num.
 *
 * An empty invalidation request array by setting @entry_num==0 is allowed, and
 * @entry_len and @data_uptr would be ignored in this case. This can be used to
 * check if the given @data_type is supported or not by kernel.
 */
struct iommu_hwpt_invalidate {
	__u32 size;
	__u32 hwpt_id;
	__aligned_u64 data_uptr;
	__u32 data_type;
	__u32 entry_len;
	__u32 entry_num;
	__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)

/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
};

/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
};

/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: id of the originated device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Explicit padding so that @addr sits at the same 8-byte aligned
 *              offset on every ABI. Must be 0.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It's default to 0 if there's no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	/*
	 * A bare __u64 here would be preceded by an implicit 4-byte hole on
	 * ABIs that align __u64 to 8 bytes and by none on ABIs that align it
	 * to 4, giving two different layouts. Make the pad explicit and force
	 * the alignment, as the other structures in this file do.
	 */
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};

/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};

/**
 * struct iommu_hwpt_page_response - IOMMU page fault response
 * @cookie: The kernel-managed cookie reported in the fault message.
 * @code: One of response code in enum iommufd_page_response_code.
 */
struct iommu_hwpt_page_response {
	__u32 cookie;
	__u32 code;
};

/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
#endif