Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

iommu/vt-d: Add IOMMU perfmon support

Implement the IOMMU performance monitor capability, which supports the
collection of information about key events occurring during operation of
the remapping hardware, to aid performance tuning and debug.

The IOMMU perfmon support is implemented as part of the IOMMU driver and
interfaces with the Linux perf subsystem.

The IOMMU PMU has the following unique features compared with the other
PMUs.
- Supports counting only; sampling is not supported.
- Does not support per-thread counting. The scope is system-wide.
- Supports a per-counter capability register, so the event constraints can
be enumerated.
- The available event and event group can also be enumerated.
- Extra Enhanced Commands are introduced to control the counters.

Add a new variable, struct iommu_pmu *pmu, in the struct intel_iommu
to track the PMU-related information.

Add iommu_pmu_register() and iommu_pmu_unregister() to register and
unregister an IOMMU PMU. The register function sets up the IOMMU PMU ops
and invokes the standard perf_pmu_register() interface to register a PMU
in the perf subsystem. This patch only exposes the functions. The
following patch will enable them in the IOMMU driver.

The IOMMU PMUs can be found under /sys/bus/event_source/devices/dmar*

The available filters and event format can be found in the format folder:
$ ls /sys/bus/event_source/devices/dmar1/format/
event event_group filter_ats filter_ats_en filter_page_table
filter_page_table_en

The supported events can be found in the events folder:

$ ls /sys/bus/event_source/devices/dmar1/events/
ats_blocked fs_nonleaf_hit int_cache_hit_posted
iommu_mem_blocked iotlb_hit pasid_cache_lookup ss_nonleaf_hit
ctxt_cache_hit fs_nonleaf_lookup int_cache_lookup
iommu_mrds iotlb_lookup pg_req_posted ss_nonleaf_lookup
ctxt_cache_lookup int_cache_hit_nonposted iommu_clocks
iommu_requests pasid_cache_hit pw_occupancy

The command below illustrates filter usage with a simple example.

$ perf stat -e dmar1/iommu_requests,filter_ats_en=0x1,filter_ats=0x1/
-a sleep 1

Performance counter stats for 'system wide':

368,947 dmar1/iommu_requests,filter_ats_en=0x1,filter_ats=0x1/

1.002592074 seconds time elapsed

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lore.kernel.org/r/20230128200428.1459118-5-kan.liang@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>

authored by

Kan Liang and committed by
Joerg Roedel
7232ab8b dc578758

+594
+29
Documentation/ABI/testing/sysfs-bus-event_source-devices-iommu
··· 1 + What: /sys/bus/event_source/devices/dmar*/format 2 + Date: Jan 2023 3 + KernelVersion: 6.3 4 + Contact: Kan Liang <kan.liang@linux.intel.com> 5 + Description: Read-only. Attribute group to describe the magic bits 6 + that go into perf_event_attr.config, 7 + perf_event_attr.config1 or perf_event_attr.config2 for 8 + the IOMMU pmu. (See also 9 + ABI/testing/sysfs-bus-event_source-devices-format). 10 + 11 + Each attribute in this group defines a bit range in 12 + perf_event_attr.config, perf_event_attr.config1, 13 + or perf_event_attr.config2. All supported attributes 14 + are listed below (See the VT-d Spec 4.0 for possible 15 + attribute values):: 16 + 17 + event = "config:0-27" - event ID 18 + event_group = "config:28-31" - event group ID 19 + 20 + filter_requester_en = "config1:0" - Enable Requester ID filter 21 + filter_domain_en = "config1:1" - Enable Domain ID filter 22 + filter_pasid_en = "config1:2" - Enable PASID filter 23 + filter_ats_en = "config1:3" - Enable Address Type filter 24 + filter_page_table_en= "config1:4" - Enable Page Table Level filter 25 + filter_requester_id = "config1:16-31" - Requester ID filter 26 + filter_domain = "config1:32-47" - Domain ID filter 27 + filter_pasid = "config2:0-21" - PASID filter 28 + filter_ats = "config2:24-28" - Address Type filter 29 + filter_page_table = "config2:32-36" - Page Table Level filter
+15
drivers/iommu/intel/iommu.h
··· 22 22 #include <linux/ioasid.h> 23 23 #include <linux/bitfield.h> 24 24 #include <linux/xarray.h> 25 + #include <linux/perf_event.h> 25 26 26 27 #include <asm/cacheflush.h> 27 28 #include <asm/iommu.h> ··· 607 606 iommu core */ 608 607 }; 609 608 609 + /* 610 + * In theory, the VT-d 4.0 spec can support up to 2 ^ 16 counters. 611 + * But in practice, there are only 14 counters for the existing 612 + * platform. Setting the max number of counters to 64 should be good 613 + * enough for a long time. Also, supporting more than 64 counters 614 + * requires more extras, e.g., extra freeze and overflow registers, 615 + * which is not necessary for now. 616 + */ 617 + #define IOMMU_PMU_IDX_MAX 64 618 + 610 619 struct iommu_pmu { 611 620 struct intel_iommu *iommu; 612 621 u32 num_cntr; /* Number of counters */ ··· 631 620 632 621 u64 *evcap; /* Indicates all supported events */ 633 622 u32 **cntr_evcap; /* Supported events of each counter. */ 623 + 624 + struct pmu pmu; 625 + DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); 626 + struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; 634 627 }; 635 628 636 629 struct intel_iommu {
+526
drivers/iommu/intel/perfmon.c
··· 10 10 #include "iommu.h" 11 11 #include "perfmon.h" 12 12 13 + PMU_FORMAT_ATTR(event, "config:0-27"); /* ES: Events Select */ 14 + PMU_FORMAT_ATTR(event_group, "config:28-31"); /* EGI: Event Group Index */ 15 + 16 + static struct attribute *iommu_pmu_format_attrs[] = { 17 + &format_attr_event_group.attr, 18 + &format_attr_event.attr, 19 + NULL 20 + }; 21 + 22 + static struct attribute_group iommu_pmu_format_attr_group = { 23 + .name = "format", 24 + .attrs = iommu_pmu_format_attrs, 25 + }; 26 + 27 + /* The available events are added in attr_update later */ 28 + static struct attribute *attrs_empty[] = { 29 + NULL 30 + }; 31 + 32 + static struct attribute_group iommu_pmu_events_attr_group = { 33 + .name = "events", 34 + .attrs = attrs_empty, 35 + }; 36 + 37 + static const struct attribute_group *iommu_pmu_attr_groups[] = { 38 + &iommu_pmu_format_attr_group, 39 + &iommu_pmu_events_attr_group, 40 + NULL 41 + }; 42 + 43 + static inline struct iommu_pmu *dev_to_iommu_pmu(struct device *dev) 44 + { 45 + /* 46 + * The perf_event creates its own dev for each PMU. 47 + * See pmu_dev_alloc() 48 + */ 49 + return container_of(dev_get_drvdata(dev), struct iommu_pmu, pmu); 50 + } 51 + 52 + #define IOMMU_PMU_ATTR(_name, _format, _filter) \ 53 + PMU_FORMAT_ATTR(_name, _format); \ 54 + \ 55 + static struct attribute *_name##_attr[] = { \ 56 + &format_attr_##_name.attr, \ 57 + NULL \ 58 + }; \ 59 + \ 60 + static umode_t \ 61 + _name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ 62 + { \ 63 + struct device *dev = kobj_to_dev(kobj); \ 64 + struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ 65 + \ 66 + if (!iommu_pmu) \ 67 + return 0; \ 68 + return (iommu_pmu->filter & _filter) ? 
attr->mode : 0; \ 69 + } \ 70 + \ 71 + static struct attribute_group _name = { \ 72 + .name = "format", \ 73 + .attrs = _name##_attr, \ 74 + .is_visible = _name##_is_visible, \ 75 + }; 76 + 77 + IOMMU_PMU_ATTR(filter_requester_id_en, "config1:0", IOMMU_PMU_FILTER_REQUESTER_ID); 78 + IOMMU_PMU_ATTR(filter_domain_en, "config1:1", IOMMU_PMU_FILTER_DOMAIN); 79 + IOMMU_PMU_ATTR(filter_pasid_en, "config1:2", IOMMU_PMU_FILTER_PASID); 80 + IOMMU_PMU_ATTR(filter_ats_en, "config1:3", IOMMU_PMU_FILTER_ATS); 81 + IOMMU_PMU_ATTR(filter_page_table_en, "config1:4", IOMMU_PMU_FILTER_PAGE_TABLE); 82 + IOMMU_PMU_ATTR(filter_requester_id, "config1:16-31", IOMMU_PMU_FILTER_REQUESTER_ID); 83 + IOMMU_PMU_ATTR(filter_domain, "config1:32-47", IOMMU_PMU_FILTER_DOMAIN); 84 + IOMMU_PMU_ATTR(filter_pasid, "config2:0-21", IOMMU_PMU_FILTER_PASID); 85 + IOMMU_PMU_ATTR(filter_ats, "config2:24-28", IOMMU_PMU_FILTER_ATS); 86 + IOMMU_PMU_ATTR(filter_page_table, "config2:32-36", IOMMU_PMU_FILTER_PAGE_TABLE); 87 + 88 + #define iommu_pmu_en_requester_id(e) ((e) & 0x1) 89 + #define iommu_pmu_en_domain(e) (((e) >> 1) & 0x1) 90 + #define iommu_pmu_en_pasid(e) (((e) >> 2) & 0x1) 91 + #define iommu_pmu_en_ats(e) (((e) >> 3) & 0x1) 92 + #define iommu_pmu_en_page_table(e) (((e) >> 4) & 0x1) 93 + #define iommu_pmu_get_requester_id(filter) (((filter) >> 16) & 0xffff) 94 + #define iommu_pmu_get_domain(filter) (((filter) >> 32) & 0xffff) 95 + #define iommu_pmu_get_pasid(filter) ((filter) & 0x3fffff) 96 + #define iommu_pmu_get_ats(filter) (((filter) >> 24) & 0x1f) 97 + #define iommu_pmu_get_page_table(filter) (((filter) >> 32) & 0x1f) 98 + 99 + #define iommu_pmu_set_filter(_name, _config, _filter, _idx, _econfig) \ 100 + { \ 101 + if ((iommu_pmu->filter & _filter) && iommu_pmu_en_##_name(_econfig)) { \ 102 + dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ 103 + IOMMU_PMU_CFG_SIZE + \ 104 + (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ 105 + iommu_pmu_get_##_name(_config) | 
IOMMU_PMU_FILTER_EN);\ 106 + } \ 107 + } 108 + 109 + #define iommu_pmu_clear_filter(_filter, _idx) \ 110 + { \ 111 + if (iommu_pmu->filter & _filter) { \ 112 + dmar_writel(iommu_pmu->cfg_reg + _idx * IOMMU_PMU_CFG_OFFSET + \ 113 + IOMMU_PMU_CFG_SIZE + \ 114 + (ffs(_filter) - 1) * IOMMU_PMU_CFG_FILTERS_OFFSET, \ 115 + 0); \ 116 + } \ 117 + } 118 + 119 + /* 120 + * Define the event attr related functions 121 + * Input: _name: event attr name 122 + * _string: string of the event in sysfs 123 + * _g_idx: event group encoding 124 + * _event: event encoding 125 + */ 126 + #define IOMMU_PMU_EVENT_ATTR(_name, _string, _g_idx, _event) \ 127 + PMU_EVENT_ATTR_STRING(_name, event_attr_##_name, _string) \ 128 + \ 129 + static struct attribute *_name##_attr[] = { \ 130 + &event_attr_##_name.attr.attr, \ 131 + NULL \ 132 + }; \ 133 + \ 134 + static umode_t \ 135 + _name##_is_visible(struct kobject *kobj, struct attribute *attr, int i) \ 136 + { \ 137 + struct device *dev = kobj_to_dev(kobj); \ 138 + struct iommu_pmu *iommu_pmu = dev_to_iommu_pmu(dev); \ 139 + \ 140 + if (!iommu_pmu) \ 141 + return 0; \ 142 + return (iommu_pmu->evcap[_g_idx] & _event) ? 
attr->mode : 0; \ 143 + } \ 144 + \ 145 + static struct attribute_group _name = { \ 146 + .name = "events", \ 147 + .attrs = _name##_attr, \ 148 + .is_visible = _name##_is_visible, \ 149 + }; 150 + 151 + IOMMU_PMU_EVENT_ATTR(iommu_clocks, "event_group=0x0,event=0x001", 0x0, 0x001) 152 + IOMMU_PMU_EVENT_ATTR(iommu_requests, "event_group=0x0,event=0x002", 0x0, 0x002) 153 + IOMMU_PMU_EVENT_ATTR(pw_occupancy, "event_group=0x0,event=0x004", 0x0, 0x004) 154 + IOMMU_PMU_EVENT_ATTR(ats_blocked, "event_group=0x0,event=0x008", 0x0, 0x008) 155 + IOMMU_PMU_EVENT_ATTR(iommu_mrds, "event_group=0x1,event=0x001", 0x1, 0x001) 156 + IOMMU_PMU_EVENT_ATTR(iommu_mem_blocked, "event_group=0x1,event=0x020", 0x1, 0x020) 157 + IOMMU_PMU_EVENT_ATTR(pg_req_posted, "event_group=0x1,event=0x040", 0x1, 0x040) 158 + IOMMU_PMU_EVENT_ATTR(ctxt_cache_lookup, "event_group=0x2,event=0x001", 0x2, 0x001) 159 + IOMMU_PMU_EVENT_ATTR(ctxt_cache_hit, "event_group=0x2,event=0x002", 0x2, 0x002) 160 + IOMMU_PMU_EVENT_ATTR(pasid_cache_lookup, "event_group=0x2,event=0x004", 0x2, 0x004) 161 + IOMMU_PMU_EVENT_ATTR(pasid_cache_hit, "event_group=0x2,event=0x008", 0x2, 0x008) 162 + IOMMU_PMU_EVENT_ATTR(ss_nonleaf_lookup, "event_group=0x2,event=0x010", 0x2, 0x010) 163 + IOMMU_PMU_EVENT_ATTR(ss_nonleaf_hit, "event_group=0x2,event=0x020", 0x2, 0x020) 164 + IOMMU_PMU_EVENT_ATTR(fs_nonleaf_lookup, "event_group=0x2,event=0x040", 0x2, 0x040) 165 + IOMMU_PMU_EVENT_ATTR(fs_nonleaf_hit, "event_group=0x2,event=0x080", 0x2, 0x080) 166 + IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_lookup, "event_group=0x2,event=0x100", 0x2, 0x100) 167 + IOMMU_PMU_EVENT_ATTR(hpt_nonleaf_hit, "event_group=0x2,event=0x200", 0x2, 0x200) 168 + IOMMU_PMU_EVENT_ATTR(iotlb_lookup, "event_group=0x3,event=0x001", 0x3, 0x001) 169 + IOMMU_PMU_EVENT_ATTR(iotlb_hit, "event_group=0x3,event=0x002", 0x3, 0x002) 170 + IOMMU_PMU_EVENT_ATTR(hpt_leaf_lookup, "event_group=0x3,event=0x004", 0x3, 0x004) 171 + IOMMU_PMU_EVENT_ATTR(hpt_leaf_hit, "event_group=0x3,event=0x008", 0x3, 
0x008) 172 + IOMMU_PMU_EVENT_ATTR(int_cache_lookup, "event_group=0x4,event=0x001", 0x4, 0x001) 173 + IOMMU_PMU_EVENT_ATTR(int_cache_hit_nonposted, "event_group=0x4,event=0x002", 0x4, 0x002) 174 + IOMMU_PMU_EVENT_ATTR(int_cache_hit_posted, "event_group=0x4,event=0x004", 0x4, 0x004) 175 + 176 + static const struct attribute_group *iommu_pmu_attr_update[] = { 177 + &filter_requester_id_en, 178 + &filter_domain_en, 179 + &filter_pasid_en, 180 + &filter_ats_en, 181 + &filter_page_table_en, 182 + &filter_requester_id, 183 + &filter_domain, 184 + &filter_pasid, 185 + &filter_ats, 186 + &filter_page_table, 187 + &iommu_clocks, 188 + &iommu_requests, 189 + &pw_occupancy, 190 + &ats_blocked, 191 + &iommu_mrds, 192 + &iommu_mem_blocked, 193 + &pg_req_posted, 194 + &ctxt_cache_lookup, 195 + &ctxt_cache_hit, 196 + &pasid_cache_lookup, 197 + &pasid_cache_hit, 198 + &ss_nonleaf_lookup, 199 + &ss_nonleaf_hit, 200 + &fs_nonleaf_lookup, 201 + &fs_nonleaf_hit, 202 + &hpt_nonleaf_lookup, 203 + &hpt_nonleaf_hit, 204 + &iotlb_lookup, 205 + &iotlb_hit, 206 + &hpt_leaf_lookup, 207 + &hpt_leaf_hit, 208 + &int_cache_lookup, 209 + &int_cache_hit_nonposted, 210 + &int_cache_hit_posted, 211 + NULL 212 + }; 213 + 214 + static inline void __iomem * 215 + iommu_event_base(struct iommu_pmu *iommu_pmu, int idx) 216 + { 217 + return iommu_pmu->cntr_reg + idx * iommu_pmu->cntr_stride; 218 + } 219 + 220 + static inline void __iomem * 221 + iommu_config_base(struct iommu_pmu *iommu_pmu, int idx) 222 + { 223 + return iommu_pmu->cfg_reg + idx * IOMMU_PMU_CFG_OFFSET; 224 + } 225 + 226 + static inline struct iommu_pmu *iommu_event_to_pmu(struct perf_event *event) 227 + { 228 + return container_of(event->pmu, struct iommu_pmu, pmu); 229 + } 230 + 231 + static inline u64 iommu_event_config(struct perf_event *event) 232 + { 233 + u64 config = event->attr.config; 234 + 235 + return (iommu_event_select(config) << IOMMU_EVENT_CFG_ES_SHIFT) | 236 + (iommu_event_group(config) << IOMMU_EVENT_CFG_EGI_SHIFT) | 237 + 
IOMMU_EVENT_CFG_INT; 238 + } 239 + 240 + static inline bool is_iommu_pmu_event(struct iommu_pmu *iommu_pmu, 241 + struct perf_event *event) 242 + { 243 + return event->pmu == &iommu_pmu->pmu; 244 + } 245 + 246 + static int iommu_pmu_validate_event(struct perf_event *event) 247 + { 248 + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); 249 + u32 event_group = iommu_event_group(event->attr.config); 250 + 251 + if (event_group >= iommu_pmu->num_eg) 252 + return -EINVAL; 253 + 254 + return 0; 255 + } 256 + 257 + static int iommu_pmu_validate_group(struct perf_event *event) 258 + { 259 + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); 260 + struct perf_event *sibling; 261 + int nr = 0; 262 + 263 + /* 264 + * All events in a group must be scheduled simultaneously. 265 + * Check whether there is enough counters for all the events. 266 + */ 267 + for_each_sibling_event(sibling, event->group_leader) { 268 + if (!is_iommu_pmu_event(iommu_pmu, sibling) || 269 + sibling->state <= PERF_EVENT_STATE_OFF) 270 + continue; 271 + 272 + if (++nr > iommu_pmu->num_cntr) 273 + return -EINVAL; 274 + } 275 + 276 + return 0; 277 + } 278 + 279 + static int iommu_pmu_event_init(struct perf_event *event) 280 + { 281 + struct hw_perf_event *hwc = &event->hw; 282 + 283 + if (event->attr.type != event->pmu->type) 284 + return -ENOENT; 285 + 286 + /* sampling not supported */ 287 + if (event->attr.sample_period) 288 + return -EINVAL; 289 + 290 + if (event->cpu < 0) 291 + return -EINVAL; 292 + 293 + if (iommu_pmu_validate_event(event)) 294 + return -EINVAL; 295 + 296 + hwc->config = iommu_event_config(event); 297 + 298 + return iommu_pmu_validate_group(event); 299 + } 300 + 301 + static void iommu_pmu_event_update(struct perf_event *event) 302 + { 303 + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); 304 + struct hw_perf_event *hwc = &event->hw; 305 + u64 prev_count, new_count, delta; 306 + int shift = 64 - iommu_pmu->cntr_width; 307 + 308 + again: 309 + prev_count = 
local64_read(&hwc->prev_count); 310 + new_count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); 311 + if (local64_xchg(&hwc->prev_count, new_count) != prev_count) 312 + goto again; 313 + 314 + /* 315 + * The counter width is enumerated. Always shift the counter 316 + * before using it. 317 + */ 318 + delta = (new_count << shift) - (prev_count << shift); 319 + delta >>= shift; 320 + 321 + local64_add(delta, &event->count); 322 + } 323 + 324 + static void iommu_pmu_start(struct perf_event *event, int flags) 325 + { 326 + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); 327 + struct intel_iommu *iommu = iommu_pmu->iommu; 328 + struct hw_perf_event *hwc = &event->hw; 329 + u64 count; 330 + 331 + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) 332 + return; 333 + 334 + if (WARN_ON_ONCE(hwc->idx < 0 || hwc->idx >= IOMMU_PMU_IDX_MAX)) 335 + return; 336 + 337 + if (flags & PERF_EF_RELOAD) 338 + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); 339 + 340 + hwc->state = 0; 341 + 342 + /* Always reprogram the period */ 343 + count = dmar_readq(iommu_event_base(iommu_pmu, hwc->idx)); 344 + local64_set((&hwc->prev_count), count); 345 + 346 + /* 347 + * The error of ecmd will be ignored. 348 + * - The existing perf_event subsystem doesn't handle the error. 349 + * Only IOMMU PMU returns runtime HW error. We don't want to 350 + * change the existing generic interfaces for the specific case. 351 + * - It's a corner case caused by HW, which is very unlikely to 352 + * happen. There is nothing SW can do. 353 + * - The worst case is that the user will get <not count> with 354 + * perf command, which can give the user some hints. 
355 + */ 356 + ecmd_submit_sync(iommu, DMA_ECMD_ENABLE, hwc->idx, 0); 357 + 358 + perf_event_update_userpage(event); 359 + } 360 + 361 + static void iommu_pmu_stop(struct perf_event *event, int flags) 362 + { 363 + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); 364 + struct intel_iommu *iommu = iommu_pmu->iommu; 365 + struct hw_perf_event *hwc = &event->hw; 366 + 367 + if (!(hwc->state & PERF_HES_STOPPED)) { 368 + ecmd_submit_sync(iommu, DMA_ECMD_DISABLE, hwc->idx, 0); 369 + 370 + iommu_pmu_event_update(event); 371 + 372 + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; 373 + } 374 + } 375 + 376 + static inline int 377 + iommu_pmu_validate_per_cntr_event(struct iommu_pmu *iommu_pmu, 378 + int idx, struct perf_event *event) 379 + { 380 + u32 event_group = iommu_event_group(event->attr.config); 381 + u32 select = iommu_event_select(event->attr.config); 382 + 383 + if (!(iommu_pmu->cntr_evcap[idx][event_group] & select)) 384 + return -EINVAL; 385 + 386 + return 0; 387 + } 388 + 389 + static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu, 390 + struct perf_event *event) 391 + { 392 + struct hw_perf_event *hwc = &event->hw; 393 + int idx; 394 + 395 + /* 396 + * The counters which support limited events are usually at the end. 397 + * Schedule them first to accommodate more events. 
398 + */ 399 + for (idx = iommu_pmu->num_cntr - 1; idx >= 0; idx--) { 400 + if (test_and_set_bit(idx, iommu_pmu->used_mask)) 401 + continue; 402 + /* Check per-counter event capabilities */ 403 + if (!iommu_pmu_validate_per_cntr_event(iommu_pmu, idx, event)) 404 + break; 405 + clear_bit(idx, iommu_pmu->used_mask); 406 + } 407 + if (idx < 0) 408 + return -EINVAL; 409 + 410 + iommu_pmu->event_list[idx] = event; 411 + hwc->idx = idx; 412 + 413 + /* config events */ 414 + dmar_writeq(iommu_config_base(iommu_pmu, idx), hwc->config); 415 + 416 + iommu_pmu_set_filter(requester_id, event->attr.config1, 417 + IOMMU_PMU_FILTER_REQUESTER_ID, idx, 418 + event->attr.config1); 419 + iommu_pmu_set_filter(domain, event->attr.config1, 420 + IOMMU_PMU_FILTER_DOMAIN, idx, 421 + event->attr.config1); 422 + iommu_pmu_set_filter(pasid, event->attr.config1, 423 + IOMMU_PMU_FILTER_PASID, idx, 424 + event->attr.config1); 425 + iommu_pmu_set_filter(ats, event->attr.config2, 426 + IOMMU_PMU_FILTER_ATS, idx, 427 + event->attr.config1); 428 + iommu_pmu_set_filter(page_table, event->attr.config2, 429 + IOMMU_PMU_FILTER_PAGE_TABLE, idx, 430 + event->attr.config1); 431 + 432 + return 0; 433 + } 434 + 435 + static int iommu_pmu_add(struct perf_event *event, int flags) 436 + { 437 + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); 438 + struct hw_perf_event *hwc = &event->hw; 439 + int ret; 440 + 441 + ret = iommu_pmu_assign_event(iommu_pmu, event); 442 + if (ret < 0) 443 + return ret; 444 + 445 + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; 446 + 447 + if (flags & PERF_EF_START) 448 + iommu_pmu_start(event, 0); 449 + 450 + return 0; 451 + } 452 + 453 + static void iommu_pmu_del(struct perf_event *event, int flags) 454 + { 455 + struct iommu_pmu *iommu_pmu = iommu_event_to_pmu(event); 456 + int idx = event->hw.idx; 457 + 458 + iommu_pmu_stop(event, PERF_EF_UPDATE); 459 + 460 + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_REQUESTER_ID, idx); 461 + 
iommu_pmu_clear_filter(IOMMU_PMU_FILTER_DOMAIN, idx); 462 + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PASID, idx); 463 + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_ATS, idx); 464 + iommu_pmu_clear_filter(IOMMU_PMU_FILTER_PAGE_TABLE, idx); 465 + 466 + iommu_pmu->event_list[idx] = NULL; 467 + event->hw.idx = -1; 468 + clear_bit(idx, iommu_pmu->used_mask); 469 + 470 + perf_event_update_userpage(event); 471 + } 472 + 473 + static void iommu_pmu_enable(struct pmu *pmu) 474 + { 475 + struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); 476 + struct intel_iommu *iommu = iommu_pmu->iommu; 477 + 478 + ecmd_submit_sync(iommu, DMA_ECMD_UNFREEZE, 0, 0); 479 + } 480 + 481 + static void iommu_pmu_disable(struct pmu *pmu) 482 + { 483 + struct iommu_pmu *iommu_pmu = container_of(pmu, struct iommu_pmu, pmu); 484 + struct intel_iommu *iommu = iommu_pmu->iommu; 485 + 486 + ecmd_submit_sync(iommu, DMA_ECMD_FREEZE, 0, 0); 487 + } 488 + 489 + static int __iommu_pmu_register(struct intel_iommu *iommu) 490 + { 491 + struct iommu_pmu *iommu_pmu = iommu->pmu; 492 + 493 + iommu_pmu->pmu.name = iommu->name; 494 + iommu_pmu->pmu.task_ctx_nr = perf_invalid_context; 495 + iommu_pmu->pmu.event_init = iommu_pmu_event_init; 496 + iommu_pmu->pmu.pmu_enable = iommu_pmu_enable; 497 + iommu_pmu->pmu.pmu_disable = iommu_pmu_disable; 498 + iommu_pmu->pmu.add = iommu_pmu_add; 499 + iommu_pmu->pmu.del = iommu_pmu_del; 500 + iommu_pmu->pmu.start = iommu_pmu_start; 501 + iommu_pmu->pmu.stop = iommu_pmu_stop; 502 + iommu_pmu->pmu.read = iommu_pmu_event_update; 503 + iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups; 504 + iommu_pmu->pmu.attr_update = iommu_pmu_attr_update; 505 + iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; 506 + iommu_pmu->pmu.module = THIS_MODULE; 507 + 508 + return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1); 509 + } 510 + 13 511 static inline void __iomem * 14 512 get_perf_reg_address(struct intel_iommu *iommu, u32 offset) 15 513 { ··· 543 45 if 
(!pcap_interrupt(perfcap)) 544 46 return -ENODEV; 545 47 48 + /* Check required Enhanced Command Capability */ 49 + if (!ecmd_has_pmu_essential(iommu)) 50 + return -ENODEV; 51 + 546 52 iommu_pmu = kzalloc(sizeof(*iommu_pmu), GFP_KERNEL); 547 53 if (!iommu_pmu) 548 54 return -ENOMEM; 549 55 550 56 iommu_pmu->num_cntr = pcap_num_cntr(perfcap); 57 + if (iommu_pmu->num_cntr > IOMMU_PMU_IDX_MAX) { 58 + pr_warn_once("The number of IOMMU counters %d > max(%d), clipping!", 59 + iommu_pmu->num_cntr, IOMMU_PMU_IDX_MAX); 60 + iommu_pmu->num_cntr = IOMMU_PMU_IDX_MAX; 61 + } 62 + 551 63 iommu_pmu->cntr_width = pcap_cntr_width(perfcap); 552 64 iommu_pmu->filter = pcap_filters_mask(perfcap); 553 65 iommu_pmu->cntr_stride = pcap_cntr_stride(perfcap); ··· 677 169 kfree(iommu_pmu->evcap); 678 170 kfree(iommu_pmu); 679 171 iommu->pmu = NULL; 172 + } 173 + 174 + void iommu_pmu_register(struct intel_iommu *iommu) 175 + { 176 + if (!iommu->pmu) 177 + return; 178 + 179 + if (__iommu_pmu_register(iommu)) { 180 + pr_err("Failed to register PMU for iommu (seq_id = %d)\n", 181 + iommu->seq_id); 182 + free_iommu_pmu(iommu); 183 + } 184 + } 185 + 186 + void iommu_pmu_unregister(struct intel_iommu *iommu) 187 + { 188 + if (iommu->pmu) 189 + perf_pmu_unregister(&iommu->pmu->pmu); 680 190 }
+24
drivers/iommu/intel/perfmon.h
··· 7 7 #define IOMMU_PMU_NUM_OFF_REGS 4 8 8 #define IOMMU_PMU_OFF_REGS_STEP 4 9 9 10 + #define IOMMU_PMU_FILTER_REQUESTER_ID 0x01 11 + #define IOMMU_PMU_FILTER_DOMAIN 0x02 12 + #define IOMMU_PMU_FILTER_PASID 0x04 13 + #define IOMMU_PMU_FILTER_ATS 0x08 14 + #define IOMMU_PMU_FILTER_PAGE_TABLE 0x10 15 + 16 + #define IOMMU_PMU_FILTER_EN BIT(31) 17 + 10 18 #define IOMMU_PMU_CFG_OFFSET 0x100 11 19 #define IOMMU_PMU_CFG_CNTRCAP_OFFSET 0x80 12 20 #define IOMMU_PMU_CFG_CNTREVCAP_OFFSET 0x84 ··· 28 20 #define iommu_cntrcap_ios(p) (((p) >> 16) & 0x1) 29 21 #define iommu_cntrcap_egcnt(p) (((p) >> 28) & 0xf) 30 22 23 + #define IOMMU_EVENT_CFG_EGI_SHIFT 8 24 + #define IOMMU_EVENT_CFG_ES_SHIFT 32 25 + #define IOMMU_EVENT_CFG_INT BIT_ULL(1) 26 + 31 27 #define iommu_event_select(p) ((p) & 0xfffffff) 32 28 #define iommu_event_group(p) (((p) >> 28) & 0xf) 33 29 34 30 #ifdef CONFIG_INTEL_IOMMU_PERF_EVENTS 35 31 int alloc_iommu_pmu(struct intel_iommu *iommu); 36 32 void free_iommu_pmu(struct intel_iommu *iommu); 33 + void iommu_pmu_register(struct intel_iommu *iommu); 34 + void iommu_pmu_unregister(struct intel_iommu *iommu); 37 35 #else 38 36 static inline int 39 37 alloc_iommu_pmu(struct intel_iommu *iommu) ··· 49 35 50 36 static inline void 51 37 free_iommu_pmu(struct intel_iommu *iommu) 38 + { 39 + } 40 + 41 + static inline void 42 + iommu_pmu_register(struct intel_iommu *iommu) 43 + { 44 + } 45 + 46 + static inline void 47 + iommu_pmu_unregister(struct intel_iommu *iommu) 52 48 { 53 49 } 54 50 #endif /* CONFIG_INTEL_IOMMU_PERF_EVENTS */