irqchip/gic-v3-its: Add device proxy for VPE management if !DirectLPI

When we don't have the DirectLPI feature, we must work around the
architecture's shortcomings to be able to perform the required
maintenance (interrupt masking, clearing and injection).

For this, we create a fake device whose sole purpose is to
provide a way to issue commands as if we were dealing with LPIs
coming from that device (while they actually originate from
the ITS). This fake device doesn't have LPIs allocated to it,
but instead uses the VPE LPIs.

Of course, this could be a real bottleneck, and a naive
implementation would require 6 commands to issue an invalidation.
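
A rough sketch of what that naive path looks like, using the
its_send_*() helpers this patch relies on (illustrative only; "event"
is a stand-in for a scratch event number on the proxy device, and the
count assumes each helper is followed by a SYNC on the command queue,
hence the 6 commands):

	/* map the VPE doorbell onto a proxy event, invalidate, unmap */
	its_send_mapti(vpe_proxy.dev, vpe->vpe_db_lpi, event);	/* MAPTI + SYNC */
	its_send_inv(vpe_proxy.dev, event);			/* INV + SYNC */
	its_send_discard(vpe_proxy.dev, event);			/* DISCARD + SYNC */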

Instead, let's allocate at least one event per physical CPU
(rounded up to the next power of 2), and opportunistically
map the VPE doorbell to an event. This doorbell will be mapped
until we roll over and need to reallocate this slot.
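
In code terms, the sizing and the slot reuse boil down to something
like this (a condensed sketch of what its_init_vpe_domain() and
its_vpe_db_proxy_map_locked() do in the diff below):

	/* one event slot per CPU, rounded up to a power of two */
	entries = roundup_pow_of_two(nr_cpu_ids);

	/* doorbells are handed out round-robin; a slot is only
	 * reclaimed (DISCARDed) once next_victim wraps onto it */
	vpe->vpe_proxy_event = vpe_proxy.next_victim;
	vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) % vpe_proxy.dev->nr_ites;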

This ensures that most of the time, we only need 2 commands to
issue an INV, INT or CLEAR, which makes a real difference to
performance, given that we always issue a CLEAR on entry and an
INV on each side of a trapped WFI.
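
For the common case where the doorbell is already mapped to a proxy
event, the fast path is essentially what its_vpe_send_cmd() below
boils down to (sketch, with the vpe_proxy locking elided):

	its_vpe_db_proxy_map_locked(vpe);			/* usually a no-op */
	its_send_inv(vpe_proxy.dev, vpe->vpe_proxy_event);	/* INV + SYNC */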

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

2 files changed, 147 insertions(+), 6 deletions(-)

drivers/irqchip/irq-gic-v3-its.c (+145, -6)
···
         u32 device_id;
 };
 
+static struct {
+        raw_spinlock_t lock;
+        struct its_device *dev;
+        struct its_vpe **vpes;
+        int next_victim;
+} vpe_proxy;
+
 static LIST_HEAD(its_nodes);
 static DEFINE_SPINLOCK(its_lock);
 static struct rdists *gic_rdists;
···
         msi_info = msi_get_domain_info(domain);
         its = msi_info->data;
 
+        if (!gic_rdists->has_direct_lpi &&
+            vpe_proxy.dev &&
+            vpe_proxy.dev->its == its &&
+            dev_id == vpe_proxy.dev->device_id) {
+                /* Bad luck. Get yourself a better implementation */
+                WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n",
+                          dev_id);
+                return -EINVAL;
+        }
+
         its_dev = its_find_device(its, dev_id);
         if (its_dev) {
                 /*
···
         .deactivate = its_irq_domain_deactivate,
 };
 
+/*
+ * This is insane.
+ *
+ * If a GICv4 doesn't implement Direct LPIs (which is extremely
+ * likely), the only way to perform an invalidate is to use a fake
+ * device to issue an INV command, implying that the LPI has first
+ * been mapped to some event on that device. Since this is not exactly
+ * cheap, we try to keep that mapping around as long as possible, and
+ * only issue an UNMAP if we're short on available slots.
+ *
+ * Broken by design(tm).
+ */
+static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe)
+{
+        /* Already unmapped? */
+        if (vpe->vpe_proxy_event == -1)
+                return;
+
+        its_send_discard(vpe_proxy.dev, vpe->vpe_proxy_event);
+        vpe_proxy.vpes[vpe->vpe_proxy_event] = NULL;
+
+        /*
+         * We don't track empty slots at all, so let's move the
+         * next_victim pointer if we can quickly reuse that slot
+         * instead of nuking an existing entry. Not clear that this is
+         * always a win though, and this might just generate a ripple
+         * effect... Let's just hope VPEs don't migrate too often.
+         */
+        if (vpe_proxy.vpes[vpe_proxy.next_victim])
+                vpe_proxy.next_victim = vpe->vpe_proxy_event;
+
+        vpe->vpe_proxy_event = -1;
+}
+
+static void its_vpe_db_proxy_unmap(struct its_vpe *vpe)
+{
+        if (!gic_rdists->has_direct_lpi) {
+                unsigned long flags;
+
+                raw_spin_lock_irqsave(&vpe_proxy.lock, flags);
+                its_vpe_db_proxy_unmap_locked(vpe);
+                raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+        }
+}
+
+static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe)
+{
+        /* Already mapped? */
+        if (vpe->vpe_proxy_event != -1)
+                return;
+
+        /* This slot was already allocated. Kick the other VPE out. */
+        if (vpe_proxy.vpes[vpe_proxy.next_victim])
+                its_vpe_db_proxy_unmap_locked(vpe_proxy.vpes[vpe_proxy.next_victim]);
+
+        /* Map the new VPE instead */
+        vpe_proxy.vpes[vpe_proxy.next_victim] = vpe;
+        vpe->vpe_proxy_event = vpe_proxy.next_victim;
+        vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) % vpe_proxy.dev->nr_ites;
+
+        vpe_proxy.dev->event_map.col_map[vpe->vpe_proxy_event] = vpe->col_idx;
+        its_send_mapti(vpe_proxy.dev, vpe->vpe_db_lpi, vpe->vpe_proxy_event);
+}
+
 static int its_vpe_set_affinity(struct irq_data *d,
                                 const struct cpumask *mask_val,
                                 bool force)
···
 
         /*
          * Changing affinity is mega expensive, so let's be as lazy as
-         * we can and only do it if we really have to.
+         * we can and only do it if we really have to. Also, if mapped
+         * into the proxy device, we need to nuke that mapping.
          */
         if (vpe->col_idx != cpu) {
+                its_vpe_db_proxy_unmap(vpe);
                 vpe->col_idx = cpu;
                 its_send_vmovp(vpe);
         }
···
         }
 }
 
+static void its_vpe_send_cmd(struct its_vpe *vpe,
+                             void (*cmd)(struct its_device *, u32))
+{
+        unsigned long flags;
+
+        raw_spin_lock_irqsave(&vpe_proxy.lock, flags);
+
+        its_vpe_db_proxy_map_locked(vpe);
+        cmd(vpe_proxy.dev, vpe->vpe_proxy_event);
+
+        raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags);
+}
+
 static void its_vpe_send_inv(struct irq_data *d)
 {
         struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
-        void __iomem *rdbase;
 
-        rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
-        gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
-        while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
-                cpu_relax();
+        if (gic_rdists->has_direct_lpi) {
+                void __iomem *rdbase;
+
+                rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+                gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_INVLPIR);
+                while (gic_read_lpir(rdbase + GICR_SYNCR) & 1)
+                        cpu_relax();
+        } else {
+                its_vpe_send_cmd(vpe, its_send_inv);
+        }
 }
 
 static void its_vpe_mask_irq(struct irq_data *d)
···
 
         vpe->vpe_id = vpe_id;
         vpe->vpt_page = vpt_page;
+        vpe->vpe_proxy_event = -1;
 
         return 0;
 }
 
 static void its_vpe_teardown(struct its_vpe *vpe)
 {
+        its_vpe_db_proxy_unmap(vpe);
         its_vpe_id_free(vpe->vpe_id);
         its_free_pending_table(vpe->vpt_page);
 }
···
 
 static int its_init_vpe_domain(void)
 {
+        struct its_node *its;
+        u32 devid;
+        int entries;
+
+        if (gic_rdists->has_direct_lpi) {
+                pr_info("ITS: Using DirectLPI for VPE invalidation\n");
+                return 0;
+        }
+
+        /* Any ITS will do, even if not v4 */
+        its = list_first_entry(&its_nodes, struct its_node, entry);
+
+        entries = roundup_pow_of_two(nr_cpu_ids);
+        vpe_proxy.vpes = kzalloc(sizeof(*vpe_proxy.vpes) * entries,
+                                 GFP_KERNEL);
+        if (!vpe_proxy.vpes) {
+                pr_err("ITS: Can't allocate GICv4 proxy device array\n");
+                return -ENOMEM;
+        }
+
+        /* Use the last possible DevID */
+        devid = GENMASK(its->device_ids - 1, 0);
+        vpe_proxy.dev = its_create_device(its, devid, entries, false);
+        if (!vpe_proxy.dev) {
+                kfree(vpe_proxy.vpes);
+                pr_err("ITS: Can't allocate GICv4 proxy device\n");
+                return -ENOMEM;
+        }
+
+        BUG_ON(entries != vpe_proxy.dev->nr_ites);
+
+        raw_spin_lock_init(&vpe_proxy.lock);
+        vpe_proxy.next_victim = 0;
+        pr_info("ITS: Allocated DevID %x as GICv4 proxy device (%d slots)\n",
+                devid, vpe_proxy.dev->nr_ites);
+
         return 0;
 }
 

include/linux/irqchip/arm-gic-v4.h (+2, -0)
···
         /* Doorbell interrupt */
         int irq;
         irq_hw_number_t vpe_db_lpi;
+        /* VPE proxy mapping */
+        int vpe_proxy_event;
         /*
          * This collection ID is used to indirect the target
          * redistributor for this VPE. The ID itself isn't involved in