Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Xen event channels
4 *
5 * Xen models interrupts with abstract event channels. Because each
6 * domain gets 1024 event channels, but NR_IRQS is not that large, we
7 * must dynamically map irqs<->event channels. The event channels
8 * interface with the rest of the kernel by defining a xen interrupt
9 * chip. When an event is received, it is mapped to an irq and sent
10 * through the normal interrupt processing path.
11 *
12 * There are four kinds of events which can be mapped to an event
13 * channel:
14 *
15 * 1. Inter-domain notifications. This includes all the virtual
16 * device events, since they're driven by front-ends in another domain
17 * (typically dom0).
18 * 2. VIRQs, typically used for timers. These are per-cpu events.
19 * 3. IPIs.
20 * 4. PIRQs - Hardware interrupts.
21 *
22 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
23 */
24
25#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
26
27#include <linux/linkage.h>
28#include <linux/interrupt.h>
29#include <linux/irq.h>
30#include <linux/moduleparam.h>
31#include <linux/string.h>
32#include <linux/memblock.h>
33#include <linux/slab.h>
34#include <linux/irqnr.h>
35#include <linux/pci.h>
36#include <linux/spinlock.h>
37#include <linux/cpuhotplug.h>
38#include <linux/atomic.h>
39#include <linux/ktime.h>
40
41#ifdef CONFIG_X86
42#include <asm/desc.h>
43#include <asm/ptrace.h>
44#include <asm/idtentry.h>
45#include <asm/irq.h>
46#include <asm/io_apic.h>
47#include <asm/i8259.h>
48#include <asm/xen/pci.h>
49#endif
50#include <asm/sync_bitops.h>
51#include <asm/xen/hypercall.h>
52#include <asm/xen/hypervisor.h>
53#include <xen/page.h>
54
55#include <xen/xen.h>
56#include <xen/hvm.h>
57#include <xen/xen-ops.h>
58#include <xen/events.h>
59#include <xen/interface/xen.h>
60#include <xen/interface/event_channel.h>
61#include <xen/interface/hvm/hvm_op.h>
62#include <xen/interface/hvm/params.h>
63#include <xen/interface/physdev.h>
64#include <xen/interface/sched.h>
65#include <xen/interface/vcpu.h>
66#include <xen/xenbus.h>
67#include <asm/hw_irq.h>
68
69#include "events_internal.h"
70
71#undef MODULE_PARAM_PREFIX
72#define MODULE_PARAM_PREFIX "xen."
73
74/* Interrupt types. */
75enum xen_irq_type {
76 IRQT_UNBOUND = 0,
77 IRQT_PIRQ,
78 IRQT_VIRQ,
79 IRQT_IPI,
80 IRQT_EVTCHN
81};
82
83/*
84 * Packed IRQ information:
85 * type - enum xen_irq_type
86 * event channel - irq->event channel mapping
87 * cpu - cpu this event channel is bound to
88 * index - type-specific information:
89 * PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
90 * guest, or GSI (real passthrough IRQ) of the device.
91 * VIRQ - virq number
92 * IPI - IPI vector
93 * EVTCHN -
94 */
95struct irq_info {
96 struct list_head list;
97 struct list_head eoi_list;
98 short refcnt;
99 u8 spurious_cnt;
100 u8 is_accounted;
101 short type; /* type: IRQT_* */
102 u8 mask_reason; /* Why is event channel masked */
103#define EVT_MASK_REASON_EXPLICIT 0x01
104#define EVT_MASK_REASON_TEMPORARY 0x02
105#define EVT_MASK_REASON_EOI_PENDING 0x04
106 u8 is_active; /* Is event just being handled? */
107 unsigned irq;
108 evtchn_port_t evtchn; /* event channel */
109 unsigned short cpu; /* cpu bound */
110 unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
111 unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
112 u64 eoi_time; /* Time in jiffies when to EOI. */
113 raw_spinlock_t lock;
114
115 union {
116 unsigned short virq;
117 enum ipi_vector ipi;
118 struct {
119 unsigned short pirq;
120 unsigned short gsi;
121 unsigned char vector;
122 unsigned char flags;
123 uint16_t domid;
124 } pirq;
125 struct xenbus_device *interdomain;
126 } u;
127};
128
129#define PIRQ_NEEDS_EOI (1 << 0)
130#define PIRQ_SHAREABLE (1 << 1)
131#define PIRQ_MSI_GROUP (1 << 2)
132
133static uint __read_mostly event_loop_timeout = 2;
134module_param(event_loop_timeout, uint, 0644);
135
136static uint __read_mostly event_eoi_delay = 10;
137module_param(event_eoi_delay, uint, 0644);
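/*
 * Both parameters are in jiffies. As this file is normally built into
 * the kernel, they can be set on the command line as
 * "xen.event_loop_timeout=" and "xen.event_eoi_delay=" (see
 * MODULE_PARAM_PREFIX above).
 */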
138
139const struct evtchn_ops *evtchn_ops;
140
141/*
142 * This lock protects updates to the following mapping and reference-count
143 * arrays. The lock does not need to be acquired to read the mapping tables.
144 */
145static DEFINE_MUTEX(irq_mapping_update_lock);
146
147/*
148 * Lock protecting event handling loop against removing event channels.
149 * Adding of event channels is no issue as the associated IRQ becomes active
150 * only after everything is set up (before request_[threaded_]irq() the handler
151 * can't be entered for an event, as the event channel will be unmasked only
152 * then).
153 */
154static DEFINE_RWLOCK(evtchn_rwlock);
155
156/*
157 * Lock hierarchy:
158 *
159 * irq_mapping_update_lock
160 * evtchn_rwlock
161 * IRQ-desc lock
162 * percpu eoi_list_lock
163 * irq_info->lock
164 */
165
166static LIST_HEAD(xen_irq_list_head);
167
168/* IRQ <-> VIRQ mapping. */
169static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
170
171/* IRQ <-> IPI mapping */
172static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
173
174/* Event channel distribution data */
175static atomic_t channels_on_cpu[NR_CPUS];
176
177static int **evtchn_to_irq;
178#ifdef CONFIG_X86
179static unsigned long *pirq_eoi_map;
180#endif
181static bool (*pirq_needs_eoi)(unsigned irq);
182
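/*
 * The event channel -> irq mapping is kept in a two-level table: a
 * top-level array of row pointers, each row being one page of irq
 * numbers, allocated on demand in set_evtchn_to_irq().
 */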
183#define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
184#define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
185#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
186
187/* Xen will never allocate port zero for any purpose. */
188#define VALID_EVTCHN(chn) ((chn) != 0)
189
190static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY];
191
192static struct irq_chip xen_dynamic_chip;
193static struct irq_chip xen_lateeoi_chip;
194static struct irq_chip xen_percpu_chip;
195static struct irq_chip xen_pirq_chip;
196static void enable_dynirq(struct irq_data *data);
197static void disable_dynirq(struct irq_data *data);
198
199static DEFINE_PER_CPU(unsigned int, irq_epoch);
200
201static void clear_evtchn_to_irq_row(unsigned row)
202{
203 unsigned col;
204
205 for (col = 0; col < EVTCHN_PER_ROW; col++)
206 WRITE_ONCE(evtchn_to_irq[row][col], -1);
207}
208
209static void clear_evtchn_to_irq_all(void)
210{
211 unsigned row;
212
213 for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
214 if (evtchn_to_irq[row] == NULL)
215 continue;
216 clear_evtchn_to_irq_row(row);
217 }
218}
219
220static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
221{
222 unsigned row;
223 unsigned col;
224
225 if (evtchn >= xen_evtchn_max_channels())
226 return -EINVAL;
227
228 row = EVTCHN_ROW(evtchn);
229 col = EVTCHN_COL(evtchn);
230
231 if (evtchn_to_irq[row] == NULL) {
232 /* Unallocated irq entries return -1 anyway */
233 if (irq == -1)
234 return 0;
235
236 evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
237 if (evtchn_to_irq[row] == NULL)
238 return -ENOMEM;
239
240 clear_evtchn_to_irq_row(row);
241 }
242
243 WRITE_ONCE(evtchn_to_irq[row][col], irq);
244 return 0;
245}
246
247int get_evtchn_to_irq(evtchn_port_t evtchn)
248{
249 if (evtchn >= xen_evtchn_max_channels())
250 return -1;
251 if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
252 return -1;
253 return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
254}
255
256/* Get info for IRQ */
257static struct irq_info *info_for_irq(unsigned irq)
258{
259 if (irq < nr_legacy_irqs())
260 return legacy_info_ptrs[irq];
261 else
262 return irq_get_chip_data(irq);
263}
264
265static void set_info_for_irq(unsigned int irq, struct irq_info *info)
266{
267 if (irq < nr_legacy_irqs())
268 legacy_info_ptrs[irq] = info;
269 else
270 irq_set_chip_data(irq, info);
271}
272
273/* Per CPU channel accounting */
274static void channels_on_cpu_dec(struct irq_info *info)
275{
276 if (!info->is_accounted)
277 return;
278
279 info->is_accounted = 0;
280
281 if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
282 return;
283
284 WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1 , 0));
285}
286
287static void channels_on_cpu_inc(struct irq_info *info)
288{
289 if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids))
290 return;
291
292 if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1,
293 INT_MAX)))
294 return;
295
296 info->is_accounted = 1;
297}
298
299/* Constructors for packed IRQ information. */
300static int xen_irq_info_common_setup(struct irq_info *info,
301 unsigned irq,
302 enum xen_irq_type type,
303 evtchn_port_t evtchn,
304 unsigned short cpu)
305{
306 int ret;
307
308 BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
309
310 info->type = type;
311 info->irq = irq;
312 info->evtchn = evtchn;
313 info->cpu = cpu;
314 info->mask_reason = EVT_MASK_REASON_EXPLICIT;
315 raw_spin_lock_init(&info->lock);
316
317 ret = set_evtchn_to_irq(evtchn, irq);
318 if (ret < 0)
319 return ret;
320
321 irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
322
323 return xen_evtchn_port_setup(evtchn);
324}
325
326static int xen_irq_info_evtchn_setup(unsigned irq,
327 evtchn_port_t evtchn,
328 struct xenbus_device *dev)
329{
330 struct irq_info *info = info_for_irq(irq);
331 int ret;
332
333 ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
334 info->u.interdomain = dev;
335 if (dev)
336 atomic_inc(&dev->event_channels);
337
338 return ret;
339}
340
341static int xen_irq_info_ipi_setup(unsigned cpu,
342 unsigned irq,
343 evtchn_port_t evtchn,
344 enum ipi_vector ipi)
345{
346 struct irq_info *info = info_for_irq(irq);
347
348 info->u.ipi = ipi;
349
350 per_cpu(ipi_to_irq, cpu)[ipi] = irq;
351
352 return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
353}
354
355static int xen_irq_info_virq_setup(unsigned cpu,
356 unsigned irq,
357 evtchn_port_t evtchn,
358 unsigned virq)
359{
360 struct irq_info *info = info_for_irq(irq);
361
362 info->u.virq = virq;
363
364 per_cpu(virq_to_irq, cpu)[virq] = irq;
365
366 return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
367}
368
369static int xen_irq_info_pirq_setup(unsigned irq,
370 evtchn_port_t evtchn,
371 unsigned pirq,
372 unsigned gsi,
373 uint16_t domid,
374 unsigned char flags)
375{
376 struct irq_info *info = info_for_irq(irq);
377
378 info->u.pirq.pirq = pirq;
379 info->u.pirq.gsi = gsi;
380 info->u.pirq.domid = domid;
381 info->u.pirq.flags = flags;
382
383 return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
384}
385
386static void xen_irq_info_cleanup(struct irq_info *info)
387{
388 set_evtchn_to_irq(info->evtchn, -1);
389 xen_evtchn_port_remove(info->evtchn, info->cpu);
390 info->evtchn = 0;
391 channels_on_cpu_dec(info);
392}
393
394/*
395 * Accessors for packed IRQ information.
396 */
397evtchn_port_t evtchn_from_irq(unsigned irq)
398{
399 const struct irq_info *info = NULL;
400
401 if (likely(irq < nr_irqs))
402 info = info_for_irq(irq);
403 if (!info)
404 return 0;
405
406 return info->evtchn;
407}
408
409unsigned int irq_from_evtchn(evtchn_port_t evtchn)
410{
411 return get_evtchn_to_irq(evtchn);
412}
413EXPORT_SYMBOL_GPL(irq_from_evtchn);
414
415int irq_from_virq(unsigned int cpu, unsigned int virq)
416{
417 return per_cpu(virq_to_irq, cpu)[virq];
418}
419
420static enum ipi_vector ipi_from_irq(unsigned irq)
421{
422 struct irq_info *info = info_for_irq(irq);
423
424 BUG_ON(info == NULL);
425 BUG_ON(info->type != IRQT_IPI);
426
427 return info->u.ipi;
428}
429
430static unsigned virq_from_irq(unsigned irq)
431{
432 struct irq_info *info = info_for_irq(irq);
433
434 BUG_ON(info == NULL);
435 BUG_ON(info->type != IRQT_VIRQ);
436
437 return info->u.virq;
438}
439
440static unsigned pirq_from_irq(unsigned irq)
441{
442 struct irq_info *info = info_for_irq(irq);
443
444 BUG_ON(info == NULL);
445 BUG_ON(info->type != IRQT_PIRQ);
446
447 return info->u.pirq.pirq;
448}
449
450static enum xen_irq_type type_from_irq(unsigned irq)
451{
452 return info_for_irq(irq)->type;
453}
454
455static unsigned cpu_from_irq(unsigned irq)
456{
457 return info_for_irq(irq)->cpu;
458}
459
460unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
461{
462 int irq = get_evtchn_to_irq(evtchn);
463 unsigned ret = 0;
464
465 if (irq != -1)
466 ret = cpu_from_irq(irq);
467
468 return ret;
469}
470
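/*
 * An event channel stays masked as long as at least one mask reason
 * (explicit, temporary, EOI pending) is set. The real mask/unmask of
 * the port only happens on the transitions of info->mask_reason to or
 * from zero, under info->lock.
 */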
471static void do_mask(struct irq_info *info, u8 reason)
472{
473 unsigned long flags;
474
475 raw_spin_lock_irqsave(&info->lock, flags);
476
477 if (!info->mask_reason)
478 mask_evtchn(info->evtchn);
479
480 info->mask_reason |= reason;
481
482 raw_spin_unlock_irqrestore(&info->lock, flags);
483}
484
485static void do_unmask(struct irq_info *info, u8 reason)
486{
487 unsigned long flags;
488
489 raw_spin_lock_irqsave(&info->lock, flags);
490
491 info->mask_reason &= ~reason;
492
493 if (!info->mask_reason)
494 unmask_evtchn(info->evtchn);
495
496 raw_spin_unlock_irqrestore(&info->lock, flags);
497}
498
499#ifdef CONFIG_X86
500static bool pirq_check_eoi_map(unsigned irq)
501{
502 return test_bit(pirq_from_irq(irq), pirq_eoi_map);
503}
504#endif
505
506static bool pirq_needs_eoi_flag(unsigned irq)
507{
508 struct irq_info *info = info_for_irq(irq);
509 BUG_ON(info->type != IRQT_PIRQ);
510
511 return info->u.pirq.flags & PIRQ_NEEDS_EOI;
512}
513
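/*
 * Record the cpu an event channel is delivered on and keep the per-cpu
 * channel accounting in sync. Optionally force the irq affinity masks
 * to the new cpu as well.
 */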
514static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
515 bool force_affinity)
516{
517 int irq = get_evtchn_to_irq(evtchn);
518 struct irq_info *info = info_for_irq(irq);
519
520 BUG_ON(irq == -1);
521
522 if (IS_ENABLED(CONFIG_SMP) && force_affinity) {
523 cpumask_copy(irq_get_affinity_mask(irq), cpumask_of(cpu));
524 cpumask_copy(irq_get_effective_affinity_mask(irq),
525 cpumask_of(cpu));
526 }
527
528 xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
529
530 channels_on_cpu_dec(info);
531 info->cpu = cpu;
532 channels_on_cpu_inc(info);
533}
534
535/**
536 * notify_remote_via_irq - send event to remote end of event channel via irq
537 * @irq: irq of event channel to send event to
538 *
539 * Unlike notify_remote_via_evtchn(), this is safe to use across
540 * save/restore. Notifications on a broken connection are silently
541 * dropped.
542 */
543void notify_remote_via_irq(int irq)
544{
545 evtchn_port_t evtchn = evtchn_from_irq(irq);
546
547 if (VALID_EVTCHN(evtchn))
548 notify_remote_via_evtchn(evtchn);
549}
550EXPORT_SYMBOL_GPL(notify_remote_via_irq);
551
552struct lateeoi_work {
553 struct delayed_work delayed;
554 spinlock_t eoi_list_lock;
555 struct list_head eoi_list;
556};
557
558static DEFINE_PER_CPU(struct lateeoi_work, lateeoi);
559
560static void lateeoi_list_del(struct irq_info *info)
561{
562 struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
563 unsigned long flags;
564
565 spin_lock_irqsave(&eoi->eoi_list_lock, flags);
566 list_del_init(&info->eoi_list);
567 spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
568}
569
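/*
 * Queue info on the per-cpu lateeoi list, keeping the list sorted by
 * eoi_time. The delayed work is (re)armed only when the list was empty.
 */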
570static void lateeoi_list_add(struct irq_info *info)
571{
572 struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu);
573 struct irq_info *elem;
574 u64 now = get_jiffies_64();
575 unsigned long delay;
576 unsigned long flags;
577
578 if (now < info->eoi_time)
579 delay = info->eoi_time - now;
580 else
581 delay = 1;
582
583 spin_lock_irqsave(&eoi->eoi_list_lock, flags);
584
585 if (list_empty(&eoi->eoi_list)) {
586 list_add(&info->eoi_list, &eoi->eoi_list);
587 mod_delayed_work_on(info->eoi_cpu, system_wq,
588 &eoi->delayed, delay);
589 } else {
590 list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) {
591 if (elem->eoi_time <= info->eoi_time)
592 break;
593 }
594 list_add(&info->eoi_list, &elem->eoi_list);
595 }
596
597 spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
598}
599
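/*
 * Perform (or schedule) the delayed EOI of an event channel. Spurious
 * events grow an exponential unmask delay, capped at one second, to
 * throttle misbehaving event sources.
 */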
600static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
601{
602 evtchn_port_t evtchn;
603 unsigned int cpu;
604 unsigned int delay = 0;
605
606 evtchn = info->evtchn;
607 if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
608 return;
609
610 if (spurious) {
611 struct xenbus_device *dev = info->u.interdomain;
612 unsigned int threshold = 1;
613
614 if (dev && dev->spurious_threshold)
615 threshold = dev->spurious_threshold;
616
617 if ((1 << info->spurious_cnt) < (HZ << 2)) {
618 if (info->spurious_cnt != 0xFF)
619 info->spurious_cnt++;
620 }
621 if (info->spurious_cnt > threshold) {
622 delay = 1 << (info->spurious_cnt - 1 - threshold);
623 if (delay > HZ)
624 delay = HZ;
625 if (!info->eoi_time)
626 info->eoi_cpu = smp_processor_id();
627 info->eoi_time = get_jiffies_64() + delay;
628 if (dev)
629 atomic_add(delay, &dev->jiffies_eoi_delayed);
630 }
631 if (dev)
632 atomic_inc(&dev->spurious_events);
633 } else {
634 info->spurious_cnt = 0;
635 }
636
637 cpu = info->eoi_cpu;
638 if (info->eoi_time &&
639 (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) {
640 lateeoi_list_add(info);
641 return;
642 }
643
644 info->eoi_time = 0;
645 do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
646}
647
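/*
 * Delayed work callback: EOI all entries on this cpu's lateeoi list
 * that are due and re-arm the work for the first entry not yet due.
 */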
648static void xen_irq_lateeoi_worker(struct work_struct *work)
649{
650 struct lateeoi_work *eoi;
651 struct irq_info *info;
652 u64 now = get_jiffies_64();
653 unsigned long flags;
654
655 eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
656
657 read_lock_irqsave(&evtchn_rwlock, flags);
658
659 while (true) {
660 spin_lock(&eoi->eoi_list_lock);
661
662 info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
663 eoi_list);
664
665 if (info == NULL || now < info->eoi_time) {
666 spin_unlock(&eoi->eoi_list_lock);
667 break;
668 }
669
670 list_del_init(&info->eoi_list);
671
672 spin_unlock(&eoi->eoi_list_lock);
673
674 info->eoi_time = 0;
675
676 xen_irq_lateeoi_locked(info, false);
677 }
678
679 if (info)
680 mod_delayed_work_on(info->eoi_cpu, system_wq,
681 &eoi->delayed, info->eoi_time - now);
682
683 read_unlock_irqrestore(&evtchn_rwlock, flags);
684}
685
686static void xen_cpu_init_eoi(unsigned int cpu)
687{
688 struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu);
689
690 INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker);
691 spin_lock_init(&eoi->eoi_list_lock);
692 INIT_LIST_HEAD(&eoi->eoi_list);
693}
694
695void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
696{
697 struct irq_info *info;
698 unsigned long flags;
699
700 read_lock_irqsave(&evtchn_rwlock, flags);
701
702 info = info_for_irq(irq);
703
704 if (info)
705 xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
706
707 read_unlock_irqrestore(&evtchn_rwlock, flags);
708}
709EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
710
711static void xen_irq_init(unsigned irq)
712{
713 struct irq_info *info;
714
715 info = kzalloc(sizeof(*info), GFP_KERNEL);
716 if (info == NULL)
717 panic("Unable to allocate metadata for IRQ%d\n", irq);
718
719 info->type = IRQT_UNBOUND;
720 info->refcnt = -1;
721
722 set_info_for_irq(irq, info);
723 /*
724 * Interrupt affinity setting can be immediate. No point
725 * in delaying it until an interrupt is handled.
726 */
727 irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
728
729 INIT_LIST_HEAD(&info->eoi_list);
730 list_add_tail(&info->list, &xen_irq_list_head);
731}
732
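/* Allocate nvec consecutive irq descriptors and attach Xen irq_info to each. */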
733static int __must_check xen_allocate_irqs_dynamic(int nvec)
734{
735 int i, irq = irq_alloc_descs(-1, 0, nvec, -1);
736
737 if (irq >= 0) {
738 for (i = 0; i < nvec; i++)
739 xen_irq_init(irq + i);
740 }
741
742 return irq;
743}
744
745static inline int __must_check xen_allocate_irq_dynamic(void)
746{
747
748 return xen_allocate_irqs_dynamic(1);
749}
750
751static int __must_check xen_allocate_irq_gsi(unsigned gsi)
752{
753 int irq;
754
755 /*
756 * A PV guest has no concept of a GSI (since it has no ACPI
757 * nor access to/knowledge of the physical APICs). Therefore
758 * all IRQs are dynamically allocated from the entire IRQ
759 * space.
760 */
761 if (xen_pv_domain() && !xen_initial_domain())
762 return xen_allocate_irq_dynamic();
763
764 /* Legacy IRQ descriptors are already allocated by the arch. */
765 if (gsi < nr_legacy_irqs())
766 irq = gsi;
767 else
768 irq = irq_alloc_desc_at(gsi, -1);
769
770 xen_irq_init(irq);
771
772 return irq;
773}
774
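/*
 * Tear down an irq: remove its irq_info from all lists while holding
 * the evtchn_rwlock write lock, free the info, and release the irq
 * descriptor (legacy irq descriptors stay owned by the arch code).
 */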
775static void xen_free_irq(unsigned irq)
776{
777 struct irq_info *info = info_for_irq(irq);
778 unsigned long flags;
779
780 if (WARN_ON(!info))
781 return;
782
783 write_lock_irqsave(&evtchn_rwlock, flags);
784
785 if (!list_empty(&info->eoi_list))
786 lateeoi_list_del(info);
787
788 list_del(&info->list);
789
790 set_info_for_irq(irq, NULL);
791
792 WARN_ON(info->refcnt > 0);
793
794 write_unlock_irqrestore(&evtchn_rwlock, flags);
795
796 kfree(info);
797
798 /* Legacy IRQ descriptors are managed by the arch. */
799 if (irq < nr_legacy_irqs())
800 return;
801
802 irq_free_desc(irq);
803}
804
805static void xen_evtchn_close(evtchn_port_t port)
806{
807 struct evtchn_close close;
808
809 close.port = port;
810 if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
811 BUG();
812}
813
814static void event_handler_exit(struct irq_info *info)
815{
816 smp_store_release(&info->is_active, 0);
817 clear_evtchn(info->evtchn);
818}
819
820static void pirq_query_unmask(int irq)
821{
822 struct physdev_irq_status_query irq_status;
823 struct irq_info *info = info_for_irq(irq);
824
825 BUG_ON(info->type != IRQT_PIRQ);
826
827 irq_status.irq = pirq_from_irq(irq);
828 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
829 irq_status.flags = 0;
830
831 info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
832 if (irq_status.flags & XENIRQSTAT_needs_eoi)
833 info->u.pirq.flags |= PIRQ_NEEDS_EOI;
834}
835
836static void eoi_pirq(struct irq_data *data)
837{
838 struct irq_info *info = info_for_irq(data->irq);
839 evtchn_port_t evtchn = info ? info->evtchn : 0;
840 struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
841 int rc = 0;
842
843 if (!VALID_EVTCHN(evtchn))
844 return;
845
846 event_handler_exit(info);
847
848 if (pirq_needs_eoi(data->irq)) {
849 rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
850 WARN_ON(rc);
851 }
852}
853
854static void mask_ack_pirq(struct irq_data *data)
855{
856 disable_dynirq(data);
857 eoi_pirq(data);
858}
859
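/*
 * Bind a pirq to a fresh event channel and unmask it. If the pirq is
 * already bound (valid event channel), only the unmask and EOI are done.
 */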
860static unsigned int __startup_pirq(unsigned int irq)
861{
862 struct evtchn_bind_pirq bind_pirq;
863 struct irq_info *info = info_for_irq(irq);
864 evtchn_port_t evtchn = evtchn_from_irq(irq);
865 int rc;
866
867 BUG_ON(info->type != IRQT_PIRQ);
868
869 if (VALID_EVTCHN(evtchn))
870 goto out;
871
872 bind_pirq.pirq = pirq_from_irq(irq);
873 /* NB. We are happy to share unless we are probing. */
874 bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
875 BIND_PIRQ__WILL_SHARE : 0;
876 rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
877 if (rc != 0) {
878 pr_warn("Failed to obtain physical IRQ %d\n", irq);
879 return 0;
880 }
881 evtchn = bind_pirq.port;
882
883 pirq_query_unmask(irq);
884
885 rc = set_evtchn_to_irq(evtchn, irq);
886 if (rc)
887 goto err;
888
889 info->evtchn = evtchn;
890 bind_evtchn_to_cpu(evtchn, 0, false);
891
892 rc = xen_evtchn_port_setup(evtchn);
893 if (rc)
894 goto err;
895
896out:
897 do_unmask(info, EVT_MASK_REASON_EXPLICIT);
898
899 eoi_pirq(irq_get_irq_data(irq));
900
901 return 0;
902
903err:
904 pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
905 xen_evtchn_close(evtchn);
906 return 0;
907}
908
909static unsigned int startup_pirq(struct irq_data *data)
910{
911 return __startup_pirq(data->irq);
912}
913
914static void shutdown_pirq(struct irq_data *data)
915{
916 unsigned int irq = data->irq;
917 struct irq_info *info = info_for_irq(irq);
918 evtchn_port_t evtchn = evtchn_from_irq(irq);
919
920 BUG_ON(info->type != IRQT_PIRQ);
921
922 if (!VALID_EVTCHN(evtchn))
923 return;
924
925 do_mask(info, EVT_MASK_REASON_EXPLICIT);
926 xen_evtchn_close(evtchn);
927 xen_irq_info_cleanup(info);
928}
929
930static void enable_pirq(struct irq_data *data)
931{
932 enable_dynirq(data);
933}
934
935static void disable_pirq(struct irq_data *data)
936{
937 disable_dynirq(data);
938}
939
940int xen_irq_from_gsi(unsigned gsi)
941{
942 struct irq_info *info;
943
944 list_for_each_entry(info, &xen_irq_list_head, list) {
945 if (info->type != IRQT_PIRQ)
946 continue;
947
948 if (info->u.pirq.gsi == gsi)
949 return info->irq;
950 }
951
952 return -1;
953}
954EXPORT_SYMBOL_GPL(xen_irq_from_gsi);
955
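/*
 * Drop one reference on the irq. When the last reference is gone, close
 * the event channel, undo the per-type bookkeeping and free the irq.
 * The caller must hold irq_mapping_update_lock.
 */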
956static void __unbind_from_irq(unsigned int irq)
957{
958 evtchn_port_t evtchn = evtchn_from_irq(irq);
959 struct irq_info *info = info_for_irq(irq);
960
961 if (info->refcnt > 0) {
962 info->refcnt--;
963 if (info->refcnt != 0)
964 return;
965 }
966
967 if (VALID_EVTCHN(evtchn)) {
968 unsigned int cpu = cpu_from_irq(irq);
969 struct xenbus_device *dev;
970
971 xen_evtchn_close(evtchn);
972
973 switch (type_from_irq(irq)) {
974 case IRQT_VIRQ:
975 per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1;
976 break;
977 case IRQT_IPI:
978 per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
979 break;
980 case IRQT_EVTCHN:
981 dev = info->u.interdomain;
982 if (dev)
983 atomic_dec(&dev->event_channels);
984 break;
985 default:
986 break;
987 }
988
989 xen_irq_info_cleanup(info);
990 }
991
992 xen_free_irq(irq);
993}
994
995/*
996 * Do not make any assumptions regarding the relationship between the
997 * IRQ number returned here and the Xen pirq argument.
998 *
999 * Note: We don't assign an event channel until the irq has actually been
1000 * started up. Return an existing irq if we've already got one for the gsi.
1001 *
1002 * Shareable implies level triggered, not shareable implies edge
1003 * triggered here.
1004 */
1005int xen_bind_pirq_gsi_to_irq(unsigned gsi,
1006 unsigned pirq, int shareable, char *name)
1007{
1008 int irq = -1;
1009 struct physdev_irq irq_op;
1010 int ret;
1011
1012 mutex_lock(&irq_mapping_update_lock);
1013
1014 irq = xen_irq_from_gsi(gsi);
1015 if (irq != -1) {
1016 pr_info("%s: returning irq %d for gsi %u\n",
1017 __func__, irq, gsi);
1018 goto out;
1019 }
1020
1021 irq = xen_allocate_irq_gsi(gsi);
1022 if (irq < 0)
1023 goto out;
1024
1025 irq_op.irq = irq;
1026 irq_op.vector = 0;
1027
1028 /* Only the privileged domain can do this. For non-priv, the pcifront
1029 * driver provides a PCI bus that does the call to do exactly
1030 * this in the priv domain. */
1031 if (xen_initial_domain() &&
1032 HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1033 xen_free_irq(irq);
1034 irq = -ENOSPC;
1035 goto out;
1036 }
1037
1038 ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
1039 shareable ? PIRQ_SHAREABLE : 0);
1040 if (ret < 0) {
1041 __unbind_from_irq(irq);
1042 irq = ret;
1043 goto out;
1044 }
1045
1046 pirq_query_unmask(irq);
1047 /* We try to use the handler with the appropriate semantic for the
1048 * type of interrupt: if the interrupt is an edge triggered
1049 * interrupt we use handle_edge_irq.
1050 *
1051 * On the other hand if the interrupt is level triggered we use
1052 * handle_fasteoi_irq like the native code does for this kind of
1053 * interrupts.
1054 *
1055 * Depending on the Xen version, pirq_needs_eoi might return true
1056 * not only for level triggered interrupts but for edge triggered
1057 * interrupts too. In any case Xen always honors the eoi mechanism,
1058 * not injecting any more pirqs of the same kind if the first one
1059 * hasn't received an eoi yet. Therefore using the fasteoi handler
1060 * is the right choice either way.
1061 */
1062 if (shareable)
1063 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1064 handle_fasteoi_irq, name);
1065 else
1066 irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
1067 handle_edge_irq, name);
1068
1069out:
1070 mutex_unlock(&irq_mapping_update_lock);
1071
1072 return irq;
1073}
1074
1075#ifdef CONFIG_PCI_MSI
1076int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
1077{
1078 int rc;
1079 struct physdev_get_free_pirq op_get_free_pirq;
1080
1081 op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
1082 rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);
1083
1084 WARN_ONCE(rc == -ENOSYS,
1085 "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1086
1087 return rc ? -1 : op_get_free_pirq.pirq;
1088}
1089
1090int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
1091 int pirq, int nvec, const char *name, domid_t domid)
1092{
1093 int i, irq, ret;
1094
1095 mutex_lock(&irq_mapping_update_lock);
1096
1097 irq = xen_allocate_irqs_dynamic(nvec);
1098 if (irq < 0)
1099 goto out;
1100
1101 for (i = 0; i < nvec; i++) {
1102 irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);
1103
1104 ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
1105 i == 0 ? 0 : PIRQ_MSI_GROUP);
1106 if (ret < 0)
1107 goto error_irq;
1108 }
1109
1110 ret = irq_set_msi_desc(irq, msidesc);
1111 if (ret < 0)
1112 goto error_irq;
1113out:
1114 mutex_unlock(&irq_mapping_update_lock);
1115 return irq;
1116error_irq:
1117 while (nvec--)
1118 __unbind_from_irq(irq + nvec);
1119 mutex_unlock(&irq_mapping_update_lock);
1120 return ret;
1121}
1122#endif
1123
1124int xen_destroy_irq(int irq)
1125{
1126 struct physdev_unmap_pirq unmap_irq;
1127 struct irq_info *info = info_for_irq(irq);
1128 int rc = -ENOENT;
1129
1130 mutex_lock(&irq_mapping_update_lock);
1131
1132 /*
1133 * When removing a vector that belongs to an MSI group, skip the
1134 * PIRQ unmap unless this vector is the first one in the
1135 * group.
1136 */
1137 if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) {
1138 unmap_irq.pirq = info->u.pirq.pirq;
1139 unmap_irq.domid = info->u.pirq.domid;
1140 rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1141 /* If another domain quits without making the pci_disable_msix
1142 * call, the Xen hypervisor takes care of freeing the PIRQs
1143 * (free_domain_pirqs).
1144 */
1145 if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
1146 pr_info("domain %d does not have %d anymore\n",
1147 info->u.pirq.domid, info->u.pirq.pirq);
1148 else if (rc) {
1149 pr_warn("unmap irq failed %d\n", rc);
1150 goto out;
1151 }
1152 }
1153
1154 xen_free_irq(irq);
1155
1156out:
1157 mutex_unlock(&irq_mapping_update_lock);
1158 return rc;
1159}
1160
1161int xen_irq_from_pirq(unsigned pirq)
1162{
1163 int irq;
1164
1165 struct irq_info *info;
1166
1167 mutex_lock(&irq_mapping_update_lock);
1168
1169 list_for_each_entry(info, &xen_irq_list_head, list) {
1170 if (info->type != IRQT_PIRQ)
1171 continue;
1172 irq = info->irq;
1173 if (info->u.pirq.pirq == pirq)
1174 goto out;
1175 }
1176 irq = -1;
1177out:
1178 mutex_unlock(&irq_mapping_update_lock);
1179
1180 return irq;
1181}
1182
1183
1184int xen_pirq_from_irq(unsigned irq)
1185{
1186 return pirq_from_irq(irq);
1187}
1188EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
1189
1190static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip,
1191 struct xenbus_device *dev)
1192{
1193 int irq;
1194 int ret;
1195
1196 if (evtchn >= xen_evtchn_max_channels())
1197 return -ENOMEM;
1198
1199 mutex_lock(&irq_mapping_update_lock);
1200
1201 irq = get_evtchn_to_irq(evtchn);
1202
1203 if (irq == -1) {
1204 irq = xen_allocate_irq_dynamic();
1205 if (irq < 0)
1206 goto out;
1207
1208 irq_set_chip_and_handler_name(irq, chip,
1209 handle_edge_irq, "event");
1210
1211 ret = xen_irq_info_evtchn_setup(irq, evtchn, dev);
1212 if (ret < 0) {
1213 __unbind_from_irq(irq);
1214 irq = ret;
1215 goto out;
1216 }
1217 /*
1218 * New interdomain events are initially bound to vCPU0 This
1219 * is required to setup the event channel in the first
1220 * place and also important for UP guests because the
1221 * affinity setting is not invoked on them so nothing would
1222 * bind the channel.
1223 */
1224 bind_evtchn_to_cpu(evtchn, 0, false);
1225 } else {
1226 struct irq_info *info = info_for_irq(irq);
1227 WARN_ON(info == NULL || info->type != IRQT_EVTCHN);
1228 }
1229
1230out:
1231 mutex_unlock(&irq_mapping_update_lock);
1232
1233 return irq;
1234}
1235
1236int bind_evtchn_to_irq(evtchn_port_t evtchn)
1237{
1238 return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1239}
1240EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
1241
1242static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
1243{
1244 struct evtchn_bind_ipi bind_ipi;
1245 evtchn_port_t evtchn;
1246 int ret, irq;
1247
1248 mutex_lock(&irq_mapping_update_lock);
1249
1250 irq = per_cpu(ipi_to_irq, cpu)[ipi];
1251
1252 if (irq == -1) {
1253 irq = xen_allocate_irq_dynamic();
1254 if (irq < 0)
1255 goto out;
1256
1257 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1258 handle_percpu_irq, "ipi");
1259
1260 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1261 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1262 &bind_ipi) != 0)
1263 BUG();
1264 evtchn = bind_ipi.port;
1265
1266 ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
1267 if (ret < 0) {
1268 __unbind_from_irq(irq);
1269 irq = ret;
1270 goto out;
1271 }
1272 /*
1273 * Force the affinity mask to the target CPU so proc shows
1274 * the correct target.
1275 */
1276 bind_evtchn_to_cpu(evtchn, cpu, true);
1277 } else {
1278 struct irq_info *info = info_for_irq(irq);
1279 WARN_ON(info == NULL || info->type != IRQT_IPI);
1280 }
1281
1282 out:
1283 mutex_unlock(&irq_mapping_update_lock);
1284 return irq;
1285}
1286
1287static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev,
1288 evtchn_port_t remote_port,
1289 struct irq_chip *chip)
1290{
1291 struct evtchn_bind_interdomain bind_interdomain;
1292 int err;
1293
1294 bind_interdomain.remote_dom = dev->otherend_id;
1295 bind_interdomain.remote_port = remote_port;
1296
1297 err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
1298 &bind_interdomain);
1299
1300 return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
1301 chip, dev);
1302}
1303
1304int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev,
1305 evtchn_port_t remote_port)
1306{
1307 return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
1308 &xen_lateeoi_chip);
1309}
1310EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi);
1311
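/*
 * Scan all event channel ports to find the one already bound to the
 * given virq on the given cpu (used when EVTCHNOP_bind_virq returns
 * -EEXIST).
 */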
1312static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn)
1313{
1314 struct evtchn_status status;
1315 evtchn_port_t port;
1316 int rc = -ENOENT;
1317
1318 memset(&status, 0, sizeof(status));
1319 for (port = 0; port < xen_evtchn_max_channels(); port++) {
1320 status.dom = DOMID_SELF;
1321 status.port = port;
1322 rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1323 if (rc < 0)
1324 continue;
1325 if (status.status != EVTCHNSTAT_virq)
1326 continue;
1327 if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) {
1328 *evtchn = port;
1329 break;
1330 }
1331 }
1332 return rc;
1333}
1334
1335/**
1336 * xen_evtchn_nr_channels - number of usable event channel ports
1337 *
1338 * This may be less than the maximum supported by the current
1339 * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum
1340 * supported.
1341 */
1342unsigned xen_evtchn_nr_channels(void)
1343{
1344 return evtchn_ops->nr_channels();
1345}
1346EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels);
1347
1348int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu)
1349{
1350 struct evtchn_bind_virq bind_virq;
1351 evtchn_port_t evtchn = 0;
1352 int irq, ret;
1353
1354 mutex_lock(&irq_mapping_update_lock);
1355
1356 irq = per_cpu(virq_to_irq, cpu)[virq];
1357
1358 if (irq == -1) {
1359 irq = xen_allocate_irq_dynamic();
1360 if (irq < 0)
1361 goto out;
1362
1363 if (percpu)
1364 irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
1365 handle_percpu_irq, "virq");
1366 else
1367 irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
1368 handle_edge_irq, "virq");
1369
1370 bind_virq.virq = virq;
1371 bind_virq.vcpu = xen_vcpu_nr(cpu);
1372 ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1373 &bind_virq);
1374 if (ret == 0)
1375 evtchn = bind_virq.port;
1376 else {
1377 if (ret == -EEXIST)
1378 ret = find_virq(virq, cpu, &evtchn);
1379 BUG_ON(ret < 0);
1380 }
1381
1382 ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1383 if (ret < 0) {
1384 __unbind_from_irq(irq);
1385 irq = ret;
1386 goto out;
1387 }
1388
1389 /*
1390 * Force the affinity mask for percpu interrupts so proc
1391 * shows the correct target.
1392 */
1393 bind_evtchn_to_cpu(evtchn, cpu, percpu);
1394 } else {
1395 struct irq_info *info = info_for_irq(irq);
1396 WARN_ON(info == NULL || info->type != IRQT_VIRQ);
1397 }
1398
1399out:
1400 mutex_unlock(&irq_mapping_update_lock);
1401
1402 return irq;
1403}
1404
1405static void unbind_from_irq(unsigned int irq)
1406{
1407 mutex_lock(&irq_mapping_update_lock);
1408 __unbind_from_irq(irq);
1409 mutex_unlock(&irq_mapping_update_lock);
1410}
1411
1412static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn,
1413 irq_handler_t handler,
1414 unsigned long irqflags,
1415 const char *devname, void *dev_id,
1416 struct irq_chip *chip)
1417{
1418 int irq, retval;
1419
1420 irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL);
1421 if (irq < 0)
1422 return irq;
1423 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1424 if (retval != 0) {
1425 unbind_from_irq(irq);
1426 return retval;
1427 }
1428
1429 return irq;
1430}
1431
1432int bind_evtchn_to_irqhandler(evtchn_port_t evtchn,
1433 irq_handler_t handler,
1434 unsigned long irqflags,
1435 const char *devname, void *dev_id)
1436{
1437 return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1438 devname, dev_id,
1439 &xen_dynamic_chip);
1440}
1441EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
1442
1443int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn,
1444 irq_handler_t handler,
1445 unsigned long irqflags,
1446 const char *devname, void *dev_id)
1447{
1448 return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags,
1449 devname, dev_id,
1450 &xen_lateeoi_chip);
1451}
1452EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi);
1453
1454static int bind_interdomain_evtchn_to_irqhandler_chip(
1455 struct xenbus_device *dev, evtchn_port_t remote_port,
1456 irq_handler_t handler, unsigned long irqflags,
1457 const char *devname, void *dev_id, struct irq_chip *chip)
1458{
1459 int irq, retval;
1460
1461 irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip);
1462 if (irq < 0)
1463 return irq;
1464
1465 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1466 if (retval != 0) {
1467 unbind_from_irq(irq);
1468 return retval;
1469 }
1470
1471 return irq;
1472}
1473
1474int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev,
1475 evtchn_port_t remote_port,
1476 irq_handler_t handler,
1477 unsigned long irqflags,
1478 const char *devname,
1479 void *dev_id)
1480{
1481 return bind_interdomain_evtchn_to_irqhandler_chip(dev,
1482 remote_port, handler, irqflags, devname,
1483 dev_id, &xen_lateeoi_chip);
1484}
1485EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi);
1486
1487int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1488 irq_handler_t handler,
1489 unsigned long irqflags, const char *devname, void *dev_id)
1490{
1491 int irq, retval;
1492
1493 irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1494 if (irq < 0)
1495 return irq;
1496 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1497 if (retval != 0) {
1498 unbind_from_irq(irq);
1499 return retval;
1500 }
1501
1502 return irq;
1503}
1504EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
1505
1506int bind_ipi_to_irqhandler(enum ipi_vector ipi,
1507 unsigned int cpu,
1508 irq_handler_t handler,
1509 unsigned long irqflags,
1510 const char *devname,
1511 void *dev_id)
1512{
1513 int irq, retval;
1514
1515 irq = bind_ipi_to_irq(ipi, cpu);
1516 if (irq < 0)
1517 return irq;
1518
1519 irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
1520 retval = request_irq(irq, handler, irqflags, devname, dev_id);
1521 if (retval != 0) {
1522 unbind_from_irq(irq);
1523 return retval;
1524 }
1525
1526 return irq;
1527}
1528
1529void unbind_from_irqhandler(unsigned int irq, void *dev_id)
1530{
1531 struct irq_info *info = info_for_irq(irq);
1532
1533 if (WARN_ON(!info))
1534 return;
1535 free_irq(irq, dev_id);
1536 unbind_from_irq(irq);
1537}
1538EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
1539
1540/**
1541 * xen_set_irq_priority() - set an event channel priority.
1542 * @irq: irq bound to an event channel.
1543 * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN.
1544 */
1545int xen_set_irq_priority(unsigned irq, unsigned priority)
1546{
1547 struct evtchn_set_priority set_priority;
1548
1549 set_priority.port = evtchn_from_irq(irq);
1550 set_priority.priority = priority;
1551
1552 return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
1553 &set_priority);
1554}
1555EXPORT_SYMBOL_GPL(xen_set_irq_priority);
1556
1557int evtchn_make_refcounted(evtchn_port_t evtchn)
1558{
1559 int irq = get_evtchn_to_irq(evtchn);
1560 struct irq_info *info;
1561
1562 if (irq == -1)
1563 return -ENOENT;
1564
1565 info = info_for_irq(irq);
1566
1567 if (!info)
1568 return -ENOENT;
1569
1570 WARN_ON(info->refcnt != -1);
1571
1572 info->refcnt = 1;
1573
1574 return 0;
1575}
1576EXPORT_SYMBOL_GPL(evtchn_make_refcounted);
1577
1578int evtchn_get(evtchn_port_t evtchn)
1579{
1580 int irq;
1581 struct irq_info *info;
1582 int err = -ENOENT;
1583
1584 if (evtchn >= xen_evtchn_max_channels())
1585 return -EINVAL;
1586
1587 mutex_lock(&irq_mapping_update_lock);
1588
1589 irq = get_evtchn_to_irq(evtchn);
1590 if (irq == -1)
1591 goto done;
1592
1593 info = info_for_irq(irq);
1594
1595 if (!info)
1596 goto done;
1597
1598 err = -EINVAL;
1599 if (info->refcnt <= 0 || info->refcnt == SHRT_MAX)
1600 goto done;
1601
1602 info->refcnt++;
1603 err = 0;
1604 done:
1605 mutex_unlock(&irq_mapping_update_lock);
1606
1607 return err;
1608}
1609EXPORT_SYMBOL_GPL(evtchn_get);
1610
1611void evtchn_put(evtchn_port_t evtchn)
1612{
1613 int irq = get_evtchn_to_irq(evtchn);
1614 if (WARN_ON(irq == -1))
1615 return;
1616 unbind_from_irq(irq);
1617}
1618EXPORT_SYMBOL_GPL(evtchn_put);
1619
1620void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
1621{
1622 int irq;
1623
1624#ifdef CONFIG_X86
1625 if (unlikely(vector == XEN_NMI_VECTOR)) {
1626 int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
1627 NULL);
1628 if (rc < 0)
1629 printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1630 return;
1631 }
1632#endif
1633 irq = per_cpu(ipi_to_irq, cpu)[vector];
1634 BUG_ON(irq < 0);
1635 notify_remote_via_irq(irq);
1636}
1637
1638struct evtchn_loop_ctrl {
1639 ktime_t timeout;
1640 unsigned count;
1641 bool defer_eoi;
1642};
1643
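/*
 * Handle one pending event channel port: look up the bound irq and feed
 * it into the generic irq layer, deferring the EOI if the event loop is
 * taking too long.
 */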
1644void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
1645{
1646 int irq;
1647 struct irq_info *info;
1648 struct xenbus_device *dev;
1649
1650 irq = get_evtchn_to_irq(port);
1651 if (irq == -1)
1652 return;
1653
1654 /*
1655 * Check for timeout every 256 events.
1656 * We are setting the timeout value only after the first 256
1657 * events in order to not hurt the common case of few loop
1658 * iterations. The 256 is basically an arbitrary value.
1659 *
1660 * In case we are hitting the timeout we need to defer all further
1661 * EOIs in order to ensure to leave the event handling loop rather
1662 * sooner than later.
1663 */
1664 if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) {
1665 ktime_t kt = ktime_get();
1666
1667 if (!ctrl->timeout) {
1668 kt = ktime_add_ms(kt,
1669 jiffies_to_msecs(event_loop_timeout));
1670 ctrl->timeout = kt;
1671 } else if (kt > ctrl->timeout) {
1672 ctrl->defer_eoi = true;
1673 }
1674 }
1675
1676 info = info_for_irq(irq);
1677 if (xchg_acquire(&info->is_active, 1))
1678 return;
1679
1680 dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
1681 if (dev)
1682 atomic_inc(&dev->events);
1683
1684 if (ctrl->defer_eoi) {
1685 info->eoi_cpu = smp_processor_id();
1686 info->irq_epoch = __this_cpu_read(irq_epoch);
1687 info->eoi_time = get_jiffies_64() + event_eoi_delay;
1688 }
1689
1690 generic_handle_irq(irq);
1691}
1692
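/*
 * Main upcall loop: keep handling pending events until the hypervisor
 * no longer flags evtchn_upcall_pending for this vcpu.
 */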
1693static void __xen_evtchn_do_upcall(void)
1694{
1695 struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1696 int cpu = smp_processor_id();
1697 struct evtchn_loop_ctrl ctrl = { 0 };
1698
1699 read_lock(&evtchn_rwlock);
1700
1701 do {
1702 vcpu_info->evtchn_upcall_pending = 0;
1703
1704 xen_evtchn_handle_events(cpu, &ctrl);
1705
1706 BUG_ON(!irqs_disabled());
1707
1708 virt_rmb(); /* Hypervisor can set upcall pending. */
1709
1710 } while (vcpu_info->evtchn_upcall_pending);
1711
1712 read_unlock(&evtchn_rwlock);
1713
1714 /*
1715 * Increment irq_epoch only now to defer EOIs only for
1716 * xen_irq_lateeoi() invocations occurring from inside the loop
1717 * above.
1718 */
1719 __this_cpu_inc(irq_epoch);
1720}
1721
1722void xen_evtchn_do_upcall(struct pt_regs *regs)
1723{
1724 struct pt_regs *old_regs = set_irq_regs(regs);
1725
1726 irq_enter();
1727
1728 __xen_evtchn_do_upcall();
1729
1730 irq_exit();
1731 set_irq_regs(old_regs);
1732}
1733
1734void xen_hvm_evtchn_do_upcall(void)
1735{
1736 __xen_evtchn_do_upcall();
1737}
1738EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
1739
1740/* Rebind a new event channel to an existing irq. */
1741void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
1742{
1743 struct irq_info *info = info_for_irq(irq);
1744
1745 if (WARN_ON(!info))
1746 return;
1747
1748 /* Make sure the irq is masked, since the new event channel
1749 will also be masked. */
1750 disable_irq(irq);
1751
1752 mutex_lock(&irq_mapping_update_lock);
1753
1754 /* After resume the irq<->evtchn mappings are all cleared out */
1755 BUG_ON(get_evtchn_to_irq(evtchn) != -1);
1756 /* Expect irq to have been bound before,
1757 so there should be a proper type */
1758 BUG_ON(info->type == IRQT_UNBOUND);
1759
1760 (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL);
1761
1762 mutex_unlock(&irq_mapping_update_lock);
1763
1764 bind_evtchn_to_cpu(evtchn, info->cpu, false);
1765
1766 /* Unmask the event channel. */
1767 enable_irq(irq);
1768}
1769
1770/* Rebind an evtchn so that it gets delivered to a specific cpu */
1771static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
1772{
1773 struct evtchn_bind_vcpu bind_vcpu;
1774 evtchn_port_t evtchn = info ? info->evtchn : 0;
1775
1776 if (!VALID_EVTCHN(evtchn))
1777 return -1;
1778
1779 if (!xen_support_evtchn_rebind())
1780 return -1;
1781
1782 /* Send future instances of this interrupt to the target vcpu. */
1783 bind_vcpu.port = evtchn;
1784 bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1785
1786 /*
1787 * Mask the event while changing the VCPU binding to prevent
1788 * it being delivered on an unexpected VCPU.
1789 */
1790 do_mask(info, EVT_MASK_REASON_TEMPORARY);
1791
1792 /*
1793 * If this fails, it usually just indicates that we're dealing with a
1794 * virq or IPI channel, which don't actually need to be rebound. Ignore
1795 * it, but don't do the xenlinux-level rebind in that case.
1796 */
1797 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
1798 bind_evtchn_to_cpu(evtchn, tcpu, false);
1799
1800 do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1801
1802 return 0;
1803}
1804
1805/*
1806 * Find the CPU within @dest mask which has the least number of channels
1807 * assigned. This is not precise as the per cpu counts can be modified
1808 * concurrently.
1809 */
1810static unsigned int select_target_cpu(const struct cpumask *dest)
1811{
1812 unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX;
1813
1814 for_each_cpu_and(cpu, dest, cpu_online_mask) {
1815 unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1816
1817 if (curch < minch) {
1818 minch = curch;
1819 best_cpu = cpu;
1820 }
1821 }
1822
1823 /*
1824 * Catch the unlikely case that dest contains no online CPUs. The
1825 * fallback call with cpu_online_mask cannot recurse again.
1826 */
1827 if (best_cpu == UINT_MAX)
1828 return select_target_cpu(cpu_online_mask);
1829
1830 return best_cpu;
1831}
1832
1833static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
1834 bool force)
1835{
1836 unsigned int tcpu = select_target_cpu(dest);
1837 int ret;
1838
1839 ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
1840 if (!ret)
1841 irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1842
1843 return ret;
1844}
1845
1846static void enable_dynirq(struct irq_data *data)
1847{
1848 struct irq_info *info = info_for_irq(data->irq);
1849 evtchn_port_t evtchn = info ? info->evtchn : 0;
1850
1851 if (VALID_EVTCHN(evtchn))
1852 do_unmask(info, EVT_MASK_REASON_EXPLICIT);
1853}
1854
1855static void disable_dynirq(struct irq_data *data)
1856{
1857 struct irq_info *info = info_for_irq(data->irq);
1858 evtchn_port_t evtchn = info ? info->evtchn : 0;
1859
1860 if (VALID_EVTCHN(evtchn))
1861 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1862}
1863
1864static void ack_dynirq(struct irq_data *data)
1865{
1866 struct irq_info *info = info_for_irq(data->irq);
1867 evtchn_port_t evtchn = info ? info->evtchn : 0;
1868
1869 if (VALID_EVTCHN(evtchn))
1870 event_handler_exit(info);
1871}
1872
1873static void mask_ack_dynirq(struct irq_data *data)
1874{
1875 disable_dynirq(data);
1876 ack_dynirq(data);
1877}
1878
1879static void lateeoi_ack_dynirq(struct irq_data *data)
1880{
1881 struct irq_info *info = info_for_irq(data->irq);
1882 evtchn_port_t evtchn = info ? info->evtchn : 0;
1883
1884 if (VALID_EVTCHN(evtchn)) {
1885 do_mask(info, EVT_MASK_REASON_EOI_PENDING);
1886 event_handler_exit(info);
1887 }
1888}
1889
1890static void lateeoi_mask_ack_dynirq(struct irq_data *data)
1891{
1892 struct irq_info *info = info_for_irq(data->irq);
1893 evtchn_port_t evtchn = info ? info->evtchn : 0;
1894
1895 if (VALID_EVTCHN(evtchn)) {
1896 do_mask(info, EVT_MASK_REASON_EXPLICIT);
1897 event_handler_exit(info);
1898 }
1899}
1900
1901static int retrigger_dynirq(struct irq_data *data)
1902{
1903 struct irq_info *info = info_for_irq(data->irq);
1904 evtchn_port_t evtchn = info ? info->evtchn : 0;
1905
1906 if (!VALID_EVTCHN(evtchn))
1907 return 0;
1908
1909 do_mask(info, EVT_MASK_REASON_TEMPORARY);
1910 set_evtchn(evtchn);
1911 do_unmask(info, EVT_MASK_REASON_TEMPORARY);
1912
1913 return 1;
1914}
1915
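/* After resume: re-map all GSI-based pirqs with Xen and restart them. */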
1916static void restore_pirqs(void)
1917{
1918 int pirq, rc, irq, gsi;
1919 struct physdev_map_pirq map_irq;
1920 struct irq_info *info;
1921
1922 list_for_each_entry(info, &xen_irq_list_head, list) {
1923 if (info->type != IRQT_PIRQ)
1924 continue;
1925
1926 pirq = info->u.pirq.pirq;
1927 gsi = info->u.pirq.gsi;
1928 irq = info->irq;
1929
1930 /* save/restore of PT devices doesn't work, so at this point the
1931 * only devices present are GSI based emulated devices */
1932 if (!gsi)
1933 continue;
1934
1935 map_irq.domid = DOMID_SELF;
1936 map_irq.type = MAP_PIRQ_TYPE_GSI;
1937 map_irq.index = gsi;
1938 map_irq.pirq = pirq;
1939
1940 rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1941 if (rc) {
1942 pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1943 gsi, irq, pirq, rc);
1944 xen_free_irq(irq);
1945 continue;
1946 }
1947
1948 printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1949
1950 __startup_pirq(irq);
1951 }
1952}
1953
1954static void restore_cpu_virqs(unsigned int cpu)
1955{
1956 struct evtchn_bind_virq bind_virq;
1957 evtchn_port_t evtchn;
1958 int virq, irq;
1959
1960 for (virq = 0; virq < NR_VIRQS; virq++) {
1961 if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
1962 continue;
1963
1964 BUG_ON(virq_from_irq(irq) != virq);
1965
1966 /* Get a new binding from Xen. */
1967 bind_virq.virq = virq;
1968 bind_virq.vcpu = xen_vcpu_nr(cpu);
1969 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
1970 &bind_virq) != 0)
1971 BUG();
1972 evtchn = bind_virq.port;
1973
1974 /* Record the new mapping. */
1975 (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq);
1976 /* The affinity mask is still valid */
1977 bind_evtchn_to_cpu(evtchn, cpu, false);
1978 }
1979}
1980
1981static void restore_cpu_ipis(unsigned int cpu)
1982{
1983 struct evtchn_bind_ipi bind_ipi;
1984 evtchn_port_t evtchn;
1985 int ipi, irq;
1986
1987 for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
1988 if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
1989 continue;
1990
1991 BUG_ON(ipi_from_irq(irq) != ipi);
1992
1993 /* Get a new binding from Xen. */
1994 bind_ipi.vcpu = xen_vcpu_nr(cpu);
1995 if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
1996 &bind_ipi) != 0)
1997 BUG();
1998 evtchn = bind_ipi.port;
1999
2000 /* Record the new mapping. */
2001 (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi);
2002 /* The affinity mask is still valid */
2003 bind_evtchn_to_cpu(evtchn, cpu, false);
2004 }
2005}
2006
2007/* Clear an irq's pending state, in preparation for polling on it */
2008void xen_clear_irq_pending(int irq)
2009{
2010 struct irq_info *info = info_for_irq(irq);
2011 evtchn_port_t evtchn = info ? info->evtchn : 0;
2012
2013 if (VALID_EVTCHN(evtchn))
2014 event_handler_exit(info);
2015}
2016EXPORT_SYMBOL(xen_clear_irq_pending);
2017void xen_set_irq_pending(int irq)
2018{
2019 evtchn_port_t evtchn = evtchn_from_irq(irq);
2020
2021 if (VALID_EVTCHN(evtchn))
2022 set_evtchn(evtchn);
2023}
2024
2025bool xen_test_irq_pending(int irq)
2026{
2027 evtchn_port_t evtchn = evtchn_from_irq(irq);
2028 bool ret = false;
2029
2030 if (VALID_EVTCHN(evtchn))
2031 ret = test_evtchn(evtchn);
2032
2033 return ret;
2034}
2035
2036/* Poll waiting for an irq to become pending with timeout. In the usual case,
2037 * the irq will be disabled so it won't deliver an interrupt. */
2038void xen_poll_irq_timeout(int irq, u64 timeout)
2039{
2040 evtchn_port_t evtchn = evtchn_from_irq(irq);
2041
2042 if (VALID_EVTCHN(evtchn)) {
2043 struct sched_poll poll;
2044
2045 poll.nr_ports = 1;
2046 poll.timeout = timeout;
2047 set_xen_guest_handle(poll.ports, &evtchn);
2048
2049 if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2050 BUG();
2051 }
2052}
2053EXPORT_SYMBOL(xen_poll_irq_timeout);
2054/* Poll waiting for an irq to become pending. In the usual case, the
2055 * irq will be disabled so it won't deliver an interrupt. */
2056void xen_poll_irq(int irq)
2057{
2058 xen_poll_irq_timeout(irq, 0 /* no timeout */);
2059}
2060
2061/* Check whether the IRQ line is shared with other guests. */
2062int xen_test_irq_shared(int irq)
2063{
2064 struct irq_info *info = info_for_irq(irq);
2065 struct physdev_irq_status_query irq_status;
2066
2067 if (WARN_ON(!info))
2068 return -ENOENT;
2069
2070 irq_status.irq = info->u.pirq.pirq;
2071
2072 if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2073 return 0;
2074 return !(irq_status.flags & XENIRQSTAT_shared);
2075}
2076EXPORT_SYMBOL_GPL(xen_test_irq_shared);
2077
2078void xen_irq_resume(void)
2079{
2080 unsigned int cpu;
2081 struct irq_info *info;
2082
2083 /* New event-channel space is not 'live' yet. */
2084 xen_evtchn_resume();
2085
2086 /* No IRQ <-> event-channel mappings. */
2087 list_for_each_entry(info, &xen_irq_list_head, list) {
2088 /* Zap event-channel binding */
2089 info->evtchn = 0;
2090 /* Adjust accounting */
2091 channels_on_cpu_dec(info);
2092 }
2093
2094 clear_evtchn_to_irq_all();
2095
2096 for_each_possible_cpu(cpu) {
2097 restore_cpu_virqs(cpu);
2098 restore_cpu_ipis(cpu);
2099 }
2100
2101 restore_pirqs();
2102}
2103
2104static struct irq_chip xen_dynamic_chip __read_mostly = {
2105 .name = "xen-dyn",
2106
2107 .irq_disable = disable_dynirq,
2108 .irq_mask = disable_dynirq,
2109 .irq_unmask = enable_dynirq,
2110
2111 .irq_ack = ack_dynirq,
2112 .irq_mask_ack = mask_ack_dynirq,
2113
2114 .irq_set_affinity = set_affinity_irq,
2115 .irq_retrigger = retrigger_dynirq,
2116};
2117
2118static struct irq_chip xen_lateeoi_chip __read_mostly = {
2119 /* The chip name needs to contain "xen-dyn" for irqbalance to work. */
2120 .name = "xen-dyn-lateeoi",
2121
2122 .irq_disable = disable_dynirq,
2123 .irq_mask = disable_dynirq,
2124 .irq_unmask = enable_dynirq,
2125
2126 .irq_ack = lateeoi_ack_dynirq,
2127 .irq_mask_ack = lateeoi_mask_ack_dynirq,
2128
2129 .irq_set_affinity = set_affinity_irq,
2130 .irq_retrigger = retrigger_dynirq,
2131};
2132
2133static struct irq_chip xen_pirq_chip __read_mostly = {
2134 .name = "xen-pirq",
2135
2136 .irq_startup = startup_pirq,
2137 .irq_shutdown = shutdown_pirq,
2138 .irq_enable = enable_pirq,
2139 .irq_disable = disable_pirq,
2140
2141 .irq_mask = disable_dynirq,
2142 .irq_unmask = enable_dynirq,
2143
2144 .irq_ack = eoi_pirq,
2145 .irq_eoi = eoi_pirq,
2146 .irq_mask_ack = mask_ack_pirq,
2147
2148 .irq_set_affinity = set_affinity_irq,
2149
2150 .irq_retrigger = retrigger_dynirq,
2151};
2152
2153static struct irq_chip xen_percpu_chip __read_mostly = {
2154 .name = "xen-percpu",
2155
2156 .irq_disable = disable_dynirq,
2157 .irq_mask = disable_dynirq,
2158 .irq_unmask = enable_dynirq,
2159
2160 .irq_ack = ack_dynirq,
2161};
2162
2163#ifdef CONFIG_XEN_PVHVM
2164/* Vector callbacks are better than PCI interrupts to receive event
2165 * channel notifications because we can receive vector callbacks on any
2166 * vcpu and we don't need PCI support or APIC interactions. */
2167void xen_setup_callback_vector(void)
2168{
2169 uint64_t callback_via;
2170
2171 if (xen_have_vector_callback) {
2172 callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR);
2173 if (xen_set_callback_via(callback_via)) {
2174 pr_err("Request for Xen HVM callback vector failed\n");
2175 xen_have_vector_callback = 0;
2176 }
2177 }
2178}
2179
2180static __init void xen_alloc_callback_vector(void)
2181{
2182 if (!xen_have_vector_callback)
2183 return;
2184
2185 pr_info("Xen HVM callback vector for event delivery is enabled\n");
2186 alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2187}
2188#else
2189void xen_setup_callback_vector(void) {}
2190static inline void xen_alloc_callback_vector(void) {}
2191#endif
2192
2193bool xen_fifo_events = true;
2194module_param_named(fifo_events, xen_fifo_events, bool, 0);
2195
2196static int xen_evtchn_cpu_prepare(unsigned int cpu)
2197{
2198 int ret = 0;
2199
2200 xen_cpu_init_eoi(cpu);
2201
2202 if (evtchn_ops->percpu_init)
2203 ret = evtchn_ops->percpu_init(cpu);
2204
2205 return ret;
2206}
2207
2208static int xen_evtchn_cpu_dead(unsigned int cpu)
2209{
2210 int ret = 0;
2211
2212 if (evtchn_ops->percpu_deinit)
2213 ret = evtchn_ops->percpu_deinit(cpu);
2214
2215 return ret;
2216}
2217
2218void __init xen_init_IRQ(void)
2219{
2220 int ret = -EINVAL;
2221 evtchn_port_t evtchn;
2222
2223 if (xen_fifo_events)
2224 ret = xen_evtchn_fifo_init();
2225 if (ret < 0) {
2226 xen_evtchn_2l_init();
2227 xen_fifo_events = false;
2228 }
2229
2230 xen_cpu_init_eoi(smp_processor_id());
2231
2232 cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
2233 "xen/evtchn:prepare",
2234 xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2235
2236 evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
2237 sizeof(*evtchn_to_irq), GFP_KERNEL);
2238 BUG_ON(!evtchn_to_irq);
2239
2240 /* No event channels are 'live' right now. */
2241 for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
2242 mask_evtchn(evtchn);
2243
2244 pirq_needs_eoi = pirq_needs_eoi_flag;
2245
2246#ifdef CONFIG_X86
2247 if (xen_pv_domain()) {
2248 if (xen_initial_domain())
2249 pci_xen_initial_domain();
2250 }
2251 if (xen_feature(XENFEAT_hvm_callback_vector)) {
2252 xen_setup_callback_vector();
2253 xen_alloc_callback_vector();
2254 }
2255
2256 if (xen_hvm_domain()) {
2257 native_init_IRQ();
2258 /* pci_xen_hvm_init must be called after native_init_IRQ so that
2259 * __acpi_register_gsi can point at the right function */
2260 pci_xen_hvm_init();
2261 } else {
2262 int rc;
2263 struct physdev_pirq_eoi_gmfn eoi_gmfn;
2264
2265 pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
2266 eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map);
2267 rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2268 if (rc != 0) {
2269 free_page((unsigned long) pirq_eoi_map);
2270 pirq_eoi_map = NULL;
2271 } else
2272 pirq_needs_eoi = pirq_check_eoi_map;
2273 }
2274#endif
2275}