Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/ioapic: Handle Extended Destination ID field in RTE

Bits 63-48 of the I/OAPIC Redirection Table Entry map directly to bits 19-4
of the address used in the resulting MSI cycle.

Historically, the x86 MSI format only used the top 8 of those 16 bits as
the destination APIC ID, and the "Extended Destination ID" in the lower 8
bits was unused.

With interrupt remapping, the lowest bit of the Extended Destination ID
(bit 48 of RTE, bit 4 of MSI address) is now used to indicate a remappable
format MSI.

A hypervisor can use the other 7 bits of the Extended Destination ID to
permit guests to address up to 15 bits of APIC IDs, thus allowing 32768
vCPUs before having to expose a vIOMMU and interrupt remapping to the
guest.

No behavioural change in this patch, since nothing yet permits APIC IDs
above 255 to be used with the non-IR I/OAPIC domain.

[ tglx: Converted it to the cleaned up entry/msi_msg format and added
commentary ]

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20201024213535.443185-32-dwmw2@infradead.org

authored by

David Woodhouse and committed by
Thomas Gleixner
51130d21 79eb3581

+17 -6
+2 -1
arch/x86/include/asm/io_apic.h
··· 67 67 is_level : 1, 68 68 masked : 1, 69 69 reserved_0 : 15, 70 - reserved_1 : 24, 70 + reserved_1 : 17, 71 + virt_destid_8_14 : 7, 71 72 destid_0_7 : 8; 72 73 }; 73 74 struct {
+15 -5
arch/x86/kernel/apic/io_apic.c
··· 1238 1238 (entry.ir_index_15 << 15) | entry.ir_index_0_14, 1239 1239 entry.ir_zero); 1240 1240 } else { 1241 - printk(KERN_DEBUG "%s, %s, D(%02X), M(%1d)\n", buf, 1241 + printk(KERN_DEBUG "%s, %s, D(%02X%02X), M(%1d)\n", buf, 1242 1242 entry.dest_mode_logical ? "logical " : "physical", 1243 - entry.destid_0_7, entry.delivery_mode); 1243 + entry.virt_destid_8_14, entry.destid_0_7, 1244 + entry.delivery_mode); 1244 1245 } 1245 1246 } 1246 1247 } ··· 1410 1409 */ 1411 1410 if (ioapic_i8259.pin != -1) { 1412 1411 struct IO_APIC_route_entry entry; 1412 + u32 apic_id = read_apic_id(); 1413 1413 1414 1414 memset(&entry, 0, sizeof(entry)); 1415 1415 entry.masked = false; ··· 1418 1416 entry.active_low = false; 1419 1417 entry.dest_mode_logical = false; 1420 1418 entry.delivery_mode = APIC_DELIVERY_MODE_EXTINT; 1421 - entry.destid_0_7 = read_apic_id(); 1419 + entry.destid_0_7 = apic_id & 0xFF; 1420 + entry.virt_destid_8_14 = apic_id >> 8; 1422 1421 1423 1422 /* 1424 1423 * Add it to the IO-APIC irq-routing table: ··· 1888 1885 /* DMAR/IR: 1, 0 for all other modes */ 1889 1886 entry->ir_format = msg.arch_addr_lo.dmar_format; 1890 1887 /* 1891 - * DMAR/IR: index bit 0-14. 1888 + * - DMAR/IR: index bit 0-14. 1889 + * 1890 + * - Virt: If the host supports x2apic without a virtualized IR 1891 + * unit then bit 0-6 of dmar_index_0_14 are providing bit 1892 + * 8-14 of the destination id. 1892 1893 * 1893 1894 * All other modes have bit 0-6 of dmar_index_0_14 cleared and the 1894 1895 * topmost 8 bits are destination id bit 0-7 (entry::destid_0_7). 
··· 2070 2063 int apic, pin, i; 2071 2064 struct IO_APIC_route_entry entry0, entry1; 2072 2065 unsigned char save_control, save_freq_select; 2066 + u32 apic_id; 2073 2067 2074 2068 pin = find_isa_irq_pin(8, mp_INT); 2075 2069 if (pin == -1) { ··· 2086 2078 entry0 = ioapic_read_entry(apic, pin); 2087 2079 clear_IO_APIC_pin(apic, pin); 2088 2080 2081 + apic_id = hard_smp_processor_id(); 2089 2082 memset(&entry1, 0, sizeof(entry1)); 2090 2083 2091 2084 entry1.dest_mode_logical = true; 2092 2085 entry1.masked = false; 2093 - entry1.destid_0_7 = hard_smp_processor_id(); 2086 + entry1.destid_0_7 = apic_id & 0xFF; 2087 + entry1.virt_destid_8_14 = apic_id >> 8; 2094 2088 entry1.delivery_mode = APIC_DELIVERY_MODE_EXTINT; 2095 2089 entry1.active_low = entry0.active_low; 2096 2090 entry1.is_level = false;