Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ioat2: catch and recover from broken vtd configurations v6

On some platforms (MacPro3,1) the BIOS assigns the ioatdma device to the
incorrect iommu, causing faults when the driver initializes. Add a quirk
to catch this misconfiguration and try falling back to untranslated
operation (which works in the MacPro3,1 case).

Assuming there are other platforms with misconfigured iommus, teach the
ioatdma driver to treat initialization failures as non-fatal (just fail
the driver load and emit a warning instead of triggering a BUG_ON).

This can be classified as a boot regression since 2.6.32 on affected
platforms, because the ioatdma module did not autoload prior to that
kernel.

Cc: <stable@kernel.org>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Reported-by: Chris Li <lkml@chrisli.org>
Tested-by: Chris Li <lkml@chrisli.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>

+55 -3
+1
drivers/dma/ioat/dma.h
··· 97 97 #define IOAT_RESET_PENDING 2 98 98 #define IOAT_KOBJ_INIT_FAIL 3 99 99 #define IOAT_RESHAPE_PENDING 4 100 + #define IOAT_RUN 5 100 101 struct timer_list timer; 101 102 #define COMPLETION_TIMEOUT msecs_to_jiffies(100) 102 103 #define IDLE_TIMEOUT msecs_to_jiffies(2000)
+22 -2
drivers/dma/ioat/dma_v2.c
··· 287 287 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); 288 288 dev_err(to_dev(chan), "%s: Channel halted (%x)\n", 289 289 __func__, chanerr); 290 - BUG_ON(is_ioat_bug(chanerr)); 290 + if (test_bit(IOAT_RUN, &chan->state)) 291 + BUG_ON(is_ioat_bug(chanerr)); 292 + else /* we never got off the ground */ 293 + return; 291 294 } 292 295 293 296 /* if we haven't made progress and we have already ··· 495 492 return ring; 496 493 } 497 494 495 + void ioat2_free_chan_resources(struct dma_chan *c); 496 + 498 497 /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring 499 498 * @chan: channel to be initialized 500 499 */ ··· 505 500 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 506 501 struct ioat_chan_common *chan = &ioat->base; 507 502 struct ioat_ring_ent **ring; 503 + u64 status; 508 504 int order; 509 505 510 506 /* have we already been set up? */ ··· 546 540 tasklet_enable(&chan->cleanup_task); 547 541 ioat2_start_null_desc(ioat); 548 542 549 - return 1 << ioat->alloc_order; 543 + /* check that we got off the ground */ 544 + udelay(5); 545 + status = ioat_chansts(chan); 546 + if (is_ioat_active(status) || is_ioat_idle(status)) { 547 + set_bit(IOAT_RUN, &chan->state); 548 + return 1 << ioat->alloc_order; 549 + } else { 550 + u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); 551 + 552 + dev_WARN(to_dev(chan), 553 + "failed to start channel chanerr: %#x\n", chanerr); 554 + ioat2_free_chan_resources(c); 555 + return -EFAULT; 556 + } 550 557 } 551 558 552 559 bool reshape_ring(struct ioat2_dma_chan *ioat, int order) ··· 797 778 del_timer_sync(&chan->timer); 798 779 device->cleanup_fn((unsigned long) c); 799 780 device->reset_hw(chan); 781 + clear_bit(IOAT_RUN, &chan->state); 800 782 801 783 spin_lock_bh(&chan->cleanup_lock); 802 784 spin_lock_bh(&ioat->prep_lock);
+4 -1
drivers/dma/ioat/dma_v3.c
··· 361 361 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); 362 362 dev_err(to_dev(chan), "%s: Channel halted (%x)\n", 363 363 __func__, chanerr); 364 - BUG_ON(is_ioat_bug(chanerr)); 364 + if (test_bit(IOAT_RUN, &chan->state)) 365 + BUG_ON(is_ioat_bug(chanerr)); 366 + else /* we never got off the ground */ 367 + return; 365 368 } 366 369 367 370 /* if we haven't made progress and we have already
+28
drivers/pci/intel-iommu.c
··· 3029 3029 3030 3030 } 3031 3031 3032 + static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) 3033 + { 3034 + struct dmar_drhd_unit *drhd; 3035 + u32 vtbar; 3036 + int rc; 3037 + 3038 + /* We know that this device on this chipset has its own IOMMU. 3039 + * If we find it under a different IOMMU, then the BIOS is lying 3040 + * to us. Hope that the IOMMU for this device is actually 3041 + * disabled, and it needs no translation... 3042 + */ 3043 + rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar); 3044 + if (rc) { 3045 + /* "can't" happen */ 3046 + dev_info(&pdev->dev, "failed to run vt-d quirk\n"); 3047 + return; 3048 + } 3049 + vtbar &= 0xffff0000; 3050 + 3051 + /* we know that the this iommu should be at offset 0xa000 from vtbar */ 3052 + drhd = dmar_find_matched_drhd_unit(pdev); 3053 + if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, 3054 + TAINT_FIRMWARE_WORKAROUND, 3055 + "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) 3056 + pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; 3057 + } 3058 + DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); 3059 + 3032 3060 static void __init init_no_remapping_devices(void) 3033 3061 { 3034 3062 struct dmar_drhd_unit *drhd;