···145145 select NUMA146146 select ACPI_NUMA147147 select SWIOTLB148148+ select PCI_MSI148149 help149150 This selects the system type of your hardware. A "generic" kernel150151 will run on any supported IA-64 system. However, if you configure···153152154153 generic For any supported IA-64 system155154 DIG-compliant For DIG ("Developer's Interface Guide") compliant systems155155+ DIG+Intel+IOMMU For DIG systems with Intel IOMMU156156 HP-zx1/sx1000 For HP systems157157 HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices.158158 SGI-SN2 For SGI Altix systems···166164config IA64_DIG167165 bool "DIG-compliant"168166 select SWIOTLB167167+168168+config IA64_DIG_VTD169169+ bool "DIG+Intel+IOMMU"170170+ select DMAR171171+ select PCI_MSI169172170173config IA64_HP_ZX1171174 bool "HP-zx1/sx1000"···620613source "drivers/pci/hotplug/Kconfig"621614622615source "drivers/pcmcia/Kconfig"616616+617617+config DMAR618618+ bool "Support for DMA Remapping Devices (EXPERIMENTAL)"619619+ depends on IA64_GENERIC && ACPI && EXPERIMENTAL620620+ help621621+ DMA remapping (DMAR) devices support enables independent address622622+ translations for Direct Memory Access (DMA) from devices.623623+ These DMA remapping devices are reported via ACPI tables624624+ and include PCI device scope covered by these DMA625625+ remapping devices.623626624627endmenu625628
···3434#define flush_dcache_mmap_unlock(mapping) do { } while (0)35353636extern void flush_icache_range (unsigned long start, unsigned long end);3737+extern void clflush_cache_range(void *addr, int size);3838+37393840#define flush_icache_user_range(vma, page, user_addr, len) \3941do { \
+3
arch/ia64/include/asm/device.h
···1010#ifdef CONFIG_ACPI1111 void *acpi_handle;1212#endif1313+#ifdef CONFIG_DMAR1414+ void *iommu; /* hook for IOMMU specific extension */1515+#endif1316};14171518#endif /* _ASM_IA64_DEVICE_H */
+50
arch/ia64/include/asm/dma-mapping.h
···77 */88#include <asm/machvec.h>99#include <linux/scatterlist.h>1010+#include <asm/swiotlb.h>1111+1212+struct dma_mapping_ops {1313+ int (*mapping_error)(struct device *dev,1414+ dma_addr_t dma_addr);1515+ void* (*alloc_coherent)(struct device *dev, size_t size,1616+ dma_addr_t *dma_handle, gfp_t gfp);1717+ void (*free_coherent)(struct device *dev, size_t size,1818+ void *vaddr, dma_addr_t dma_handle);1919+ dma_addr_t (*map_single)(struct device *hwdev, unsigned long ptr,2020+ size_t size, int direction);2121+ void (*unmap_single)(struct device *dev, dma_addr_t addr,2222+ size_t size, int direction);2323+ void (*sync_single_for_cpu)(struct device *hwdev,2424+ dma_addr_t dma_handle, size_t size,2525+ int direction);2626+ void (*sync_single_for_device)(struct device *hwdev,2727+ dma_addr_t dma_handle, size_t size,2828+ int direction);2929+ void (*sync_single_range_for_cpu)(struct device *hwdev,3030+ dma_addr_t dma_handle, unsigned long offset,3131+ size_t size, int direction);3232+ void (*sync_single_range_for_device)(struct device *hwdev,3333+ dma_addr_t dma_handle, unsigned long offset,3434+ size_t size, int direction);3535+ void (*sync_sg_for_cpu)(struct device *hwdev,3636+ struct scatterlist *sg, int nelems,3737+ int direction);3838+ void (*sync_sg_for_device)(struct device *hwdev,3939+ struct scatterlist *sg, int nelems,4040+ int direction);4141+ int (*map_sg)(struct device *hwdev, struct scatterlist *sg,4242+ int nents, int direction);4343+ void (*unmap_sg)(struct device *hwdev,4444+ struct scatterlist *sg, int nents,4545+ int direction);4646+ int (*dma_supported_op)(struct device *hwdev, u64 mask);4747+ int is_phys;4848+};4949+5050+extern struct dma_mapping_ops *dma_ops;5151+extern struct ia64_machine_vector ia64_mv;5252+extern void set_iommu_machvec(void);10531154#define dma_alloc_coherent(dev, size, handle, gfp) \1255 platform_dma_alloc_coherent(dev, size, handle, (gfp) | GFP_DMA)···13895}1399614097#define dma_is_consistent(d, h) (1) /* all we do is 
coherent memory... */9898+9999+static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)100100+{101101+ return dma_ops;102102+}103103+104104+141105142106#endif /* _ASM_IA64_DMA_MAPPING_H */
+16
arch/ia64/include/asm/iommu.h
#ifndef _ASM_IA64_IOMMU_H
#define _ASM_IA64_IOMMU_H 1

/*
 * ia64 declarations used by the DMA-remapping (DMAR) code.
 *
 * This header only declares symbols; it defines no storage.
 */

/* Always evaluates to 0 in this header. */
#define cpu_has_x2apic	0

/*
 * 10 seconds: ten times local_cpu_data->itc_freq, expressed as a cycles_t.
 * Users of this macro must have cycles_t and local_cpu_data in scope;
 * this header does not include anything that provides them.
 */
#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)

extern void pci_iommu_shutdown(void);
extern void no_iommu_init(void);
extern void iommu_dma_init(void);
extern void machvec_init(const char *name);

/* One declaration per line so each flag can be found and annotated alone. */
extern int force_iommu;
extern int no_iommu;
extern int iommu_detected;
extern int forbid_dac;

#endif /* _ASM_IA64_IOMMU_H */
+2
arch/ia64/include/asm/machvec.h
···120120# include <asm/machvec_hpsim.h>121121# elif defined (CONFIG_IA64_DIG)122122# include <asm/machvec_dig.h>123123+# elif defined(CONFIG_IA64_DIG_VTD)124124+# include <asm/machvec_dig_vtd.h>123125# elif defined (CONFIG_IA64_HP_ZX1)124126# include <asm/machvec_hpzx1.h>125127# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
+38
arch/ia64/include/asm/machvec_dig_vtd.h
···11+#ifndef _ASM_IA64_MACHVEC_DIG_VTD_h22+#define _ASM_IA64_MACHVEC_DIG_VTD_h33+44+extern ia64_mv_setup_t dig_setup;55+extern ia64_mv_dma_alloc_coherent vtd_alloc_coherent;66+extern ia64_mv_dma_free_coherent vtd_free_coherent;77+extern ia64_mv_dma_map_single_attrs vtd_map_single_attrs;88+extern ia64_mv_dma_unmap_single_attrs vtd_unmap_single_attrs;99+extern ia64_mv_dma_map_sg_attrs vtd_map_sg_attrs;1010+extern ia64_mv_dma_unmap_sg_attrs vtd_unmap_sg_attrs;1111+extern ia64_mv_dma_supported iommu_dma_supported;1212+extern ia64_mv_dma_mapping_error vtd_dma_mapping_error;1313+extern ia64_mv_dma_init pci_iommu_alloc;1414+1515+/*1616+ * This stuff has dual use!1717+ *1818+ * For a generic kernel, the macros are used to initialize the1919+ * platform's machvec structure. When compiling a non-generic kernel,2020+ * the macros are used directly.2121+ */2222+#define platform_name "dig_vtd"2323+#define platform_setup dig_setup2424+#define platform_dma_init pci_iommu_alloc2525+#define platform_dma_alloc_coherent vtd_alloc_coherent2626+#define platform_dma_free_coherent vtd_free_coherent2727+#define platform_dma_map_single_attrs vtd_map_single_attrs2828+#define platform_dma_unmap_single_attrs vtd_unmap_single_attrs2929+#define platform_dma_map_sg_attrs vtd_map_sg_attrs3030+#define platform_dma_unmap_sg_attrs vtd_unmap_sg_attrs3131+#define platform_dma_sync_single_for_cpu machvec_dma_sync_single3232+#define platform_dma_sync_sg_for_cpu machvec_dma_sync_sg3333+#define platform_dma_sync_single_for_device machvec_dma_sync_single3434+#define platform_dma_sync_sg_for_device machvec_dma_sync_sg3535+#define platform_dma_supported iommu_dma_supported3636+#define platform_dma_mapping_error vtd_dma_mapping_error3737+3838+#endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */
···11+#ifndef ASM_IA64__SWIOTLB_H22+#define ASM_IA64__SWIOTLB_H33+44+#include <linux/dma-mapping.h>55+66+/* SWIOTLB interface */77+88+extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,99+ size_t size, int dir);1010+extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size,1111+ dma_addr_t *dma_handle, gfp_t flags);1212+extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,1313+ size_t size, int dir);1414+extern void swiotlb_sync_single_for_cpu(struct device *hwdev,1515+ dma_addr_t dev_addr,1616+ size_t size, int dir);1717+extern void swiotlb_sync_single_for_device(struct device *hwdev,1818+ dma_addr_t dev_addr,1919+ size_t size, int dir);2020+extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev,2121+ dma_addr_t dev_addr,2222+ unsigned long offset,2323+ size_t size, int dir);2424+extern void swiotlb_sync_single_range_for_device(struct device *hwdev,2525+ dma_addr_t dev_addr,2626+ unsigned long offset,2727+ size_t size, int dir);2828+extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,2929+ struct scatterlist *sg, int nelems,3030+ int dir);3131+extern void swiotlb_sync_sg_for_device(struct device *hwdev,3232+ struct scatterlist *sg, int nelems,3333+ int dir);3434+extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,3535+ int nents, int direction);3636+extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,3737+ int nents, int direction);3838+extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);3939+extern void swiotlb_free_coherent(struct device *hwdev, size_t size,4040+ void *vaddr, dma_addr_t dma_handle);4141+extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);4242+extern void swiotlb_init(void);4343+4444+extern int swiotlb_force;4545+4646+#ifdef CONFIG_SWIOTLB4747+extern int swiotlb;4848+extern void pci_swiotlb_init(void);4949+#else5050+#define swiotlb 05151+static inline void 
pci_swiotlb_init(void)5252+{5353+}5454+#endif5555+5656+#endif /* ASM_IA64__SWIOTLB_H */
+4
arch/ia64/kernel/Makefile
···4242ifneq ($(CONFIG_IA64_ESI),)4343obj-y += esi_stub.o # must be in kernel proper4444endif4545+obj-$(CONFIG_DMAR) += pci-dma.o4646+ifeq ($(CONFIG_DMAR), y)4747+obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o4848+endif45494650# The gate DSO image is built using a special linker script.4751targets += gate.so gate-syms.o
+17
arch/ia64/kernel/acpi.c
···9292 struct acpi_table_rsdp *rsdp;9393 struct acpi_table_xsdt *xsdt;9494 struct acpi_table_header *hdr;9595+#ifdef CONFIG_DMAR9696+ u64 i, nentries;9797+#endif95989699 rsdp_phys = acpi_find_rsdp();97100 if (!rsdp_phys) {···129126 return "xen";130127 }131128129129+#ifdef CONFIG_DMAR130130+ /* Look for Intel IOMMU */131131+ nentries = (hdr->length - sizeof(*hdr)) /132132+ sizeof(xsdt->table_offset_entry[0]);133133+ for (i = 0; i < nentries; i++) {134134+ hdr = __va(xsdt->table_offset_entry[i]);135135+ if (strncmp(hdr->signature, ACPI_SIG_DMAR,136136+ sizeof(ACPI_SIG_DMAR) - 1) == 0)137137+ return "dig_vtd";138138+ }139139+#endif140140+132141 return "dig";133142#else134143# if defined (CONFIG_IA64_HP_SIM)···157142 return "dig";158143# elif defined (CONFIG_IA64_XEN_GUEST)159144 return "xen";145145+# elif defined(CONFIG_IA64_DIG_VTD)146146+ return "dig_vtd";160147# else161148# error Unknown platform. Fix acpi.c.162149# endif
···11+/*22+ * Dynamic DMA mapping support.33+ */44+55+#include <linux/types.h>66+#include <linux/mm.h>77+#include <linux/string.h>88+#include <linux/pci.h>99+#include <linux/module.h>1010+#include <linux/dmar.h>1111+#include <asm/iommu.h>1212+#include <asm/machvec.h>1313+#include <linux/dma-mapping.h>1414+1515+#include <asm/machvec.h>1616+#include <asm/system.h>1717+1818+#ifdef CONFIG_DMAR1919+2020+#include <linux/kernel.h>2121+#include <linux/string.h>2222+2323+#include <asm/page.h>2424+#include <asm/iommu.h>2525+2626+dma_addr_t bad_dma_address __read_mostly;2727+EXPORT_SYMBOL(bad_dma_address);2828+2929+static int iommu_sac_force __read_mostly;3030+3131+int no_iommu __read_mostly;3232+#ifdef CONFIG_IOMMU_DEBUG3333+int force_iommu __read_mostly = 1;3434+#else3535+int force_iommu __read_mostly;3636+#endif3737+3838+/* Set this to 1 if there is a HW IOMMU in the system */3939+int iommu_detected __read_mostly;4040+4141+/* Dummy device used for NULL arguments (normally ISA). Better would4242+ be probably a smaller DMA mask, but this is bug-to-bug compatible4343+ to i386. 
*/4444+struct device fallback_dev = {4545+ .bus_id = "fallback device",4646+ .coherent_dma_mask = DMA_32BIT_MASK,4747+ .dma_mask = &fallback_dev.coherent_dma_mask,4848+};4949+5050+void __init pci_iommu_alloc(void)5151+{5252+ /*5353+ * The order of these functions is important for5454+ * fall-back/fail-over reasons5555+ */5656+ detect_intel_iommu();5757+5858+#ifdef CONFIG_SWIOTLB5959+ pci_swiotlb_init();6060+#endif6161+}6262+6363+static int __init pci_iommu_init(void)6464+{6565+ if (iommu_detected)6666+ intel_iommu_init();6767+6868+ return 0;6969+}7070+7171+/* Must execute after PCI subsystem */7272+fs_initcall(pci_iommu_init);7373+7474+void pci_iommu_shutdown(void)7575+{7676+ return;7777+}7878+7979+void __init8080+iommu_dma_init(void)8181+{8282+ return;8383+}8484+8585+struct dma_mapping_ops *dma_ops;8686+EXPORT_SYMBOL(dma_ops);8787+8888+int iommu_dma_supported(struct device *dev, u64 mask)8989+{9090+ struct dma_mapping_ops *ops = get_dma_ops(dev);9191+9292+#ifdef CONFIG_PCI9393+ if (mask > 0xffffffff && forbid_dac > 0) {9494+ dev_info(dev, "Disallowing DAC for device\n");9595+ return 0;9696+ }9797+#endif9898+9999+ if (ops->dma_supported_op)100100+ return ops->dma_supported_op(dev, mask);101101+102102+ /* Copied from i386. Doesn't make much sense, because it will103103+ only work for pci_alloc_coherent.104104+ The caller just has to use GFP_DMA in this case. */105105+ if (mask < DMA_24BIT_MASK)106106+ return 0;107107+108108+ /* Tell the device to use SAC when IOMMU force is on. This109109+ allows the driver to use cheaper accesses in some cases.110110+111111+ Problem with this is that if we overflow the IOMMU area and112112+ return DAC as fallback address the device may not handle it113113+ correctly.114114+115115+ As a special case some controllers have a 39bit address116116+ mode that is as efficient as 32bit (aic79xx). Don't force117117+ SAC for these. Assume all masks <= 40 bits are of this118118+ type. 
Normally this doesn't make any difference, but gives119119+ more gentle handling of IOMMU overflow. */120120+ if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {121121+ dev_info(dev, "Force SAC with mask %lx\n", mask);122122+ return 0;123123+ }124124+125125+ return 1;126126+}127127+EXPORT_SYMBOL(iommu_dma_supported);128128+129129+#endif
···116116 */117117#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */118118unsigned long ia64_i_cache_stride_shift = ~0;119119+/*120120+ * "clflush_cache_range()" needs to know what processor dependent stride size to121121+ * use when it flushes cache lines including both d-cache and i-cache.122122+ */123123+/* Safest way to go: 32 bytes by 32 bytes */124124+#define CACHE_STRIDE_SHIFT 5125125+unsigned long ia64_cache_stride_shift = ~0;119126120127/*121128 * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This···854847}855848856849/*857857- * Calculate the max. cache line size.850850+ * Do the following calculations:858851 *859859- * In addition, the minimum of the i-cache stride sizes is calculated for860860- * "flush_icache_range()".852852+ * 1. the max. cache line size.853853+ * 2. the minimum of the i-cache stride sizes for "flush_icache_range()".854854+ * 3. the minimum of the cache stride sizes for "clflush_cache_range()".861855 */862856static void __cpuinit863863-get_max_cacheline_size (void)857857+get_cache_info(void)864858{865859 unsigned long line_size, max = 1;866860 u64 l, levels, unique_caches;···875867 max = SMP_CACHE_BYTES;876868 /* Safest setup for "flush_icache_range()" */877869 ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;870870+ /* Safest setup for "clflush_cache_range()" */871871+ ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;878872 goto out;879873 }880874881875 for (l = 0; l < levels; ++l) {882882- status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,883883- &cci);876876+ /* cache_type (data_or_unified)=2 */877877+ status = ia64_pal_cache_config_info(l, 2, &cci);884878 if (status != 0) {885879 printk(KERN_ERR886880 "%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",···890880 max = SMP_CACHE_BYTES;891881 /* The safest setup for "flush_icache_range()" */892882 cci.pcci_stride = I_CACHE_STRIDE_SHIFT;883883+ /* The safest setup for "clflush_cache_range()" 
*/884884+ ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;893885 cci.pcci_unified = 1;886886+ } else {887887+ if (cci.pcci_stride < ia64_cache_stride_shift)888888+ ia64_cache_stride_shift = cci.pcci_stride;889889+890890+ line_size = 1 << cci.pcci_line_size;891891+ if (line_size > max)892892+ max = line_size;894893 }895895- line_size = 1 << cci.pcci_line_size;896896- if (line_size > max)897897- max = line_size;894894+898895 if (!cci.pcci_unified) {899899- status = ia64_pal_cache_config_info(l,900900- /* cache_type (instruction)= */ 1,901901- &cci);896896+ /* cache_type (instruction)=1*/897897+ status = ia64_pal_cache_config_info(l, 1, &cci);902898 if (status != 0) {903899 printk(KERN_ERR904900 "%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",···958942 }959943#endif960944961961- get_max_cacheline_size();945945+ get_cache_info();962946963947 /*964948 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
+55
arch/ia64/lib/flush.S
	mov ar.lc=r3		// restore ar.lc
	br.ret.sptk.many rp
END(flush_icache_range)

	/*
	 * clflush_cache_range(start,size)
	 *
	 *	Flush cache lines from start to start+size-1.
	 *
	 *	Must deal with range from start to start+size-1 but nothing else
	 *	(need to be careful not to touch addresses that may be
	 *	unmapped).
	 *
	 *	Note: "in0" and "in1" are preserved for debugging purposes.
	 *
	 *	in0 = start address, in1 = size in bytes.  The stride is read
	 *	at run time from ia64_cache_stride_shift, and one "fc" is
	 *	issued per stride, beginning at "start" rounded down to a
	 *	stride boundary.
	 *
	 *	NOTE(review): the C prototype declares size as "int", while in1
	 *	is added to in0 as a full 64-bit register -- confirm callers
	 *	pass a properly extended value.
	 *	NOTE(review): for size == 0 with a stride-aligned start the
	 *	computed "number of strides - 1" is -1 before it is loaded
	 *	into ar.lc -- confirm callers never pass a zero size.
	 */
	.section .kprobes.text,"ax"
GLOBAL_ENTRY(clflush_cache_range)

	.prologue
	alloc	r2=ar.pfs,2,0,0,0
	movl	r3=ia64_cache_stride_shift
	mov	r21=1
	add	r22=in1,in0		// r22: start + size
	;;
	ld8	r20=[r3]		// r20: stride shift
	sub	r22=r22,r0,1		// last byte address
	;;
	shr.u	r23=in0,r20		// start / (stride size)
	shr.u	r22=r22,r20		// (last byte address) / (stride size)
	shl	r21=r21,r20		// r21: stride size in bytes
					//	(1 << stride shift)
	;;
	sub	r8=r22,r23		// number of strides - 1
	shl	r24=r23,r20		// r24: addresses for "fc" =
					//	"start" rounded down to stride
					//	boundary
	.save	ar.lc,r3
	mov	r3=ar.lc		// save ar.lc
	;;

	.body
	mov	ar.lc=r8		// loop count = number of strides - 1
	;;
	/*
	 * 32 byte aligned loop, even number of (actually 2) bundles
	 */
.Loop_fc:
	fc	r24			// issuable on M0 only
	add	r24=r21,r24		// we flush "stride size" bytes per iteration
	nop.i	0
	br.cloop.sptk.few .Loop_fc
	;;
	sync.i
	;;
	srlz.i
	;;
	mov	ar.lc=r3		// restore ar.lc
	br.ret.sptk.many rp
END(clflush_cache_range)