Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'powerpc-5.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates and fixes from Michael Ellerman:
"A bit of a mixture of things, tying up some loose ends.

There's the removal of the nvlink code, which depended on a commit in
the vfio tree. Then the enablement of huge vmalloc which was in next
for a few weeks but got dropped due to conflicts. And there's also a
few fixes.

Summary:

- Remove the nvlink support now that its only user has been removed.

- Enable huge vmalloc mappings for Radix MMU (P9).

- Fix KVM conversion to gfn-based MMU notifier callbacks.

- Fix a kexec/kdump crash with hot plugged CPUs.

- Fix boot failure on 32-bit with CONFIG_STACKPROTECTOR.

- Restore alphabetic order of the selects under CONFIG_PPC.

Thanks to: Christophe Leroy, Christoph Hellwig, Nicholas Piggin,
Sandipan Das, and Sourabh Jain"

* tag 'powerpc-5.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks
powerpc/kconfig: Restore alphabetic order of the selects under CONFIG_PPC
powerpc/32: Fix boot failure with CONFIG_STACKPROTECTOR
powerpc/powernv/memtrace: Fix dcache flushing
powerpc/kexec_file: Use current CPU info while setting up FDT
powerpc/64s/radix: Enable huge vmalloc mappings
powerpc/powernv: remove the nvlink support

+180 -995
+2
Documentation/admin-guide/kernel-parameters.txt
··· 3282 3282 3283 3283 nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings. 3284 3284 3285 + nohugevmalloc [PPC] Disable kernel huge vmalloc mappings. 3286 + 3285 3287 nosmt [KNL,S390] Disable symmetric multithreading (SMT). 3286 3288 Equivalent to smt=1. 3287 3289
+23 -22
arch/powerpc/Kconfig
··· 120 120 select ARCH_32BIT_OFF_T if PPC32 121 121 select ARCH_ENABLE_MEMORY_HOTPLUG 122 122 select ARCH_ENABLE_MEMORY_HOTREMOVE 123 + select ARCH_HAS_COPY_MC if PPC64 123 124 select ARCH_HAS_DEBUG_VIRTUAL 124 125 select ARCH_HAS_DEBUG_VM_PGTABLE 125 126 select ARCH_HAS_DEVMEM_IS_ALLOWED 127 + select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES 126 128 select ARCH_HAS_ELF_RANDOMIZE 127 129 select ARCH_HAS_FORTIFY_SOURCE 128 130 select ARCH_HAS_GCOV_PROFILE_ALL 129 - select ARCH_HAS_KCOV 130 131 select ARCH_HAS_HUGEPD if HUGETLB_PAGE 131 - select ARCH_HAS_MEMREMAP_COMPAT_ALIGN 132 - select ARCH_HAS_MMIOWB if PPC64 133 - select ARCH_HAS_PHYS_TO_DMA 134 - select ARCH_HAS_PMEM_API 135 - select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 136 - select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 137 - select ARCH_HAS_PTE_SPECIAL 132 + select ARCH_HAS_KCOV 138 133 select ARCH_HAS_MEMBARRIER_CALLBACKS 139 134 select ARCH_HAS_MEMBARRIER_SYNC_CORE 135 + select ARCH_HAS_MEMREMAP_COMPAT_ALIGN 136 + select ARCH_HAS_MMIOWB if PPC64 137 + select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE 138 + select ARCH_HAS_PHYS_TO_DMA 139 + select ARCH_HAS_PMEM_API 140 + select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64 141 + select ARCH_HAS_PTE_SPECIAL 140 142 select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 141 143 select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION) 142 144 select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST 143 145 select ARCH_HAS_UACCESS_FLUSHCACHE 144 - select ARCH_HAS_COPY_MC if PPC64 145 146 select ARCH_HAS_UBSAN_SANITIZE_ALL 146 147 select ARCH_HAVE_NMI_SAFE_CMPXCHG 147 148 select ARCH_KEEP_MEMBLOCK ··· 165 164 select BUILDTIME_TABLE_SORT 166 165 select CLONE_BACKWARDS 167 166 select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN 168 - select DMA_OPS if PPC64 169 167 select DMA_OPS_BYPASS if PPC64 170 - select ARCH_HAS_DMA_MAP_DIRECT if PPC64 && PPC_PSERIES 168 + select DMA_OPS if PPC64 171 169 select DYNAMIC_FTRACE if 
FUNCTION_TRACER 172 170 select EDAC_ATOMIC_SCRUB 173 171 select EDAC_SUPPORT ··· 186 186 select GENERIC_TIME_VSYSCALL 187 187 select GENERIC_VDSO_TIME_NS 188 188 select HAVE_ARCH_AUDITSYSCALL 189 + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP 189 190 select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU 190 191 select HAVE_ARCH_JUMP_LABEL 191 192 select HAVE_ARCH_JUMP_LABEL_RELATIVE 192 193 select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14 193 194 select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14 194 - select HAVE_ARCH_KGDB 195 195 select HAVE_ARCH_KFENCE if PPC32 196 + select HAVE_ARCH_KGDB 196 197 select HAVE_ARCH_MMAP_RND_BITS 197 198 select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT 198 199 select HAVE_ARCH_NVRAM_OPS 199 200 select HAVE_ARCH_SECCOMP_FILTER 200 201 select HAVE_ARCH_TRACEHOOK 201 202 select HAVE_ASM_MODVERSIONS 202 - select HAVE_C_RECORDMCOUNT 203 - select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) 204 - select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) 205 203 select HAVE_CONTEXT_TRACKING if PPC64 204 + select HAVE_C_RECORDMCOUNT 206 205 select HAVE_DEBUG_KMEMLEAK 207 206 select HAVE_DEBUG_STACKOVERFLOW 208 207 select HAVE_DYNAMIC_FTRACE ··· 215 216 select HAVE_FUNCTION_TRACER 216 217 select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC 217 218 select HAVE_GENERIC_VDSO 219 + select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP 220 + select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH 218 221 select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) 219 222 select HAVE_IDE 220 223 select HAVE_IOREMAP_PROT 221 224 select HAVE_IRQ_EXIT_ON_IRQ_STACK 225 + select HAVE_IRQ_TIME_ACCOUNTING 222 226 select HAVE_KERNEL_GZIP 223 227 select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE 224 228 
select HAVE_KERNEL_LZO if DEFAULT_UIMAGE ··· 233 231 select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS 234 232 select HAVE_MOD_ARCH_SPECIFIC 235 233 select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) 236 - select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP 237 234 select HAVE_OPTPROBES 238 235 select HAVE_PERF_EVENTS 239 236 select HAVE_PERF_EVENTS_NMI if PPC64 240 - select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH 241 237 select HAVE_PERF_REGS 242 238 select HAVE_PERF_USER_STACK_DUMP 243 - select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE 244 - select MMU_GATHER_RCU_TABLE_FREE 245 - select MMU_GATHER_PAGE_SIZE 246 239 select HAVE_REGS_AND_STACK_ACCESS_API 247 240 select HAVE_RELIABLE_STACKTRACE 241 + select HAVE_RSEQ 248 242 select HAVE_SOFTIRQ_ON_OWN_STACK 243 + select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) 244 + select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13) 249 245 select HAVE_SYSCALL_TRACEPOINTS 250 246 select HAVE_VIRT_CPU_ACCOUNTING 251 - select HAVE_IRQ_TIME_ACCOUNTING 252 - select HAVE_RSEQ 247 + select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE 253 248 select IOMMU_HELPER if PPC64 254 249 select IRQ_DOMAIN 255 250 select IRQ_FORCED_THREADING 251 + select MMU_GATHER_PAGE_SIZE 252 + select MMU_GATHER_RCU_TABLE_FREE 256 253 select MODULES_USE_ELF_RELA 257 254 select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE 258 255 select NEED_SG_DMA_LENGTH
+1 -1
arch/powerpc/include/asm/kvm_book3s.h
··· 210 210 unsigned int lpid); 211 211 extern int kvmppc_radix_init(void); 212 212 extern void kvmppc_radix_exit(void); 213 - extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 213 + extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 214 214 unsigned long gfn); 215 215 extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 216 216 unsigned long gfn);
-3
arch/powerpc/include/asm/opal.h
··· 28 28 29 29 /* API functions */ 30 30 int64_t opal_invalid_call(void); 31 - int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf); 32 - int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr, 33 - uint64_t bdf); 34 31 int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid, 35 32 uint64_t lpcr); 36 33 int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
-1
arch/powerpc/include/asm/pci-bridge.h
··· 126 126 #endif /* CONFIG_PPC64 */ 127 127 128 128 void *private_data; 129 - struct npu *npu; 130 129 }; 131 130 132 131 /* These are used for config access before all the PCI probing
-7
arch/powerpc/include/asm/pci.h
··· 119 119 120 120 #endif /* __KERNEL__ */ 121 121 122 - extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev); 123 - extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index); 124 - extern int pnv_npu2_init(struct pci_controller *hose); 125 - extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, 126 - unsigned long msr); 127 - extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev); 128 - 129 122 #endif /* __ASM_POWERPC_PCI_H */
+13 -5
arch/powerpc/kernel/module.c
··· 8 8 #include <linux/moduleloader.h> 9 9 #include <linux/err.h> 10 10 #include <linux/vmalloc.h> 11 + #include <linux/mm.h> 11 12 #include <linux/bug.h> 12 13 #include <asm/module.h> 13 14 #include <linux/uaccess.h> ··· 89 88 return 0; 90 89 } 91 90 92 - #ifdef MODULES_VADDR 93 91 static __always_inline void * 94 92 __module_alloc(unsigned long size, unsigned long start, unsigned long end) 95 93 { 96 - return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, 97 - PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, 98 - __builtin_return_address(0)); 94 + /* 95 + * Don't do huge page allocations for modules yet until more testing 96 + * is done. STRICT_MODULE_RWX may require extra work to support this 97 + * too. 98 + */ 99 + return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC, 100 + VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP, 101 + NUMA_NO_NODE, __builtin_return_address(0)); 99 102 } 100 103 101 104 void *module_alloc(unsigned long size) 102 105 { 106 + #ifdef MODULES_VADDR 103 107 unsigned long limit = (unsigned long)_etext - SZ_32M; 104 108 void *ptr = NULL; 105 109 ··· 118 112 ptr = __module_alloc(size, MODULES_VADDR, MODULES_END); 119 113 120 114 return ptr; 121 - } 115 + #else 116 + return __module_alloc(size, VMALLOC_START, VMALLOC_END); 122 117 #endif 118 + }
+92
arch/powerpc/kexec/file_load_64.c
··· 951 951 } 952 952 953 953 /** 954 + * add_node_props - Reads node properties from device node structure and add 955 + * them to fdt. 956 + * @fdt: Flattened device tree of the kernel 957 + * @node_offset: offset of the node to add a property at 958 + * @dn: device node pointer 959 + * 960 + * Returns 0 on success, negative errno on error. 961 + */ 962 + static int add_node_props(void *fdt, int node_offset, const struct device_node *dn) 963 + { 964 + int ret = 0; 965 + struct property *pp; 966 + 967 + if (!dn) 968 + return -EINVAL; 969 + 970 + for_each_property_of_node(dn, pp) { 971 + ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length); 972 + if (ret < 0) { 973 + pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret)); 974 + return ret; 975 + } 976 + } 977 + return ret; 978 + } 979 + 980 + /** 981 + * update_cpus_node - Update cpus node of flattened device tree using of_root 982 + * device node. 983 + * @fdt: Flattened device tree of the kernel. 984 + * 985 + * Returns 0 on success, negative errno on error. 
986 + */ 987 + static int update_cpus_node(void *fdt) 988 + { 989 + struct device_node *cpus_node, *dn; 990 + int cpus_offset, cpus_subnode_offset, ret = 0; 991 + 992 + cpus_offset = fdt_path_offset(fdt, "/cpus"); 993 + if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) { 994 + pr_err("Malformed device tree: error reading /cpus node: %s\n", 995 + fdt_strerror(cpus_offset)); 996 + return cpus_offset; 997 + } 998 + 999 + if (cpus_offset > 0) { 1000 + ret = fdt_del_node(fdt, cpus_offset); 1001 + if (ret < 0) { 1002 + pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret)); 1003 + return -EINVAL; 1004 + } 1005 + } 1006 + 1007 + /* Add cpus node to fdt */ 1008 + cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus"); 1009 + if (cpus_offset < 0) { 1010 + pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset)); 1011 + return -EINVAL; 1012 + } 1013 + 1014 + /* Add cpus node properties */ 1015 + cpus_node = of_find_node_by_path("/cpus"); 1016 + ret = add_node_props(fdt, cpus_offset, cpus_node); 1017 + of_node_put(cpus_node); 1018 + if (ret < 0) 1019 + return ret; 1020 + 1021 + /* Loop through all subnodes of cpus and add them to fdt */ 1022 + for_each_node_by_type(dn, "cpu") { 1023 + cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name); 1024 + if (cpus_subnode_offset < 0) { 1025 + pr_err("Unable to add %s subnode: %s\n", dn->full_name, 1026 + fdt_strerror(cpus_subnode_offset)); 1027 + ret = cpus_subnode_offset; 1028 + goto out; 1029 + } 1030 + 1031 + ret = add_node_props(fdt, cpus_subnode_offset, dn); 1032 + if (ret < 0) 1033 + goto out; 1034 + } 1035 + out: 1036 + of_node_put(dn); 1037 + return ret; 1038 + } 1039 + 1040 + /** 954 1041 * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel 955 1042 * being loaded. 956 1043 * @image: kexec image being loaded. ··· 1092 1005 goto out; 1093 1006 } 1094 1007 } 1008 + 1009 + /* Update cpus nodes information to account hotplug CPUs. 
*/ 1010 + ret = update_cpus_node(fdt); 1011 + if (ret < 0) 1012 + goto out; 1095 1013 1096 1014 /* Update memory reserve map */ 1097 1015 ret = get_reserved_memory_ranges(&rmem);
+34 -14
arch/powerpc/kvm/book3s_64_mmu_hv.c
··· 795 795 } 796 796 } 797 797 798 - static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, 798 + static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, 799 799 unsigned long gfn) 800 800 { 801 801 unsigned long i; ··· 829 829 unlock_rmap(rmapp); 830 830 __unlock_hpte(hptep, be64_to_cpu(hptep[0])); 831 831 } 832 - return false; 833 832 } 834 833 835 834 bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) 836 835 { 837 - if (kvm_is_radix(kvm)) 838 - return kvm_unmap_radix(kvm, range->slot, range->start); 836 + gfn_t gfn; 839 837 840 - return kvm_unmap_rmapp(kvm, range->slot, range->start); 838 + if (kvm_is_radix(kvm)) { 839 + for (gfn = range->start; gfn < range->end; gfn++) 840 + kvm_unmap_radix(kvm, range->slot, gfn); 841 + } else { 842 + for (gfn = range->start; gfn < range->end; gfn++) 843 + kvm_unmap_rmapp(kvm, range->slot, range->start); 844 + } 845 + 846 + return false; 841 847 } 842 848 843 849 void kvmppc_core_flush_memslot_hv(struct kvm *kvm, ··· 930 924 931 925 bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) 932 926 { 933 - if (kvm_is_radix(kvm)) 934 - kvm_age_radix(kvm, range->slot, range->start); 927 + gfn_t gfn; 928 + bool ret = false; 935 929 936 - return kvm_age_rmapp(kvm, range->slot, range->start); 930 + if (kvm_is_radix(kvm)) { 931 + for (gfn = range->start; gfn < range->end; gfn++) 932 + ret |= kvm_age_radix(kvm, range->slot, gfn); 933 + } else { 934 + for (gfn = range->start; gfn < range->end; gfn++) 935 + ret |= kvm_age_rmapp(kvm, range->slot, gfn); 936 + } 937 + 938 + return ret; 937 939 } 938 940 939 941 static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, ··· 979 965 980 966 bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) 981 967 { 982 - if (kvm_is_radix(kvm)) 983 - kvm_test_age_radix(kvm, range->slot, range->start); 968 + WARN_ON(range->start + 1 != range->end); 984 969 985 - return 
kvm_test_age_rmapp(kvm, range->slot, range->start); 970 + if (kvm_is_radix(kvm)) 971 + return kvm_test_age_radix(kvm, range->slot, range->start); 972 + else 973 + return kvm_test_age_rmapp(kvm, range->slot, range->start); 986 974 } 987 975 988 976 bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) 989 977 { 990 - if (kvm_is_radix(kvm)) 991 - return kvm_unmap_radix(kvm, range->slot, range->start); 978 + WARN_ON(range->start + 1 != range->end); 992 979 993 - return kvm_unmap_rmapp(kvm, range->slot, range->start); 980 + if (kvm_is_radix(kvm)) 981 + kvm_unmap_radix(kvm, range->slot, range->start); 982 + else 983 + kvm_unmap_rmapp(kvm, range->slot, range->start); 984 + 985 + return false; 994 986 } 995 987 996 988 static int vcpus_running(struct kvm *kvm)
+2 -3
arch/powerpc/kvm/book3s_64_mmu_radix.c
··· 993 993 } 994 994 995 995 /* Called with kvm->mmu_lock held */ 996 - bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 996 + void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, 997 997 unsigned long gfn) 998 998 { 999 999 pte_t *ptep; ··· 1002 1002 1003 1003 if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) { 1004 1004 uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT); 1005 - return false; 1005 + return; 1006 1006 } 1007 1007 1008 1008 ptep = find_kvm_secondary_pte(kvm, gpa, &shift); 1009 1009 if (ptep && pte_present(*ptep)) 1010 1010 kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, 1011 1011 kvm->arch.lpid); 1012 - return false; 1013 1012 } 1014 1013 1015 1014 /* Called with kvm->mmu_lock held */
+3
arch/powerpc/lib/Makefile
··· 5 5 6 6 ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) 7 7 8 + CFLAGS_code-patching.o += -fno-stack-protector 9 + CFLAGS_feature-fixups.o += -fno-stack-protector 10 + 8 11 CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE) 9 12 CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE) 10 13
+1 -1
arch/powerpc/platforms/powernv/Makefile
··· 10 10 obj-$(CONFIG_FA_DUMP) += opal-fadump.o 11 11 obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o 12 12 obj-$(CONFIG_OPAL_CORE) += opal-core.o 13 - obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o 13 + obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o 14 14 obj-$(CONFIG_PCI_IOV) += pci-sriov.o 15 15 obj-$(CONFIG_CXL_BASE) += pci-cxl.o 16 16 obj-$(CONFIG_EEH) += eeh-powernv.o
+2 -2
arch/powerpc/platforms/powernv/memtrace.c
··· 104 104 * Before we go ahead and use this range as cache inhibited range 105 105 * flush the cache. 106 106 */ 107 - flush_dcache_range_chunked(PFN_PHYS(start_pfn), 108 - PFN_PHYS(start_pfn + nr_pages), 107 + flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn), 108 + (unsigned long)pfn_to_kaddr(start_pfn + nr_pages), 109 109 FLUSH_CHUNK_SIZE); 110 110 } 111 111
-705
arch/powerpc/platforms/powernv/npu-dma.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* 3 - * This file implements the DMA operations for NVLink devices. The NPU 4 - * devices all point to the same iommu table as the parent PCI device. 5 - * 6 - * Copyright Alistair Popple, IBM Corporation 2015. 7 - */ 8 - 9 - #include <linux/mmu_notifier.h> 10 - #include <linux/mmu_context.h> 11 - #include <linux/of.h> 12 - #include <linux/pci.h> 13 - #include <linux/memblock.h> 14 - #include <linux/sizes.h> 15 - 16 - #include <asm/debugfs.h> 17 - #include <asm/powernv.h> 18 - #include <asm/ppc-pci.h> 19 - #include <asm/opal.h> 20 - 21 - #include "pci.h" 22 - 23 - static struct pci_dev *get_pci_dev(struct device_node *dn) 24 - { 25 - struct pci_dn *pdn = PCI_DN(dn); 26 - struct pci_dev *pdev; 27 - 28 - pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus), 29 - pdn->busno, pdn->devfn); 30 - 31 - /* 32 - * pci_get_domain_bus_and_slot() increased the reference count of 33 - * the PCI device, but callers don't need that actually as the PE 34 - * already holds a reference to the device. Since callers aren't 35 - * aware of the reference count change, call pci_dev_put() now to 36 - * avoid leaks. 37 - */ 38 - if (pdev) 39 - pci_dev_put(pdev); 40 - 41 - return pdev; 42 - } 43 - 44 - /* Given a NPU device get the associated PCI device. */ 45 - struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev) 46 - { 47 - struct device_node *dn; 48 - struct pci_dev *gpdev; 49 - 50 - if (WARN_ON(!npdev)) 51 - return NULL; 52 - 53 - if (WARN_ON(!npdev->dev.of_node)) 54 - return NULL; 55 - 56 - /* Get assoicated PCI device */ 57 - dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0); 58 - if (!dn) 59 - return NULL; 60 - 61 - gpdev = get_pci_dev(dn); 62 - of_node_put(dn); 63 - 64 - return gpdev; 65 - } 66 - EXPORT_SYMBOL(pnv_pci_get_gpu_dev); 67 - 68 - /* Given the real PCI device get a linked NPU device. 
*/ 69 - struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) 70 - { 71 - struct device_node *dn; 72 - struct pci_dev *npdev; 73 - 74 - if (WARN_ON(!gpdev)) 75 - return NULL; 76 - 77 - /* Not all PCI devices have device-tree nodes */ 78 - if (!gpdev->dev.of_node) 79 - return NULL; 80 - 81 - /* Get assoicated PCI device */ 82 - dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index); 83 - if (!dn) 84 - return NULL; 85 - 86 - npdev = get_pci_dev(dn); 87 - of_node_put(dn); 88 - 89 - return npdev; 90 - } 91 - EXPORT_SYMBOL(pnv_pci_get_npu_dev); 92 - 93 - #ifdef CONFIG_IOMMU_API 94 - /* 95 - * Returns the PE assoicated with the PCI device of the given 96 - * NPU. Returns the linked pci device if pci_dev != NULL. 97 - */ 98 - static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe, 99 - struct pci_dev **gpdev) 100 - { 101 - struct pnv_phb *phb; 102 - struct pci_controller *hose; 103 - struct pci_dev *pdev; 104 - struct pnv_ioda_pe *pe; 105 - struct pci_dn *pdn; 106 - 107 - pdev = pnv_pci_get_gpu_dev(npe->pdev); 108 - if (!pdev) 109 - return NULL; 110 - 111 - pdn = pci_get_pdn(pdev); 112 - if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE)) 113 - return NULL; 114 - 115 - hose = pci_bus_to_host(pdev->bus); 116 - phb = hose->private_data; 117 - pe = &phb->ioda.pe_array[pdn->pe_number]; 118 - 119 - if (gpdev) 120 - *gpdev = pdev; 121 - 122 - return pe; 123 - } 124 - 125 - static long pnv_npu_unset_window(struct iommu_table_group *table_group, 126 - int num); 127 - 128 - static long pnv_npu_set_window(struct iommu_table_group *table_group, int num, 129 - struct iommu_table *tbl) 130 - { 131 - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, 132 - table_group); 133 - struct pnv_phb *phb = npe->phb; 134 - int64_t rc; 135 - const unsigned long size = tbl->it_indirect_levels ? 
136 - tbl->it_level_size : tbl->it_size; 137 - const __u64 start_addr = tbl->it_offset << tbl->it_page_shift; 138 - const __u64 win_size = tbl->it_size << tbl->it_page_shift; 139 - int num2 = (num == 0) ? 1 : 0; 140 - 141 - /* NPU has just one TVE so if there is another table, remove it first */ 142 - if (npe->table_group.tables[num2]) 143 - pnv_npu_unset_window(&npe->table_group, num2); 144 - 145 - pe_info(npe, "Setting up window %llx..%llx pg=%lx\n", 146 - start_addr, start_addr + win_size - 1, 147 - IOMMU_PAGE_SIZE(tbl)); 148 - 149 - rc = opal_pci_map_pe_dma_window(phb->opal_id, 150 - npe->pe_number, 151 - npe->pe_number, 152 - tbl->it_indirect_levels + 1, 153 - __pa(tbl->it_base), 154 - size << 3, 155 - IOMMU_PAGE_SIZE(tbl)); 156 - if (rc) { 157 - pe_err(npe, "Failed to configure TCE table, err %lld\n", rc); 158 - return rc; 159 - } 160 - pnv_pci_ioda2_tce_invalidate_entire(phb, false); 161 - 162 - /* Add the table to the list so its TCE cache will get invalidated */ 163 - pnv_pci_link_table_and_group(phb->hose->node, num, 164 - tbl, &npe->table_group); 165 - 166 - return 0; 167 - } 168 - 169 - static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num) 170 - { 171 - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, 172 - table_group); 173 - struct pnv_phb *phb = npe->phb; 174 - int64_t rc; 175 - 176 - if (!npe->table_group.tables[num]) 177 - return 0; 178 - 179 - pe_info(npe, "Removing DMA window\n"); 180 - 181 - rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number, 182 - npe->pe_number, 183 - 0/* levels */, 0/* table address */, 184 - 0/* table size */, 0/* page size */); 185 - if (rc) { 186 - pe_err(npe, "Unmapping failed, ret = %lld\n", rc); 187 - return rc; 188 - } 189 - pnv_pci_ioda2_tce_invalidate_entire(phb, false); 190 - 191 - pnv_pci_unlink_table_and_group(npe->table_group.tables[num], 192 - &npe->table_group); 193 - 194 - return 0; 195 - } 196 - 197 - /* Switch ownership from platform code to 
external user (e.g. VFIO) */ 198 - static void pnv_npu_take_ownership(struct iommu_table_group *table_group) 199 - { 200 - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, 201 - table_group); 202 - struct pnv_phb *phb = npe->phb; 203 - int64_t rc; 204 - struct pci_dev *gpdev = NULL; 205 - 206 - /* 207 - * Note: NPU has just a single TVE in the hardware which means that 208 - * while used by the kernel, it can have either 32bit window or 209 - * DMA bypass but never both. So we deconfigure 32bit window only 210 - * if it was enabled at the moment of ownership change. 211 - */ 212 - if (npe->table_group.tables[0]) { 213 - pnv_npu_unset_window(&npe->table_group, 0); 214 - return; 215 - } 216 - 217 - /* Disable bypass */ 218 - rc = opal_pci_map_pe_dma_window_real(phb->opal_id, 219 - npe->pe_number, npe->pe_number, 220 - 0 /* bypass base */, 0); 221 - if (rc) { 222 - pe_err(npe, "Failed to disable bypass, err %lld\n", rc); 223 - return; 224 - } 225 - pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false); 226 - 227 - get_gpu_pci_dev_and_pe(npe, &gpdev); 228 - if (gpdev) 229 - pnv_npu2_unmap_lpar_dev(gpdev); 230 - } 231 - 232 - static void pnv_npu_release_ownership(struct iommu_table_group *table_group) 233 - { 234 - struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe, 235 - table_group); 236 - struct pci_dev *gpdev = NULL; 237 - 238 - get_gpu_pci_dev_and_pe(npe, &gpdev); 239 - if (gpdev) 240 - pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV); 241 - } 242 - 243 - static struct iommu_table_group_ops pnv_pci_npu_ops = { 244 - .set_window = pnv_npu_set_window, 245 - .unset_window = pnv_npu_unset_window, 246 - .take_ownership = pnv_npu_take_ownership, 247 - .release_ownership = pnv_npu_release_ownership, 248 - }; 249 - #endif /* !CONFIG_IOMMU_API */ 250 - 251 - /* 252 - * NPU2 ATS 253 - */ 254 - /* Maximum possible number of ATSD MMIO registers per NPU */ 255 - #define NV_NMMU_ATSD_REGS 8 256 - #define NV_NPU_MAX_PE_NUM 16 257 
- 258 - /* 259 - * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or 260 - * up to 3 x (GPU + 2xNPUs) (POWER9). 261 - */ 262 - struct npu_comp { 263 - struct iommu_table_group table_group; 264 - int pe_num; 265 - struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM]; 266 - }; 267 - 268 - /* An NPU descriptor, valid for POWER9 only */ 269 - struct npu { 270 - int index; 271 - struct npu_comp npucomp; 272 - }; 273 - 274 - #ifdef CONFIG_IOMMU_API 275 - static long pnv_npu_peers_create_table_userspace( 276 - struct iommu_table_group *table_group, 277 - int num, __u32 page_shift, __u64 window_size, __u32 levels, 278 - struct iommu_table **ptbl) 279 - { 280 - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, 281 - table_group); 282 - 283 - if (!npucomp->pe_num || !npucomp->pe[0] || 284 - !npucomp->pe[0]->table_group.ops || 285 - !npucomp->pe[0]->table_group.ops->create_table) 286 - return -EFAULT; 287 - 288 - return npucomp->pe[0]->table_group.ops->create_table( 289 - &npucomp->pe[0]->table_group, num, page_shift, 290 - window_size, levels, ptbl); 291 - } 292 - 293 - static long pnv_npu_peers_set_window(struct iommu_table_group *table_group, 294 - int num, struct iommu_table *tbl) 295 - { 296 - int i, j; 297 - long ret = 0; 298 - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, 299 - table_group); 300 - 301 - for (i = 0; i < npucomp->pe_num; ++i) { 302 - struct pnv_ioda_pe *pe = npucomp->pe[i]; 303 - 304 - if (!pe->table_group.ops->set_window) 305 - continue; 306 - 307 - ret = pe->table_group.ops->set_window(&pe->table_group, 308 - num, tbl); 309 - if (ret) 310 - break; 311 - } 312 - 313 - if (ret) { 314 - for (j = 0; j < i; ++j) { 315 - struct pnv_ioda_pe *pe = npucomp->pe[j]; 316 - 317 - if (!pe->table_group.ops->unset_window) 318 - continue; 319 - 320 - ret = pe->table_group.ops->unset_window( 321 - &pe->table_group, num); 322 - if (ret) 323 - break; 324 - } 325 - } else { 326 - table_group->tables[num] = 
iommu_tce_table_get(tbl); 327 - } 328 - 329 - return ret; 330 - } 331 - 332 - static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group, 333 - int num) 334 - { 335 - int i, j; 336 - long ret = 0; 337 - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, 338 - table_group); 339 - 340 - for (i = 0; i < npucomp->pe_num; ++i) { 341 - struct pnv_ioda_pe *pe = npucomp->pe[i]; 342 - 343 - WARN_ON(npucomp->table_group.tables[num] != 344 - table_group->tables[num]); 345 - if (!npucomp->table_group.tables[num]) 346 - continue; 347 - 348 - if (!pe->table_group.ops->unset_window) 349 - continue; 350 - 351 - ret = pe->table_group.ops->unset_window(&pe->table_group, num); 352 - if (ret) 353 - break; 354 - } 355 - 356 - if (ret) { 357 - for (j = 0; j < i; ++j) { 358 - struct pnv_ioda_pe *pe = npucomp->pe[j]; 359 - 360 - if (!npucomp->table_group.tables[num]) 361 - continue; 362 - 363 - if (!pe->table_group.ops->set_window) 364 - continue; 365 - 366 - ret = pe->table_group.ops->set_window(&pe->table_group, 367 - num, table_group->tables[num]); 368 - if (ret) 369 - break; 370 - } 371 - } else if (table_group->tables[num]) { 372 - iommu_tce_table_put(table_group->tables[num]); 373 - table_group->tables[num] = NULL; 374 - } 375 - 376 - return ret; 377 - } 378 - 379 - static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group) 380 - { 381 - int i; 382 - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, 383 - table_group); 384 - 385 - for (i = 0; i < npucomp->pe_num; ++i) { 386 - struct pnv_ioda_pe *pe = npucomp->pe[i]; 387 - 388 - if (!pe->table_group.ops || 389 - !pe->table_group.ops->take_ownership) 390 - continue; 391 - pe->table_group.ops->take_ownership(&pe->table_group); 392 - } 393 - } 394 - 395 - static void pnv_npu_peers_release_ownership( 396 - struct iommu_table_group *table_group) 397 - { 398 - int i; 399 - struct npu_comp *npucomp = container_of(table_group, struct npu_comp, 400 - table_group); 
401 - 402 - for (i = 0; i < npucomp->pe_num; ++i) { 403 - struct pnv_ioda_pe *pe = npucomp->pe[i]; 404 - 405 - if (!pe->table_group.ops || 406 - !pe->table_group.ops->release_ownership) 407 - continue; 408 - pe->table_group.ops->release_ownership(&pe->table_group); 409 - } 410 - } 411 - 412 - static struct iommu_table_group_ops pnv_npu_peers_ops = { 413 - .get_table_size = pnv_pci_ioda2_get_table_size, 414 - .create_table = pnv_npu_peers_create_table_userspace, 415 - .set_window = pnv_npu_peers_set_window, 416 - .unset_window = pnv_npu_peers_unset_window, 417 - .take_ownership = pnv_npu_peers_take_ownership, 418 - .release_ownership = pnv_npu_peers_release_ownership, 419 - }; 420 - 421 - static void pnv_comp_attach_table_group(struct npu_comp *npucomp, 422 - struct pnv_ioda_pe *pe) 423 - { 424 - if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM)) 425 - return; 426 - 427 - npucomp->pe[npucomp->pe_num] = pe; 428 - ++npucomp->pe_num; 429 - } 430 - 431 - static struct iommu_table_group * 432 - pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe) 433 - { 434 - struct iommu_table_group *compound_group; 435 - struct npu_comp *npucomp; 436 - struct pci_dev *gpdev = NULL; 437 - struct pci_controller *hose; 438 - struct pci_dev *npdev = NULL; 439 - 440 - list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) { 441 - npdev = pnv_pci_get_npu_dev(gpdev, 0); 442 - if (npdev) 443 - break; 444 - } 445 - 446 - if (!npdev) 447 - /* It is not an NPU attached device, skip */ 448 - return NULL; 449 - 450 - hose = pci_bus_to_host(npdev->bus); 451 - 452 - if (hose->npu) { 453 - /* P9 case: compound group is per-NPU (all gpus, all links) */ 454 - npucomp = &hose->npu->npucomp; 455 - } else { 456 - /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */ 457 - npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL); 458 - } 459 - 460 - compound_group = &npucomp->table_group; 461 - if (!compound_group->group) { 462 - compound_group->ops = &pnv_npu_peers_ops; 463 - 
iommu_register_group(compound_group, hose->global_number, 464 - pe->pe_number); 465 - 466 - /* Steal capabilities from a GPU PE */ 467 - compound_group->max_dynamic_windows_supported = 468 - pe->table_group.max_dynamic_windows_supported; 469 - compound_group->tce32_start = pe->table_group.tce32_start; 470 - compound_group->tce32_size = pe->table_group.tce32_size; 471 - compound_group->max_levels = pe->table_group.max_levels; 472 - if (!compound_group->pgsizes) 473 - compound_group->pgsizes = pe->table_group.pgsizes; 474 - } 475 - 476 - /* 477 - * The gpu would have been added to the iommu group that's created 478 - * for the PE. Pull it out now. 479 - */ 480 - iommu_del_device(&gpdev->dev); 481 - 482 - /* 483 - * I'm not sure this is strictly required, but it's probably a good idea 484 - * since the table_group for the PE is going to be attached to the 485 - * compound table group. If we leave the PE's iommu group active then 486 - * we might have the same table_group being modifiable via two sepeate 487 - * iommu groups. 488 - */ 489 - iommu_group_put(pe->table_group.group); 490 - 491 - /* now put the GPU into the compound group */ 492 - pnv_comp_attach_table_group(npucomp, pe); 493 - iommu_add_device(compound_group, &gpdev->dev); 494 - 495 - return compound_group; 496 - } 497 - 498 - static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe) 499 - { 500 - struct iommu_table_group *table_group; 501 - struct npu_comp *npucomp; 502 - struct pci_dev *gpdev = NULL; 503 - struct pci_dev *npdev; 504 - struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev); 505 - 506 - WARN_ON(!(pe->flags & PNV_IODA_PE_DEV)); 507 - if (!gpe) 508 - return NULL; 509 - 510 - /* 511 - * IODA2 bridges get this set up from pci_controller_ops::setup_bridge 512 - * but NPU bridges do not have this hook defined so we do it here. 513 - * We do not setup other table group parameters as they won't be used 514 - * anyway - NVLink bridges are subordinate PEs. 
515 - */ 516 - pe->table_group.ops = &pnv_pci_npu_ops; 517 - 518 - table_group = iommu_group_get_iommudata( 519 - iommu_group_get(&gpdev->dev)); 520 - 521 - /* 522 - * On P9 NPU PHB and PCI PHB support different page sizes, 523 - * keep only matching. We expect here that NVLink bridge PE pgsizes is 524 - * initialized by the caller. 525 - */ 526 - table_group->pgsizes &= pe->table_group.pgsizes; 527 - npucomp = container_of(table_group, struct npu_comp, table_group); 528 - pnv_comp_attach_table_group(npucomp, pe); 529 - 530 - list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) { 531 - struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev); 532 - 533 - if (gpdevtmp != gpdev) 534 - continue; 535 - 536 - iommu_add_device(table_group, &npdev->dev); 537 - } 538 - 539 - return table_group; 540 - } 541 - 542 - void pnv_pci_npu_setup_iommu_groups(void) 543 - { 544 - struct pci_controller *hose; 545 - struct pnv_phb *phb; 546 - struct pnv_ioda_pe *pe; 547 - 548 - /* 549 - * For non-nvlink devices the IOMMU group is registered when the PE is 550 - * configured and devices are added to the group when the per-device 551 - * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is 552 - * only initialise for "normal" IODA PHBs. 553 - * 554 - * For NVLink devices we need to ensure the NVLinks and the GPU end up 555 - * in the same IOMMU group, so that's handled here. 556 - */ 557 - list_for_each_entry(hose, &hose_list, list_node) { 558 - phb = hose->private_data; 559 - 560 - if (phb->type == PNV_PHB_IODA2) 561 - list_for_each_entry(pe, &phb->ioda.pe_list, list) 562 - pnv_try_setup_npu_table_group(pe); 563 - } 564 - 565 - /* 566 - * Now we have all PHBs discovered, time to add NPU devices to 567 - * the corresponding IOMMU groups. 
568 - */ 569 - list_for_each_entry(hose, &hose_list, list_node) { 570 - unsigned long pgsizes; 571 - 572 - phb = hose->private_data; 573 - 574 - if (phb->type != PNV_PHB_NPU_NVLINK) 575 - continue; 576 - 577 - pgsizes = pnv_ioda_parse_tce_sizes(phb); 578 - list_for_each_entry(pe, &phb->ioda.pe_list, list) { 579 - /* 580 - * IODA2 bridges get this set up from 581 - * pci_controller_ops::setup_bridge but NPU bridges 582 - * do not have this hook defined so we do it here. 583 - */ 584 - pe->table_group.pgsizes = pgsizes; 585 - pnv_npu_compound_attach(pe); 586 - } 587 - } 588 - } 589 - #endif /* CONFIG_IOMMU_API */ 590 - 591 - int pnv_npu2_init(struct pci_controller *hose) 592 - { 593 - static int npu_index; 594 - struct npu *npu; 595 - int ret; 596 - 597 - npu = kzalloc(sizeof(*npu), GFP_KERNEL); 598 - if (!npu) 599 - return -ENOMEM; 600 - 601 - npu_index++; 602 - if (WARN_ON(npu_index >= NV_MAX_NPUS)) { 603 - ret = -ENOSPC; 604 - goto fail_exit; 605 - } 606 - npu->index = npu_index; 607 - hose->npu = npu; 608 - 609 - return 0; 610 - 611 - fail_exit: 612 - kfree(npu); 613 - return ret; 614 - } 615 - 616 - int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid, 617 - unsigned long msr) 618 - { 619 - int ret; 620 - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); 621 - struct pci_controller *hose; 622 - struct pnv_phb *nphb; 623 - 624 - if (!npdev) 625 - return -ENODEV; 626 - 627 - hose = pci_bus_to_host(npdev->bus); 628 - if (hose->npu == NULL) { 629 - dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); 630 - return 0; 631 - } 632 - 633 - nphb = hose->private_data; 634 - 635 - dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n", 636 - nphb->opal_id, lparid); 637 - /* 638 - * Currently we only support radix and non-zero LPCR only makes sense 639 - * for hash tables so skiboot expects the LPCR parameter to be a zero. 
640 - */ 641 - ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid, 642 - 0 /* LPCR bits */); 643 - if (ret) { 644 - dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); 645 - return ret; 646 - } 647 - 648 - dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n", 649 - nphb->opal_id, msr); 650 - ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr, 651 - pci_dev_id(gpdev)); 652 - if (ret < 0) 653 - dev_err(&gpdev->dev, "Failed to init context: %d\n", ret); 654 - else 655 - ret = 0; 656 - 657 - return 0; 658 - } 659 - EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev); 660 - 661 - void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr) 662 - { 663 - struct pci_dev *gpdev; 664 - 665 - list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list) 666 - pnv_npu2_map_lpar_dev(gpdev, 0, msr); 667 - } 668 - 669 - int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev) 670 - { 671 - int ret; 672 - struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0); 673 - struct pci_controller *hose; 674 - struct pnv_phb *nphb; 675 - 676 - if (!npdev) 677 - return -ENODEV; 678 - 679 - hose = pci_bus_to_host(npdev->bus); 680 - if (hose->npu == NULL) { 681 - dev_info_once(&npdev->dev, "Nvlink1 does not support contexts"); 682 - return 0; 683 - } 684 - 685 - nphb = hose->private_data; 686 - 687 - dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n", 688 - nphb->opal_id); 689 - ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/, 690 - pci_dev_id(gpdev)); 691 - if (ret < 0) { 692 - dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret); 693 - return ret; 694 - } 695 - 696 - /* Set LPID to 0 anyway, just to be safe */ 697 - dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id); 698 - ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/, 699 - 0 /* LPCR bits */); 700 - if (ret) 701 - dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret); 702 - 703 - return ret; 704 - } 705 - 
EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
-2
arch/powerpc/platforms/powernv/opal-call.c
··· 267 267 OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE); 268 268 OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE); 269 269 OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET); 270 - OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); 271 - OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); 272 270 OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); 273 271 OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); 274 272 OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
+4 -181
arch/powerpc/platforms/powernv/pci-ioda.c
··· 47 47 #define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */ 48 48 #define PNV_IODA1_DMA32_SEGSIZE 0x10000000 49 49 50 - static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK", 51 - "NPU_OCAPI" }; 50 + static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_OCAPI" }; 52 51 53 52 static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); 54 53 static void pnv_pci_configure_bus(struct pci_bus *bus); ··· 191 192 unsigned int pe_num = pe->pe_number; 192 193 193 194 WARN_ON(pe->pdev); 194 - WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ 195 - kfree(pe->npucomp); 196 195 memset(pe, 0, sizeof(struct pnv_ioda_pe)); 197 196 198 197 mutex_lock(&phb->ioda.pe_alloc_mutex); ··· 872 875 * Release from all parents PELT-V. NPUs don't have a PELTV 873 876 * table 874 877 */ 875 - if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) 878 + if (phb->type != PNV_PHB_NPU_OCAPI) 876 879 pnv_ioda_unset_peltv(phb, pe, parent); 877 880 878 881 rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, ··· 943 946 * Configure PELTV. NPUs don't have a PELTV table so skip 944 947 * configuration on them. 945 948 */ 946 - if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) 949 + if (phb->type != PNV_PHB_NPU_OCAPI) 947 950 pnv_ioda_set_peltv(phb, pe, true); 948 951 949 952 /* Setup reverse map */ ··· 999 1002 1000 1003 /* NOTE: We don't get a reference for the pointer in the PE 1001 1004 * data structure, both the device and PE structures should be 1002 - * destroyed at the same time. However, removing nvlink 1003 - * devices will need some work. 1005 + * destroyed at the same time. 
1004 1006 * 1005 1007 * At some point we want to remove the PDN completely anyways 1006 1008 */ ··· 1093 1097 list_add_tail(&pe->list, &phb->ioda.pe_list); 1094 1098 1095 1099 return pe; 1096 - } 1097 - 1098 - static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) 1099 - { 1100 - int pe_num, found_pe = false, rc; 1101 - long rid; 1102 - struct pnv_ioda_pe *pe; 1103 - struct pci_dev *gpu_pdev; 1104 - struct pci_dn *npu_pdn; 1105 - struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus); 1106 - 1107 - /* 1108 - * Intentionally leak a reference on the npu device (for 1109 - * nvlink only; this is not an opencapi path) to make sure it 1110 - * never goes away, as it's been the case all along and some 1111 - * work is needed otherwise. 1112 - */ 1113 - pci_dev_get(npu_pdev); 1114 - 1115 - /* 1116 - * Due to a hardware errata PE#0 on the NPU is reserved for 1117 - * error handling. This means we only have three PEs remaining 1118 - * which need to be assigned to four links, implying some 1119 - * links must share PEs. 1120 - * 1121 - * To achieve this we assign PEs such that NPUs linking the 1122 - * same GPU get assigned the same PE. 1123 - */ 1124 - gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev); 1125 - for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) { 1126 - pe = &phb->ioda.pe_array[pe_num]; 1127 - if (!pe->pdev) 1128 - continue; 1129 - 1130 - if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) { 1131 - /* 1132 - * This device has the same peer GPU so should 1133 - * be assigned the same PE as the existing 1134 - * peer NPU. 
1135 - */ 1136 - dev_info(&npu_pdev->dev, 1137 - "Associating to existing PE %x\n", pe_num); 1138 - npu_pdn = pci_get_pdn(npu_pdev); 1139 - rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; 1140 - npu_pdn->pe_number = pe_num; 1141 - phb->ioda.pe_rmap[rid] = pe->pe_number; 1142 - pe->device_count++; 1143 - 1144 - /* Map the PE to this link */ 1145 - rc = opal_pci_set_pe(phb->opal_id, pe_num, rid, 1146 - OpalPciBusAll, 1147 - OPAL_COMPARE_RID_DEVICE_NUMBER, 1148 - OPAL_COMPARE_RID_FUNCTION_NUMBER, 1149 - OPAL_MAP_PE); 1150 - WARN_ON(rc != OPAL_SUCCESS); 1151 - found_pe = true; 1152 - break; 1153 - } 1154 - } 1155 - 1156 - if (!found_pe) 1157 - /* 1158 - * Could not find an existing PE so allocate a new 1159 - * one. 1160 - */ 1161 - return pnv_ioda_setup_dev_PE(npu_pdev); 1162 - else 1163 - return pe; 1164 - } 1165 - 1166 - static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus) 1167 - { 1168 - struct pci_dev *pdev; 1169 - 1170 - list_for_each_entry(pdev, &bus->devices, bus_list) 1171 - pnv_ioda_setup_npu_PE(pdev); 1172 - } 1173 - 1174 - static void pnv_pci_ioda_setup_nvlink(void) 1175 - { 1176 - struct pci_controller *hose; 1177 - struct pnv_phb *phb; 1178 - struct pnv_ioda_pe *pe; 1179 - 1180 - list_for_each_entry(hose, &hose_list, list_node) { 1181 - phb = hose->private_data; 1182 - if (phb->type == PNV_PHB_NPU_NVLINK) { 1183 - /* PE#0 is needed for error reporting */ 1184 - pnv_ioda_reserve_pe(phb, 0); 1185 - pnv_ioda_setup_npu_PEs(hose->bus); 1186 - if (phb->model == PNV_PHB_MODEL_NPU2) 1187 - WARN_ON_ONCE(pnv_npu2_init(hose)); 1188 - } 1189 - } 1190 - list_for_each_entry(hose, &hose_list, list_node) { 1191 - phb = hose->private_data; 1192 - if (phb->type != PNV_PHB_IODA2) 1193 - continue; 1194 - 1195 - list_for_each_entry(pe, &phb->ioda.pe_list, list) 1196 - pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV); 1197 - } 1198 - 1199 - #ifdef CONFIG_IOMMU_API 1200 - /* setup iommu groups so we can do nvlink pass-thru */ 1201 - pnv_pci_npu_setup_iommu_groups(); 1202 - 
#endif 1203 1100 } 1204 1101 1205 1102 static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb, ··· 1357 1468 #define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1) 1358 1469 #define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2) 1359 1470 1360 - static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm) 1361 - { 1362 - __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm); 1363 - const unsigned long val = PHB3_TCE_KILL_INVAL_ALL; 1364 - 1365 - mb(); /* Ensure previous TCE table stores are visible */ 1366 - if (rm) 1367 - __raw_rm_writeq_be(val, invalidate); 1368 - else 1369 - __raw_writeq_be(val, invalidate); 1370 - } 1371 - 1372 1471 static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe) 1373 1472 { 1374 1473 /* 01xb - invalidate TCEs that match the specified PE# */ ··· 1416 1539 struct pnv_phb *phb = pe->phb; 1417 1540 unsigned int shift = tbl->it_page_shift; 1418 1541 1419 - /* 1420 - * NVLink1 can use the TCE kill register directly as 1421 - * it's the same as PHB3. NVLink2 is different and 1422 - * should go via the OPAL call. 1423 - */ 1424 - if (phb->model == PNV_PHB_MODEL_NPU) { 1425 - /* 1426 - * The NVLink hardware does not support TCE kill 1427 - * per TCE entry so we have to invalidate 1428 - * the entire cache for it. 
1429 - */ 1430 - pnv_pci_phb3_tce_invalidate_entire(phb, rm); 1431 - continue; 1432 - } 1433 1542 if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs) 1434 1543 pnv_pci_phb3_tce_invalidate(pe, rm, shift, 1435 1544 index, npages); ··· 1425 1562 pe->pe_number, 1u << shift, 1426 1563 index << shift, npages); 1427 1564 } 1428 - } 1429 - 1430 - void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm) 1431 - { 1432 - if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3) 1433 - pnv_pci_phb3_tce_invalidate_entire(phb, rm); 1434 - else 1435 - opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0); 1436 1565 } 1437 1566 1438 1567 static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index, ··· 2306 2451 2307 2452 static void pnv_pci_ioda_fixup(void) 2308 2453 { 2309 - pnv_pci_ioda_setup_nvlink(); 2310 2454 pnv_pci_ioda_create_dbgfs(); 2311 2455 2312 2456 pnv_pci_enable_bridges(); ··· 2678 2824 pnv_ioda_release_pe(pe); 2679 2825 } 2680 2826 2681 - static void pnv_npu_disable_device(struct pci_dev *pdev) 2682 - { 2683 - struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); 2684 - struct eeh_pe *eehpe = edev ? 
edev->pe : NULL; 2685 - 2686 - if (eehpe && eeh_ops && eeh_ops->reset) 2687 - eeh_ops->reset(eehpe, EEH_RESET_HOT); 2688 - } 2689 - 2690 2827 static void pnv_pci_ioda_shutdown(struct pci_controller *hose) 2691 2828 { 2692 2829 struct pnv_phb *phb = hose->private_data; ··· 2717 2872 .setup_bridge = pnv_pci_fixup_bridge_resources, 2718 2873 .reset_secondary_bus = pnv_pci_reset_secondary_bus, 2719 2874 .shutdown = pnv_pci_ioda_shutdown, 2720 - }; 2721 - 2722 - static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { 2723 - .setup_msi_irqs = pnv_setup_msi_irqs, 2724 - .teardown_msi_irqs = pnv_teardown_msi_irqs, 2725 - .enable_device_hook = pnv_pci_enable_device_hook, 2726 - .window_alignment = pnv_pci_window_alignment, 2727 - .reset_secondary_bus = pnv_pci_reset_secondary_bus, 2728 - .shutdown = pnv_pci_ioda_shutdown, 2729 - .disable_device = pnv_npu_disable_device, 2730 2875 }; 2731 2876 2732 2877 static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { ··· 2792 2957 phb->model = PNV_PHB_MODEL_P7IOC; 2793 2958 else if (of_device_is_compatible(np, "ibm,power8-pciex")) 2794 2959 phb->model = PNV_PHB_MODEL_PHB3; 2795 - else if (of_device_is_compatible(np, "ibm,power8-npu-pciex")) 2796 - phb->model = PNV_PHB_MODEL_NPU; 2797 - else if (of_device_is_compatible(np, "ibm,power9-npu-pciex")) 2798 - phb->model = PNV_PHB_MODEL_NPU2; 2799 2960 else 2800 2961 phb->model = PNV_PHB_MODEL_UNKNOWN; 2801 2962 ··· 2949 3118 ppc_md.pcibios_fixup = pnv_pci_ioda_fixup; 2950 3119 2951 3120 switch (phb->type) { 2952 - case PNV_PHB_NPU_NVLINK: 2953 - hose->controller_ops = pnv_npu_ioda_controller_ops; 2954 - break; 2955 3121 case PNV_PHB_NPU_OCAPI: 2956 3122 hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops; 2957 3123 break; ··· 2999 3171 void __init pnv_pci_init_ioda2_phb(struct device_node *np) 3000 3172 { 3001 3173 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2); 3002 - } 3003 - 3004 - void __init pnv_pci_init_npu_phb(struct device_node *np) 3005 - { 
3006 - pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK); 3007 3174 } 3008 3175 3009 3176 void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
-11
arch/powerpc/platforms/powernv/pci.c
··· 926 926 for_each_compatible_node(np, NULL, "ibm,ioda3-phb") 927 927 pnv_pci_init_ioda2_phb(np); 928 928 929 - /* Look for NPU PHBs */ 930 - for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb") 931 - pnv_pci_init_npu_phb(np); 932 - 933 - /* 934 - * Look for NPU2 PHBs which we treat mostly as NPU PHBs with 935 - * the exception of TCE kill which requires an OPAL call. 936 - */ 937 - for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb") 938 - pnv_pci_init_npu_phb(np); 939 - 940 929 /* Look for NPU2 OpenCAPI PHBs */ 941 930 for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb") 942 931 pnv_pci_init_npu2_opencapi_phb(np);
+3 -14
arch/powerpc/platforms/powernv/pci.h
··· 10 10 struct pci_dn; 11 11 12 12 enum pnv_phb_type { 13 - PNV_PHB_IODA1 = 0, 14 - PNV_PHB_IODA2 = 1, 15 - PNV_PHB_NPU_NVLINK = 2, 16 - PNV_PHB_NPU_OCAPI = 3, 13 + PNV_PHB_IODA1, 14 + PNV_PHB_IODA2, 15 + PNV_PHB_NPU_OCAPI, 17 16 }; 18 17 19 18 /* Precise PHB model for error management */ ··· 20 21 PNV_PHB_MODEL_UNKNOWN, 21 22 PNV_PHB_MODEL_P7IOC, 22 23 PNV_PHB_MODEL_PHB3, 23 - PNV_PHB_MODEL_NPU, 24 - PNV_PHB_MODEL_NPU2, 25 24 }; 26 25 27 26 #define PNV_PCI_DIAG_BUF_SIZE 8192 ··· 78 81 79 82 /* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */ 80 83 struct iommu_table_group table_group; 81 - struct npu_comp *npucomp; 82 84 83 85 /* 64-bit TCE bypass region */ 84 86 bool tce_bypass_enabled; ··· 285 289 286 290 extern void pnv_pci_init_ioda_hub(struct device_node *np); 287 291 extern void pnv_pci_init_ioda2_phb(struct device_node *np); 288 - extern void pnv_pci_init_npu_phb(struct device_node *np); 289 292 extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np); 290 - extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr); 291 293 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); 292 294 extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); 293 295 ··· 307 313 pe_level_printk(pe, KERN_WARNING, fmt, ##__VA_ARGS__) 308 314 #define pe_info(pe, fmt, ...) \ 309 315 pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__) 310 - 311 - /* Nvlink functions */ 312 - extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass); 313 - extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm); 314 - extern void pnv_pci_npu_setup_iommu_groups(void); 315 316 316 317 /* pci-ioda-tce.c */ 317 318 #define POWERNV_IOMMU_DEFAULT_LEVELS 2
-23
arch/powerpc/platforms/pseries/pci.c
··· 224 224 225 225 void __init pSeries_final_fixup(void) 226 226 { 227 - struct pci_controller *hose; 228 - 229 227 pSeries_request_regions(); 230 228 231 229 eeh_show_enabled(); ··· 232 234 ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable; 233 235 ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable; 234 236 #endif 235 - list_for_each_entry(hose, &hose_list, list_node) { 236 - struct device_node *dn = hose->dn, *nvdn; 237 - 238 - while (1) { 239 - dn = of_find_all_nodes(dn); 240 - if (!dn) 241 - break; 242 - nvdn = of_parse_phandle(dn, "ibm,nvlink", 0); 243 - if (!nvdn) 244 - continue; 245 - if (!of_device_is_compatible(nvdn, "ibm,npu-link")) 246 - continue; 247 - if (!of_device_is_compatible(nvdn->parent, 248 - "ibm,power9-npu")) 249 - continue; 250 - #ifdef CONFIG_PPC_POWERNV 251 - WARN_ON_ONCE(pnv_npu2_init(hose)); 252 - #endif 253 - break; 254 - } 255 - } 256 237 } 257 238 258 239 /*