Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/powernv: Shift VF resource with an offset

On the PowerNV platform, a resource's position in the M64 BAR implies the PE#
the resource belongs to. In some cases, adjustment of a resource is necessary
to locate it at the correct position in the M64 BAR.

This patch adds pnv_pci_vf_resource_shift() to shift the 'real' PF IOV BAR
address according to an offset.

Note:

After doing so, there would be a "hole" in /proc/iomem when the offset is a
positive value. It looks like the device returns some MMIO space back to
the system, which in fact no one can use.

[bhelgaas: rework loops, rework overlap check, index resource[]
conventionally, remove pci_regs.h include, squashed with next patch]
Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

authored by

Wei Yang and committed by
Benjamin Herrenschmidt
781a868f 5350ab3f

+553 -17
+4
arch/powerpc/include/asm/pci-bridge.h
··· 180 180 int pe_number; 181 181 #ifdef CONFIG_PCI_IOV 182 182 u16 vfs_expanded; /* number of VFs IOV BAR expanded */ 183 + u16 num_vfs; /* number of VFs enabled*/ 184 + int offset; /* PE# for the first VF PE */ 185 + #define IODA_INVALID_M64 (-1) 186 + int m64_wins[PCI_SRIOV_NUM_BARS]; 183 187 #endif /* CONFIG_PCI_IOV */ 184 188 #endif 185 189 struct list_head child_list;
+13
arch/powerpc/kernel/pci_dn.c
··· 217 217 struct pci_dn *pdn, *tmp; 218 218 int i; 219 219 220 + /* 221 + * VF and VF PE are created/released dynamically, so we need to 222 + * bind/unbind them. Otherwise the VF and VF PE would be mismatched 223 + * when re-enabling SR-IOV. 224 + */ 225 + if (pdev->is_virtfn) { 226 + pdn = pci_get_pdn(pdev); 227 + #ifdef CONFIG_PPC_POWERNV 228 + pdn->pe_number = IODA_INVALID_PE; 229 + #endif 230 + return; 231 + } 232 + 220 233 /* Only support IOV PF for now */ 221 234 if (!pdev->is_physfn) 222 235 return;
+511 -17
arch/powerpc/platforms/powernv/pci-ioda.c
··· 44 44 #include "powernv.h" 45 45 #include "pci.h" 46 46 47 + /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 48 + #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 49 + 47 50 static void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, 48 51 const char *fmt, ...) 49 52 { ··· 59 56 vaf.fmt = fmt; 60 57 vaf.va = &args; 61 58 62 - if (pe->pdev) 59 + if (pe->flags & PNV_IODA_PE_DEV) 63 60 strlcpy(pfix, dev_name(&pe->pdev->dev), sizeof(pfix)); 64 - else 61 + else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) 65 62 sprintf(pfix, "%04x:%02x ", 66 63 pci_domain_nr(pe->pbus), pe->pbus->number); 64 + #ifdef CONFIG_PCI_IOV 65 + else if (pe->flags & PNV_IODA_PE_VF) 66 + sprintf(pfix, "%04x:%02x:%2x.%d", 67 + pci_domain_nr(pe->parent_dev->bus), 68 + (pe->rid & 0xff00) >> 8, 69 + PCI_SLOT(pe->rid), PCI_FUNC(pe->rid)); 70 + #endif /* CONFIG_PCI_IOV*/ 67 71 68 72 printk("%spci %s: [PE# %.3d] %pV", 69 73 level, pfix, pe->pe_number, &vaf); ··· 601 591 bool is_add) 602 592 { 603 593 struct pnv_ioda_pe *slave; 604 - struct pci_dev *pdev; 594 + struct pci_dev *pdev = NULL; 605 595 int ret; 606 596 607 597 /* ··· 640 630 641 631 if (pe->flags & (PNV_IODA_PE_BUS_ALL | PNV_IODA_PE_BUS)) 642 632 pdev = pe->pbus->self; 643 - else 633 + else if (pe->flags & PNV_IODA_PE_DEV) 644 634 pdev = pe->pdev->bus->self; 635 + #ifdef CONFIG_PCI_IOV 636 + else if (pe->flags & PNV_IODA_PE_VF) 637 + pdev = pe->parent_dev->bus->self; 638 + #endif /* CONFIG_PCI_IOV */ 645 639 while (pdev) { 646 640 struct pci_dn *pdn = pci_get_pdn(pdev); 647 641 struct pnv_ioda_pe *parent; ··· 662 648 663 649 return 0; 664 650 } 651 + 652 + #ifdef CONFIG_PCI_IOV 653 + static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 654 + { 655 + struct pci_dev *parent; 656 + uint8_t bcomp, dcomp, fcomp; 657 + int64_t rc; 658 + long rid_end, rid; 659 + 660 + /* Currently, we just deconfigure VF PE. 
Bus PE will always there.*/ 661 + if (pe->pbus) { 662 + int count; 663 + 664 + dcomp = OPAL_IGNORE_RID_DEVICE_NUMBER; 665 + fcomp = OPAL_IGNORE_RID_FUNCTION_NUMBER; 666 + parent = pe->pbus->self; 667 + if (pe->flags & PNV_IODA_PE_BUS_ALL) 668 + count = pe->pbus->busn_res.end - pe->pbus->busn_res.start + 1; 669 + else 670 + count = 1; 671 + 672 + switch(count) { 673 + case 1: bcomp = OpalPciBusAll; break; 674 + case 2: bcomp = OpalPciBus7Bits; break; 675 + case 4: bcomp = OpalPciBus6Bits; break; 676 + case 8: bcomp = OpalPciBus5Bits; break; 677 + case 16: bcomp = OpalPciBus4Bits; break; 678 + case 32: bcomp = OpalPciBus3Bits; break; 679 + default: 680 + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", 681 + count); 682 + /* Do an exact match only */ 683 + bcomp = OpalPciBusAll; 684 + } 685 + rid_end = pe->rid + (count << 8); 686 + } else { 687 + if (pe->flags & PNV_IODA_PE_VF) 688 + parent = pe->parent_dev; 689 + else 690 + parent = pe->pdev->bus->self; 691 + bcomp = OpalPciBusAll; 692 + dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 693 + fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; 694 + rid_end = pe->rid + 1; 695 + } 696 + 697 + /* Clear the reverse map */ 698 + for (rid = pe->rid; rid < rid_end; rid++) 699 + phb->ioda.pe_rmap[rid] = 0; 700 + 701 + /* Release from all parents PELT-V */ 702 + while (parent) { 703 + struct pci_dn *pdn = pci_get_pdn(parent); 704 + if (pdn && pdn->pe_number != IODA_INVALID_PE) { 705 + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, 706 + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); 707 + /* XXX What to do in case of error ? 
*/ 708 + } 709 + parent = parent->bus->self; 710 + } 711 + 712 + opal_pci_eeh_freeze_set(phb->opal_id, pe->pe_number, 713 + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 714 + 715 + /* Disassociate PE in PELT */ 716 + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, 717 + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); 718 + if (rc) 719 + pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc); 720 + rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, 721 + bcomp, dcomp, fcomp, OPAL_UNMAP_PE); 722 + if (rc) 723 + pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc); 724 + 725 + pe->pbus = NULL; 726 + pe->pdev = NULL; 727 + pe->parent_dev = NULL; 728 + 729 + return 0; 730 + } 731 + #endif /* CONFIG_PCI_IOV */ 665 732 666 733 static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) 667 734 { ··· 770 675 case 16: bcomp = OpalPciBus4Bits; break; 771 676 case 32: bcomp = OpalPciBus3Bits; break; 772 677 default: 773 - pr_err("%s: Number of subordinate busses %d" 774 - " unsupported\n", 775 - pci_name(pe->pbus->self), count); 678 + dev_err(&pe->pbus->dev, "Number of subordinate buses %d unsupported\n", 679 + count); 776 680 /* Do an exact match only */ 777 681 bcomp = OpalPciBusAll; 778 682 } 779 683 rid_end = pe->rid + (count << 8); 780 684 } else { 781 - parent = pe->pdev->bus->self; 685 + #ifdef CONFIG_PCI_IOV 686 + if (pe->flags & PNV_IODA_PE_VF) 687 + parent = pe->parent_dev; 688 + else 689 + #endif /* CONFIG_PCI_IOV */ 690 + parent = pe->pdev->bus->self; 782 691 bcomp = OpalPciBusAll; 783 692 dcomp = OPAL_COMPARE_RID_DEVICE_NUMBER; 784 693 fcomp = OPAL_COMPARE_RID_FUNCTION_NUMBER; ··· 872 773 /* Default */ 873 774 return 10; 874 775 } 776 + 777 + #ifdef CONFIG_PCI_IOV 778 + static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset) 779 + { 780 + struct pci_dn *pdn = pci_get_pdn(dev); 781 + int i; 782 + struct resource *res, res2; 783 + resource_size_t size; 784 + u16 num_vfs; 785 + 786 + if (!dev->is_physfn) 787 + return 
-EINVAL; 788 + 789 + /* 790 + * "offset" is in VFs. The M64 windows are sized so that when they 791 + * are segmented, each segment is the same size as the IOV BAR. 792 + * Each segment is in a separate PE, and the high order bits of the 793 + * address are the PE number. Therefore, each VF's BAR is in a 794 + * separate PE, and changing the IOV BAR start address changes the 795 + * range of PEs the VFs are in. 796 + */ 797 + num_vfs = pdn->num_vfs; 798 + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 799 + res = &dev->resource[i + PCI_IOV_RESOURCES]; 800 + if (!res->flags || !res->parent) 801 + continue; 802 + 803 + if (!pnv_pci_is_mem_pref_64(res->flags)) 804 + continue; 805 + 806 + /* 807 + * The actual IOV BAR range is determined by the start address 808 + * and the actual size for num_vfs VFs BAR. This check is to 809 + * make sure that after shifting, the range will not overlap 810 + * with another device. 811 + */ 812 + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); 813 + res2.flags = res->flags; 814 + res2.start = res->start + (size * offset); 815 + res2.end = res2.start + (size * num_vfs) - 1; 816 + 817 + if (res2.end > res->end) { 818 + dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n", 819 + i, &res2, res, num_vfs, offset); 820 + return -EBUSY; 821 + } 822 + } 823 + 824 + /* 825 + * After doing so, there would be a "hole" in the /proc/iomem when 826 + * offset is a positive value. It looks like the device return some 827 + * mmio back to the system, which actually no one could use it. 
828 + */ 829 + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 830 + res = &dev->resource[i + PCI_IOV_RESOURCES]; 831 + if (!res->flags || !res->parent) 832 + continue; 833 + 834 + if (!pnv_pci_is_mem_pref_64(res->flags)) 835 + continue; 836 + 837 + size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES); 838 + res2 = *res; 839 + res->start += size * offset; 840 + 841 + dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (enabling %d VFs shifted by %d)\n", 842 + i, &res2, res, num_vfs, offset); 843 + pci_update_resource(dev, i + PCI_IOV_RESOURCES); 844 + } 845 + return 0; 846 + } 847 + #endif /* CONFIG_PCI_IOV */ 875 848 876 849 #if 0 877 850 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) ··· 1150 979 } 1151 980 1152 981 #ifdef CONFIG_PCI_IOV 982 + static int pnv_pci_vf_release_m64(struct pci_dev *pdev) 983 + { 984 + struct pci_bus *bus; 985 + struct pci_controller *hose; 986 + struct pnv_phb *phb; 987 + struct pci_dn *pdn; 988 + int i; 989 + 990 + bus = pdev->bus; 991 + hose = pci_bus_to_host(bus); 992 + phb = hose->private_data; 993 + pdn = pci_get_pdn(pdev); 994 + 995 + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 996 + if (pdn->m64_wins[i] == IODA_INVALID_M64) 997 + continue; 998 + opal_pci_phb_mmio_enable(phb->opal_id, 999 + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 0); 1000 + clear_bit(pdn->m64_wins[i], &phb->ioda.m64_bar_alloc); 1001 + pdn->m64_wins[i] = IODA_INVALID_M64; 1002 + } 1003 + 1004 + return 0; 1005 + } 1006 + 1007 + static int pnv_pci_vf_assign_m64(struct pci_dev *pdev) 1008 + { 1009 + struct pci_bus *bus; 1010 + struct pci_controller *hose; 1011 + struct pnv_phb *phb; 1012 + struct pci_dn *pdn; 1013 + unsigned int win; 1014 + struct resource *res; 1015 + int i; 1016 + int64_t rc; 1017 + 1018 + bus = pdev->bus; 1019 + hose = pci_bus_to_host(bus); 1020 + phb = hose->private_data; 1021 + pdn = pci_get_pdn(pdev); 1022 + 1023 + /* Initialize the m64_wins to IODA_INVALID_M64 */ 1024 + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) 1025 + 
pdn->m64_wins[i] = IODA_INVALID_M64; 1026 + 1027 + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 1028 + res = &pdev->resource[i + PCI_IOV_RESOURCES]; 1029 + if (!res->flags || !res->parent) 1030 + continue; 1031 + 1032 + if (!pnv_pci_is_mem_pref_64(res->flags)) 1033 + continue; 1034 + 1035 + do { 1036 + win = find_next_zero_bit(&phb->ioda.m64_bar_alloc, 1037 + phb->ioda.m64_bar_idx + 1, 0); 1038 + 1039 + if (win >= phb->ioda.m64_bar_idx + 1) 1040 + goto m64_failed; 1041 + } while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc)); 1042 + 1043 + pdn->m64_wins[i] = win; 1044 + 1045 + /* Map the M64 here */ 1046 + rc = opal_pci_set_phb_mem_window(phb->opal_id, 1047 + OPAL_M64_WINDOW_TYPE, 1048 + pdn->m64_wins[i], 1049 + res->start, 1050 + 0, /* unused */ 1051 + resource_size(res)); 1052 + if (rc != OPAL_SUCCESS) { 1053 + dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n", 1054 + win, rc); 1055 + goto m64_failed; 1056 + } 1057 + 1058 + rc = opal_pci_phb_mmio_enable(phb->opal_id, 1059 + OPAL_M64_WINDOW_TYPE, pdn->m64_wins[i], 1); 1060 + if (rc != OPAL_SUCCESS) { 1061 + dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n", 1062 + win, rc); 1063 + goto m64_failed; 1064 + } 1065 + } 1066 + return 0; 1067 + 1068 + m64_failed: 1069 + pnv_pci_vf_release_m64(pdev); 1070 + return -EBUSY; 1071 + } 1072 + 1073 + static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) 1074 + { 1075 + struct pci_bus *bus; 1076 + struct pci_controller *hose; 1077 + struct pnv_phb *phb; 1078 + struct iommu_table *tbl; 1079 + unsigned long addr; 1080 + int64_t rc; 1081 + 1082 + bus = dev->bus; 1083 + hose = pci_bus_to_host(bus); 1084 + phb = hose->private_data; 1085 + tbl = pe->tce32_table; 1086 + addr = tbl->it_base; 1087 + 1088 + opal_pci_map_pe_dma_window(phb->opal_id, pe->pe_number, 1089 + pe->pe_number << 1, 1, __pa(addr), 1090 + 0, 0x1000); 1091 + 1092 + rc = opal_pci_map_pe_dma_window_real(pe->phb->opal_id, 1093 + pe->pe_number, 1094 + (pe->pe_number 
<< 1) + 1, 1095 + pe->tce_bypass_base, 1096 + 0); 1097 + if (rc) 1098 + pe_warn(pe, "OPAL error %ld release DMA window\n", rc); 1099 + 1100 + iommu_free_table(tbl, of_node_full_name(dev->dev.of_node)); 1101 + free_pages(addr, get_order(TCE32_TABLE_SIZE)); 1102 + pe->tce32_table = NULL; 1103 + } 1104 + 1105 + static void pnv_ioda_release_vf_PE(struct pci_dev *pdev) 1106 + { 1107 + struct pci_bus *bus; 1108 + struct pci_controller *hose; 1109 + struct pnv_phb *phb; 1110 + struct pnv_ioda_pe *pe, *pe_n; 1111 + struct pci_dn *pdn; 1112 + 1113 + bus = pdev->bus; 1114 + hose = pci_bus_to_host(bus); 1115 + phb = hose->private_data; 1116 + 1117 + if (!pdev->is_physfn) 1118 + return; 1119 + 1120 + pdn = pci_get_pdn(pdev); 1121 + list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) { 1122 + if (pe->parent_dev != pdev) 1123 + continue; 1124 + 1125 + pnv_pci_ioda2_release_dma_pe(pdev, pe); 1126 + 1127 + /* Remove from list */ 1128 + mutex_lock(&phb->ioda.pe_list_mutex); 1129 + list_del(&pe->list); 1130 + mutex_unlock(&phb->ioda.pe_list_mutex); 1131 + 1132 + pnv_ioda_deconfigure_pe(phb, pe); 1133 + 1134 + pnv_ioda_free_pe(phb, pe->pe_number); 1135 + } 1136 + } 1137 + 1138 + void pnv_pci_sriov_disable(struct pci_dev *pdev) 1139 + { 1140 + struct pci_bus *bus; 1141 + struct pci_controller *hose; 1142 + struct pnv_phb *phb; 1143 + struct pci_dn *pdn; 1144 + struct pci_sriov *iov; 1145 + u16 num_vfs; 1146 + 1147 + bus = pdev->bus; 1148 + hose = pci_bus_to_host(bus); 1149 + phb = hose->private_data; 1150 + pdn = pci_get_pdn(pdev); 1151 + iov = pdev->sriov; 1152 + num_vfs = pdn->num_vfs; 1153 + 1154 + /* Release VF PEs */ 1155 + pnv_ioda_release_vf_PE(pdev); 1156 + 1157 + if (phb->type == PNV_PHB_IODA2) { 1158 + pnv_pci_vf_resource_shift(pdev, -pdn->offset); 1159 + 1160 + /* Release M64 windows */ 1161 + pnv_pci_vf_release_m64(pdev); 1162 + 1163 + /* Release PE numbers */ 1164 + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); 1165 + pdn->offset = 0; 1166 + } 1167 + } 
1168 + 1169 + static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, 1170 + struct pnv_ioda_pe *pe); 1171 + static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) 1172 + { 1173 + struct pci_bus *bus; 1174 + struct pci_controller *hose; 1175 + struct pnv_phb *phb; 1176 + struct pnv_ioda_pe *pe; 1177 + int pe_num; 1178 + u16 vf_index; 1179 + struct pci_dn *pdn; 1180 + 1181 + bus = pdev->bus; 1182 + hose = pci_bus_to_host(bus); 1183 + phb = hose->private_data; 1184 + pdn = pci_get_pdn(pdev); 1185 + 1186 + if (!pdev->is_physfn) 1187 + return; 1188 + 1189 + /* Reserve PE for each VF */ 1190 + for (vf_index = 0; vf_index < num_vfs; vf_index++) { 1191 + pe_num = pdn->offset + vf_index; 1192 + 1193 + pe = &phb->ioda.pe_array[pe_num]; 1194 + pe->pe_number = pe_num; 1195 + pe->phb = phb; 1196 + pe->flags = PNV_IODA_PE_VF; 1197 + pe->pbus = NULL; 1198 + pe->parent_dev = pdev; 1199 + pe->tce32_seg = -1; 1200 + pe->mve_number = -1; 1201 + pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | 1202 + pci_iov_virtfn_devfn(pdev, vf_index); 1203 + 1204 + pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%d\n", 1205 + hose->global_number, pdev->bus->number, 1206 + PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), 1207 + PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); 1208 + 1209 + if (pnv_ioda_configure_pe(phb, pe)) { 1210 + /* XXX What do we do here ? 
*/ 1211 + if (pe_num) 1212 + pnv_ioda_free_pe(phb, pe_num); 1213 + pe->pdev = NULL; 1214 + continue; 1215 + } 1216 + 1217 + pe->tce32_table = kzalloc_node(sizeof(struct iommu_table), 1218 + GFP_KERNEL, hose->node); 1219 + pe->tce32_table->data = pe; 1220 + 1221 + /* Put PE to the list */ 1222 + mutex_lock(&phb->ioda.pe_list_mutex); 1223 + list_add_tail(&pe->list, &phb->ioda.pe_list); 1224 + mutex_unlock(&phb->ioda.pe_list_mutex); 1225 + 1226 + pnv_pci_ioda2_setup_dma_pe(phb, pe); 1227 + } 1228 + } 1229 + 1230 + int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs) 1231 + { 1232 + struct pci_bus *bus; 1233 + struct pci_controller *hose; 1234 + struct pnv_phb *phb; 1235 + struct pci_dn *pdn; 1236 + int ret; 1237 + 1238 + bus = pdev->bus; 1239 + hose = pci_bus_to_host(bus); 1240 + phb = hose->private_data; 1241 + pdn = pci_get_pdn(pdev); 1242 + 1243 + if (phb->type == PNV_PHB_IODA2) { 1244 + /* Calculate available PE for required VFs */ 1245 + mutex_lock(&phb->ioda.pe_alloc_mutex); 1246 + pdn->offset = bitmap_find_next_zero_area( 1247 + phb->ioda.pe_alloc, phb->ioda.total_pe, 1248 + 0, num_vfs, 0); 1249 + if (pdn->offset >= phb->ioda.total_pe) { 1250 + mutex_unlock(&phb->ioda.pe_alloc_mutex); 1251 + dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs); 1252 + pdn->offset = 0; 1253 + return -EBUSY; 1254 + } 1255 + bitmap_set(phb->ioda.pe_alloc, pdn->offset, num_vfs); 1256 + pdn->num_vfs = num_vfs; 1257 + mutex_unlock(&phb->ioda.pe_alloc_mutex); 1258 + 1259 + /* Assign M64 window accordingly */ 1260 + ret = pnv_pci_vf_assign_m64(pdev); 1261 + if (ret) { 1262 + dev_info(&pdev->dev, "Not enough M64 window resources\n"); 1263 + goto m64_failed; 1264 + } 1265 + 1266 + /* 1267 + * When using one M64 BAR to map one IOV BAR, we need to shift 1268 + * the IOV BAR according to the PE# allocated to the VFs. 1269 + * Otherwise, the PE# for the VF will conflict with others. 
1270 + */ 1271 + ret = pnv_pci_vf_resource_shift(pdev, pdn->offset); 1272 + if (ret) 1273 + goto m64_failed; 1274 + } 1275 + 1276 + /* Setup VF PEs */ 1277 + pnv_ioda_setup_vf_PE(pdev, num_vfs); 1278 + 1279 + return 0; 1280 + 1281 + m64_failed: 1282 + bitmap_clear(phb->ioda.pe_alloc, pdn->offset, num_vfs); 1283 + pdn->offset = 0; 1284 + 1285 + return ret; 1286 + } 1287 + 1153 1288 int pcibios_sriov_disable(struct pci_dev *pdev) 1154 1289 { 1290 + pnv_pci_sriov_disable(pdev); 1291 + 1155 1292 /* Release PCI data */ 1156 1293 remove_dev_pci_data(pdev); 1157 1294 return 0; ··· 1469 990 { 1470 991 /* Allocate PCI data */ 1471 992 add_dev_pci_data(pdev); 993 + 994 + pnv_pci_sriov_enable(pdev, num_vfs); 1472 995 return 0; 1473 996 } 1474 997 #endif /* CONFIG_PCI_IOV */ ··· 1667 1186 int64_t rc; 1668 1187 void *addr; 1669 1188 1670 - /* 256M DMA window, 4K TCE pages, 8 bytes TCE */ 1671 - #define TCE32_TABLE_SIZE ((0x10000000 / 0x1000) * 8) 1672 - 1673 1189 /* XXX FIXME: Handle 64-bit only DMA devices */ 1674 1190 /* XXX FIXME: Provide 64-bit DMA facilities & non-4K TCE tables etc.. 
*/ 1675 1191 /* XXX FIXME: Allocate multi-level tables on PHB3 */ ··· 1729 1251 TCE_PCI_SWINV_PAIR); 1730 1252 } 1731 1253 iommu_init_table(tbl, phb->hose->node); 1732 - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 1733 1254 1734 - if (pe->pdev) 1255 + if (pe->flags & PNV_IODA_PE_DEV) { 1256 + iommu_register_group(tbl, phb->hose->global_number, 1257 + pe->pe_number); 1735 1258 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 1736 - else 1259 + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { 1260 + iommu_register_group(tbl, phb->hose->global_number, 1261 + pe->pe_number); 1737 1262 pnv_ioda_setup_bus_dma(pe, pe->pbus, true); 1263 + } else if (pe->flags & PNV_IODA_PE_VF) { 1264 + iommu_register_group(tbl, phb->hose->global_number, 1265 + pe->pe_number); 1266 + } 1738 1267 1739 1268 return; 1740 1269 fail: ··· 1868 1383 tbl->it_type |= (TCE_PCI_SWINV_CREATE | TCE_PCI_SWINV_FREE); 1869 1384 } 1870 1385 iommu_init_table(tbl, phb->hose->node); 1871 - iommu_register_group(tbl, phb->hose->global_number, pe->pe_number); 1872 1386 1873 - if (pe->pdev) 1387 + if (pe->flags & PNV_IODA_PE_DEV) { 1388 + iommu_register_group(tbl, phb->hose->global_number, 1389 + pe->pe_number); 1874 1390 set_iommu_table_base_and_group(&pe->pdev->dev, tbl); 1875 - else 1391 + } else if (pe->flags & (PNV_IODA_PE_BUS | PNV_IODA_PE_BUS_ALL)) { 1392 + iommu_register_group(tbl, phb->hose->global_number, 1393 + pe->pe_number); 1876 1394 pnv_ioda_setup_bus_dma(pe, pe->pbus, true); 1395 + } else if (pe->flags & PNV_IODA_PE_VF) { 1396 + iommu_register_group(tbl, phb->hose->global_number, 1397 + pe->pe_number); 1398 + } 1877 1399 1878 1400 /* Also create a bypass window */ 1879 1401 if (!pnv_iommu_bypass_disabled) ··· 2560 2068 phb->hub_id = hub_id; 2561 2069 phb->opal_id = phb_id; 2562 2070 phb->type = ioda_type; 2071 + mutex_init(&phb->ioda.pe_alloc_mutex); 2563 2072 2564 2073 /* Detect specific models for error handling */ 2565 2074 if 
(of_device_is_compatible(np, "ibm,p7ioc-pciex")) ··· 2620 2127 2621 2128 INIT_LIST_HEAD(&phb->ioda.pe_dma_list); 2622 2129 INIT_LIST_HEAD(&phb->ioda.pe_list); 2130 + mutex_init(&phb->ioda.pe_list_mutex); 2623 2131 2624 2132 /* Calculate how many 32-bit TCE segments we have */ 2625 2133 phb->ioda.tce32_count = phb->ioda.m32_pci_base >> 28;
+18
arch/powerpc/platforms/powernv/pci.c
··· 714 714 { 715 715 struct pci_controller *hose = pci_bus_to_host(pdev->bus); 716 716 struct pnv_phb *phb = hose->private_data; 717 + #ifdef CONFIG_PCI_IOV 718 + struct pnv_ioda_pe *pe; 719 + struct pci_dn *pdn; 720 + 721 + /* Fix the VF pdn PE number */ 722 + if (pdev->is_virtfn) { 723 + pdn = pci_get_pdn(pdev); 724 + WARN_ON(pdn->pe_number != IODA_INVALID_PE); 725 + list_for_each_entry(pe, &phb->ioda.pe_list, list) { 726 + if (pe->rid == ((pdev->bus->number << 8) | 727 + (pdev->devfn & 0xff))) { 728 + pdn->pe_number = pe->pe_number; 729 + pe->pdev = pdev; 730 + break; 731 + } 732 + } 733 + } 734 + #endif /* CONFIG_PCI_IOV */ 717 735 718 736 /* If we have no phb structure, try to setup a fallback based on 719 737 * the device-tree (RTAS PCI for example)
+7
arch/powerpc/platforms/powernv/pci.h
··· 23 23 #define PNV_IODA_PE_BUS_ALL (1 << 2) /* PE has subordinate buses */ 24 24 #define PNV_IODA_PE_MASTER (1 << 3) /* Master PE in compound case */ 25 25 #define PNV_IODA_PE_SLAVE (1 << 4) /* Slave PE in compound case */ 26 + #define PNV_IODA_PE_VF (1 << 5) /* PE for one VF */ 26 27 27 28 /* Data associated with a PE, including IOMMU tracking etc.. */ 28 29 struct pnv_phb; ··· 35 34 * entire bus (& children). In the former case, pdev 36 35 * is populated, in the later case, pbus is. 37 36 */ 37 + #ifdef CONFIG_PCI_IOV 38 + struct pci_dev *parent_dev; 39 + #endif 38 40 struct pci_dev *pdev; 39 41 struct pci_bus *pbus; 40 42 ··· 149 145 150 146 /* PE allocation bitmap */ 151 147 unsigned long *pe_alloc; 148 + /* PE allocation mutex */ 149 + struct mutex pe_alloc_mutex; 152 150 153 151 /* M32 & IO segment maps */ 154 152 unsigned int *m32_segmap; ··· 165 159 * on the sequence of creation 166 160 */ 167 161 struct list_head pe_list; 162 + struct mutex pe_list_mutex; 168 163 169 164 /* Reverse map of PEs, will have to extend if 170 165 * we are to support more than 256 PEs, indexed