Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] mpt2sas: Added NUMA IO support in driver which uses multi-reply queue support of the HBA

Support added for controllers capable of multi reply queues.

The following are the modifications to the driver to support NUMA.

1) Create the new structure adapter_reply_queue to contain the reply queue
info for every msix vector. This object will contain a
reply_post_host_index, reply_post_free for each instance, msix_index, among
other parameters. We will track all the reply queues on a link list called
ioc->reply_queue_list. Each reply queue is aligned with each IRQ, and is
passed to the interrupt via the bus_id parameter.

(2) The driver will figure out the msix_vector_count from the PCIe MSIX
capabilities register instead of the IOC Facts->MaxMSIxVectors. This is
because the firmware is not filling in this field until the driver has
already registered MSIX support.

(3) If the ioc_facts reports that the controller is MSIX compatible in the
capabilities, then the driver will request for multiple irqs. This count
is calculated based on the minimum between the online cpus available and
the ioc->msix_vector_count. This count is reported to firmware in the
ioc_init request.

(4) New routines were added, _base_free_irq and _base_request_irq, so
registering and freeing msix vectors is done through a simple function API.

(5) The new routine _base_assign_reply_queues was added to align the msix
indexes across cpus. This will initialize the array called
ioc->cpu_msix_table. This array is looked up on every MPI request so the
MSIxIndex is set appropriately.

(6) A new shost sysfs attribute was added to report the reply_queue_count.

(7) User needs to set the affinity cpu mask, so the interrupts occur on the
same cpu that sent the original request.

Signed-off-by: Nagalakshmi Nandigama <nagalakshmi.nandigama@lsi.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>

authored by

nagalakshmi.nandigama@lsi.com and committed by
James Bottomley
911ae943 66195fc9

+410 -126
+347 -118
drivers/scsi/mpt2sas/mpt2sas_base.c
··· 834 834 static irqreturn_t 835 835 _base_interrupt(int irq, void *bus_id) 836 836 { 837 + struct adapter_reply_queue *reply_q = bus_id; 837 838 union reply_descriptor rd; 838 839 u32 completed_cmds; 839 840 u8 request_desript_type; 840 841 u16 smid; 841 842 u8 cb_idx; 842 843 u32 reply; 843 - u8 msix_index; 844 - struct MPT2SAS_ADAPTER *ioc = bus_id; 844 + u8 msix_index = reply_q->msix_index; 845 + struct MPT2SAS_ADAPTER *ioc = reply_q->ioc; 845 846 Mpi2ReplyDescriptorsUnion_t *rpf; 846 847 u8 rc; 847 848 848 849 if (ioc->mask_interrupts) 849 850 return IRQ_NONE; 850 851 851 - rpf = &ioc->reply_post_free[ioc->reply_post_host_index]; 852 + if (!atomic_add_unless(&reply_q->busy, 1, 1)) 853 + return IRQ_NONE; 854 + 855 + rpf = &reply_q->reply_post_free[reply_q->reply_post_host_index]; 852 856 request_desript_type = rpf->Default.ReplyFlags 853 857 & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; 854 - if (request_desript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) 858 + if (request_desript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) { 859 + atomic_dec(&reply_q->busy); 855 860 return IRQ_NONE; 861 + } 856 862 857 863 completed_cmds = 0; 858 864 cb_idx = 0xFF; ··· 867 861 if (rd.u.low == UINT_MAX || rd.u.high == UINT_MAX) 868 862 goto out; 869 863 reply = 0; 870 - cb_idx = 0xFF; 871 864 smid = le16_to_cpu(rpf->Default.DescriptorTypeDependent1); 872 - msix_index = rpf->Default.MSIxIndex; 873 865 if (request_desript_type == 874 866 MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY) { 875 867 reply = le32_to_cpu ··· 911 907 next: 912 908 913 909 rpf->Words = cpu_to_le64(ULLONG_MAX); 914 - ioc->reply_post_host_index = (ioc->reply_post_host_index == 910 + reply_q->reply_post_host_index = 911 + (reply_q->reply_post_host_index == 915 912 (ioc->reply_post_queue_depth - 1)) ? 0 : 916 - ioc->reply_post_host_index + 1; 913 + reply_q->reply_post_host_index + 1; 917 914 request_desript_type = 918 - ioc->reply_post_free[ioc->reply_post_host_index].Default. 
919 - ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; 915 + reply_q->reply_post_free[reply_q->reply_post_host_index]. 916 + Default.ReplyFlags & MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK; 920 917 completed_cmds++; 921 918 if (request_desript_type == MPI2_RPY_DESCRIPT_FLAGS_UNUSED) 922 919 goto out; 923 - if (!ioc->reply_post_host_index) 924 - rpf = ioc->reply_post_free; 920 + if (!reply_q->reply_post_host_index) 921 + rpf = reply_q->reply_post_free; 925 922 else 926 923 rpf++; 927 924 } while (1); 928 925 929 926 out: 930 927 931 - if (!completed_cmds) 928 + if (!completed_cmds) { 929 + atomic_dec(&reply_q->busy); 932 930 return IRQ_NONE; 933 - 931 + } 934 932 wmb(); 935 - writel(ioc->reply_post_host_index, &ioc->chip->ReplyPostHostIndex); 933 + if (ioc->is_warpdrive) { 934 + writel(reply_q->reply_post_host_index, 935 + ioc->reply_post_host_index[msix_index]); 936 + atomic_dec(&reply_q->busy); 937 + return IRQ_HANDLED; 938 + } 939 + writel(reply_q->reply_post_host_index | (msix_index << 940 + MPI2_RPHI_MSIX_INDEX_SHIFT), &ioc->chip->ReplyPostHostIndex); 941 + atomic_dec(&reply_q->busy); 936 942 return IRQ_HANDLED; 943 + } 944 + 945 + /** 946 + * _base_is_controller_msix_enabled - is controller support muli-reply queues 947 + * @ioc: per adapter object 948 + * 949 + */ 950 + static inline int 951 + _base_is_controller_msix_enabled(struct MPT2SAS_ADAPTER *ioc) 952 + { 953 + return (ioc->facts.IOCCapabilities & 954 + MPI2_IOCFACTS_CAPABILITY_MSI_X_INDEX) && ioc->msix_enable; 955 + } 956 + 957 + /** 958 + * mpt2sas_base_flush_reply_queues - flushing the MSIX reply queues 959 + * @ioc: per adapter object 960 + * Context: ISR conext 961 + * 962 + * Called when a Task Management request has completed. We want 963 + * to flush the other reply queues so all the outstanding IO has been 964 + * completed back to OS before we process the TM completetion. 965 + * 966 + * Return nothing. 
967 + */ 968 + void 969 + mpt2sas_base_flush_reply_queues(struct MPT2SAS_ADAPTER *ioc) 970 + { 971 + struct adapter_reply_queue *reply_q; 972 + 973 + /* If MSIX capability is turned off 974 + * then multi-queues are not enabled 975 + */ 976 + if (!_base_is_controller_msix_enabled(ioc)) 977 + return; 978 + 979 + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { 980 + if (ioc->shost_recovery) 981 + return; 982 + /* TMs are on msix_index == 0 */ 983 + if (reply_q->msix_index == 0) 984 + continue; 985 + _base_interrupt(reply_q->vector, (void *)reply_q); 986 + } 937 987 } 938 988 939 989 /** ··· 1140 1082 } 1141 1083 1142 1084 /** 1143 - * _base_save_msix_table - backup msix vector table 1144 - * @ioc: per adapter object 1145 - * 1146 - * This address an errata where diag reset clears out the table 1147 - */ 1148 - static void 1149 - _base_save_msix_table(struct MPT2SAS_ADAPTER *ioc) 1150 - { 1151 - int i; 1152 - 1153 - if (!ioc->msix_enable || ioc->msix_table_backup == NULL) 1154 - return; 1155 - 1156 - for (i = 0; i < ioc->msix_vector_count; i++) 1157 - ioc->msix_table_backup[i] = ioc->msix_table[i]; 1158 - } 1159 - 1160 - /** 1161 - * _base_restore_msix_table - this restores the msix vector table 1162 - * @ioc: per adapter object 1163 - * 1164 - */ 1165 - static void 1166 - _base_restore_msix_table(struct MPT2SAS_ADAPTER *ioc) 1167 - { 1168 - int i; 1169 - 1170 - if (!ioc->msix_enable || ioc->msix_table_backup == NULL) 1171 - return; 1172 - 1173 - for (i = 0; i < ioc->msix_vector_count; i++) 1174 - ioc->msix_table[i] = ioc->msix_table_backup[i]; 1175 - } 1176 - 1177 - /** 1178 1085 * _base_check_enable_msix - checks MSIX capabable. 
1179 1086 * @ioc: per adapter object 1180 1087 * ··· 1151 1128 { 1152 1129 int base; 1153 1130 u16 message_control; 1154 - u32 msix_table_offset; 1131 + 1155 1132 1156 1133 base = pci_find_capability(ioc->pdev, PCI_CAP_ID_MSIX); 1157 1134 if (!base) { ··· 1161 1138 } 1162 1139 1163 1140 /* get msix vector count */ 1164 - pci_read_config_word(ioc->pdev, base + 2, &message_control); 1165 - ioc->msix_vector_count = (message_control & 0x3FF) + 1; 1166 - 1167 - /* get msix table */ 1168 - pci_read_config_dword(ioc->pdev, base + 4, &msix_table_offset); 1169 - msix_table_offset &= 0xFFFFFFF8; 1170 - ioc->msix_table = (u32 *)((void *)ioc->chip + msix_table_offset); 1171 - 1141 + /* NUMA_IO not supported for older controllers */ 1142 + if (ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2004 || 1143 + ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2008 || 1144 + ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2108_1 || 1145 + ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2108_2 || 1146 + ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2108_3 || 1147 + ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2116_1 || 1148 + ioc->pdev->device == MPI2_MFGPAGE_DEVID_SAS2116_2) 1149 + ioc->msix_vector_count = 1; 1150 + else { 1151 + pci_read_config_word(ioc->pdev, base + 2, &message_control); 1152 + ioc->msix_vector_count = (message_control & 0x3FF) + 1; 1153 + } 1172 1154 dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "msix is supported, " 1173 - "vector_count(%d), table_offset(0x%08x), table(%p)\n", ioc->name, 1174 - ioc->msix_vector_count, msix_table_offset, ioc->msix_table)); 1155 + "vector_count(%d)\n", ioc->name, ioc->msix_vector_count)); 1156 + 1175 1157 return 0; 1158 + } 1159 + 1160 + /** 1161 + * _base_free_irq - free irq 1162 + * @ioc: per adapter object 1163 + * 1164 + * Freeing respective reply_queue from the list. 
1165 + */ 1166 + static void 1167 + _base_free_irq(struct MPT2SAS_ADAPTER *ioc) 1168 + { 1169 + struct adapter_reply_queue *reply_q, *next; 1170 + 1171 + if (list_empty(&ioc->reply_queue_list)) 1172 + return; 1173 + 1174 + list_for_each_entry_safe(reply_q, next, &ioc->reply_queue_list, list) { 1175 + list_del(&reply_q->list); 1176 + synchronize_irq(reply_q->vector); 1177 + free_irq(reply_q->vector, reply_q); 1178 + kfree(reply_q); 1179 + } 1180 + } 1181 + 1182 + /** 1183 + * _base_request_irq - request irq 1184 + * @ioc: per adapter object 1185 + * @index: msix index into vector table 1186 + * @vector: irq vector 1187 + * 1188 + * Inserting respective reply_queue into the list. 1189 + */ 1190 + static int 1191 + _base_request_irq(struct MPT2SAS_ADAPTER *ioc, u8 index, u32 vector) 1192 + { 1193 + struct adapter_reply_queue *reply_q; 1194 + int r; 1195 + 1196 + reply_q = kzalloc(sizeof(struct adapter_reply_queue), GFP_KERNEL); 1197 + if (!reply_q) { 1198 + printk(MPT2SAS_ERR_FMT "unable to allocate memory %d!\n", 1199 + ioc->name, (int)sizeof(struct adapter_reply_queue)); 1200 + return -ENOMEM; 1201 + } 1202 + reply_q->ioc = ioc; 1203 + reply_q->msix_index = index; 1204 + reply_q->vector = vector; 1205 + atomic_set(&reply_q->busy, 0); 1206 + if (ioc->msix_enable) 1207 + snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d-msix%d", 1208 + MPT2SAS_DRIVER_NAME, ioc->id, index); 1209 + else 1210 + snprintf(reply_q->name, MPT_NAME_LENGTH, "%s%d", 1211 + MPT2SAS_DRIVER_NAME, ioc->id); 1212 + r = request_irq(vector, _base_interrupt, IRQF_SHARED, reply_q->name, 1213 + reply_q); 1214 + if (r) { 1215 + printk(MPT2SAS_ERR_FMT "unable to allocate interrupt %d!\n", 1216 + reply_q->name, vector); 1217 + kfree(reply_q); 1218 + return -EBUSY; 1219 + } 1220 + 1221 + INIT_LIST_HEAD(&reply_q->list); 1222 + list_add_tail(&reply_q->list, &ioc->reply_queue_list); 1223 + return 0; 1224 + } 1225 + 1226 + /** 1227 + * _base_assign_reply_queues - assigning msix index for each cpu 1228 + * @ioc: per 
adapter object 1229 + * 1230 + * The enduser would need to set the affinity via /proc/irq/#/smp_affinity 1231 + * 1232 + * It would nice if we could call irq_set_affinity, however it is not 1233 + * an exported symbol 1234 + */ 1235 + static void 1236 + _base_assign_reply_queues(struct MPT2SAS_ADAPTER *ioc) 1237 + { 1238 + struct adapter_reply_queue *reply_q; 1239 + int cpu_id; 1240 + int cpu_grouping, loop, grouping, grouping_mod; 1241 + 1242 + if (!_base_is_controller_msix_enabled(ioc)) 1243 + return; 1244 + 1245 + memset(ioc->cpu_msix_table, 0, ioc->cpu_msix_table_sz); 1246 + /* when there are more cpus than available msix vectors, 1247 + * then group cpus togeather on same irq 1248 + */ 1249 + if (ioc->cpu_count > ioc->msix_vector_count) { 1250 + grouping = ioc->cpu_count / ioc->msix_vector_count; 1251 + grouping_mod = ioc->cpu_count % ioc->msix_vector_count; 1252 + if (grouping < 2 || (grouping == 2 && !grouping_mod)) 1253 + cpu_grouping = 2; 1254 + else if (grouping < 4 || (grouping == 4 && !grouping_mod)) 1255 + cpu_grouping = 4; 1256 + else if (grouping < 8 || (grouping == 8 && !grouping_mod)) 1257 + cpu_grouping = 8; 1258 + else 1259 + cpu_grouping = 16; 1260 + } else 1261 + cpu_grouping = 0; 1262 + 1263 + loop = 0; 1264 + reply_q = list_entry(ioc->reply_queue_list.next, 1265 + struct adapter_reply_queue, list); 1266 + for_each_online_cpu(cpu_id) { 1267 + if (!cpu_grouping) { 1268 + ioc->cpu_msix_table[cpu_id] = reply_q->msix_index; 1269 + reply_q = list_entry(reply_q->list.next, 1270 + struct adapter_reply_queue, list); 1271 + } else { 1272 + if (loop < cpu_grouping) { 1273 + ioc->cpu_msix_table[cpu_id] = 1274 + reply_q->msix_index; 1275 + loop++; 1276 + } else { 1277 + reply_q = list_entry(reply_q->list.next, 1278 + struct adapter_reply_queue, list); 1279 + ioc->cpu_msix_table[cpu_id] = 1280 + reply_q->msix_index; 1281 + loop = 1; 1282 + } 1283 + } 1284 + } 1176 1285 } 1177 1286 1178 1287 /** ··· 1317 1162 { 1318 1163 if (ioc->msix_enable) { 1319 1164 
pci_disable_msix(ioc->pdev); 1320 - kfree(ioc->msix_table_backup); 1321 - ioc->msix_table_backup = NULL; 1322 1165 ioc->msix_enable = 0; 1323 1166 } 1324 1167 } ··· 1329 1176 static int 1330 1177 _base_enable_msix(struct MPT2SAS_ADAPTER *ioc) 1331 1178 { 1332 - struct msix_entry entries; 1179 + struct msix_entry *entries, *a; 1333 1180 int r; 1181 + int i; 1334 1182 u8 try_msix = 0; 1183 + 1184 + INIT_LIST_HEAD(&ioc->reply_queue_list); 1335 1185 1336 1186 if (msix_disable == -1 || msix_disable == 0) 1337 1187 try_msix = 1; ··· 1345 1189 if (_base_check_enable_msix(ioc) != 0) 1346 1190 goto try_ioapic; 1347 1191 1348 - ioc->msix_table_backup = kcalloc(ioc->msix_vector_count, 1349 - sizeof(u32), GFP_KERNEL); 1350 - if (!ioc->msix_table_backup) { 1351 - dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "allocation for " 1352 - "msix_table_backup failed!!!\n", ioc->name)); 1192 + ioc->reply_queue_count = min_t(u8, ioc->cpu_count, 1193 + ioc->msix_vector_count); 1194 + 1195 + entries = kcalloc(ioc->reply_queue_count, sizeof(struct msix_entry), 1196 + GFP_KERNEL); 1197 + if (!entries) { 1198 + dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "kcalloc " 1199 + "failed @ at %s:%d/%s() !!!\n", ioc->name, __FILE__, 1200 + __LINE__, __func__)); 1353 1201 goto try_ioapic; 1354 1202 } 1355 1203 1356 - memset(&entries, 0, sizeof(struct msix_entry)); 1357 - r = pci_enable_msix(ioc->pdev, &entries, 1); 1204 + for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) 1205 + a->entry = i; 1206 + 1207 + r = pci_enable_msix(ioc->pdev, entries, ioc->reply_queue_count); 1358 1208 if (r) { 1359 1209 dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "pci_enable_msix " 1360 1210 "failed (r=%d) !!!\n", ioc->name, r)); 1211 + kfree(entries); 1361 1212 goto try_ioapic; 1362 1213 } 1363 1214 1364 - r = request_irq(entries.vector, _base_interrupt, IRQF_SHARED, 1365 - ioc->name, ioc); 1366 - if (r) { 1367 - dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "unable to allocate " 1368 - "interrupt %d !!!\n", ioc->name, 
entries.vector)); 1369 - pci_disable_msix(ioc->pdev); 1370 - goto try_ioapic; 1371 - } 1372 - 1373 - ioc->pci_irq = entries.vector; 1374 1215 ioc->msix_enable = 1; 1216 + for (i = 0, a = entries; i < ioc->reply_queue_count; i++, a++) { 1217 + r = _base_request_irq(ioc, i, a->vector); 1218 + if (r) { 1219 + _base_free_irq(ioc); 1220 + _base_disable_msix(ioc); 1221 + kfree(entries); 1222 + goto try_ioapic; 1223 + } 1224 + } 1225 + 1226 + kfree(entries); 1375 1227 return 0; 1376 1228 1377 1229 /* failback to io_apic interrupt routing */ 1378 1230 try_ioapic: 1379 1231 1380 - r = request_irq(ioc->pdev->irq, _base_interrupt, IRQF_SHARED, 1381 - ioc->name, ioc); 1382 - if (r) { 1383 - printk(MPT2SAS_ERR_FMT "unable to allocate interrupt %d!\n", 1384 - ioc->name, ioc->pdev->irq); 1385 - r = -EBUSY; 1386 - goto out_fail; 1387 - } 1232 + r = _base_request_irq(ioc, 0, ioc->pdev->irq); 1388 1233 1389 - ioc->pci_irq = ioc->pdev->irq; 1390 - return 0; 1391 - 1392 - out_fail: 1393 1234 return r; 1394 1235 } 1395 1236 ··· 1405 1252 int i, r = 0; 1406 1253 u64 pio_chip = 0; 1407 1254 u64 chip_phys = 0; 1255 + struct adapter_reply_queue *reply_q; 1408 1256 1409 1257 dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", 1410 1258 ioc->name, __func__)); ··· 1468 1314 if (r) 1469 1315 goto out_fail; 1470 1316 1471 - printk(MPT2SAS_INFO_FMT "%s: IRQ %d\n", 1472 - ioc->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" : 1473 - "IO-APIC enabled"), ioc->pci_irq); 1317 + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) 1318 + printk(MPT2SAS_INFO_FMT "%s: IRQ %d\n", 1319 + reply_q->name, ((ioc->msix_enable) ? 
"PCI-MSI-X enabled" : 1320 + "IO-APIC enabled"), reply_q->vector); 1321 + 1474 1322 printk(MPT2SAS_INFO_FMT "iomem(0x%016llx), mapped(0x%p), size(%d)\n", 1475 1323 ioc->name, (unsigned long long)chip_phys, ioc->chip, memap_sz); 1476 1324 printk(MPT2SAS_INFO_FMT "ioport(0x%016llx), size(%d)\n", ··· 1487 1331 if (ioc->chip_phys) 1488 1332 iounmap(ioc->chip); 1489 1333 ioc->chip_phys = 0; 1490 - ioc->pci_irq = -1; 1491 1334 pci_release_selected_regions(ioc->pdev, ioc->bars); 1492 1335 pci_disable_pcie_error_reporting(pdev); 1493 1336 pci_disable_device(pdev); ··· 1733 1578 } 1734 1579 #endif 1735 1580 1581 + static inline u8 1582 + _base_get_msix_index(struct MPT2SAS_ADAPTER *ioc) 1583 + { 1584 + return ioc->cpu_msix_table[smp_processor_id()]; 1585 + } 1586 + 1736 1587 /** 1737 1588 * mpt2sas_base_put_smid_scsi_io - send SCSI_IO request to firmware 1738 1589 * @ioc: per adapter object ··· 1755 1594 1756 1595 1757 1596 descriptor.SCSIIO.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO; 1758 - descriptor.SCSIIO.MSIxIndex = 0; /* TODO */ 1597 + descriptor.SCSIIO.MSIxIndex = _base_get_msix_index(ioc); 1759 1598 descriptor.SCSIIO.SMID = cpu_to_le16(smid); 1760 1599 descriptor.SCSIIO.DevHandle = cpu_to_le16(handle); 1761 1600 descriptor.SCSIIO.LMID = 0; ··· 1779 1618 1780 1619 descriptor.HighPriority.RequestFlags = 1781 1620 MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY; 1782 - descriptor.HighPriority.MSIxIndex = 0; /* TODO */ 1621 + descriptor.HighPriority.MSIxIndex = 0; 1783 1622 descriptor.HighPriority.SMID = cpu_to_le16(smid); 1784 1623 descriptor.HighPriority.LMID = 0; 1785 1624 descriptor.HighPriority.Reserved1 = 0; ··· 1801 1640 u64 *request = (u64 *)&descriptor; 1802 1641 1803 1642 descriptor.Default.RequestFlags = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE; 1804 - descriptor.Default.MSIxIndex = 0; /* TODO */ 1643 + descriptor.Default.MSIxIndex = _base_get_msix_index(ioc); 1805 1644 descriptor.Default.SMID = cpu_to_le16(smid); 1806 1645 descriptor.Default.LMID = 0; 1807 1646 
descriptor.Default.DescriptorTypeDependent = 0; ··· 1826 1665 1827 1666 descriptor.SCSITarget.RequestFlags = 1828 1667 MPI2_REQ_DESCRIPT_FLAGS_SCSI_TARGET; 1829 - descriptor.SCSITarget.MSIxIndex = 0; /* TODO */ 1668 + descriptor.SCSITarget.MSIxIndex = _base_get_msix_index(ioc); 1830 1669 descriptor.SCSITarget.SMID = cpu_to_le16(smid); 1831 1670 descriptor.SCSITarget.LMID = 0; 1832 1671 descriptor.SCSITarget.IoIndex = cpu_to_le16(io_index); ··· 2333 2172 u16 max_sge_elements; 2334 2173 u16 num_of_reply_frames; 2335 2174 u16 chains_needed_per_io; 2336 - u32 sz, total_sz; 2175 + u32 sz, total_sz, reply_post_free_sz; 2337 2176 u32 retry_sz; 2338 2177 u16 max_request_credit; 2339 2178 int i; ··· 2660 2499 total_sz += sz; 2661 2500 2662 2501 /* reply post queue, 16 byte align */ 2663 - sz = ioc->reply_post_queue_depth * sizeof(Mpi2DefaultReplyDescriptor_t); 2502 + reply_post_free_sz = ioc->reply_post_queue_depth * 2503 + sizeof(Mpi2DefaultReplyDescriptor_t); 2504 + if (_base_is_controller_msix_enabled(ioc)) 2505 + sz = reply_post_free_sz * ioc->reply_queue_count; 2506 + else 2507 + sz = reply_post_free_sz; 2664 2508 ioc->reply_post_free_dma_pool = pci_pool_create("reply_post_free pool", 2665 2509 ioc->pdev, sz, 16, 0); 2666 2510 if (!ioc->reply_post_free_dma_pool) { ··· 3353 3187 facts->MaxChainDepth = mpi_reply.MaxChainDepth; 3354 3188 facts->WhoInit = mpi_reply.WhoInit; 3355 3189 facts->NumberOfPorts = mpi_reply.NumberOfPorts; 3190 + facts->MaxMSIxVectors = mpi_reply.MaxMSIxVectors; 3356 3191 facts->RequestCredit = le16_to_cpu(mpi_reply.RequestCredit); 3357 3192 facts->MaxReplyDescriptorPostQueueDepth = 3358 3193 le16_to_cpu(mpi_reply.MaxReplyDescriptorPostQueueDepth); ··· 3411 3244 mpi_request.MsgVersion = cpu_to_le16(MPI2_VERSION); 3412 3245 mpi_request.HeaderVersion = cpu_to_le16(MPI2_HEADER_VERSION); 3413 3246 3414 - 3247 + if (_base_is_controller_msix_enabled(ioc)) 3248 + mpi_request.HostMSIxVectors = ioc->reply_queue_count; 3415 3249 
mpi_request.SystemRequestFrameSize = cpu_to_le16(ioc->request_sz/4); 3416 3250 mpi_request.ReplyDescriptorPostQueueDepth = 3417 3251 cpu_to_le16(ioc->reply_post_queue_depth); ··· 3681 3513 u32 hcb_size; 3682 3514 3683 3515 printk(MPT2SAS_INFO_FMT "sending diag reset !!\n", ioc->name); 3684 - 3685 - _base_save_msix_table(ioc); 3686 - 3687 3516 drsprintk(ioc, printk(MPT2SAS_INFO_FMT "clear interrupts\n", 3688 3517 ioc->name)); 3689 3518 ··· 3776 3611 goto out; 3777 3612 } 3778 3613 3779 - _base_restore_msix_table(ioc); 3780 3614 printk(MPT2SAS_INFO_FMT "diag reset: SUCCESS\n", ioc->name); 3781 3615 return 0; 3782 3616 ··· 3856 3692 u16 smid; 3857 3693 struct _tr_list *delayed_tr, *delayed_tr_next; 3858 3694 u8 hide_flag; 3695 + struct adapter_reply_queue *reply_q; 3696 + long reply_post_free; 3697 + u32 reply_post_free_sz; 3859 3698 3860 3699 dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, 3861 3700 __func__)); ··· 3924 3757 ioc->reply_sz) 3925 3758 ioc->reply_free[i] = cpu_to_le32(reply_address); 3926 3759 3760 + /* initialize reply queues */ 3761 + _base_assign_reply_queues(ioc); 3762 + 3927 3763 /* initialize Reply Post Free Queue */ 3928 - for (i = 0; i < ioc->reply_post_queue_depth; i++) 3929 - ioc->reply_post_free[i].Words = cpu_to_le64(ULLONG_MAX); 3764 + reply_post_free = (long)ioc->reply_post_free; 3765 + reply_post_free_sz = ioc->reply_post_queue_depth * 3766 + sizeof(Mpi2DefaultReplyDescriptor_t); 3767 + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { 3768 + reply_q->reply_post_host_index = 0; 3769 + reply_q->reply_post_free = (Mpi2ReplyDescriptorsUnion_t *) 3770 + reply_post_free; 3771 + for (i = 0; i < ioc->reply_post_queue_depth; i++) 3772 + reply_q->reply_post_free[i].Words = 3773 + cpu_to_le64(ULLONG_MAX); 3774 + if (!_base_is_controller_msix_enabled(ioc)) 3775 + goto skip_init_reply_post_free_queue; 3776 + reply_post_free += reply_post_free_sz; 3777 + } 3778 + skip_init_reply_post_free_queue: 3930 3779 3931 3780 r = 
_base_send_ioc_init(ioc, sleep_flag); 3932 3781 if (r) 3933 3782 return r; 3934 3783 3935 - /* initialize the index's */ 3784 + /* initialize reply free host index */ 3936 3785 ioc->reply_free_host_index = ioc->reply_free_queue_depth - 1; 3937 - ioc->reply_post_host_index = 0; 3938 3786 writel(ioc->reply_free_host_index, &ioc->chip->ReplyFreeHostIndex); 3939 - writel(0, &ioc->chip->ReplyPostHostIndex); 3787 + 3788 + /* initialize reply post host index */ 3789 + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) { 3790 + writel(reply_q->msix_index << MPI2_RPHI_MSIX_INDEX_SHIFT, 3791 + &ioc->chip->ReplyPostHostIndex); 3792 + if (!_base_is_controller_msix_enabled(ioc)) 3793 + goto skip_init_reply_post_host_index; 3794 + } 3795 + 3796 + skip_init_reply_post_host_index: 3940 3797 3941 3798 _base_unmask_interrupts(ioc); 3942 3799 r = _base_event_notification(ioc, sleep_flag); ··· 4011 3820 ioc->shost_recovery = 1; 4012 3821 _base_make_ioc_ready(ioc, CAN_SLEEP, SOFT_RESET); 4013 3822 ioc->shost_recovery = 0; 4014 - if (ioc->pci_irq) { 4015 - synchronize_irq(pdev->irq); 4016 - free_irq(ioc->pci_irq, ioc); 4017 - } 3823 + _base_free_irq(ioc); 4018 3824 _base_disable_msix(ioc); 4019 3825 if (ioc->chip_phys) 4020 3826 iounmap(ioc->chip); 4021 - ioc->pci_irq = -1; 4022 3827 ioc->chip_phys = 0; 4023 3828 pci_release_selected_regions(ioc->pdev, ioc->bars); 4024 3829 pci_disable_pcie_error_reporting(pdev); ··· 4032 3845 mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) 4033 3846 { 4034 3847 int r, i; 3848 + int cpu_id, last_cpu_id = 0; 4035 3849 4036 3850 dinitprintk(ioc, printk(MPT2SAS_INFO_FMT "%s\n", ioc->name, 4037 3851 __func__)); 4038 3852 3853 + /* setup cpu_msix_table */ 3854 + ioc->cpu_count = num_online_cpus(); 3855 + for_each_online_cpu(cpu_id) 3856 + last_cpu_id = cpu_id; 3857 + ioc->cpu_msix_table_sz = last_cpu_id + 1; 3858 + ioc->cpu_msix_table = kzalloc(ioc->cpu_msix_table_sz, GFP_KERNEL); 3859 + ioc->reply_queue_count = 1; 3860 + if (!ioc->cpu_msix_table) { 
3861 + dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "allocation for " 3862 + "cpu_msix_table failed!!!\n", ioc->name)); 3863 + r = -ENOMEM; 3864 + goto out_free_resources; 3865 + } 3866 + 3867 + if (ioc->is_warpdrive) { 3868 + ioc->reply_post_host_index = kcalloc(ioc->cpu_msix_table_sz, 3869 + sizeof(resource_size_t *), GFP_KERNEL); 3870 + if (!ioc->reply_post_host_index) { 3871 + dfailprintk(ioc, printk(MPT2SAS_INFO_FMT "allocation " 3872 + "for cpu_msix_table failed!!!\n", ioc->name)); 3873 + r = -ENOMEM; 3874 + goto out_free_resources; 3875 + } 3876 + } 3877 + 4039 3878 r = mpt2sas_base_map_resources(ioc); 4040 3879 if (r) 4041 3880 return r; 3881 + 3882 + if (ioc->is_warpdrive) { 3883 + ioc->reply_post_host_index[0] = 3884 + (resource_size_t *)&ioc->chip->ReplyPostHostIndex; 3885 + 3886 + for (i = 1; i < ioc->cpu_msix_table_sz; i++) 3887 + ioc->reply_post_host_index[i] = (resource_size_t *) 3888 + ((u8 *)&ioc->chip->Doorbell + (0x4000 + ((i - 1) 3889 + * 4))); 3890 + } 4042 3891 4043 3892 pci_set_drvdata(ioc->pdev, ioc->shost); 4044 3893 r = _base_get_ioc_facts(ioc, CAN_SLEEP); ··· 4196 3973 mpt2sas_base_free_resources(ioc); 4197 3974 _base_release_memory_pools(ioc); 4198 3975 pci_set_drvdata(ioc->pdev, NULL); 3976 + kfree(ioc->cpu_msix_table); 3977 + if (ioc->is_warpdrive) 3978 + kfree(ioc->reply_post_host_index); 4199 3979 kfree(ioc->pd_handles); 4200 3980 kfree(ioc->tm_cmds.reply); 4201 3981 kfree(ioc->transport_cmds.reply); ··· 4236 4010 mpt2sas_base_free_resources(ioc); 4237 4011 _base_release_memory_pools(ioc); 4238 4012 pci_set_drvdata(ioc->pdev, NULL); 4013 + kfree(ioc->cpu_msix_table); 4014 + if (ioc->is_warpdrive) 4015 + kfree(ioc->reply_post_host_index); 4239 4016 kfree(ioc->pd_handles); 4240 4017 kfree(ioc->pfacts); 4241 4018 kfree(ioc->ctl_cmds.reply);
+34 -8
drivers/scsi/mpt2sas/mpt2sas_base.h
··· 544 544 545 545 typedef void (*MPT_ADD_SGE)(void *paddr, u32 flags_length, dma_addr_t dma_addr); 546 546 547 + /** 548 + * struct adapter_reply_queue - the reply queue struct 549 + * @ioc: per adapter object 550 + * @msix_index: msix index into vector table 551 + * @vector: irq vector 552 + * @reply_post_host_index: head index in the pool where FW completes IO 553 + * @reply_post_free: reply post base virt address 554 + * @name: the name registered to request_irq() 555 + * @busy: isr is actively processing replies on another cpu 556 + * @list: this list 557 + */ 558 + struct adapter_reply_queue { 559 + struct MPT2SAS_ADAPTER *ioc; 560 + u8 msix_index; 561 + unsigned int vector; 562 + u32 reply_post_host_index; 563 + Mpi2ReplyDescriptorsUnion_t *reply_post_free; 564 + char name[MPT_NAME_LENGTH]; 565 + atomic_t busy; 566 + struct list_head list; 567 + }; 568 + 547 569 /* IOC Facts and Port Facts converted from little endian to cpu */ 548 570 union mpi2_version_union { 549 571 MPI2_VERSION_STRUCT Struct; ··· 628 606 * @list: ioc_list 629 607 * @shost: shost object 630 608 * @id: unique adapter id 631 - * @pci_irq: irq number 609 + * @cpu_count: number online cpus 632 610 * @name: generic ioc string 633 611 * @tmp_string: tmp string used for logging 634 612 * @pdev: pci pdev object ··· 658 636 * @wait_for_port_enable_to_complete: 659 637 * @msix_enable: flag indicating msix is enabled 660 638 * @msix_vector_count: number msix vectors 661 - * @msix_table: virt address to the msix table 662 - * @msix_table_backup: backup msix table 639 + * @cpu_msix_table: table for mapping cpus to msix index 640 + * @cpu_msix_table_sz: table size 663 641 * @scsi_io_cb_idx: shost generated commands 664 642 * @tm_cb_idx: task management commands 665 643 * @scsih_cb_idx: scsih internal commands ··· 750 728 * @reply_post_queue_depth: reply post queue depth 751 729 * @reply_post_free: pool for reply post (64bit descriptor) 752 730 * @reply_post_free_dma: 753 - * 
@reply_post_free_dma_pool: 731 + * @reply_queue_count: number of reply queue's 732 + * @reply_queue_list: link list contaning the reply queue info 754 733 * @reply_post_host_index: head index in the pool where FW completes IO 755 734 * @delayed_tr_list: target reset link list 756 735 * @delayed_tr_volume_list: volume target reset link list ··· 760 737 struct list_head list; 761 738 struct Scsi_Host *shost; 762 739 u8 id; 763 - u32 pci_irq; 740 + int cpu_count; 764 741 char name[MPT_NAME_LENGTH]; 765 742 char tmp_string[MPT_STRING_LENGTH]; 766 743 struct pci_dev *pdev; ··· 802 779 803 780 u8 msix_enable; 804 781 u16 msix_vector_count; 805 - u32 *msix_table; 806 - u32 *msix_table_backup; 782 + u8 *cpu_msix_table; 783 + resource_size_t **reply_post_host_index; 784 + u16 cpu_msix_table_sz; 807 785 u32 ioc_reset_count; 808 786 809 787 /* internal commands, callback index */ ··· 935 911 Mpi2ReplyDescriptorsUnion_t *reply_post_free; 936 912 dma_addr_t reply_post_free_dma; 937 913 struct dma_pool *reply_post_free_dma_pool; 938 - u32 reply_post_host_index; 914 + u8 reply_queue_count; 915 + struct list_head reply_queue_list; 939 916 940 917 struct list_head delayed_tr_list; 941 918 struct list_head delayed_tr_volume_list; ··· 980 955 void mpt2sas_base_build_zero_len_sge(struct MPT2SAS_ADAPTER *ioc, void *paddr); 981 956 __le32 mpt2sas_base_get_sense_buffer_dma(struct MPT2SAS_ADAPTER *ioc, 982 957 u16 smid); 958 + void mpt2sas_base_flush_reply_queues(struct MPT2SAS_ADAPTER *ioc); 983 959 984 960 /* hi-priority queue */ 985 961 u16 mpt2sas_base_get_smid_hpr(struct MPT2SAS_ADAPTER *ioc, u8 cb_idx);
+28
drivers/scsi/mpt2sas/mpt2sas_ctl.c
··· 2705 2705 static DEVICE_ATTR(ioc_reset_count, S_IRUGO, 2706 2706 _ctl_ioc_reset_count_show, NULL); 2707 2707 2708 + /** 2709 + * _ctl_ioc_reply_queue_count_show - number of reply queues 2710 + * @cdev - pointer to embedded class device 2711 + * @buf - the buffer returned 2712 + * 2713 + * This is number of reply queues 2714 + * 2715 + * A sysfs 'read-only' shost attribute. 2716 + */ 2717 + static ssize_t 2718 + _ctl_ioc_reply_queue_count_show(struct device *cdev, 2719 + struct device_attribute *attr, char *buf) 2720 + { 2721 + u8 reply_queue_count; 2722 + struct Scsi_Host *shost = class_to_shost(cdev); 2723 + struct MPT2SAS_ADAPTER *ioc = shost_priv(shost); 2724 + 2725 + if ((ioc->facts.IOCCapabilities & 2726 + MPI2_IOCFACTS_CAPABILITY_MSI_X_INDEX) && ioc->msix_enable) 2727 + reply_queue_count = ioc->reply_queue_count; 2728 + else 2729 + reply_queue_count = 1; 2730 + return snprintf(buf, PAGE_SIZE, "%d\n", reply_queue_count); 2731 + } 2732 + static DEVICE_ATTR(reply_queue_count, S_IRUGO, 2733 + _ctl_ioc_reply_queue_count_show, NULL); 2734 + 2708 2735 struct DIAG_BUFFER_START { 2709 2736 __le32 Size; 2710 2737 __le32 DiagVersion; ··· 2942 2915 &dev_attr_host_trace_buffer_size, 2943 2916 &dev_attr_host_trace_buffer, 2944 2917 &dev_attr_host_trace_buffer_enable, 2918 + &dev_attr_reply_queue_count, 2945 2919 NULL, 2946 2920 }; 2947 2921
+1
drivers/scsi/mpt2sas/mpt2sas_scsih.c
··· 2162 2162 return 1; 2163 2163 if (ioc->tm_cmds.smid != smid) 2164 2164 return 1; 2165 + mpt2sas_base_flush_reply_queues(ioc); 2165 2166 ioc->tm_cmds.status |= MPT2_CMD_COMPLETE; 2166 2167 mpi_reply = mpt2sas_base_get_reply_virt_addr(ioc, reply); 2167 2168 if (mpi_reply) {