Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: lpfc: Fix poor use of hardware queues if fewer irq vectors

While fixing the resources per socket, it was realized that the driver was
not using all of its hardware queues (up to 1 per cpu) if there were fewer
interrupt vectors than cpus. The driver was only using the hardware queue
assigned to the cpu that owned the vector.

Rework the affinity map check to use the additional hardware queue elements
that had been allocated. If the cpu count exceeds the hardware queue count,
share a queue, choosing what to share with in order of preference: a
hyperthread peer, a core peer, a socket peer, or finally a similar cpu in a
different socket.

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

James Smart and committed by
Martin K. Petersen
657add4e d9954a2d

+255 -113
+2 -1
drivers/scsi/lpfc/lpfc_attr.c
··· 5115 5115 5116 5116 /* set the values on the cq's */ 5117 5117 for (i = 0; i < phba->cfg_irq_chann; i++) { 5118 - eq = phba->sli4_hba.hdwq[i].hba_eq; 5118 + /* Get the EQ corresponding to the IRQ vector */ 5119 + eq = phba->sli4_hba.hba_eq_hdl[i].eq; 5119 5120 if (!eq) 5120 5121 continue; 5121 5122
+226 -95
drivers/scsi/lpfc/lpfc_init.c
··· 93 93 static void lpfc_sli4_disable_intr(struct lpfc_hba *); 94 94 static uint32_t lpfc_sli4_enable_intr(struct lpfc_hba *, uint32_t); 95 95 static void lpfc_sli4_oas_verify(struct lpfc_hba *phba); 96 - static uint16_t lpfc_find_eq_handle(struct lpfc_hba *, uint16_t); 97 96 static uint16_t lpfc_find_cpu_handle(struct lpfc_hba *, uint16_t, int); 98 97 99 98 static struct scsi_transport_template *lpfc_transport_template = NULL; ··· 1273 1274 if (!eqcnt) 1274 1275 goto requeue; 1275 1276 1277 + /* Loop thru all IRQ vectors */ 1276 1278 for (i = 0; i < phba->cfg_irq_chann; i++) { 1277 - eq = phba->sli4_hba.hdwq[i].hba_eq; 1279 + /* Get the EQ corresponding to the IRQ vector */ 1280 + eq = phba->sli4_hba.hba_eq_hdl[i].eq; 1278 1281 if (eq && eqcnt[eq->last_cpu] < 2) 1279 1282 eqcnt[eq->last_cpu]++; 1280 1283 continue; ··· 8749 8748 lpfc_sli4_queue_create(struct lpfc_hba *phba) 8750 8749 { 8751 8750 struct lpfc_queue *qdesc; 8752 - int idx, eqidx, cpu; 8751 + int idx, cpu, eqcpu; 8753 8752 struct lpfc_sli4_hdw_queue *qp; 8753 + struct lpfc_vector_map_info *cpup; 8754 + struct lpfc_vector_map_info *eqcpup; 8754 8755 struct lpfc_eq_intr_info *eqi; 8755 8756 8756 8757 /* ··· 8837 8834 INIT_LIST_HEAD(&phba->sli4_hba.lpfc_wq_list); 8838 8835 8839 8836 /* Create HBA Event Queues (EQs) */ 8840 - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { 8841 - /* determine EQ affinity */ 8842 - eqidx = lpfc_find_eq_handle(phba, idx); 8843 - cpu = lpfc_find_cpu_handle(phba, eqidx, LPFC_FIND_BY_EQ); 8844 - /* 8845 - * If there are more Hardware Queues than available 8846 - * EQs, multiple Hardware Queues may share a common EQ. 8837 + for_each_present_cpu(cpu) { 8838 + /* We only want to create 1 EQ per vector, even though 8839 + * multiple CPUs might be using that vector. so only 8840 + * selects the CPUs that are LPFC_CPU_FIRST_IRQ. 
8847 8841 */ 8848 - if (idx >= phba->cfg_irq_chann) { 8849 - /* Share an existing EQ */ 8850 - phba->sli4_hba.hdwq[idx].hba_eq = 8851 - phba->sli4_hba.hdwq[eqidx].hba_eq; 8842 + cpup = &phba->sli4_hba.cpu_map[cpu]; 8843 + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) 8852 8844 continue; 8853 - } 8854 - /* Create an EQ */ 8845 + 8846 + /* Get a ptr to the Hardware Queue associated with this CPU */ 8847 + qp = &phba->sli4_hba.hdwq[cpup->hdwq]; 8848 + 8849 + /* Allocate an EQ */ 8855 8850 qdesc = lpfc_sli4_queue_alloc(phba, LPFC_DEFAULT_PAGE_SIZE, 8856 8851 phba->sli4_hba.eq_esize, 8857 8852 phba->sli4_hba.eq_ecount, cpu); 8858 8853 if (!qdesc) { 8859 8854 lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 8860 - "0497 Failed allocate EQ (%d)\n", idx); 8855 + "0497 Failed allocate EQ (%d)\n", 8856 + cpup->hdwq); 8861 8857 goto out_error; 8862 8858 } 8863 8859 qdesc->qe_valid = 1; 8864 - qdesc->hdwq = idx; 8865 - 8866 - /* Save the CPU this EQ is affinitised to */ 8867 - qdesc->chann = cpu; 8868 - phba->sli4_hba.hdwq[idx].hba_eq = qdesc; 8860 + qdesc->hdwq = cpup->hdwq; 8861 + qdesc->chann = cpu; /* First CPU this EQ is affinitised to */ 8869 8862 qdesc->last_cpu = qdesc->chann; 8863 + 8864 + /* Save the allocated EQ in the Hardware Queue */ 8865 + qp->hba_eq = qdesc; 8866 + 8870 8867 eqi = per_cpu_ptr(phba->sli4_hba.eq_info, qdesc->last_cpu); 8871 8868 list_add(&qdesc->cpu_list, &eqi->list); 8872 8869 } 8873 8870 8871 + /* Now we need to populate the other Hardware Queues, that share 8872 + * an IRQ vector, with the associated EQ ptr. 
8873 + */ 8874 + for_each_present_cpu(cpu) { 8875 + cpup = &phba->sli4_hba.cpu_map[cpu]; 8876 + 8877 + /* Check for EQ already allocated in previous loop */ 8878 + if (cpup->flag & LPFC_CPU_FIRST_IRQ) 8879 + continue; 8880 + 8881 + /* Check for multiple CPUs per hdwq */ 8882 + qp = &phba->sli4_hba.hdwq[cpup->hdwq]; 8883 + if (qp->hba_eq) 8884 + continue; 8885 + 8886 + /* We need to share an EQ for this hdwq */ 8887 + eqcpu = lpfc_find_cpu_handle(phba, cpup->eq, LPFC_FIND_BY_EQ); 8888 + eqcpup = &phba->sli4_hba.cpu_map[eqcpu]; 8889 + qp->hba_eq = phba->sli4_hba.hdwq[eqcpup->hdwq].hba_eq; 8890 + } 8874 8891 8875 8892 /* Allocate SCSI SLI4 CQ/WQs */ 8876 8893 for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { ··· 9153 9130 lpfc_sli4_release_hdwq(struct lpfc_hba *phba) 9154 9131 { 9155 9132 struct lpfc_sli4_hdw_queue *hdwq; 9133 + struct lpfc_queue *eq; 9156 9134 uint32_t idx; 9157 9135 9158 9136 hdwq = phba->sli4_hba.hdwq; 9159 - for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { 9160 - if (idx < phba->cfg_irq_chann) 9161 - lpfc_sli4_queue_free(hdwq[idx].hba_eq); 9162 - hdwq[idx].hba_eq = NULL; 9163 9137 9138 + /* Loop thru all Hardware Queues */ 9139 + for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { 9140 + /* Free the CQ/WQ corresponding to the Hardware Queue */ 9164 9141 lpfc_sli4_queue_free(hdwq[idx].fcp_cq); 9165 9142 lpfc_sli4_queue_free(hdwq[idx].nvme_cq); 9166 9143 lpfc_sli4_queue_free(hdwq[idx].fcp_wq); 9167 9144 lpfc_sli4_queue_free(hdwq[idx].nvme_wq); 9145 + hdwq[idx].hba_eq = NULL; 9168 9146 hdwq[idx].fcp_cq = NULL; 9169 9147 hdwq[idx].nvme_cq = NULL; 9170 9148 hdwq[idx].fcp_wq = NULL; 9171 9149 hdwq[idx].nvme_wq = NULL; 9150 + } 9151 + /* Loop thru all IRQ vectors */ 9152 + for (idx = 0; idx < phba->cfg_irq_chann; idx++) { 9153 + /* Free the EQ corresponding to the IRQ vector */ 9154 + eq = phba->sli4_hba.hba_eq_hdl[idx].eq; 9155 + lpfc_sli4_queue_free(eq); 9156 + phba->sli4_hba.hba_eq_hdl[idx].eq = NULL; 9172 9157 } 9173 9158 } 9174 9159 ··· 9361 9330 qp 
= phba->sli4_hba.hdwq; 9362 9331 memset(phba->sli4_hba.cq_lookup, 0, 9363 9332 (sizeof(struct lpfc_queue *) * (phba->sli4_hba.cq_max + 1))); 9333 + /* Loop thru all IRQ vectors */ 9364 9334 for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { 9365 - eq = qp[qidx].hba_eq; 9335 + /* Get the EQ corresponding to the IRQ vector */ 9336 + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; 9366 9337 if (!eq) 9367 9338 continue; 9339 + /* Loop through all CQs associated with that EQ */ 9368 9340 list_for_each_entry(childq, &eq->child_list, list) { 9369 9341 if (childq->queue_id > phba->sli4_hba.cq_max) 9370 9342 continue; ··· 9396 9362 { 9397 9363 uint32_t shdr_status, shdr_add_status; 9398 9364 union lpfc_sli4_cfg_shdr *shdr; 9365 + struct lpfc_vector_map_info *cpup; 9399 9366 struct lpfc_sli4_hdw_queue *qp; 9400 9367 LPFC_MBOXQ_t *mboxq; 9401 - int qidx; 9368 + int qidx, cpu; 9402 9369 uint32_t length, usdelay; 9403 9370 int rc = -ENOMEM; 9404 9371 ··· 9460 9425 rc = -ENOMEM; 9461 9426 goto out_error; 9462 9427 } 9428 + 9429 + /* Loop thru all IRQ vectors */ 9463 9430 for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { 9464 - if (!qp[qidx].hba_eq) { 9465 - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 9466 - "0522 Fast-path EQ (%d) not " 9467 - "allocated\n", qidx); 9468 - rc = -ENOMEM; 9469 - goto out_destroy; 9431 + /* Create HBA Event Queues (EQs) in order */ 9432 + for_each_present_cpu(cpu) { 9433 + cpup = &phba->sli4_hba.cpu_map[cpu]; 9434 + 9435 + /* Look for the CPU thats using that vector with 9436 + * LPFC_CPU_FIRST_IRQ set. 
9437 + */ 9438 + if (!(cpup->flag & LPFC_CPU_FIRST_IRQ)) 9439 + continue; 9440 + if (qidx != cpup->eq) 9441 + continue; 9442 + 9443 + /* Create an EQ for that vector */ 9444 + rc = lpfc_eq_create(phba, qp[cpup->hdwq].hba_eq, 9445 + phba->cfg_fcp_imax); 9446 + if (rc) { 9447 + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 9448 + "0523 Failed setup of fast-path" 9449 + " EQ (%d), rc = 0x%x\n", 9450 + cpup->eq, (uint32_t)rc); 9451 + goto out_destroy; 9452 + } 9453 + 9454 + /* Save the EQ for that vector in the hba_eq_hdl */ 9455 + phba->sli4_hba.hba_eq_hdl[cpup->eq].eq = 9456 + qp[cpup->hdwq].hba_eq; 9457 + 9458 + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, 9459 + "2584 HBA EQ setup: queue[%d]-id=%d\n", 9460 + cpup->eq, 9461 + qp[cpup->hdwq].hba_eq->queue_id); 9470 9462 } 9471 - rc = lpfc_eq_create(phba, qp[qidx].hba_eq, 9472 - phba->cfg_fcp_imax); 9473 - if (rc) { 9474 - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 9475 - "0523 Failed setup of fast-path EQ " 9476 - "(%d), rc = 0x%x\n", qidx, 9477 - (uint32_t)rc); 9478 - goto out_destroy; 9479 - } 9480 - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, 9481 - "2584 HBA EQ setup: queue[%d]-id=%d\n", qidx, 9482 - qp[qidx].hba_eq->queue_id); 9483 9463 } 9484 9464 9465 + /* Loop thru all Hardware Queues */ 9485 9466 if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { 9486 9467 for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { 9468 + cpu = lpfc_find_cpu_handle(phba, qidx, 9469 + LPFC_FIND_BY_HDWQ); 9470 + cpup = &phba->sli4_hba.cpu_map[cpu]; 9471 + 9472 + /* Create the CQ/WQ corresponding to the 9473 + * Hardware Queue 9474 + */ 9487 9475 rc = lpfc_create_wq_cq(phba, 9488 - qp[qidx].hba_eq, 9476 + phba->sli4_hba.hdwq[cpup->hdwq].hba_eq, 9489 9477 qp[qidx].nvme_cq, 9490 9478 qp[qidx].nvme_wq, 9491 9479 &phba->sli4_hba.hdwq[qidx].nvme_cq_map, ··· 9524 9466 } 9525 9467 9526 9468 for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { 9469 + cpu = lpfc_find_cpu_handle(phba, qidx, LPFC_FIND_BY_HDWQ); 9470 + cpup = 
&phba->sli4_hba.cpu_map[cpu]; 9471 + 9472 + /* Create the CQ/WQ corresponding to the Hardware Queue */ 9527 9473 rc = lpfc_create_wq_cq(phba, 9528 - qp[qidx].hba_eq, 9474 + phba->sli4_hba.hdwq[cpup->hdwq].hba_eq, 9529 9475 qp[qidx].fcp_cq, 9530 9476 qp[qidx].fcp_wq, 9531 9477 &phba->sli4_hba.hdwq[qidx].fcp_cq_map, ··· 9781 9719 lpfc_sli4_queue_unset(struct lpfc_hba *phba) 9782 9720 { 9783 9721 struct lpfc_sli4_hdw_queue *qp; 9722 + struct lpfc_queue *eq; 9784 9723 int qidx; 9785 9724 9786 9725 /* Unset mailbox command work queue */ ··· 9833 9770 9834 9771 /* Unset fast-path SLI4 queues */ 9835 9772 if (phba->sli4_hba.hdwq) { 9773 + /* Loop thru all Hardware Queues */ 9836 9774 for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { 9775 + /* Destroy the CQ/WQ corresponding to Hardware Queue */ 9837 9776 qp = &phba->sli4_hba.hdwq[qidx]; 9838 9777 lpfc_wq_destroy(phba, qp->fcp_wq); 9839 9778 lpfc_wq_destroy(phba, qp->nvme_wq); 9840 9779 lpfc_cq_destroy(phba, qp->fcp_cq); 9841 9780 lpfc_cq_destroy(phba, qp->nvme_cq); 9842 - if (qidx < phba->cfg_irq_chann) 9843 - lpfc_eq_destroy(phba, qp->hba_eq); 9781 + } 9782 + /* Loop thru all IRQ vectors */ 9783 + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { 9784 + /* Destroy the EQ corresponding to the IRQ vector */ 9785 + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; 9786 + lpfc_eq_destroy(phba, eq); 9844 9787 } 9845 9788 } 9846 9789 ··· 10636 10567 } 10637 10568 10638 10569 /** 10639 - * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified EQ 10570 + * lpfc_find_cpu_handle - Find the CPU that corresponds to the specified Queue 10640 10571 * @phba: pointer to lpfc hba data structure. 
10641 10572 * @id: EQ vector index or Hardware Queue index 10642 10573 * @match: LPFC_FIND_BY_EQ = match by EQ 10643 10574 * LPFC_FIND_BY_HDWQ = match by Hardware Queue 10575 + * Return the CPU that matches the selection criteria 10644 10576 */ 10645 10577 static uint16_t 10646 10578 lpfc_find_cpu_handle(struct lpfc_hba *phba, uint16_t id, int match) ··· 10649 10579 struct lpfc_vector_map_info *cpup; 10650 10580 int cpu; 10651 10581 10652 - /* Find the desired phys_id for the specified EQ */ 10582 + /* Loop through all CPUs */ 10653 10583 for_each_present_cpu(cpu) { 10654 10584 cpup = &phba->sli4_hba.cpu_map[cpu]; 10585 + 10586 + /* If we are matching by EQ, there may be multiple CPUs using 10587 + * using the same vector, so select the one with 10588 + * LPFC_CPU_FIRST_IRQ set. 10589 + */ 10655 10590 if ((match == LPFC_FIND_BY_EQ) && 10591 + (cpup->flag & LPFC_CPU_FIRST_IRQ) && 10656 10592 (cpup->irq != LPFC_VECTOR_MAP_EMPTY) && 10657 10593 (cpup->eq == id)) 10658 10594 return cpu; 10595 + 10596 + /* If matching by HDWQ, select the first CPU that matches */ 10659 10597 if ((match == LPFC_FIND_BY_HDWQ) && (cpup->hdwq == id)) 10660 10598 return cpu; 10661 - } 10662 - return 0; 10663 - } 10664 - 10665 - /** 10666 - * lpfc_find_eq_handle - Find the EQ that corresponds to the specified 10667 - * Hardware Queue 10668 - * @phba: pointer to lpfc hba data structure. 10669 - * @hdwq: Hardware Queue index 10670 - */ 10671 - static uint16_t 10672 - lpfc_find_eq_handle(struct lpfc_hba *phba, uint16_t hdwq) 10673 - { 10674 - struct lpfc_vector_map_info *cpup; 10675 - int cpu; 10676 - 10677 - /* Find the desired phys_id for the specified EQ */ 10678 - for_each_present_cpu(cpu) { 10679 - cpup = &phba->sli4_hba.cpu_map[cpu]; 10680 - if (cpup->hdwq == hdwq) 10681 - return cpup->eq; 10682 10599 } 10683 10600 return 0; 10684 10601 } ··· 10776 10719 10777 10720 /* This loop sets up all CPUs that are affinitized with a 10778 10721 * irq vector assigned to the driver. 
All affinitized CPUs 10779 - * will get a link to that vectors IRQ and EQ. For now we 10780 - * are assuming all CPUs using the same EQ will all share 10781 - * the same hardware queue. 10722 + * will get a link to that vectors IRQ and EQ. 10782 10723 */ 10783 10724 for (idx = 0; idx < phba->cfg_irq_chann; idx++) { 10725 + /* Get a CPU mask for all CPUs affinitized to this vector */ 10784 10726 maskp = pci_irq_get_affinity(phba->pcidev, idx); 10785 10727 if (!maskp) 10786 10728 continue; 10787 10729 10730 + i = 0; 10731 + /* Loop through all CPUs associated with vector idx */ 10788 10732 for_each_cpu_and(cpu, maskp, cpu_present_mask) { 10733 + /* Set the EQ index and IRQ for that vector */ 10789 10734 cpup = &phba->sli4_hba.cpu_map[cpu]; 10790 10735 cpup->eq = idx; 10791 - cpup->hdwq = idx; 10792 10736 cpup->irq = pci_irq_vector(phba->pcidev, idx); 10793 10737 10794 - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 10738 + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, 10795 10739 "3336 Set Affinity: CPU %d " 10796 - "hdwq %d irq %d\n", 10797 - cpu, cpup->hdwq, cpup->irq); 10740 + "irq %d eq %d\n", 10741 + cpu, cpup->irq, cpup->eq); 10742 + 10743 + /* If this is the first CPU thats assigned to this 10744 + * vector, set LPFC_CPU_FIRST_IRQ. 10745 + */ 10746 + if (!i) 10747 + cpup->flag |= LPFC_CPU_FIRST_IRQ; 10748 + i++; 10798 10749 } 10799 10750 } 10800 10751 10801 10752 /* After looking at each irq vector assigned to this pcidev, its 10802 10753 * possible to see that not ALL CPUs have been accounted for. 10803 - * Next we will set any unassigned cpu map entries to a IRQ 10804 - * on the same phys_id 10754 + * Next we will set any unassigned (unaffinitized) cpu map 10755 + * entries to a IRQ on the same phys_id. 
10805 10756 */ 10806 10757 first_cpu = cpumask_first(cpu_present_mask); 10807 10758 start_cpu = first_cpu; ··· 10822 10757 /* Mark CPU as IRQ not assigned by the kernel */ 10823 10758 cpup->flag |= LPFC_CPU_MAP_UNASSIGN; 10824 10759 10825 - /* If so, find a new_cpup thats on the the same 10760 + /* If so, find a new_cpup thats on the the SAME 10826 10761 * phys_id as cpup. start_cpu will start where we 10827 10762 * left off so all unassigned entries don't get assgined 10828 10763 * the IRQ of the first entry. ··· 10844 10779 found_same: 10845 10780 /* We found a matching phys_id, so copy the IRQ info */ 10846 10781 cpup->eq = new_cpup->eq; 10847 - cpup->hdwq = new_cpup->hdwq; 10848 10782 cpup->irq = new_cpup->irq; 10849 10783 10850 10784 /* Bump start_cpu to the next slot to minmize the ··· 10854 10790 if (start_cpu == nr_cpumask_bits) 10855 10791 start_cpu = first_cpu; 10856 10792 10857 - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 10793 + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, 10858 10794 "3337 Set Affinity: CPU %d " 10859 - "hdwq %d irq %d from id %d same " 10795 + "irq %d from id %d same " 10860 10796 "phys_id (%d)\n", 10861 - cpu, cpup->hdwq, cpup->irq, 10862 - new_cpu, cpup->phys_id); 10797 + cpu, cpup->irq, new_cpu, cpup->phys_id); 10863 10798 } 10864 10799 } 10865 10800 ··· 10873 10810 /* Mark it as IRQ not assigned by the kernel */ 10874 10811 cpup->flag |= LPFC_CPU_MAP_UNASSIGN; 10875 10812 10876 - /* If so, find a new_cpup thats on any phys_id 10813 + /* If so, find a new_cpup thats on ANY phys_id 10877 10814 * as the cpup. start_cpu will start where we 10878 10815 * left off so all unassigned entries don't get 10879 10816 * assigned the IRQ of the first entry. 
··· 10892 10829 /* We should never leave an entry unassigned */ 10893 10830 lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 10894 10831 "3339 Set Affinity: CPU %d " 10895 - "hdwq %d irq %d UNASSIGNED\n", 10896 - cpu, cpup->hdwq, cpup->irq); 10832 + "irq %d UNASSIGNED\n", 10833 + cpup->hdwq, cpup->irq); 10897 10834 continue; 10898 10835 found_any: 10899 10836 /* We found an available entry, copy the IRQ info */ 10900 10837 cpup->eq = new_cpup->eq; 10901 - cpup->hdwq = new_cpup->hdwq; 10902 10838 cpup->irq = new_cpup->irq; 10903 10839 10904 10840 /* Bump start_cpu to the next slot to minmize the ··· 10908 10846 if (start_cpu == nr_cpumask_bits) 10909 10847 start_cpu = first_cpu; 10910 10848 10911 - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 10849 + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, 10912 10850 "3338 Set Affinity: CPU %d " 10913 - "hdwq %d irq %d from id %d (%d/%d)\n", 10914 - cpu, cpup->hdwq, cpup->irq, new_cpu, 10851 + "irq %d from id %d (%d/%d)\n", 10852 + cpu, cpup->irq, new_cpu, 10915 10853 new_cpup->phys_id, new_cpup->core_id); 10916 10854 } 10917 10855 } 10856 + 10857 + /* Finally we need to associate a hdwq with each cpu_map entry 10858 + * This will be 1 to 1 - hdwq to cpu, unless there are less 10859 + * hardware queues then CPUs. For that case we will just round-robin 10860 + * the available hardware queues as they get assigned to CPUs. 10861 + */ 10862 + idx = 0; 10863 + start_cpu = 0; 10864 + for_each_present_cpu(cpu) { 10865 + cpup = &phba->sli4_hba.cpu_map[cpu]; 10866 + if (idx >= phba->cfg_hdw_queue) { 10867 + /* We need to reuse a Hardware Queue for another CPU, 10868 + * so be smart about it and pick one that has its 10869 + * IRQ/EQ mapped to the same phys_id (CPU package). 10870 + * and core_id. 
10871 + */ 10872 + new_cpu = start_cpu; 10873 + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { 10874 + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; 10875 + if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) && 10876 + (new_cpup->phys_id == cpup->phys_id) && 10877 + (new_cpup->core_id == cpup->core_id)) 10878 + goto found_hdwq; 10879 + new_cpu = cpumask_next( 10880 + new_cpu, cpu_present_mask); 10881 + if (new_cpu == nr_cpumask_bits) 10882 + new_cpu = first_cpu; 10883 + } 10884 + 10885 + /* If we can't match both phys_id and core_id, 10886 + * settle for just a phys_id match. 10887 + */ 10888 + new_cpu = start_cpu; 10889 + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { 10890 + new_cpup = &phba->sli4_hba.cpu_map[new_cpu]; 10891 + if ((new_cpup->hdwq != LPFC_VECTOR_MAP_EMPTY) && 10892 + (new_cpup->phys_id == cpup->phys_id)) 10893 + goto found_hdwq; 10894 + new_cpu = cpumask_next( 10895 + new_cpu, cpu_present_mask); 10896 + if (new_cpu == nr_cpumask_bits) 10897 + new_cpu = first_cpu; 10898 + } 10899 + 10900 + /* Otherwise just round robin on cfg_hdw_queue */ 10901 + cpup->hdwq = idx % phba->cfg_hdw_queue; 10902 + goto logit; 10903 + found_hdwq: 10904 + /* We found an available entry, copy the IRQ info */ 10905 + start_cpu = cpumask_next(new_cpu, cpu_present_mask); 10906 + if (start_cpu == nr_cpumask_bits) 10907 + start_cpu = first_cpu; 10908 + cpup->hdwq = new_cpup->hdwq; 10909 + } else { 10910 + /* 1 to 1, CPU to hdwq */ 10911 + cpup->hdwq = idx; 10912 + } 10913 + logit: 10914 + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, 10915 + "3335 Set Affinity: CPU %d (phys %d core %d): " 10916 + "hdwq %d eq %d irq %d flg x%x\n", 10917 + cpu, cpup->phys_id, cpup->core_id, 10918 + cpup->hdwq, cpup->eq, cpup->irq, cpup->flag); 10919 + idx++; 10920 + } 10921 + 10922 + /* The cpu_map array will be used later during initialization 10923 + * when EQ / CQ / WQs are allocated and configured. 10924 + */ 10918 10925 return; 10919 10926 } 10920 10927
+25 -17
drivers/scsi/lpfc/lpfc_sli.c
··· 5548 5548 int qidx; 5549 5549 struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba; 5550 5550 struct lpfc_sli4_hdw_queue *qp; 5551 + struct lpfc_queue *eq; 5551 5552 5552 5553 sli4_hba->sli4_write_cq_db(phba, sli4_hba->mbx_cq, 0, LPFC_QUEUE_REARM); 5553 5554 sli4_hba->sli4_write_cq_db(phba, sli4_hba->els_cq, 0, LPFC_QUEUE_REARM); ··· 5556 5555 sli4_hba->sli4_write_cq_db(phba, sli4_hba->nvmels_cq, 0, 5557 5556 LPFC_QUEUE_REARM); 5558 5557 5559 - qp = sli4_hba->hdwq; 5560 5558 if (sli4_hba->hdwq) { 5559 + /* Loop thru all Hardware Queues */ 5561 5560 for (qidx = 0; qidx < phba->cfg_hdw_queue; qidx++) { 5562 - sli4_hba->sli4_write_cq_db(phba, qp[qidx].fcp_cq, 0, 5561 + qp = &sli4_hba->hdwq[qidx]; 5562 + /* ARM the corresponding CQ */ 5563 + sli4_hba->sli4_write_cq_db(phba, qp->fcp_cq, 0, 5563 5564 LPFC_QUEUE_REARM); 5564 - sli4_hba->sli4_write_cq_db(phba, qp[qidx].nvme_cq, 0, 5565 + sli4_hba->sli4_write_cq_db(phba, qp->nvme_cq, 0, 5565 5566 LPFC_QUEUE_REARM); 5566 5567 } 5567 5568 5568 - for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) 5569 - sli4_hba->sli4_write_eq_db(phba, qp[qidx].hba_eq, 5570 - 0, LPFC_QUEUE_REARM); 5569 + /* Loop thru all IRQ vectors */ 5570 + for (qidx = 0; qidx < phba->cfg_irq_chann; qidx++) { 5571 + eq = sli4_hba->hba_eq_hdl[qidx].eq; 5572 + /* ARM the corresponding EQ */ 5573 + sli4_hba->sli4_write_eq_db(phba, eq, 5574 + 0, LPFC_QUEUE_REARM); 5575 + } 5571 5576 } 5572 5577 5573 5578 if (phba->nvmet_support) { ··· 7865 7858 struct lpfc_sli4_hba *sli4_hba = &phba->sli4_hba; 7866 7859 uint32_t eqidx; 7867 7860 struct lpfc_queue *fpeq = NULL; 7861 + struct lpfc_queue *eq; 7868 7862 bool mbox_pending; 7869 7863 7870 7864 if (unlikely(!phba) || (phba->sli_rev != LPFC_SLI_REV4)) 7871 7865 return false; 7872 7866 7873 - /* Find the eq associated with the mcq */ 7874 - 7875 - if (sli4_hba->hdwq) 7876 - for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) 7877 - if (sli4_hba->hdwq[eqidx].hba_eq->queue_id == 7878 - sli4_hba->mbx_cq->assoc_qid) { 7879 
- fpeq = sli4_hba->hdwq[eqidx].hba_eq; 7867 + /* Find the EQ associated with the mbox CQ */ 7868 + if (sli4_hba->hdwq) { 7869 + for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) { 7870 + eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq; 7871 + if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { 7872 + fpeq = eq; 7880 7873 break; 7881 7874 } 7875 + } 7876 + } 7882 7877 if (!fpeq) 7883 7878 return false; 7884 7879 ··· 14226 14217 return IRQ_NONE; 14227 14218 14228 14219 /* Get to the EQ struct associated with this vector */ 14229 - fpeq = phba->sli4_hba.hdwq[hba_eqidx].hba_eq; 14220 + fpeq = phba->sli4_hba.hba_eq_hdl[hba_eqidx].eq; 14230 14221 if (unlikely(!fpeq)) 14231 14222 return IRQ_NONE; 14232 14223 ··· 14511 14502 /* set values by EQ_DELAY register if supported */ 14512 14503 if (phba->sli.sli_flag & LPFC_SLI_USE_EQDR) { 14513 14504 for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) { 14514 - eq = phba->sli4_hba.hdwq[qidx].hba_eq; 14505 + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; 14515 14506 if (!eq) 14516 14507 continue; 14517 14508 ··· 14520 14511 if (++cnt >= numq) 14521 14512 break; 14522 14513 } 14523 - 14524 14514 return; 14525 14515 } 14526 14516 ··· 14547 14539 dmult = LPFC_DMULT_MAX; 14548 14540 14549 14541 for (qidx = startq; qidx < phba->cfg_irq_chann; qidx++) { 14550 - eq = phba->sli4_hba.hdwq[qidx].hba_eq; 14542 + eq = phba->sli4_hba.hba_eq_hdl[qidx].eq; 14551 14543 if (!eq) 14552 14544 continue; 14553 14545 eq->q_mode = usdelay;
+2
drivers/scsi/lpfc/lpfc_sli4.h
··· 452 452 uint32_t idx; 453 453 char handler_name[LPFC_SLI4_HANDLER_NAME_SZ]; 454 454 struct lpfc_hba *phba; 455 + struct lpfc_queue *eq; 455 456 }; 456 457 457 458 /*BB Credit recovery value*/ ··· 553 552 uint16_t flag; 554 553 #define LPFC_CPU_MAP_HYPER 0x1 555 554 #define LPFC_CPU_MAP_UNASSIGN 0x2 555 + #define LPFC_CPU_FIRST_IRQ 0x4 556 556 }; 557 557 #define LPFC_VECTOR_MAP_EMPTY 0xffff 558 558