Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: lpfc: Improve PBDE checks during SGL processing

The PBDE feature, which sets the payload buffer address explicitly in the
WQE so that it doesn't have to be fetched from the SGL, only makes sense
when there is a single buffer for the I/O. When there are multiple buffers,
it actually hurts performance because the SGL still has to be fetched.

Rework the SGL logic to use PBDE only when there is a single buffer.

Link: https://lore.kernel.org/r/20210910233159.115896-14-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

James Smart and committed by
Martin K. Petersen
315b3fd1 afd63fa5

+40 -42
+6 -6
drivers/scsi/lpfc/lpfc_nvme.c
··· 1299 1299 struct sli4_sge *first_data_sgl; 1300 1300 struct ulp_bde64 *bde; 1301 1301 dma_addr_t physaddr = 0; 1302 - uint32_t num_bde = 0; 1303 1302 uint32_t dma_len = 0; 1304 1303 uint32_t dma_offset = 0; 1305 1304 int nseg, i, j; ··· 1352 1353 } 1353 1354 1354 1355 sgl->word2 = 0; 1355 - if ((num_bde + 1) == nseg) { 1356 + if (nseg == 1) { 1356 1357 bf_set(lpfc_sli4_sge_last, sgl, 1); 1357 1358 bf_set(lpfc_sli4_sge_type, sgl, 1358 1359 LPFC_SGE_TYPE_DATA); ··· 1421 1422 1422 1423 j++; 1423 1424 } 1424 - if (phba->cfg_enable_pbde) { 1425 - /* Use PBDE support for first SGL only, offset == 0 */ 1425 + 1426 + /* PBDE support for first data SGE only */ 1427 + if (nseg == 1 && phba->cfg_enable_pbde) { 1426 1428 /* Words 13-15 */ 1427 1429 bde = (struct ulp_bde64 *) 1428 1430 &wqe->words[13]; ··· 1434 1434 bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; 1435 1435 bde->tus.w = cpu_to_le32(bde->tus.w); 1436 1436 1437 - /* Word 11 */ 1437 + /* Word 11 - set PBDE bit */ 1438 1438 bf_set(wqe_pbde, &wqe->generic.wqe_com, 1); 1439 1439 } else { 1440 1440 memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3)); 1441 - bf_set(wqe_pbde, &wqe->generic.wqe_com, 0); 1441 + /* Word 11 - PBDE bit disabled by default template */ 1442 1442 } 1443 1443 1444 1444 } else {
+21 -23
drivers/scsi/lpfc/lpfc_nvmet.c
··· 2708 2708 struct ulp_bde64 *bde; 2709 2709 dma_addr_t physaddr; 2710 2710 int i, cnt, nsegs; 2711 - int do_pbde; 2711 + bool use_pbde = false; 2712 2712 int xc = 1; 2713 2713 2714 2714 if (!lpfc_is_link_up(phba)) { ··· 2816 2816 if (!xc) 2817 2817 bf_set(wqe_xc, &wqe->fcp_tsend.wqe_com, 0); 2818 2818 2819 - /* Word 11 - set sup, irsp, irsplen later */ 2820 - do_pbde = 0; 2821 - 2822 2819 /* Word 12 */ 2823 2820 wqe->fcp_tsend.fcp_data_len = rsp->transfer_length; 2824 2821 ··· 2893 2896 if (!xc) 2894 2897 bf_set(wqe_xc, &wqe->fcp_treceive.wqe_com, 0); 2895 2898 2896 - /* Word 11 - set pbde later */ 2897 - if (phba->cfg_enable_pbde) { 2898 - do_pbde = 1; 2899 + /* Word 11 - check for pbde */ 2900 + if (nsegs == 1 && phba->cfg_enable_pbde) { 2901 + use_pbde = true; 2902 + /* Word 11 - PBDE bit already preset by template */ 2899 2903 } else { 2904 + /* Overwrite default template setting */ 2900 2905 bf_set(wqe_pbde, &wqe->fcp_treceive.wqe_com, 0); 2901 - do_pbde = 0; 2902 2906 } 2903 2907 2904 2908 /* Word 12 */ ··· 2970 2972 ((rsp->rsplen >> 2) - 1)); 2971 2973 memcpy(&wqe->words[16], rsp->rspaddr, rsp->rsplen); 2972 2974 } 2973 - do_pbde = 0; 2974 2975 2975 2976 /* Word 12 */ 2976 2977 wqe->fcp_trsp.rsvd_12_15[0] = 0; ··· 3004 3007 bf_set(lpfc_sli4_sge_last, sgl, 1); 3005 3008 sgl->word2 = cpu_to_le32(sgl->word2); 3006 3009 sgl->sge_len = cpu_to_le32(cnt); 3007 - if (i == 0) { 3008 - bde = (struct ulp_bde64 *)&wqe->words[13]; 3009 - if (do_pbde) { 3010 - /* Words 13-15 (PBDE) */ 3011 - bde->addrLow = sgl->addr_lo; 3012 - bde->addrHigh = sgl->addr_hi; 3013 - bde->tus.f.bdeSize = 3014 - le32_to_cpu(sgl->sge_len); 3015 - bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; 3016 - bde->tus.w = cpu_to_le32(bde->tus.w); 3017 - } else { 3018 - memset(bde, 0, sizeof(struct ulp_bde64)); 3019 - } 3020 - } 3021 3010 sgl++; 3022 3011 ctxp->offset += cnt; 3012 + } 3013 + 3014 + bde = (struct ulp_bde64 *)&wqe->words[13]; 3015 + if (use_pbde) { 3016 + /* decrement sgl ptr backwards once to 
first data sge */ 3017 + sgl--; 3018 + 3019 + /* Words 13-15 (PBDE) */ 3020 + bde->addrLow = sgl->addr_lo; 3021 + bde->addrHigh = sgl->addr_hi; 3022 + bde->tus.f.bdeSize = le32_to_cpu(sgl->sge_len); 3023 + bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; 3024 + bde->tus.w = cpu_to_le32(bde->tus.w); 3025 + } else { 3026 + memset(bde, 0, sizeof(struct ulp_bde64)); 3023 3027 } 3024 3028 ctxp->state = LPFC_NVME_STE_DATA; 3025 3029 ctxp->entry_cnt++;
+13 -13
drivers/scsi/lpfc/lpfc_scsi.c
··· 3235 3235 struct lpfc_vport *vport = phba->pport; 3236 3236 union lpfc_wqe128 *wqe = &pwqeq->wqe; 3237 3237 dma_addr_t physaddr; 3238 - uint32_t num_bde = 0; 3239 3238 uint32_t dma_len; 3240 3239 uint32_t dma_offset = 0; 3241 3240 int nseg, i, j; ··· 3296 3297 j = 2; 3297 3298 for (i = 0; i < nseg; i++) { 3298 3299 sgl->word2 = 0; 3299 - if ((num_bde + 1) == nseg) { 3300 + if (nseg == 1) { 3300 3301 bf_set(lpfc_sli4_sge_last, sgl, 1); 3301 3302 bf_set(lpfc_sli4_sge_type, sgl, 3302 3303 LPFC_SGE_TYPE_DATA); ··· 3365 3366 3366 3367 j++; 3367 3368 } 3368 - /* 3369 - * Setup the first Payload BDE. For FCoE we just key off 3370 - * Performance Hints, for FC we use lpfc_enable_pbde. 3371 - * We populate words 13-15 of IOCB/WQE. 3369 + 3370 + /* PBDE support for first data SGE only. 3371 + * For FCoE, we key off Performance Hints. 3372 + * For FC, we key off lpfc_enable_pbde. 3372 3373 */ 3373 - if ((phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) || 3374 - phba->cfg_enable_pbde) { 3374 + if (nseg == 1 && 3375 + ((phba->sli3_options & LPFC_SLI4_PERFH_ENABLED) || 3376 + phba->cfg_enable_pbde)) { 3377 + /* Words 13-15 */ 3375 3378 bde = (struct ulp_bde64 *) 3376 3379 &wqe->words[13]; 3377 3380 bde->addrLow = first_data_sgl->addr_lo; ··· 3383 3382 bde->tus.f.bdeFlags = BUFF_TYPE_BDE_64; 3384 3383 bde->tus.w = cpu_to_le32(bde->tus.w); 3385 3384 3385 + /* Word 11 - set PBDE bit */ 3386 + bf_set(wqe_pbde, &wqe->generic.wqe_com, 1); 3386 3387 } else { 3387 3388 memset(&wqe->words[13], 0, (sizeof(uint32_t) * 3)); 3389 + /* Word 11 - PBDE bit disabled by default template */ 3388 3390 } 3389 3391 } else { 3390 3392 sgl += 1; 3391 - /* clear the last flag in the fcp_rsp map entry */ 3393 + /* set the last flag in the fcp_rsp map entry */ 3392 3394 sgl->word2 = le32_to_cpu(sgl->word2); 3393 3395 bf_set(lpfc_sli4_sge_last, sgl, 1); 3394 3396 sgl->word2 = cpu_to_le32(sgl->word2); ··· 3403 3399 memset(bde, 0, (sizeof(uint32_t) * 3)); 3404 3400 } 3405 3401 } 3406 - 3407 - /* Word 11 */ 
3408 - if (phba->cfg_enable_pbde) 3409 - bf_set(wqe_pbde, &wqe->generic.wqe_com, 1); 3410 3402 3411 3403 /* 3412 3404 * Finish initializing those IOCB fields that are dependent on the