Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: lpfc: Fix multiple NVMe remoteport registration calls for the same NPort ID

When a target makes the mistake of registering an FC4 type with the fabric,
but then rejects a PRLI of that type, the lpfc driver incorrectly retries
the PRLI, causing multiple registrations with the transport. The driver
needs to detect the reject reason data (the LS_RJT reason code and reason
code explanation) and stop any retry.

Rework the PRLI reject scenarios.

Link: https://lore.kernel.org/r/20220911221505.117655-6-jsmart2021@gmail.com
Co-developed-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

James Smart and committed by
Martin K. Petersen
84536351 0630a1f7

+43 -47
-1
drivers/scsi/lpfc/lpfc_disc.h
··· 187 187 #define NLP_RNID_SND 0x00000400 /* sent RNID request for this entry */ 188 188 #define NLP_ELS_SND_MASK 0x000007e0 /* sent ELS request for this entry */ 189 189 #define NLP_NVMET_RECOV 0x00001000 /* NVMET auditing node for recovery. */ 190 - #define NLP_FCP_PRLI_RJT 0x00002000 /* Rport does not support FCP PRLI. */ 191 190 #define NLP_UNREG_INP 0x00008000 /* UNREG_RPI cmd is in progress */ 192 191 #define NLP_DROPPED 0x00010000 /* Init ref count has been dropped */ 193 192 #define NLP_DELAY_TMO 0x00020000 /* delay timeout is running for node */
+42 -46
drivers/scsi/lpfc/lpfc_els.c
··· 2200 2200 if (!elsiocb) 2201 2201 return 1; 2202 2202 2203 - spin_lock_irq(&ndlp->lock); 2204 - ndlp->nlp_flag &= ~NLP_FCP_PRLI_RJT; 2205 - spin_unlock_irq(&ndlp->lock); 2206 - 2207 2203 pcmd = (uint8_t *)elsiocb->cmd_dmabuf->virt; 2208 2204 2209 2205 /* For PLOGI request, remainder of payload is service parameters */ ··· 4672 4676 } 4673 4677 switch (stat.un.b.lsRjtRsnCode) { 4674 4678 case LSRJT_UNABLE_TPC: 4675 - /* The driver has a VALID PLOGI but the rport has 4676 - * rejected the PRLI - can't do it now. Delay 4677 - * for 1 second and try again. 4678 - * 4679 - * However, if explanation is REQ_UNSUPPORTED there's 4680 - * no point to retry PRLI. 4679 + /* Special case for PRLI LS_RJTs. Recall that lpfc 4680 + * uses a single routine to issue both PRLI FC4 types. 4681 + * If the PRLI is rejected because that FC4 type 4682 + * isn't really supported, don't retry and cause 4683 + * multiple transport registrations. Otherwise, parse 4684 + * the reason code/reason code explanation and take the 4685 + * appropriate action. 4681 4686 */ 4682 - if ((cmd == ELS_CMD_PRLI || cmd == ELS_CMD_NVMEPRLI) && 4683 - stat.un.b.lsRjtRsnCodeExp != 4684 - LSEXP_REQ_UNSUPPORTED) { 4685 - delay = 1000; 4686 - maxretry = lpfc_max_els_tries + 1; 4687 + lpfc_printf_vlog(vport, KERN_INFO, 4688 + LOG_DISCOVERY | LOG_ELS | LOG_NODE, 4689 + "0153 ELS cmd x%x LS_RJT by x%x. " 4690 + "RsnCode x%x RsnCodeExp x%x\n", 4691 + cmd, did, stat.un.b.lsRjtRsnCode, 4692 + stat.un.b.lsRjtRsnCodeExp); 4693 + 4694 + switch (stat.un.b.lsRjtRsnCodeExp) { 4695 + case LSEXP_CANT_GIVE_DATA: 4696 + case LSEXP_CMD_IN_PROGRESS: 4697 + if (cmd == ELS_CMD_PLOGI) { 4698 + delay = 1000; 4699 + maxretry = 48; 4700 + } 4687 4701 retry = 1; 4702 + break; 4703 + case LSEXP_REQ_UNSUPPORTED: 4704 + case LSEXP_NO_RSRC_ASSIGN: 4705 + /* These explanation codes get no retry. 
*/ 4706 + if (cmd == ELS_CMD_PRLI || 4707 + cmd == ELS_CMD_NVMEPRLI) 4708 + break; 4709 + fallthrough; 4710 + default: 4711 + /* Limit the delay and retry action to a limited 4712 + * cmd set. There are other ELS commands where 4713 + * a retry is not expected. 4714 + */ 4715 + if (cmd == ELS_CMD_PLOGI || 4716 + cmd == ELS_CMD_PRLI || 4717 + cmd == ELS_CMD_NVMEPRLI) { 4718 + delay = 1000; 4719 + maxretry = lpfc_max_els_tries + 1; 4720 + retry = 1; 4721 + } 4688 4722 break; 4689 4723 } 4690 4724 4691 - /* Legacy bug fix code for targets with PLOGI delays. */ 4692 - if (stat.un.b.lsRjtRsnCodeExp == 4693 - LSEXP_CMD_IN_PROGRESS) { 4694 - if (cmd == ELS_CMD_PLOGI) { 4695 - delay = 1000; 4696 - maxretry = 48; 4697 - } 4698 - retry = 1; 4699 - break; 4700 - } 4701 - if (stat.un.b.lsRjtRsnCodeExp == 4702 - LSEXP_CANT_GIVE_DATA) { 4703 - if (cmd == ELS_CMD_PLOGI) { 4704 - delay = 1000; 4705 - maxretry = 48; 4706 - } 4707 - retry = 1; 4708 - break; 4709 - } 4710 - if (cmd == ELS_CMD_PLOGI) { 4711 - delay = 1000; 4712 - maxretry = lpfc_max_els_tries + 1; 4713 - retry = 1; 4714 - break; 4715 - } 4716 4725 if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) && 4717 4726 (cmd == ELS_CMD_FDISC) && 4718 4727 (stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){ ··· 4798 4797 */ 4799 4798 if (stat.un.b.lsRjtRsnCodeExp == 4800 4799 LSEXP_REQ_UNSUPPORTED) { 4801 - if (cmd == ELS_CMD_PRLI) { 4802 - spin_lock_irq(&ndlp->lock); 4803 - ndlp->nlp_flag |= NLP_FCP_PRLI_RJT; 4804 - spin_unlock_irq(&ndlp->lock); 4805 - retry = 0; 4800 + if (cmd == ELS_CMD_PRLI) 4806 4801 goto out_retry; 4807 - } 4808 4802 } 4809 4803 break; 4810 4804 }
+1
drivers/scsi/lpfc/lpfc_hw.h
··· 703 703 #define LSEXP_OUT_OF_RESOURCE 0x29 704 704 #define LSEXP_CANT_GIVE_DATA 0x2A 705 705 #define LSEXP_REQ_UNSUPPORTED 0x2C 706 + #define LSEXP_NO_RSRC_ASSIGN 0x52 706 707 uint8_t vendorUnique; /* FC Word 0, bit 0: 7 */ 707 708 } b; 708 709 } un;