Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: target: core: Remove from tmr_list during LUN unlink

Currently TMF commands are removed from de_device.dev_tmf_list at the very
end of se_cmd lifecycle. However, se_lun unlinks from se_cmd upon a command
status (response) being queued in transport layer. This means that LUN and
backend device can be deleted in the meantime and a panic will occur:

target_tmr_work()
cmd->se_tfo->queue_tm_rsp(cmd); // send abort_rsp to a wire
transport_lun_remove_cmd(cmd) // unlink se_cmd from se_lun
- // - // - // -
<<<--- lun remove
<<<--- core backend device remove
- // - // - // -
qlt_handle_abts_completion()
tfo->free_mcmd()
transport_generic_free_cmd()
target_put_sess_cmd()
core_tmr_release_req() {
if (dev) { // backend device, can not be null
spin_lock_irqsave(&dev->se_tmr_lock, flags); //<<<--- CRASH

Call Trace:
NIP [c000000000e1683c] _raw_spin_lock_irqsave+0x2c/0xc0
LR [c00800000e433338] core_tmr_release_req+0x40/0xa0 [target_core_mod]
Call Trace:
(unreliable)
0x0
target_put_sess_cmd+0x2a0/0x370 [target_core_mod]
transport_generic_free_cmd+0x6c/0x1b0 [target_core_mod]
tcm_qla2xxx_complete_mcmd+0x28/0x50 [tcm_qla2xxx]
process_one_work+0x2c4/0x5c0
worker_thread+0x88/0x690

For the iSCSI protocol this is easily reproduced:

- Send some SCSI sommand

- Send Abort of that command over iSCSI

- Remove LUN on target

- Send next iSCSI command to acknowledge the Abort_Response

- Target panics

There is no need to keep the command in tmr_list until response completion,
so move the removal from tmr_list from the response completion to the
response queueing when the LUN is unlinked. Move the removal from state
list too as it is a subject to the same race condition.

Link: https://lore.kernel.org/r/20211018135753.15297-1-d.bogdanov@yadro.com
Fixes: c66ac9db8d4a ("[SCSI] target: Add LIO target core v4.0.0-rc6")
Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Dmitry Bogdanov and committed by
Martin K. Petersen
12b6fcd0 83c3a7be

+24 -23
+1 -16
drivers/target/target_core_tmr.c
··· 50 50 51 51 void core_tmr_release_req(struct se_tmr_req *tmr) 52 52 { 53 - struct se_device *dev = tmr->tmr_dev; 54 - unsigned long flags; 55 - 56 - if (dev) { 57 - spin_lock_irqsave(&dev->se_tmr_lock, flags); 58 - list_del_init(&tmr->tmr_list); 59 - spin_unlock_irqrestore(&dev->se_tmr_lock, flags); 60 - } 61 - 62 53 kfree(tmr); 63 54 } 64 55 ··· 147 156 se_cmd->state_active = false; 148 157 spin_unlock_irqrestore(&dev->queues[i].lock, flags); 149 158 150 - /* 151 - * Ensure that this ABORT request is visible to the LU 152 - * RESET code. 153 - */ 154 - if (!tmr->tmr_dev) 155 - WARN_ON_ONCE(transport_lookup_tmr_lun(tmr->task_cmd) < 0); 156 - 157 159 if (dev->transport->tmr_notify) 158 160 dev->transport->tmr_notify(dev, TMR_ABORT_TASK, 159 161 &aborted_list); ··· 218 234 } 219 235 220 236 list_move_tail(&tmr_p->tmr_list, &drain_tmr_list); 237 + tmr_p->tmr_dev = NULL; 221 238 } 222 239 spin_unlock_irqrestore(&dev->se_tmr_lock, flags); 223 240
+23 -7
drivers/target/target_core_transport.c
··· 676 676 spin_unlock_irqrestore(&dev->queues[cmd->cpuid].lock, flags); 677 677 } 678 678 679 + static void target_remove_from_tmr_list(struct se_cmd *cmd) 680 + { 681 + struct se_device *dev = NULL; 682 + unsigned long flags; 683 + 684 + if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB) 685 + dev = cmd->se_tmr_req->tmr_dev; 686 + 687 + if (dev) { 688 + spin_lock_irqsave(&dev->se_tmr_lock, flags); 689 + if (cmd->se_tmr_req->tmr_dev) 690 + list_del_init(&cmd->se_tmr_req->tmr_list); 691 + spin_unlock_irqrestore(&dev->se_tmr_lock, flags); 692 + } 693 + } 679 694 /* 680 695 * This function is called by the target core after the target core has 681 696 * finished processing a SCSI command or SCSI TMF. Both the regular command ··· 701 686 static int transport_cmd_check_stop_to_fabric(struct se_cmd *cmd) 702 687 { 703 688 unsigned long flags; 704 - 705 - target_remove_from_state_list(cmd); 706 - 707 - /* 708 - * Clear struct se_cmd->se_lun before the handoff to FE. 709 - */ 710 - cmd->se_lun = NULL; 711 689 712 690 spin_lock_irqsave(&cmd->t_state_lock, flags); 713 691 /* ··· 736 728 if (!lun) 737 729 return; 738 730 731 + target_remove_from_state_list(cmd); 732 + target_remove_from_tmr_list(cmd); 733 + 739 734 if (cmpxchg(&cmd->lun_ref_active, true, false)) 740 735 percpu_ref_put(&lun->lun_ref); 736 + 737 + /* 738 + * Clear struct se_cmd->se_lun before the handoff to FE. 739 + */ 740 + cmd->se_lun = NULL; 741 741 } 742 742 743 743 static void target_complete_failure_work(struct work_struct *work)