Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: libiscsi: Drop taskqueuelock

The purpose of the taskqueuelock was to handle the case where a bad target
sends an R2T and then, before its data has been sent, sends a
cmd response to complete the cmd. The following patches fix up the
frwd/back locks so they are taken from the queue/xmit (frwd) and completion
(back) paths again. To get there this patch removes the taskqueuelock which
for iSCSI xmit wq based drivers was taken in the queue, xmit and completion
paths.

Instead of the lock, we just make sure we have a ref to the task when we
queue an R2T, and then we always remove the task from the requeue list in
the xmit path or the forced cleanup paths.

Link: https://lore.kernel.org/r/20210207044608.27585-3-michael.christie@oracle.com
Reviewed-by: Lee Duncan <lduncan@suse.com>
Signed-off-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

Authored by Mike Christie; committed by Martin K. Petersen
5923d64b d28d48c6

+172 -96
+113 -65
drivers/scsi/libiscsi.c
··· 523 523 WARN_ON_ONCE(task->state == ISCSI_TASK_FREE); 524 524 task->state = state; 525 525 526 - spin_lock_bh(&conn->taskqueuelock); 527 - if (!list_empty(&task->running)) { 528 - pr_debug_once("%s while task on list", __func__); 529 - list_del_init(&task->running); 530 - } 531 - spin_unlock_bh(&conn->taskqueuelock); 532 - 533 - if (conn->task == task) 534 - conn->task = NULL; 535 - 536 526 if (READ_ONCE(conn->ping_task) == task) 537 527 WRITE_ONCE(conn->ping_task, NULL); 538 528 ··· 554 564 } 555 565 EXPORT_SYMBOL_GPL(iscsi_complete_scsi_task); 556 566 567 + /* 568 + * Must be called with back and frwd lock 569 + */ 570 + static bool cleanup_queued_task(struct iscsi_task *task) 571 + { 572 + struct iscsi_conn *conn = task->conn; 573 + bool early_complete = false; 574 + 575 + /* Bad target might have completed task while it was still running */ 576 + if (task->state == ISCSI_TASK_COMPLETED) 577 + early_complete = true; 578 + 579 + if (!list_empty(&task->running)) { 580 + list_del_init(&task->running); 581 + /* 582 + * If it's on a list but still running, this could be from 583 + * a bad target sending a rsp early, cleanup from a TMF, or 584 + * session recovery. 585 + */ 586 + if (task->state == ISCSI_TASK_RUNNING || 587 + task->state == ISCSI_TASK_COMPLETED) 588 + __iscsi_put_task(task); 589 + } 590 + 591 + if (conn->task == task) { 592 + conn->task = NULL; 593 + __iscsi_put_task(task); 594 + } 595 + 596 + return early_complete; 597 + } 557 598 558 599 /* 559 - * session back_lock must be held and if not called for a task that is 600 + * session frwd_lock must be held and if not called for a task that is 560 601 * still pending or from the xmit thread, then xmit thread must 561 602 * be suspended. 
562 603 */ ··· 606 585 if (!sc) 607 586 return; 608 587 588 + /* regular RX path uses back_lock */ 589 + spin_lock_bh(&conn->session->back_lock); 590 + if (cleanup_queued_task(task)) { 591 + spin_unlock_bh(&conn->session->back_lock); 592 + return; 593 + } 594 + 609 595 if (task->state == ISCSI_TASK_PENDING) { 610 596 /* 611 597 * cmd never made it to the xmit thread, so we should not count ··· 628 600 629 601 sc->result = err << 16; 630 602 scsi_set_resid(sc, scsi_bufflen(sc)); 631 - 632 - /* regular RX path uses back_lock */ 633 - spin_lock_bh(&conn->session->back_lock); 634 603 iscsi_complete_task(task, state); 635 604 spin_unlock_bh(&conn->session->back_lock); 636 605 } ··· 773 748 if (session->tt->xmit_task(task)) 774 749 goto free_task; 775 750 } else { 776 - spin_lock_bh(&conn->taskqueuelock); 777 751 list_add_tail(&task->running, &conn->mgmtqueue); 778 - spin_unlock_bh(&conn->taskqueuelock); 779 752 iscsi_conn_queue_work(conn); 780 753 } 781 754 ··· 1434 1411 return 0; 1435 1412 } 1436 1413 1437 - static int iscsi_xmit_task(struct iscsi_conn *conn) 1414 + static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task, 1415 + bool was_requeue) 1438 1416 { 1439 - struct iscsi_task *task = conn->task; 1440 1417 int rc; 1441 1418 1442 - if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) 1443 - return -ENODATA; 1444 - 1445 1419 spin_lock_bh(&conn->session->back_lock); 1446 - if (conn->task == NULL) { 1420 + 1421 + if (!conn->task) { 1422 + /* Take a ref so we can access it after xmit_task() */ 1423 + __iscsi_get_task(task); 1424 + } else { 1425 + /* Already have a ref from when we failed to send it last call */ 1426 + conn->task = NULL; 1427 + } 1428 + 1429 + /* 1430 + * If this was a requeue for a R2T we have an extra ref on the task in 1431 + * case a bad target sends a cmd rsp before we have handled the task. 
1432 + */ 1433 + if (was_requeue) 1434 + __iscsi_put_task(task); 1435 + 1436 + /* 1437 + * Do this after dropping the extra ref because if this was a requeue 1438 + * it's removed from that list and cleanup_queued_task would miss it. 1439 + */ 1440 + if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { 1441 + /* 1442 + * Save the task and ref in case we weren't cleaning up this 1443 + * task and get woken up again. 1444 + */ 1445 + conn->task = task; 1447 1446 spin_unlock_bh(&conn->session->back_lock); 1448 1447 return -ENODATA; 1449 1448 } 1450 - __iscsi_get_task(task); 1451 1449 spin_unlock_bh(&conn->session->back_lock); 1450 + 1452 1451 spin_unlock_bh(&conn->session->frwd_lock); 1453 1452 rc = conn->session->tt->xmit_task(task); 1454 1453 spin_lock_bh(&conn->session->frwd_lock); 1455 1454 if (!rc) { 1456 1455 /* done with this task */ 1457 1456 task->last_xfer = jiffies; 1458 - conn->task = NULL; 1459 1457 } 1460 1458 /* regular RX path uses back_lock */ 1461 1459 spin_lock(&conn->session->back_lock); 1460 + if (rc && task->state == ISCSI_TASK_RUNNING) { 1461 + /* 1462 + * get an extra ref that is released next time we access it 1463 + * as conn->task above. 1464 + */ 1465 + __iscsi_get_task(task); 1466 + conn->task = task; 1467 + } 1468 + 1462 1469 __iscsi_put_task(task); 1463 1470 spin_unlock(&conn->session->back_lock); 1464 1471 return rc; ··· 1498 1445 * iscsi_requeue_task - requeue task to run from session workqueue 1499 1446 * @task: task to requeue 1500 1447 * 1501 - * LLDs that need to run a task from the session workqueue should call 1502 - * this. The session frwd_lock must be held. This should only be called 1503 - * by software drivers. 1448 + * Callers must have taken a ref to the task that is going to be requeued. 
1504 1449 */ 1505 1450 void iscsi_requeue_task(struct iscsi_task *task) 1506 1451 { ··· 1508 1457 * this may be on the requeue list already if the xmit_task callout 1509 1458 * is handling the r2ts while we are adding new ones 1510 1459 */ 1511 - spin_lock_bh(&conn->taskqueuelock); 1512 - if (list_empty(&task->running)) 1460 + spin_lock_bh(&conn->session->frwd_lock); 1461 + if (list_empty(&task->running)) { 1513 1462 list_add_tail(&task->running, &conn->requeue); 1514 - spin_unlock_bh(&conn->taskqueuelock); 1463 + } else { 1464 + /* 1465 + * Don't need the extra ref since it's already requeued and 1466 + * has a ref. 1467 + */ 1468 + iscsi_put_task(task); 1469 + } 1515 1470 iscsi_conn_queue_work(conn); 1471 + spin_unlock_bh(&conn->session->frwd_lock); 1516 1472 } 1517 1473 EXPORT_SYMBOL_GPL(iscsi_requeue_task); 1518 1474 ··· 1545 1487 } 1546 1488 1547 1489 if (conn->task) { 1548 - rc = iscsi_xmit_task(conn); 1490 + rc = iscsi_xmit_task(conn, conn->task, false); 1549 1491 if (rc) 1550 1492 goto done; 1551 1493 } ··· 1555 1497 * only have one nop-out as a ping from us and targets should not 1556 1498 * overflow us with nop-ins 1557 1499 */ 1558 - spin_lock_bh(&conn->taskqueuelock); 1559 1500 check_mgmt: 1560 1501 while (!list_empty(&conn->mgmtqueue)) { 1561 - conn->task = list_entry(conn->mgmtqueue.next, 1562 - struct iscsi_task, running); 1563 - list_del_init(&conn->task->running); 1564 - spin_unlock_bh(&conn->taskqueuelock); 1565 - if (iscsi_prep_mgmt_task(conn, conn->task)) { 1502 + task = list_entry(conn->mgmtqueue.next, struct iscsi_task, 1503 + running); 1504 + list_del_init(&task->running); 1505 + if (iscsi_prep_mgmt_task(conn, task)) { 1566 1506 /* regular RX path uses back_lock */ 1567 1507 spin_lock_bh(&conn->session->back_lock); 1568 - __iscsi_put_task(conn->task); 1508 + __iscsi_put_task(task); 1569 1509 spin_unlock_bh(&conn->session->back_lock); 1570 - conn->task = NULL; 1571 - spin_lock_bh(&conn->taskqueuelock); 1572 1510 continue; 1573 1511 } 1574 - rc 
= iscsi_xmit_task(conn); 1512 + rc = iscsi_xmit_task(conn, task, false); 1575 1513 if (rc) 1576 1514 goto done; 1577 - spin_lock_bh(&conn->taskqueuelock); 1578 1515 } 1579 1516 1580 1517 /* process pending command queue */ 1581 1518 while (!list_empty(&conn->cmdqueue)) { 1582 - conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task, 1583 - running); 1584 - list_del_init(&conn->task->running); 1585 - spin_unlock_bh(&conn->taskqueuelock); 1519 + task = list_entry(conn->cmdqueue.next, struct iscsi_task, 1520 + running); 1521 + list_del_init(&task->running); 1586 1522 if (conn->session->state == ISCSI_STATE_LOGGING_OUT) { 1587 - fail_scsi_task(conn->task, DID_IMM_RETRY); 1588 - spin_lock_bh(&conn->taskqueuelock); 1523 + fail_scsi_task(task, DID_IMM_RETRY); 1589 1524 continue; 1590 1525 } 1591 - rc = iscsi_prep_scsi_cmd_pdu(conn->task); 1526 + rc = iscsi_prep_scsi_cmd_pdu(task); 1592 1527 if (rc) { 1593 1528 if (rc == -ENOMEM || rc == -EACCES) 1594 - fail_scsi_task(conn->task, DID_IMM_RETRY); 1529 + fail_scsi_task(task, DID_IMM_RETRY); 1595 1530 else 1596 - fail_scsi_task(conn->task, DID_ABORT); 1597 - spin_lock_bh(&conn->taskqueuelock); 1531 + fail_scsi_task(task, DID_ABORT); 1598 1532 continue; 1599 1533 } 1600 - rc = iscsi_xmit_task(conn); 1534 + rc = iscsi_xmit_task(conn, task, false); 1601 1535 if (rc) 1602 1536 goto done; 1603 1537 /* ··· 1597 1547 * we need to check the mgmt queue for nops that need to 1598 1548 * be sent to aviod starvation 1599 1549 */ 1600 - spin_lock_bh(&conn->taskqueuelock); 1601 1550 if (!list_empty(&conn->mgmtqueue)) 1602 1551 goto check_mgmt; 1603 1552 } ··· 1610 1561 1611 1562 task = list_entry(conn->requeue.next, struct iscsi_task, 1612 1563 running); 1564 + 1613 1565 if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT)) 1614 1566 break; 1615 1567 1616 - conn->task = task; 1617 - list_del_init(&conn->task->running); 1618 - conn->task->state = ISCSI_TASK_RUNNING; 1619 - spin_unlock_bh(&conn->taskqueuelock); 1620 - rc = 
iscsi_xmit_task(conn); 1568 + list_del_init(&task->running); 1569 + rc = iscsi_xmit_task(conn, task, true); 1621 1570 if (rc) 1622 1571 goto done; 1623 - spin_lock_bh(&conn->taskqueuelock); 1624 1572 if (!list_empty(&conn->mgmtqueue)) 1625 1573 goto check_mgmt; 1626 1574 } 1627 - spin_unlock_bh(&conn->taskqueuelock); 1628 1575 spin_unlock_bh(&conn->session->frwd_lock); 1629 1576 return -ENODATA; 1630 1577 ··· 1786 1741 goto prepd_reject; 1787 1742 } 1788 1743 } else { 1789 - spin_lock_bh(&conn->taskqueuelock); 1790 1744 list_add_tail(&task->running, &conn->cmdqueue); 1791 - spin_unlock_bh(&conn->taskqueuelock); 1792 1745 iscsi_conn_queue_work(conn); 1793 1746 } 1794 1747 ··· 2957 2914 INIT_LIST_HEAD(&conn->mgmtqueue); 2958 2915 INIT_LIST_HEAD(&conn->cmdqueue); 2959 2916 INIT_LIST_HEAD(&conn->requeue); 2960 - spin_lock_init(&conn->taskqueuelock); 2961 2917 INIT_WORK(&conn->xmitwork, iscsi_xmitworker); 2962 2918 2963 2919 /* allocate login_task used for the login/text sequences */ ··· 3122 3080 ISCSI_DBG_SESSION(conn->session, 3123 3081 "failing mgmt itt 0x%x state %d\n", 3124 3082 task->itt, task->state); 3083 + 3084 + spin_lock_bh(&session->back_lock); 3085 + if (cleanup_queued_task(task)) { 3086 + spin_unlock_bh(&session->back_lock); 3087 + continue; 3088 + } 3089 + 3125 3090 state = ISCSI_TASK_ABRT_SESS_RECOV; 3126 3091 if (task->state == ISCSI_TASK_PENDING) 3127 3092 state = ISCSI_TASK_COMPLETED; 3128 - spin_lock_bh(&session->back_lock); 3129 3093 iscsi_complete_task(task, state); 3130 3094 spin_unlock_bh(&session->back_lock); 3131 3095 }
+57 -29
drivers/scsi/libiscsi_tcp.c
··· 524 524 /** 525 525 * iscsi_tcp_r2t_rsp - iSCSI R2T Response processing 526 526 * @conn: iscsi connection 527 - * @task: scsi command task 527 + * @hdr: PDU header 528 528 */ 529 - static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) 529 + static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr) 530 530 { 531 531 struct iscsi_session *session = conn->session; 532 - struct iscsi_tcp_task *tcp_task = task->dd_data; 533 - struct iscsi_tcp_conn *tcp_conn = conn->dd_data; 534 - struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr; 532 + struct iscsi_tcp_task *tcp_task; 533 + struct iscsi_tcp_conn *tcp_conn; 534 + struct iscsi_r2t_rsp *rhdr; 535 535 struct iscsi_r2t_info *r2t; 536 - int r2tsn = be32_to_cpu(rhdr->r2tsn); 536 + struct iscsi_task *task; 537 537 u32 data_length; 538 538 u32 data_offset; 539 + int r2tsn; 539 540 int rc; 541 + 542 + spin_lock(&session->back_lock); 543 + task = iscsi_itt_to_ctask(conn, hdr->itt); 544 + if (!task) { 545 + spin_unlock(&session->back_lock); 546 + return ISCSI_ERR_BAD_ITT; 547 + } else if (task->sc->sc_data_direction != DMA_TO_DEVICE) { 548 + spin_unlock(&session->back_lock); 549 + return ISCSI_ERR_PROTO; 550 + } 551 + /* 552 + * A bad target might complete the cmd before we have handled R2Ts 553 + * so get a ref to the task that will be dropped in the xmit path. 
554 + */ 555 + if (task->state != ISCSI_TASK_RUNNING) { 556 + spin_unlock(&session->back_lock); 557 + /* Let the path that got the early rsp complete it */ 558 + return 0; 559 + } 560 + task->last_xfer = jiffies; 561 + __iscsi_get_task(task); 562 + 563 + tcp_conn = conn->dd_data; 564 + rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr; 565 + /* fill-in new R2T associated with the task */ 566 + iscsi_update_cmdsn(session, (struct iscsi_nopin *)rhdr); 567 + spin_unlock(&session->back_lock); 540 568 541 569 if (tcp_conn->in.datalen) { 542 570 iscsi_conn_printk(KERN_ERR, conn, 543 571 "invalid R2t with datalen %d\n", 544 572 tcp_conn->in.datalen); 545 - return ISCSI_ERR_DATALEN; 573 + rc = ISCSI_ERR_DATALEN; 574 + goto put_task; 546 575 } 547 576 577 + tcp_task = task->dd_data; 578 + r2tsn = be32_to_cpu(rhdr->r2tsn); 548 579 if (tcp_task->exp_datasn != r2tsn){ 549 580 ISCSI_DBG_TCP(conn, "task->exp_datasn(%d) != rhdr->r2tsn(%d)\n", 550 581 tcp_task->exp_datasn, r2tsn); 551 - return ISCSI_ERR_R2TSN; 582 + rc = ISCSI_ERR_R2TSN; 583 + goto put_task; 552 584 } 553 585 554 - /* fill-in new R2T associated with the task */ 555 - iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr); 556 - 557 - if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) { 586 + if (session->state != ISCSI_STATE_LOGGED_IN) { 558 587 iscsi_conn_printk(KERN_INFO, conn, 559 588 "dropping R2T itt %d in recovery.\n", 560 589 task->itt); 561 - return 0; 590 + rc = 0; 591 + goto put_task; 562 592 } 563 593 564 594 data_length = be32_to_cpu(rhdr->data_length); 565 595 if (data_length == 0) { 566 596 iscsi_conn_printk(KERN_ERR, conn, 567 597 "invalid R2T with zero data len\n"); 568 - return ISCSI_ERR_DATALEN; 598 + rc = ISCSI_ERR_DATALEN; 599 + goto put_task; 569 600 } 570 601 571 602 if (data_length > session->max_burst) ··· 610 579 "invalid R2T with data len %u at offset %u " 611 580 "and total length %d\n", data_length, 612 581 data_offset, task->sc->sdb.length); 613 - return ISCSI_ERR_DATALEN; 582 + 
rc = ISCSI_ERR_DATALEN; 583 + goto put_task; 614 584 } 615 585 616 586 spin_lock(&tcp_task->pool2queue); ··· 621 589 "Target has sent more R2Ts than it " 622 590 "negotiated for or driver has leaked.\n"); 623 591 spin_unlock(&tcp_task->pool2queue); 624 - return ISCSI_ERR_PROTO; 592 + rc = ISCSI_ERR_PROTO; 593 + goto put_task; 625 594 } 626 595 627 596 r2t->exp_statsn = rhdr->statsn; ··· 640 607 641 608 iscsi_requeue_task(task); 642 609 return 0; 610 + 611 + put_task: 612 + iscsi_put_task(task); 613 + return rc; 643 614 } 644 615 645 616 /* ··· 767 730 rc = iscsi_complete_pdu(conn, hdr, NULL, 0); 768 731 break; 769 732 case ISCSI_OP_R2T: 770 - spin_lock(&conn->session->back_lock); 771 - task = iscsi_itt_to_ctask(conn, hdr->itt); 772 - spin_unlock(&conn->session->back_lock); 773 - if (!task) 774 - rc = ISCSI_ERR_BAD_ITT; 775 - else if (ahslen) 733 + if (ahslen) { 776 734 rc = ISCSI_ERR_AHSLEN; 777 - else if (task->sc->sc_data_direction == DMA_TO_DEVICE) { 778 - task->last_xfer = jiffies; 779 - spin_lock(&conn->session->frwd_lock); 780 - rc = iscsi_tcp_r2t_rsp(conn, task); 781 - spin_unlock(&conn->session->frwd_lock); 782 - } else 783 - rc = ISCSI_ERR_PROTO; 735 + break; 736 + } 737 + rc = iscsi_tcp_r2t_rsp(conn, hdr); 784 738 break; 785 739 case ISCSI_OP_LOGIN_RSP: 786 740 case ISCSI_OP_TEXT_RSP:
+2 -2
include/scsi/libiscsi.h
··· 187 187 struct iscsi_task *task; /* xmit task in progress */ 188 188 189 189 /* xmit */ 190 - spinlock_t taskqueuelock; /* protects the next three lists */ 190 + /* items must be added/deleted under frwd lock */ 191 191 struct list_head mgmtqueue; /* mgmt (control) xmit queue */ 192 192 struct list_head cmdqueue; /* data-path cmd queue */ 193 193 struct list_head requeue; /* tasks needing another run */ ··· 332 332 * cmdsn, queued_cmdsn * 333 333 * session resources: * 334 334 * - cmdpool kfifo_out , * 335 - * - mgmtpool, */ 335 + * - mgmtpool, queues */ 336 336 spinlock_t back_lock; /* protects cmdsn_exp * 337 337 * cmdsn_max, * 338 338 * cmdpool kfifo_in */