Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

scsi: zfcp: fix request object use-after-free in send path causing seqno errors

With a recent change to our send path for FSF commands we introduced a
possible use-after-free of request-objects, that might further lead to
zfcp crafting bad requests, which the FCP channel correctly complains
about with an error (FSF_PROT_SEQ_NUMB_ERROR). This error is then handled
by an adapter-wide recovery.

The following sequence illustrates the possible use-after-free:

Send Path:

int zfcp_fsf_open_port(struct zfcp_erp_action *erp_action)
{
struct zfcp_fsf_req *req;
...
spin_lock_irq(&qdio->req_q_lock);
// ^^^^^^^^^^^^^^^^
// protects QDIO queue during sending
...
req = zfcp_fsf_req_create(qdio,
FSF_QTCB_OPEN_PORT_WITH_DID,
SBAL_SFLAGS0_TYPE_READ,
qdio->adapter->pool.erp_req);
// ^^^^^^^^^^^^^^^^^^^
// allocation of the request-object
...
retval = zfcp_fsf_req_send(req);
...
spin_unlock_irq(&qdio->req_q_lock);
return retval;
}

static int zfcp_fsf_req_send(struct zfcp_fsf_req *req)
{
struct zfcp_adapter *adapter = req->adapter;
struct zfcp_qdio *qdio = adapter->qdio;
...
zfcp_reqlist_add(adapter->req_list, req);
// ^^^^^^^^^^^^^^^^
// add request to our driver-internal hash-table for tracking
// (protected by separate lock req_list->lock)
...
if (zfcp_qdio_send(qdio, &req->qdio_req)) {
// ^^^^^^^^^^^^^^
// hand-off the request to FCP channel;
// the request can complete at any point now
...
}

/* Don't increase for unsolicited status */
if (!zfcp_fsf_req_is_status_read_buffer(req))
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
// possible use-after-free
adapter->fsf_req_seq_no++;
// ^^^^^^^^^^^^^^^^
// because of the use-after-free we might
// miss this accounting, and as follow-up
// this results in the FCP channel error
// FSF_PROT_SEQ_NUMB_ERROR
adapter->req_no++;

return 0;
}

static inline bool
zfcp_fsf_req_is_status_read_buffer(struct zfcp_fsf_req *req)
{
return req->qtcb == NULL;
// ^^^^^^^^^
// possible use-after-free
}

Response Path:

void zfcp_fsf_reqid_check(struct zfcp_qdio *qdio, int sbal_idx)
{
...
struct zfcp_fsf_req *fsf_req;
...
for (idx = 0; idx < QDIO_MAX_ELEMENTS_PER_BUFFER; idx++) {
...
fsf_req = zfcp_reqlist_find_rm(adapter->req_list,
req_id);
// ^^^^^^^^^^^^^^^^^^^^
// remove request from our driver-internal
// hash-table (lock req_list->lock)
...
zfcp_fsf_req_complete(fsf_req);
}
}

static void zfcp_fsf_req_complete(struct zfcp_fsf_req *req)
{
...
if (likely(req->status & ZFCP_STATUS_FSFREQ_CLEANUP))
zfcp_fsf_req_free(req);
// ^^^^^^^^^^^^^^^^^
// free memory for request-object
else
complete(&req->completion);
// ^^^^^^^^
// completion notification for code-paths that wait
// synchronously for the completion of the request; in
// those the memory is freed separately
}

The result of the use-after-free only affects the send path, and can not
lead to any data corruption. In case we miss the sequence-number
accounting, because the memory was already re-purposed, the next FSF
command will fail with said FCP channel error, and we will recover the
whole adapter. This causes no additional errors, but it slows down
traffic. There is a slight chance of the same thing happening again
recursively after the adapter recovery, but so far this has not been seen.

This was seen under z/VM, where the send path might run on a virtual CPU
that gets scheduled away by z/VM, while the return path might still run,
and so create the necessary timing. Running with KASAN can also slow down
the kernel sufficiently to run into this use-after-free, and then see the
report by KASAN.

To fix this, simply pull the test for the sequence-number accounting in
front of the hand-off to the FCP channel (this information doesn't change
during hand-off), but leave the sequence-number accounting itself where it
is.

To make future regressions of the same kind less likely, add comments to
all closely related code-paths.

Signed-off-by: Benjamin Block <bblock@linux.ibm.com>
Fixes: f9eca0227600 ("scsi: zfcp: drop duplicate fsf_command from zfcp_fsf_req which is also in QTCB header")
Cc: <stable@vger.kernel.org> #5.0+
Reviewed-by: Steffen Maier <maier@linux.ibm.com>
Reviewed-by: Jens Remus <jremus@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>

authored by

Benjamin Block and committed by
Martin K. Petersen
b76becde 705d3b08

+40 -5
+40 -5
drivers/s390/scsi/zfcp_fsf.c
··· 11 11 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 12 12 13 13 #include <linux/blktrace_api.h> 14 + #include <linux/types.h> 14 15 #include <linux/slab.h> 15 16 #include <scsi/fc/fc_els.h> 16 17 #include "zfcp_ext.h" ··· 742 741 743 742 static int zfcp_fsf_req_send(struct zfcp_fsf_req *req) 744 743 { 744 + const bool is_srb = zfcp_fsf_req_is_status_read_buffer(req); 745 745 struct zfcp_adapter *adapter = req->adapter; 746 746 struct zfcp_qdio *qdio = adapter->qdio; 747 747 int req_id = req->req_id; ··· 759 757 return -EIO; 760 758 } 761 759 760 + /* 761 + * NOTE: DO NOT TOUCH ASYNC req PAST THIS POINT. 762 + * ONLY TOUCH SYNC req AGAIN ON req->completion. 763 + * 764 + * The request might complete and be freed concurrently at any point 765 + * now. This is not protected by the QDIO-lock (req_q_lock). So any 766 + * uncontrolled access after this might result in an use-after-free bug. 767 + * Only if the request doesn't have ZFCP_STATUS_FSFREQ_CLEANUP set, and 768 + * when it is completed via req->completion, is it safe to use req 769 + * again. 770 + */ 771 + 762 772 /* Don't increase for unsolicited status */ 763 - if (!zfcp_fsf_req_is_status_read_buffer(req)) 773 + if (!is_srb) 764 774 adapter->fsf_req_seq_no++; 765 775 adapter->req_no++; 766 776 ··· 819 805 retval = zfcp_fsf_req_send(req); 820 806 if (retval) 821 807 goto failed_req_send; 808 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 822 809 823 810 goto out; 824 811 ··· 929 914 req->qtcb->bottom.support.req_handle = (u64) old_req_id; 930 915 931 916 zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); 932 - if (!zfcp_fsf_req_send(req)) 917 + if (!zfcp_fsf_req_send(req)) { 918 + /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */ 933 919 goto out; 920 + } 934 921 935 922 out_error_free: 936 923 zfcp_fsf_req_free(req); ··· 1115 1098 ret = zfcp_fsf_req_send(req); 1116 1099 if (ret) 1117 1100 goto failed_send; 1101 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! 
*/ 1118 1102 1119 1103 goto out; 1120 1104 ··· 1216 1198 ret = zfcp_fsf_req_send(req); 1217 1199 if (ret) 1218 1200 goto failed_send; 1201 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1219 1202 1220 1203 goto out; 1221 1204 ··· 1262 1243 zfcp_fsf_req_free(req); 1263 1244 erp_action->fsf_req_id = 0; 1264 1245 } 1246 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1265 1247 out: 1266 1248 spin_unlock_irq(&qdio->req_q_lock); 1267 1249 return retval; ··· 1299 1279 zfcp_fsf_start_timer(req, ZFCP_FSF_REQUEST_TIMEOUT); 1300 1280 retval = zfcp_fsf_req_send(req); 1301 1281 spin_unlock_irq(&qdio->req_q_lock); 1302 - if (!retval) 1282 + if (!retval) { 1283 + /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */ 1303 1284 wait_for_completion(&req->completion); 1285 + } 1304 1286 1305 1287 zfcp_fsf_req_free(req); 1306 1288 return retval; ··· 1352 1330 zfcp_fsf_req_free(req); 1353 1331 erp_action->fsf_req_id = 0; 1354 1332 } 1333 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1355 1334 out: 1356 1335 spin_unlock_irq(&qdio->req_q_lock); 1357 1336 return retval; ··· 1395 1372 retval = zfcp_fsf_req_send(req); 1396 1373 spin_unlock_irq(&qdio->req_q_lock); 1397 1374 1398 - if (!retval) 1375 + if (!retval) { 1376 + /* NOTE: ONLY TOUCH SYNC req AGAIN ON req->completion. */ 1399 1377 wait_for_completion(&req->completion); 1378 + } 1400 1379 1401 1380 zfcp_fsf_req_free(req); 1402 1381 ··· 1518 1493 erp_action->fsf_req_id = 0; 1519 1494 put_device(&port->dev); 1520 1495 } 1496 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1521 1497 out: 1522 1498 spin_unlock_irq(&qdio->req_q_lock); 1523 1499 return retval; ··· 1583 1557 zfcp_fsf_req_free(req); 1584 1558 erp_action->fsf_req_id = 0; 1585 1559 } 1560 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1586 1561 out: 1587 1562 spin_unlock_irq(&qdio->req_q_lock); 1588 1563 return retval; ··· 1653 1626 retval = zfcp_fsf_req_send(req); 1654 1627 if (retval) 1655 1628 zfcp_fsf_req_free(req); 1629 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! 
*/ 1656 1630 out: 1657 1631 spin_unlock_irq(&qdio->req_q_lock); 1658 1632 if (!retval) ··· 1709 1681 retval = zfcp_fsf_req_send(req); 1710 1682 if (retval) 1711 1683 zfcp_fsf_req_free(req); 1684 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1712 1685 out: 1713 1686 spin_unlock_irq(&qdio->req_q_lock); 1714 1687 if (!retval) ··· 1805 1776 zfcp_fsf_req_free(req); 1806 1777 erp_action->fsf_req_id = 0; 1807 1778 } 1779 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1808 1780 out: 1809 1781 spin_unlock_irq(&qdio->req_q_lock); 1810 1782 return retval; ··· 1929 1899 zfcp_fsf_req_free(req); 1930 1900 erp_action->fsf_req_id = 0; 1931 1901 } 1902 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 1932 1903 out: 1933 1904 spin_unlock_irq(&qdio->req_q_lock); 1934 1905 return retval; ··· 2018 1987 zfcp_fsf_req_free(req); 2019 1988 erp_action->fsf_req_id = 0; 2020 1989 } 1990 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 2021 1991 out: 2022 1992 spin_unlock_irq(&qdio->req_q_lock); 2023 1993 return retval; ··· 2331 2299 retval = zfcp_fsf_req_send(req); 2332 2300 if (unlikely(retval)) 2333 2301 goto failed_scsi_cmnd; 2302 + /* NOTE: DO NOT TOUCH req PAST THIS POINT! */ 2334 2303 2335 2304 goto out; 2336 2305 ··· 2406 2373 zfcp_fc_fcp_tm(fcp_cmnd, sdev, tm_flags); 2407 2374 2408 2375 zfcp_fsf_start_timer(req, ZFCP_FSF_SCSI_ER_TIMEOUT); 2409 - if (!zfcp_fsf_req_send(req)) 2376 + if (!zfcp_fsf_req_send(req)) { 2377 + /* NOTE: DO NOT TOUCH req, UNTIL IT COMPLETES! */ 2410 2378 goto out; 2379 + } 2411 2380 2412 2381 zfcp_fsf_req_free(req); 2413 2382 req = NULL;