Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending

Pull SCSI target updates from Nicholas Bellinger:
"Things were a lot more calm than previously expected. It's primarily
fixes in various areas, with most of the new functionality centering
around TCMU backend driver work that Xiubo Li has been driving.

Here's the summary on the feature side:

- Make T10-PI verify configurable for emulated (FILEIO + RD) backends
(Dmitry Monakhov)
- Allow target-core/TCMU pass-through to use in-kernel SPC-PR logic
(Bryant Ly + MNC)
- Add TCMU support for growing ring buffer size (Xiubo Li + MNC)
- Add TCMU support for global block data pool (Xiubo Li + MNC)

and on the bug-fix side:

- Fix COMPARE_AND_WRITE non GOOD status handling for READ phase
failures (Gary Guo + nab)
- Fix iscsi-target hang when explicitly changing per-NodeACL
CmdSN queue depth with concurrent login-driven session
reinstatement (Gary Guo + nab)
- Fix ibmvscsis fabric driver ABORT task handling (Bryant Ly)
- Fix target-core/FILEIO zero length handling (Bart Van Assche)

Also, there was an oops introduced with the WRITE_VERIFY changes that
I ended up reverting at the last minute because, as is not unusual,
Bart and I could not agree on the fix in time for -rc1. Since it's
specific to a conformance test, it's been reverted for now.

There is a separate patch in the queue to address the underlying
control CDB write overflow regression in >= v4.3, independent of the
WRITE_VERIFY revert here, which will be pushed post -rc1"

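For the T10-PI item above, the new pi_prot_verify attribute sits next to the
existing protection attributes in configfs. A minimal sketch of toggling it on
a FILEIO backstore; the fileio_0/disk0 path is an illustrative name, not
something created by this series:

# assumes an existing FILEIO backstore at core/fileio_0/disk0 (hypothetical)
ATTRIB=/sys/kernel/config/target/core/fileio_0/disk0/attrib

cat $ATTRIB/pi_prot_type          # protection type currently configured
echo 0 > $ATTRIB/pi_prot_verify   # skip PI verification in the emulated backend
echo 1 > $ATTRIB/pi_prot_verify   # re-enable it (ignored with a warning if the
                                  # backend has no PI, or if PI is done in hardware)
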
* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending: (30 commits)
Revert "target: Fix VERIFY and WRITE VERIFY command parsing"
IB/srpt: Avoid that aborting a command triggers a kernel warning
IB/srpt: Fix abort handling
target/fileio: Fix zero-length READ and WRITE handling
ibmvscsis: Do not send aborted task response
tcmu: fix module removal due to stuck thread
target: Don't force session reset if queue_depth does not change
iscsi-target: Set session_fall_back_to_erl0 when forcing reinstatement
target: Fix compare_and_write_callback handling for non GOOD status
tcmu: Recalculate the tcmu_cmd size to save cmd area memories
tcmu: Add global data block pool support
tcmu: Add dynamic growing data area feature support
target: fixup error message in target_tg_pt_gp_tg_pt_gp_id_store()
target: fixup error message in target_tg_pt_gp_alua_access_type_store()
target/user: PGR Support
target: Add WRITE_VERIFY_16
Documentation/target: add an example script to configure an iSCSI target
target: Use kmalloc_array() in transport_kmap_data_sg()
target: Use kmalloc_array() in compare_and_write_callback()
target: Improve size determinations in two functions
...

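The "target: Don't force session reset if queue_depth does not change" change
above is visible from configfs: rewriting an iSCSI NodeACL's current CmdSN
depth no longer forces a session reinstatement, only an actual change does. A
sketch, with the target and initiator IQNs being placeholders:

# hypothetical target/initiator names under an existing TPG with ACLs
TGT=iqn.2003-01.org.linux-iscsi.$(hostname):disk0
INI=iqn.1994-05.com.example:initiator0
ACL=/sys/kernel/config/target/iscsi/$TGT/tpgt_1/acls/$INI

DEPTH=$(cat $ACL/cmdsn_depth)     # current per-NodeACL CmdSN depth
echo $DEPTH > $ACL/cmdsn_depth    # same value: now a no-op, session stays up
echo 128 > $ACL/cmdsn_depth       # real change: still goes through session shutdown
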
859 insertions(+), 269 deletions(-)
Documentation/target/target-export-device      |  +80  (new file)

#!/bin/sh
#
# This script illustrates the sequence of operations in configfs to
# create a very simple LIO iSCSI target with a file or block device
# backstore.
#
# (C) Copyright 2014 Christophe Vu-Brugier <cvubrugier@fastmail.fm>
#

print_usage() {
    cat <<EOF
Usage: $(basename $0) [-p PORTAL] DEVICE|FILE
Export a block device or a file as an iSCSI target with a single LUN
EOF
}

die() {
    echo $1
    exit 1
}

while getopts "hp:" arg; do
    case $arg in
        h) print_usage; exit 0;;
        p) PORTAL=${OPTARG};;
    esac
done
shift $(($OPTIND - 1))

DEVICE=$1
[ -n "$DEVICE" ] || die "Missing device or file argument"
[ -b $DEVICE -o -f $DEVICE ] || die "Invalid device or file: ${DEVICE}"
IQN="iqn.2003-01.org.linux-iscsi.$(hostname):$(basename $DEVICE)"
[ -n "$PORTAL" ] || PORTAL="0.0.0.0:3260"

CONFIGFS=/sys/kernel/config
CORE_DIR=$CONFIGFS/target/core
ISCSI_DIR=$CONFIGFS/target/iscsi

# Load the target modules and mount the config file system
lsmod | grep -q configfs || modprobe configfs
lsmod | grep -q target_core_mod || modprobe target_core_mod
mount | grep -q ^configfs || mount -t configfs none $CONFIGFS
mkdir -p $ISCSI_DIR

# Create a backstore
if [ -b $DEVICE ]; then
    BACKSTORE_DIR=$CORE_DIR/iblock_0/data
    mkdir -p $BACKSTORE_DIR
    echo "udev_path=${DEVICE}" > $BACKSTORE_DIR/control
else
    BACKSTORE_DIR=$CORE_DIR/fileio_0/data
    mkdir -p $BACKSTORE_DIR
    DEVICE_SIZE=$(du -b $DEVICE | cut -f1)
    echo "fd_dev_name=${DEVICE}" > $BACKSTORE_DIR/control
    echo "fd_dev_size=${DEVICE_SIZE}" > $BACKSTORE_DIR/control
    echo 1 > $BACKSTORE_DIR/attrib/emulate_write_cache
fi
echo 1 > $BACKSTORE_DIR/enable

# Create an iSCSI target and a target portal group (TPG)
mkdir $ISCSI_DIR/$IQN
mkdir $ISCSI_DIR/$IQN/tpgt_1/

# Create a LUN
mkdir $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0
ln -s $BACKSTORE_DIR $ISCSI_DIR/$IQN/tpgt_1/lun/lun_0/data
echo 1 > $ISCSI_DIR/$IQN/tpgt_1/enable

# Create a network portal
mkdir $ISCSI_DIR/$IQN/tpgt_1/np/$PORTAL

# Disable authentication
echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/authentication
echo 1 > $ISCSI_DIR/$IQN/tpgt_1/attrib/generate_node_acls

# Allow write access for non authenticated initiators
echo 0 > $ISCSI_DIR/$IQN/tpgt_1/attrib/demo_mode_write_protect

echo "Target ${IQN}, portal ${PORTAL} has been created"
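
A hypothetical invocation of the script, assuming it has been copied out as
target-export-device and made executable, and that the device or backing file
already exists (the portal address is illustrative):

# export a block device on the default portal (0.0.0.0:3260)
./target-export-device /dev/sdb

# export a 1 GiB backing file on a specific portal
dd if=/dev/zero of=/tmp/disk0.img bs=1M count=1024
./target-export-device -p 192.168.0.10:3260 /tmp/disk0.img
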
drivers/infiniband/ulp/srpt/ib_srpt.c          |   +3   -6
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c       |  +89  -25
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.h       |   +2
drivers/target/iscsi/iscsi_target.c            |  +16  -35
drivers/target/iscsi/iscsi_target_configfs.c   |   +1
drivers/target/iscsi/iscsi_target_login.c      |   +1
drivers/target/target_core_configfs.c          |  +44  -10
drivers/target/target_core_device.c            |  +38
drivers/target/target_core_file.c              |  +22  -10
drivers/target/target_core_iblock.c            |   +6   -6
drivers/target/target_core_iblock.h            |   +2   -1
drivers/target/target_core_pr.c                |   +1   -1
drivers/target/target_core_pr.h                |   +5   -4
drivers/target/target_core_pscsi.c             |   +2   -1
drivers/target/target_core_rd.c                |  +18  -32
drivers/target/target_core_sbc.c               |   +7   -3
drivers/target/target_core_tpg.c               |   +7
drivers/target/target_core_transport.c         |   +1   -1
drivers/target/target_core_user.c              | +511 -134
*/ 1138 883 if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags)) 1139 884 return -EBUSY; 885 + 886 + udev->inode = inode; 1140 887 1141 888 pr_debug("open\n"); 1142 889 ··· 1220 963 1221 964 info->name = str; 1222 965 1223 - udev->mb_addr = vzalloc(TCMU_RING_SIZE); 966 + udev->mb_addr = vzalloc(CMDR_SIZE); 1224 967 if (!udev->mb_addr) { 1225 968 ret = -ENOMEM; 1226 969 goto err_vzalloc; ··· 1229 972 /* mailbox fits in first part of CMDR space */ 1230 973 udev->cmdr_size = CMDR_SIZE - CMDR_OFF; 1231 974 udev->data_off = CMDR_SIZE; 1232 - udev->data_size = TCMU_RING_SIZE - CMDR_SIZE; 975 + udev->data_size = DATA_SIZE; 976 + udev->dbi_thresh = 0; /* Default in Idle state */ 977 + udev->waiting_global = false; 1233 978 979 + /* Initialise the mailbox of the ring buffer */ 1234 980 mb = udev->mb_addr; 1235 981 mb->version = TCMU_MAILBOX_VERSION; 1236 982 mb->flags = TCMU_MAILBOX_FLAG_CAP_OOOC; ··· 1244 984 WARN_ON(udev->data_size % PAGE_SIZE); 1245 985 WARN_ON(udev->data_size % DATA_BLOCK_SIZE); 1246 986 987 + INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL); 988 + 1247 989 info->version = __stringify(TCMU_MAILBOX_VERSION); 1248 990 1249 991 info->mem[0].name = "tcm-user command & data buffer"; 1250 992 info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr; 1251 993 info->mem[0].size = TCMU_RING_SIZE; 1252 - info->mem[0].memtype = UIO_MEM_VIRTUAL; 994 + info->mem[0].memtype = UIO_MEM_NONE; 1253 995 1254 996 info->irqcontrol = tcmu_irqcontrol; 1255 997 info->irq = UIO_IRQ_CUSTOM; ··· 1276 1014 udev->uio_info.uio_dev->minor); 1277 1015 if (ret) 1278 1016 goto err_netlink; 1017 + 1018 + mutex_lock(&root_udev_mutex); 1019 + list_add(&udev->node, &root_udev); 1020 + mutex_unlock(&root_udev_mutex); 1279 1021 1280 1022 return 0; 1281 1023 ··· 1315 1049 return udev->uio_info.uio_dev ? 
true : false; 1316 1050 } 1317 1051 1052 + static void tcmu_blocks_release(struct tcmu_dev *udev) 1053 + { 1054 + int i; 1055 + struct page *page; 1056 + 1057 + /* Try to release all block pages */ 1058 + mutex_lock(&udev->cmdr_lock); 1059 + for (i = 0; i <= udev->dbi_max; i++) { 1060 + page = radix_tree_delete(&udev->data_blocks, i); 1061 + if (page) { 1062 + __free_page(page); 1063 + atomic_dec(&global_db_count); 1064 + } 1065 + } 1066 + mutex_unlock(&udev->cmdr_lock); 1067 + } 1068 + 1318 1069 static void tcmu_free_device(struct se_device *dev) 1319 1070 { 1320 1071 struct tcmu_dev *udev = TCMU_DEV(dev); ··· 1340 1057 int i; 1341 1058 1342 1059 del_timer_sync(&udev->timeout); 1060 + 1061 + mutex_lock(&root_udev_mutex); 1062 + list_del(&udev->node); 1063 + mutex_unlock(&root_udev_mutex); 1343 1064 1344 1065 vfree(udev->mb_addr); 1345 1066 ··· 1356 1069 idr_destroy(&udev->commands); 1357 1070 spin_unlock_irq(&udev->commands_lock); 1358 1071 WARN_ON(!all_expired); 1072 + 1073 + tcmu_blocks_release(udev); 1359 1074 1360 1075 if (tcmu_dev_configured(udev)) { 1361 1076 tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name, ··· 1545 1256 .tb_dev_attrib_attrs = NULL, 1546 1257 }; 1547 1258 1259 + static int unmap_thread_fn(void *data) 1260 + { 1261 + struct tcmu_dev *udev; 1262 + loff_t off; 1263 + uint32_t start, end, block; 1264 + struct page *page; 1265 + int i; 1266 + 1267 + while (1) { 1268 + DEFINE_WAIT(__wait); 1269 + 1270 + prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); 1271 + schedule(); 1272 + finish_wait(&unmap_wait, &__wait); 1273 + 1274 + if (kthread_should_stop()) 1275 + break; 1276 + 1277 + mutex_lock(&root_udev_mutex); 1278 + list_for_each_entry(udev, &root_udev, node) { 1279 + mutex_lock(&udev->cmdr_lock); 1280 + 1281 + /* Try to complete the finished commands first */ 1282 + tcmu_handle_completions(udev); 1283 + 1284 + /* Skip the udevs waiting the global pool or in idle */ 1285 + if (udev->waiting_global || !udev->dbi_thresh) { 1286 + mutex_unlock(&udev->cmdr_lock); 1287 + continue; 1288 + } 1289 + 1290 + end = udev->dbi_max + 1; 1291 + block = find_last_bit(udev->data_bitmap, end); 1292 + if (block == udev->dbi_max) { 1293 + /* 1294 + * The last bit is dbi_max, so there is 1295 + * no need to shrink any blocks. 1296 + */ 1297 + mutex_unlock(&udev->cmdr_lock); 1298 + continue; 1299 + } else if (block == end) { 1300 + /* The current udev will goto idle state */ 1301 + udev->dbi_thresh = start = 0; 1302 + udev->dbi_max = 0; 1303 + } else { 1304 + udev->dbi_thresh = start = block + 1; 1305 + udev->dbi_max = block; 1306 + } 1307 + 1308 + /* Here will truncate the data area from off */ 1309 + off = udev->data_off + start * DATA_BLOCK_SIZE; 1310 + unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); 1311 + 1312 + /* Release the block pages */ 1313 + for (i = start; i < end; i++) { 1314 + page = radix_tree_delete(&udev->data_blocks, i); 1315 + if (page) { 1316 + __free_page(page); 1317 + atomic_dec(&global_db_count); 1318 + } 1319 + } 1320 + mutex_unlock(&udev->cmdr_lock); 1321 + } 1322 + 1323 + /* 1324 + * Try to wake up the udevs who are waiting 1325 + * for the global data pool. 
1326 + */ 1327 + list_for_each_entry(udev, &root_udev, node) { 1328 + if (udev->waiting_global) 1329 + wake_up(&udev->wait_cmdr); 1330 + } 1331 + mutex_unlock(&root_udev_mutex); 1332 + } 1333 + 1334 + return 0; 1335 + } 1336 + 1548 1337 static int __init tcmu_module_init(void) 1549 1338 { 1550 1339 int ret, i, len = 0; ··· 1668 1301 if (ret) 1669 1302 goto out_attrs; 1670 1303 1304 + init_waitqueue_head(&unmap_wait); 1305 + unmap_thread = kthread_run(unmap_thread_fn, NULL, "tcmu_unmap"); 1306 + if (IS_ERR(unmap_thread)) { 1307 + ret = PTR_ERR(unmap_thread); 1308 + goto out_unreg_transport; 1309 + } 1310 + 1671 1311 return 0; 1672 1312 1313 + out_unreg_transport: 1314 + target_backend_unregister(&tcmu_ops); 1673 1315 out_attrs: 1674 1316 kfree(tcmu_attrs); 1675 1317 out_unreg_genl: ··· 1693 1317 1694 1318 static void __exit tcmu_module_exit(void) 1695 1319 { 1320 + kthread_stop(unmap_thread); 1696 1321 target_backend_unregister(&tcmu_ops); 1697 1322 kfree(tcmu_attrs); 1698 1323 genl_unregister_family(&tcmu_genl_family);
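The queueing path above leans on two sizing helpers, tcmu_cmd_get_base_cmd_size() and tcmu_cmd_get_cmd_size(), that are defined earlier in target_core_user.c and not visible in this hunk. A minimal sketch of the arithmetic they stand for, reconstructed from the inline computation that the hunk removes (the in-tree helpers take a struct tcmu_cmd and its dbi_cnt; the names and parameters below are illustrative only):

    /*
     * Sketch only: a ring entry needs the fixed header plus one struct
     * iovec per data block the command may touch, and the CDB is copied
     * in after that, padded to TCMU_OP_ALIGN_SIZE so the next entry
     * stays aligned.  This mirrors the removed inline code above.
     */
    static inline size_t sketch_base_cmd_size(size_t iov_cnt)
    {
            return max(offsetof(struct tcmu_cmd_entry, req.iov[iov_cnt]),
                       sizeof(struct tcmu_cmd_entry));
    }

    static inline size_t sketch_cmd_size(struct se_cmd *se_cmd,
                                         size_t base_command_size)
    {
            return base_command_size +
                   round_up(scsi_command_size(se_cmd->t_task_cdb),
                            TCMU_OP_ALIGN_SIZE);
    }

The first call sizes the entry pessimistically from the data-block count so the ring-space check cannot under-reserve; once scatter_data_area() has filled in the iov[] arrays, the same arithmetic is rerun with the real iov_cnt plus iov_bidi_cnt, and that smaller value is what tcmu_hdr_set_len() finally records.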
+1
include/scsi/scsi_proto.h
···
112 112 #define WRITE_16 0x8a
113 113 #define READ_ATTRIBUTE 0x8c
114 114 #define WRITE_ATTRIBUTE 0x8d
115 + #define WRITE_VERIFY_16 0x8e
115 116 #define VERIFY_16 0x8f
116 117 #define SYNCHRONIZE_CACHE_16 0x91
117 118 #define WRITE_SAME_16 0x93
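WRITE AND VERIFY (16) reuses the standard 16-byte read/write CDB layout, so recognising the new opcode is mostly a matter of pulling out the 8-byte LBA and 4-byte transfer length. A hedged sketch of that decode (the in-tree parsing lives in target_core_sbc.c and is not reproduced here; decode_write_verify_16() and struct wv16 are made-up names for illustration):

    #include <linux/errno.h>
    #include <asm/unaligned.h>
    #include <scsi/scsi_proto.h>

    struct wv16 {
            u64 lba;        /* CDB bytes 2-9, big endian */
            u32 nr_blocks;  /* CDB bytes 10-13, big endian */
    };

    static int decode_write_verify_16(const unsigned char *cdb, struct wv16 *wv)
    {
            if (cdb[0] != WRITE_VERIFY_16)
                    return -EINVAL;

            wv->lba = get_unaligned_be64(&cdb[2]);
            wv->nr_blocks = get_unaligned_be32(&cdb[10]);
            return 0;
    }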
+1
include/target/target_core_backend.h
···
10 10 * backend module.
11 11 */
12 12 #define TRANSPORT_FLAG_PASSTHROUGH_ALUA 0x2
13 + #define TRANSPORT_FLAG_PASSTHROUGH_PGR 0x4
13 14
14 15 struct request_queue;
15 16 struct scatterlist;
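The new bit sits next to TRANSPORT_FLAG_PASSTHROUGH and TRANSPORT_FLAG_PASSTHROUGH_ALUA in a backend's transport_flags. A sketch of how a pass-through backend might advertise it (example_pt_ops is hypothetical and far from a complete target_backend_ops; going by the summary above, a backend that sets the bit handles PERSISTENT RESERVE IN/OUT itself, while one that leaves it clear, as TCMU does, lets target-core apply its in-kernel SPC-3 persistent reservation logic):

    #include <target/target_core_backend.h>

    /* Illustrative only: a real backend also fills in the callbacks. */
    static const struct target_backend_ops example_pt_ops = {
            .name            = "example_pt",
            .transport_flags = TRANSPORT_FLAG_PASSTHROUGH |
                               TRANSPORT_FLAG_PASSTHROUGH_ALUA |
                               TRANSPORT_FLAG_PASSTHROUGH_PGR,
            /* .attach_hba, .alloc_device, .parse_cdb, ... */
    };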
+1
include/target/target_core_base.h
···
664 664 int pi_prot_format;
665 665 enum target_prot_type pi_prot_type;
666 666 enum target_prot_type hw_pi_prot_type;
667 + int pi_prot_verify;
667 668 int enforce_pr_isids;
668 669 int force_pr_aptpl;
669 670 int is_nonrot;
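pi_prot_verify joins the other protection-information attributes in se_dev_attrib; the feature summary describes it as making T10-PI verification optional for the emulated FILEIO and RD backends. A sketch of the kind of gate a backend could put in front of its DIF check (want_pi_verify() is a made-up helper, and the real checks in the backends may be shaped differently):

    #include <target/target_core_base.h>

    /*
     * Only run DIF verification when the device has a protection type
     * configured and the administrator has left verification enabled
     * through the new attribute.
     */
    static bool want_pi_verify(struct se_device *dev)
    {
            return dev->dev_attrib.pi_prot_type != TARGET_DIF_TYPE0_PROT &&
                   dev->dev_attrib.pi_prot_verify;
    }

Assuming the field is wired up like the neighbouring dev_attrib members, it would appear as an attrib/pi_prot_verify file under the backstore's configfs directory and be toggled with the usual echo of 0 or 1.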