Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of master.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband:
IB/iser: iSER Kconfig and Makefile
IB/iser: iSER handling of memory for RDMA
IB/iser: iSER RDMA CM (CMA) and IB verbs interaction
IB/iser: iSER initiator iSCSI PDU and TX/RX
IB/iser: iSCSI iSER transport provider high level code
IB/iser: iSCSI iSER transport provider header file
IB/uverbs: Remove unnecessary list_del()s
IB/uverbs: Don't free wr list when it's known to be empty

+3130 -7
+2
drivers/infiniband/Kconfig
··· 41 41 42 42 source "drivers/infiniband/ulp/srp/Kconfig" 43 43 44 + source "drivers/infiniband/ulp/iser/Kconfig" 45 + 44 46 endmenu
+1
drivers/infiniband/Makefile
··· 3 3 obj-$(CONFIG_IPATH_CORE) += hw/ipath/ 4 4 obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ 5 5 obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ 6 + obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
+1 -1
drivers/infiniband/core/uverbs_cmd.c
··· 1530 1530 out_put: 1531 1531 put_qp_read(qp); 1532 1532 1533 - out: 1534 1533 while (wr) { 1535 1534 if (is_ud && wr->wr.ud.ah) 1536 1535 put_ah_read(wr->wr.ud.ah); ··· 1538 1539 wr = next; 1539 1540 } 1540 1541 1542 + out: 1541 1543 kfree(user_wr); 1542 1544 1543 1545 return ret ? ret : in_len;
-6
drivers/infiniband/core/uverbs_main.c
··· 188 188 189 189 idr_remove_uobj(&ib_uverbs_ah_idr, uobj); 190 190 ib_destroy_ah(ah); 191 - list_del(&uobj->list); 192 191 kfree(uobj); 193 192 } 194 193 ··· 199 200 idr_remove_uobj(&ib_uverbs_qp_idr, uobj); 200 201 ib_uverbs_detach_umcast(qp, uqp); 201 202 ib_destroy_qp(qp); 202 - list_del(&uobj->list); 203 203 ib_uverbs_release_uevent(file, &uqp->uevent); 204 204 kfree(uqp); 205 205 } ··· 211 213 212 214 idr_remove_uobj(&ib_uverbs_cq_idr, uobj); 213 215 ib_destroy_cq(cq); 214 - list_del(&uobj->list); 215 216 ib_uverbs_release_ucq(file, ev_file, ucq); 216 217 kfree(ucq); 217 218 } ··· 222 225 223 226 idr_remove_uobj(&ib_uverbs_srq_idr, uobj); 224 227 ib_destroy_srq(srq); 225 - list_del(&uobj->list); 226 228 ib_uverbs_release_uevent(file, uevent); 227 229 kfree(uevent); 228 230 } ··· 239 243 memobj = container_of(uobj, struct ib_umem_object, uobject); 240 244 ib_umem_release_on_close(mrdev, &memobj->umem); 241 245 242 - list_del(&uobj->list); 243 246 kfree(memobj); 244 247 } 245 248 ··· 247 252 248 253 idr_remove_uobj(&ib_uverbs_pd_idr, uobj); 249 254 ib_dealloc_pd(pd); 250 - list_del(&uobj->list); 251 255 kfree(uobj); 252 256 } 253 257
+11
drivers/infiniband/ulp/iser/Kconfig
··· 1 + config INFINIBAND_ISER 2 + tristate "ISCSI RDMA Protocol" 3 + depends on INFINIBAND && SCSI 4 + select SCSI_ISCSI_ATTRS 5 + ---help--- 6 + Support for the ISCSI RDMA Protocol over InfiniBand. This 7 + allows you to access storage devices that speak ISER/ISCSI 8 + over InfiniBand. 9 + 10 + The ISER protocol is defined by IETF. 11 + See <http://www.ietf.org/>.
+4
drivers/infiniband/ulp/iser/Makefile
··· 1 + obj-$(CONFIG_INFINIBAND_ISER) += ib_iser.o 2 + 3 + ib_iser-y := iser_verbs.o iser_initiator.o iser_memory.o \ 4 + iscsi_iser.o
+790
drivers/infiniband/ulp/iser/iscsi_iser.c
··· 1 + /* 2 + * iSCSI Initiator over iSER Data-Path 3 + * 4 + * Copyright (C) 2004 Dmitry Yusupov 5 + * Copyright (C) 2004 Alex Aizman 6 + * Copyright (C) 2005 Mike Christie 7 + * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved. 8 + * maintained by openib-general@openib.org 9 + * 10 + * This software is available to you under a choice of one of two 11 + * licenses. You may choose to be licensed under the terms of the GNU 12 + * General Public License (GPL) Version 2, available from the file 13 + * COPYING in the main directory of this source tree, or the 14 + * OpenIB.org BSD license below: 15 + * 16 + * Redistribution and use in source and binary forms, with or 17 + * without modification, are permitted provided that the following 18 + * conditions are met: 19 + * 20 + * - Redistributions of source code must retain the above 21 + * copyright notice, this list of conditions and the following 22 + * disclaimer. 23 + * 24 + * - Redistributions in binary form must reproduce the above 25 + * copyright notice, this list of conditions and the following 26 + * disclaimer in the documentation and/or other materials 27 + * provided with the distribution. 28 + * 29 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 30 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 31 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 32 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 33 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 34 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 35 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 36 + * SOFTWARE. 37 + * 38 + * Credits: 39 + * Christoph Hellwig 40 + * FUJITA Tomonori 41 + * Arne Redlich 42 + * Zhenyu Wang 43 + * Modified by: 44 + * Erez Zilber 45 + * 46 + * 47 + * $Id: iscsi_iser.c 6965 2006-05-07 11:36:20Z ogerlitz $ 48 + */ 49 + 50 + #include <linux/types.h> 51 + #include <linux/list.h> 52 + #include <linux/hardirq.h> 53 + #include <linux/kfifo.h> 54 + #include <linux/blkdev.h> 55 + #include <linux/init.h> 56 + #include <linux/ioctl.h> 57 + #include <linux/devfs_fs_kernel.h> 58 + #include <linux/cdev.h> 59 + #include <linux/in.h> 60 + #include <linux/net.h> 61 + #include <linux/scatterlist.h> 62 + #include <linux/delay.h> 63 + 64 + #include <net/sock.h> 65 + 66 + #include <asm/uaccess.h> 67 + 68 + #include <scsi/scsi_cmnd.h> 69 + #include <scsi/scsi_device.h> 70 + #include <scsi/scsi_eh.h> 71 + #include <scsi/scsi_tcq.h> 72 + #include <scsi/scsi_host.h> 73 + #include <scsi/scsi.h> 74 + #include <scsi/scsi_transport_iscsi.h> 75 + 76 + #include "iscsi_iser.h" 77 + 78 + static unsigned int iscsi_max_lun = 512; 79 + module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); 80 + 81 + int iser_debug_level = 0; 82 + 83 + MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover " 84 + "v" DRV_VER " (" DRV_DATE ")"); 85 + MODULE_LICENSE("Dual BSD/GPL"); 86 + MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov, Or Gerlitz"); 87 + 88 + module_param_named(debug_level, iser_debug_level, int, 0644); 89 + MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:disabled)"); 90 + 91 + struct iser_global ig; 92 + 93 + void 94 + iscsi_iser_recv(struct iscsi_conn *conn, 95 + struct iscsi_hdr *hdr, char *rx_data, int rx_data_len) 96 + { 97 + int rc = 0; 98 + uint32_t ret_itt; 99 + int datalen; 100 + int ahslen; 101 + 102 + /* verify PDU length */ 103 + datalen = ntoh24(hdr->dlength); 104 + if (datalen != rx_data_len) { 105 + printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB) \n", 106 + datalen, rx_data_len); 107 + rc = ISCSI_ERR_DATALEN; 108 + goto error; 109 + } 110 + 111 + /* read AHS */ 112 + ahslen = hdr->hlength * 4; 113 + 114 + /* verify itt (itt encoding: age+cid+itt) */ 115 + rc = iscsi_verify_itt(conn, hdr, &ret_itt); 116 + 117 + if (!rc) 118 + rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len); 119 + 120 + if (rc && rc != ISCSI_ERR_NO_SCSI_CMD) 121 + goto error; 122 + 123 + return; 124 + error: 125 + iscsi_conn_failure(conn, rc); 126 + } 127 + 128 + 129 + /** 130 + * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands 131 + * 132 + **/ 133 + static void 134 + iscsi_iser_cmd_init(struct iscsi_cmd_task *ctask) 135 + { 136 + struct iscsi_iser_conn *iser_conn = ctask->conn->dd_data; 137 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 138 + struct scsi_cmnd *sc = ctask->sc; 139 + 140 + iser_ctask->command_sent = 0; 141 + iser_ctask->iser_conn = iser_conn; 142 + 143 + if (sc->sc_data_direction == DMA_TO_DEVICE) { 144 + BUG_ON(ctask->total_length == 0); 145 + /* bytes to be sent via RDMA operations */ 146 + iser_ctask->rdma_data_count = ctask->total_length - 147 + ctask->imm_count - 148 + ctask->unsol_count; 149 + 150 + debug_scsi("cmd [itt %x total %d imm %d unsol_data %d " 151 + "rdma_data %d]\n", 152 + ctask->itt, ctask->total_length, ctask->imm_count, 153 + ctask->unsol_count, iser_ctask->rdma_data_count); 154 + } else 155 + /* bytes to be sent via RDMA operations */ 156 + iser_ctask->rdma_data_count = ctask->total_length; 157 + 158 + iser_ctask_rdma_init(iser_ctask); 159 + } 160 + 161 + /** 162 + * iscsi_mtask_xmit - xmit management(immediate) task 163 + * @conn: iscsi connection 164 + * @mtask: task management task 165 + * 166 + * Notes: 167 + * The function can return -EAGAIN in which case caller must 168 + * call it again later, or recover. '0' return code means successful 169 + * xmit. 170 + * 171 + **/ 172 + static int 173 + iscsi_iser_mtask_xmit(struct iscsi_conn *conn, 174 + struct iscsi_mgmt_task *mtask) 175 + { 176 + int error = 0; 177 + 178 + debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt); 179 + 180 + error = iser_send_control(conn, mtask); 181 + 182 + /* since iser xmits control with zero copy, mtasks can not be recycled 183 + * right after sending them. 184 + * The recycling scheme is based on whether a response is expected 185 + * - if yes, the mtask is recycled at iscsi_complete_pdu 186 + * - if no, the mtask is recycled at iser_snd_completion 187 + */ 188 + if (error && error != -EAGAIN) 189 + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); 190 + 191 + return error; 192 + } 193 + 194 + static int 195 + iscsi_iser_ctask_xmit_unsol_data(struct iscsi_conn *conn, 196 + struct iscsi_cmd_task *ctask) 197 + { 198 + struct iscsi_data hdr; 199 + int error = 0; 200 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 201 + 202 + /* Send data-out PDUs while there's still unsolicited data to send */ 203 + while (ctask->unsol_count > 0) { 204 + iscsi_prep_unsolicit_data_pdu(ctask, &hdr, 205 + iser_ctask->rdma_data_count); 206 + 207 + debug_scsi("Sending data-out: itt 0x%x, data count %d\n", 208 + hdr.itt, ctask->data_count); 209 + 210 + /* the buffer description has been passed with the command */ 211 + /* Send the command */ 212 + error = iser_send_data_out(conn, ctask, &hdr); 213 + if (error) { 214 + ctask->unsol_datasn--; 215 + goto iscsi_iser_ctask_xmit_unsol_data_exit; 216 + } 217 + ctask->unsol_count -= ctask->data_count; 218 + debug_scsi("Need to send %d more as data-out PDUs\n", 219 + ctask->unsol_count); 220 + } 221 + 222 + iscsi_iser_ctask_xmit_unsol_data_exit: 223 + return error; 224 + } 225 + 226 + static int 227 + iscsi_iser_ctask_xmit(struct iscsi_conn *conn, 228 + struct iscsi_cmd_task *ctask) 229 + { 230 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 231 + int error = 0; 232 + 233 + debug_scsi("ctask deq [cid %d itt 0x%x]\n", 234 + conn->id, ctask->itt); 235 + 236 + /* 237 + * serialize with TMF AbortTask 238 + */ 239 + if (ctask->mtask) 240 + return error; 241 + 242 + /* Send the cmd PDU */ 243 + if (!iser_ctask->command_sent) { 244 + error = iser_send_command(conn, ctask); 245 + if (error) 246 + goto iscsi_iser_ctask_xmit_exit; 247 + iser_ctask->command_sent = 1; 248 + } 249 + 250 + /* Send unsolicited data-out PDU(s) if necessary */ 251 + if (ctask->unsol_count) 252 + error = iscsi_iser_ctask_xmit_unsol_data(conn, ctask); 253 + 254 + iscsi_iser_ctask_xmit_exit: 255 + if (error && error != -EAGAIN) 256 + iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED); 257 + return error; 258 + } 259 + 260 + static void 261 + iscsi_iser_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask) 262 + { 263 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 264 + 265 + if (iser_ctask->status == ISER_TASK_STATUS_STARTED) { 266 + iser_ctask->status = ISER_TASK_STATUS_COMPLETED; 267 + iser_ctask_rdma_finalize(iser_ctask); 268 + } 269 + } 270 + 271 + static struct iser_conn * 272 + iscsi_iser_ib_conn_lookup(__u64 ep_handle) 273 + { 274 + struct iser_conn *ib_conn; 275 + struct iser_conn *uib_conn = (struct iser_conn *)(unsigned long)ep_handle; 276 + 277 + mutex_lock(&ig.connlist_mutex); 278 + list_for_each_entry(ib_conn, &ig.connlist, conn_list) { 279 + if (ib_conn == uib_conn) { 280 + mutex_unlock(&ig.connlist_mutex); 281 + return ib_conn; 282 + } 283 + } 284 + mutex_unlock(&ig.connlist_mutex); 285 + iser_err("no conn exists for eph %llx\n",(unsigned long long)ep_handle); 286 + return NULL; 287 + } 288 + 289 + static struct iscsi_cls_conn * 290 + iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) 291 + { 292 + struct iscsi_conn *conn; 293 + struct iscsi_cls_conn *cls_conn; 294 + struct iscsi_iser_conn *iser_conn; 295 + 296 + cls_conn = iscsi_conn_setup(cls_session, conn_idx); 297 + if (!cls_conn) 298 + return NULL; 299 + conn = cls_conn->dd_data; 300 + 301 + /* 302 + * due to issues with the login code re iser sematics 303 + * this not set in iscsi_conn_setup - FIXME 304 + */ 305 + conn->max_recv_dlength = 128; 306 + 307 + iser_conn = kzalloc(sizeof(*iser_conn), GFP_KERNEL); 308 + if (!iser_conn) 309 + goto conn_alloc_fail; 310 + 311 + /* currently this is the only field which need to be initiated */ 312 + rwlock_init(&iser_conn->lock); 313 + 314 + conn->dd_data = iser_conn; 315 + iser_conn->iscsi_conn = conn; 316 + 317 + return cls_conn; 318 + 319 + conn_alloc_fail: 320 + iscsi_conn_teardown(cls_conn); 321 + return NULL; 322 + } 323 + 324 + static void 325 + iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn) 326 + { 327 + struct iscsi_conn *conn = cls_conn->dd_data; 328 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 329 + 330 + iscsi_conn_teardown(cls_conn); 331 + kfree(iser_conn); 332 + } 333 + 334 + static int 335 + iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, 336 + struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, 337 + int is_leading) 338 + { 339 + struct iscsi_conn *conn = cls_conn->dd_data; 340 + struct iscsi_iser_conn *iser_conn; 341 + struct iser_conn *ib_conn; 342 + int error; 343 + 344 + error = iscsi_conn_bind(cls_session, cls_conn, is_leading); 345 + if (error) 346 + return error; 347 + 348 + /* the transport ep handle comes from user space so it must be 349 + * verified against the global ib connections list */ 350 + ib_conn = iscsi_iser_ib_conn_lookup(transport_eph); 351 + if (!ib_conn) { 352 + iser_err("can't bind eph %llx\n", 353 + (unsigned long long)transport_eph); 354 + return -EINVAL; 355 + } 356 + /* binds the iSER connection retrieved from the previously 357 + * connected ep_handle to the iSCSI layer connection. exchanges 358 + * connection pointers */ 359 + iser_err("binding iscsi conn %p to iser_conn %p\n",conn,ib_conn); 360 + iser_conn = conn->dd_data; 361 + ib_conn->iser_conn = iser_conn; 362 + iser_conn->ib_conn = ib_conn; 363 + 364 + conn->recv_lock = &iser_conn->lock; 365 + 366 + return 0; 367 + } 368 + 369 + static int 370 + iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) 371 + { 372 + struct iscsi_conn *conn = cls_conn->dd_data; 373 + int err; 374 + 375 + err = iscsi_conn_start(cls_conn); 376 + if (err) 377 + return err; 378 + 379 + return iser_conn_set_full_featured_mode(conn); 380 + } 381 + 382 + static void 383 + iscsi_iser_conn_terminate(struct iscsi_conn *conn) 384 + { 385 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 386 + struct iser_conn *ib_conn = iser_conn->ib_conn; 387 + 388 + BUG_ON(!ib_conn); 389 + /* starts conn teardown process, waits until all previously * 390 + * posted buffers get flushed, deallocates all conn resources */ 391 + iser_conn_terminate(ib_conn); 392 + iser_conn->ib_conn = NULL; 393 + conn->recv_lock = NULL; 394 + } 395 + 396 + 397 + static struct iscsi_transport iscsi_iser_transport; 398 + 399 + static struct iscsi_cls_session * 400 + iscsi_iser_session_create(struct iscsi_transport *iscsit, 401 + struct scsi_transport_template *scsit, 402 + uint32_t initial_cmdsn, uint32_t *hostno) 403 + { 404 + struct iscsi_cls_session *cls_session; 405 + struct iscsi_session *session; 406 + int i; 407 + uint32_t hn; 408 + struct iscsi_cmd_task *ctask; 409 + struct iscsi_mgmt_task *mtask; 410 + struct iscsi_iser_cmd_task *iser_ctask; 411 + struct iser_desc *desc; 412 + 413 + cls_session = iscsi_session_setup(iscsit, scsit, 414 + sizeof(struct iscsi_iser_cmd_task), 415 + sizeof(struct iser_desc), 416 + initial_cmdsn, &hn); 417 + if (!cls_session) 418 + return NULL; 419 + 420 + *hostno = hn; 421 + session = class_to_transport_session(cls_session); 422 + 423 + /* libiscsi setup itts, data and pool so just set desc fields */ 424 + for (i = 0; i < session->cmds_max; i++) { 425 + ctask = session->cmds[i]; 426 + iser_ctask = ctask->dd_data; 427 + ctask->hdr = (struct iscsi_cmd *)&iser_ctask->desc.iscsi_header; 428 + } 429 + 430 + for (i = 0; i < session->mgmtpool_max; i++) { 431 + mtask = session->mgmt_cmds[i]; 432 + desc = mtask->dd_data; 433 + mtask->hdr = &desc->iscsi_header; 434 + desc->data = mtask->data; 435 + } 436 + 437 + return cls_session; 438 + } 439 + 440 + static int 441 + iscsi_iser_conn_set_param(struct iscsi_cls_conn *cls_conn, 442 + enum iscsi_param param, uint32_t value) 443 + { 444 + struct iscsi_conn *conn = cls_conn->dd_data; 445 + struct iscsi_session *session = conn->session; 446 + 447 + spin_lock_bh(&session->lock); 448 + if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE && 449 + conn->stop_stage != STOP_CONN_RECOVER) { 450 + printk(KERN_ERR "iscsi_iser: can not change parameter [%d]\n", 451 + param); 452 + spin_unlock_bh(&session->lock); 453 + return 0; 454 + } 455 + spin_unlock_bh(&session->lock); 456 + 457 + switch (param) { 458 + case ISCSI_PARAM_MAX_RECV_DLENGTH: 459 + /* TBD */ 460 + break; 461 + case ISCSI_PARAM_MAX_XMIT_DLENGTH: 462 + conn->max_xmit_dlength = value; 463 + break; 464 + case ISCSI_PARAM_HDRDGST_EN: 465 + if (value) { 466 + printk(KERN_ERR "DataDigest wasn't negotiated to None"); 467 + return -EPROTO; 468 + } 469 + break; 470 + case ISCSI_PARAM_DATADGST_EN: 471 + if (value) { 472 + printk(KERN_ERR "DataDigest wasn't negotiated to None"); 473 + return -EPROTO; 474 + } 475 + break; 476 + case ISCSI_PARAM_INITIAL_R2T_EN: 477 + session->initial_r2t_en = value; 478 + break; 479 + case ISCSI_PARAM_IMM_DATA_EN: 480 + session->imm_data_en = value; 481 + break; 482 + case ISCSI_PARAM_FIRST_BURST: 483 + session->first_burst = value; 484 + break; 485 + case ISCSI_PARAM_MAX_BURST: 486 + session->max_burst = value; 487 + break; 488 + case ISCSI_PARAM_PDU_INORDER_EN: 489 + session->pdu_inorder_en = value; 490 + break; 491 + case ISCSI_PARAM_DATASEQ_INORDER_EN: 492 + session->dataseq_inorder_en = value; 493 + break; 494 + case ISCSI_PARAM_ERL: 495 + session->erl = value; 496 + break; 497 + case ISCSI_PARAM_IFMARKER_EN: 498 + if (value) { 499 + printk(KERN_ERR "IFMarker wasn't negotiated to No"); 500 + return -EPROTO; 501 + } 502 + break; 503 + case ISCSI_PARAM_OFMARKER_EN: 504 + if (value) { 505 + printk(KERN_ERR "OFMarker wasn't negotiated to No"); 506 + return -EPROTO; 507 + } 508 + break; 509 + default: 510 + break; 511 + } 512 + 513 + return 0; 514 + } 515 + 516 + static int 517 + iscsi_iser_session_get_param(struct iscsi_cls_session *cls_session, 518 + enum iscsi_param param, uint32_t *value) 519 + { 520 + struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); 521 + struct iscsi_session *session = iscsi_hostdata(shost->hostdata); 522 + 523 + switch (param) { 524 + case ISCSI_PARAM_INITIAL_R2T_EN: 525 + *value = session->initial_r2t_en; 526 + break; 527 + case ISCSI_PARAM_MAX_R2T: 528 + *value = session->max_r2t; 529 + break; 530 + case ISCSI_PARAM_IMM_DATA_EN: 531 + *value = session->imm_data_en; 532 + break; 533 + case ISCSI_PARAM_FIRST_BURST: 534 + *value = session->first_burst; 535 + break; 536 + case ISCSI_PARAM_MAX_BURST: 537 + *value = session->max_burst; 538 + break; 539 + case ISCSI_PARAM_PDU_INORDER_EN: 540 + *value = session->pdu_inorder_en; 541 + break; 542 + case ISCSI_PARAM_DATASEQ_INORDER_EN: 543 + *value = session->dataseq_inorder_en; 544 + break; 545 + case ISCSI_PARAM_ERL: 546 + *value = session->erl; 547 + break; 548 + case ISCSI_PARAM_IFMARKER_EN: 549 + *value = 0; 550 + break; 551 + case ISCSI_PARAM_OFMARKER_EN: 552 + *value = 0; 553 + break; 554 + default: 555 + return ISCSI_ERR_PARAM_NOT_FOUND; 556 + } 557 + 558 + return 0; 559 + } 560 + 561 + static int 562 + iscsi_iser_conn_get_param(struct iscsi_cls_conn *cls_conn, 563 + enum iscsi_param param, uint32_t *value) 564 + { 565 + struct iscsi_conn *conn = cls_conn->dd_data; 566 + 567 + switch(param) { 568 + case ISCSI_PARAM_MAX_RECV_DLENGTH: 569 + *value = conn->max_recv_dlength; 570 + break; 571 + case ISCSI_PARAM_MAX_XMIT_DLENGTH: 572 + *value = conn->max_xmit_dlength; 573 + break; 574 + case ISCSI_PARAM_HDRDGST_EN: 575 + *value = 0; 576 + break; 577 + case ISCSI_PARAM_DATADGST_EN: 578 + *value = 0; 579 + break; 580 + /*case ISCSI_PARAM_TARGET_RECV_DLENGTH: 581 + *value = conn->target_recv_dlength; 582 + break; 583 + case ISCSI_PARAM_INITIATOR_RECV_DLENGTH: 584 + *value = conn->initiator_recv_dlength; 585 + break;*/ 586 + default: 587 + return ISCSI_ERR_PARAM_NOT_FOUND; 588 + } 589 + 590 + return 0; 591 + } 592 + 593 + 594 + static void 595 + iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats) 596 + { 597 + struct iscsi_conn *conn = cls_conn->dd_data; 598 + 599 + stats->txdata_octets = conn->txdata_octets; 600 + stats->rxdata_octets = conn->rxdata_octets; 601 + stats->scsicmd_pdus = conn->scsicmd_pdus_cnt; 602 + stats->dataout_pdus = conn->dataout_pdus_cnt; 603 + stats->scsirsp_pdus = conn->scsirsp_pdus_cnt; 604 + stats->datain_pdus = conn->datain_pdus_cnt; /* always 0 */ 605 + stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */ 606 + stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt; 607 + stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt; 608 + stats->custom_length = 3; 609 + strcpy(stats->custom[0].desc, "qp_tx_queue_full"); 610 + stats->custom[0].value = 0; /* TB iser_conn->qp_tx_queue_full; */ 611 + strcpy(stats->custom[1].desc, "fmr_map_not_avail"); 612 + stats->custom[1].value = 0; /* TB iser_conn->fmr_map_not_avail */; 613 + strcpy(stats->custom[2].desc, "eh_abort_cnt"); 614 + stats->custom[2].value = conn->eh_abort_cnt; 615 + } 616 + 617 + static int 618 + iscsi_iser_ep_connect(struct sockaddr *dst_addr, int non_blocking, 619 + __u64 *ep_handle) 620 + { 621 + int err; 622 + struct iser_conn *ib_conn; 623 + 624 + err = iser_conn_init(&ib_conn); 625 + if (err) 626 + goto out; 627 + 628 + err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, non_blocking); 629 + if (!err) 630 + *ep_handle = (__u64)(unsigned long)ib_conn; 631 + 632 + out: 633 + return err; 634 + } 635 + 636 + static int 637 + iscsi_iser_ep_poll(__u64 ep_handle, int timeout_ms) 638 + { 639 + struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); 640 + int rc; 641 + 642 + if (!ib_conn) 643 + return -EINVAL; 644 + 645 + rc = wait_event_interruptible_timeout(ib_conn->wait, 646 + ib_conn->state == ISER_CONN_UP, 647 + msecs_to_jiffies(timeout_ms)); 648 + 649 + /* if conn establishment failed, return error code to iscsi */ 650 + if (!rc && 651 + (ib_conn->state == ISER_CONN_TERMINATING || 652 + ib_conn->state == ISER_CONN_DOWN)) 653 + rc = -1; 654 + 655 + iser_err("ib conn %p rc = %d\n", ib_conn, rc); 656 + 657 + if (rc > 0) 658 + return 1; /* success, this is the equivalent of POLLOUT */ 659 + else if (!rc) 660 + return 0; /* timeout */ 661 + else 662 + return rc; /* signal */ 663 + } 664 + 665 + static void 666 + iscsi_iser_ep_disconnect(__u64 ep_handle) 667 + { 668 + struct iser_conn *ib_conn = iscsi_iser_ib_conn_lookup(ep_handle); 669 + 670 + if (!ib_conn) 671 + return; 672 + 673 + iser_err("ib conn %p state %d\n",ib_conn, ib_conn->state); 674 + 675 + iser_conn_terminate(ib_conn); 676 + } 677 + 678 + static struct scsi_host_template iscsi_iser_sht = { 679 + .name = "iSCSI Initiator over iSER, v." DRV_VER, 680 + .queuecommand = iscsi_queuecommand, 681 + .can_queue = ISCSI_XMIT_CMDS_MAX - 1, 682 + .sg_tablesize = ISCSI_ISER_SG_TABLESIZE, 683 + .cmd_per_lun = ISCSI_MAX_CMD_PER_LUN, 684 + .eh_abort_handler = iscsi_eh_abort, 685 + .eh_host_reset_handler = iscsi_eh_host_reset, 686 + .use_clustering = DISABLE_CLUSTERING, 687 + .proc_name = "iscsi_iser", 688 + .this_id = -1, 689 + }; 690 + 691 + static struct iscsi_transport iscsi_iser_transport = { 692 + .owner = THIS_MODULE, 693 + .name = "iser", 694 + .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T, 695 + .param_mask = ISCSI_MAX_RECV_DLENGTH | 696 + ISCSI_MAX_XMIT_DLENGTH | 697 + ISCSI_HDRDGST_EN | 698 + ISCSI_DATADGST_EN | 699 + ISCSI_INITIAL_R2T_EN | 700 + ISCSI_MAX_R2T | 701 + ISCSI_IMM_DATA_EN | 702 + ISCSI_FIRST_BURST | 703 + ISCSI_MAX_BURST | 704 + ISCSI_PDU_INORDER_EN | 705 + ISCSI_DATASEQ_INORDER_EN, 706 + .host_template = &iscsi_iser_sht, 707 + .conndata_size = sizeof(struct iscsi_conn), 708 + .max_lun = ISCSI_ISER_MAX_LUN, 709 + .max_cmd_len = ISCSI_ISER_MAX_CMD_LEN, 710 + /* session management */ 711 + .create_session = iscsi_iser_session_create, 712 + .destroy_session = iscsi_session_teardown, 713 + /* connection management */ 714 + .create_conn = iscsi_iser_conn_create, 715 + .bind_conn = iscsi_iser_conn_bind, 716 + .destroy_conn = iscsi_iser_conn_destroy, 717 + .set_param = iscsi_iser_conn_set_param, 718 + .get_conn_param = iscsi_iser_conn_get_param, 719 + .get_session_param = iscsi_iser_session_get_param, 720 + .start_conn = iscsi_iser_conn_start, 721 + .stop_conn = iscsi_conn_stop, 722 + /* these are called as part of conn recovery */ 723 + .suspend_conn_recv = NULL, /* FIXME is/how this relvant to iser? */ 724 + .terminate_conn = iscsi_iser_conn_terminate, 725 + /* IO */ 726 + .send_pdu = iscsi_conn_send_pdu, 727 + .get_stats = iscsi_iser_conn_get_stats, 728 + .init_cmd_task = iscsi_iser_cmd_init, 729 + .xmit_cmd_task = iscsi_iser_ctask_xmit, 730 + .xmit_mgmt_task = iscsi_iser_mtask_xmit, 731 + .cleanup_cmd_task = iscsi_iser_cleanup_ctask, 732 + /* recovery */ 733 + .session_recovery_timedout = iscsi_session_recovery_timedout, 734 + 735 + .ep_connect = iscsi_iser_ep_connect, 736 + .ep_poll = iscsi_iser_ep_poll, 737 + .ep_disconnect = iscsi_iser_ep_disconnect 738 + }; 739 + 740 + static int __init iser_init(void) 741 + { 742 + int err; 743 + 744 + iser_dbg("Starting iSER datamover...\n"); 745 + 746 + if (iscsi_max_lun < 1) { 747 + printk(KERN_ERR "Invalid max_lun value of %u\n", iscsi_max_lun); 748 + return -EINVAL; 749 + } 750 + 751 + iscsi_iser_transport.max_lun = iscsi_max_lun; 752 + 753 + memset(&ig, 0, sizeof(struct iser_global)); 754 + 755 + ig.desc_cache = kmem_cache_create("iser_descriptors", 756 + sizeof (struct iser_desc), 757 + 0, SLAB_HWCACHE_ALIGN, 758 + NULL, NULL); 759 + if (ig.desc_cache == NULL) 760 + return -ENOMEM; 761 + 762 + /* device init is called only after the first addr resolution */ 763 + mutex_init(&ig.device_list_mutex); 764 + INIT_LIST_HEAD(&ig.device_list); 765 + mutex_init(&ig.connlist_mutex); 766 + INIT_LIST_HEAD(&ig.connlist); 767 + 768 + if (!iscsi_register_transport(&iscsi_iser_transport)) { 769 + iser_err("iscsi_register_transport failed\n"); 770 + err = -EINVAL; 771 + goto register_transport_failure; 772 + } 773 + 774 + return 0; 775 + 776 + register_transport_failure: 777 + kmem_cache_destroy(ig.desc_cache); 778 + 779 + return err; 780 + } 781 + 782 + static void __exit iser_exit(void) 783 + { 784 + iser_dbg("Removing iSER datamover...\n"); 785 + iscsi_unregister_transport(&iscsi_iser_transport); 786 + kmem_cache_destroy(ig.desc_cache); 787 + } 788 + 789 + module_init(iser_init); 790 + module_exit(iser_exit);
+354
drivers/infiniband/ulp/iser/iscsi_iser.h
··· 1 + /* 2 + * iSER transport for the Open iSCSI Initiator & iSER transport internals 3 + * 4 + * Copyright (C) 2004 Dmitry Yusupov 5 + * Copyright (C) 2004 Alex Aizman 6 + * Copyright (C) 2005 Mike Christie 7 + * based on code maintained by open-iscsi@googlegroups.com 8 + * 9 + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. 10 + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 11 + * 12 + * This software is available to you under a choice of one of two 13 + * licenses. You may choose to be licensed under the terms of the GNU 14 + * General Public License (GPL) Version 2, available from the file 15 + * COPYING in the main directory of this source tree, or the 16 + * OpenIB.org BSD license below: 17 + * 18 + * Redistribution and use in source and binary forms, with or 19 + * without modification, are permitted provided that the following 20 + * conditions are met: 21 + * 22 + * - Redistributions of source code must retain the above 23 + * copyright notice, this list of conditions and the following 24 + * disclaimer. 25 + * 26 + * - Redistributions in binary form must reproduce the above 27 + * copyright notice, this list of conditions and the following 28 + * disclaimer in the documentation and/or other materials 29 + * provided with the distribution. 30 + * 31 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 32 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 33 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 34 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 35 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 36 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 37 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 38 + * SOFTWARE. 39 + * 40 + * $Id: iscsi_iser.h 7051 2006-05-10 12:29:11Z ogerlitz $ 41 + */ 42 + #ifndef __ISCSI_ISER_H__ 43 + #define __ISCSI_ISER_H__ 44 + 45 + #include <linux/types.h> 46 + #include <linux/net.h> 47 + #include <scsi/libiscsi.h> 48 + #include <scsi/scsi_transport_iscsi.h> 49 + 50 + #include <linux/wait.h> 51 + #include <linux/sched.h> 52 + #include <linux/list.h> 53 + #include <linux/slab.h> 54 + #include <linux/dma-mapping.h> 55 + #include <linux/mutex.h> 56 + #include <linux/mempool.h> 57 + #include <linux/uio.h> 58 + 59 + #include <linux/socket.h> 60 + #include <linux/in.h> 61 + #include <linux/in6.h> 62 + 63 + #include <rdma/ib_verbs.h> 64 + #include <rdma/ib_fmr_pool.h> 65 + #include <rdma/rdma_cm.h> 66 + 67 + #define DRV_NAME "iser" 68 + #define PFX DRV_NAME ": " 69 + #define DRV_VER "0.1" 70 + #define DRV_DATE "May 7th, 2006" 71 + 72 + #define iser_dbg(fmt, arg...) \ 73 + do { \ 74 + if (iser_debug_level > 0) \ 75 + printk(KERN_DEBUG PFX "%s:" fmt,\ 76 + __func__ , ## arg); \ 77 + } while (0) 78 + 79 + #define iser_err(fmt, arg...) \ 80 + do { \ 81 + printk(KERN_ERR PFX "%s:" fmt, \ 82 + __func__ , ## arg); \ 83 + } while (0) 84 + 85 + /* support upto 512KB in one RDMA */ 86 + #define ISCSI_ISER_SG_TABLESIZE (0x80000 >> PAGE_SHIFT) 87 + #define ISCSI_ISER_MAX_LUN 256 88 + #define ISCSI_ISER_MAX_CMD_LEN 16 89 + 90 + /* QP settings */ 91 + /* Maximal bounds on received asynchronous PDUs */ 92 + #define ISER_MAX_RX_MISC_PDUS 4 /* NOOP_IN(2) , ASYNC_EVENT(2) */ 93 + 94 + #define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), * 95 + * SCSI_TMFUNC(2), LOGOUT(1) */ 96 + 97 + #define ISER_QP_MAX_RECV_DTOS (ISCSI_XMIT_CMDS_MAX + \ 98 + ISER_MAX_RX_MISC_PDUS + \ 99 + ISER_MAX_TX_MISC_PDUS) 100 + 101 + /* the max TX (send) WR supported by the iSER QP is defined by * 102 + * max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect * 103 + * to have at max for SCSI command. The tx posting & completion handling code * 104 + * supports -EAGAIN scheme where tx is suspended till the QP has room for more * 105 + * send WR. D=8 comes from 64K/8K */ 106 + 107 + #define ISER_INFLIGHT_DATAOUTS 8 108 + 109 + #define ISER_QP_MAX_REQ_DTOS (ISCSI_XMIT_CMDS_MAX * \ 110 + (1 + ISER_INFLIGHT_DATAOUTS) + \ 111 + ISER_MAX_TX_MISC_PDUS + \ 112 + ISER_MAX_RX_MISC_PDUS) 113 + 114 + #define ISER_VER 0x10 115 + #define ISER_WSV 0x08 116 + #define ISER_RSV 0x04 117 + 118 + struct iser_hdr { 119 + u8 flags; 120 + u8 rsvd[3]; 121 + __be32 write_stag; /* write rkey */ 122 + __be64 write_va; 123 + __be32 read_stag; /* read rkey */ 124 + __be64 read_va; 125 + } __attribute__((packed)); 126 + 127 + 128 + /* Length of an object name string */ 129 + #define ISER_OBJECT_NAME_SIZE 64 130 + 131 + enum iser_ib_conn_state { 132 + ISER_CONN_INIT, /* descriptor allocd, no conn */ 133 + ISER_CONN_PENDING, /* in the process of being established */ 134 + ISER_CONN_UP, /* up and running */ 135 + ISER_CONN_TERMINATING, /* in the process of being terminated */ 136 + ISER_CONN_DOWN, /* shut down */ 137 + ISER_CONN_STATES_NUM 138 + }; 139 + 140 + enum iser_task_status { 141 + ISER_TASK_STATUS_INIT = 0, 142 + ISER_TASK_STATUS_STARTED, 143 + ISER_TASK_STATUS_COMPLETED 144 + }; 145 + 146 + enum iser_data_dir { 147 + ISER_DIR_IN = 0, /* to initiator */ 148 + ISER_DIR_OUT, /* from initiator */ 149 + ISER_DIRS_NUM 150 + }; 151 + 152 + struct iser_data_buf { 153 + void *buf; /* pointer to the sg list */ 154 + unsigned int size; /* num entries of this sg */ 155 + unsigned long data_len; /* total data len */ 156 + unsigned int dma_nents; /* returned by dma_map_sg */ 157 + char *copy_buf; /* allocated copy buf for SGs unaligned * 158 + * for rdma which are copied */ 159 + struct scatterlist sg_single; /* SG-ified clone of a non SG SC or * 160 + * unaligned SG */ 161 + }; 162 + 163 + /* fwd declarations */ 164 + struct iser_device; 165 + struct iscsi_iser_conn; 166 + struct iscsi_iser_cmd_task; 167 + 168 + struct iser_mem_reg { 169 + u32 lkey; 170 + u32 rkey; 171 + u64 va; 172 + u64 len; 173 + void *mem_h; 174 + }; 175 + 176 + struct iser_regd_buf { 177 + struct iser_mem_reg reg; /* memory registration info */ 178 + void *virt_addr; 179 + struct iser_device *device; /* device->device for dma_unmap */ 180 + dma_addr_t dma_addr; /* if non zero, addr for dma_unmap */ 181 + enum dma_data_direction direction; /* direction for dma_unmap */ 182 + unsigned int data_size; 183 + atomic_t ref_count; /* refcount, freed when dec to 0 */ 184 + }; 185 + 186 + #define MAX_REGD_BUF_VECTOR_LEN 2 187 + 188 + struct iser_dto { 189 + struct iscsi_iser_cmd_task *ctask; 190 + struct iscsi_iser_conn *conn; 191 + int notify_enable; 192 + 193 + /* vector of registered buffers */ 194 + unsigned int regd_vector_len; 195 + struct iser_regd_buf *regd[MAX_REGD_BUF_VECTOR_LEN]; 196 + 197 + /* offset into the registered buffer may be specified */ 198 + unsigned int offset[MAX_REGD_BUF_VECTOR_LEN]; 199 + 200 + /* a smaller size may be specified, if 0, then full size is used */ 201 + unsigned int used_sz[MAX_REGD_BUF_VECTOR_LEN]; 202 + }; 203 + 204 + enum iser_desc_type { 205 + ISCSI_RX, 206 + ISCSI_TX_CONTROL , 207 + ISCSI_TX_SCSI_COMMAND, 208 + ISCSI_TX_DATAOUT 209 + }; 210 + 211 + struct iser_desc { 212 + struct iser_hdr iser_header; 213 + struct iscsi_hdr iscsi_header; 214 + struct iser_regd_buf hdr_regd_buf; 215 + void *data; /* used by RX & TX_CONTROL */ 216 + struct iser_regd_buf data_regd_buf; /* used by RX & TX_CONTROL */ 217 + enum iser_desc_type type; 218 + struct iser_dto dto; 219 + }; 220 + 221 + struct iser_device { 222 + struct ib_device *ib_device; 223 + struct ib_pd *pd; 224 + struct ib_cq *cq; 225 + struct ib_mr *mr; 226 + struct tasklet_struct cq_tasklet; 227 + struct list_head ig_list; /* entry in ig devices list */ 228 + int refcount; 229 + }; 230 + 231 + struct iser_conn { 232 + struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */ 233 + enum iser_ib_conn_state state; /* rdma connection state */ 234 + spinlock_t lock; /* used for state changes */ 235 + struct iser_device *device; /* device context */ 236 + struct rdma_cm_id *cma_id; /* CMA ID */ 237 + struct ib_qp *qp; /* QP */ 238 + struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */ 239 + int disc_evt_flag; /* disconn event delivered */ 240 + wait_queue_head_t wait; /* waitq for conn/disconn */ 241 + atomic_t post_recv_buf_count; /* posted rx count */ 242 + atomic_t post_send_buf_count; /* posted tx count */ 243 + struct work_struct comperror_work; /* conn term sleepable ctx*/ 244 + char name[ISER_OBJECT_NAME_SIZE]; 245 + struct iser_page_vec *page_vec; /* represents SG to fmr maps* 246 + * maps serialized as tx is*/ 247 + struct list_head conn_list; /* entry in ig conn list */ 248 + }; 249 + 250 + struct iscsi_iser_conn { 251 + struct iscsi_conn *iscsi_conn;/* ptr to iscsi conn */ 252 + struct iser_conn *ib_conn; /* iSER IB conn */ 253 + 254 + rwlock_t lock; 255 + }; 256 + 257 + struct iscsi_iser_cmd_task { 258 + struct iser_desc desc; 259 + struct iscsi_iser_conn *iser_conn; 260 + int rdma_data_count;/* RDMA bytes */ 261 + enum iser_task_status status; 262 + int command_sent; /* set if command sent */ 263 + int dir[ISER_DIRS_NUM]; /* set if dir use*/ 264 + struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ 265 + struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ 266 + struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ 267 + }; 268 + 269 + struct iser_page_vec { 270 + u64 *pages; 271 + int length; 272 + int offset; 273 + int data_size; 274 + }; 275 + 276 + struct iser_global { 277 + struct mutex device_list_mutex;/* */ 278 + struct list_head device_list; /* all iSER devices */ 279 + struct mutex connlist_mutex; 280 + struct list_head connlist; /* all iSER IB connections */ 281 + 282 + kmem_cache_t *desc_cache; 283 + }; 284 + 285 + extern struct iser_global ig; 286 + extern int iser_debug_level; 287 + 288 + /* allocate connection resources needed for rdma functionality */ 289 + int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); 290 + 291 + int iser_send_control(struct iscsi_conn *conn, 292 + struct iscsi_mgmt_task *mtask); 293 + 294 + int iser_send_command(struct iscsi_conn *conn, 295 + struct iscsi_cmd_task *ctask); 296 + 297 + int iser_send_data_out(struct iscsi_conn *conn, 298 + struct iscsi_cmd_task *ctask, 299 + struct iscsi_data *hdr); 300 + 301 + void iscsi_iser_recv(struct iscsi_conn *conn, 302 + struct iscsi_hdr *hdr, 303 + char *rx_data, 304 + int rx_data_len); 305 + 306 + int iser_conn_init(struct iser_conn **ib_conn); 307 + 308 + void iser_conn_terminate(struct iser_conn *ib_conn); 309 + 310 + void iser_conn_release(struct iser_conn *ib_conn); 311 + 312 + void iser_rcv_completion(struct iser_desc *desc, 313 + unsigned long dto_xfer_len); 314 + 315 + void iser_snd_completion(struct iser_desc *desc); 316 + 317 + void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *ctask); 318 + 319 + void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *ctask); 320 + 321 + void iser_dto_buffs_release(struct iser_dto *dto); 322 + 323 + int iser_regd_buff_release(struct iser_regd_buf *regd_buf); 324 + 325 + void iser_reg_single(struct iser_device *device, 326 + struct iser_regd_buf *regd_buf, 327 + enum dma_data_direction direction); 328 + 329 + int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, 330 + enum iser_data_dir cmd_dir); 331 + 332 + void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *ctask, 333 + enum iser_data_dir cmd_dir); 334 + 335 + int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *ctask, 336 + enum iser_data_dir cmd_dir); 337 + 338 + int iser_connect(struct iser_conn *ib_conn, 339 + struct sockaddr_in *src_addr, 340 + struct sockaddr_in *dst_addr, 341 + int non_blocking); 342 + 343 + int iser_reg_page_vec(struct iser_conn *ib_conn, 344 + struct iser_page_vec *page_vec, 345 + struct iser_mem_reg *mem_reg); 346 + 347 + void iser_unreg_mem(struct iser_mem_reg *mem_reg); 348 + 349 + int iser_post_recv(struct iser_desc *rx_desc); 350 + int iser_post_send(struct iser_desc *tx_desc); 351 + 352 + int iser_conn_state_comp(struct iser_conn *ib_conn, 353 + enum iser_ib_conn_state comp); 354 + #endif
+738
drivers/infiniband/ulp/iser/iser_initiator.c
··· 1 + /* 2 + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. 3 + * 4 + * This software is available to you under a choice of one of two 5 + * licenses. You may choose to be licensed under the terms of the GNU 6 + * General Public License (GPL) Version 2, available from the file 7 + * COPYING in the main directory of this source tree, or the 8 + * OpenIB.org BSD license below: 9 + * 10 + * Redistribution and use in source and binary forms, with or 11 + * without modification, are permitted provided that the following 12 + * conditions are met: 13 + * 14 + * - Redistributions of source code must retain the above 15 + * copyright notice, this list of conditions and the following 16 + * disclaimer. 17 + * 18 + * - Redistributions in binary form must reproduce the above 19 + * copyright notice, this list of conditions and the following 20 + * disclaimer in the documentation and/or other materials 21 + * provided with the distribution. 22 + * 23 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 + * SOFTWARE. 31 + * 32 + * $Id: iser_initiator.c 6964 2006-05-07 11:11:43Z ogerlitz $ 33 + */ 34 + #include <linux/kernel.h> 35 + #include <linux/slab.h> 36 + #include <linux/mm.h> 37 + #include <asm/io.h> 38 + #include <asm/scatterlist.h> 39 + #include <linux/scatterlist.h> 40 + #include <linux/kfifo.h> 41 + #include <scsi/scsi_cmnd.h> 42 + #include <scsi/scsi_host.h> 43 + 44 + #include "iscsi_iser.h" 45 + 46 + /* Constant PDU lengths calculations */ 47 + #define ISER_TOTAL_HEADERS_LEN (sizeof (struct iser_hdr) + \ 48 + sizeof (struct iscsi_hdr)) 49 + 50 + /* iser_dto_add_regd_buff - increments the reference count for * 51 + * the registered buffer & adds it to the DTO object */ 52 + static void iser_dto_add_regd_buff(struct iser_dto *dto, 53 + struct iser_regd_buf *regd_buf, 54 + unsigned long use_offset, 55 + unsigned long use_size) 56 + { 57 + int add_idx; 58 + 59 + atomic_inc(&regd_buf->ref_count); 60 + 61 + add_idx = dto->regd_vector_len; 62 + dto->regd[add_idx] = regd_buf; 63 + dto->used_sz[add_idx] = use_size; 64 + dto->offset[add_idx] = use_offset; 65 + 66 + dto->regd_vector_len++; 67 + } 68 + 69 + static int iser_dma_map_task_data(struct iscsi_iser_cmd_task *iser_ctask, 70 + struct iser_data_buf *data, 71 + enum iser_data_dir iser_dir, 72 + enum dma_data_direction dma_dir) 73 + { 74 + struct device *dma_device; 75 + 76 + iser_ctask->dir[iser_dir] = 1; 77 + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; 78 + 79 + data->dma_nents = dma_map_sg(dma_device, data->buf, data->size, dma_dir); 80 + if (data->dma_nents == 0) { 81 + iser_err("dma_map_sg failed!!!\n"); 82 + return -EINVAL; 83 + } 84 + return 0; 85 + } 86 + 87 + static void iser_dma_unmap_task_data(struct iscsi_iser_cmd_task *iser_ctask) 88 + { 89 + struct device *dma_device; 90 + struct iser_data_buf *data; 91 + 92 + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; 93 + 94 + if (iser_ctask->dir[ISER_DIR_IN]) { 95 + data = &iser_ctask->data[ISER_DIR_IN]; 96 + dma_unmap_sg(dma_device, data->buf, data->size, DMA_FROM_DEVICE); 97 + } 98 + 99 + if (iser_ctask->dir[ISER_DIR_OUT]) { 100 + data = &iser_ctask->data[ISER_DIR_OUT]; 101 + dma_unmap_sg(dma_device, data->buf, data->size, DMA_TO_DEVICE); 102 + } 103 + } 104 + 105 + /* Register user buffer memory and initialize passive rdma 106 + * dto descriptor. Total data size is stored in 107 + * iser_ctask->data[ISER_DIR_IN].data_len 108 + */ 109 + static int iser_prepare_read_cmd(struct iscsi_cmd_task *ctask, 110 + unsigned int edtl) 111 + 112 + { 113 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 114 + struct iser_regd_buf *regd_buf; 115 + int err; 116 + struct iser_hdr *hdr = &iser_ctask->desc.iser_header; 117 + struct iser_data_buf *buf_in = &iser_ctask->data[ISER_DIR_IN]; 118 + 119 + err = iser_dma_map_task_data(iser_ctask, 120 + buf_in, 121 + ISER_DIR_IN, 122 + DMA_FROM_DEVICE); 123 + if (err) 124 + return err; 125 + 126 + if (edtl > iser_ctask->data[ISER_DIR_IN].data_len) { 127 + iser_err("Total data length: %ld, less than EDTL: " 128 + "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", 129 + iser_ctask->data[ISER_DIR_IN].data_len, edtl, 130 + ctask->itt, iser_ctask->iser_conn); 131 + return -EINVAL; 132 + } 133 + 134 + err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_IN); 135 + if (err) { 136 + iser_err("Failed to set up Data-IN RDMA\n"); 137 + return err; 138 + } 139 + regd_buf = &iser_ctask->rdma_regd[ISER_DIR_IN]; 140 + 141 + hdr->flags |= ISER_RSV; 142 + hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey); 143 + hdr->read_va = cpu_to_be64(regd_buf->reg.va); 144 + 145 + iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n", 146 + ctask->itt, regd_buf->reg.rkey, 147 + (unsigned long long)regd_buf->reg.va); 148 + 149 + return 0; 150 + } 151 + 152 + /* Register user buffer memory and initialize passive rdma 153 + * dto descriptor. Total data size is stored in 154 + * ctask->data[ISER_DIR_OUT].data_len 155 + */ 156 + static int 157 + iser_prepare_write_cmd(struct iscsi_cmd_task *ctask, 158 + unsigned int imm_sz, 159 + unsigned int unsol_sz, 160 + unsigned int edtl) 161 + { 162 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 163 + struct iser_regd_buf *regd_buf; 164 + int err; 165 + struct iser_dto *send_dto = &iser_ctask->desc.dto; 166 + struct iser_hdr *hdr = &iser_ctask->desc.iser_header; 167 + struct iser_data_buf *buf_out = &iser_ctask->data[ISER_DIR_OUT]; 168 + 169 + err = iser_dma_map_task_data(iser_ctask, 170 + buf_out, 171 + ISER_DIR_OUT, 172 + DMA_TO_DEVICE); 173 + if (err) 174 + return err; 175 + 176 + if (edtl > iser_ctask->data[ISER_DIR_OUT].data_len) { 177 + iser_err("Total data length: %ld, less than EDTL: %d, " 178 + "in WRITE cmd BHS itt: %d, conn: 0x%p\n", 179 + iser_ctask->data[ISER_DIR_OUT].data_len, 180 + edtl, ctask->itt, ctask->conn); 181 + return -EINVAL; 182 + } 183 + 184 + err = iser_reg_rdma_mem(iser_ctask,ISER_DIR_OUT); 185 + if (err != 0) { 186 + iser_err("Failed to register write cmd RDMA mem\n"); 187 + return err; 188 + } 189 + 190 + regd_buf = &iser_ctask->rdma_regd[ISER_DIR_OUT]; 191 + 192 + if (unsol_sz < edtl) { 193 + hdr->flags |= ISER_WSV; 194 + hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey); 195 + hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz); 196 + 197 + iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X " 198 + "VA:%#llX + unsol:%d\n", 199 + ctask->itt, regd_buf->reg.rkey, 200 + (unsigned long long)regd_buf->reg.va, unsol_sz); 201 + } 202 + 203 + if (imm_sz > 0) { 204 + iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n", 205 + ctask->itt, imm_sz); 206 + iser_dto_add_regd_buff(send_dto, 207 + regd_buf, 208 + 0, 209 + imm_sz); 210 + } 211 + 212 + return 0; 213 + } 214 + 215 + /** 216 + * iser_post_receive_control - allocates, initializes and posts receive DTO. 217 + */ 218 + static int iser_post_receive_control(struct iscsi_conn *conn) 219 + { 220 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 221 + struct iser_desc *rx_desc; 222 + struct iser_regd_buf *regd_hdr; 223 + struct iser_regd_buf *regd_data; 224 + struct iser_dto *recv_dto = NULL; 225 + struct iser_device *device = iser_conn->ib_conn->device; 226 + int rx_data_size, err = 0; 227 + 228 + rx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); 229 + if (rx_desc == NULL) { 230 + iser_err("Failed to alloc desc for post recv\n"); 231 + return -ENOMEM; 232 + } 233 + rx_desc->type = ISCSI_RX; 234 + 235 + /* for the login sequence we must support rx of upto 8K; login is done 236 + * after conn create/bind (connect) and conn stop/bind (reconnect), 237 + * what's common for both schemes is that the connection is not started 238 + */ 239 + if (conn->c_stage != ISCSI_CONN_STARTED) 240 + rx_data_size = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH; 241 + else /* FIXME till user space sets conn->max_recv_dlength correctly */ 242 + rx_data_size = 128; 243 + 244 + rx_desc->data = kmalloc(rx_data_size, GFP_NOIO); 245 + if (rx_desc->data == NULL) { 246 + iser_err("Failed to alloc data buf for post recv\n"); 247 + err = -ENOMEM; 248 + goto post_rx_kmalloc_failure; 249 + } 250 + 251 + recv_dto = &rx_desc->dto; 252 + recv_dto->conn = iser_conn; 253 + recv_dto->regd_vector_len = 0; 254 + 255 + regd_hdr = &rx_desc->hdr_regd_buf; 256 + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 257 + regd_hdr->device = device; 258 + regd_hdr->virt_addr = rx_desc; /* == &rx_desc->iser_header */ 259 + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; 260 + 261 + iser_reg_single(device, regd_hdr, DMA_FROM_DEVICE); 262 + 263 + iser_dto_add_regd_buff(recv_dto, regd_hdr, 0, 0); 264 + 265 + regd_data = &rx_desc->data_regd_buf; 266 + memset(regd_data, 0, sizeof(struct iser_regd_buf)); 267 + regd_data->device = device; 268 + regd_data->virt_addr = rx_desc->data; 269 + regd_data->data_size = rx_data_size; 270 + 271 + iser_reg_single(device, regd_data, DMA_FROM_DEVICE); 272 + 273 + iser_dto_add_regd_buff(recv_dto, regd_data, 0, 0); 274 + 275 + err = iser_post_recv(rx_desc); 276 + if (!err) 277 + return 0; 278 + 279 + /* iser_post_recv failed */ 280 + iser_dto_buffs_release(recv_dto); 281 + kfree(rx_desc->data); 282 + post_rx_kmalloc_failure: 283 + kmem_cache_free(ig.desc_cache, rx_desc); 284 + return err; 285 + } 286 + 287 + /* creates a new tx descriptor and adds header regd buffer */ 288 + static void iser_create_send_desc(struct iscsi_iser_conn *iser_conn, 289 + struct iser_desc *tx_desc) 290 + { 291 + struct iser_regd_buf *regd_hdr = &tx_desc->hdr_regd_buf; 292 + struct iser_dto *send_dto = &tx_desc->dto; 293 + 294 + memset(regd_hdr, 0, sizeof(struct iser_regd_buf)); 295 + regd_hdr->device = iser_conn->ib_conn->device; 296 + regd_hdr->virt_addr = tx_desc; /* == &tx_desc->iser_header */ 297 + regd_hdr->data_size = ISER_TOTAL_HEADERS_LEN; 298 + 299 + send_dto->conn = iser_conn; 300 + send_dto->notify_enable = 1; 301 + send_dto->regd_vector_len = 0; 302 + 303 + memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr)); 304 + tx_desc->iser_header.flags = ISER_VER; 305 + 306 + iser_dto_add_regd_buff(send_dto, regd_hdr, 0, 0); 307 + } 308 + 309 + /** 310 + * iser_conn_set_full_featured_mode - (iSER API) 311 + */ 312 + int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) 313 + { 314 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 315 + 316 + int i; 317 + /* no need to keep it in a var, we are after login so if this should 318 + * be negotiated, by now the result should be available here */ 319 + int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS; 320 + 321 + iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num); 322 + 323 + /* Check that there is no posted recv or send buffers left - */ 324 + /* they must be consumed during the login phase */ 325 + BUG_ON(atomic_read(&iser_conn->ib_conn->post_recv_buf_count) != 0); 326 + BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); 327 + 328 + /* Initial post receive buffers */ 329 + for (i = 0; i < initial_post_recv_bufs_num; i++) { 330 + if (iser_post_receive_control(conn) != 0) { 331 + iser_err("Failed to post recv bufs at:%d conn:0x%p\n", 332 + i, conn); 333 + return -ENOMEM; 334 + } 335 + } 336 + iser_dbg("Posted %d post recv bufs, conn:0x%p\n", i, conn); 337 + return 0; 338 + } 339 + 340 + static int 341 + iser_check_xmit(struct iscsi_conn *conn, void *task) 342 + { 343 + int rc = 0; 344 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 345 + 346 + write_lock_bh(conn->recv_lock); 347 + if (atomic_read(&iser_conn->ib_conn->post_send_buf_count) == 348 + ISER_QP_MAX_REQ_DTOS) { 349 + iser_dbg("%ld can't xmit task %p, suspending tx\n",jiffies,task); 350 + set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); 351 + rc = -EAGAIN; 352 + } 353 + write_unlock_bh(conn->recv_lock); 354 + return rc; 355 + } 356 + 357 + 358 + /** 359 + * iser_send_command - send command PDU 360 + */ 361 + int iser_send_command(struct iscsi_conn *conn, 362 + struct iscsi_cmd_task *ctask) 363 + { 364 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 365 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 366 + struct iser_dto *send_dto = NULL; 367 + unsigned long edtl; 368 + int err = 0; 369 + struct iser_data_buf *data_buf; 370 + 371 + struct iscsi_cmd *hdr = ctask->hdr; 372 + struct scsi_cmnd *sc = ctask->sc; 373 + 374 + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { 375 + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); 376 + return -EPERM; 377 + } 378 + if (iser_check_xmit(conn, ctask)) 379 + return -EAGAIN; 380 + 381 + edtl = ntohl(hdr->data_length); 382 + 383 + /* build the tx desc regd header and add it to the tx desc dto */ 384 + iser_ctask->desc.type = ISCSI_TX_SCSI_COMMAND; 385 + send_dto = &iser_ctask->desc.dto; 386 + send_dto->ctask = iser_ctask; 387 + iser_create_send_desc(iser_conn, &iser_ctask->desc); 388 + 389 + if (hdr->flags & ISCSI_FLAG_CMD_READ) 390 + data_buf = &iser_ctask->data[ISER_DIR_IN]; 391 + else 392 + data_buf = &iser_ctask->data[ISER_DIR_OUT]; 393 + 394 + if (sc->use_sg) { /* using a scatter list */ 395 + data_buf->buf = sc->request_buffer; 396 + data_buf->size = sc->use_sg; 397 + } else if (sc->request_bufflen) { 398 + /* using a single buffer - convert it into one entry SG */ 399 + sg_init_one(&data_buf->sg_single, 400 + sc->request_buffer, sc->request_bufflen); 401 + data_buf->buf = &data_buf->sg_single; 402 + data_buf->size = 1; 403 + } 404 + 405 + data_buf->data_len = sc->request_bufflen; 406 + 407 + if (hdr->flags & ISCSI_FLAG_CMD_READ) { 408 + err = iser_prepare_read_cmd(ctask, edtl); 409 + if (err) 410 + goto send_command_error; 411 + } 412 + if (hdr->flags & ISCSI_FLAG_CMD_WRITE) { 413 + err = iser_prepare_write_cmd(ctask, 414 + ctask->imm_count, 415 + ctask->imm_count + 416 + ctask->unsol_count, 417 + edtl); 418 + if (err) 419 + goto send_command_error; 420 + } 421 + 422 + iser_reg_single(iser_conn->ib_conn->device, 423 + send_dto->regd[0], DMA_TO_DEVICE); 424 + 425 + if (iser_post_receive_control(conn) != 0) { 426 + iser_err("post_recv failed!\n"); 427 + err = -ENOMEM; 428 + goto send_command_error; 429 + } 430 + 431 + iser_ctask->status = ISER_TASK_STATUS_STARTED; 432 + 433 + err = iser_post_send(&iser_ctask->desc); 434 + if (!err) 435 + return 0; 436 + 437 + send_command_error: 438 + iser_dto_buffs_release(send_dto); 439 + iser_err("conn %p failed ctask->itt %d err %d\n",conn, ctask->itt, err); 440 + return err; 441 + } 442 + 443 + /** 444 + * iser_send_data_out - send data out PDU 445 + */ 446 + int iser_send_data_out(struct iscsi_conn *conn, 447 + struct iscsi_cmd_task *ctask, 448 + struct iscsi_data *hdr) 449 + { 450 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 451 + struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data; 452 + struct iser_desc *tx_desc = NULL; 453 + struct iser_dto *send_dto = NULL; 454 + unsigned long buf_offset; 455 + unsigned long data_seg_len; 456 + unsigned int itt; 457 + int err = 0; 458 + 459 + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { 460 + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); 461 + return -EPERM; 462 + } 463 + 464 + if (iser_check_xmit(conn, ctask)) 465 + return -EAGAIN; 466 + 467 + itt = ntohl(hdr->itt); 468 + data_seg_len = ntoh24(hdr->dlength); 469 + buf_offset = ntohl(hdr->offset); 470 + 471 + iser_dbg("%s itt %d dseg_len %d offset %d\n", 472 + __func__,(int)itt,(int)data_seg_len,(int)buf_offset); 473 + 474 + tx_desc = kmem_cache_alloc(ig.desc_cache, GFP_NOIO); 475 + if (tx_desc == NULL) { 476 + iser_err("Failed to alloc desc for post dataout\n"); 477 + return -ENOMEM; 478 + } 479 + 480 + tx_desc->type = ISCSI_TX_DATAOUT; 481 + memcpy(&tx_desc->iscsi_header, hdr, sizeof(struct iscsi_hdr)); 482 + 483 + /* build the tx desc regd header and add it to the tx desc dto */ 484 + send_dto = &tx_desc->dto; 485 + send_dto->ctask = iser_ctask; 486 + iser_create_send_desc(iser_conn, tx_desc); 487 + 488 + iser_reg_single(iser_conn->ib_conn->device, 489 + send_dto->regd[0], DMA_TO_DEVICE); 490 + 491 + /* all data was registered for RDMA, we can use the lkey */ 492 + iser_dto_add_regd_buff(send_dto, 493 + &iser_ctask->rdma_regd[ISER_DIR_OUT], 494 + buf_offset, 495 + data_seg_len); 496 + 497 + if (buf_offset + data_seg_len > iser_ctask->data[ISER_DIR_OUT].data_len) { 498 + iser_err("Offset:%ld & DSL:%ld in Data-Out " 499 + "inconsistent with total len:%ld, itt:%d\n", 500 + buf_offset, data_seg_len, 501 + iser_ctask->data[ISER_DIR_OUT].data_len, itt); 502 + err = -EINVAL; 503 + goto send_data_out_error; 504 + } 505 + iser_dbg("data-out itt: %d, offset: %ld, sz: %ld\n", 506 + itt, buf_offset, data_seg_len); 507 + 508 + 509 + err = iser_post_send(tx_desc); 510 + if (!err) 511 + return 0; 512 + 513 + send_data_out_error: 514 + iser_dto_buffs_release(send_dto); 515 + kmem_cache_free(ig.desc_cache, tx_desc); 516 + iser_err("conn %p failed err %d\n",conn, err); 517 + return err; 518 + } 519 + 520 + int iser_send_control(struct iscsi_conn *conn, 521 + struct iscsi_mgmt_task *mtask) 522 + { 523 + struct iscsi_iser_conn *iser_conn = conn->dd_data; 524 + struct iser_desc *mdesc = mtask->dd_data; 525 + struct iser_dto *send_dto = NULL; 526 + unsigned int itt; 527 + unsigned long data_seg_len; 528 + int err = 0; 529 + unsigned char opcode; 530 + struct iser_regd_buf *regd_buf; 531 + struct iser_device *device; 532 + 533 + if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) { 534 + iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn); 535 + return -EPERM; 536 + } 537 + 538 + if (iser_check_xmit(conn,mtask)) 539 + return -EAGAIN; 540 + 541 + /* build the tx desc regd header and add it to the tx desc dto */ 542 + mdesc->type = ISCSI_TX_CONTROL; 543 + send_dto = &mdesc->dto; 544 + send_dto->ctask = NULL; 545 + iser_create_send_desc(iser_conn, mdesc); 546 + 547 + device = iser_conn->ib_conn->device; 548 + 549 + iser_reg_single(device, send_dto->regd[0], DMA_TO_DEVICE); 550 + 551 + itt = ntohl(mtask->hdr->itt); 552 + opcode = mtask->hdr->opcode & ISCSI_OPCODE_MASK; 553 + data_seg_len = ntoh24(mtask->hdr->dlength); 554 + 555 + if (data_seg_len > 0) { 556 + regd_buf = &mdesc->data_regd_buf; 557 + memset(regd_buf, 0, sizeof(struct iser_regd_buf)); 558 + regd_buf->device = device; 559 + regd_buf->virt_addr = mtask->data; 560 + regd_buf->data_size = mtask->data_count; 561 + iser_reg_single(device, regd_buf, 562 + DMA_TO_DEVICE); 563 + iser_dto_add_regd_buff(send_dto, regd_buf, 564 + 0, 565 + data_seg_len); 566 + } 567 + 568 + if (iser_post_receive_control(conn) != 0) { 569 + iser_err("post_rcv_buff failed!\n"); 570 + err = -ENOMEM; 571 + goto send_control_error; 572 + } 573 + 574 + err = iser_post_send(mdesc); 575 + if (!err) 576 + return 0; 577 + 578 + send_control_error: 579 + iser_dto_buffs_release(send_dto); 580 + iser_err("conn %p failed err %d\n",conn, err); 581 + return err; 582 + } 583 + 584 + /** 585 + * iser_rcv_dto_completion - recv DTO completion 586 + */ 587 + void iser_rcv_completion(struct iser_desc *rx_desc, 588 + unsigned long dto_xfer_len) 589 + { 590 + struct iser_dto *dto = &rx_desc->dto; 591 + struct iscsi_iser_conn *conn = dto->conn; 592 + struct iscsi_session *session = conn->iscsi_conn->session; 593 + struct iscsi_cmd_task *ctask; 594 + struct iscsi_iser_cmd_task *iser_ctask; 595 + struct iscsi_hdr *hdr; 596 + char *rx_data = NULL; 597 + int rx_data_len = 0; 598 + unsigned int itt; 599 + unsigned char opcode; 600 + 601 + hdr = &rx_desc->iscsi_header; 602 + 603 + iser_dbg("op 0x%x itt 0x%x\n", hdr->opcode,hdr->itt); 604 + 605 + if (dto_xfer_len > ISER_TOTAL_HEADERS_LEN) { /* we have data */ 606 + rx_data_len = dto_xfer_len - ISER_TOTAL_HEADERS_LEN; 607 + rx_data = dto->regd[1]->virt_addr; 608 + rx_data += dto->offset[1]; 609 + } 610 + 611 + opcode = hdr->opcode & ISCSI_OPCODE_MASK; 612 + 613 + if (opcode == ISCSI_OP_SCSI_CMD_RSP) { 614 + itt = hdr->itt & ISCSI_ITT_MASK; /* mask out cid and age bits */ 615 + if (!(itt < session->cmds_max)) 616 + iser_err("itt can't be matched to task!!!" 617 + "conn %p opcode %d cmds_max %d itt %d\n", 618 + conn->iscsi_conn,opcode,session->cmds_max,itt); 619 + /* use the mapping given with the cmds array indexed by itt */ 620 + ctask = (struct iscsi_cmd_task *)session->cmds[itt]; 621 + iser_ctask = ctask->dd_data; 622 + iser_dbg("itt %d ctask %p\n",itt,ctask); 623 + iser_ctask->status = ISER_TASK_STATUS_COMPLETED; 624 + iser_ctask_rdma_finalize(iser_ctask); 625 + } 626 + 627 + iser_dto_buffs_release(dto); 628 + 629 + iscsi_iser_recv(conn->iscsi_conn, hdr, rx_data, rx_data_len); 630 + 631 + kfree(rx_desc->data); 632 + kmem_cache_free(ig.desc_cache, rx_desc); 633 + 634 + /* decrementing conn->post_recv_buf_count only --after-- freeing the * 635 + * task eliminates the need to worry on tasks which are completed in * 636 + * parallel to the execution of iser_conn_term. So the code that waits * 637 + * for the posted rx bufs refcount to become zero handles everything */ 638 + atomic_dec(&conn->ib_conn->post_recv_buf_count); 639 + } 640 + 641 + void iser_snd_completion(struct iser_desc *tx_desc) 642 + { 643 + struct iser_dto *dto = &tx_desc->dto; 644 + struct iscsi_iser_conn *iser_conn = dto->conn; 645 + struct iscsi_conn *conn = iser_conn->iscsi_conn; 646 + struct iscsi_mgmt_task *mtask; 647 + 648 + iser_dbg("Initiator, Data sent dto=0x%p\n", dto); 649 + 650 + iser_dto_buffs_release(dto); 651 + 652 + if (tx_desc->type == ISCSI_TX_DATAOUT) 653 + kmem_cache_free(ig.desc_cache, tx_desc); 654 + 655 + atomic_dec(&iser_conn->ib_conn->post_send_buf_count); 656 + 657 + write_lock(conn->recv_lock); 658 + if (conn->suspend_tx) { 659 + iser_dbg("%ld resuming tx\n",jiffies); 660 + clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); 661 + scsi_queue_work(conn->session->host, &conn->xmitwork); 662 + } 663 + write_unlock(conn->recv_lock); 664 + 665 + if (tx_desc->type == ISCSI_TX_CONTROL) { 666 + /* this arithmetic is legal by libiscsi dd_data allocation */ 667 + mtask = (void *) ((long)(void *)tx_desc - 668 + sizeof(struct iscsi_mgmt_task)); 669 + if (mtask->hdr->itt == cpu_to_be32(ISCSI_RESERVED_TAG)) { 670 + struct iscsi_session *session = conn->session; 671 + 672 + spin_lock(&conn->session->lock); 673 + list_del(&mtask->running); 674 + __kfifo_put(session->mgmtpool.queue, (void*)&mtask, 675 + sizeof(void*)); 676 + spin_unlock(&session->lock); 677 + } 678 + } 679 + } 680 + 681 + void iser_ctask_rdma_init(struct iscsi_iser_cmd_task *iser_ctask) 682 + 683 + { 684 + iser_ctask->status = ISER_TASK_STATUS_INIT; 685 + 686 + iser_ctask->dir[ISER_DIR_IN] = 0; 687 + iser_ctask->dir[ISER_DIR_OUT] = 0; 688 + 689 + iser_ctask->data[ISER_DIR_IN].data_len = 0; 690 + iser_ctask->data[ISER_DIR_OUT].data_len = 0; 691 + 692 + memset(&iser_ctask->rdma_regd[ISER_DIR_IN], 0, 693 + sizeof(struct iser_regd_buf)); 694 + memset(&iser_ctask->rdma_regd[ISER_DIR_OUT], 0, 695 + sizeof(struct iser_regd_buf)); 696 + } 697 + 698 + void iser_ctask_rdma_finalize(struct iscsi_iser_cmd_task *iser_ctask) 699 + { 700 + int deferred; 701 + 702 + /* if we were reading, copy back to unaligned sglist, 703 + * anyway dma_unmap and free the copy 704 + */ 705 + if (iser_ctask->data_copy[ISER_DIR_IN].copy_buf != NULL) 706 + iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_IN); 707 + if (iser_ctask->data_copy[ISER_DIR_OUT].copy_buf != NULL) 708 + iser_finalize_rdma_unaligned_sg(iser_ctask, ISER_DIR_OUT); 709 + 710 + if (iser_ctask->dir[ISER_DIR_IN]) { 711 + deferred = iser_regd_buff_release 712 + (&iser_ctask->rdma_regd[ISER_DIR_IN]); 713 + if (deferred) { 714 + iser_err("References remain for BUF-IN rdma reg\n"); 715 + BUG(); 716 + } 717 + } 718 + 719 + if (iser_ctask->dir[ISER_DIR_OUT]) { 720 + deferred = iser_regd_buff_release 721 + (&iser_ctask->rdma_regd[ISER_DIR_OUT]); 722 + if (deferred) { 723 + iser_err("References remain for BUF-OUT rdma reg\n"); 724 + BUG(); 725 + } 726 + } 727 + 728 + iser_dma_unmap_task_data(iser_ctask); 729 + } 730 + 731 + void iser_dto_buffs_release(struct iser_dto *dto) 732 + { 733 + int i; 734 + 735 + for (i = 0; i < dto->regd_vector_len; i++) 736 + iser_regd_buff_release(dto->regd[i]); 737 + } 738 +
+401
drivers/infiniband/ulp/iser/iser_memory.c
··· 1 + /* 2 + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. 3 + * 4 + * This software is available to you under a choice of one of two 5 + * licenses. You may choose to be licensed under the terms of the GNU 6 + * General Public License (GPL) Version 2, available from the file 7 + * COPYING in the main directory of this source tree, or the 8 + * OpenIB.org BSD license below: 9 + * 10 + * Redistribution and use in source and binary forms, with or 11 + * without modification, are permitted provided that the following 12 + * conditions are met: 13 + * 14 + * - Redistributions of source code must retain the above 15 + * copyright notice, this list of conditions and the following 16 + * disclaimer. 17 + * 18 + * - Redistributions in binary form must reproduce the above 19 + * copyright notice, this list of conditions and the following 20 + * disclaimer in the documentation and/or other materials 21 + * provided with the distribution. 22 + * 23 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 + * SOFTWARE. 31 + * 32 + * $Id: iser_memory.c 6964 2006-05-07 11:11:43Z ogerlitz $ 33 + */ 34 + #include <linux/module.h> 35 + #include <linux/kernel.h> 36 + #include <linux/slab.h> 37 + #include <linux/mm.h> 38 + #include <asm/io.h> 39 + #include <asm/scatterlist.h> 40 + #include <linux/scatterlist.h> 41 + 42 + #include "iscsi_iser.h" 43 + 44 + #define ISER_KMALLOC_THRESHOLD 0x20000 /* 128K - kmalloc limit */ 45 + /** 46 + * Decrements the reference count for the 47 + * registered buffer & releases it 48 + * 49 + * returns 0 if released, 1 if deferred 50 + */ 51 + int iser_regd_buff_release(struct iser_regd_buf *regd_buf) 52 + { 53 + struct device *dma_device; 54 + 55 + if ((atomic_read(&regd_buf->ref_count) == 0) || 56 + atomic_dec_and_test(&regd_buf->ref_count)) { 57 + /* if we used the dma mr, unreg is just NOP */ 58 + if (regd_buf->reg.rkey != 0) 59 + iser_unreg_mem(&regd_buf->reg); 60 + 61 + if (regd_buf->dma_addr) { 62 + dma_device = regd_buf->device->ib_device->dma_device; 63 + dma_unmap_single(dma_device, 64 + regd_buf->dma_addr, 65 + regd_buf->data_size, 66 + regd_buf->direction); 67 + } 68 + /* else this regd buf is associated with task which we */ 69 + /* dma_unmap_single/sg later */ 70 + return 0; 71 + } else { 72 + iser_dbg("Release deferred, regd.buff: 0x%p\n", regd_buf); 73 + return 1; 74 + } 75 + } 76 + 77 + /** 78 + * iser_reg_single - fills registered buffer descriptor with 79 + * registration information 80 + */ 81 + void iser_reg_single(struct iser_device *device, 82 + struct iser_regd_buf *regd_buf, 83 + enum dma_data_direction direction) 84 + { 85 + dma_addr_t dma_addr; 86 + 87 + dma_addr = dma_map_single(device->ib_device->dma_device, 88 + regd_buf->virt_addr, 89 + regd_buf->data_size, direction); 90 + BUG_ON(dma_mapping_error(dma_addr)); 91 + 92 + regd_buf->reg.lkey = device->mr->lkey; 93 + regd_buf->reg.rkey = 0; /* indicate there's no need to unreg */ 94 + regd_buf->reg.len = regd_buf->data_size; 95 + regd_buf->reg.va = dma_addr; 96 + 97 + regd_buf->dma_addr = dma_addr; 98 + regd_buf->direction = direction; 99 + } 100 + 101 + /** 102 + * iser_start_rdma_unaligned_sg 103 + */ 104 + int iser_start_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, 105 + enum iser_data_dir cmd_dir) 106 + { 107 + int dma_nents; 108 + struct device *dma_device; 109 + char *mem = NULL; 110 + struct iser_data_buf *data = &iser_ctask->data[cmd_dir]; 111 + unsigned long cmd_data_len = data->data_len; 112 + 113 + if (cmd_data_len > ISER_KMALLOC_THRESHOLD) 114 + mem = (void *)__get_free_pages(GFP_NOIO, 115 + long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); 116 + else 117 + mem = kmalloc(cmd_data_len, GFP_NOIO); 118 + 119 + if (mem == NULL) { 120 + iser_err("Failed to allocate mem size %d %d for copying sglist\n", 121 + data->size,(int)cmd_data_len); 122 + return -ENOMEM; 123 + } 124 + 125 + if (cmd_dir == ISER_DIR_OUT) { 126 + /* copy the unaligned sg the buffer which is used for RDMA */ 127 + struct scatterlist *sg = (struct scatterlist *)data->buf; 128 + int i; 129 + char *p, *from; 130 + 131 + for (p = mem, i = 0; i < data->size; i++) { 132 + from = kmap_atomic(sg[i].page, KM_USER0); 133 + memcpy(p, 134 + from + sg[i].offset, 135 + sg[i].length); 136 + kunmap_atomic(from, KM_USER0); 137 + p += sg[i].length; 138 + } 139 + } 140 + 141 + sg_init_one(&iser_ctask->data_copy[cmd_dir].sg_single, mem, cmd_data_len); 142 + iser_ctask->data_copy[cmd_dir].buf = 143 + &iser_ctask->data_copy[cmd_dir].sg_single; 144 + iser_ctask->data_copy[cmd_dir].size = 1; 145 + 146 + iser_ctask->data_copy[cmd_dir].copy_buf = mem; 147 + 148 + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; 149 + 150 + if (cmd_dir == ISER_DIR_OUT) 151 + dma_nents = dma_map_sg(dma_device, 152 + &iser_ctask->data_copy[cmd_dir].sg_single, 153 + 1, DMA_TO_DEVICE); 154 + else 155 + dma_nents = dma_map_sg(dma_device, 156 + &iser_ctask->data_copy[cmd_dir].sg_single, 157 + 1, DMA_FROM_DEVICE); 158 + 159 + BUG_ON(dma_nents == 0); 160 + 161 + iser_ctask->data_copy[cmd_dir].dma_nents = dma_nents; 162 + return 0; 163 + } 164 + 165 + /** 166 + * iser_finalize_rdma_unaligned_sg 167 + */ 168 + void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_cmd_task *iser_ctask, 169 + enum iser_data_dir cmd_dir) 170 + { 171 + struct device *dma_device; 172 + struct iser_data_buf *mem_copy; 173 + unsigned long cmd_data_len; 174 + 175 + dma_device = iser_ctask->iser_conn->ib_conn->device->ib_device->dma_device; 176 + mem_copy = &iser_ctask->data_copy[cmd_dir]; 177 + 178 + if (cmd_dir == ISER_DIR_OUT) 179 + dma_unmap_sg(dma_device, &mem_copy->sg_single, 1, 180 + DMA_TO_DEVICE); 181 + else 182 + dma_unmap_sg(dma_device, &mem_copy->sg_single, 1, 183 + DMA_FROM_DEVICE); 184 + 185 + if (cmd_dir == ISER_DIR_IN) { 186 + char *mem; 187 + struct scatterlist *sg; 188 + unsigned char *p, *to; 189 + unsigned int sg_size; 190 + int i; 191 + 192 + /* copy back read RDMA to unaligned sg */ 193 + mem = mem_copy->copy_buf; 194 + 195 + sg = (struct scatterlist *)iser_ctask->data[ISER_DIR_IN].buf; 196 + sg_size = iser_ctask->data[ISER_DIR_IN].size; 197 + 198 + for (p = mem, i = 0; i < sg_size; i++){ 199 + to = kmap_atomic(sg[i].page, KM_SOFTIRQ0); 200 + memcpy(to + sg[i].offset, 201 + p, 202 + sg[i].length); 203 + kunmap_atomic(to, KM_SOFTIRQ0); 204 + p += sg[i].length; 205 + } 206 + } 207 + 208 + cmd_data_len = iser_ctask->data[cmd_dir].data_len; 209 + 210 + if (cmd_data_len > ISER_KMALLOC_THRESHOLD) 211 + free_pages((unsigned long)mem_copy->copy_buf, 212 + long_log2(roundup_pow_of_two(cmd_data_len)) - PAGE_SHIFT); 213 + else 214 + kfree(mem_copy->copy_buf); 215 + 216 + mem_copy->copy_buf = NULL; 217 + } 218 + 219 + /** 220 + * iser_sg_to_page_vec - Translates scatterlist entries to physical addresses 221 + * and returns the length of resulting physical address array (may be less than 222 + * the original due to possible compaction). 223 + * 224 + * we build a "page vec" under the assumption that the SG meets the RDMA 225 + * alignment requirements. Other then the first and last SG elements, all 226 + * the "internal" elements can be compacted into a list whose elements are 227 + * dma addresses of physical pages. The code supports also the weird case 228 + * where --few fragments of the same page-- are present in the SG as 229 + * consecutive elements. Also, it handles one entry SG. 230 + */ 231 + static int iser_sg_to_page_vec(struct iser_data_buf *data, 232 + struct iser_page_vec *page_vec) 233 + { 234 + struct scatterlist *sg = (struct scatterlist *)data->buf; 235 + dma_addr_t first_addr, last_addr, page; 236 + int start_aligned, end_aligned; 237 + unsigned int cur_page = 0; 238 + unsigned long total_sz = 0; 239 + int i; 240 + 241 + /* compute the offset of first element */ 242 + page_vec->offset = (u64) sg[0].offset; 243 + 244 + for (i = 0; i < data->dma_nents; i++) { 245 + total_sz += sg_dma_len(&sg[i]); 246 + 247 + first_addr = sg_dma_address(&sg[i]); 248 + last_addr = first_addr + sg_dma_len(&sg[i]); 249 + 250 + start_aligned = !(first_addr & ~PAGE_MASK); 251 + end_aligned = !(last_addr & ~PAGE_MASK); 252 + 253 + /* continue to collect page fragments till aligned or SG ends */ 254 + while (!end_aligned && (i + 1 < data->dma_nents)) { 255 + i++; 256 + total_sz += sg_dma_len(&sg[i]); 257 + last_addr = sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]); 258 + end_aligned = !(last_addr & ~PAGE_MASK); 259 + } 260 + 261 + first_addr = first_addr & PAGE_MASK; 262 + 263 + for (page = first_addr; page < last_addr; page += PAGE_SIZE) 264 + page_vec->pages[cur_page++] = page; 265 + 266 + } 267 + page_vec->data_size = total_sz; 268 + iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page); 269 + return cur_page; 270 + } 271 + 272 + #define MASK_4K ((1UL << 12) - 1) /* 0xFFF */ 273 + #define IS_4K_ALIGNED(addr) ((((unsigned long)addr) & MASK_4K) == 0) 274 + 275 + /** 276 + * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned 277 + * for RDMA sub-list of a scatter-gather list of memory buffers, and returns 278 + * the number of entries which are aligned correctly. Supports the case where 279 + * consecutive SG elements are actually fragments of the same physcial page. 280 + */ 281 + static unsigned int iser_data_buf_aligned_len(struct iser_data_buf *data) 282 + { 283 + struct scatterlist *sg; 284 + dma_addr_t end_addr, next_addr; 285 + int i, cnt; 286 + unsigned int ret_len = 0; 287 + 288 + sg = (struct scatterlist *)data->buf; 289 + 290 + for (cnt = 0, i = 0; i < data->dma_nents; i++, cnt++) { 291 + /* iser_dbg("Checking sg iobuf [%d]: phys=0x%08lX " 292 + "offset: %ld sz: %ld\n", i, 293 + (unsigned long)page_to_phys(sg[i].page), 294 + (unsigned long)sg[i].offset, 295 + (unsigned long)sg[i].length); */ 296 + end_addr = sg_dma_address(&sg[i]) + 297 + sg_dma_len(&sg[i]); 298 + /* iser_dbg("Checking sg iobuf end address " 299 + "0x%08lX\n", end_addr); */ 300 + if (i + 1 < data->dma_nents) { 301 + next_addr = sg_dma_address(&sg[i+1]); 302 + /* are i, i+1 fragments of the same page? */ 303 + if (end_addr == next_addr) 304 + continue; 305 + else if (!IS_4K_ALIGNED(end_addr)) { 306 + ret_len = cnt + 1; 307 + break; 308 + } 309 + } 310 + } 311 + if (i == data->dma_nents) 312 + ret_len = cnt; /* loop ended */ 313 + iser_dbg("Found %d aligned entries out of %d in sg:0x%p\n", 314 + ret_len, data->dma_nents, data); 315 + return ret_len; 316 + } 317 + 318 + static void iser_data_buf_dump(struct iser_data_buf *data) 319 + { 320 + struct scatterlist *sg = (struct scatterlist *)data->buf; 321 + int i; 322 + 323 + for (i = 0; i < data->size; i++) 324 + iser_err("sg[%d] dma_addr:0x%lX page:0x%p " 325 + "off:%d sz:%d dma_len:%d\n", 326 + i, (unsigned long)sg_dma_address(&sg[i]), 327 + sg[i].page, sg[i].offset, 328 + sg[i].length,sg_dma_len(&sg[i])); 329 + } 330 + 331 + static void iser_dump_page_vec(struct iser_page_vec *page_vec) 332 + { 333 + int i; 334 + 335 + iser_err("page vec length %d data size %d\n", 336 + page_vec->length, page_vec->data_size); 337 + for (i = 0; i < page_vec->length; i++) 338 + iser_err("%d %lx\n",i,(unsigned long)page_vec->pages[i]); 339 + } 340 + 341 + static void iser_page_vec_build(struct iser_data_buf *data, 342 + struct iser_page_vec *page_vec) 343 + { 344 + int page_vec_len = 0; 345 + 346 + page_vec->length = 0; 347 + page_vec->offset = 0; 348 + 349 + iser_dbg("Translating sg sz: %d\n", data->dma_nents); 350 + page_vec_len = iser_sg_to_page_vec(data,page_vec); 351 + iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len); 352 + 353 + page_vec->length = page_vec_len; 354 + 355 + if (page_vec_len * PAGE_SIZE < page_vec->data_size) { 356 + iser_err("page_vec too short to hold this SG\n"); 357 + iser_data_buf_dump(data); 358 + iser_dump_page_vec(page_vec); 359 + BUG(); 360 + } 361 + } 362 + 363 + /** 364 + * iser_reg_rdma_mem - Registers memory intended for RDMA, 365 + * obtaining rkey and va 366 + * 367 + * returns 0 on success, errno code on failure 368 + */ 369 + int iser_reg_rdma_mem(struct iscsi_iser_cmd_task *iser_ctask, 370 + enum iser_data_dir cmd_dir) 371 + { 372 + struct iser_conn *ib_conn = iser_ctask->iser_conn->ib_conn; 373 + struct iser_data_buf *mem = &iser_ctask->data[cmd_dir]; 374 + struct iser_regd_buf *regd_buf; 375 + int aligned_len; 376 + int err; 377 + 378 + regd_buf = &iser_ctask->rdma_regd[cmd_dir]; 379 + 380 + aligned_len = iser_data_buf_aligned_len(mem); 381 + if (aligned_len != mem->size) { 382 + iser_err("rdma alignment violation %d/%d aligned\n", 383 + aligned_len, mem->size); 384 + iser_data_buf_dump(mem); 385 + /* allocate copy buf, if we are writing, copy the */ 386 + /* unaligned scatterlist, dma map the copy */ 387 + if (iser_start_rdma_unaligned_sg(iser_ctask, cmd_dir) != 0) 388 + return -ENOMEM; 389 + mem = &iser_ctask->data_copy[cmd_dir]; 390 + } 391 + 392 + iser_page_vec_build(mem, ib_conn->page_vec); 393 + err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, &regd_buf->reg); 394 + if (err) 395 + return err; 396 + 397 + /* take a reference on this regd buf such that it will not be released * 398 + * (eg in send dto completion) before we get the scsi response */ 399 + atomic_inc(&regd_buf->ref_count); 400 + return 0; 401 + }
+827
drivers/infiniband/ulp/iser/iser_verbs.c
··· 1 + /* 2 + * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved. 3 + * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. 4 + * 5 + * This software is available to you under a choice of one of two 6 + * licenses. You may choose to be licensed under the terms of the GNU 7 + * General Public License (GPL) Version 2, available from the file 8 + * COPYING in the main directory of this source tree, or the 9 + * OpenIB.org BSD license below: 10 + * 11 + * Redistribution and use in source and binary forms, with or 12 + * without modification, are permitted provided that the following 13 + * conditions are met: 14 + * 15 + * - Redistributions of source code must retain the above 16 + * copyright notice, this list of conditions and the following 17 + * disclaimer. 18 + * 19 + * - Redistributions in binary form must reproduce the above 20 + * copyright notice, this list of conditions and the following 21 + * disclaimer in the documentation and/or other materials 22 + * provided with the distribution. 23 + * 24 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 + * SOFTWARE. 32 + * 33 + * $Id: iser_verbs.c 7051 2006-05-10 12:29:11Z ogerlitz $ 34 + */ 35 + #include <asm/io.h> 36 + #include <linux/kernel.h> 37 + #include <linux/module.h> 38 + #include <linux/smp_lock.h> 39 + #include <linux/delay.h> 40 + #include <linux/version.h> 41 + 42 + #include "iscsi_iser.h" 43 + 44 + #define ISCSI_ISER_MAX_CONN 8 45 + #define ISER_MAX_CQ_LEN ((ISER_QP_MAX_RECV_DTOS + \ 46 + ISER_QP_MAX_REQ_DTOS) * \ 47 + ISCSI_ISER_MAX_CONN) 48 + 49 + static void iser_cq_tasklet_fn(unsigned long data); 50 + static void iser_cq_callback(struct ib_cq *cq, void *cq_context); 51 + static void iser_comp_error_worker(void *data); 52 + 53 + static void iser_cq_event_callback(struct ib_event *cause, void *context) 54 + { 55 + iser_err("got cq event %d \n", cause->event); 56 + } 57 + 58 + static void iser_qp_event_callback(struct ib_event *cause, void *context) 59 + { 60 + iser_err("got qp event %d\n",cause->event); 61 + } 62 + 63 + /** 64 + * iser_create_device_ib_res - creates Protection Domain (PD), Completion 65 + * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with 66 + * the adapator. 67 + * 68 + * returns 0 on success, -1 on failure 69 + */ 70 + static int iser_create_device_ib_res(struct iser_device *device) 71 + { 72 + device->pd = ib_alloc_pd(device->ib_device); 73 + if (IS_ERR(device->pd)) 74 + goto pd_err; 75 + 76 + device->cq = ib_create_cq(device->ib_device, 77 + iser_cq_callback, 78 + iser_cq_event_callback, 79 + (void *)device, 80 + ISER_MAX_CQ_LEN); 81 + if (IS_ERR(device->cq)) 82 + goto cq_err; 83 + 84 + if (ib_req_notify_cq(device->cq, IB_CQ_NEXT_COMP)) 85 + goto cq_arm_err; 86 + 87 + tasklet_init(&device->cq_tasklet, 88 + iser_cq_tasklet_fn, 89 + (unsigned long)device); 90 + 91 + device->mr = ib_get_dma_mr(device->pd, 92 + IB_ACCESS_LOCAL_WRITE); 93 + if (IS_ERR(device->mr)) 94 + goto dma_mr_err; 95 + 96 + return 0; 97 + 98 + dma_mr_err: 99 + tasklet_kill(&device->cq_tasklet); 100 + cq_arm_err: 101 + ib_destroy_cq(device->cq); 102 + cq_err: 103 + ib_dealloc_pd(device->pd); 104 + pd_err: 105 + iser_err("failed to allocate an IB resource\n"); 106 + return -1; 107 + } 108 + 109 + /** 110 + * iser_free_device_ib_res - destory/dealloc/dereg the DMA MR, 111 + * CQ and PD created with the device associated with the adapator. 112 + */ 113 + static void iser_free_device_ib_res(struct iser_device *device) 114 + { 115 + BUG_ON(device->mr == NULL); 116 + 117 + tasklet_kill(&device->cq_tasklet); 118 + 119 + (void)ib_dereg_mr(device->mr); 120 + (void)ib_destroy_cq(device->cq); 121 + (void)ib_dealloc_pd(device->pd); 122 + 123 + device->mr = NULL; 124 + device->cq = NULL; 125 + device->pd = NULL; 126 + } 127 + 128 + /** 129 + * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP) 130 + * 131 + * returns 0 on success, -1 on failure 132 + */ 133 + static int iser_create_ib_conn_res(struct iser_conn *ib_conn) 134 + { 135 + struct iser_device *device; 136 + struct ib_qp_init_attr init_attr; 137 + int ret; 138 + struct ib_fmr_pool_param params; 139 + 140 + BUG_ON(ib_conn->device == NULL); 141 + 142 + device = ib_conn->device; 143 + 144 + ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + 145 + (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), 146 + GFP_KERNEL); 147 + if (!ib_conn->page_vec) { 148 + ret = -ENOMEM; 149 + goto alloc_err; 150 + } 151 + ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1); 152 + 153 + params.page_shift = PAGE_SHIFT; 154 + /* when the first/last SG element are not start/end * 155 + * page aligned, the map whould be of N+1 pages */ 156 + params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1; 157 + /* make the pool size twice the max number of SCSI commands * 158 + * the ML is expected to queue, watermark for unmap at 50% */ 159 + params.pool_size = ISCSI_XMIT_CMDS_MAX * 2; 160 + params.dirty_watermark = ISCSI_XMIT_CMDS_MAX; 161 + params.cache = 0; 162 + params.flush_function = NULL; 163 + params.access = (IB_ACCESS_LOCAL_WRITE | 164 + IB_ACCESS_REMOTE_WRITE | 165 + IB_ACCESS_REMOTE_READ); 166 + 167 + ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, &params); 168 + if (IS_ERR(ib_conn->fmr_pool)) { 169 + ret = PTR_ERR(ib_conn->fmr_pool); 170 + goto fmr_pool_err; 171 + } 172 + 173 + memset(&init_attr, 0, sizeof init_attr); 174 + 175 + init_attr.event_handler = iser_qp_event_callback; 176 + init_attr.qp_context = (void *)ib_conn; 177 + init_attr.send_cq = device->cq; 178 + init_attr.recv_cq = device->cq; 179 + init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 180 + init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 181 + init_attr.cap.max_send_sge = MAX_REGD_BUF_VECTOR_LEN; 182 + init_attr.cap.max_recv_sge = 2; 183 + init_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 184 + init_attr.qp_type = IB_QPT_RC; 185 + 186 + ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr); 187 + if (ret) 188 + goto qp_err; 189 + 190 + ib_conn->qp = ib_conn->cma_id->qp; 191 + iser_err("setting conn %p cma_id %p: fmr_pool %p qp %p\n", 192 + ib_conn, ib_conn->cma_id, 193 + ib_conn->fmr_pool, ib_conn->cma_id->qp); 194 + return ret; 195 + 196 + qp_err: 197 + (void)ib_destroy_fmr_pool(ib_conn->fmr_pool); 198 + fmr_pool_err: 199 + kfree(ib_conn->page_vec); 200 + alloc_err: 201 + iser_err("unable to alloc mem or create resource, err %d\n", ret); 202 + return ret; 203 + } 204 + 205 + /** 206 + * releases the FMR pool, QP and CMA ID objects, returns 0 on success, 207 + * -1 on failure 208 + */ 209 + static int iser_free_ib_conn_res(struct iser_conn *ib_conn) 210 + { 211 + BUG_ON(ib_conn == NULL); 212 + 213 + iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", 214 + ib_conn, ib_conn->cma_id, 215 + ib_conn->fmr_pool, ib_conn->qp); 216 + 217 + /* qp is created only once both addr & route are resolved */ 218 + if (ib_conn->fmr_pool != NULL) 219 + ib_destroy_fmr_pool(ib_conn->fmr_pool); 220 + 221 + if (ib_conn->qp != NULL) 222 + rdma_destroy_qp(ib_conn->cma_id); 223 + 224 + if (ib_conn->cma_id != NULL) 225 + rdma_destroy_id(ib_conn->cma_id); 226 + 227 + ib_conn->fmr_pool = NULL; 228 + ib_conn->qp = NULL; 229 + ib_conn->cma_id = NULL; 230 + kfree(ib_conn->page_vec); 231 + 232 + return 0; 233 + } 234 + 235 + /** 236 + * based on the resolved device node GUID see if there already allocated 237 + * device for this device. If there's no such, create one. 238 + */ 239 + static 240 + struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id) 241 + { 242 + struct list_head *p_list; 243 + struct iser_device *device = NULL; 244 + 245 + mutex_lock(&ig.device_list_mutex); 246 + 247 + p_list = ig.device_list.next; 248 + while (p_list != &ig.device_list) { 249 + device = list_entry(p_list, struct iser_device, ig_list); 250 + /* find if there's a match using the node GUID */ 251 + if (device->ib_device->node_guid == cma_id->device->node_guid) 252 + break; 253 + } 254 + 255 + if (device == NULL) { 256 + device = kzalloc(sizeof *device, GFP_KERNEL); 257 + if (device == NULL) 258 + goto out; 259 + /* assign this device to the device */ 260 + device->ib_device = cma_id->device; 261 + /* init the device and link it into ig device list */ 262 + if (iser_create_device_ib_res(device)) { 263 + kfree(device); 264 + device = NULL; 265 + goto out; 266 + } 267 + list_add(&device->ig_list, &ig.device_list); 268 + } 269 + out: 270 + BUG_ON(device == NULL); 271 + device->refcount++; 272 + mutex_unlock(&ig.device_list_mutex); 273 + return device; 274 + } 275 + 276 + /* if there's no demand for this device, release it */ 277 + static void iser_device_try_release(struct iser_device *device) 278 + { 279 + mutex_lock(&ig.device_list_mutex); 280 + device->refcount--; 281 + iser_err("device %p refcount %d\n",device,device->refcount); 282 + if (!device->refcount) { 283 + iser_free_device_ib_res(device); 284 + list_del(&device->ig_list); 285 + kfree(device); 286 + } 287 + mutex_unlock(&ig.device_list_mutex); 288 + } 289 + 290 + int iser_conn_state_comp(struct iser_conn *ib_conn, 291 + enum iser_ib_conn_state comp) 292 + { 293 + int ret; 294 + 295 + spin_lock_bh(&ib_conn->lock); 296 + ret = (ib_conn->state == comp); 297 + spin_unlock_bh(&ib_conn->lock); 298 + return ret; 299 + } 300 + 301 + static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 302 + enum iser_ib_conn_state comp, 303 + enum iser_ib_conn_state exch) 304 + { 305 + int ret; 306 + 307 + spin_lock_bh(&ib_conn->lock); 308 + if ((ret = (ib_conn->state == comp))) 309 + ib_conn->state = exch; 310 + spin_unlock_bh(&ib_conn->lock); 311 + return ret; 312 + } 313 + 314 + /** 315 + * triggers start of the disconnect procedures and wait for them to be done 316 + */ 317 + void iser_conn_terminate(struct iser_conn *ib_conn) 318 + { 319 + int err = 0; 320 + 321 + /* change the ib conn state only if the conn is UP, however always call 322 + * rdma_disconnect since this is the only way to cause the CMA to change 323 + * the QP state to ERROR 324 + */ 325 + 326 + iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, ISER_CONN_TERMINATING); 327 + err = rdma_disconnect(ib_conn->cma_id); 328 + if (err) 329 + iser_err("Failed to disconnect, conn: 0x%p err %d\n", 330 + ib_conn,err); 331 + 332 + wait_event_interruptible(ib_conn->wait, 333 + ib_conn->state == ISER_CONN_DOWN); 334 + 335 + iser_conn_release(ib_conn); 336 + } 337 + 338 + static void iser_connect_error(struct rdma_cm_id *cma_id) 339 + { 340 + struct iser_conn *ib_conn; 341 + ib_conn = (struct iser_conn *)cma_id->context; 342 + 343 + ib_conn->state = ISER_CONN_DOWN; 344 + wake_up_interruptible(&ib_conn->wait); 345 + } 346 + 347 + static void iser_addr_handler(struct rdma_cm_id *cma_id) 348 + { 349 + struct iser_device *device; 350 + struct iser_conn *ib_conn; 351 + int ret; 352 + 353 + device = iser_device_find_by_ib_device(cma_id); 354 + ib_conn = (struct iser_conn *)cma_id->context; 355 + ib_conn->device = device; 356 + 357 + ret = rdma_resolve_route(cma_id, 1000); 358 + if (ret) { 359 + iser_err("resolve route failed: %d\n", ret); 360 + iser_connect_error(cma_id); 361 + } 362 + return; 363 + } 364 + 365 + static void iser_route_handler(struct rdma_cm_id *cma_id) 366 + { 367 + struct rdma_conn_param conn_param; 368 + int ret; 369 + 370 + ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); 371 + if (ret) 372 + goto failure; 373 + 374 + iser_dbg("path.mtu is %d setting it to %d\n", 375 + cma_id->route.path_rec->mtu, IB_MTU_1024); 376 + 377 + /* we must set the MTU to 1024 as this is what the target is assuming */ 378 + if (cma_id->route.path_rec->mtu > IB_MTU_1024) 379 + cma_id->route.path_rec->mtu = IB_MTU_1024; 380 + 381 + memset(&conn_param, 0, sizeof conn_param); 382 + conn_param.responder_resources = 4; 383 + conn_param.initiator_depth = 1; 384 + conn_param.retry_count = 7; 385 + conn_param.rnr_retry_count = 6; 386 + 387 + ret = rdma_connect(cma_id, &conn_param); 388 + if (ret) { 389 + iser_err("failure connecting: %d\n", ret); 390 + goto failure; 391 + } 392 + 393 + return; 394 + failure: 395 + iser_connect_error(cma_id); 396 + } 397 + 398 + static void iser_connected_handler(struct rdma_cm_id *cma_id) 399 + { 400 + struct iser_conn *ib_conn; 401 + 402 + ib_conn = (struct iser_conn *)cma_id->context; 403 + ib_conn->state = ISER_CONN_UP; 404 + wake_up_interruptible(&ib_conn->wait); 405 + } 406 + 407 + static void iser_disconnected_handler(struct rdma_cm_id *cma_id) 408 + { 409 + struct iser_conn *ib_conn; 410 + 411 + ib_conn = (struct iser_conn *)cma_id->context; 412 + ib_conn->disc_evt_flag = 1; 413 + 414 + /* getting here when the state is UP means that the conn is being * 415 + * terminated asynchronously from the iSCSI layer's perspective. */ 416 + if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 417 + ISER_CONN_TERMINATING)) 418 + iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, 419 + ISCSI_ERR_CONN_FAILED); 420 + 421 + /* Complete the termination process if no posts are pending */ 422 + if ((atomic_read(&ib_conn->post_recv_buf_count) == 0) && 423 + (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 424 + ib_conn->state = ISER_CONN_DOWN; 425 + wake_up_interruptible(&ib_conn->wait); 426 + } 427 + } 428 + 429 + static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 430 + { 431 + int ret = 0; 432 + 433 + iser_err("event %d conn %p id %p\n",event->event,cma_id->context,cma_id); 434 + 435 + switch (event->event) { 436 + case RDMA_CM_EVENT_ADDR_RESOLVED: 437 + iser_addr_handler(cma_id); 438 + break; 439 + case RDMA_CM_EVENT_ROUTE_RESOLVED: 440 + iser_route_handler(cma_id); 441 + break; 442 + case RDMA_CM_EVENT_ESTABLISHED: 443 + iser_connected_handler(cma_id); 444 + break; 445 + case RDMA_CM_EVENT_ADDR_ERROR: 446 + case RDMA_CM_EVENT_ROUTE_ERROR: 447 + case RDMA_CM_EVENT_CONNECT_ERROR: 448 + case RDMA_CM_EVENT_UNREACHABLE: 449 + case RDMA_CM_EVENT_REJECTED: 450 + iser_err("event: %d, error: %d\n", event->event, event->status); 451 + iser_connect_error(cma_id); 452 + break; 453 + case RDMA_CM_EVENT_DISCONNECTED: 454 + iser_disconnected_handler(cma_id); 455 + break; 456 + case RDMA_CM_EVENT_DEVICE_REMOVAL: 457 + BUG(); 458 + break; 459 + case RDMA_CM_EVENT_CONNECT_RESPONSE: 460 + BUG(); 461 + break; 462 + case RDMA_CM_EVENT_CONNECT_REQUEST: 463 + default: 464 + break; 465 + } 466 + return ret; 467 + } 468 + 469 + int iser_conn_init(struct iser_conn **ibconn) 470 + { 471 + struct iser_conn *ib_conn; 472 + 473 + ib_conn = kzalloc(sizeof *ib_conn, GFP_KERNEL); 474 + if (!ib_conn) { 475 + iser_err("can't alloc memory for struct iser_conn\n"); 476 + return -ENOMEM; 477 + } 478 + ib_conn->state = ISER_CONN_INIT; 479 + init_waitqueue_head(&ib_conn->wait); 480 + atomic_set(&ib_conn->post_recv_buf_count, 0); 481 + atomic_set(&ib_conn->post_send_buf_count, 0); 482 + INIT_WORK(&ib_conn->comperror_work, iser_comp_error_worker, 483 + ib_conn); 484 + INIT_LIST_HEAD(&ib_conn->conn_list); 485 + spin_lock_init(&ib_conn->lock); 486 + 487 + *ibconn = ib_conn; 488 + return 0; 489 + } 490 + 491 + /** 492 + * starts the process of connecting to the target 493 + * sleeps untill the connection is established or rejected 494 + */ 495 + int iser_connect(struct iser_conn *ib_conn, 496 + struct sockaddr_in *src_addr, 497 + struct sockaddr_in *dst_addr, 498 + int non_blocking) 499 + { 500 + struct sockaddr *src, *dst; 501 + int err = 0; 502 + 503 + sprintf(ib_conn->name,"%d.%d.%d.%d:%d", 504 + NIPQUAD(dst_addr->sin_addr.s_addr), dst_addr->sin_port); 505 + 506 + /* the device is known only --after-- address resolution */ 507 + ib_conn->device = NULL; 508 + 509 + iser_err("connecting to: %d.%d.%d.%d, port 0x%x\n", 510 + NIPQUAD(dst_addr->sin_addr), dst_addr->sin_port); 511 + 512 + ib_conn->state = ISER_CONN_PENDING; 513 + 514 + ib_conn->cma_id = rdma_create_id(iser_cma_handler, 515 + (void *)ib_conn, 516 + RDMA_PS_TCP); 517 + if (IS_ERR(ib_conn->cma_id)) { 518 + err = PTR_ERR(ib_conn->cma_id); 519 + iser_err("rdma_create_id failed: %d\n", err); 520 + goto id_failure; 521 + } 522 + 523 + src = (struct sockaddr *)src_addr; 524 + dst = (struct sockaddr *)dst_addr; 525 + err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); 526 + if (err) { 527 + iser_err("rdma_resolve_addr failed: %d\n", err); 528 + goto addr_failure; 529 + } 530 + 531 + if (!non_blocking) { 532 + wait_event_interruptible(ib_conn->wait, 533 + (ib_conn->state != ISER_CONN_PENDING)); 534 + 535 + if (ib_conn->state != ISER_CONN_UP) { 536 + err = -EIO; 537 + goto connect_failure; 538 + } 539 + } 540 + 541 + mutex_lock(&ig.connlist_mutex); 542 + list_add(&ib_conn->conn_list, &ig.connlist); 543 + mutex_unlock(&ig.connlist_mutex); 544 + return 0; 545 + 546 + id_failure: 547 + ib_conn->cma_id = NULL; 548 + addr_failure: 549 + ib_conn->state = ISER_CONN_DOWN; 550 + connect_failure: 551 + iser_conn_release(ib_conn); 552 + return err; 553 + } 554 + 555 + /** 556 + * Frees all conn objects and deallocs conn descriptor 557 + */ 558 + void iser_conn_release(struct iser_conn *ib_conn) 559 + { 560 + struct iser_device *device = ib_conn->device; 561 + 562 + BUG_ON(ib_conn->state != ISER_CONN_DOWN); 563 + 564 + mutex_lock(&ig.connlist_mutex); 565 + list_del(&ib_conn->conn_list); 566 + mutex_unlock(&ig.connlist_mutex); 567 + 568 + iser_free_ib_conn_res(ib_conn); 569 + ib_conn->device = NULL; 570 + /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 571 + if (device != NULL) 572 + iser_device_try_release(device); 573 + kfree(ib_conn); 574 + } 575 + 576 + 577 + /** 578 + * iser_reg_page_vec - Register physical memory 579 + * 580 + * returns: 0 on success, errno code on failure 581 + */ 582 + int iser_reg_page_vec(struct iser_conn *ib_conn, 583 + struct iser_page_vec *page_vec, 584 + struct iser_mem_reg *mem_reg) 585 + { 586 + struct ib_pool_fmr *mem; 587 + u64 io_addr; 588 + u64 *page_list; 589 + int status; 590 + 591 + page_list = page_vec->pages; 592 + io_addr = page_list[0]; 593 + 594 + mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool, 595 + page_list, 596 + page_vec->length, 597 + &io_addr); 598 + 599 + if (IS_ERR(mem)) { 600 + status = (int)PTR_ERR(mem); 601 + iser_err("ib_fmr_pool_map_phys failed: %d\n", status); 602 + return status; 603 + } 604 + 605 + mem_reg->lkey = mem->fmr->lkey; 606 + mem_reg->rkey = mem->fmr->rkey; 607 + mem_reg->len = page_vec->length * PAGE_SIZE; 608 + mem_reg->va = io_addr; 609 + mem_reg->mem_h = (void *)mem; 610 + 611 + mem_reg->va += page_vec->offset; 612 + mem_reg->len = page_vec->data_size; 613 + 614 + iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, " 615 + "entry[0]: (0x%08lx,%ld)] -> " 616 + "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n", 617 + page_vec, page_vec->length, 618 + (unsigned long)page_vec->pages[0], 619 + (unsigned long)page_vec->data_size, 620 + (unsigned int)mem_reg->lkey, mem_reg->mem_h, 621 + (unsigned long)mem_reg->va, (unsigned long)mem_reg->len); 622 + return 0; 623 + } 624 + 625 + /** 626 + * Unregister (previosuly registered) memory. 627 + */ 628 + void iser_unreg_mem(struct iser_mem_reg *reg) 629 + { 630 + int ret; 631 + 632 + iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h); 633 + 634 + ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h); 635 + if (ret) 636 + iser_err("ib_fmr_pool_unmap failed %d\n", ret); 637 + 638 + reg->mem_h = NULL; 639 + } 640 + 641 + /** 642 + * iser_dto_to_iov - builds IOV from a dto descriptor 643 + */ 644 + static void iser_dto_to_iov(struct iser_dto *dto, struct ib_sge *iov, int iov_len) 645 + { 646 + int i; 647 + struct ib_sge *sge; 648 + struct iser_regd_buf *regd_buf; 649 + 650 + if (dto->regd_vector_len > iov_len) { 651 + iser_err("iov size %d too small for posting dto of len %d\n", 652 + iov_len, dto->regd_vector_len); 653 + BUG(); 654 + } 655 + 656 + for (i = 0; i < dto->regd_vector_len; i++) { 657 + sge = &iov[i]; 658 + regd_buf = dto->regd[i]; 659 + 660 + sge->addr = regd_buf->reg.va; 661 + sge->length = regd_buf->reg.len; 662 + sge->lkey = regd_buf->reg.lkey; 663 + 664 + if (dto->used_sz[i] > 0) /* Adjust size */ 665 + sge->length = dto->used_sz[i]; 666 + 667 + /* offset and length should not exceed the regd buf length */ 668 + if (sge->length + dto->offset[i] > regd_buf->reg.len) { 669 + iser_err("Used len:%ld + offset:%d, exceed reg.buf.len:" 670 + "%ld in dto:0x%p [%d], va:0x%08lX\n", 671 + (unsigned long)sge->length, dto->offset[i], 672 + (unsigned long)regd_buf->reg.len, dto, i, 673 + (unsigned long)sge->addr); 674 + BUG(); 675 + } 676 + 677 + sge->addr += dto->offset[i]; /* Adjust offset */ 678 + } 679 + } 680 + 681 + /** 682 + * iser_post_recv - Posts a receive buffer. 683 + * 684 + * returns 0 on success, -1 on failure 685 + */ 686 + int iser_post_recv(struct iser_desc *rx_desc) 687 + { 688 + int ib_ret, ret_val = 0; 689 + struct ib_recv_wr recv_wr, *recv_wr_failed; 690 + struct ib_sge iov[2]; 691 + struct iser_conn *ib_conn; 692 + struct iser_dto *recv_dto = &rx_desc->dto; 693 + 694 + /* Retrieve conn */ 695 + ib_conn = recv_dto->conn->ib_conn; 696 + 697 + iser_dto_to_iov(recv_dto, iov, 2); 698 + 699 + recv_wr.next = NULL; 700 + recv_wr.sg_list = iov; 701 + recv_wr.num_sge = recv_dto->regd_vector_len; 702 + recv_wr.wr_id = (unsigned long)rx_desc; 703 + 704 + atomic_inc(&ib_conn->post_recv_buf_count); 705 + ib_ret = ib_post_recv(ib_conn->qp, &recv_wr, &recv_wr_failed); 706 + if (ib_ret) { 707 + iser_err("ib_post_recv failed ret=%d\n", ib_ret); 708 + atomic_dec(&ib_conn->post_recv_buf_count); 709 + ret_val = -1; 710 + } 711 + 712 + return ret_val; 713 + } 714 + 715 + /** 716 + * iser_start_send - Initiate a Send DTO operation 717 + * 718 + * returns 0 on success, -1 on failure 719 + */ 720 + int iser_post_send(struct iser_desc *tx_desc) 721 + { 722 + int ib_ret, ret_val = 0; 723 + struct ib_send_wr send_wr, *send_wr_failed; 724 + struct ib_sge iov[MAX_REGD_BUF_VECTOR_LEN]; 725 + struct iser_conn *ib_conn; 726 + struct iser_dto *dto = &tx_desc->dto; 727 + 728 + ib_conn = dto->conn->ib_conn; 729 + 730 + iser_dto_to_iov(dto, iov, MAX_REGD_BUF_VECTOR_LEN); 731 + 732 + send_wr.next = NULL; 733 + send_wr.wr_id = (unsigned long)tx_desc; 734 + send_wr.sg_list = iov; 735 + send_wr.num_sge = dto->regd_vector_len; 736 + send_wr.opcode = IB_WR_SEND; 737 + send_wr.send_flags = dto->notify_enable ? IB_SEND_SIGNALED : 0; 738 + 739 + atomic_inc(&ib_conn->post_send_buf_count); 740 + 741 + ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed); 742 + if (ib_ret) { 743 + iser_err("Failed to start SEND DTO, dto: 0x%p, IOV len: %d\n", 744 + dto, dto->regd_vector_len); 745 + iser_err("ib_post_send failed, ret:%d\n", ib_ret); 746 + atomic_dec(&ib_conn->post_send_buf_count); 747 + ret_val = -1; 748 + } 749 + 750 + return ret_val; 751 + } 752 + 753 + static void iser_comp_error_worker(void *data) 754 + { 755 + struct iser_conn *ib_conn = data; 756 + 757 + /* getting here when the state is UP means that the conn is being * 758 + * terminated asynchronously from the iSCSI layer's perspective. */ 759 + if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 760 + ISER_CONN_TERMINATING)) 761 + iscsi_conn_failure(ib_conn->iser_conn->iscsi_conn, 762 + ISCSI_ERR_CONN_FAILED); 763 + 764 + /* complete the termination process if disconnect event was delivered * 765 + * note there are no more non completed posts to the QP */ 766 + if (ib_conn->disc_evt_flag) { 767 + ib_conn->state = ISER_CONN_DOWN; 768 + wake_up_interruptible(&ib_conn->wait); 769 + } 770 + } 771 + 772 + static void iser_handle_comp_error(struct iser_desc *desc) 773 + { 774 + struct iser_dto *dto = &desc->dto; 775 + struct iser_conn *ib_conn = dto->conn->ib_conn; 776 + 777 + iser_dto_buffs_release(dto); 778 + 779 + if (desc->type == ISCSI_RX) { 780 + kfree(desc->data); 781 + kmem_cache_free(ig.desc_cache, desc); 782 + atomic_dec(&ib_conn->post_recv_buf_count); 783 + } else { /* type is TX control/command/dataout */ 784 + if (desc->type == ISCSI_TX_DATAOUT) 785 + kmem_cache_free(ig.desc_cache, desc); 786 + atomic_dec(&ib_conn->post_send_buf_count); 787 + } 788 + 789 + if (atomic_read(&ib_conn->post_recv_buf_count) == 0 && 790 + atomic_read(&ib_conn->post_send_buf_count) == 0) 791 + schedule_work(&ib_conn->comperror_work); 792 + } 793 + 794 + static void iser_cq_tasklet_fn(unsigned long data) 795 + { 796 + struct iser_device *device = (struct iser_device *)data; 797 + struct ib_cq *cq = device->cq; 798 + struct ib_wc wc; 799 + struct iser_desc *desc; 800 + unsigned long xfer_len; 801 + 802 + while (ib_poll_cq(cq, 1, &wc) == 1) { 803 + desc = (struct iser_desc *) (unsigned long) wc.wr_id; 804 + BUG_ON(desc == NULL); 805 + 806 + if (wc.status == IB_WC_SUCCESS) { 807 + if (desc->type == ISCSI_RX) { 808 + xfer_len = (unsigned long)wc.byte_len; 809 + iser_rcv_completion(desc, xfer_len); 810 + } else /* type == ISCSI_TX_CONTROL/SCSI_CMD/DOUT */ 811 + iser_snd_completion(desc); 812 + } else { 813 + iser_err("comp w. error op %d status %d\n",desc->type,wc.status); 814 + iser_handle_comp_error(desc); 815 + } 816 + } 817 + /* #warning "it is assumed here that arming CQ only once its empty" * 818 + * " would not cause interrupts to be missed" */ 819 + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 820 + } 821 + 822 + static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 823 + { 824 + struct iser_device *device = (struct iser_device *)cq_context; 825 + 826 + tasklet_schedule(&device->cq_tasklet); 827 + }
+1
drivers/scsi/Makefile
··· 34 34 obj-$(CONFIG_SCSI_SAS_ATTRS) += scsi_transport_sas.o 35 35 36 36 obj-$(CONFIG_ISCSI_TCP) += libiscsi.o iscsi_tcp.o 37 + obj-$(CONFIG_INFINIBAND_ISER) += libiscsi.o 37 38 obj-$(CONFIG_SCSI_AMIGA7XX) += amiga7xx.o 53c7xx.o 38 39 obj-$(CONFIG_A3000_SCSI) += a3000.o wd33c93.o 39 40 obj-$(CONFIG_A2091_SCSI) += a2091.o wd33c93.o