1/*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/err.h>
39#include <linux/string.h>
40#include <linux/parser.h>
41#include <linux/random.h>
42#include <linux/jiffies.h>
43#include <linux/lockdep.h>
44#include <linux/inet.h>
45#include <rdma/ib_cache.h>
46
47#include <linux/atomic.h>
48
49#include <scsi/scsi.h>
50#include <scsi/scsi_device.h>
51#include <scsi/scsi_dbg.h>
52#include <scsi/scsi_tcq.h>
53#include <scsi/srp.h>
54#include <scsi/scsi_transport_srp.h>
55
56#include "ib_srp.h"
57
58#define DRV_NAME "ib_srp"
59#define PFX DRV_NAME ": "
60
61MODULE_AUTHOR("Roland Dreier");
62MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63MODULE_LICENSE("Dual BSD/GPL");
64
65#if !defined(CONFIG_DYNAMIC_DEBUG)
66#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
67#define DYNAMIC_DEBUG_BRANCH(descriptor) false
68#endif
69
70static unsigned int srp_sg_tablesize;
71static unsigned int cmd_sg_entries;
72static unsigned int indirect_sg_entries;
73static bool allow_ext_sg;
74static bool prefer_fr = true;
75static bool register_always = true;
76static bool never_register;
77static int topspin_workarounds = 1;
78
79module_param(srp_sg_tablesize, uint, 0444);
80MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
81
82module_param(cmd_sg_entries, uint, 0444);
83MODULE_PARM_DESC(cmd_sg_entries,
84 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
85
86module_param(indirect_sg_entries, uint, 0444);
87MODULE_PARM_DESC(indirect_sg_entries,
88 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
89
90module_param(allow_ext_sg, bool, 0444);
91MODULE_PARM_DESC(allow_ext_sg,
92 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
93
94module_param(topspin_workarounds, int, 0444);
95MODULE_PARM_DESC(topspin_workarounds,
96 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
97
98module_param(prefer_fr, bool, 0444);
99MODULE_PARM_DESC(prefer_fr,
100"Whether to use fast registration if both FMR and fast registration are supported");
101
102module_param(register_always, bool, 0444);
103MODULE_PARM_DESC(register_always,
104 "Use memory registration even for contiguous memory regions");
105
106module_param(never_register, bool, 0444);
107MODULE_PARM_DESC(never_register, "Never register memory");
108
109static const struct kernel_param_ops srp_tmo_ops;
110
111static int srp_reconnect_delay = 10;
112module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
113 S_IRUGO | S_IWUSR);
114MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
115
116static int srp_fast_io_fail_tmo = 15;
117module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
118 S_IRUGO | S_IWUSR);
119MODULE_PARM_DESC(fast_io_fail_tmo,
120 "Number of seconds between the observation of a transport"
121 " layer error and failing all I/O. \"off\" means that this"
122 " functionality is disabled.");
123
124static int srp_dev_loss_tmo = 600;
125module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
126 S_IRUGO | S_IWUSR);
127MODULE_PARM_DESC(dev_loss_tmo,
128 "Maximum number of seconds that the SRP transport should"
129 " insulate transport layer errors. After this time has been"
130 " exceeded the SCSI host is removed. Should be"
131 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
132 " if fast_io_fail_tmo has not been set. \"off\" means that"
133 " this functionality is disabled.");
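/*
 * The three timeouts above share srp_tmo_ops so that srp_tmo_set() can
 * validate each new value against the other two. Since they are declared
 * with S_IRUGO | S_IWUSR they can also be read and updated at runtime,
 * typically via /sys/module/ib_srp/parameters/.
 */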
134
135static bool srp_use_imm_data = true;
136module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
137MODULE_PARM_DESC(use_imm_data,
138 "Whether or not to request permission to use immediate data during SRP login.");
139
140static unsigned int srp_max_imm_data = 8 * 1024;
141module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
142MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
143
144static unsigned ch_count;
145module_param(ch_count, uint, 0444);
146MODULE_PARM_DESC(ch_count,
147 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
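/*
 * Illustrative example only: read-only (0444) parameters such as
 * cmd_sg_entries and ch_count can be set when the module is loaded, e.g.
 * "modprobe ib_srp cmd_sg_entries=64 ch_count=2".
 */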
148
149static void srp_add_one(struct ib_device *device);
150static void srp_remove_one(struct ib_device *device, void *client_data);
151static void srp_rename_dev(struct ib_device *device, void *client_data);
152static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
153static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
154 const char *opname);
155static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
156 const struct ib_cm_event *event);
157static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
158 struct rdma_cm_event *event);
159
160static struct scsi_transport_template *ib_srp_transport_template;
161static struct workqueue_struct *srp_remove_wq;
162
163static struct ib_client srp_client = {
164 .name = "srp",
165 .add = srp_add_one,
166 .remove = srp_remove_one,
167 .rename = srp_rename_dev
168};
169
170static struct ib_sa_client srp_sa_client;
171
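/**
 * srp_tmo_get() - show an SRP timeout module parameter
 * @buffer: Destination buffer for the parameter value.
 * @kp: Kernel parameter descriptor.
 *
 * Prints the timeout in seconds, or "off" if the stored value is negative.
 */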
172static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
173{
174 int tmo = *(int *)kp->arg;
175
176 if (tmo >= 0)
177 return sprintf(buffer, "%d\n", tmo);
178 else
179 return sprintf(buffer, "off\n");
180}
181
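/**
 * srp_tmo_set() - set an SRP timeout module parameter
 * @val: New parameter value as a string.
 * @kp: Kernel parameter descriptor.
 *
 * Parses @val with srp_parse_tmo() and stores the result only after
 * srp_tmo_valid() has accepted the new combination of reconnect_delay,
 * fast_io_fail_tmo and dev_loss_tmo.
 */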
182static int srp_tmo_set(const char *val, const struct kernel_param *kp)
183{
184 int tmo, res;
185
186 res = srp_parse_tmo(&tmo, val);
187 if (res)
188 goto out;
189
190 if (kp->arg == &srp_reconnect_delay)
191 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
192 srp_dev_loss_tmo);
193 else if (kp->arg == &srp_fast_io_fail_tmo)
194 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
195 else
196 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
197 tmo);
198 if (res)
199 goto out;
200 *(int *)kp->arg = tmo;
201
202out:
203 return res;
204}
205
206static const struct kernel_param_ops srp_tmo_ops = {
207 .get = srp_tmo_get,
208 .set = srp_tmo_set,
209};
210
211static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
212{
213 return (struct srp_target_port *) host->hostdata;
214}
215
216static const char *srp_target_info(struct Scsi_Host *host)
217{
218 return host_to_target(host)->target_name;
219}
220
221static int srp_target_is_topspin(struct srp_target_port *target)
222{
223 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
224 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
225
226 return topspin_workarounds &&
227 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
228 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
229}
230
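/**
 * srp_alloc_iu() - allocate an information unit and map it for DMA
 * @host: SRP host.
 * @size: Size of the IU buffer in bytes.
 * @gfp_mask: Allocation flags.
 * @direction: DMA direction for the buffer mapping.
 *
 * Returns a pointer to the new IU on success or NULL on failure.
 */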
231static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
232 gfp_t gfp_mask,
233 enum dma_data_direction direction)
234{
235 struct srp_iu *iu;
236
237 iu = kmalloc(sizeof *iu, gfp_mask);
238 if (!iu)
239 goto out;
240
241 iu->buf = kzalloc(size, gfp_mask);
242 if (!iu->buf)
243 goto out_free_iu;
244
245 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
246 direction);
247 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
248 goto out_free_buf;
249
250 iu->size = size;
251 iu->direction = direction;
252
253 return iu;
254
255out_free_buf:
256 kfree(iu->buf);
257out_free_iu:
258 kfree(iu);
259out:
260 return NULL;
261}
262
263static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
264{
265 if (!iu)
266 return;
267
268 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
269 iu->direction);
270 kfree(iu->buf);
271 kfree(iu);
272}
273
274static void srp_qp_event(struct ib_event *event, void *context)
275{
276 pr_debug("QP event %s (%d)\n",
277 ib_event_msg(event->event), event->event);
278}
279
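/**
 * srp_init_ib_qp() - transition a queue pair into the INIT state
 * @target: SRP target port.
 * @qp: Queue pair to initialize.
 *
 * Looks up the P_Key index for the login P_Key and enables remote read and
 * remote write access on the queue pair.
 */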
280static int srp_init_ib_qp(struct srp_target_port *target,
281 struct ib_qp *qp)
282{
283 struct ib_qp_attr *attr;
284 int ret;
285
286 attr = kmalloc(sizeof *attr, GFP_KERNEL);
287 if (!attr)
288 return -ENOMEM;
289
290 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
291 target->srp_host->port,
292 be16_to_cpu(target->ib_cm.pkey),
293 &attr->pkey_index);
294 if (ret)
295 goto out;
296
297 attr->qp_state = IB_QPS_INIT;
298 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
299 IB_ACCESS_REMOTE_WRITE);
300 attr->port_num = target->srp_host->port;
301
302 ret = ib_modify_qp(qp, attr,
303 IB_QP_STATE |
304 IB_QP_PKEY_INDEX |
305 IB_QP_ACCESS_FLAGS |
306 IB_QP_PORT);
307
308out:
309 kfree(attr);
310 return ret;
311}
312
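/**
 * srp_new_ib_cm_id() - allocate a new IB CM ID for a channel
 * @ch: SRP RDMA channel.
 *
 * Destroys any existing CM ID, installs the new one and initializes the path
 * record fields (SGID, DGID, P_Key and service ID) used for the subsequent
 * path query and login.
 */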
313static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
314{
315 struct srp_target_port *target = ch->target;
316 struct ib_cm_id *new_cm_id;
317
318 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
319 srp_ib_cm_handler, ch);
320 if (IS_ERR(new_cm_id))
321 return PTR_ERR(new_cm_id);
322
323 if (ch->ib_cm.cm_id)
324 ib_destroy_cm_id(ch->ib_cm.cm_id);
325 ch->ib_cm.cm_id = new_cm_id;
326 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
327 target->srp_host->port))
328 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
329 else
330 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
331 ch->ib_cm.path.sgid = target->sgid;
332 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
333 ch->ib_cm.path.pkey = target->ib_cm.pkey;
334 ch->ib_cm.path.service_id = target->ib_cm.service_id;
335
336 return 0;
337}
338
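/**
 * srp_new_rdma_cm_id() - allocate a new RDMA CM ID for a channel
 * @ch: SRP RDMA channel.
 *
 * Creates an RDMA CM ID, resolves the destination address and, on success,
 * swaps the new CM ID into @ch. The previous CM ID, if any, is destroyed.
 */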
339static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
340{
341 struct srp_target_port *target = ch->target;
342 struct rdma_cm_id *new_cm_id;
343 int ret;
344
345 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
346 RDMA_PS_TCP, IB_QPT_RC);
347 if (IS_ERR(new_cm_id)) {
348 ret = PTR_ERR(new_cm_id);
349 new_cm_id = NULL;
350 goto out;
351 }
352
353 init_completion(&ch->done);
354 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
355 &target->rdma_cm.src.sa : NULL,
356 &target->rdma_cm.dst.sa,
357 SRP_PATH_REC_TIMEOUT_MS);
358 if (ret) {
359 pr_err("No route available from %pISpsc to %pISpsc (%d)\n",
360 &target->rdma_cm.src, &target->rdma_cm.dst, ret);
361 goto out;
362 }
363 ret = wait_for_completion_interruptible(&ch->done);
364 if (ret < 0)
365 goto out;
366
367 ret = ch->status;
368 if (ret) {
369 pr_err("Resolving address %pISpsc failed (%d)\n",
370 &target->rdma_cm.dst, ret);
371 goto out;
372 }
373
374 swap(ch->rdma_cm.cm_id, new_cm_id);
375
376out:
377 if (new_cm_id)
378 rdma_destroy_id(new_cm_id);
379
380 return ret;
381}
382
383static int srp_new_cm_id(struct srp_rdma_ch *ch)
384{
385 struct srp_target_port *target = ch->target;
386
387 return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
388 srp_new_ib_cm_id(ch);
389}
390
391static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
392{
393 struct srp_device *dev = target->srp_host->srp_dev;
394 struct ib_fmr_pool_param fmr_param;
395
396 memset(&fmr_param, 0, sizeof(fmr_param));
397 fmr_param.pool_size = target->mr_pool_size;
398 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
399 fmr_param.cache = 1;
400 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
401 fmr_param.page_shift = ilog2(dev->mr_page_size);
402 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
403 IB_ACCESS_REMOTE_WRITE |
404 IB_ACCESS_REMOTE_READ);
405
406 return ib_create_fmr_pool(dev->pd, &fmr_param);
407}
408
409/**
410 * srp_destroy_fr_pool() - free the resources owned by a pool
411 * @pool: Fast registration pool to be destroyed.
412 */
413static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
414{
415 int i;
416 struct srp_fr_desc *d;
417
418 if (!pool)
419 return;
420
421 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
422 if (d->mr)
423 ib_dereg_mr(d->mr);
424 }
425 kfree(pool);
426}
427
428/**
429 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
430 * @device: IB device to allocate fast registration descriptors for.
431 * @pd: Protection domain associated with the FR descriptors.
432 * @pool_size: Number of descriptors to allocate.
433 * @max_page_list_len: Maximum fast registration work request page list length.
434 */
435static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
436 struct ib_pd *pd, int pool_size,
437 int max_page_list_len)
438{
439 struct srp_fr_pool *pool;
440 struct srp_fr_desc *d;
441 struct ib_mr *mr;
442 int i, ret = -EINVAL;
443 enum ib_mr_type mr_type;
444
445 if (pool_size <= 0)
446 goto err;
447 ret = -ENOMEM;
448 pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL);
449 if (!pool)
450 goto err;
451 pool->size = pool_size;
452 pool->max_page_list_len = max_page_list_len;
453 spin_lock_init(&pool->lock);
454 INIT_LIST_HEAD(&pool->free_list);
455
456 if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
457 mr_type = IB_MR_TYPE_SG_GAPS;
458 else
459 mr_type = IB_MR_TYPE_MEM_REG;
460
461 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
462 mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
463 if (IS_ERR(mr)) {
464 ret = PTR_ERR(mr);
465 if (ret == -ENOMEM)
466 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
467 dev_name(&device->dev));
468 goto destroy_pool;
469 }
470 d->mr = mr;
471 list_add_tail(&d->entry, &pool->free_list);
472 }
473
474out:
475 return pool;
476
477destroy_pool:
478 srp_destroy_fr_pool(pool);
479
480err:
481 pool = ERR_PTR(ret);
482 goto out;
483}
484
485/**
486 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
487 * @pool: Pool to obtain descriptor from.
488 */
489static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
490{
491 struct srp_fr_desc *d = NULL;
492 unsigned long flags;
493
494 spin_lock_irqsave(&pool->lock, flags);
495 if (!list_empty(&pool->free_list)) {
496 d = list_first_entry(&pool->free_list, typeof(*d), entry);
497 list_del(&d->entry);
498 }
499 spin_unlock_irqrestore(&pool->lock, flags);
500
501 return d;
502}
503
504/**
505 * srp_fr_pool_put() - put an FR descriptor back in the free list
506 * @pool: Pool the descriptor was allocated from.
507 * @desc: Pointer to an array of fast registration descriptor pointers.
508 * @n: Number of descriptors to put back.
509 *
510 * Note: The caller must already have queued an invalidation request for
511 * desc->mr->rkey before calling this function.
512 */
513static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
514 int n)
515{
516 unsigned long flags;
517 int i;
518
519 spin_lock_irqsave(&pool->lock, flags);
520 for (i = 0; i < n; i++)
521 list_add(&desc[i]->entry, &pool->free_list);
522 spin_unlock_irqrestore(&pool->lock, flags);
523}
524
525static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
526{
527 struct srp_device *dev = target->srp_host->srp_dev;
528
529 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
530 dev->max_pages_per_mr);
531}
532
533/**
534 * srp_destroy_qp() - destroy an RDMA queue pair
535 * @ch: SRP RDMA channel.
536 *
537 * Drain the qp before destroying it. This prevents the receive
538 * completion handler from accessing the queue pair while it is
539 * being destroyed.
540 */
541static void srp_destroy_qp(struct srp_rdma_ch *ch)
542{
543 spin_lock_irq(&ch->lock);
544 ib_process_cq_direct(ch->send_cq, -1);
545 spin_unlock_irq(&ch->lock);
546
547 ib_drain_qp(ch->qp);
548 ib_destroy_qp(ch->qp);
549}
550
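/**
 * srp_create_ch_ib() - create the completion queues, queue pair and MR pool
 * @ch: SRP RDMA channel.
 *
 * Allocates the receive and send CQs, creates an RC queue pair and, depending
 * on the device capabilities, an FR or FMR pool. Any resources previously
 * owned by @ch are destroyed only after the new ones have been set up.
 */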
551static int srp_create_ch_ib(struct srp_rdma_ch *ch)
552{
553 struct srp_target_port *target = ch->target;
554 struct srp_device *dev = target->srp_host->srp_dev;
555 const struct ib_device_attr *attr = &dev->dev->attrs;
556 struct ib_qp_init_attr *init_attr;
557 struct ib_cq *recv_cq, *send_cq;
558 struct ib_qp *qp;
559 struct ib_fmr_pool *fmr_pool = NULL;
560 struct srp_fr_pool *fr_pool = NULL;
561 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
562 int ret;
563
564 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
565 if (!init_attr)
566 return -ENOMEM;
567
568 /* queue_size + 1 for ib_drain_rq() */
569 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
570 ch->comp_vector, IB_POLL_SOFTIRQ);
571 if (IS_ERR(recv_cq)) {
572 ret = PTR_ERR(recv_cq);
573 goto err;
574 }
575
576 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
577 ch->comp_vector, IB_POLL_DIRECT);
578 if (IS_ERR(send_cq)) {
579 ret = PTR_ERR(send_cq);
580 goto err_recv_cq;
581 }
582
583 init_attr->event_handler = srp_qp_event;
584 init_attr->cap.max_send_wr = m * target->queue_size;
585 init_attr->cap.max_recv_wr = target->queue_size + 1;
586 init_attr->cap.max_recv_sge = 1;
587 init_attr->cap.max_send_sge = min(SRP_MAX_SGE, attr->max_send_sge);
588 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
589 init_attr->qp_type = IB_QPT_RC;
590 init_attr->send_cq = send_cq;
591 init_attr->recv_cq = recv_cq;
592
593 ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U);
594
595 if (target->using_rdma_cm) {
596 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
597 qp = ch->rdma_cm.cm_id->qp;
598 } else {
599 qp = ib_create_qp(dev->pd, init_attr);
600 if (!IS_ERR(qp)) {
601 ret = srp_init_ib_qp(target, qp);
602 if (ret)
603 ib_destroy_qp(qp);
604 } else {
605 ret = PTR_ERR(qp);
606 }
607 }
608 if (ret) {
609 pr_err("QP creation failed for dev %s: %d\n",
610 dev_name(&dev->dev->dev), ret);
611 goto err_send_cq;
612 }
613
614 if (dev->use_fast_reg) {
615 fr_pool = srp_alloc_fr_pool(target);
616 if (IS_ERR(fr_pool)) {
617 ret = PTR_ERR(fr_pool);
618 shost_printk(KERN_WARNING, target->scsi_host, PFX
619 "FR pool allocation failed (%d)\n", ret);
620 goto err_qp;
621 }
622 } else if (dev->use_fmr) {
623 fmr_pool = srp_alloc_fmr_pool(target);
624 if (IS_ERR(fmr_pool)) {
625 ret = PTR_ERR(fmr_pool);
626 shost_printk(KERN_WARNING, target->scsi_host, PFX
627 "FMR pool allocation failed (%d)\n", ret);
628 goto err_qp;
629 }
630 }
631
632 if (ch->qp)
633 srp_destroy_qp(ch);
634 if (ch->recv_cq)
635 ib_free_cq(ch->recv_cq);
636 if (ch->send_cq)
637 ib_free_cq(ch->send_cq);
638
639 ch->qp = qp;
640 ch->recv_cq = recv_cq;
641 ch->send_cq = send_cq;
642
643 if (dev->use_fast_reg) {
644 if (ch->fr_pool)
645 srp_destroy_fr_pool(ch->fr_pool);
646 ch->fr_pool = fr_pool;
647 } else if (dev->use_fmr) {
648 if (ch->fmr_pool)
649 ib_destroy_fmr_pool(ch->fmr_pool);
650 ch->fmr_pool = fmr_pool;
651 }
652
653 kfree(init_attr);
654 return 0;
655
656err_qp:
657 if (target->using_rdma_cm)
658 rdma_destroy_qp(ch->rdma_cm.cm_id);
659 else
660 ib_destroy_qp(qp);
661
662err_send_cq:
663 ib_free_cq(send_cq);
664
665err_recv_cq:
666 ib_free_cq(recv_cq);
667
668err:
669 kfree(init_attr);
670 return ret;
671}
672
673/*
674 * Note: this function may be called without srp_alloc_iu_bufs() having been
675 * invoked. Hence the ch->[rt]x_ring checks.
676 */
677static void srp_free_ch_ib(struct srp_target_port *target,
678 struct srp_rdma_ch *ch)
679{
680 struct srp_device *dev = target->srp_host->srp_dev;
681 int i;
682
683 if (!ch->target)
684 return;
685
686 if (target->using_rdma_cm) {
687 if (ch->rdma_cm.cm_id) {
688 rdma_destroy_id(ch->rdma_cm.cm_id);
689 ch->rdma_cm.cm_id = NULL;
690 }
691 } else {
692 if (ch->ib_cm.cm_id) {
693 ib_destroy_cm_id(ch->ib_cm.cm_id);
694 ch->ib_cm.cm_id = NULL;
695 }
696 }
697
698	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
699 if (!ch->qp)
700 return;
701
702 if (dev->use_fast_reg) {
703 if (ch->fr_pool)
704 srp_destroy_fr_pool(ch->fr_pool);
705 } else if (dev->use_fmr) {
706 if (ch->fmr_pool)
707 ib_destroy_fmr_pool(ch->fmr_pool);
708 }
709
710 srp_destroy_qp(ch);
711 ib_free_cq(ch->send_cq);
712 ib_free_cq(ch->recv_cq);
713
714 /*
715	 * Prevent the SCSI error handler from using this channel after it
716	 * has been freed. The SCSI error handler may continue trying to
717	 * perform recovery actions even after scsi_remove_host() has
718	 * returned.
719 */
720 ch->target = NULL;
721
722 ch->qp = NULL;
723 ch->send_cq = ch->recv_cq = NULL;
724
725 if (ch->rx_ring) {
726 for (i = 0; i < target->queue_size; ++i)
727 srp_free_iu(target->srp_host, ch->rx_ring[i]);
728 kfree(ch->rx_ring);
729 ch->rx_ring = NULL;
730 }
731 if (ch->tx_ring) {
732 for (i = 0; i < target->queue_size; ++i)
733 srp_free_iu(target->srp_host, ch->tx_ring[i]);
734 kfree(ch->tx_ring);
735 ch->tx_ring = NULL;
736 }
737}
738
739static void srp_path_rec_completion(int status,
740 struct sa_path_rec *pathrec,
741 void *ch_ptr)
742{
743 struct srp_rdma_ch *ch = ch_ptr;
744 struct srp_target_port *target = ch->target;
745
746 ch->status = status;
747 if (status)
748 shost_printk(KERN_ERR, target->scsi_host,
749 PFX "Got failed path rec status %d\n", status);
750 else
751 ch->ib_cm.path = *pathrec;
752 complete(&ch->done);
753}
754
755static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
756{
757 struct srp_target_port *target = ch->target;
758 int ret;
759
760 ch->ib_cm.path.numb_path = 1;
761
762 init_completion(&ch->done);
763
764 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
765 target->srp_host->srp_dev->dev,
766 target->srp_host->port,
767 &ch->ib_cm.path,
768 IB_SA_PATH_REC_SERVICE_ID |
769 IB_SA_PATH_REC_DGID |
770 IB_SA_PATH_REC_SGID |
771 IB_SA_PATH_REC_NUMB_PATH |
772 IB_SA_PATH_REC_PKEY,
773 SRP_PATH_REC_TIMEOUT_MS,
774 GFP_KERNEL,
775 srp_path_rec_completion,
776 ch, &ch->ib_cm.path_query);
777 if (ch->ib_cm.path_query_id < 0)
778 return ch->ib_cm.path_query_id;
779
780 ret = wait_for_completion_interruptible(&ch->done);
781 if (ret < 0)
782 return ret;
783
784 if (ch->status < 0)
785 shost_printk(KERN_WARNING, target->scsi_host,
786 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
787 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
788 be16_to_cpu(target->ib_cm.pkey),
789 be64_to_cpu(target->ib_cm.service_id));
790
791 return ch->status;
792}
793
794static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
795{
796 struct srp_target_port *target = ch->target;
797 int ret;
798
799 init_completion(&ch->done);
800
801 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
802 if (ret)
803 return ret;
804
805 wait_for_completion_interruptible(&ch->done);
806
807 if (ch->status != 0)
808 shost_printk(KERN_WARNING, target->scsi_host,
809 PFX "Path resolution failed\n");
810
811 return ch->status;
812}
813
814static int srp_lookup_path(struct srp_rdma_ch *ch)
815{
816 struct srp_target_port *target = ch->target;
817
818 return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
819 srp_ib_lookup_path(ch);
820}
821
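/**
 * srp_get_subnet_timeout() - query the subnet timeout of an IB port
 * @host: SRP host.
 *
 * Falls back to a subnet timeout of 18 if the port query fails and warns
 * when the value is below 15 since such a small timeout may cause SRP login
 * to fail.
 */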
822static u8 srp_get_subnet_timeout(struct srp_host *host)
823{
824 struct ib_port_attr attr;
825 int ret;
826 u8 subnet_timeout = 18;
827
828 ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
829 if (ret == 0)
830 subnet_timeout = attr.subnet_timeout;
831
832 if (unlikely(subnet_timeout < 15))
833 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
834 dev_name(&host->srp_dev->dev->dev), subnet_timeout);
835
836 return subnet_timeout;
837}
838
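/**
 * srp_send_req() - send an SRP_LOGIN_REQ to the target
 * @ch: SRP RDMA channel.
 * @max_iu_len: Maximum initiator-to-target IU length to request.
 * @multich: Whether this login is for an additional channel.
 *
 * Builds either an IB CM REQ or an RDMA CM connect request, fills in the
 * SRP login payload (including the immediate data flag and the port
 * identifiers) and sends it.
 */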
839static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
840 bool multich)
841{
842 struct srp_target_port *target = ch->target;
843 struct {
844 struct rdma_conn_param rdma_param;
845 struct srp_login_req_rdma rdma_req;
846 struct ib_cm_req_param ib_param;
847 struct srp_login_req ib_req;
848 } *req = NULL;
849 char *ipi, *tpi;
850 int status;
851
852 req = kzalloc(sizeof *req, GFP_KERNEL);
853 if (!req)
854 return -ENOMEM;
855
856 req->ib_param.flow_control = 1;
857 req->ib_param.retry_count = target->tl_retry_count;
858
859 /*
860 * Pick some arbitrary defaults here; we could make these
861 * module parameters if anyone cared about setting them.
862 */
863 req->ib_param.responder_resources = 4;
864 req->ib_param.rnr_retry_count = 7;
865 req->ib_param.max_cm_retries = 15;
866
867 req->ib_req.opcode = SRP_LOGIN_REQ;
868 req->ib_req.tag = 0;
869 req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
870 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
871 SRP_BUF_FORMAT_INDIRECT);
872 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
873 SRP_MULTICHAN_SINGLE);
874 if (srp_use_imm_data) {
875 req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
876 req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
877 }
878
879 if (target->using_rdma_cm) {
880 req->rdma_param.flow_control = req->ib_param.flow_control;
881 req->rdma_param.responder_resources =
882 req->ib_param.responder_resources;
883 req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
884 req->rdma_param.retry_count = req->ib_param.retry_count;
885 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
886 req->rdma_param.private_data = &req->rdma_req;
887 req->rdma_param.private_data_len = sizeof(req->rdma_req);
888
889 req->rdma_req.opcode = req->ib_req.opcode;
890 req->rdma_req.tag = req->ib_req.tag;
891 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
892 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
893 req->rdma_req.req_flags = req->ib_req.req_flags;
894 req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
895
896 ipi = req->rdma_req.initiator_port_id;
897 tpi = req->rdma_req.target_port_id;
898 } else {
899 u8 subnet_timeout;
900
901 subnet_timeout = srp_get_subnet_timeout(target->srp_host);
902
903 req->ib_param.primary_path = &ch->ib_cm.path;
904 req->ib_param.alternate_path = NULL;
905 req->ib_param.service_id = target->ib_cm.service_id;
906 get_random_bytes(&req->ib_param.starting_psn, 4);
907 req->ib_param.starting_psn &= 0xffffff;
908 req->ib_param.qp_num = ch->qp->qp_num;
909 req->ib_param.qp_type = ch->qp->qp_type;
910 req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
911 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
912 req->ib_param.private_data = &req->ib_req;
913 req->ib_param.private_data_len = sizeof(req->ib_req);
914
915 ipi = req->ib_req.initiator_port_id;
916 tpi = req->ib_req.target_port_id;
917 }
918
919 /*
920 * In the published SRP specification (draft rev. 16a), the
921 * port identifier format is 8 bytes of ID extension followed
922 * by 8 bytes of GUID. Older drafts put the two halves in the
923 * opposite order, so that the GUID comes first.
924 *
925 * Targets conforming to these obsolete drafts can be
926 * recognized by the I/O Class they report.
927 */
928 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
929 memcpy(ipi, &target->sgid.global.interface_id, 8);
930 memcpy(ipi + 8, &target->initiator_ext, 8);
931 memcpy(tpi, &target->ioc_guid, 8);
932 memcpy(tpi + 8, &target->id_ext, 8);
933 } else {
934 memcpy(ipi, &target->initiator_ext, 8);
935 memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
936 memcpy(tpi, &target->id_ext, 8);
937 memcpy(tpi + 8, &target->ioc_guid, 8);
938 }
939
940 /*
941 * Topspin/Cisco SRP targets will reject our login unless we
942 * zero out the first 8 bytes of our initiator port ID and set
943 * the second 8 bytes to the local node GUID.
944 */
945 if (srp_target_is_topspin(target)) {
946 shost_printk(KERN_DEBUG, target->scsi_host,
947 PFX "Topspin/Cisco initiator port ID workaround "
948 "activated for target GUID %016llx\n",
949 be64_to_cpu(target->ioc_guid));
950 memset(ipi, 0, 8);
951 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
952 }
953
954 if (target->using_rdma_cm)
955 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
956 else
957 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
958
959 kfree(req);
960
961 return status;
962}
963
964static bool srp_queue_remove_work(struct srp_target_port *target)
965{
966 bool changed = false;
967
968 spin_lock_irq(&target->lock);
969 if (target->state != SRP_TARGET_REMOVED) {
970 target->state = SRP_TARGET_REMOVED;
971 changed = true;
972 }
973 spin_unlock_irq(&target->lock);
974
975 if (changed)
976 queue_work(srp_remove_wq, &target->remove_work);
977
978 return changed;
979}
980
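/**
 * srp_disconnect_target() - close the connections of all channels
 * @target: SRP target port.
 *
 * Marks every channel as disconnected and sends an RDMA CM disconnect request
 * or an IB CM DREQ for each channel that has a CM ID.
 */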
981static void srp_disconnect_target(struct srp_target_port *target)
982{
983 struct srp_rdma_ch *ch;
984 int i, ret;
985
986 /* XXX should send SRP_I_LOGOUT request */
987
988 for (i = 0; i < target->ch_count; i++) {
989 ch = &target->ch[i];
990 ch->connected = false;
991 ret = 0;
992 if (target->using_rdma_cm) {
993 if (ch->rdma_cm.cm_id)
994 rdma_disconnect(ch->rdma_cm.cm_id);
995 } else {
996 if (ch->ib_cm.cm_id)
997 ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
998 NULL, 0);
999 }
1000 if (ret < 0) {
1001 shost_printk(KERN_DEBUG, target->scsi_host,
1002 PFX "Sending CM DREQ failed\n");
1003 }
1004 }
1005}
1006
1007static void srp_free_req_data(struct srp_target_port *target,
1008 struct srp_rdma_ch *ch)
1009{
1010 struct srp_device *dev = target->srp_host->srp_dev;
1011 struct ib_device *ibdev = dev->dev;
1012 struct srp_request *req;
1013 int i;
1014
1015 if (!ch->req_ring)
1016 return;
1017
1018 for (i = 0; i < target->req_ring_size; ++i) {
1019 req = &ch->req_ring[i];
1020 if (dev->use_fast_reg) {
1021 kfree(req->fr_list);
1022 } else {
1023 kfree(req->fmr_list);
1024 kfree(req->map_page);
1025 }
1026 if (req->indirect_dma_addr) {
1027 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
1028 target->indirect_size,
1029 DMA_TO_DEVICE);
1030 }
1031 kfree(req->indirect_desc);
1032 }
1033
1034 kfree(ch->req_ring);
1035 ch->req_ring = NULL;
1036}
1037
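/**
 * srp_alloc_req_data() - allocate the request ring of a channel
 * @ch: SRP RDMA channel.
 *
 * Allocates target->req_ring_size request structures together with their
 * per-request MR lists and indirect descriptor buffers, and maps the
 * indirect descriptors for DMA. Returns 0 on success or -ENOMEM on failure.
 */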
1038static int srp_alloc_req_data(struct srp_rdma_ch *ch)
1039{
1040 struct srp_target_port *target = ch->target;
1041 struct srp_device *srp_dev = target->srp_host->srp_dev;
1042 struct ib_device *ibdev = srp_dev->dev;
1043 struct srp_request *req;
1044 void *mr_list;
1045 dma_addr_t dma_addr;
1046 int i, ret = -ENOMEM;
1047
1048 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
1049 GFP_KERNEL);
1050 if (!ch->req_ring)
1051 goto out;
1052
1053 for (i = 0; i < target->req_ring_size; ++i) {
1054 req = &ch->req_ring[i];
1055 mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
1056 GFP_KERNEL);
1057 if (!mr_list)
1058 goto out;
1059 if (srp_dev->use_fast_reg) {
1060 req->fr_list = mr_list;
1061 } else {
1062 req->fmr_list = mr_list;
1063 req->map_page = kmalloc_array(srp_dev->max_pages_per_mr,
1064 sizeof(void *),
1065 GFP_KERNEL);
1066 if (!req->map_page)
1067 goto out;
1068 }
1069 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
1070 if (!req->indirect_desc)
1071 goto out;
1072
1073 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
1074 target->indirect_size,
1075 DMA_TO_DEVICE);
1076 if (ib_dma_mapping_error(ibdev, dma_addr))
1077 goto out;
1078
1079 req->indirect_dma_addr = dma_addr;
1080 }
1081 ret = 0;
1082
1083out:
1084 return ret;
1085}
1086
1087/**
1088 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1089 * @shost: SCSI host whose attributes to remove from sysfs.
1090 *
1091 * Note: Any attributes defined in the host template that did not exist
1092 * before this function was invoked will be ignored.
1093 */
1094static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
1095{
1096 struct device_attribute **attr;
1097
1098 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
1099 device_remove_file(&shost->shost_dev, *attr);
1100}
1101
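/**
 * srp_remove_target() - remove a target port and free its resources
 * @target: SRP target port in state SRP_TARGET_REMOVED.
 *
 * Removes the SCSI host, disconnects the target, frees all RDMA channel
 * resources and request rings, and drops the SCSI host reference.
 */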
1102static void srp_remove_target(struct srp_target_port *target)
1103{
1104 struct srp_rdma_ch *ch;
1105 int i;
1106
1107 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1108
1109 srp_del_scsi_host_attr(target->scsi_host);
1110 srp_rport_get(target->rport);
1111 srp_remove_host(target->scsi_host);
1112 scsi_remove_host(target->scsi_host);
1113 srp_stop_rport_timers(target->rport);
1114 srp_disconnect_target(target);
1115 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
1116 for (i = 0; i < target->ch_count; i++) {
1117 ch = &target->ch[i];
1118 srp_free_ch_ib(target, ch);
1119 }
1120 cancel_work_sync(&target->tl_err_work);
1121 srp_rport_put(target->rport);
1122 for (i = 0; i < target->ch_count; i++) {
1123 ch = &target->ch[i];
1124 srp_free_req_data(target, ch);
1125 }
1126 kfree(target->ch);
1127 target->ch = NULL;
1128
1129 spin_lock(&target->srp_host->target_lock);
1130 list_del(&target->list);
1131 spin_unlock(&target->srp_host->target_lock);
1132
1133 scsi_host_put(target->scsi_host);
1134}
1135
1136static void srp_remove_work(struct work_struct *work)
1137{
1138 struct srp_target_port *target =
1139 container_of(work, struct srp_target_port, remove_work);
1140
1141 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1142
1143 srp_remove_target(target);
1144}
1145
1146static void srp_rport_delete(struct srp_rport *rport)
1147{
1148 struct srp_target_port *target = rport->lld_data;
1149
1150 srp_queue_remove_work(target);
1151}
1152
1153/**
1154 * srp_connected_ch() - number of connected channels
1155 * @target: SRP target port.
1156 */
1157static int srp_connected_ch(struct srp_target_port *target)
1158{
1159 int i, c = 0;
1160
1161 for (i = 0; i < target->ch_count; i++)
1162 c += target->ch[i].connected;
1163
1164 return c;
1165}
1166
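/**
 * srp_connect_ch() - establish an SRP connection over one RDMA channel
 * @ch: SRP RDMA channel.
 * @max_iu_len: Maximum initiator-to-target IU length to request.
 * @multich: Whether this login is for an additional channel.
 *
 * Performs a path lookup and retries the login for port and DLID redirects
 * until the login either succeeds or fails. Returns zero on success or a
 * negative error code on failure.
 */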
1167static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
1168 bool multich)
1169{
1170 struct srp_target_port *target = ch->target;
1171 int ret;
1172
1173 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1174
1175 ret = srp_lookup_path(ch);
1176 if (ret)
1177 goto out;
1178
1179 while (1) {
1180 init_completion(&ch->done);
1181 ret = srp_send_req(ch, max_iu_len, multich);
1182 if (ret)
1183 goto out;
1184 ret = wait_for_completion_interruptible(&ch->done);
1185 if (ret < 0)
1186 goto out;
1187
1188 /*
1189 * The CM event handling code will set status to
1190 * SRP_PORT_REDIRECT if we get a port redirect REJ
1191 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1192 * redirect REJ back.
1193 */
1194 ret = ch->status;
1195 switch (ret) {
1196 case 0:
1197 ch->connected = true;
1198 goto out;
1199
1200 case SRP_PORT_REDIRECT:
1201 ret = srp_lookup_path(ch);
1202 if (ret)
1203 goto out;
1204 break;
1205
1206 case SRP_DLID_REDIRECT:
1207 break;
1208
1209 case SRP_STALE_CONN:
1210 shost_printk(KERN_ERR, target->scsi_host, PFX
1211 "giving up on stale connection\n");
1212 ret = -ECONNRESET;
1213 goto out;
1214
1215 default:
1216 goto out;
1217 }
1218 }
1219
1220out:
1221 return ret <= 0 ? ret : -ENODEV;
1222}
1223
1224static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1225{
1226 srp_handle_qp_err(cq, wc, "INV RKEY");
1227}
1228
1229static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1230 u32 rkey)
1231{
1232 struct ib_send_wr wr = {
1233 .opcode = IB_WR_LOCAL_INV,
1234 .next = NULL,
1235 .num_sge = 0,
1236 .send_flags = 0,
1237 .ex.invalidate_rkey = rkey,
1238 };
1239
1240 wr.wr_cqe = &req->reg_cqe;
1241 req->reg_cqe.done = srp_inv_rkey_err_done;
1242 return ib_post_send(ch->qp, &wr, NULL);
1243}
1244
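/**
 * srp_unmap_data() - unmap the data buffer of a SCSI command
 * @scmnd: SCSI command whose data buffer has been mapped.
 * @ch: SRP RDMA channel the command was submitted on.
 * @req: SRP request associated with @scmnd.
 *
 * Invalidates (fast registration) or unmaps (FMR) the memory regions used by
 * @req and unmaps the scatterlist of @scmnd.
 */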
1245static void srp_unmap_data(struct scsi_cmnd *scmnd,
1246 struct srp_rdma_ch *ch,
1247 struct srp_request *req)
1248{
1249 struct srp_target_port *target = ch->target;
1250 struct srp_device *dev = target->srp_host->srp_dev;
1251 struct ib_device *ibdev = dev->dev;
1252 int i, res;
1253
1254 if (!scsi_sglist(scmnd) ||
1255 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1256 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1257 return;
1258
1259 if (dev->use_fast_reg) {
1260 struct srp_fr_desc **pfr;
1261
1262 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1263 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1264 if (res < 0) {
1265 shost_printk(KERN_ERR, target->scsi_host, PFX
1266 "Queueing INV WR for rkey %#x failed (%d)\n",
1267 (*pfr)->mr->rkey, res);
1268 queue_work(system_long_wq,
1269 &target->tl_err_work);
1270 }
1271 }
1272 if (req->nmdesc)
1273 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1274 req->nmdesc);
1275 } else if (dev->use_fmr) {
1276 struct ib_pool_fmr **pfmr;
1277
1278 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1279 ib_fmr_pool_unmap(*pfmr);
1280 }
1281
1282 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1283 scmnd->sc_data_direction);
1284}
1285
1286/**
1287 * srp_claim_req - Take ownership of the scmnd associated with a request.
1288 * @ch: SRP RDMA channel.
1289 * @req: SRP request.
1290 * @sdev: If not NULL, only take ownership for this SCSI device.
1291 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1292 * ownership of @req->scmnd if it equals @scmnd.
1293 *
1294 * Return value:
1295 * Either NULL or a pointer to the SCSI command the caller became owner of.
1296 */
1297static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1298 struct srp_request *req,
1299 struct scsi_device *sdev,
1300 struct scsi_cmnd *scmnd)
1301{
1302 unsigned long flags;
1303
1304 spin_lock_irqsave(&ch->lock, flags);
1305 if (req->scmnd &&
1306 (!sdev || req->scmnd->device == sdev) &&
1307 (!scmnd || req->scmnd == scmnd)) {
1308 scmnd = req->scmnd;
1309 req->scmnd = NULL;
1310 } else {
1311 scmnd = NULL;
1312 }
1313 spin_unlock_irqrestore(&ch->lock, flags);
1314
1315 return scmnd;
1316}
1317
1318/**
1319 * srp_free_req() - Unmap data and adjust ch->req_lim.
1320 * @ch: SRP RDMA channel.
1321 * @req: Request to be freed.
1322 * @scmnd: SCSI command associated with @req.
1323 * @req_lim_delta: Amount to be added to @target->req_lim.
1324 */
1325static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1326 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1327{
1328 unsigned long flags;
1329
1330 srp_unmap_data(scmnd, ch, req);
1331
1332 spin_lock_irqsave(&ch->lock, flags);
1333 ch->req_lim += req_lim_delta;
1334 spin_unlock_irqrestore(&ch->lock, flags);
1335}
1336
1337static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1338 struct scsi_device *sdev, int result)
1339{
1340 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1341
1342 if (scmnd) {
1343 srp_free_req(ch, req, scmnd, 0);
1344 scmnd->result = result;
1345 scmnd->scsi_done(scmnd);
1346 }
1347}
1348
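/**
 * srp_terminate_io() - fail all outstanding requests of a target port
 * @rport: SRP transport port.
 *
 * Finishes every request on every channel with DID_TRANSPORT_FAILFAST.
 */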
1349static void srp_terminate_io(struct srp_rport *rport)
1350{
1351 struct srp_target_port *target = rport->lld_data;
1352 struct srp_rdma_ch *ch;
1353 int i, j;
1354
1355 for (i = 0; i < target->ch_count; i++) {
1356 ch = &target->ch[i];
1357
1358 for (j = 0; j < target->req_ring_size; ++j) {
1359 struct srp_request *req = &ch->req_ring[j];
1360
1361 srp_finish_req(ch, req, NULL,
1362 DID_TRANSPORT_FAILFAST << 16);
1363 }
1364 }
1365}
1366
1367/* Calculate maximum initiator to target information unit length. */
1368static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data,
1369 uint32_t max_it_iu_size)
1370{
1371 uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
1372 sizeof(struct srp_indirect_buf) +
1373 cmd_sg_cnt * sizeof(struct srp_direct_buf);
1374
1375 if (use_imm_data)
1376 max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
1377 srp_max_imm_data);
1378
1379 if (max_it_iu_size)
1380 max_iu_len = min(max_iu_len, max_it_iu_size);
1381
1382 pr_debug("max_iu_len = %d\n", max_iu_len);
1383
1384 return max_iu_len;
1385}
1386
1387/*
1388 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1389 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1390 * srp_reset_device() or srp_reset_host() calls will occur while this function
1391 * is in progress. One way to achieve this is not to call this function
1392 * directly but to call srp_reconnect_rport() instead, since that function
1393 * serializes calls of this function via rport->mutex and also blocks
1394 * srp_queuecommand() calls before invoking this function.
1395 */
1396static int srp_rport_reconnect(struct srp_rport *rport)
1397{
1398 struct srp_target_port *target = rport->lld_data;
1399 struct srp_rdma_ch *ch;
1400 uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
1401 srp_use_imm_data,
1402 target->max_it_iu_size);
1403 int i, j, ret = 0;
1404 bool multich = false;
1405
1406 srp_disconnect_target(target);
1407
1408 if (target->state == SRP_TARGET_SCANNING)
1409 return -ENODEV;
1410
1411 /*
1412 * Now get a new local CM ID so that we avoid confusing the target in
1413 * case things are really fouled up. Doing so also ensures that all CM
1414 * callbacks will have finished before a new QP is allocated.
1415 */
1416 for (i = 0; i < target->ch_count; i++) {
1417 ch = &target->ch[i];
1418 ret += srp_new_cm_id(ch);
1419 }
1420 for (i = 0; i < target->ch_count; i++) {
1421 ch = &target->ch[i];
1422 for (j = 0; j < target->req_ring_size; ++j) {
1423 struct srp_request *req = &ch->req_ring[j];
1424
1425 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1426 }
1427 }
1428 for (i = 0; i < target->ch_count; i++) {
1429 ch = &target->ch[i];
1430 /*
1431 * Whether or not creating a new CM ID succeeded, create a new
1432 * QP. This guarantees that all completion callback function
1433 * invocations have finished before request resetting starts.
1434 */
1435 ret += srp_create_ch_ib(ch);
1436
1437 INIT_LIST_HEAD(&ch->free_tx);
1438 for (j = 0; j < target->queue_size; ++j)
1439 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1440 }
1441
1442 target->qp_in_error = false;
1443
1444 for (i = 0; i < target->ch_count; i++) {
1445 ch = &target->ch[i];
1446 if (ret)
1447 break;
1448 ret = srp_connect_ch(ch, max_iu_len, multich);
1449 multich = true;
1450 }
1451
1452 if (ret == 0)
1453 shost_printk(KERN_INFO, target->scsi_host,
1454 PFX "reconnect succeeded\n");
1455
1456 return ret;
1457}
1458
1459static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1460 unsigned int dma_len, u32 rkey)
1461{
1462 struct srp_direct_buf *desc = state->desc;
1463
1464 WARN_ON_ONCE(!dma_len);
1465
1466 desc->va = cpu_to_be64(dma_addr);
1467 desc->key = cpu_to_be32(rkey);
1468 desc->len = cpu_to_be32(dma_len);
1469
1470 state->total_len += dma_len;
1471 state->desc++;
1472 state->ndesc++;
1473}
1474
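/**
 * srp_map_finish_fmr() - map the accumulated pages through the FMR pool
 * @state: Memory mapping state.
 * @ch: SRP RDMA channel.
 *
 * Maps the pages accumulated in @state with ib_fmr_pool_map_phys() and
 * appends a memory descriptor for the result. Does nothing if no pages have
 * been accumulated; a single page is described directly through the global
 * rkey when one is available.
 */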
1475static int srp_map_finish_fmr(struct srp_map_state *state,
1476 struct srp_rdma_ch *ch)
1477{
1478 struct srp_target_port *target = ch->target;
1479 struct srp_device *dev = target->srp_host->srp_dev;
1480 struct ib_pool_fmr *fmr;
1481 u64 io_addr = 0;
1482
1483 if (state->fmr.next >= state->fmr.end) {
1484 shost_printk(KERN_ERR, ch->target->scsi_host,
1485 PFX "Out of MRs (mr_per_cmd = %d)\n",
1486 ch->target->mr_per_cmd);
1487 return -ENOMEM;
1488 }
1489
1490 WARN_ON_ONCE(!dev->use_fmr);
1491
1492 if (state->npages == 0)
1493 return 0;
1494
1495 if (state->npages == 1 && target->global_rkey) {
1496 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1497 target->global_rkey);
1498 goto reset_state;
1499 }
1500
1501 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1502 state->npages, io_addr);
1503 if (IS_ERR(fmr))
1504 return PTR_ERR(fmr);
1505
1506 *state->fmr.next++ = fmr;
1507 state->nmdesc++;
1508
1509 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1510 state->dma_len, fmr->fmr->rkey);
1511
1512reset_state:
1513 state->npages = 0;
1514 state->dma_len = 0;
1515
1516 return 0;
1517}
1518
1519static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1520{
1521 srp_handle_qp_err(cq, wc, "FAST REG");
1522}
1523
1524/*
1525 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1526 * where to start in the first element. If sg_offset_p != NULL then
1527 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1528 * byte that has not yet been mapped.
1529 */
1530static int srp_map_finish_fr(struct srp_map_state *state,
1531 struct srp_request *req,
1532 struct srp_rdma_ch *ch, int sg_nents,
1533 unsigned int *sg_offset_p)
1534{
1535 struct srp_target_port *target = ch->target;
1536 struct srp_device *dev = target->srp_host->srp_dev;
1537 struct ib_reg_wr wr;
1538 struct srp_fr_desc *desc;
1539 u32 rkey;
1540 int n, err;
1541
1542 if (state->fr.next >= state->fr.end) {
1543 shost_printk(KERN_ERR, ch->target->scsi_host,
1544 PFX "Out of MRs (mr_per_cmd = %d)\n",
1545 ch->target->mr_per_cmd);
1546 return -ENOMEM;
1547 }
1548
1549 WARN_ON_ONCE(!dev->use_fast_reg);
1550
1551 if (sg_nents == 1 && target->global_rkey) {
1552 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1553
1554 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1555 sg_dma_len(state->sg) - sg_offset,
1556 target->global_rkey);
1557 if (sg_offset_p)
1558 *sg_offset_p = 0;
1559 return 1;
1560 }
1561
1562 desc = srp_fr_pool_get(ch->fr_pool);
1563 if (!desc)
1564 return -ENOMEM;
1565
1566 rkey = ib_inc_rkey(desc->mr->rkey);
1567 ib_update_fast_reg_key(desc->mr, rkey);
1568
1569 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1570 dev->mr_page_size);
1571 if (unlikely(n < 0)) {
1572 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1573 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1574 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1575 sg_offset_p ? *sg_offset_p : -1, n);
1576 return n;
1577 }
1578
1579 WARN_ON_ONCE(desc->mr->length == 0);
1580
1581 req->reg_cqe.done = srp_reg_mr_err_done;
1582
1583 wr.wr.next = NULL;
1584 wr.wr.opcode = IB_WR_REG_MR;
1585 wr.wr.wr_cqe = &req->reg_cqe;
1586 wr.wr.num_sge = 0;
1587 wr.wr.send_flags = 0;
1588 wr.mr = desc->mr;
1589 wr.key = desc->mr->rkey;
1590 wr.access = (IB_ACCESS_LOCAL_WRITE |
1591 IB_ACCESS_REMOTE_READ |
1592 IB_ACCESS_REMOTE_WRITE);
1593
1594 *state->fr.next++ = desc;
1595 state->nmdesc++;
1596
1597 srp_map_desc(state, desc->mr->iova,
1598 desc->mr->length, desc->mr->rkey);
1599
1600 err = ib_post_send(ch->qp, &wr.wr, NULL);
1601 if (unlikely(err)) {
1602 WARN_ON_ONCE(err == -ENOMEM);
1603 return err;
1604 }
1605
1606 return n;
1607}
1608
1609static int srp_map_sg_entry(struct srp_map_state *state,
1610 struct srp_rdma_ch *ch,
1611 struct scatterlist *sg)
1612{
1613 struct srp_target_port *target = ch->target;
1614 struct srp_device *dev = target->srp_host->srp_dev;
1615 dma_addr_t dma_addr = sg_dma_address(sg);
1616 unsigned int dma_len = sg_dma_len(sg);
1617 unsigned int len = 0;
1618 int ret;
1619
1620 WARN_ON_ONCE(!dma_len);
1621
1622 while (dma_len) {
1623 unsigned offset = dma_addr & ~dev->mr_page_mask;
1624
1625 if (state->npages == dev->max_pages_per_mr ||
1626 (state->npages > 0 && offset != 0)) {
1627 ret = srp_map_finish_fmr(state, ch);
1628 if (ret)
1629 return ret;
1630 }
1631
1632 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1633
1634 if (!state->npages)
1635 state->base_dma_addr = dma_addr;
1636 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1637 state->dma_len += len;
1638 dma_addr += len;
1639 dma_len -= len;
1640 }
1641
1642 /*
1643 * If the end of the MR is not on a page boundary then we need to
1644 * close it out and start a new one -- we can only merge at page
1645 * boundaries.
1646 */
1647 ret = 0;
1648 if ((dma_addr & ~dev->mr_page_mask) != 0)
1649 ret = srp_map_finish_fmr(state, ch);
1650 return ret;
1651}
1652
1653static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1654 struct srp_request *req, struct scatterlist *scat,
1655 int count)
1656{
1657 struct scatterlist *sg;
1658 int i, ret;
1659
1660 state->pages = req->map_page;
1661 state->fmr.next = req->fmr_list;
1662 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1663
1664 for_each_sg(scat, sg, count, i) {
1665 ret = srp_map_sg_entry(state, ch, sg);
1666 if (ret)
1667 return ret;
1668 }
1669
1670 ret = srp_map_finish_fmr(state, ch);
1671 if (ret)
1672 return ret;
1673
1674 return 0;
1675}
1676
1677static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1678 struct srp_request *req, struct scatterlist *scat,
1679 int count)
1680{
1681 unsigned int sg_offset = 0;
1682
1683 state->fr.next = req->fr_list;
1684 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1685 state->sg = scat;
1686
1687 if (count == 0)
1688 return 0;
1689
1690 while (count) {
1691 int i, n;
1692
1693 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1694 if (unlikely(n < 0))
1695 return n;
1696
1697 count -= n;
1698 for (i = 0; i < n; i++)
1699 state->sg = sg_next(state->sg);
1700 }
1701
1702 return 0;
1703}
1704
1705static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1706 struct srp_request *req, struct scatterlist *scat,
1707 int count)
1708{
1709 struct srp_target_port *target = ch->target;
1710 struct scatterlist *sg;
1711 int i;
1712
1713 for_each_sg(scat, sg, count, i) {
1714 srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg),
1715 target->global_rkey);
1716 }
1717
1718 return 0;
1719}
1720
1721/*
1722 * Register the indirect data buffer descriptor with the HCA.
1723 *
1724 * Note: since the indirect data buffer descriptor has been allocated with
1725 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1726 * memory buffer.
1727 */
1728static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1729 void **next_mr, void **end_mr, u32 idb_len,
1730 __be32 *idb_rkey)
1731{
1732 struct srp_target_port *target = ch->target;
1733 struct srp_device *dev = target->srp_host->srp_dev;
1734 struct srp_map_state state;
1735 struct srp_direct_buf idb_desc;
1736 u64 idb_pages[1];
1737 struct scatterlist idb_sg[1];
1738 int ret;
1739
1740 memset(&state, 0, sizeof(state));
1741 memset(&idb_desc, 0, sizeof(idb_desc));
1742 state.gen.next = next_mr;
1743 state.gen.end = end_mr;
1744 state.desc = &idb_desc;
1745 state.base_dma_addr = req->indirect_dma_addr;
1746 state.dma_len = idb_len;
1747
1748 if (dev->use_fast_reg) {
1749 state.sg = idb_sg;
1750 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1751 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1752#ifdef CONFIG_NEED_SG_DMA_LENGTH
1753 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1754#endif
1755 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1756 if (ret < 0)
1757 return ret;
1758 WARN_ON_ONCE(ret < 1);
1759 } else if (dev->use_fmr) {
1760 state.pages = idb_pages;
1761 state.pages[0] = (req->indirect_dma_addr &
1762 dev->mr_page_mask);
1763 state.npages = 1;
1764 ret = srp_map_finish_fmr(&state, ch);
1765 if (ret < 0)
1766 return ret;
1767 } else {
1768 return -EINVAL;
1769 }
1770
1771 *idb_rkey = idb_desc.key;
1772
1773 return 0;
1774}
1775
1776static void srp_check_mapping(struct srp_map_state *state,
1777 struct srp_rdma_ch *ch, struct srp_request *req,
1778 struct scatterlist *scat, int count)
1779{
1780 struct srp_device *dev = ch->target->srp_host->srp_dev;
1781 struct srp_fr_desc **pfr;
1782 u64 desc_len = 0, mr_len = 0;
1783 int i;
1784
1785 for (i = 0; i < state->ndesc; i++)
1786 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1787 if (dev->use_fast_reg)
1788 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1789 mr_len += (*pfr)->mr->length;
1790 else if (dev->use_fmr)
1791 for (i = 0; i < state->nmdesc; i++)
1792 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1793 if (desc_len != scsi_bufflen(req->scmnd) ||
1794 mr_len > scsi_bufflen(req->scmnd))
1795 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1796 scsi_bufflen(req->scmnd), desc_len, mr_len,
1797 state->ndesc, state->nmdesc);
1798}
1799
1800/**
1801 * srp_map_data() - map SCSI data buffer onto an SRP request
1802 * @scmnd: SCSI command to map
1803 * @ch: SRP RDMA channel
1804 * @req: SRP request
1805 *
1806 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1807 * mapping failed. The size of any immediate data is not included in the
1808 * return value.
1809 */
1810static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1811 struct srp_request *req)
1812{
1813 struct srp_target_port *target = ch->target;
1814 struct scatterlist *scat, *sg;
1815 struct srp_cmd *cmd = req->cmd->buf;
1816 int i, len, nents, count, ret;
1817 struct srp_device *dev;
1818 struct ib_device *ibdev;
1819 struct srp_map_state state;
1820 struct srp_indirect_buf *indirect_hdr;
1821 u64 data_len;
1822 u32 idb_len, table_len;
1823 __be32 idb_rkey;
1824 u8 fmt;
1825
1826 req->cmd->num_sge = 1;
1827
1828 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1829 return sizeof(struct srp_cmd) + cmd->add_cdb_len;
1830
1831 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1832 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1833 shost_printk(KERN_WARNING, target->scsi_host,
1834 PFX "Unhandled data direction %d\n",
1835 scmnd->sc_data_direction);
1836 return -EINVAL;
1837 }
1838
1839 nents = scsi_sg_count(scmnd);
1840 scat = scsi_sglist(scmnd);
1841 data_len = scsi_bufflen(scmnd);
1842
1843 dev = target->srp_host->srp_dev;
1844 ibdev = dev->dev;
1845
1846 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1847 if (unlikely(count == 0))
1848 return -EIO;
1849
1850 if (ch->use_imm_data &&
1851 count <= ch->max_imm_sge &&
1852 SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
1853 scmnd->sc_data_direction == DMA_TO_DEVICE) {
1854 struct srp_imm_buf *buf;
1855 struct ib_sge *sge = &req->cmd->sge[1];
1856
1857 fmt = SRP_DATA_DESC_IMM;
1858 len = SRP_IMM_DATA_OFFSET;
1859 req->nmdesc = 0;
1860 buf = (void *)cmd->add_data + cmd->add_cdb_len;
1861 buf->len = cpu_to_be32(data_len);
1862 WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
1863 for_each_sg(scat, sg, count, i) {
1864 sge[i].addr = sg_dma_address(sg);
1865 sge[i].length = sg_dma_len(sg);
1866 sge[i].lkey = target->lkey;
1867 }
1868 req->cmd->num_sge += count;
1869 goto map_complete;
1870 }
1871
1872 fmt = SRP_DATA_DESC_DIRECT;
1873 len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1874 sizeof(struct srp_direct_buf);
1875
1876 if (count == 1 && target->global_rkey) {
1877 /*
1878 * The midlayer only generated a single gather/scatter
1879 * entry, or DMA mapping coalesced everything to a
1880 * single entry. So a direct descriptor along with
1881 * the DMA MR suffices.
1882 */
1883 struct srp_direct_buf *buf;
1884
1885 buf = (void *)cmd->add_data + cmd->add_cdb_len;
1886 buf->va = cpu_to_be64(sg_dma_address(scat));
1887 buf->key = cpu_to_be32(target->global_rkey);
1888 buf->len = cpu_to_be32(sg_dma_len(scat));
1889
1890 req->nmdesc = 0;
1891 goto map_complete;
1892 }
1893
1894 /*
1895 * We have more than one scatter/gather entry, so build our indirect
1896 * descriptor table, trying to merge as many entries as we can.
1897 */
1898 indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
1899
1900 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1901 target->indirect_size, DMA_TO_DEVICE);
1902
1903 memset(&state, 0, sizeof(state));
1904 state.desc = req->indirect_desc;
1905 if (dev->use_fast_reg)
1906 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1907 else if (dev->use_fmr)
1908 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1909 else
1910 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1911 req->nmdesc = state.nmdesc;
1912 if (ret < 0)
1913 goto unmap;
1914
1915 {
1916 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1917 "Memory mapping consistency check");
1918 if (DYNAMIC_DEBUG_BRANCH(ddm))
1919 srp_check_mapping(&state, ch, req, scat, count);
1920 }
1921
1922 /* We've mapped the request, now pull as much of the indirect
1923 * descriptor table as we can into the command buffer. If this
1924 * target is not using an external indirect table, we are
1925 * guaranteed to fit into the command, as the SCSI layer won't
1926 * give us more S/G entries than we allow.
1927 */
1928 if (state.ndesc == 1) {
1929 /*
1930 * Memory registration collapsed the sg-list into one entry,
1931 * so use a direct descriptor.
1932 */
1933 struct srp_direct_buf *buf;
1934
1935 buf = (void *)cmd->add_data + cmd->add_cdb_len;
1936 *buf = req->indirect_desc[0];
1937 goto map_complete;
1938 }
1939
1940 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1941 !target->allow_ext_sg)) {
1942 shost_printk(KERN_ERR, target->scsi_host,
1943 "Could not fit S/G list into SRP_CMD\n");
1944 ret = -EIO;
1945 goto unmap;
1946 }
1947
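	/*
	 * Embed at most cmd_sg_cnt descriptors in the SRP_CMD itself; the
	 * complete table remains in req->indirect_desc and is advertised
	 * via table_desc so that the target can read any remaining entries.
	 */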
1948 count = min(state.ndesc, target->cmd_sg_cnt);
1949 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1950 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1951
1952 fmt = SRP_DATA_DESC_INDIRECT;
1953 len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1954 sizeof(struct srp_indirect_buf);
1955 len += count * sizeof (struct srp_direct_buf);
1956
1957 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1958 count * sizeof (struct srp_direct_buf));
1959
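	/*
	 * Without a global rkey the indirect descriptor table itself must be
	 * registered so that the target can read it with an RDMA READ.
	 */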
1960 if (!target->global_rkey) {
1961 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1962 idb_len, &idb_rkey);
1963 if (ret < 0)
1964 goto unmap;
1965 req->nmdesc++;
1966 } else {
1967 idb_rkey = cpu_to_be32(target->global_rkey);
1968 }
1969
1970 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1971 indirect_hdr->table_desc.key = idb_rkey;
1972 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1973 indirect_hdr->len = cpu_to_be32(state.total_len);
1974
1975 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1976 cmd->data_out_desc_cnt = count;
1977 else
1978 cmd->data_in_desc_cnt = count;
1979
1980 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1981 DMA_TO_DEVICE);
1982
1983map_complete:
1984 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1985 cmd->buf_fmt = fmt << 4;
1986 else
1987 cmd->buf_fmt = fmt;
1988
1989 return len;
1990
1991unmap:
1992 srp_unmap_data(scmnd, ch, req);
1993 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1994 ret = -E2BIG;
1995 return ret;
1996}
1997
1998/*
1999 * Return an IU and, if applicable, a credit to the free pool
2000 */
2001static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
2002 enum srp_iu_type iu_type)
2003{
2004 unsigned long flags;
2005
2006 spin_lock_irqsave(&ch->lock, flags);
2007 list_add(&iu->list, &ch->free_tx);
2008 if (iu_type != SRP_IU_RSP)
2009 ++ch->req_lim;
2010 spin_unlock_irqrestore(&ch->lock, flags);
2011}
2012
2013/*
2014 * Must be called with ch->lock held to protect req_lim and free_tx.
2015 * If IU is not sent, it must be returned using srp_put_tx_iu().
2016 *
2017 * Note:
2018 * An upper limit for the number of allocated information units for each
2019 * request type is:
2020 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
2021 * more than Scsi_Host.can_queue requests.
2022 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
2023 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
2024 * one unanswered SRP request to an initiator.
2025 */
2026static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
2027 enum srp_iu_type iu_type)
2028{
2029 struct srp_target_port *target = ch->target;
2030 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
2031 struct srp_iu *iu;
2032
2033 lockdep_assert_held(&ch->lock);
2034
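	/*
	 * Poll the send CQ so that IUs whose send has completed are moved
	 * back onto ch->free_tx by srp_send_done().
	 */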
2035 ib_process_cq_direct(ch->send_cq, -1);
2036
2037 if (list_empty(&ch->free_tx))
2038 return NULL;
2039
2040 /* Initiator responses to target requests do not consume credits */
2041 if (iu_type != SRP_IU_RSP) {
2042 if (ch->req_lim <= rsv) {
2043 ++target->zero_req_lim;
2044 return NULL;
2045 }
2046
2047 --ch->req_lim;
2048 }
2049
2050 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
2051 list_del(&iu->list);
2052 return iu;
2053}
2054
2055/*
2056 * Note: if this function is called from inside ib_drain_sq() then it will
2057 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
2058 * with status IB_WC_SUCCESS then that's a bug.
2059 */
2060static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
2061{
2062 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2063 struct srp_rdma_ch *ch = cq->cq_context;
2064
2065 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2066 srp_handle_qp_err(cq, wc, "SEND");
2067 return;
2068 }
2069
2070 lockdep_assert_held(&ch->lock);
2071
2072 list_add(&iu->list, &ch->free_tx);
2073}
2074
2075/**
2076 * srp_post_send() - send an SRP information unit
2077 * @ch: RDMA channel over which to send the information unit.
2078 * @iu: Information unit to send.
2079 * @len: Length of the information unit excluding immediate data.
2080 */
2081static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2082{
2083 struct srp_target_port *target = ch->target;
2084 struct ib_send_wr wr;
2085
2086 if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
2087 return -EINVAL;
2088
2089 iu->sge[0].addr = iu->dma;
2090 iu->sge[0].length = len;
2091 iu->sge[0].lkey = target->lkey;
2092
2093 iu->cqe.done = srp_send_done;
2094
2095 wr.next = NULL;
2096 wr.wr_cqe = &iu->cqe;
2097 wr.sg_list = &iu->sge[0];
2098 wr.num_sge = iu->num_sge;
2099 wr.opcode = IB_WR_SEND;
2100 wr.send_flags = IB_SEND_SIGNALED;
2101
2102 return ib_post_send(ch->qp, &wr, NULL);
2103}
2104
2105static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2106{
2107 struct srp_target_port *target = ch->target;
2108 struct ib_recv_wr wr;
2109 struct ib_sge list;
2110
2111 list.addr = iu->dma;
2112 list.length = iu->size;
2113 list.lkey = target->lkey;
2114
2115 iu->cqe.done = srp_recv_done;
2116
2117 wr.next = NULL;
2118 wr.wr_cqe = &iu->cqe;
2119 wr.sg_list = &list;
2120 wr.num_sge = 1;
2121
2122 return ib_post_recv(ch->qp, &wr, NULL);
2123}
2124
2125static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
2126{
2127 struct srp_target_port *target = ch->target;
2128 struct srp_request *req;
2129 struct scsi_cmnd *scmnd;
2130 unsigned long flags;
2131
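	/*
	 * Task management responses carry the SRP_TAG_TSK_MGMT bit in their
	 * tag (see srp_send_tsk_mgmt()); all other responses complete a
	 * SCSI command.
	 */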
2132 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
2133 spin_lock_irqsave(&ch->lock, flags);
2134 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2135 if (rsp->tag == ch->tsk_mgmt_tag) {
2136 ch->tsk_mgmt_status = -1;
2137 if (be32_to_cpu(rsp->resp_data_len) >= 4)
2138 ch->tsk_mgmt_status = rsp->data[3];
2139 complete(&ch->tsk_mgmt_done);
2140 } else {
2141 shost_printk(KERN_ERR, target->scsi_host,
2142 "Received tsk mgmt response too late for tag %#llx\n",
2143 rsp->tag);
2144 }
2145 spin_unlock_irqrestore(&ch->lock, flags);
2146 } else {
2147 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
2148 if (scmnd && scmnd->host_scribble) {
2149 req = (void *)scmnd->host_scribble;
2150 scmnd = srp_claim_req(ch, req, NULL, scmnd);
2151 } else {
2152 scmnd = NULL;
2153 }
2154 if (!scmnd) {
2155 shost_printk(KERN_ERR, target->scsi_host,
2156 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2157 rsp->tag, ch - target->ch, ch->qp->qp_num);
2158
2159 spin_lock_irqsave(&ch->lock, flags);
2160 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2161 spin_unlock_irqrestore(&ch->lock, flags);
2162
2163 return;
2164 }
2165 scmnd->result = rsp->status;
2166
2167 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
2168 memcpy(scmnd->sense_buffer, rsp->data +
2169 be32_to_cpu(rsp->resp_data_len),
2170 min_t(int, be32_to_cpu(rsp->sense_data_len),
2171 SCSI_SENSE_BUFFERSIZE));
2172 }
2173
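		/*
		 * Translate the SRP residual count flags into a SCSI
		 * residual: underflow yields a positive residual and
		 * overflow a negative one.
		 */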
2174 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
2175 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
2176 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
2177 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
2178 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
2179 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
2180 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
2181 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
2182
2183 srp_free_req(ch, req, scmnd,
2184 be32_to_cpu(rsp->req_lim_delta));
2185
2186 scmnd->host_scribble = NULL;
2187 scmnd->scsi_done(scmnd);
2188 }
2189}
2190
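/*
 * Send a response IU. Used by the SRP_CRED_REQ and SRP_AER_REQ handlers.
 * Returns zero upon success and a nonzero value upon failure.
 */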
2191static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
2192 void *rsp, int len)
2193{
2194 struct srp_target_port *target = ch->target;
2195 struct ib_device *dev = target->srp_host->srp_dev->dev;
2196 unsigned long flags;
2197 struct srp_iu *iu;
2198 int err;
2199
2200 spin_lock_irqsave(&ch->lock, flags);
2201 ch->req_lim += req_delta;
2202 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2203 spin_unlock_irqrestore(&ch->lock, flags);
2204
2205 if (!iu) {
2206 shost_printk(KERN_ERR, target->scsi_host, PFX
2207 "no IU available to send response\n");
2208 return 1;
2209 }
2210
2211 iu->num_sge = 1;
2212 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2213 memcpy(iu->buf, rsp, len);
2214 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2215
2216 err = srp_post_send(ch, iu, len);
2217 if (err) {
2218 shost_printk(KERN_ERR, target->scsi_host, PFX
2219 "unable to post response: %d\n", err);
2220 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2221 }
2222
2223 return err;
2224}
2225
2226static void srp_process_cred_req(struct srp_rdma_ch *ch,
2227 struct srp_cred_req *req)
2228{
2229 struct srp_cred_rsp rsp = {
2230 .opcode = SRP_CRED_RSP,
2231 .tag = req->tag,
2232 };
2233 s32 delta = be32_to_cpu(req->req_lim_delta);
2234
2235 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2236 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2237 "problems processing SRP_CRED_REQ\n");
2238}
2239
2240static void srp_process_aer_req(struct srp_rdma_ch *ch,
2241 struct srp_aer_req *req)
2242{
2243 struct srp_target_port *target = ch->target;
2244 struct srp_aer_rsp rsp = {
2245 .opcode = SRP_AER_RSP,
2246 .tag = req->tag,
2247 };
2248 s32 delta = be32_to_cpu(req->req_lim_delta);
2249
2250 shost_printk(KERN_ERR, target->scsi_host, PFX
2251 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2252
2253 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2254 shost_printk(KERN_ERR, target->scsi_host, PFX
2255 "problems processing SRP_AER_REQ\n");
2256}
2257
2258static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2259{
2260 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2261 struct srp_rdma_ch *ch = cq->cq_context;
2262 struct srp_target_port *target = ch->target;
2263 struct ib_device *dev = target->srp_host->srp_dev->dev;
2264 int res;
2265 u8 opcode;
2266
2267 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2268 srp_handle_qp_err(cq, wc, "RECV");
2269 return;
2270 }
2271
2272 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2273 DMA_FROM_DEVICE);
2274
2275 opcode = *(u8 *) iu->buf;
2276
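	/* Change "if (0)" into "if (1)" to log and hex-dump every received IU. */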
2277 if (0) {
2278 shost_printk(KERN_ERR, target->scsi_host,
2279 PFX "recv completion, opcode 0x%02x\n", opcode);
2280 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2281 iu->buf, wc->byte_len, true);
2282 }
2283
2284 switch (opcode) {
2285 case SRP_RSP:
2286 srp_process_rsp(ch, iu->buf);
2287 break;
2288
2289 case SRP_CRED_REQ:
2290 srp_process_cred_req(ch, iu->buf);
2291 break;
2292
2293 case SRP_AER_REQ:
2294 srp_process_aer_req(ch, iu->buf);
2295 break;
2296
2297 case SRP_T_LOGOUT:
2298 /* XXX Handle target logout */
2299 shost_printk(KERN_WARNING, target->scsi_host,
2300 PFX "Got target logout request\n");
2301 break;
2302
2303 default:
2304 shost_printk(KERN_WARNING, target->scsi_host,
2305 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2306 break;
2307 }
2308
2309 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2310 DMA_FROM_DEVICE);
2311
2312 res = srp_post_recv(ch, iu);
2313 if (res != 0)
2314 shost_printk(KERN_ERR, target->scsi_host,
2315 PFX "Recv failed with error code %d\n", res);
2316}
2317
2318/**
2319 * srp_tl_err_work() - handle a transport layer error
2320 * @work: Work structure embedded in an SRP target port.
2321 *
2322 * Note: This function may get invoked before the rport has been created,
2323 * hence the target->rport test.
2324 */
2325static void srp_tl_err_work(struct work_struct *work)
2326{
2327 struct srp_target_port *target;
2328
2329 target = container_of(work, struct srp_target_port, tl_err_work);
2330 if (target->rport)
2331 srp_start_tl_fail_timers(target->rport);
2332}
2333
2334static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2335 const char *opname)
2336{
2337 struct srp_rdma_ch *ch = cq->cq_context;
2338 struct srp_target_port *target = ch->target;
2339
2340 if (ch->connected && !target->qp_in_error) {
2341 shost_printk(KERN_ERR, target->scsi_host,
2342 PFX "failed %s status %s (%d) for CQE %p\n",
2343 opname, ib_wc_status_msg(wc->status), wc->status,
2344 wc->wr_cqe);
2345 queue_work(system_long_wq, &target->tl_err_work);
2346 }
2347 target->qp_in_error = true;
2348}
2349
2350static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2351{
2352 struct srp_target_port *target = host_to_target(shost);
2353 struct srp_rdma_ch *ch;
2354 struct srp_request *req;
2355 struct srp_iu *iu;
2356 struct srp_cmd *cmd;
2357 struct ib_device *dev;
2358 unsigned long flags;
2359 u32 tag;
2360 u16 idx;
2361 int len, ret;
2362
2363 scmnd->result = srp_chkready(target->rport);
2364 if (unlikely(scmnd->result))
2365 goto err;
2366
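	/*
	 * The blk-mq unique tag encodes both the hardware queue number and
	 * the per-queue tag: the former selects the RDMA channel and the
	 * latter indexes the request ring of that channel.
	 */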
2367 WARN_ON_ONCE(scmnd->request->tag < 0);
2368 tag = blk_mq_unique_tag(scmnd->request);
2369 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2370 idx = blk_mq_unique_tag_to_tag(tag);
2371 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2372 dev_name(&shost->shost_gendev), tag, idx,
2373 target->req_ring_size);
2374
2375 spin_lock_irqsave(&ch->lock, flags);
2376 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2377 spin_unlock_irqrestore(&ch->lock, flags);
2378
2379 if (!iu)
2380 goto err;
2381
2382 req = &ch->req_ring[idx];
2383 dev = target->srp_host->srp_dev->dev;
2384 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
2385 DMA_TO_DEVICE);
2386
2387 scmnd->host_scribble = (void *) req;
2388
2389 cmd = iu->buf;
2390 memset(cmd, 0, sizeof *cmd);
2391
2392 cmd->opcode = SRP_CMD;
2393 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2394 cmd->tag = tag;
2395 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2396 if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
2397 cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
2398 4);
2399 if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
2400 goto err_iu;
2401 }
2402
2403 req->scmnd = scmnd;
2404 req->cmd = iu;
2405
2406 len = srp_map_data(scmnd, ch, req);
2407 if (len < 0) {
2408 shost_printk(KERN_ERR, target->scsi_host,
2409 PFX "Failed to map data (%d)\n", len);
2410 /*
2411 * If we ran out of memory descriptors (-ENOMEM) because an
2412 * application is queuing many requests with more than
2413 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2414 * to reduce queue depth temporarily.
2415 */
2416 scmnd->result = len == -ENOMEM ?
2417 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2418 goto err_iu;
2419 }
2420
2421 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
2422 DMA_TO_DEVICE);
2423
2424 if (srp_post_send(ch, iu, len)) {
2425 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2426 scmnd->result = DID_ERROR << 16;
2427 goto err_unmap;
2428 }
2429
2430 return 0;
2431
2432err_unmap:
2433 srp_unmap_data(scmnd, ch, req);
2434
2435err_iu:
2436 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2437
2438 /*
2439	 * Prevent the loops that iterate over the request ring from
2440	 * encountering a dangling SCSI command pointer.
2441 */
2442 req->scmnd = NULL;
2443
2444err:
2445 if (scmnd->result) {
2446 scmnd->scsi_done(scmnd);
2447 ret = 0;
2448 } else {
2449 ret = SCSI_MLQUEUE_HOST_BUSY;
2450 }
2451
2452 return ret;
2453}
2454
2455/*
2456 * Note: the resources allocated in this function are freed in
2457 * srp_free_ch_ib().
2458 */
2459static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2460{
2461 struct srp_target_port *target = ch->target;
2462 int i;
2463
2464 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2465 GFP_KERNEL);
2466 if (!ch->rx_ring)
2467 goto err_no_ring;
2468 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2469 GFP_KERNEL);
2470 if (!ch->tx_ring)
2471 goto err_no_ring;
2472
2473 for (i = 0; i < target->queue_size; ++i) {
2474 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2475 ch->max_ti_iu_len,
2476 GFP_KERNEL, DMA_FROM_DEVICE);
2477 if (!ch->rx_ring[i])
2478 goto err;
2479 }
2480
2481 for (i = 0; i < target->queue_size; ++i) {
2482 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2483 ch->max_it_iu_len,
2484 GFP_KERNEL, DMA_TO_DEVICE);
2485 if (!ch->tx_ring[i])
2486 goto err;
2487
2488 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2489 }
2490
2491 return 0;
2492
2493err:
2494 for (i = 0; i < target->queue_size; ++i) {
2495 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2496 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2497 }
2498
2500err_no_ring:
2501 kfree(ch->tx_ring);
2502 ch->tx_ring = NULL;
2503 kfree(ch->rx_ring);
2504 ch->rx_ring = NULL;
2505
2506 return -ENOMEM;
2507}
2508
2509static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2510{
2511 uint64_t T_tr_ns, max_compl_time_ms;
2512 uint32_t rq_tmo_jiffies;
2513
2514 /*
2515 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2516 * table 91), both the QP timeout and the retry count have to be set
2517 * for RC QP's during the RTR to RTS transition.
2518 */
2519 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2520 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2521
2522 /*
2523 * Set target->rq_tmo_jiffies to one second more than the largest time
2524 * it can take before an error completion is generated. See also
2525 * C9-140..142 in the IBTA spec for more information about how to
2526 * convert the QP Local ACK Timeout value to nanoseconds.
2527 */
2528 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2529 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2530 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2531 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
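	/*
	 * E.g. timeout = 19 and retry_cnt = 7 yield T_tr of about 2.1 s and
	 * hence a request timeout of roughly 61 seconds.
	 */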
2532
2533 return rq_tmo_jiffies;
2534}
2535
2536static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2537 const struct srp_login_rsp *lrsp,
2538 struct srp_rdma_ch *ch)
2539{
2540 struct srp_target_port *target = ch->target;
2541 struct ib_qp_attr *qp_attr = NULL;
2542 int attr_mask = 0;
2543 int ret = 0;
2544 int i;
2545
2546 if (lrsp->opcode == SRP_LOGIN_RSP) {
2547 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2548 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2549 ch->use_imm_data = srp_use_imm_data &&
2550 (lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP);
2551 ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
2552 ch->use_imm_data,
2553 target->max_it_iu_size);
2554 WARN_ON_ONCE(ch->max_it_iu_len >
2555 be32_to_cpu(lrsp->max_it_iu_len));
2556
2557 if (ch->use_imm_data)
2558 shost_printk(KERN_DEBUG, target->scsi_host,
2559 PFX "using immediate data\n");
2560
2561 /*
2562 * Reserve credits for task management so we don't
2563 * bounce requests back to the SCSI mid-layer.
2564 */
2565 target->scsi_host->can_queue
2566 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2567 target->scsi_host->can_queue);
2568 target->scsi_host->cmd_per_lun
2569 = min_t(int, target->scsi_host->can_queue,
2570 target->scsi_host->cmd_per_lun);
2571 } else {
2572 shost_printk(KERN_WARNING, target->scsi_host,
2573 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2574 ret = -ECONNRESET;
2575 goto error;
2576 }
2577
2578 if (!ch->rx_ring) {
2579 ret = srp_alloc_iu_bufs(ch);
2580 if (ret)
2581 goto error;
2582 }
2583
2584 for (i = 0; i < target->queue_size; i++) {
2585 struct srp_iu *iu = ch->rx_ring[i];
2586
2587 ret = srp_post_recv(ch, iu);
2588 if (ret)
2589 goto error;
2590 }
2591
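	/*
	 * For the IB CM the initiator itself has to transition the QP to RTR
	 * and RTS and acknowledge the REP with an RTU. The RDMA CM performs
	 * these steps internally.
	 */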
2592 if (!target->using_rdma_cm) {
2593 ret = -ENOMEM;
2594 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2595 if (!qp_attr)
2596 goto error;
2597
2598 qp_attr->qp_state = IB_QPS_RTR;
2599 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2600 if (ret)
2601 goto error_free;
2602
2603 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2604 if (ret)
2605 goto error_free;
2606
2607 qp_attr->qp_state = IB_QPS_RTS;
2608 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2609 if (ret)
2610 goto error_free;
2611
2612 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2613
2614 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2615 if (ret)
2616 goto error_free;
2617
2618 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2619 }
2620
2621error_free:
2622 kfree(qp_attr);
2623
2624error:
2625 ch->status = ret;
2626}
2627
2628static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2629 const struct ib_cm_event *event,
2630 struct srp_rdma_ch *ch)
2631{
2632 struct srp_target_port *target = ch->target;
2633 struct Scsi_Host *shost = target->scsi_host;
2634 struct ib_class_port_info *cpi;
2635 int opcode;
2636 u16 dlid;
2637
2638 switch (event->param.rej_rcvd.reason) {
2639 case IB_CM_REJ_PORT_CM_REDIRECT:
2640 cpi = event->param.rej_rcvd.ari;
2641 dlid = be16_to_cpu(cpi->redirect_lid);
2642 sa_path_set_dlid(&ch->ib_cm.path, dlid);
2643 ch->ib_cm.path.pkey = cpi->redirect_pkey;
2644 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2645 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2646
2647 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2648 break;
2649
2650 case IB_CM_REJ_PORT_REDIRECT:
2651 if (srp_target_is_topspin(target)) {
2652 union ib_gid *dgid = &ch->ib_cm.path.dgid;
2653
2654 /*
2655 * Topspin/Cisco SRP gateways incorrectly send
2656 * reject reason code 25 when they mean 24
2657 * (port redirect).
2658 */
2659 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2660
2661 shost_printk(KERN_DEBUG, shost,
2662 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2663 be64_to_cpu(dgid->global.subnet_prefix),
2664 be64_to_cpu(dgid->global.interface_id));
2665
2666 ch->status = SRP_PORT_REDIRECT;
2667 } else {
2668 shost_printk(KERN_WARNING, shost,
2669 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2670 ch->status = -ECONNRESET;
2671 }
2672 break;
2673
2674 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2675 shost_printk(KERN_WARNING, shost,
2676 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2677 ch->status = -ECONNRESET;
2678 break;
2679
2680 case IB_CM_REJ_CONSUMER_DEFINED:
2681 opcode = *(u8 *) event->private_data;
2682 if (opcode == SRP_LOGIN_REJ) {
2683 struct srp_login_rej *rej = event->private_data;
2684 u32 reason = be32_to_cpu(rej->reason);
2685
2686 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2687 shost_printk(KERN_WARNING, shost,
2688 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2689 else
2690 shost_printk(KERN_WARNING, shost, PFX
2691 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2692 target->sgid.raw,
2693 target->ib_cm.orig_dgid.raw,
2694 reason);
2695 } else
2696 shost_printk(KERN_WARNING, shost,
2697 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2698 " opcode 0x%02x\n", opcode);
2699 ch->status = -ECONNRESET;
2700 break;
2701
2702 case IB_CM_REJ_STALE_CONN:
2703 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2704 ch->status = SRP_STALE_CONN;
2705 break;
2706
2707 default:
2708 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2709 event->param.rej_rcvd.reason);
2710 ch->status = -ECONNRESET;
2711 }
2712}
2713
2714static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2715 const struct ib_cm_event *event)
2716{
2717 struct srp_rdma_ch *ch = cm_id->context;
2718 struct srp_target_port *target = ch->target;
2719 int comp = 0;
2720
2721 switch (event->event) {
2722 case IB_CM_REQ_ERROR:
2723 shost_printk(KERN_DEBUG, target->scsi_host,
2724 PFX "Sending CM REQ failed\n");
2725 comp = 1;
2726 ch->status = -ECONNRESET;
2727 break;
2728
2729 case IB_CM_REP_RECEIVED:
2730 comp = 1;
2731 srp_cm_rep_handler(cm_id, event->private_data, ch);
2732 break;
2733
2734 case IB_CM_REJ_RECEIVED:
2735 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2736 comp = 1;
2737
2738 srp_ib_cm_rej_handler(cm_id, event, ch);
2739 break;
2740
2741 case IB_CM_DREQ_RECEIVED:
2742 shost_printk(KERN_WARNING, target->scsi_host,
2743 PFX "DREQ received - connection closed\n");
2744 ch->connected = false;
2745 if (ib_send_cm_drep(cm_id, NULL, 0))
2746 shost_printk(KERN_ERR, target->scsi_host,
2747 PFX "Sending CM DREP failed\n");
2748 queue_work(system_long_wq, &target->tl_err_work);
2749 break;
2750
2751 case IB_CM_TIMEWAIT_EXIT:
2752 shost_printk(KERN_ERR, target->scsi_host,
2753 PFX "connection closed\n");
2754 comp = 1;
2755
2756 ch->status = 0;
2757 break;
2758
2759 case IB_CM_MRA_RECEIVED:
2760 case IB_CM_DREQ_ERROR:
2761 case IB_CM_DREP_RECEIVED:
2762 break;
2763
2764 default:
2765 shost_printk(KERN_WARNING, target->scsi_host,
2766 PFX "Unhandled CM event %d\n", event->event);
2767 break;
2768 }
2769
2770 if (comp)
2771 complete(&ch->done);
2772
2773 return 0;
2774}
2775
2776static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2777 struct rdma_cm_event *event)
2778{
2779 struct srp_target_port *target = ch->target;
2780 struct Scsi_Host *shost = target->scsi_host;
2781 int opcode;
2782
2783 switch (event->status) {
2784 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2785 shost_printk(KERN_WARNING, shost,
2786 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2787 ch->status = -ECONNRESET;
2788 break;
2789
2790 case IB_CM_REJ_CONSUMER_DEFINED:
2791 opcode = *(u8 *) event->param.conn.private_data;
2792 if (opcode == SRP_LOGIN_REJ) {
2793 struct srp_login_rej *rej =
2794 (struct srp_login_rej *)
2795 event->param.conn.private_data;
2796 u32 reason = be32_to_cpu(rej->reason);
2797
2798 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2799 shost_printk(KERN_WARNING, shost,
2800 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2801 else
2802 shost_printk(KERN_WARNING, shost,
2803 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2804 } else {
2805 shost_printk(KERN_WARNING, shost,
2806 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2807 opcode);
2808 }
2809 ch->status = -ECONNRESET;
2810 break;
2811
2812 case IB_CM_REJ_STALE_CONN:
2813 shost_printk(KERN_WARNING, shost,
2814 " REJ reason: stale connection\n");
2815 ch->status = SRP_STALE_CONN;
2816 break;
2817
2818 default:
2819 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2820 event->status);
2821 ch->status = -ECONNRESET;
2822 break;
2823 }
2824}
2825
2826static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2827 struct rdma_cm_event *event)
2828{
2829 struct srp_rdma_ch *ch = cm_id->context;
2830 struct srp_target_port *target = ch->target;
2831 int comp = 0;
2832
2833 switch (event->event) {
2834 case RDMA_CM_EVENT_ADDR_RESOLVED:
2835 ch->status = 0;
2836 comp = 1;
2837 break;
2838
2839 case RDMA_CM_EVENT_ADDR_ERROR:
2840 ch->status = -ENXIO;
2841 comp = 1;
2842 break;
2843
2844 case RDMA_CM_EVENT_ROUTE_RESOLVED:
2845 ch->status = 0;
2846 comp = 1;
2847 break;
2848
2849 case RDMA_CM_EVENT_ROUTE_ERROR:
2850 case RDMA_CM_EVENT_UNREACHABLE:
2851 ch->status = -EHOSTUNREACH;
2852 comp = 1;
2853 break;
2854
2855 case RDMA_CM_EVENT_CONNECT_ERROR:
2856 shost_printk(KERN_DEBUG, target->scsi_host,
2857 PFX "Sending CM REQ failed\n");
2858 comp = 1;
2859 ch->status = -ECONNRESET;
2860 break;
2861
2862 case RDMA_CM_EVENT_ESTABLISHED:
2863 comp = 1;
2864 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2865 break;
2866
2867 case RDMA_CM_EVENT_REJECTED:
2868 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2869 comp = 1;
2870
2871 srp_rdma_cm_rej_handler(ch, event);
2872 break;
2873
2874 case RDMA_CM_EVENT_DISCONNECTED:
2875 if (ch->connected) {
2876 shost_printk(KERN_WARNING, target->scsi_host,
2877 PFX "received DREQ\n");
2878 rdma_disconnect(ch->rdma_cm.cm_id);
2879 comp = 1;
2880 ch->status = 0;
2881 queue_work(system_long_wq, &target->tl_err_work);
2882 }
2883 break;
2884
2885 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2886 shost_printk(KERN_ERR, target->scsi_host,
2887 PFX "connection closed\n");
2888
2889 comp = 1;
2890 ch->status = 0;
2891 break;
2892
2893 default:
2894 shost_printk(KERN_WARNING, target->scsi_host,
2895 PFX "Unhandled CM event %d\n", event->event);
2896 break;
2897 }
2898
2899 if (comp)
2900 complete(&ch->done);
2901
2902 return 0;
2903}
2904
2905/**
2906 * srp_change_queue_depth - set the device queue depth
2907 * @sdev: scsi device struct
2908 * @qdepth: requested queue depth
2909 *
2910 * Returns queue depth.
2911 */
2912static int
2913srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2914{
2915 if (!sdev->tagged_supported)
2916 qdepth = 1;
2917 return scsi_change_queue_depth(sdev, qdepth);
2918}
2919
2920static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2921 u8 func, u8 *status)
2922{
2923 struct srp_target_port *target = ch->target;
2924 struct srp_rport *rport = target->rport;
2925 struct ib_device *dev = target->srp_host->srp_dev->dev;
2926 struct srp_iu *iu;
2927 struct srp_tsk_mgmt *tsk_mgmt;
2928 int res;
2929
2930 if (!ch->connected || target->qp_in_error)
2931 return -1;
2932
2933 /*
2934	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2935	 * invoked while a task management function is being sent.
2936 */
2937 mutex_lock(&rport->mutex);
2938 spin_lock_irq(&ch->lock);
2939 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2940 spin_unlock_irq(&ch->lock);
2941
2942 if (!iu) {
2943 mutex_unlock(&rport->mutex);
2944
2945 return -1;
2946 }
2947
2948 iu->num_sge = 1;
2949
2950 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2951 DMA_TO_DEVICE);
2952 tsk_mgmt = iu->buf;
2953 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2954
2955 tsk_mgmt->opcode = SRP_TSK_MGMT;
2956 int_to_scsilun(lun, &tsk_mgmt->lun);
2957 tsk_mgmt->tsk_mgmt_func = func;
2958 tsk_mgmt->task_tag = req_tag;
2959
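	/*
	 * Generate a tag with the SRP_TAG_TSK_MGMT bit set so that
	 * srp_process_rsp() can distinguish task management responses from
	 * SCSI command responses.
	 */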
2960 spin_lock_irq(&ch->lock);
2961 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2962 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2963 spin_unlock_irq(&ch->lock);
2964
2965 init_completion(&ch->tsk_mgmt_done);
2966
2967 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2968 DMA_TO_DEVICE);
2969 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2970 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2971 mutex_unlock(&rport->mutex);
2972
2973 return -1;
2974 }
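	/*
	 * wait_for_completion_timeout() returns zero if the timeout elapsed
	 * and otherwise the number of remaining jiffies.
	 */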
2975 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2976 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2977 if (res > 0 && status)
2978 *status = ch->tsk_mgmt_status;
2979 mutex_unlock(&rport->mutex);
2980
2981 WARN_ON_ONCE(res < 0);
2982
2983 return res > 0 ? 0 : -1;
2984}
2985
2986static int srp_abort(struct scsi_cmnd *scmnd)
2987{
2988 struct srp_target_port *target = host_to_target(scmnd->device->host);
2989 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2990 u32 tag;
2991 u16 ch_idx;
2992 struct srp_rdma_ch *ch;
2993 int ret;
2994
2995 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2996
2997 if (!req)
2998 return SUCCESS;
2999 tag = blk_mq_unique_tag(scmnd->request);
3000 ch_idx = blk_mq_unique_tag_to_hwq(tag);
3001 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
3002 return SUCCESS;
3003 ch = &target->ch[ch_idx];
3004 if (!srp_claim_req(ch, req, NULL, scmnd))
3005 return SUCCESS;
3006 shost_printk(KERN_ERR, target->scsi_host,
3007 "Sending SRP abort for tag %#x\n", tag);
3008 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
3009 SRP_TSK_ABORT_TASK, NULL) == 0)
3010 ret = SUCCESS;
3011 else if (target->rport->state == SRP_RPORT_LOST)
3012 ret = FAST_IO_FAIL;
3013 else
3014 ret = FAILED;
3015 if (ret == SUCCESS) {
3016 srp_free_req(ch, req, scmnd, 0);
3017 scmnd->result = DID_ABORT << 16;
3018 scmnd->scsi_done(scmnd);
3019 }
3020
3021 return ret;
3022}
3023
3024static int srp_reset_device(struct scsi_cmnd *scmnd)
3025{
3026 struct srp_target_port *target = host_to_target(scmnd->device->host);
3027 struct srp_rdma_ch *ch;
3028 u8 status;
3029
3030 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
3031
3032 ch = &target->ch[0];
3033 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
3034 SRP_TSK_LUN_RESET, &status))
3035 return FAILED;
3036 if (status)
3037 return FAILED;
3038
3039 return SUCCESS;
3040}
3041
3042static int srp_reset_host(struct scsi_cmnd *scmnd)
3043{
3044 struct srp_target_port *target = host_to_target(scmnd->device->host);
3045
3046 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
3047
3048 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
3049}
3050
3051static int srp_target_alloc(struct scsi_target *starget)
3052{
3053 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
3054 struct srp_target_port *target = host_to_target(shost);
3055
3056 if (target->target_can_queue)
3057 starget->can_queue = target->target_can_queue;
3058 return 0;
3059}
3060
3061static int srp_slave_configure(struct scsi_device *sdev)
3062{
3063 struct Scsi_Host *shost = sdev->host;
3064 struct srp_target_port *target = host_to_target(shost);
3065 struct request_queue *q = sdev->request_queue;
3066 unsigned long timeout;
3067
3068 if (sdev->type == TYPE_DISK) {
3069 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
3070 blk_queue_rq_timeout(q, timeout);
3071 }
3072
3073 return 0;
3074}
3075
3076static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
3077 char *buf)
3078{
3079 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3080
3081 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
3082}
3083
3084static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
3085 char *buf)
3086{
3087 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3088
3089 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
3090}
3091
3092static ssize_t show_service_id(struct device *dev,
3093 struct device_attribute *attr, char *buf)
3094{
3095 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3096
3097 if (target->using_rdma_cm)
3098 return -ENOENT;
3099 return sprintf(buf, "0x%016llx\n",
3100 be64_to_cpu(target->ib_cm.service_id));
3101}
3102
3103static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
3104 char *buf)
3105{
3106 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3107
3108 if (target->using_rdma_cm)
3109 return -ENOENT;
3110 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
3111}
3112
3113static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
3114 char *buf)
3115{
3116 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3117
3118 return sprintf(buf, "%pI6\n", target->sgid.raw);
3119}
3120
3121static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
3122 char *buf)
3123{
3124 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3125 struct srp_rdma_ch *ch = &target->ch[0];
3126
3127 if (target->using_rdma_cm)
3128 return -ENOENT;
3129 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
3130}
3131
3132static ssize_t show_orig_dgid(struct device *dev,
3133 struct device_attribute *attr, char *buf)
3134{
3135 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3136
3137 if (target->using_rdma_cm)
3138 return -ENOENT;
3139 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
3140}
3141
3142static ssize_t show_req_lim(struct device *dev,
3143 struct device_attribute *attr, char *buf)
3144{
3145 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3146 struct srp_rdma_ch *ch;
3147 int i, req_lim = INT_MAX;
3148
3149 for (i = 0; i < target->ch_count; i++) {
3150 ch = &target->ch[i];
3151 req_lim = min(req_lim, ch->req_lim);
3152 }
3153 return sprintf(buf, "%d\n", req_lim);
3154}
3155
3156static ssize_t show_zero_req_lim(struct device *dev,
3157 struct device_attribute *attr, char *buf)
3158{
3159 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3160
3161 return sprintf(buf, "%d\n", target->zero_req_lim);
3162}
3163
3164static ssize_t show_local_ib_port(struct device *dev,
3165 struct device_attribute *attr, char *buf)
3166{
3167 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3168
3169 return sprintf(buf, "%d\n", target->srp_host->port);
3170}
3171
3172static ssize_t show_local_ib_device(struct device *dev,
3173 struct device_attribute *attr, char *buf)
3174{
3175 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3176
3177 return sprintf(buf, "%s\n",
3178 dev_name(&target->srp_host->srp_dev->dev->dev));
3179}
3180
3181static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
3182 char *buf)
3183{
3184 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3185
3186 return sprintf(buf, "%d\n", target->ch_count);
3187}
3188
3189static ssize_t show_comp_vector(struct device *dev,
3190 struct device_attribute *attr, char *buf)
3191{
3192 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3193
3194 return sprintf(buf, "%d\n", target->comp_vector);
3195}
3196
3197static ssize_t show_tl_retry_count(struct device *dev,
3198 struct device_attribute *attr, char *buf)
3199{
3200 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3201
3202 return sprintf(buf, "%d\n", target->tl_retry_count);
3203}
3204
3205static ssize_t show_cmd_sg_entries(struct device *dev,
3206 struct device_attribute *attr, char *buf)
3207{
3208 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3209
3210 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
3211}
3212
3213static ssize_t show_allow_ext_sg(struct device *dev,
3214 struct device_attribute *attr, char *buf)
3215{
3216 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3217
3218 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
3219}
3220
3221static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
3222static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
3223static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
3224static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
3225static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
3226static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
3227static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
3228static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
3229static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
3230static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
3231static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
3232static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
3233static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
3234static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
3235static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
3236static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
3237
3238static struct device_attribute *srp_host_attrs[] = {
3239 &dev_attr_id_ext,
3240 &dev_attr_ioc_guid,
3241 &dev_attr_service_id,
3242 &dev_attr_pkey,
3243 &dev_attr_sgid,
3244 &dev_attr_dgid,
3245 &dev_attr_orig_dgid,
3246 &dev_attr_req_lim,
3247 &dev_attr_zero_req_lim,
3248 &dev_attr_local_ib_port,
3249 &dev_attr_local_ib_device,
3250 &dev_attr_ch_count,
3251 &dev_attr_comp_vector,
3252 &dev_attr_tl_retry_count,
3253 &dev_attr_cmd_sg_entries,
3254 &dev_attr_allow_ext_sg,
3255 NULL
3256};
3257
3258static struct scsi_host_template srp_template = {
3259 .module = THIS_MODULE,
3260 .name = "InfiniBand SRP initiator",
3261 .proc_name = DRV_NAME,
3262 .target_alloc = srp_target_alloc,
3263 .slave_configure = srp_slave_configure,
3264 .info = srp_target_info,
3265 .queuecommand = srp_queuecommand,
3266 .change_queue_depth = srp_change_queue_depth,
3267 .eh_timed_out = srp_timed_out,
3268 .eh_abort_handler = srp_abort,
3269 .eh_device_reset_handler = srp_reset_device,
3270 .eh_host_reset_handler = srp_reset_host,
3271 .skip_settle_delay = true,
3272 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
3273 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
3274 .this_id = -1,
3275 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
3276 .shost_attrs = srp_host_attrs,
3277 .track_queue_depth = 1,
3278};
3279
3280static int srp_sdev_count(struct Scsi_Host *host)
3281{
3282 struct scsi_device *sdev;
3283 int c = 0;
3284
3285 shost_for_each_device(sdev, host)
3286 c++;
3287
3288 return c;
3289}
3290
3291/*
3292 * Return values:
3293 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3294 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3295 * removal has been scheduled.
3296 * 0 and target->state != SRP_TARGET_REMOVED upon success.
3297 */
3298static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3299{
3300 struct srp_rport_identifiers ids;
3301 struct srp_rport *rport;
3302
3303 target->state = SRP_TARGET_SCANNING;
3304 sprintf(target->target_name, "SRP.T10:%016llX",
3305 be64_to_cpu(target->id_ext));
3306
3307 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
3308 return -ENODEV;
3309
3310 memcpy(ids.port_id, &target->id_ext, 8);
3311 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3312 ids.roles = SRP_RPORT_ROLE_TARGET;
3313 rport = srp_rport_add(target->scsi_host, &ids);
3314 if (IS_ERR(rport)) {
3315 scsi_remove_host(target->scsi_host);
3316 return PTR_ERR(rport);
3317 }
3318
3319 rport->lld_data = target;
3320 target->rport = rport;
3321
3322 spin_lock(&host->target_lock);
3323 list_add_tail(&target->list, &host->target_list);
3324 spin_unlock(&host->target_lock);
3325
3326 scsi_scan_target(&target->scsi_host->shost_gendev,
3327 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
3328
3329 if (srp_connected_ch(target) < target->ch_count ||
3330 target->qp_in_error) {
3331 shost_printk(KERN_INFO, target->scsi_host,
3332 PFX "SCSI scan failed - removing SCSI host\n");
3333 srp_queue_remove_work(target);
3334 goto out;
3335 }
3336
3337 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3338 dev_name(&target->scsi_host->shost_gendev),
3339 srp_sdev_count(target->scsi_host));
3340
3341 spin_lock_irq(&target->lock);
3342 if (target->state == SRP_TARGET_SCANNING)
3343 target->state = SRP_TARGET_LIVE;
3344 spin_unlock_irq(&target->lock);
3345
3346out:
3347 return 0;
3348}
3349
3350static void srp_release_dev(struct device *dev)
3351{
3352 struct srp_host *host =
3353 container_of(dev, struct srp_host, dev);
3354
3355 complete(&host->released);
3356}
3357
3358static struct class srp_class = {
3359 .name = "infiniband_srp",
3360 .dev_release = srp_release_dev
3361};
3362
3363/**
3364 * srp_conn_unique() - check whether the connection to a target is unique
3365 * @host: SRP host.
3366 * @target: SRP target port.
3367 */
3368static bool srp_conn_unique(struct srp_host *host,
3369 struct srp_target_port *target)
3370{
3371 struct srp_target_port *t;
3372 bool ret = false;
3373
3374 if (target->state == SRP_TARGET_REMOVED)
3375 goto out;
3376
3377 ret = true;
3378
3379 spin_lock(&host->target_lock);
3380 list_for_each_entry(t, &host->target_list, list) {
3381 if (t != target &&
3382 target->id_ext == t->id_ext &&
3383 target->ioc_guid == t->ioc_guid &&
3384 target->initiator_ext == t->initiator_ext) {
3385 ret = false;
3386 break;
3387 }
3388 }
3389 spin_unlock(&host->target_lock);
3390
3391out:
3392 return ret;
3393}
3394
3395/*
3396 * Target ports are added by writing
3397 *
3398 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3399 * pkey=<P_Key>,service_id=<service ID>
3400 * or
3401 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3402 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3403 *
3404 * to the add_target sysfs attribute.
3405 */
3406enum {
3407 SRP_OPT_ERR = 0,
3408 SRP_OPT_ID_EXT = 1 << 0,
3409 SRP_OPT_IOC_GUID = 1 << 1,
3410 SRP_OPT_DGID = 1 << 2,
3411 SRP_OPT_PKEY = 1 << 3,
3412 SRP_OPT_SERVICE_ID = 1 << 4,
3413 SRP_OPT_MAX_SECT = 1 << 5,
3414 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3415 SRP_OPT_IO_CLASS = 1 << 7,
3416 SRP_OPT_INITIATOR_EXT = 1 << 8,
3417 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3418 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3419 SRP_OPT_SG_TABLESIZE = 1 << 11,
3420 SRP_OPT_COMP_VECTOR = 1 << 12,
3421 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3422 SRP_OPT_QUEUE_SIZE = 1 << 14,
3423 SRP_OPT_IP_SRC = 1 << 15,
3424 SRP_OPT_IP_DEST = 1 << 16,
3425 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
3426 SRP_OPT_MAX_IT_IU_SIZE = 1 << 18,
3427};
3428
3429static unsigned int srp_opt_mandatory[] = {
3430 SRP_OPT_ID_EXT |
3431 SRP_OPT_IOC_GUID |
3432 SRP_OPT_DGID |
3433 SRP_OPT_PKEY |
3434 SRP_OPT_SERVICE_ID,
3435 SRP_OPT_ID_EXT |
3436 SRP_OPT_IOC_GUID |
3437 SRP_OPT_IP_DEST,
3438};
3439
3440static const match_table_t srp_opt_tokens = {
3441 { SRP_OPT_ID_EXT, "id_ext=%s" },
3442 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3443 { SRP_OPT_DGID, "dgid=%s" },
3444 { SRP_OPT_PKEY, "pkey=%x" },
3445 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3446 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3447 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3448 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" },
3449 { SRP_OPT_IO_CLASS, "io_class=%x" },
3450 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3451 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3452 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3453 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3454 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3455 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3456 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3457 { SRP_OPT_IP_SRC, "src=%s" },
3458 { SRP_OPT_IP_DEST, "dest=%s" },
3459 { SRP_OPT_MAX_IT_IU_SIZE, "max_it_iu_size=%d" },
3460 { SRP_OPT_ERR, NULL }
3461};
3462
3463/**
3464 * srp_parse_in - parse an IP address and port number combination
3465 * @net: [in] Network namespace.
3466 * @sa: [out] Address family, IP address and port number.
3467 * @addr_port_str: [in] IP address and port number.
3468 * @has_port: [out] Whether or not @addr_port_str includes a port number.
3469 *
3470 * Parse the following address formats:
3471 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3472 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3473 */
3474static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3475 const char *addr_port_str, bool *has_port)
3476{
3477 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
3478 char *port_str;
3479 int ret;
3480
3481 if (!addr)
3482 return -ENOMEM;
3483 port_str = strrchr(addr, ':');
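	/*
	 * A ']' after the last ':' means that the colon is part of a
	 * bracketed IPv6 address rather than a port number separator.
	 */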
3484 if (port_str && strchr(port_str, ']'))
3485 port_str = NULL;
3486 if (port_str)
3487 *port_str++ = '\0';
3488 if (has_port)
3489 *has_port = port_str != NULL;
3490 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
3491 if (ret && addr[0]) {
3492 addr_end = addr + strlen(addr) - 1;
3493 if (addr[0] == '[' && *addr_end == ']') {
3494 *addr_end = '\0';
3495 ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
3496 port_str, sa);
3497 }
3498 }
3499 kfree(addr);
3500 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
3501 return ret;
3502}
3503
3504static int srp_parse_options(struct net *net, const char *buf,
3505 struct srp_target_port *target)
3506{
3507 char *options, *sep_opt;
3508 char *p;
3509 substring_t args[MAX_OPT_ARGS];
3510 unsigned long long ull;
3511 bool has_port;
3512 int opt_mask = 0;
3513 int token;
3514 int ret = -EINVAL;
3515 int i;
3516
3517 options = kstrdup(buf, GFP_KERNEL);
3518 if (!options)
3519 return -ENOMEM;
3520
3521 sep_opt = options;
3522 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3523 if (!*p)
3524 continue;
3525
3526 token = match_token(p, srp_opt_tokens, args);
3527 opt_mask |= token;
3528
3529 switch (token) {
3530 case SRP_OPT_ID_EXT:
3531 p = match_strdup(args);
3532 if (!p) {
3533 ret = -ENOMEM;
3534 goto out;
3535 }
3536 ret = kstrtoull(p, 16, &ull);
3537 if (ret) {
3538 pr_warn("invalid id_ext parameter '%s'\n", p);
3539 kfree(p);
3540 goto out;
3541 }
3542 target->id_ext = cpu_to_be64(ull);
3543 kfree(p);
3544 break;
3545
3546 case SRP_OPT_IOC_GUID:
3547 p = match_strdup(args);
3548 if (!p) {
3549 ret = -ENOMEM;
3550 goto out;
3551 }
3552 ret = kstrtoull(p, 16, &ull);
3553 if (ret) {
3554 pr_warn("invalid ioc_guid parameter '%s'\n", p);
3555 kfree(p);
3556 goto out;
3557 }
3558 target->ioc_guid = cpu_to_be64(ull);
3559 kfree(p);
3560 break;
3561
3562 case SRP_OPT_DGID:
3563 p = match_strdup(args);
3564 if (!p) {
3565 ret = -ENOMEM;
3566 goto out;
3567 }
3568 if (strlen(p) != 32) {
3569 pr_warn("bad dest GID parameter '%s'\n", p);
3570 kfree(p);
3571 goto out;
3572 }
3573
3574 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3575 kfree(p);
3576 if (ret < 0)
3577 goto out;
3578 break;
3579
3580 case SRP_OPT_PKEY:
3581 if (match_hex(args, &token)) {
3582 pr_warn("bad P_Key parameter '%s'\n", p);
3583 goto out;
3584 }
3585 target->ib_cm.pkey = cpu_to_be16(token);
3586 break;
3587
3588 case SRP_OPT_SERVICE_ID:
3589 p = match_strdup(args);
3590 if (!p) {
3591 ret = -ENOMEM;
3592 goto out;
3593 }
3594 ret = kstrtoull(p, 16, &ull);
3595 if (ret) {
3596 pr_warn("bad service_id parameter '%s'\n", p);
3597 kfree(p);
3598 goto out;
3599 }
3600 target->ib_cm.service_id = cpu_to_be64(ull);
3601 kfree(p);
3602 break;
3603
3604 case SRP_OPT_IP_SRC:
3605 p = match_strdup(args);
3606 if (!p) {
3607 ret = -ENOMEM;
3608 goto out;
3609 }
3610 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p,
3611 NULL);
3612 if (ret < 0) {
3613 pr_warn("bad source parameter '%s'\n", p);
3614 kfree(p);
3615 goto out;
3616 }
3617 target->rdma_cm.src_specified = true;
3618 kfree(p);
3619 break;
3620
3621 case SRP_OPT_IP_DEST:
3622 p = match_strdup(args);
3623 if (!p) {
3624 ret = -ENOMEM;
3625 goto out;
3626 }
3627 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p,
3628 &has_port);
3629 if (!has_port)
3630 ret = -EINVAL;
3631 if (ret < 0) {
3632 pr_warn("bad dest parameter '%s'\n", p);
3633 kfree(p);
3634 goto out;
3635 }
3636 target->using_rdma_cm = true;
3637 kfree(p);
3638 break;
3639
3640 case SRP_OPT_MAX_SECT:
3641 if (match_int(args, &token)) {
3642 pr_warn("bad max sect parameter '%s'\n", p);
3643 goto out;
3644 }
3645 target->scsi_host->max_sectors = token;
3646 break;
3647
3648 case SRP_OPT_QUEUE_SIZE:
3649 if (match_int(args, &token) || token < 1) {
3650 pr_warn("bad queue_size parameter '%s'\n", p);
3651 goto out;
3652 }
3653 target->scsi_host->can_queue = token;
3654 target->queue_size = token + SRP_RSP_SQ_SIZE +
3655 SRP_TSK_MGMT_SQ_SIZE;
3656 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3657 target->scsi_host->cmd_per_lun = token;
3658 break;
3659
3660 case SRP_OPT_MAX_CMD_PER_LUN:
3661 if (match_int(args, &token) || token < 1) {
3662 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3663 p);
3664 goto out;
3665 }
3666 target->scsi_host->cmd_per_lun = token;
3667 break;
3668
3669 case SRP_OPT_TARGET_CAN_QUEUE:
3670 if (match_int(args, &token) || token < 1) {
3671 pr_warn("bad max target_can_queue parameter '%s'\n",
3672 p);
3673 goto out;
3674 }
3675 target->target_can_queue = token;
3676 break;
3677
3678 case SRP_OPT_IO_CLASS:
3679 if (match_hex(args, &token)) {
3680 pr_warn("bad IO class parameter '%s'\n", p);
3681 goto out;
3682 }
3683 if (token != SRP_REV10_IB_IO_CLASS &&
3684 token != SRP_REV16A_IB_IO_CLASS) {
3685 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3686 token, SRP_REV10_IB_IO_CLASS,
3687 SRP_REV16A_IB_IO_CLASS);
3688 goto out;
3689 }
3690 target->io_class = token;
3691 break;
3692
3693 case SRP_OPT_INITIATOR_EXT:
3694 p = match_strdup(args);
3695 if (!p) {
3696 ret = -ENOMEM;
3697 goto out;
3698 }
3699 ret = kstrtoull(p, 16, &ull);
3700 if (ret) {
3701 pr_warn("bad initiator_ext value '%s'\n", p);
3702 kfree(p);
3703 goto out;
3704 }
3705 target->initiator_ext = cpu_to_be64(ull);
3706 kfree(p);
3707 break;
3708
3709 case SRP_OPT_CMD_SG_ENTRIES:
3710 if (match_int(args, &token) || token < 1 || token > 255) {
3711 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3712 p);
3713 goto out;
3714 }
3715 target->cmd_sg_cnt = token;
3716 break;
3717
3718 case SRP_OPT_ALLOW_EXT_SG:
3719 if (match_int(args, &token)) {
3720 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3721 goto out;
3722 }
3723 target->allow_ext_sg = !!token;
3724 break;
3725
3726 case SRP_OPT_SG_TABLESIZE:
3727 if (match_int(args, &token) || token < 1 ||
3728 token > SG_MAX_SEGMENTS) {
3729 pr_warn("bad max sg_tablesize parameter '%s'\n",
3730 p);
3731 goto out;
3732 }
3733 target->sg_tablesize = token;
3734 break;
3735
3736 case SRP_OPT_COMP_VECTOR:
3737 if (match_int(args, &token) || token < 0) {
3738 pr_warn("bad comp_vector parameter '%s'\n", p);
3739 goto out;
3740 }
3741 target->comp_vector = token;
3742 break;
3743
3744 case SRP_OPT_TL_RETRY_COUNT:
3745 if (match_int(args, &token) || token < 2 || token > 7) {
3746 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3747 p);
3748 goto out;
3749 }
3750 target->tl_retry_count = token;
3751 break;
3752
3753 case SRP_OPT_MAX_IT_IU_SIZE:
3754 if (match_int(args, &token) || token < 0) {
3755 pr_warn("bad maximum initiator to target IU size '%s'\n", p);
3756 goto out;
3757 }
3758 target->max_it_iu_size = token;
3759 break;
3760
3761 default:
3762 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3763 p);
3764 goto out;
3765 }
3766 }
3767
3768 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3769 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3770 ret = 0;
3771 break;
3772 }
3773 }
3774 if (ret)
3775 pr_warn("target creation request is missing one or more parameters\n");
3776
3777 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3778 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3779 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3780 target->scsi_host->cmd_per_lun,
3781 target->scsi_host->can_queue);
3782
3783out:
3784 kfree(options);
3785 return ret;
3786}
3787
3788static ssize_t srp_create_target(struct device *dev,
3789 struct device_attribute *attr,
3790 const char *buf, size_t count)
3791{
3792 struct srp_host *host =
3793 container_of(dev, struct srp_host, dev);
3794 struct Scsi_Host *target_host;
3795 struct srp_target_port *target;
3796 struct srp_rdma_ch *ch;
3797 struct srp_device *srp_dev = host->srp_dev;
3798 struct ib_device *ibdev = srp_dev->dev;
3799 int ret, node_idx, node, cpu, i;
3800 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3801 bool multich = false;
3802 uint32_t max_iu_len;
3803
3804 target_host = scsi_host_alloc(&srp_template,
3805 sizeof (struct srp_target_port));
3806 if (!target_host)
3807 return -ENOMEM;
3808
3809 target_host->transportt = ib_srp_transport_template;
3810 target_host->max_channel = 0;
3811 target_host->max_id = 1;
3812 target_host->max_lun = -1LL;
3813 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3814 target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
3815
3816 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
3817 target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
3818
3819 target = host_to_target(target_host);
3820
3821 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3822 target->io_class = SRP_REV16A_IB_IO_CLASS;
3823 target->scsi_host = target_host;
3824 target->srp_host = host;
3825 target->lkey = host->srp_dev->pd->local_dma_lkey;
3826 target->global_rkey = host->srp_dev->global_rkey;
3827 target->cmd_sg_cnt = cmd_sg_entries;
3828 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3829 target->allow_ext_sg = allow_ext_sg;
3830 target->tl_retry_count = 7;
3831 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3832
3833 /*
3834	 * Prevent the SCSI host from being removed by srp_remove_target()
3835	 * before this function returns.
3836 */
3837 scsi_host_get(target->scsi_host);
3838
3839 ret = mutex_lock_interruptible(&host->add_target_mutex);
3840 if (ret < 0)
3841 goto put;
3842
3843 ret = srp_parse_options(target->net, buf, target);
3844 if (ret)
3845 goto out;
3846
3847 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3848
3849 if (!srp_conn_unique(target->srp_host, target)) {
3850 if (target->using_rdma_cm) {
3851 shost_printk(KERN_INFO, target->scsi_host,
3852 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3853 be64_to_cpu(target->id_ext),
3854 be64_to_cpu(target->ioc_guid),
3855 &target->rdma_cm.dst);
3856 } else {
3857 shost_printk(KERN_INFO, target->scsi_host,
3858 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3859 be64_to_cpu(target->id_ext),
3860 be64_to_cpu(target->ioc_guid),
3861 be64_to_cpu(target->initiator_ext));
3862 }
3863 ret = -EEXIST;
3864 goto out;
3865 }
3866
3867 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3868 target->cmd_sg_cnt < target->sg_tablesize) {
3869 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3870 target->sg_tablesize = target->cmd_sg_cnt;
3871 }
3872
3873 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3874 bool gaps_reg = (ibdev->attrs.device_cap_flags &
3875 IB_DEVICE_SG_GAPS_REG);
3876
3877 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3878 (ilog2(srp_dev->mr_page_size) - 9);
3879 if (!gaps_reg) {
3880 /*
3881 * FR and FMR can only map one HCA page per entry. If
3882			 * the start address is not aligned on an HCA page
3883			 * boundary, two entries will be used for the head and
3884 * the tail although these two entries combined
3885 * contain at most one HCA page of data. Hence the "+
3886 * 1" in the calculation below.
3887 *
3888			 * The indirect data buffer descriptor is contiguous,
3889 * so the memory for that buffer will only be
3890 * registered if register_always is true. Hence add
3891 * one to mr_per_cmd if register_always has been set.
3892 */
3893 mr_per_cmd = register_always +
3894 (target->scsi_host->max_sectors + 1 +
3895 max_sectors_per_mr - 1) / max_sectors_per_mr;
3896 } else {
3897 mr_per_cmd = register_always +
3898 (target->sg_tablesize +
3899 srp_dev->max_pages_per_mr - 1) /
3900 srp_dev->max_pages_per_mr;
3901 }
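		/*
		 * Hypothetical worked example for the !gaps_reg branch (the
		 * numbers are illustrative, not taken from a real HCA): with
		 * register_always, max_sectors = 1024, mr_page_size = 4096
		 * and max_pages_per_mr = 256, max_sectors_per_mr =
		 * 256 << (12 - 9) = 2048 and mr_per_cmd =
		 * 1 + (1024 + 1 + 2047) / 2048 = 2.
		 */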
3902 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3903 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3904 max_sectors_per_mr, mr_per_cmd);
3905 }
3906
3907 target_host->sg_tablesize = target->sg_tablesize;
3908 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3909 target->mr_per_cmd = mr_per_cmd;
3910 target->indirect_size = target->sg_tablesize *
3911 sizeof (struct srp_direct_buf);
3912 max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
3913 srp_use_imm_data,
3914 target->max_it_iu_size);
3915
3916 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3917 INIT_WORK(&target->remove_work, srp_remove_work);
3918 spin_lock_init(&target->lock);
3919 ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
3920 if (ret)
3921 goto out;
3922
3923 ret = -ENOMEM;
3924 target->ch_count = max_t(unsigned, num_online_nodes(),
3925 min(ch_count ? :
3926 min(4 * num_online_nodes(),
3927 ibdev->num_comp_vectors),
3928 num_online_cpus()));
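	/*
	 * Reading of the expression above: create at least as many channels
	 * as there are online NUMA nodes, and otherwise the smaller of
	 * ch_count (if set), 4 * num_online_nodes() capped by the number of
	 * completion vectors, and the number of online CPUs. Illustrative
	 * example with made-up numbers: 2 nodes, 16 CPUs, 8 completion
	 * vectors and ch_count unset gives max(2, min(min(8, 8), 16)) = 8.
	 */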
3929 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3930 GFP_KERNEL);
3931 if (!target->ch)
3932 goto out;
3933
3934 node_idx = 0;
3935 for_each_online_node(node) {
3936 const int ch_start = (node_idx * target->ch_count /
3937 num_online_nodes());
3938 const int ch_end = ((node_idx + 1) * target->ch_count /
3939 num_online_nodes());
3940 const int cv_start = node_idx * ibdev->num_comp_vectors /
3941 num_online_nodes();
3942 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3943 num_online_nodes();
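		/*
		 * The four values above spread the channels and the
		 * completion vectors evenly across the online NUMA nodes.
		 * Illustrative example (made-up numbers): with ch_count = 8,
		 * num_comp_vectors = 8 and 2 nodes, node 0 gets channels
		 * [0, 4) and vectors [0, 4) while node 1 gets channels
		 * [4, 8) and vectors [4, 8).
		 */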
3944 int cpu_idx = 0;
3945
3946 for_each_online_cpu(cpu) {
3947 if (cpu_to_node(cpu) != node)
3948 continue;
3949 if (ch_start + cpu_idx >= ch_end)
3950 continue;
3951 ch = &target->ch[ch_start + cpu_idx];
3952 ch->target = target;
3953 ch->comp_vector = cv_start == cv_end ? cv_start :
3954 cv_start + cpu_idx % (cv_end - cv_start);
3955 spin_lock_init(&ch->lock);
3956 INIT_LIST_HEAD(&ch->free_tx);
3957 ret = srp_new_cm_id(ch);
3958 if (ret)
3959 goto err_disconnect;
3960
3961 ret = srp_create_ch_ib(ch);
3962 if (ret)
3963 goto err_disconnect;
3964
3965 ret = srp_alloc_req_data(ch);
3966 if (ret)
3967 goto err_disconnect;
3968
3969 ret = srp_connect_ch(ch, max_iu_len, multich);
3970 if (ret) {
3971 char dst[64];
3972
3973 if (target->using_rdma_cm)
3974 snprintf(dst, sizeof(dst), "%pIS",
3975 &target->rdma_cm.dst);
3976 else
3977 snprintf(dst, sizeof(dst), "%pI6",
3978 target->ib_cm.orig_dgid.raw);
3979 shost_printk(KERN_ERR, target->scsi_host,
3980 PFX "Connection %d/%d to %s failed\n",
3981 ch_start + cpu_idx,
3982 target->ch_count, dst);
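				/*
				 * If even the first channel cannot be
				 * connected the target is unusable, so give
				 * up. Otherwise keep the channels that did
				 * connect and continue with a reduced
				 * ch_count.
				 */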
3983 if (node_idx == 0 && cpu_idx == 0) {
3984 goto free_ch;
3985 } else {
3986 srp_free_ch_ib(target, ch);
3987 srp_free_req_data(target, ch);
3988 target->ch_count = ch - target->ch;
3989 goto connected;
3990 }
3991 }
3992
3993 multich = true;
3994 cpu_idx++;
3995 }
3996 node_idx++;
3997 }
3998
3999connected:
4000 target->scsi_host->nr_hw_queues = target->ch_count;
4001
4002 ret = srp_add_target(host, target);
4003 if (ret)
4004 goto err_disconnect;
4005
4006 if (target->state != SRP_TARGET_REMOVED) {
4007 if (target->using_rdma_cm) {
4008 shost_printk(KERN_DEBUG, target->scsi_host, PFX
4009 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
4010 be64_to_cpu(target->id_ext),
4011 be64_to_cpu(target->ioc_guid),
4012 target->sgid.raw, &target->rdma_cm.dst);
4013 } else {
4014 shost_printk(KERN_DEBUG, target->scsi_host, PFX
4015 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
4016 be64_to_cpu(target->id_ext),
4017 be64_to_cpu(target->ioc_guid),
4018 be16_to_cpu(target->ib_cm.pkey),
4019 be64_to_cpu(target->ib_cm.service_id),
4020 target->sgid.raw,
4021 target->ib_cm.orig_dgid.raw);
4022 }
4023 }
4024
4025 ret = count;
4026
4027out:
4028 mutex_unlock(&host->add_target_mutex);
4029
4030put:
4031 scsi_host_put(target->scsi_host);
4032 if (ret < 0) {
4033 /*
4034 * If a call to srp_remove_target() has not been scheduled,
4035		 * drop the network namespace reference that was obtained
4036 * earlier in this function.
4037 */
4038 if (target->state != SRP_TARGET_REMOVED)
4039 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
4040 scsi_host_put(target->scsi_host);
4041 }
4042
4043 return ret;
4044
4045err_disconnect:
4046 srp_disconnect_target(target);
4047
4048free_ch:
4049 for (i = 0; i < target->ch_count; i++) {
4050 ch = &target->ch[i];
4051 srp_free_ch_ib(target, ch);
4052 srp_free_req_data(target, ch);
4053 }
4054
4055 kfree(target->ch);
4056 goto out;
4057}
4058
4059static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
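
/*
 * Illustrative user-space usage of the add_target attribute defined above
 * (all parameter values are placeholders, not taken from this file):
 *
 *   echo "id_ext=<id>,ioc_guid=<guid>,dgid=<dgid>,pkey=ffff,service_id=<sid>" \
 *       > /sys/class/infiniband_srp/srp-<hca>-<port>/add_target
 */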
4060
4061static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
4062 char *buf)
4063{
4064 struct srp_host *host = container_of(dev, struct srp_host, dev);
4065
4066 return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
4067}
4068
4069static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
4070
4071static ssize_t show_port(struct device *dev, struct device_attribute *attr,
4072 char *buf)
4073{
4074 struct srp_host *host = container_of(dev, struct srp_host, dev);
4075
4076 return sprintf(buf, "%d\n", host->port);
4077}
4078
4079static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
4080
4081static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
4082{
4083 struct srp_host *host;
4084
4085 host = kzalloc(sizeof *host, GFP_KERNEL);
4086 if (!host)
4087 return NULL;
4088
4089 INIT_LIST_HEAD(&host->target_list);
4090 spin_lock_init(&host->target_lock);
4091 init_completion(&host->released);
4092 mutex_init(&host->add_target_mutex);
4093 host->srp_dev = device;
4094 host->port = port;
4095
4096 host->dev.class = &srp_class;
4097 host->dev.parent = device->dev->dev.parent;
4098 dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
4099 port);
4100
4101 if (device_register(&host->dev))
4102 goto free_host;
4103 if (device_create_file(&host->dev, &dev_attr_add_target))
4104 goto err_class;
4105 if (device_create_file(&host->dev, &dev_attr_ibdev))
4106 goto err_class;
4107 if (device_create_file(&host->dev, &dev_attr_port))
4108 goto err_class;
4109
4110 return host;
4111
4112err_class:
4113 device_unregister(&host->dev);
4114
4115free_host:
4116 kfree(host);
4117
4118 return NULL;
4119}
4120
4121static void srp_rename_dev(struct ib_device *device, void *client_data)
4122{
4123 struct srp_device *srp_dev = client_data;
4124 struct srp_host *host, *tmp_host;
4125
4126 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
4127 char name[IB_DEVICE_NAME_MAX + 8];
4128
4129 snprintf(name, sizeof(name), "srp-%s-%d",
4130 dev_name(&device->dev), host->port);
4131 device_rename(&host->dev, name);
4132 }
4133}
4134
4135static void srp_add_one(struct ib_device *device)
4136{
4137 struct srp_device *srp_dev;
4138 struct ib_device_attr *attr = &device->attrs;
4139 struct srp_host *host;
4140 int mr_page_shift;
4141 unsigned int p;
4142 u64 max_pages_per_mr;
4143 unsigned int flags = 0;
4144
4145 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
4146 if (!srp_dev)
4147 return;
4148
4149 /*
4150 * Use the smallest page size supported by the HCA, down to a
4151 * minimum of 4096 bytes. We're unlikely to build large sglists
4152 * out of smaller entries.
4153 */
4154 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
4155 srp_dev->mr_page_size = 1 << mr_page_shift;
4156 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
4157 max_pages_per_mr = attr->max_mr_size;
4158 do_div(max_pages_per_mr, srp_dev->mr_page_size);
4159 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
4160 attr->max_mr_size, srp_dev->mr_page_size,
4161 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
4162 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
4163 max_pages_per_mr);
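	/*
	 * Hypothetical example (illustrative values): a device whose smallest
	 * supported page size is 4 KiB gives mr_page_shift = 12 and
	 * mr_page_size = 4096; with max_mr_size = 4 GiB this works out to
	 * 4 GiB / 4 KiB = 1048576 pages, which is then capped at
	 * SRP_MAX_PAGES_PER_MR.
	 */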
4164
4165 srp_dev->has_fmr = (device->ops.alloc_fmr &&
4166 device->ops.dealloc_fmr &&
4167 device->ops.map_phys_fmr &&
4168 device->ops.unmap_fmr);
4169 srp_dev->has_fr = (attr->device_cap_flags &
4170 IB_DEVICE_MEM_MGT_EXTENSIONS);
4171 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
4172 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
4173 } else if (!never_register &&
4174 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
4175 srp_dev->use_fast_reg = (srp_dev->has_fr &&
4176 (!srp_dev->has_fmr || prefer_fr));
4177 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
4178 }
4179
4180 if (never_register || !register_always ||
4181 (!srp_dev->has_fmr && !srp_dev->has_fr))
4182 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
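	/*
	 * The unsafe global rkey is only requested when per-command memory
	 * registration is disabled or unsupported, so that data buffers can
	 * still be described to the target without registering them first.
	 */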
4183
4184 if (srp_dev->use_fast_reg) {
4185 srp_dev->max_pages_per_mr =
4186 min_t(u32, srp_dev->max_pages_per_mr,
4187 attr->max_fast_reg_page_list_len);
4188 }
4189 srp_dev->mr_max_size = srp_dev->mr_page_size *
4190 srp_dev->max_pages_per_mr;
4191 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4192 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
4193 attr->max_fast_reg_page_list_len,
4194 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
4195
4196 INIT_LIST_HEAD(&srp_dev->dev_list);
4197
4198 srp_dev->dev = device;
4199 srp_dev->pd = ib_alloc_pd(device, flags);
4200 if (IS_ERR(srp_dev->pd))
4201 goto free_dev;
4202
4203 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
4204 srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
4205 WARN_ON_ONCE(srp_dev->global_rkey == 0);
4206 }
4207
4208 rdma_for_each_port (device, p) {
4209 host = srp_add_port(srp_dev, p);
4210 if (host)
4211 list_add_tail(&host->list, &srp_dev->dev_list);
4212 }
4213
4214 ib_set_client_data(device, &srp_client, srp_dev);
4215 return;
4216
4217free_dev:
4218 kfree(srp_dev);
4219}
4220
4221static void srp_remove_one(struct ib_device *device, void *client_data)
4222{
4223 struct srp_device *srp_dev;
4224 struct srp_host *host, *tmp_host;
4225 struct srp_target_port *target;
4226
4227 srp_dev = client_data;
4228 if (!srp_dev)
4229 return;
4230
4231 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
4232 device_unregister(&host->dev);
4233 /*
4234 * Wait for the sysfs entry to go away, so that no new
4235 * target ports can be created.
4236 */
4237 wait_for_completion(&host->released);
4238
4239 /*
4240 * Remove all target ports.
4241 */
4242 spin_lock(&host->target_lock);
4243 list_for_each_entry(target, &host->target_list, list)
4244 srp_queue_remove_work(target);
4245 spin_unlock(&host->target_lock);
4246
4247 /*
4248 * Wait for tl_err and target port removal tasks.
4249 */
4250 flush_workqueue(system_long_wq);
4251 flush_workqueue(srp_remove_wq);
4252
4253 kfree(host);
4254 }
4255
4256 ib_dealloc_pd(srp_dev->pd);
4257
4258 kfree(srp_dev);
4259}
4260
4261static struct srp_function_template ib_srp_transport_functions = {
4262 .has_rport_state = true,
4263 .reset_timer_if_blocked = true,
4264 .reconnect_delay = &srp_reconnect_delay,
4265 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
4266 .dev_loss_tmo = &srp_dev_loss_tmo,
4267 .reconnect = srp_rport_reconnect,
4268 .rport_delete = srp_rport_delete,
4269 .terminate_rport_io = srp_terminate_io,
4270};
4271
4272static int __init srp_init_module(void)
4273{
4274 int ret;
4275
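	/*
	 * Compile-time checks that the on-the-wire SRP structures have the
	 * expected fixed sizes.
	 */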
4276 BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
4277 BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
4278 BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
4279 BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
4280
4281 if (srp_sg_tablesize) {
4282 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4283 if (!cmd_sg_entries)
4284 cmd_sg_entries = srp_sg_tablesize;
4285 }
4286
4287 if (!cmd_sg_entries)
4288 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
4289
4290 if (cmd_sg_entries > 255) {
4291 pr_warn("Clamping cmd_sg_entries to 255\n");
4292 cmd_sg_entries = 255;
4293 }
4294
4295 if (!indirect_sg_entries)
4296 indirect_sg_entries = cmd_sg_entries;
4297 else if (indirect_sg_entries < cmd_sg_entries) {
4298 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4299 cmd_sg_entries);
4300 indirect_sg_entries = cmd_sg_entries;
4301 }
4302
4303 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
4304 pr_warn("Clamping indirect_sg_entries to %u\n",
4305 SG_MAX_SEGMENTS);
4306 indirect_sg_entries = SG_MAX_SEGMENTS;
4307 }
4308
4309 srp_remove_wq = create_workqueue("srp_remove");
4310 if (!srp_remove_wq) {
4311 ret = -ENOMEM;
4312 goto out;
4313 }
4314
4315 ret = -ENOMEM;
4316 ib_srp_transport_template =
4317 srp_attach_transport(&ib_srp_transport_functions);
4318 if (!ib_srp_transport_template)
4319 goto destroy_wq;
4320
4321 ret = class_register(&srp_class);
4322 if (ret) {
4323 pr_err("couldn't register class infiniband_srp\n");
4324 goto release_tr;
4325 }
4326
4327 ib_sa_register_client(&srp_sa_client);
4328
4329 ret = ib_register_client(&srp_client);
4330 if (ret) {
4331 pr_err("couldn't register IB client\n");
4332 goto unreg_sa;
4333 }
4334
4335out:
4336 return ret;
4337
4338unreg_sa:
4339 ib_sa_unregister_client(&srp_sa_client);
4340 class_unregister(&srp_class);
4341
4342release_tr:
4343 srp_release_transport(ib_srp_transport_template);
4344
4345destroy_wq:
4346 destroy_workqueue(srp_remove_wq);
4347 goto out;
4348}
4349
4350static void __exit srp_cleanup_module(void)
4351{
4352 ib_unregister_client(&srp_client);
4353 ib_sa_unregister_client(&srp_sa_client);
4354 class_unregister(&srp_class);
4355 srp_release_transport(ib_srp_transport_template);
4356 destroy_workqueue(srp_remove_wq);
4357}
4358
4359module_init(srp_init_module);
4360module_exit(srp_cleanup_module);