Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
drivers/infiniband/hw/ipath/ipath_verbs.c at v2.6.32 (2339 lines, 63 kB)
/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/utsname.h>
#include <linux/rculist.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"

static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

unsigned int ib_ipath_lkey_table_size = 12;
module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
                   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
                 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int ib_ipath_max_pds = 0xFFFF;
module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_pds,
                 "Maximum number of protection domains to support");

static unsigned int ib_ipath_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_ipath_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqes,
                 "Maximum number of completion queue entries to support");

unsigned int ib_ipath_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
                   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_ipath_max_qps = 16384;
module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_ipath_max_sges = 0x60;
module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int ib_ipath_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
                   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
                 "Maximum number of multicast groups to support");

unsigned int ib_ipath_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
                   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
                 "Maximum number of attached QPs to support");

unsigned int ib_ipath_max_srqs = 1024;
module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int ib_ipath_max_srq_sges = 128;
module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
                   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
                   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");

static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");

/*
 * Note that it is OK to post send work requests in the SQE and ERR
 * states; ipath_do_send() will process them and generate error
 * completions as per IB 1.2 C10-96.
 */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = 0,
        [IB_QPS_INIT] = IPATH_POST_RECV_OK,
        [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
        [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
            IPATH_PROCESS_NEXT_SEND_OK,
        [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
            IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
        [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
        [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
            IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
};

struct ipath_ucontext {
        struct ib_ucontext ibucontext;
};

static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
                                                  *ibucontext)
{
        return container_of(ibucontext, struct ipath_ucontext, ibucontext);
}

/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
        [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
        [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
        [IB_WR_SEND] = IB_WC_SEND,
        [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
        [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
        [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
        [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

/*
 * System image GUID.
160 */ 161static __be64 sys_image_guid; 162 163/** 164 * ipath_copy_sge - copy data to SGE memory 165 * @ss: the SGE state 166 * @data: the data to copy 167 * @length: the length of the data 168 */ 169void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length) 170{ 171 struct ipath_sge *sge = &ss->sge; 172 173 while (length) { 174 u32 len = sge->length; 175 176 if (len > length) 177 len = length; 178 if (len > sge->sge_length) 179 len = sge->sge_length; 180 BUG_ON(len == 0); 181 memcpy(sge->vaddr, data, len); 182 sge->vaddr += len; 183 sge->length -= len; 184 sge->sge_length -= len; 185 if (sge->sge_length == 0) { 186 if (--ss->num_sge) 187 *sge = *ss->sg_list++; 188 } else if (sge->length == 0 && sge->mr != NULL) { 189 if (++sge->n >= IPATH_SEGSZ) { 190 if (++sge->m >= sge->mr->mapsz) 191 break; 192 sge->n = 0; 193 } 194 sge->vaddr = 195 sge->mr->map[sge->m]->segs[sge->n].vaddr; 196 sge->length = 197 sge->mr->map[sge->m]->segs[sge->n].length; 198 } 199 data += len; 200 length -= len; 201 } 202} 203 204/** 205 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func 206 * @ss: the SGE state 207 * @length: the number of bytes to skip 208 */ 209void ipath_skip_sge(struct ipath_sge_state *ss, u32 length) 210{ 211 struct ipath_sge *sge = &ss->sge; 212 213 while (length) { 214 u32 len = sge->length; 215 216 if (len > length) 217 len = length; 218 if (len > sge->sge_length) 219 len = sge->sge_length; 220 BUG_ON(len == 0); 221 sge->vaddr += len; 222 sge->length -= len; 223 sge->sge_length -= len; 224 if (sge->sge_length == 0) { 225 if (--ss->num_sge) 226 *sge = *ss->sg_list++; 227 } else if (sge->length == 0 && sge->mr != NULL) { 228 if (++sge->n >= IPATH_SEGSZ) { 229 if (++sge->m >= sge->mr->mapsz) 230 break; 231 sge->n = 0; 232 } 233 sge->vaddr = 234 sge->mr->map[sge->m]->segs[sge->n].vaddr; 235 sge->length = 236 sge->mr->map[sge->m]->segs[sge->n].length; 237 } 238 length -= len; 239 } 240} 241 242/* 243 * Count the number of DMA descriptors needed to send length bytes of data. 244 * Don't modify the ipath_sge_state to get the count. 245 * Return zero if any of the segments is not aligned. 246 */ 247static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length) 248{ 249 struct ipath_sge *sg_list = ss->sg_list; 250 struct ipath_sge sge = ss->sge; 251 u8 num_sge = ss->num_sge; 252 u32 ndesc = 1; /* count the header */ 253 254 while (length) { 255 u32 len = sge.length; 256 257 if (len > length) 258 len = length; 259 if (len > sge.sge_length) 260 len = sge.sge_length; 261 BUG_ON(len == 0); 262 if (((long) sge.vaddr & (sizeof(u32) - 1)) || 263 (len != length && (len & (sizeof(u32) - 1)))) { 264 ndesc = 0; 265 break; 266 } 267 ndesc++; 268 sge.vaddr += len; 269 sge.length -= len; 270 sge.sge_length -= len; 271 if (sge.sge_length == 0) { 272 if (--num_sge) 273 sge = *sg_list++; 274 } else if (sge.length == 0 && sge.mr != NULL) { 275 if (++sge.n >= IPATH_SEGSZ) { 276 if (++sge.m >= sge.mr->mapsz) 277 break; 278 sge.n = 0; 279 } 280 sge.vaddr = 281 sge.mr->map[sge.m]->segs[sge.n].vaddr; 282 sge.length = 283 sge.mr->map[sge.m]->segs[sge.n].length; 284 } 285 length -= len; 286 } 287 return ndesc; 288} 289 290/* 291 * Copy from the SGEs to the data buffer. 
292 */ 293static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss, 294 u32 length) 295{ 296 struct ipath_sge *sge = &ss->sge; 297 298 while (length) { 299 u32 len = sge->length; 300 301 if (len > length) 302 len = length; 303 if (len > sge->sge_length) 304 len = sge->sge_length; 305 BUG_ON(len == 0); 306 memcpy(data, sge->vaddr, len); 307 sge->vaddr += len; 308 sge->length -= len; 309 sge->sge_length -= len; 310 if (sge->sge_length == 0) { 311 if (--ss->num_sge) 312 *sge = *ss->sg_list++; 313 } else if (sge->length == 0 && sge->mr != NULL) { 314 if (++sge->n >= IPATH_SEGSZ) { 315 if (++sge->m >= sge->mr->mapsz) 316 break; 317 sge->n = 0; 318 } 319 sge->vaddr = 320 sge->mr->map[sge->m]->segs[sge->n].vaddr; 321 sge->length = 322 sge->mr->map[sge->m]->segs[sge->n].length; 323 } 324 data += len; 325 length -= len; 326 } 327} 328 329/** 330 * ipath_post_one_send - post one RC, UC, or UD send work request 331 * @qp: the QP to post on 332 * @wr: the work request to send 333 */ 334static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr) 335{ 336 struct ipath_swqe *wqe; 337 u32 next; 338 int i; 339 int j; 340 int acc; 341 int ret; 342 unsigned long flags; 343 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; 344 345 spin_lock_irqsave(&qp->s_lock, flags); 346 347 if (qp->ibqp.qp_type != IB_QPT_SMI && 348 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 349 ret = -ENETDOWN; 350 goto bail; 351 } 352 353 /* Check that state is OK to post send. */ 354 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) 355 goto bail_inval; 356 357 /* IB spec says that num_sge == 0 is OK. */ 358 if (wr->num_sge > qp->s_max_sge) 359 goto bail_inval; 360 361 /* 362 * Don't allow RDMA reads or atomic operations on UC or 363 * undefined operations. 364 * Make sure buffer is large enough to hold the result for atomics. 365 */ 366 if (qp->ibqp.qp_type == IB_QPT_UC) { 367 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) 368 goto bail_inval; 369 } else if (qp->ibqp.qp_type == IB_QPT_UD) { 370 /* Check UD opcode */ 371 if (wr->opcode != IB_WR_SEND && 372 wr->opcode != IB_WR_SEND_WITH_IMM) 373 goto bail_inval; 374 /* Check UD destination address PD */ 375 if (qp->ibqp.pd != wr->wr.ud.ah->pd) 376 goto bail_inval; 377 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) 378 goto bail_inval; 379 else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP && 380 (wr->num_sge == 0 || 381 wr->sg_list[0].length < sizeof(u64) || 382 wr->sg_list[0].addr & (sizeof(u64) - 1))) 383 goto bail_inval; 384 else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) 385 goto bail_inval; 386 387 next = qp->s_head + 1; 388 if (next >= qp->s_size) 389 next = 0; 390 if (next == qp->s_last) { 391 ret = -ENOMEM; 392 goto bail; 393 } 394 395 wqe = get_swqe_ptr(qp, qp->s_head); 396 wqe->wr = *wr; 397 wqe->length = 0; 398 if (wr->num_sge) { 399 acc = wr->opcode >= IB_WR_RDMA_READ ? 
400 IB_ACCESS_LOCAL_WRITE : 0; 401 for (i = 0, j = 0; i < wr->num_sge; i++) { 402 u32 length = wr->sg_list[i].length; 403 int ok; 404 405 if (length == 0) 406 continue; 407 ok = ipath_lkey_ok(qp, &wqe->sg_list[j], 408 &wr->sg_list[i], acc); 409 if (!ok) 410 goto bail_inval; 411 wqe->length += length; 412 j++; 413 } 414 wqe->wr.num_sge = j; 415 } 416 if (qp->ibqp.qp_type == IB_QPT_UC || 417 qp->ibqp.qp_type == IB_QPT_RC) { 418 if (wqe->length > 0x80000000U) 419 goto bail_inval; 420 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) 421 goto bail_inval; 422 wqe->ssn = qp->s_ssn++; 423 qp->s_head = next; 424 425 ret = 0; 426 goto bail; 427 428bail_inval: 429 ret = -EINVAL; 430bail: 431 spin_unlock_irqrestore(&qp->s_lock, flags); 432 return ret; 433} 434 435/** 436 * ipath_post_send - post a send on a QP 437 * @ibqp: the QP to post the send on 438 * @wr: the list of work requests to post 439 * @bad_wr: the first bad WR is put here 440 * 441 * This may be called from interrupt context. 442 */ 443static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 444 struct ib_send_wr **bad_wr) 445{ 446 struct ipath_qp *qp = to_iqp(ibqp); 447 int err = 0; 448 449 for (; wr; wr = wr->next) { 450 err = ipath_post_one_send(qp, wr); 451 if (err) { 452 *bad_wr = wr; 453 goto bail; 454 } 455 } 456 457 /* Try to do the send work in the caller's context. */ 458 ipath_do_send((unsigned long) qp); 459 460bail: 461 return err; 462} 463 464/** 465 * ipath_post_receive - post a receive on a QP 466 * @ibqp: the QP to post the receive on 467 * @wr: the WR to post 468 * @bad_wr: the first bad WR is put here 469 * 470 * This may be called from interrupt context. 471 */ 472static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, 473 struct ib_recv_wr **bad_wr) 474{ 475 struct ipath_qp *qp = to_iqp(ibqp); 476 struct ipath_rwq *wq = qp->r_rq.wq; 477 unsigned long flags; 478 int ret; 479 480 /* Check that state is OK to post receive. */ 481 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) { 482 *bad_wr = wr; 483 ret = -EINVAL; 484 goto bail; 485 } 486 487 for (; wr; wr = wr->next) { 488 struct ipath_rwqe *wqe; 489 u32 next; 490 int i; 491 492 if ((unsigned) wr->num_sge > qp->r_rq.max_sge) { 493 *bad_wr = wr; 494 ret = -EINVAL; 495 goto bail; 496 } 497 498 spin_lock_irqsave(&qp->r_rq.lock, flags); 499 next = wq->head + 1; 500 if (next >= qp->r_rq.size) 501 next = 0; 502 if (next == wq->tail) { 503 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 504 *bad_wr = wr; 505 ret = -ENOMEM; 506 goto bail; 507 } 508 509 wqe = get_rwqe_ptr(&qp->r_rq, wq->head); 510 wqe->wr_id = wr->wr_id; 511 wqe->num_sge = wr->num_sge; 512 for (i = 0; i < wr->num_sge; i++) 513 wqe->sg_list[i] = wr->sg_list[i]; 514 /* Make sure queue entry is written before the head index. */ 515 smp_wmb(); 516 wq->head = next; 517 spin_unlock_irqrestore(&qp->r_rq.lock, flags); 518 } 519 ret = 0; 520 521bail: 522 return ret; 523} 524 525/** 526 * ipath_qp_rcv - processing an incoming packet on a QP 527 * @dev: the device the packet came on 528 * @hdr: the packet header 529 * @has_grh: true if the packet has a GRH 530 * @data: the packet data 531 * @tlen: the packet length 532 * @qp: the QP the packet came on 533 * 534 * This is called from ipath_ib_rcv() to process an incoming packet 535 * for the given QP. 536 * Called at interrupt level. 
537 */ 538static void ipath_qp_rcv(struct ipath_ibdev *dev, 539 struct ipath_ib_header *hdr, int has_grh, 540 void *data, u32 tlen, struct ipath_qp *qp) 541{ 542 /* Check for valid receive state. */ 543 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { 544 dev->n_pkt_drops++; 545 return; 546 } 547 548 switch (qp->ibqp.qp_type) { 549 case IB_QPT_SMI: 550 case IB_QPT_GSI: 551 if (ib_ipath_disable_sma) 552 break; 553 /* FALLTHROUGH */ 554 case IB_QPT_UD: 555 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp); 556 break; 557 558 case IB_QPT_RC: 559 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp); 560 break; 561 562 case IB_QPT_UC: 563 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp); 564 break; 565 566 default: 567 break; 568 } 569} 570 571/** 572 * ipath_ib_rcv - process an incoming packet 573 * @arg: the device pointer 574 * @rhdr: the header of the packet 575 * @data: the packet data 576 * @tlen: the packet length 577 * 578 * This is called from ipath_kreceive() to process an incoming packet at 579 * interrupt level. Tlen is the length of the header + data + CRC in bytes. 580 */ 581void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data, 582 u32 tlen) 583{ 584 struct ipath_ib_header *hdr = rhdr; 585 struct ipath_other_headers *ohdr; 586 struct ipath_qp *qp; 587 u32 qp_num; 588 int lnh; 589 u8 opcode; 590 u16 lid; 591 592 if (unlikely(dev == NULL)) 593 goto bail; 594 595 if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */ 596 dev->rcv_errors++; 597 goto bail; 598 } 599 600 /* Check for a valid destination LID (see ch. 7.11.1). */ 601 lid = be16_to_cpu(hdr->lrh[1]); 602 if (lid < IPATH_MULTICAST_LID_BASE) { 603 lid &= ~((1 << dev->dd->ipath_lmc) - 1); 604 if (unlikely(lid != dev->dd->ipath_lid)) { 605 dev->rcv_errors++; 606 goto bail; 607 } 608 } 609 610 /* Check for GRH */ 611 lnh = be16_to_cpu(hdr->lrh[0]) & 3; 612 if (lnh == IPATH_LRH_BTH) 613 ohdr = &hdr->u.oth; 614 else if (lnh == IPATH_LRH_GRH) 615 ohdr = &hdr->u.l.oth; 616 else { 617 dev->rcv_errors++; 618 goto bail; 619 } 620 621 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 622 dev->opstats[opcode].n_bytes += tlen; 623 dev->opstats[opcode].n_packets++; 624 625 /* Get the destination QP number. */ 626 qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK; 627 if (qp_num == IPATH_MULTICAST_QPN) { 628 struct ipath_mcast *mcast; 629 struct ipath_mcast_qp *p; 630 631 if (lnh != IPATH_LRH_GRH) { 632 dev->n_pkt_drops++; 633 goto bail; 634 } 635 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid); 636 if (mcast == NULL) { 637 dev->n_pkt_drops++; 638 goto bail; 639 } 640 dev->n_multicast_rcv++; 641 list_for_each_entry_rcu(p, &mcast->qp_list, list) 642 ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp); 643 /* 644 * Notify ipath_multicast_detach() if it is waiting for us 645 * to finish. 646 */ 647 if (atomic_dec_return(&mcast->refcount) <= 1) 648 wake_up(&mcast->wait); 649 } else { 650 qp = ipath_lookup_qpn(&dev->qp_table, qp_num); 651 if (qp) { 652 dev->n_unicast_rcv++; 653 ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data, 654 tlen, qp); 655 /* 656 * Notify ipath_destroy_qp() if it is waiting 657 * for us to finish. 658 */ 659 if (atomic_dec_and_test(&qp->refcount)) 660 wake_up(&qp->wait); 661 } else 662 dev->n_pkt_drops++; 663 } 664 665bail:; 666} 667 668/** 669 * ipath_ib_timer - verbs timer 670 * @arg: the device pointer 671 * 672 * This is called from ipath_do_rcv_timer() at interrupt level to check for 673 * QPs which need retransmits and to collect performance numbers. 
674 */ 675static void ipath_ib_timer(struct ipath_ibdev *dev) 676{ 677 struct ipath_qp *resend = NULL; 678 struct ipath_qp *rnr = NULL; 679 struct list_head *last; 680 struct ipath_qp *qp; 681 unsigned long flags; 682 683 if (dev == NULL) 684 return; 685 686 spin_lock_irqsave(&dev->pending_lock, flags); 687 /* Start filling the next pending queue. */ 688 if (++dev->pending_index >= ARRAY_SIZE(dev->pending)) 689 dev->pending_index = 0; 690 /* Save any requests still in the new queue, they have timed out. */ 691 last = &dev->pending[dev->pending_index]; 692 while (!list_empty(last)) { 693 qp = list_entry(last->next, struct ipath_qp, timerwait); 694 list_del_init(&qp->timerwait); 695 qp->timer_next = resend; 696 resend = qp; 697 atomic_inc(&qp->refcount); 698 } 699 last = &dev->rnrwait; 700 if (!list_empty(last)) { 701 qp = list_entry(last->next, struct ipath_qp, timerwait); 702 if (--qp->s_rnr_timeout == 0) { 703 do { 704 list_del_init(&qp->timerwait); 705 qp->timer_next = rnr; 706 rnr = qp; 707 atomic_inc(&qp->refcount); 708 if (list_empty(last)) 709 break; 710 qp = list_entry(last->next, struct ipath_qp, 711 timerwait); 712 } while (qp->s_rnr_timeout == 0); 713 } 714 } 715 /* 716 * We should only be in the started state if pma_sample_start != 0 717 */ 718 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED && 719 --dev->pma_sample_start == 0) { 720 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING; 721 ipath_snapshot_counters(dev->dd, &dev->ipath_sword, 722 &dev->ipath_rword, 723 &dev->ipath_spkts, 724 &dev->ipath_rpkts, 725 &dev->ipath_xmit_wait); 726 } 727 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) { 728 if (dev->pma_sample_interval == 0) { 729 u64 ta, tb, tc, td, te; 730 731 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE; 732 ipath_snapshot_counters(dev->dd, &ta, &tb, 733 &tc, &td, &te); 734 735 dev->ipath_sword = ta - dev->ipath_sword; 736 dev->ipath_rword = tb - dev->ipath_rword; 737 dev->ipath_spkts = tc - dev->ipath_spkts; 738 dev->ipath_rpkts = td - dev->ipath_rpkts; 739 dev->ipath_xmit_wait = te - dev->ipath_xmit_wait; 740 } 741 else 742 dev->pma_sample_interval--; 743 } 744 spin_unlock_irqrestore(&dev->pending_lock, flags); 745 746 /* XXX What if timer fires again while this is running? */ 747 while (resend != NULL) { 748 qp = resend; 749 resend = qp->timer_next; 750 751 spin_lock_irqsave(&qp->s_lock, flags); 752 if (qp->s_last != qp->s_tail && 753 ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { 754 dev->n_timeouts++; 755 ipath_restart_rc(qp, qp->s_last_psn + 1); 756 } 757 spin_unlock_irqrestore(&qp->s_lock, flags); 758 759 /* Notify ipath_destroy_qp() if it is waiting. */ 760 if (atomic_dec_and_test(&qp->refcount)) 761 wake_up(&qp->wait); 762 } 763 while (rnr != NULL) { 764 qp = rnr; 765 rnr = qp->timer_next; 766 767 spin_lock_irqsave(&qp->s_lock, flags); 768 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) 769 ipath_schedule_send(qp); 770 spin_unlock_irqrestore(&qp->s_lock, flags); 771 772 /* Notify ipath_destroy_qp() if it is waiting. 
*/ 773 if (atomic_dec_and_test(&qp->refcount)) 774 wake_up(&qp->wait); 775 } 776} 777 778static void update_sge(struct ipath_sge_state *ss, u32 length) 779{ 780 struct ipath_sge *sge = &ss->sge; 781 782 sge->vaddr += length; 783 sge->length -= length; 784 sge->sge_length -= length; 785 if (sge->sge_length == 0) { 786 if (--ss->num_sge) 787 *sge = *ss->sg_list++; 788 } else if (sge->length == 0 && sge->mr != NULL) { 789 if (++sge->n >= IPATH_SEGSZ) { 790 if (++sge->m >= sge->mr->mapsz) 791 return; 792 sge->n = 0; 793 } 794 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr; 795 sge->length = sge->mr->map[sge->m]->segs[sge->n].length; 796 } 797} 798 799#ifdef __LITTLE_ENDIAN 800static inline u32 get_upper_bits(u32 data, u32 shift) 801{ 802 return data >> shift; 803} 804 805static inline u32 set_upper_bits(u32 data, u32 shift) 806{ 807 return data << shift; 808} 809 810static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) 811{ 812 data <<= ((sizeof(u32) - n) * BITS_PER_BYTE); 813 data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE); 814 return data; 815} 816#else 817static inline u32 get_upper_bits(u32 data, u32 shift) 818{ 819 return data << shift; 820} 821 822static inline u32 set_upper_bits(u32 data, u32 shift) 823{ 824 return data >> shift; 825} 826 827static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off) 828{ 829 data >>= ((sizeof(u32) - n) * BITS_PER_BYTE); 830 data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE); 831 return data; 832} 833#endif 834 835static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss, 836 u32 length, unsigned flush_wc) 837{ 838 u32 extra = 0; 839 u32 data = 0; 840 u32 last; 841 842 while (1) { 843 u32 len = ss->sge.length; 844 u32 off; 845 846 if (len > length) 847 len = length; 848 if (len > ss->sge.sge_length) 849 len = ss->sge.sge_length; 850 BUG_ON(len == 0); 851 /* If the source address is not aligned, try to align it. */ 852 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1); 853 if (off) { 854 u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr & 855 ~(sizeof(u32) - 1)); 856 u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE); 857 u32 y; 858 859 y = sizeof(u32) - off; 860 if (len > y) 861 len = y; 862 if (len + extra >= sizeof(u32)) { 863 data |= set_upper_bits(v, extra * 864 BITS_PER_BYTE); 865 len = sizeof(u32) - extra; 866 if (len == length) { 867 last = data; 868 break; 869 } 870 __raw_writel(data, piobuf); 871 piobuf++; 872 extra = 0; 873 data = 0; 874 } else { 875 /* Clear unused upper bytes */ 876 data |= clear_upper_bytes(v, len, extra); 877 if (len == length) { 878 last = data; 879 break; 880 } 881 extra += len; 882 } 883 } else if (extra) { 884 /* Source address is aligned. */ 885 u32 *addr = (u32 *) ss->sge.vaddr; 886 int shift = extra * BITS_PER_BYTE; 887 int ushift = 32 - shift; 888 u32 l = len; 889 890 while (l >= sizeof(u32)) { 891 u32 v = *addr; 892 893 data |= set_upper_bits(v, shift); 894 __raw_writel(data, piobuf); 895 data = get_upper_bits(v, ushift); 896 piobuf++; 897 addr++; 898 l -= sizeof(u32); 899 } 900 /* 901 * We still have 'extra' number of bytes leftover. 
902 */ 903 if (l) { 904 u32 v = *addr; 905 906 if (l + extra >= sizeof(u32)) { 907 data |= set_upper_bits(v, shift); 908 len -= l + extra - sizeof(u32); 909 if (len == length) { 910 last = data; 911 break; 912 } 913 __raw_writel(data, piobuf); 914 piobuf++; 915 extra = 0; 916 data = 0; 917 } else { 918 /* Clear unused upper bytes */ 919 data |= clear_upper_bytes(v, l, 920 extra); 921 if (len == length) { 922 last = data; 923 break; 924 } 925 extra += l; 926 } 927 } else if (len == length) { 928 last = data; 929 break; 930 } 931 } else if (len == length) { 932 u32 w; 933 934 /* 935 * Need to round up for the last dword in the 936 * packet. 937 */ 938 w = (len + 3) >> 2; 939 __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1); 940 piobuf += w - 1; 941 last = ((u32 *) ss->sge.vaddr)[w - 1]; 942 break; 943 } else { 944 u32 w = len >> 2; 945 946 __iowrite32_copy(piobuf, ss->sge.vaddr, w); 947 piobuf += w; 948 949 extra = len & (sizeof(u32) - 1); 950 if (extra) { 951 u32 v = ((u32 *) ss->sge.vaddr)[w]; 952 953 /* Clear unused upper bytes */ 954 data = clear_upper_bytes(v, extra, 0); 955 } 956 } 957 update_sge(ss, len); 958 length -= len; 959 } 960 /* Update address before sending packet. */ 961 update_sge(ss, length); 962 if (flush_wc) { 963 /* must flush early everything before trigger word */ 964 ipath_flush_wc(); 965 __raw_writel(last, piobuf); 966 /* be sure trigger word is written */ 967 ipath_flush_wc(); 968 } else 969 __raw_writel(last, piobuf); 970} 971 972/* 973 * Convert IB rate to delay multiplier. 974 */ 975unsigned ipath_ib_rate_to_mult(enum ib_rate rate) 976{ 977 switch (rate) { 978 case IB_RATE_2_5_GBPS: return 8; 979 case IB_RATE_5_GBPS: return 4; 980 case IB_RATE_10_GBPS: return 2; 981 case IB_RATE_20_GBPS: return 1; 982 default: return 0; 983 } 984} 985 986/* 987 * Convert delay multiplier to IB rate 988 */ 989static enum ib_rate ipath_mult_to_ib_rate(unsigned mult) 990{ 991 switch (mult) { 992 case 8: return IB_RATE_2_5_GBPS; 993 case 4: return IB_RATE_5_GBPS; 994 case 2: return IB_RATE_10_GBPS; 995 case 1: return IB_RATE_20_GBPS; 996 default: return IB_RATE_PORT_CURRENT; 997 } 998} 999 1000static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev) 1001{ 1002 struct ipath_verbs_txreq *tx = NULL; 1003 unsigned long flags; 1004 1005 spin_lock_irqsave(&dev->pending_lock, flags); 1006 if (!list_empty(&dev->txreq_free)) { 1007 struct list_head *l = dev->txreq_free.next; 1008 1009 list_del(l); 1010 tx = list_entry(l, struct ipath_verbs_txreq, txreq.list); 1011 } 1012 spin_unlock_irqrestore(&dev->pending_lock, flags); 1013 return tx; 1014} 1015 1016static inline void put_txreq(struct ipath_ibdev *dev, 1017 struct ipath_verbs_txreq *tx) 1018{ 1019 unsigned long flags; 1020 1021 spin_lock_irqsave(&dev->pending_lock, flags); 1022 list_add(&tx->txreq.list, &dev->txreq_free); 1023 spin_unlock_irqrestore(&dev->pending_lock, flags); 1024} 1025 1026static void sdma_complete(void *cookie, int status) 1027{ 1028 struct ipath_verbs_txreq *tx = cookie; 1029 struct ipath_qp *qp = tx->qp; 1030 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 1031 unsigned long flags; 1032 enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? 
1033 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; 1034 1035 if (atomic_dec_and_test(&qp->s_dma_busy)) { 1036 spin_lock_irqsave(&qp->s_lock, flags); 1037 if (tx->wqe) 1038 ipath_send_complete(qp, tx->wqe, ibs); 1039 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && 1040 qp->s_last != qp->s_head) || 1041 (qp->s_flags & IPATH_S_WAIT_DMA)) 1042 ipath_schedule_send(qp); 1043 spin_unlock_irqrestore(&qp->s_lock, flags); 1044 wake_up(&qp->wait_dma); 1045 } else if (tx->wqe) { 1046 spin_lock_irqsave(&qp->s_lock, flags); 1047 ipath_send_complete(qp, tx->wqe, ibs); 1048 spin_unlock_irqrestore(&qp->s_lock, flags); 1049 } 1050 1051 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) 1052 kfree(tx->txreq.map_addr); 1053 put_txreq(dev, tx); 1054 1055 if (atomic_dec_and_test(&qp->refcount)) 1056 wake_up(&qp->wait); 1057} 1058 1059static void decrement_dma_busy(struct ipath_qp *qp) 1060{ 1061 unsigned long flags; 1062 1063 if (atomic_dec_and_test(&qp->s_dma_busy)) { 1064 spin_lock_irqsave(&qp->s_lock, flags); 1065 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && 1066 qp->s_last != qp->s_head) || 1067 (qp->s_flags & IPATH_S_WAIT_DMA)) 1068 ipath_schedule_send(qp); 1069 spin_unlock_irqrestore(&qp->s_lock, flags); 1070 wake_up(&qp->wait_dma); 1071 } 1072} 1073 1074/* 1075 * Compute the number of clock cycles of delay before sending the next packet. 1076 * The multipliers reflect the number of clocks for the fastest rate so 1077 * one tick at 4xDDR is 8 ticks at 1xSDR. 1078 * If the destination port will take longer to receive a packet than 1079 * the outgoing link can send it, we need to delay sending the next packet 1080 * by the difference in time it takes the receiver to receive and the sender 1081 * to send this packet. 1082 * Note that this delay is always correct for UC and RC but not always 1083 * optimal for UD. For UD, the destination HCA can be different for each 1084 * packet, in which case, we could send packets to a different destination 1085 * while "waiting" for the delay. The overhead for doing this without 1086 * HW support is more than just paying the cost of delaying some packets 1087 * unnecessarily. 1088 */ 1089static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult) 1090{ 1091 return (rcv_mult > snd_mult) ? 1092 (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0; 1093} 1094 1095static int ipath_verbs_send_dma(struct ipath_qp *qp, 1096 struct ipath_ib_header *hdr, u32 hdrwords, 1097 struct ipath_sge_state *ss, u32 len, 1098 u32 plen, u32 dwords) 1099{ 1100 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 1101 struct ipath_devdata *dd = dev->dd; 1102 struct ipath_verbs_txreq *tx; 1103 u32 *piobuf; 1104 u32 control; 1105 u32 ndesc; 1106 int ret; 1107 1108 tx = qp->s_tx; 1109 if (tx) { 1110 qp->s_tx = NULL; 1111 /* resend previously constructed packet */ 1112 atomic_inc(&qp->s_dma_busy); 1113 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); 1114 if (ret) { 1115 qp->s_tx = tx; 1116 decrement_dma_busy(qp); 1117 } 1118 goto bail; 1119 } 1120 1121 tx = get_txreq(dev); 1122 if (!tx) { 1123 ret = -EBUSY; 1124 goto bail; 1125 } 1126 1127 /* 1128 * Get the saved delay count we computed for the previous packet 1129 * and save the delay count for this packet to be used next time 1130 * we get here. 
1131 */ 1132 control = qp->s_pkt_delay; 1133 qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); 1134 1135 tx->qp = qp; 1136 atomic_inc(&qp->refcount); 1137 tx->wqe = qp->s_wqe; 1138 tx->txreq.callback = sdma_complete; 1139 tx->txreq.callback_cookie = tx; 1140 tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST | 1141 IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC; 1142 if (plen + 1 >= IPATH_SMALLBUF_DWORDS) 1143 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF; 1144 1145 /* VL15 packets bypass credit check */ 1146 if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) { 1147 control |= 1ULL << 31; 1148 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15; 1149 } 1150 1151 if (len) { 1152 /* 1153 * Don't try to DMA if it takes more descriptors than 1154 * the queue holds. 1155 */ 1156 ndesc = ipath_count_sge(ss, len); 1157 if (ndesc >= dd->ipath_sdma_descq_cnt) 1158 ndesc = 0; 1159 } else 1160 ndesc = 1; 1161 if (ndesc) { 1162 tx->hdr.pbc[0] = cpu_to_le32(plen); 1163 tx->hdr.pbc[1] = cpu_to_le32(control); 1164 memcpy(&tx->hdr.hdr, hdr, hdrwords << 2); 1165 tx->txreq.sg_count = ndesc; 1166 tx->map_len = (hdrwords + 2) << 2; 1167 tx->txreq.map_addr = &tx->hdr; 1168 atomic_inc(&qp->s_dma_busy); 1169 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); 1170 if (ret) { 1171 /* save ss and length in dwords */ 1172 tx->ss = ss; 1173 tx->len = dwords; 1174 qp->s_tx = tx; 1175 decrement_dma_busy(qp); 1176 } 1177 goto bail; 1178 } 1179 1180 /* Allocate a buffer and copy the header and payload to it. */ 1181 tx->map_len = (plen + 1) << 2; 1182 piobuf = kmalloc(tx->map_len, GFP_ATOMIC); 1183 if (unlikely(piobuf == NULL)) { 1184 ret = -EBUSY; 1185 goto err_tx; 1186 } 1187 tx->txreq.map_addr = piobuf; 1188 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF; 1189 tx->txreq.sg_count = 1; 1190 1191 *piobuf++ = (__force u32) cpu_to_le32(plen); 1192 *piobuf++ = (__force u32) cpu_to_le32(control); 1193 memcpy(piobuf, hdr, hdrwords << 2); 1194 ipath_copy_from_sge(piobuf + hdrwords, ss, len); 1195 1196 atomic_inc(&qp->s_dma_busy); 1197 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); 1198 /* 1199 * If we couldn't queue the DMA request, save the info 1200 * and try again later rather than destroying the 1201 * buffer and undoing the side effects of the copy. 1202 */ 1203 if (ret) { 1204 tx->ss = NULL; 1205 tx->len = 0; 1206 qp->s_tx = tx; 1207 decrement_dma_busy(qp); 1208 } 1209 dev->n_unaligned++; 1210 goto bail; 1211 1212err_tx: 1213 if (atomic_dec_and_test(&qp->refcount)) 1214 wake_up(&qp->wait); 1215 put_txreq(dev, tx); 1216bail: 1217 return ret; 1218} 1219 1220static int ipath_verbs_send_pio(struct ipath_qp *qp, 1221 struct ipath_ib_header *ibhdr, u32 hdrwords, 1222 struct ipath_sge_state *ss, u32 len, 1223 u32 plen, u32 dwords) 1224{ 1225 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; 1226 u32 *hdr = (u32 *) ibhdr; 1227 u32 __iomem *piobuf; 1228 unsigned flush_wc; 1229 u32 control; 1230 int ret; 1231 unsigned long flags; 1232 1233 piobuf = ipath_getpiobuf(dd, plen, NULL); 1234 if (unlikely(piobuf == NULL)) { 1235 ret = -EBUSY; 1236 goto bail; 1237 } 1238 1239 /* 1240 * Get the saved delay count we computed for the previous packet 1241 * and save the delay count for this packet to be used next time 1242 * we get here. 
1243 */ 1244 control = qp->s_pkt_delay; 1245 qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult); 1246 1247 /* VL15 packets bypass credit check */ 1248 if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15) 1249 control |= 1ULL << 31; 1250 1251 /* 1252 * Write the length to the control qword plus any needed flags. 1253 * We have to flush after the PBC for correctness on some cpus 1254 * or WC buffer can be written out of order. 1255 */ 1256 writeq(((u64) control << 32) | plen, piobuf); 1257 piobuf += 2; 1258 1259 flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC; 1260 if (len == 0) { 1261 /* 1262 * If there is just the header portion, must flush before 1263 * writing last word of header for correctness, and after 1264 * the last header word (trigger word). 1265 */ 1266 if (flush_wc) { 1267 ipath_flush_wc(); 1268 __iowrite32_copy(piobuf, hdr, hdrwords - 1); 1269 ipath_flush_wc(); 1270 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1); 1271 ipath_flush_wc(); 1272 } else 1273 __iowrite32_copy(piobuf, hdr, hdrwords); 1274 goto done; 1275 } 1276 1277 if (flush_wc) 1278 ipath_flush_wc(); 1279 __iowrite32_copy(piobuf, hdr, hdrwords); 1280 piobuf += hdrwords; 1281 1282 /* The common case is aligned and contained in one segment. */ 1283 if (likely(ss->num_sge == 1 && len <= ss->sge.length && 1284 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) { 1285 u32 *addr = (u32 *) ss->sge.vaddr; 1286 1287 /* Update address before sending packet. */ 1288 update_sge(ss, len); 1289 if (flush_wc) { 1290 __iowrite32_copy(piobuf, addr, dwords - 1); 1291 /* must flush early everything before trigger word */ 1292 ipath_flush_wc(); 1293 __raw_writel(addr[dwords - 1], piobuf + dwords - 1); 1294 /* be sure trigger word is written */ 1295 ipath_flush_wc(); 1296 } else 1297 __iowrite32_copy(piobuf, addr, dwords); 1298 goto done; 1299 } 1300 copy_io(piobuf, ss, len, flush_wc); 1301done: 1302 if (qp->s_wqe) { 1303 spin_lock_irqsave(&qp->s_lock, flags); 1304 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1305 spin_unlock_irqrestore(&qp->s_lock, flags); 1306 } 1307 ret = 0; 1308bail: 1309 return ret; 1310} 1311 1312/** 1313 * ipath_verbs_send - send a packet 1314 * @qp: the QP to send on 1315 * @hdr: the packet header 1316 * @hdrwords: the number of 32-bit words in the header 1317 * @ss: the SGE to send 1318 * @len: the length of the packet in bytes 1319 */ 1320int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr, 1321 u32 hdrwords, struct ipath_sge_state *ss, u32 len) 1322{ 1323 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd; 1324 u32 plen; 1325 int ret; 1326 u32 dwords = (len + 3) >> 2; 1327 1328 /* 1329 * Calculate the send buffer trigger address. 1330 * The +1 counts for the pbc control dword following the pbc length. 1331 */ 1332 plen = hdrwords + dwords + 1; 1333 1334 /* 1335 * VL15 packets (IB_QPT_SMI) will always use PIO, so we 1336 * can defer SDMA restart until link goes ACTIVE without 1337 * worrying about just how we got there. 1338 */ 1339 if (qp->ibqp.qp_type == IB_QPT_SMI || 1340 !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) 1341 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1342 plen, dwords); 1343 else 1344 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, 1345 plen, dwords); 1346 1347 return ret; 1348} 1349 1350int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords, 1351 u64 *rwords, u64 *spkts, u64 *rpkts, 1352 u64 *xmit_wait) 1353{ 1354 int ret; 1355 1356 if (!(dd->ipath_flags & IPATH_INITTED)) { 1357 /* no hardware, freeze, etc. 
*/ 1358 ret = -EINVAL; 1359 goto bail; 1360 } 1361 *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); 1362 *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); 1363 *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); 1364 *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); 1365 *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt); 1366 1367 ret = 0; 1368 1369bail: 1370 return ret; 1371} 1372 1373/** 1374 * ipath_get_counters - get various chip counters 1375 * @dd: the infinipath device 1376 * @cntrs: counters are placed here 1377 * 1378 * Return the counters needed by recv_pma_get_portcounters(). 1379 */ 1380int ipath_get_counters(struct ipath_devdata *dd, 1381 struct ipath_verbs_counters *cntrs) 1382{ 1383 struct ipath_cregs const *crp = dd->ipath_cregs; 1384 int ret; 1385 1386 if (!(dd->ipath_flags & IPATH_INITTED)) { 1387 /* no hardware, freeze, etc. */ 1388 ret = -EINVAL; 1389 goto bail; 1390 } 1391 cntrs->symbol_error_counter = 1392 ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt); 1393 cntrs->link_error_recovery_counter = 1394 ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt); 1395 /* 1396 * The link downed counter counts when the other side downs the 1397 * connection. We add in the number of times we downed the link 1398 * due to local link integrity errors to compensate. 1399 */ 1400 cntrs->link_downed_counter = 1401 ipath_snap_cntr(dd, crp->cr_iblinkdowncnt); 1402 cntrs->port_rcv_errors = 1403 ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) + 1404 ipath_snap_cntr(dd, crp->cr_rcvovflcnt) + 1405 ipath_snap_cntr(dd, crp->cr_portovflcnt) + 1406 ipath_snap_cntr(dd, crp->cr_err_rlencnt) + 1407 ipath_snap_cntr(dd, crp->cr_invalidrlencnt) + 1408 ipath_snap_cntr(dd, crp->cr_errlinkcnt) + 1409 ipath_snap_cntr(dd, crp->cr_erricrccnt) + 1410 ipath_snap_cntr(dd, crp->cr_errvcrccnt) + 1411 ipath_snap_cntr(dd, crp->cr_errlpcrccnt) + 1412 ipath_snap_cntr(dd, crp->cr_badformatcnt) + 1413 dd->ipath_rxfc_unsupvl_errs; 1414 if (crp->cr_rxotherlocalphyerrcnt) 1415 cntrs->port_rcv_errors += 1416 ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt); 1417 if (crp->cr_rxvlerrcnt) 1418 cntrs->port_rcv_errors += 1419 ipath_snap_cntr(dd, crp->cr_rxvlerrcnt); 1420 cntrs->port_rcv_remphys_errors = 1421 ipath_snap_cntr(dd, crp->cr_rcvebpcnt); 1422 cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt); 1423 cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt); 1424 cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt); 1425 cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt); 1426 cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt); 1427 cntrs->local_link_integrity_errors = 1428 crp->cr_locallinkintegrityerrcnt ? 1429 ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) : 1430 ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ? 1431 dd->ipath_lli_errs : dd->ipath_lli_errors); 1432 cntrs->excessive_buffer_overrun_errors = 1433 crp->cr_excessbufferovflcnt ? 1434 ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) : 1435 dd->ipath_overrun_thresh_errs; 1436 cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ? 1437 ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0; 1438 1439 ret = 0; 1440 1441bail: 1442 return ret; 1443} 1444 1445/** 1446 * ipath_ib_piobufavail - callback when a PIO buffer is available 1447 * @arg: the device pointer 1448 * 1449 * This is called from ipath_intr() at interrupt level when a PIO buffer is 1450 * available after ipath_verbs_send() returned an error that no buffers were 1451 * available. 
Return 1 if we consumed all the PIO buffers and we still have 1452 * QPs waiting for buffers (for now, just restart the send tasklet and 1453 * return zero). 1454 */ 1455int ipath_ib_piobufavail(struct ipath_ibdev *dev) 1456{ 1457 struct list_head *list; 1458 struct ipath_qp *qplist; 1459 struct ipath_qp *qp; 1460 unsigned long flags; 1461 1462 if (dev == NULL) 1463 goto bail; 1464 1465 list = &dev->piowait; 1466 qplist = NULL; 1467 1468 spin_lock_irqsave(&dev->pending_lock, flags); 1469 while (!list_empty(list)) { 1470 qp = list_entry(list->next, struct ipath_qp, piowait); 1471 list_del_init(&qp->piowait); 1472 qp->pio_next = qplist; 1473 qplist = qp; 1474 atomic_inc(&qp->refcount); 1475 } 1476 spin_unlock_irqrestore(&dev->pending_lock, flags); 1477 1478 while (qplist != NULL) { 1479 qp = qplist; 1480 qplist = qp->pio_next; 1481 1482 spin_lock_irqsave(&qp->s_lock, flags); 1483 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) 1484 ipath_schedule_send(qp); 1485 spin_unlock_irqrestore(&qp->s_lock, flags); 1486 1487 /* Notify ipath_destroy_qp() if it is waiting. */ 1488 if (atomic_dec_and_test(&qp->refcount)) 1489 wake_up(&qp->wait); 1490 } 1491 1492bail: 1493 return 0; 1494} 1495 1496static int ipath_query_device(struct ib_device *ibdev, 1497 struct ib_device_attr *props) 1498{ 1499 struct ipath_ibdev *dev = to_idev(ibdev); 1500 1501 memset(props, 0, sizeof(*props)); 1502 1503 props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | 1504 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | 1505 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | 1506 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE; 1507 props->page_size_cap = PAGE_SIZE; 1508 props->vendor_id = 1509 IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3; 1510 props->vendor_part_id = dev->dd->ipath_deviceid; 1511 props->hw_ver = dev->dd->ipath_pcirev; 1512 1513 props->sys_image_guid = dev->sys_image_guid; 1514 1515 props->max_mr_size = ~0ull; 1516 props->max_qp = ib_ipath_max_qps; 1517 props->max_qp_wr = ib_ipath_max_qp_wrs; 1518 props->max_sge = ib_ipath_max_sges; 1519 props->max_cq = ib_ipath_max_cqs; 1520 props->max_ah = ib_ipath_max_ahs; 1521 props->max_cqe = ib_ipath_max_cqes; 1522 props->max_mr = dev->lk_table.max; 1523 props->max_fmr = dev->lk_table.max; 1524 props->max_map_per_fmr = 32767; 1525 props->max_pd = ib_ipath_max_pds; 1526 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC; 1527 props->max_qp_init_rd_atom = 255; 1528 /* props->max_res_rd_atom */ 1529 props->max_srq = ib_ipath_max_srqs; 1530 props->max_srq_wr = ib_ipath_max_srq_wrs; 1531 props->max_srq_sge = ib_ipath_max_srq_sges; 1532 /* props->local_ca_ack_delay */ 1533 props->atomic_cap = IB_ATOMIC_GLOB; 1534 props->max_pkeys = ipath_get_npkeys(dev->dd); 1535 props->max_mcast_grp = ib_ipath_max_mcast_grps; 1536 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached; 1537 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 1538 props->max_mcast_grp; 1539 1540 return 0; 1541} 1542 1543const u8 ipath_cvt_physportstate[32] = { 1544 [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED, 1545 [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP, 1546 [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL, 1547 [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL, 1548 [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP, 1549 [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP, 1550 [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] = 1551 IB_PHYSPORTSTATE_CFG_TRAIN, 1552 
[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] = 1553 IB_PHYSPORTSTATE_CFG_TRAIN, 1554 [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] = 1555 IB_PHYSPORTSTATE_CFG_TRAIN, 1556 [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN, 1557 [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] = 1558 IB_PHYSPORTSTATE_LINK_ERR_RECOVER, 1559 [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] = 1560 IB_PHYSPORTSTATE_LINK_ERR_RECOVER, 1561 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] = 1562 IB_PHYSPORTSTATE_LINK_ERR_RECOVER, 1563 [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN, 1564 [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN, 1565 [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN, 1566 [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN, 1567 [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN, 1568 [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN, 1569 [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN, 1570 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN 1571}; 1572 1573u32 ipath_get_cr_errpkey(struct ipath_devdata *dd) 1574{ 1575 return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey); 1576} 1577 1578static int ipath_query_port(struct ib_device *ibdev, 1579 u8 port, struct ib_port_attr *props) 1580{ 1581 struct ipath_ibdev *dev = to_idev(ibdev); 1582 struct ipath_devdata *dd = dev->dd; 1583 enum ib_mtu mtu; 1584 u16 lid = dd->ipath_lid; 1585 u64 ibcstat; 1586 1587 memset(props, 0, sizeof(*props)); 1588 props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE); 1589 props->lmc = dd->ipath_lmc; 1590 props->sm_lid = dev->sm_lid; 1591 props->sm_sl = dev->sm_sl; 1592 ibcstat = dd->ipath_lastibcstat; 1593 /* map LinkState to IB portinfo values. */ 1594 props->state = ipath_ib_linkstate(dd, ibcstat) + 1; 1595 1596 /* See phys_state_show() */ 1597 props->phys_state = /* MEA: assumes shift == 0 */ 1598 ipath_cvt_physportstate[dd->ipath_lastibcstat & 1599 dd->ibcs_lts_mask]; 1600 props->port_cap_flags = dev->port_cap_flags; 1601 props->gid_tbl_len = 1; 1602 props->max_msg_sz = 0x80000000; 1603 props->pkey_tbl_len = ipath_get_npkeys(dd); 1604 props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) - 1605 dev->z_pkey_violations; 1606 props->qkey_viol_cntr = dev->qkey_violations; 1607 props->active_width = dd->ipath_link_width_active; 1608 /* See rate_show() */ 1609 props->active_speed = dd->ipath_link_speed_active; 1610 props->max_vl_num = 1; /* VLCap = VL0 */ 1611 props->init_type_reply = 0; 1612 1613 props->max_mtu = ipath_mtu4096 ? 
IB_MTU_4096 : IB_MTU_2048; 1614 switch (dd->ipath_ibmtu) { 1615 case 4096: 1616 mtu = IB_MTU_4096; 1617 break; 1618 case 2048: 1619 mtu = IB_MTU_2048; 1620 break; 1621 case 1024: 1622 mtu = IB_MTU_1024; 1623 break; 1624 case 512: 1625 mtu = IB_MTU_512; 1626 break; 1627 case 256: 1628 mtu = IB_MTU_256; 1629 break; 1630 default: 1631 mtu = IB_MTU_2048; 1632 } 1633 props->active_mtu = mtu; 1634 props->subnet_timeout = dev->subnet_timeout; 1635 1636 return 0; 1637} 1638 1639static int ipath_modify_device(struct ib_device *device, 1640 int device_modify_mask, 1641 struct ib_device_modify *device_modify) 1642{ 1643 int ret; 1644 1645 if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | 1646 IB_DEVICE_MODIFY_NODE_DESC)) { 1647 ret = -EOPNOTSUPP; 1648 goto bail; 1649 } 1650 1651 if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) 1652 memcpy(device->node_desc, device_modify->node_desc, 64); 1653 1654 if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) 1655 to_idev(device)->sys_image_guid = 1656 cpu_to_be64(device_modify->sys_image_guid); 1657 1658 ret = 0; 1659 1660bail: 1661 return ret; 1662} 1663 1664static int ipath_modify_port(struct ib_device *ibdev, 1665 u8 port, int port_modify_mask, 1666 struct ib_port_modify *props) 1667{ 1668 struct ipath_ibdev *dev = to_idev(ibdev); 1669 1670 dev->port_cap_flags |= props->set_port_cap_mask; 1671 dev->port_cap_flags &= ~props->clr_port_cap_mask; 1672 if (port_modify_mask & IB_PORT_SHUTDOWN) 1673 ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN); 1674 if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR) 1675 dev->qkey_violations = 0; 1676 return 0; 1677} 1678 1679static int ipath_query_gid(struct ib_device *ibdev, u8 port, 1680 int index, union ib_gid *gid) 1681{ 1682 struct ipath_ibdev *dev = to_idev(ibdev); 1683 int ret; 1684 1685 if (index >= 1) { 1686 ret = -EINVAL; 1687 goto bail; 1688 } 1689 gid->global.subnet_prefix = dev->gid_prefix; 1690 gid->global.interface_id = dev->dd->ipath_guid; 1691 1692 ret = 0; 1693 1694bail: 1695 return ret; 1696} 1697 1698static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev, 1699 struct ib_ucontext *context, 1700 struct ib_udata *udata) 1701{ 1702 struct ipath_ibdev *dev = to_idev(ibdev); 1703 struct ipath_pd *pd; 1704 struct ib_pd *ret; 1705 1706 /* 1707 * This is actually totally arbitrary. Some correctness tests 1708 * assume there's a maximum number of PDs that can be allocated. 1709 * We don't actually have this limit, but we fail the test if 1710 * we allow allocations of more than we report for this value. 1711 */ 1712 1713 pd = kmalloc(sizeof *pd, GFP_KERNEL); 1714 if (!pd) { 1715 ret = ERR_PTR(-ENOMEM); 1716 goto bail; 1717 } 1718 1719 spin_lock(&dev->n_pds_lock); 1720 if (dev->n_pds_allocated == ib_ipath_max_pds) { 1721 spin_unlock(&dev->n_pds_lock); 1722 kfree(pd); 1723 ret = ERR_PTR(-ENOMEM); 1724 goto bail; 1725 } 1726 1727 dev->n_pds_allocated++; 1728 spin_unlock(&dev->n_pds_lock); 1729 1730 /* ib_alloc_pd() will initialize pd->ibpd. 
*/ 1731 pd->user = udata != NULL; 1732 1733 ret = &pd->ibpd; 1734 1735bail: 1736 return ret; 1737} 1738 1739static int ipath_dealloc_pd(struct ib_pd *ibpd) 1740{ 1741 struct ipath_pd *pd = to_ipd(ibpd); 1742 struct ipath_ibdev *dev = to_idev(ibpd->device); 1743 1744 spin_lock(&dev->n_pds_lock); 1745 dev->n_pds_allocated--; 1746 spin_unlock(&dev->n_pds_lock); 1747 1748 kfree(pd); 1749 1750 return 0; 1751} 1752 1753/** 1754 * ipath_create_ah - create an address handle 1755 * @pd: the protection domain 1756 * @ah_attr: the attributes of the AH 1757 * 1758 * This may be called from interrupt context. 1759 */ 1760static struct ib_ah *ipath_create_ah(struct ib_pd *pd, 1761 struct ib_ah_attr *ah_attr) 1762{ 1763 struct ipath_ah *ah; 1764 struct ib_ah *ret; 1765 struct ipath_ibdev *dev = to_idev(pd->device); 1766 unsigned long flags; 1767 1768 /* A multicast address requires a GRH (see ch. 8.4.1). */ 1769 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE && 1770 ah_attr->dlid != IPATH_PERMISSIVE_LID && 1771 !(ah_attr->ah_flags & IB_AH_GRH)) { 1772 ret = ERR_PTR(-EINVAL); 1773 goto bail; 1774 } 1775 1776 if (ah_attr->dlid == 0) { 1777 ret = ERR_PTR(-EINVAL); 1778 goto bail; 1779 } 1780 1781 if (ah_attr->port_num < 1 || 1782 ah_attr->port_num > pd->device->phys_port_cnt) { 1783 ret = ERR_PTR(-EINVAL); 1784 goto bail; 1785 } 1786 1787 ah = kmalloc(sizeof *ah, GFP_ATOMIC); 1788 if (!ah) { 1789 ret = ERR_PTR(-ENOMEM); 1790 goto bail; 1791 } 1792 1793 spin_lock_irqsave(&dev->n_ahs_lock, flags); 1794 if (dev->n_ahs_allocated == ib_ipath_max_ahs) { 1795 spin_unlock_irqrestore(&dev->n_ahs_lock, flags); 1796 kfree(ah); 1797 ret = ERR_PTR(-ENOMEM); 1798 goto bail; 1799 } 1800 1801 dev->n_ahs_allocated++; 1802 spin_unlock_irqrestore(&dev->n_ahs_lock, flags); 1803 1804 /* ib_create_ah() will initialize ah->ibah. */ 1805 ah->attr = *ah_attr; 1806 ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate); 1807 1808 ret = &ah->ibah; 1809 1810bail: 1811 return ret; 1812} 1813 1814/** 1815 * ipath_destroy_ah - destroy an address handle 1816 * @ibah: the AH to destroy 1817 * 1818 * This may be called from interrupt context. 
1819 */ 1820static int ipath_destroy_ah(struct ib_ah *ibah) 1821{ 1822 struct ipath_ibdev *dev = to_idev(ibah->device); 1823 struct ipath_ah *ah = to_iah(ibah); 1824 unsigned long flags; 1825 1826 spin_lock_irqsave(&dev->n_ahs_lock, flags); 1827 dev->n_ahs_allocated--; 1828 spin_unlock_irqrestore(&dev->n_ahs_lock, flags); 1829 1830 kfree(ah); 1831 1832 return 0; 1833} 1834 1835static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) 1836{ 1837 struct ipath_ah *ah = to_iah(ibah); 1838 1839 *ah_attr = ah->attr; 1840 ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate); 1841 1842 return 0; 1843} 1844 1845/** 1846 * ipath_get_npkeys - return the size of the PKEY table for port 0 1847 * @dd: the infinipath device 1848 */ 1849unsigned ipath_get_npkeys(struct ipath_devdata *dd) 1850{ 1851 return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys); 1852} 1853 1854/** 1855 * ipath_get_pkey - return the indexed PKEY from the port PKEY table 1856 * @dd: the infinipath device 1857 * @index: the PKEY index 1858 */ 1859unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index) 1860{ 1861 unsigned ret; 1862 1863 /* always a kernel port, no locking needed */ 1864 if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys)) 1865 ret = 0; 1866 else 1867 ret = dd->ipath_pd[0]->port_pkeys[index]; 1868 1869 return ret; 1870} 1871 1872static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 1873 u16 *pkey) 1874{ 1875 struct ipath_ibdev *dev = to_idev(ibdev); 1876 int ret; 1877 1878 if (index >= ipath_get_npkeys(dev->dd)) { 1879 ret = -EINVAL; 1880 goto bail; 1881 } 1882 1883 *pkey = ipath_get_pkey(dev->dd, index); 1884 ret = 0; 1885 1886bail: 1887 return ret; 1888} 1889 1890/** 1891 * ipath_alloc_ucontext - allocate a ucontest 1892 * @ibdev: the infiniband device 1893 * @udata: not used by the InfiniPath driver 1894 */ 1895 1896static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev, 1897 struct ib_udata *udata) 1898{ 1899 struct ipath_ucontext *context; 1900 struct ib_ucontext *ret; 1901 1902 context = kmalloc(sizeof *context, GFP_KERNEL); 1903 if (!context) { 1904 ret = ERR_PTR(-ENOMEM); 1905 goto bail; 1906 } 1907 1908 ret = &context->ibucontext; 1909 1910bail: 1911 return ret; 1912} 1913 1914static int ipath_dealloc_ucontext(struct ib_ucontext *context) 1915{ 1916 kfree(to_iucontext(context)); 1917 return 0; 1918} 1919 1920static int ipath_verbs_register_sysfs(struct ib_device *dev); 1921 1922static void __verbs_timer(unsigned long arg) 1923{ 1924 struct ipath_devdata *dd = (struct ipath_devdata *) arg; 1925 1926 /* Handle verbs layer timeouts. */ 1927 ipath_ib_timer(dd->verbs_dev); 1928 1929 mod_timer(&dd->verbs_timer, jiffies + 1); 1930} 1931 1932static int enable_timer(struct ipath_devdata *dd) 1933{ 1934 /* 1935 * Early chips had a design flaw where the chip and kernel idea 1936 * of the tail register don't always agree, and therefore we won't 1937 * get an interrupt on the next packet received. 1938 * If the board supports per packet receive interrupts, use it. 1939 * Otherwise, the timer function periodically checks for packets 1940 * to cover this case. 1941 * Either way, the timer is needed for verbs layer related 1942 * processing. 
1943 */ 1944 if (dd->ipath_flags & IPATH_GPIO_INTR) { 1945 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect, 1946 0x2074076542310ULL); 1947 /* Enable GPIO bit 2 interrupt */ 1948 dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT); 1949 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 1950 dd->ipath_gpio_mask); 1951 } 1952 1953 init_timer(&dd->verbs_timer); 1954 dd->verbs_timer.function = __verbs_timer; 1955 dd->verbs_timer.data = (unsigned long)dd; 1956 dd->verbs_timer.expires = jiffies + 1; 1957 add_timer(&dd->verbs_timer); 1958 1959 return 0; 1960} 1961 1962static int disable_timer(struct ipath_devdata *dd) 1963{ 1964 /* Disable GPIO bit 2 interrupt */ 1965 if (dd->ipath_flags & IPATH_GPIO_INTR) { 1966 /* Disable GPIO bit 2 interrupt */ 1967 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT)); 1968 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask, 1969 dd->ipath_gpio_mask); 1970 /* 1971 * We might want to undo changes to debugportselect, 1972 * but how? 1973 */ 1974 } 1975 1976 del_timer_sync(&dd->verbs_timer); 1977 1978 return 0; 1979} 1980 1981/** 1982 * ipath_register_ib_device - register our device with the infiniband core 1983 * @dd: the device data structure 1984 * Return the allocated ipath_ibdev pointer or NULL on error. 1985 */ 1986int ipath_register_ib_device(struct ipath_devdata *dd) 1987{ 1988 struct ipath_verbs_counters cntrs; 1989 struct ipath_ibdev *idev; 1990 struct ib_device *dev; 1991 struct ipath_verbs_txreq *tx; 1992 unsigned i; 1993 int ret; 1994 1995 idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev); 1996 if (idev == NULL) { 1997 ret = -ENOMEM; 1998 goto bail; 1999 } 2000 2001 dev = &idev->ibdev; 2002 2003 if (dd->ipath_sdma_descq_cnt) { 2004 tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx, 2005 GFP_KERNEL); 2006 if (tx == NULL) { 2007 ret = -ENOMEM; 2008 goto err_tx; 2009 } 2010 } else 2011 tx = NULL; 2012 idev->txreq_bufs = tx; 2013 2014 /* Only need to initialize non-zero fields. */ 2015 spin_lock_init(&idev->n_pds_lock); 2016 spin_lock_init(&idev->n_ahs_lock); 2017 spin_lock_init(&idev->n_cqs_lock); 2018 spin_lock_init(&idev->n_qps_lock); 2019 spin_lock_init(&idev->n_srqs_lock); 2020 spin_lock_init(&idev->n_mcast_grps_lock); 2021 2022 spin_lock_init(&idev->qp_table.lock); 2023 spin_lock_init(&idev->lk_table.lock); 2024 idev->sm_lid = __constant_be16_to_cpu(IB_LID_PERMISSIVE); 2025 /* Set the prefix to the default value (see ch. 4.1.1) */ 2026 idev->gid_prefix = __constant_cpu_to_be64(0xfe80000000000000ULL); 2027 2028 ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size); 2029 if (ret) 2030 goto err_qp; 2031 2032 /* 2033 * The top ib_ipath_lkey_table_size bits are used to index the 2034 * table. The lower 8 bits can be owned by the user (copied from 2035 * the LKEY). The remaining bits act as a generation number or tag. 
	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
	idev->lk_table.table = kzalloc(idev->lk_table.max *
				       sizeof(*idev->lk_table.table),
				       GFP_KERNEL);
	if (idev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
	INIT_LIST_HEAD(&idev->pending_mmaps);
	spin_lock_init(&idev->pending_lock);
	idev->mmap_offset = PAGE_SIZE;
	spin_lock_init(&idev->mmap_offset_lock);
	INIT_LIST_HEAD(&idev->pending[0]);
	INIT_LIST_HEAD(&idev->pending[1]);
	INIT_LIST_HEAD(&idev->pending[2]);
	INIT_LIST_HEAD(&idev->piowait);
	INIT_LIST_HEAD(&idev->rnrwait);
	INIT_LIST_HEAD(&idev->txreq_free);
	idev->pending_index = 0;
	idev->port_cap_flags =
		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
	if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
		idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	/* Snapshot current HW counters to "clear" them. */
	ipath_get_counters(dd, &cntrs);
	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
	idev->z_link_error_recovery_counter =
		cntrs.link_error_recovery_counter;
	idev->z_link_downed_counter = cntrs.link_downed_counter;
	idev->z_port_rcv_errors = cntrs.port_rcv_errors;
	idev->z_port_rcv_remphys_errors =
		cntrs.port_rcv_remphys_errors;
	idev->z_port_xmit_discards = cntrs.port_xmit_discards;
	idev->z_port_xmit_data = cntrs.port_xmit_data;
	idev->z_port_rcv_data = cntrs.port_rcv_data;
	idev->z_port_xmit_packets = cntrs.port_xmit_packets;
	idev->z_port_rcv_packets = cntrs.port_rcv_packets;
	idev->z_local_link_integrity_errors =
		cntrs.local_link_integrity_errors;
	idev->z_excessive_buffer_overrun_errors =
		cntrs.excessive_buffer_overrun_errors;
	idev->z_vl15_dropped = cntrs.vl15_dropped;

	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
		list_add(&tx->txreq.list, &idev->txreq_free);

	/*
	 * The system image GUID is supposed to be the same for all
	 * IB HCAs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!sys_image_guid)
		sys_image_guid = dd->ipath_guid;
	idev->sys_image_guid = sys_image_guid;
	idev->ib_unit = dd->ipath_unit;
	idev->dd = dd;

	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
	dev->owner = THIS_MODULE;
	dev->node_guid = dd->ipath_guid;
	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
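	/* Advertise the userspace verbs commands this driver supports. */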
	dev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = 1;
	dev->dma_device = &dd->pcidev->dev;
	dev->query_device = ipath_query_device;
	dev->modify_device = ipath_modify_device;
	dev->query_port = ipath_query_port;
	dev->modify_port = ipath_modify_port;
	dev->query_pkey = ipath_query_pkey;
	dev->query_gid = ipath_query_gid;
	dev->alloc_ucontext = ipath_alloc_ucontext;
	dev->dealloc_ucontext = ipath_dealloc_ucontext;
	dev->alloc_pd = ipath_alloc_pd;
	dev->dealloc_pd = ipath_dealloc_pd;
	dev->create_ah = ipath_create_ah;
	dev->destroy_ah = ipath_destroy_ah;
	dev->query_ah = ipath_query_ah;
	dev->create_srq = ipath_create_srq;
	dev->modify_srq = ipath_modify_srq;
	dev->query_srq = ipath_query_srq;
	dev->destroy_srq = ipath_destroy_srq;
	dev->create_qp = ipath_create_qp;
	dev->modify_qp = ipath_modify_qp;
	dev->query_qp = ipath_query_qp;
	dev->destroy_qp = ipath_destroy_qp;
	dev->post_send = ipath_post_send;
	dev->post_recv = ipath_post_receive;
	dev->post_srq_recv = ipath_post_srq_receive;
	dev->create_cq = ipath_create_cq;
	dev->destroy_cq = ipath_destroy_cq;
	dev->resize_cq = ipath_resize_cq;
	dev->poll_cq = ipath_poll_cq;
	dev->req_notify_cq = ipath_req_notify_cq;
	dev->get_dma_mr = ipath_get_dma_mr;
	dev->reg_phys_mr = ipath_reg_phys_mr;
	dev->reg_user_mr = ipath_reg_user_mr;
	dev->dereg_mr = ipath_dereg_mr;
	dev->alloc_fmr = ipath_alloc_fmr;
	dev->map_phys_fmr = ipath_map_phys_fmr;
	dev->unmap_fmr = ipath_unmap_fmr;
	dev->dealloc_fmr = ipath_dealloc_fmr;
	dev->attach_mcast = ipath_multicast_attach;
	dev->detach_mcast = ipath_multicast_detach;
	dev->process_mad = ipath_process_mad;
	dev->mmap = ipath_mmap;
	dev->dma_ops = &ipath_dma_mapping_ops;

	snprintf(dev->node_desc, sizeof(dev->node_desc),
		 IPATH_IDSTR " %s", init_utsname()->nodename);

	ret = ib_register_device(dev);
	if (ret)
		goto err_reg;

	ret = ipath_verbs_register_sysfs(dev);
	if (ret)
		goto err_class;

	enable_timer(dd);

	goto bail;

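	/*
	 * Error unwinding: each label undoes the allocations made before
	 * the corresponding failure point, falling through to the next
	 * label in reverse order of allocation.
	 */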
err_class:
	ib_unregister_device(dev);
err_reg:
	kfree(idev->lk_table.table);
err_lk:
	kfree(idev->qp_table.table);
err_qp:
	kfree(idev->txreq_bufs);
err_tx:
	ib_dealloc_device(dev);
	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	idev = NULL;

bail:
	dd->verbs_dev = idev;
	return ret;
}

void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
	struct ib_device *ibdev = &dev->ibdev;
	u32 qps_inuse;

	ib_unregister_device(ibdev);

	disable_timer(dev->dd);

	if (!list_empty(&dev->pending[0]) ||
	    !list_empty(&dev->pending[1]) ||
	    !list_empty(&dev->pending[2]))
		ipath_dev_err(dev->dd, "pending list not empty!\n");
	if (!list_empty(&dev->piowait))
		ipath_dev_err(dev->dd, "piowait list not empty!\n");
	if (!list_empty(&dev->rnrwait))
		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
	if (!ipath_mcast_tree_empty())
		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
	/*
	 * Note that ipath_unregister_ib_device() can be called before all
	 * the QPs are destroyed!
	 */
	qps_inuse = ipath_free_all_qps(&dev->qp_table);
	if (qps_inuse)
		ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
			      qps_inuse);
	kfree(dev->qp_table.table);
	kfree(dev->lk_table.table);
	kfree(dev->txreq_bufs);
	ib_dealloc_device(ibdev);
}

static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);

	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int ret;

	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
	if (ret < 0)
		goto bail;
	strcat(buf, "\n");
	ret = strlen(buf);

bail:
	return ret;
}

static ssize_t show_stats(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int i;
	int len;

	len = sprintf(buf,
		      "RC resends %d\n"
		      "RC no QACK %d\n"
		      "RC ACKs %d\n"
		      "RC SEQ NAKs %d\n"
		      "RC RDMA seq %d\n"
		      "RC RNR NAKs %d\n"
		      "RC OTH NAKs %d\n"
		      "RC timeouts %d\n"
		      "RC RDMA dup %d\n"
		      "piobuf wait %d\n"
		      "unaligned %d\n"
		      "PKT drops %d\n"
		      "WQE errs %d\n",
		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
		      dev->n_other_naks, dev->n_timeouts,
		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
		      dev->n_pkt_drops, dev->n_wqe_errs);
	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
		const struct ipath_opcode_stats *si = &dev->opstats[i];

		if (!si->n_packets && !si->n_bytes)
			continue;
		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
			       (unsigned long long) si->n_packets,
			       (unsigned long long) si->n_bytes);
	}
	return len;
}

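/*
 * Read-only sysfs attributes for the IB device, registered by
 * ipath_verbs_register_sysfs() below.
 */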
static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);

static struct device_attribute *ipath_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_stats
};

static int ipath_verbs_register_sysfs(struct ib_device *dev)
{
	int i;
	int ret;

	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
		if (device_create_file(&dev->dev,
				       ipath_class_attributes[i])) {
			ret = 1;
			goto bail;
		}

	ret = 0;

bail:
	return ret;
}