IB/ipath: Fix many locking issues when switching to error state

The send DMA hardware queue voided a number of prior assumptions about
when a send is complete, which led to completions being generated out of
order. There were also a number of locking issues when switching the QP
to the error or reset states; fix those and implement the IB_QPS_SQD
state.
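
The reset path is the heart of the locking fix and is now shared by
ipath_modify_qp(IB_QPS_RESET) and ipath_destroy_qp(): take the QP off the
pending lists, clear the wait flags, kill the send tasklet, and wait for
any outstanding send DMA to drain before the rest of the QP state is
touched. Condensed from the ipath_qp.c hunks below (surrounding code and
error handling omitted):

        spin_lock_irq(&qp->s_lock);
        if (qp->state != IB_QPS_RESET) {
                qp->state = IB_QPS_RESET;
                spin_lock(&dev->pending_lock);
                if (!list_empty(&qp->timerwait))
                        list_del_init(&qp->timerwait);
                if (!list_empty(&qp->piowait))
                        list_del_init(&qp->piowait);
                spin_unlock(&dev->pending_lock);
                qp->s_flags &= ~IPATH_S_ANY_WAIT;
                spin_unlock_irq(&qp->s_lock);
                /* Stop the sending tasklet and wait for send DMA to finish. */
                tasklet_kill(&qp->s_task);
                wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
        } else
                spin_unlock_irq(&qp->s_lock);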

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

authored by Ralph Campbell and committed by Roland Dreier e509be89 53dc1ca1

+554 -304
+90 -91
drivers/infiniband/hw/ipath/ipath_qp.c
··· 242 242 { 243 243 struct ipath_qp *q, **qpp; 244 244 unsigned long flags; 245 - int fnd = 0; 246 245 247 246 spin_lock_irqsave(&qpt->lock, flags); 248 247 ··· 252 253 *qpp = qp->next; 253 254 qp->next = NULL; 254 255 atomic_dec(&qp->refcount); 255 - fnd = 1; 256 256 break; 257 257 } 258 258 } 259 259 260 260 spin_unlock_irqrestore(&qpt->lock, flags); 261 - 262 - if (!fnd) 263 - return; 264 - 265 - free_qpn(qpt, qp->ibqp.qp_num); 266 - 267 - wait_event(qp->wait, !atomic_read(&qp->refcount)); 268 261 } 269 262 270 263 /** 271 - * ipath_free_all_qps - remove all QPs from the table 264 + * ipath_free_all_qps - check for QPs still in use 272 265 * @qpt: the QP table to empty 266 + * 267 + * There should not be any QPs still in use. 268 + * Free memory for table. 273 269 */ 274 - void ipath_free_all_qps(struct ipath_qp_table *qpt) 270 + unsigned ipath_free_all_qps(struct ipath_qp_table *qpt) 275 271 { 276 272 unsigned long flags; 277 - struct ipath_qp *qp, *nqp; 278 - u32 n; 273 + struct ipath_qp *qp; 274 + u32 n, qp_inuse = 0; 279 275 276 + spin_lock_irqsave(&qpt->lock, flags); 280 277 for (n = 0; n < qpt->max; n++) { 281 - spin_lock_irqsave(&qpt->lock, flags); 282 278 qp = qpt->table[n]; 283 279 qpt->table[n] = NULL; 284 - spin_unlock_irqrestore(&qpt->lock, flags); 285 280 286 - while (qp) { 287 - nqp = qp->next; 288 - free_qpn(qpt, qp->ibqp.qp_num); 289 - if (!atomic_dec_and_test(&qp->refcount) || 290 - !ipath_destroy_qp(&qp->ibqp)) 291 - ipath_dbg("QP memory leak!\n"); 292 - qp = nqp; 293 - } 281 + for (; qp; qp = qp->next) 282 + qp_inuse++; 294 283 } 284 + spin_unlock_irqrestore(&qpt->lock, flags); 295 285 296 - for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { 286 + for (n = 0; n < ARRAY_SIZE(qpt->map); n++) 297 287 if (qpt->map[n].page) 298 - free_page((unsigned long)qpt->map[n].page); 299 - } 288 + free_page((unsigned long) qpt->map[n].page); 289 + return qp_inuse; 300 290 } 301 291 302 292 /** ··· 324 336 qp->remote_qpn = 0; 325 337 qp->qkey = 0; 326 338 qp->qp_access_flags = 0; 327 - qp->s_busy = 0; 339 + atomic_set(&qp->s_dma_busy, 0); 328 340 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; 329 341 qp->s_hdrwords = 0; 330 342 qp->s_wqe = NULL; 331 343 qp->s_pkt_delay = 0; 344 + qp->s_draining = 0; 332 345 qp->s_psn = 0; 333 346 qp->r_psn = 0; 334 347 qp->r_msn = 0; ··· 342 353 } 343 354 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 344 355 qp->r_nak_state = 0; 345 - qp->r_wrid_valid = 0; 356 + qp->r_aflags = 0; 357 + qp->r_flags = 0; 346 358 qp->s_rnr_timeout = 0; 347 359 qp->s_head = 0; 348 360 qp->s_tail = 0; ··· 351 361 qp->s_last = 0; 352 362 qp->s_ssn = 1; 353 363 qp->s_lsn = 0; 354 - qp->s_wait_credit = 0; 355 364 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); 356 365 qp->r_head_ack_queue = 0; 357 366 qp->s_tail_ack_queue = 0; ··· 359 370 qp->r_rq.wq->head = 0; 360 371 qp->r_rq.wq->tail = 0; 361 372 } 362 - qp->r_reuse_sge = 0; 363 373 } 364 374 365 375 /** ··· 390 402 list_del_init(&qp->piowait); 391 403 spin_unlock(&dev->pending_lock); 392 404 393 - wc.vendor_err = 0; 394 - wc.byte_len = 0; 395 - wc.imm_data = 0; 405 + /* Schedule the sending tasklet to drain the send work queue. 
*/ 406 + if (qp->s_last != qp->s_head) 407 + ipath_schedule_send(qp); 408 + 409 + memset(&wc, 0, sizeof(wc)); 396 410 wc.qp = &qp->ibqp; 397 - wc.src_qp = 0; 398 - wc.wc_flags = 0; 399 - wc.pkey_index = 0; 400 - wc.slid = 0; 401 - wc.sl = 0; 402 - wc.dlid_path_bits = 0; 403 - wc.port_num = 0; 404 - if (qp->r_wrid_valid) { 405 - qp->r_wrid_valid = 0; 411 + wc.opcode = IB_WC_RECV; 412 + 413 + if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) { 406 414 wc.wr_id = qp->r_wr_id; 407 - wc.opcode = IB_WC_RECV; 408 415 wc.status = err; 409 416 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); 410 417 } 411 418 wc.status = IB_WC_WR_FLUSH_ERR; 412 - 413 - while (qp->s_last != qp->s_head) { 414 - struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); 415 - 416 - wc.wr_id = wqe->wr.wr_id; 417 - wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 418 - if (++qp->s_last >= qp->s_size) 419 - qp->s_last = 0; 420 - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); 421 - } 422 - qp->s_cur = qp->s_tail = qp->s_head; 423 - qp->s_hdrwords = 0; 424 - qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 425 419 426 420 if (qp->r_rq.wq) { 427 421 struct ipath_rwq *wq; ··· 420 450 tail = wq->tail; 421 451 if (tail >= qp->r_rq.size) 422 452 tail = 0; 423 - wc.opcode = IB_WC_RECV; 424 453 while (tail != head) { 425 454 wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; 426 455 if (++tail >= qp->r_rq.size) ··· 451 482 struct ipath_ibdev *dev = to_idev(ibqp->device); 452 483 struct ipath_qp *qp = to_iqp(ibqp); 453 484 enum ib_qp_state cur_state, new_state; 454 - unsigned long flags; 455 485 int lastwqe = 0; 456 486 int ret; 457 487 458 - spin_lock_irqsave(&qp->s_lock, flags); 488 + spin_lock_irq(&qp->s_lock); 459 489 460 490 cur_state = attr_mask & IB_QP_CUR_STATE ? 461 491 attr->cur_qp_state : qp->state; ··· 507 539 508 540 switch (new_state) { 509 541 case IB_QPS_RESET: 542 + if (qp->state != IB_QPS_RESET) { 543 + qp->state = IB_QPS_RESET; 544 + spin_lock(&dev->pending_lock); 545 + if (!list_empty(&qp->timerwait)) 546 + list_del_init(&qp->timerwait); 547 + if (!list_empty(&qp->piowait)) 548 + list_del_init(&qp->piowait); 549 + spin_unlock(&dev->pending_lock); 550 + qp->s_flags &= ~IPATH_S_ANY_WAIT; 551 + spin_unlock_irq(&qp->s_lock); 552 + /* Stop the sending tasklet */ 553 + tasklet_kill(&qp->s_task); 554 + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); 555 + spin_lock_irq(&qp->s_lock); 556 + } 510 557 ipath_reset_qp(qp, ibqp->qp_type); 558 + break; 559 + 560 + case IB_QPS_SQD: 561 + qp->s_draining = qp->s_last != qp->s_cur; 562 + qp->state = new_state; 563 + break; 564 + 565 + case IB_QPS_SQE: 566 + if (qp->ibqp.qp_type == IB_QPT_RC) 567 + goto inval; 568 + qp->state = new_state; 511 569 break; 512 570 513 571 case IB_QPS_ERR: ··· 541 547 break; 542 548 543 549 default: 550 + qp->state = new_state; 544 551 break; 545 - 546 552 } 547 553 548 554 if (attr_mask & IB_QP_PKEY_INDEX) ··· 595 601 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) 596 602 qp->s_max_rd_atomic = attr->max_rd_atomic; 597 603 598 - qp->state = new_state; 599 - spin_unlock_irqrestore(&qp->s_lock, flags); 604 + spin_unlock_irq(&qp->s_lock); 600 605 601 606 if (lastwqe) { 602 607 struct ib_event ev; ··· 609 616 goto bail; 610 617 611 618 inval: 612 - spin_unlock_irqrestore(&qp->s_lock, flags); 619 + spin_unlock_irq(&qp->s_lock); 613 620 ret = -EINVAL; 614 621 615 622 bail: ··· 640 647 attr->pkey_index = qp->s_pkey_index; 641 648 attr->alt_pkey_index = 0; 642 649 attr->en_sqd_async_notify = 0; 643 - attr->sq_draining = 0; 650 + attr->sq_draining = 
qp->s_draining; 644 651 attr->max_rd_atomic = qp->s_max_rd_atomic; 645 652 attr->max_dest_rd_atomic = qp->r_max_rd_atomic; 646 653 attr->min_rnr_timer = qp->r_min_rnr_timer; ··· 830 837 spin_lock_init(&qp->r_rq.lock); 831 838 atomic_set(&qp->refcount, 0); 832 839 init_waitqueue_head(&qp->wait); 840 + init_waitqueue_head(&qp->wait_dma); 833 841 tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); 834 842 INIT_LIST_HEAD(&qp->piowait); 835 843 INIT_LIST_HEAD(&qp->timerwait); ··· 924 930 else 925 931 vfree(qp->r_rq.wq); 926 932 ipath_free_qp(&dev->qp_table, qp); 933 + free_qpn(&dev->qp_table, qp->ibqp.qp_num); 927 934 bail_qp: 928 935 kfree(qp); 929 936 bail_swq: ··· 946 951 { 947 952 struct ipath_qp *qp = to_iqp(ibqp); 948 953 struct ipath_ibdev *dev = to_idev(ibqp->device); 949 - unsigned long flags; 950 954 951 - spin_lock_irqsave(&qp->s_lock, flags); 952 - qp->state = IB_QPS_ERR; 953 - spin_unlock_irqrestore(&qp->s_lock, flags); 954 - spin_lock(&dev->n_qps_lock); 955 - dev->n_qps_allocated--; 956 - spin_unlock(&dev->n_qps_lock); 955 + /* Make sure HW and driver activity is stopped. */ 956 + spin_lock_irq(&qp->s_lock); 957 + if (qp->state != IB_QPS_RESET) { 958 + qp->state = IB_QPS_RESET; 959 + spin_lock(&dev->pending_lock); 960 + if (!list_empty(&qp->timerwait)) 961 + list_del_init(&qp->timerwait); 962 + if (!list_empty(&qp->piowait)) 963 + list_del_init(&qp->piowait); 964 + spin_unlock(&dev->pending_lock); 965 + qp->s_flags &= ~IPATH_S_ANY_WAIT; 966 + spin_unlock_irq(&qp->s_lock); 967 + /* Stop the sending tasklet */ 968 + tasklet_kill(&qp->s_task); 969 + wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy)); 970 + } else 971 + spin_unlock_irq(&qp->s_lock); 957 972 958 - /* Stop the sending tasklet. */ 959 - tasklet_kill(&qp->s_task); 973 + ipath_free_qp(&dev->qp_table, qp); 960 974 961 975 if (qp->s_tx) { 962 976 atomic_dec(&qp->refcount); 963 977 if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) 964 978 kfree(qp->s_tx->txreq.map_addr); 979 + spin_lock_irq(&dev->pending_lock); 980 + list_add(&qp->s_tx->txreq.list, &dev->txreq_free); 981 + spin_unlock_irq(&dev->pending_lock); 982 + qp->s_tx = NULL; 965 983 } 966 984 967 - /* Make sure the QP isn't on the timeout list. */ 968 - spin_lock_irqsave(&dev->pending_lock, flags); 969 - if (!list_empty(&qp->timerwait)) 970 - list_del_init(&qp->timerwait); 971 - if (!list_empty(&qp->piowait)) 972 - list_del_init(&qp->piowait); 973 - if (qp->s_tx) 974 - list_add(&qp->s_tx->txreq.list, &dev->txreq_free); 975 - spin_unlock_irqrestore(&dev->pending_lock, flags); 985 + wait_event(qp->wait, !atomic_read(&qp->refcount)); 976 986 977 - /* 978 - * Make sure that the QP is not in the QPN table so receive 979 - * interrupts will discard packets for this QP. XXX Also remove QP 980 - * from multicast table. 981 - */ 982 - if (atomic_read(&qp->refcount) != 0) 983 - ipath_free_qp(&dev->qp_table, qp); 987 + /* all user's cleaned up, mark it available */ 988 + free_qpn(&dev->qp_table, qp->ibqp.qp_num); 989 + spin_lock(&dev->n_qps_lock); 990 + dev->n_qps_allocated--; 991 + spin_unlock(&dev->n_qps_lock); 984 992 985 993 if (qp->ip) 986 994 kref_put(&qp->ip->ref, ipath_release_mmap_info); ··· 1053 1055 } 1054 1056 1055 1057 /* Restart sending if it was blocked due to lack of credits. 
*/ 1056 - if (qp->s_cur != qp->s_head && 1058 + if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) && 1059 + qp->s_cur != qp->s_head && 1057 1060 (qp->s_lsn == (u32) -1 || 1058 1061 ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, 1059 1062 qp->s_lsn + 1) <= 0)) 1060 - tasklet_hi_schedule(&qp->s_task); 1063 + ipath_schedule_send(qp); 1061 1064 }
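
The new IB_QPS_SQD support above is intentionally minimal: the modify-QP path
only records whether posted sends are still outstanding, and the completion
paths (ipath_send_complete() in ipath_ruc.c and do_rc_ack() in ipath_rc.c
below) clear the flag once the last in-flight work request retires, so
ipath_query_qp() can report sq_draining. The two halves, pulled together from
the hunks (fragments, not a complete function):

        /* in ipath_modify_qp(), with s_lock held */
        case IB_QPS_SQD:
                qp->s_draining = qp->s_last != qp->s_cur;
                qp->state = new_state;
                break;

        /* in ipath_send_complete(), with s_lock held */
        if (qp->state == IB_QPS_SQD && last == qp->s_cur)
                qp->s_draining = 0;
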
+97 -54
drivers/infiniband/hw/ipath/ipath_rc.c
··· 92 92 u32 bth0; 93 93 u32 bth2; 94 94 95 + /* Don't send an ACK if we aren't supposed to. */ 96 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 97 + goto bail; 98 + 95 99 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 96 100 hwords = 5; 97 101 ··· 242 238 ipath_make_rc_ack(dev, qp, ohdr, pmtu)) 243 239 goto done; 244 240 245 - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || 246 - qp->s_rnr_timeout || qp->s_wait_credit) 247 - goto bail; 241 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { 242 + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) 243 + goto bail; 244 + /* We are in the error state, flush the work request. */ 245 + if (qp->s_last == qp->s_head) 246 + goto bail; 247 + /* If DMAs are in progress, we can't flush immediately. */ 248 + if (atomic_read(&qp->s_dma_busy)) { 249 + qp->s_flags |= IPATH_S_WAIT_DMA; 250 + goto bail; 251 + } 252 + wqe = get_swqe_ptr(qp, qp->s_last); 253 + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 254 + goto done; 255 + } 248 256 249 - /* Limit the number of packets sent without an ACK. */ 250 - if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { 251 - qp->s_wait_credit = 1; 252 - dev->n_rc_stalls++; 257 + /* Leave BUSY set until RNR timeout. */ 258 + if (qp->s_rnr_timeout) { 259 + qp->s_flags |= IPATH_S_WAITING; 253 260 goto bail; 254 261 } 255 262 ··· 272 257 wqe = get_swqe_ptr(qp, qp->s_cur); 273 258 switch (qp->s_state) { 274 259 default: 260 + if (!(ib_ipath_state_ops[qp->state] & 261 + IPATH_PROCESS_NEXT_SEND_OK)) 262 + goto bail; 275 263 /* 276 264 * Resend an old request or start a new one. 277 265 * ··· 312 294 case IB_WR_SEND_WITH_IMM: 313 295 /* If no credit, return. */ 314 296 if (qp->s_lsn != (u32) -1 && 315 - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 297 + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { 298 + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; 316 299 goto bail; 300 + } 317 301 wqe->lpsn = wqe->psn; 318 302 if (len > pmtu) { 319 303 wqe->lpsn += (len - 1) / pmtu; ··· 345 325 case IB_WR_RDMA_WRITE_WITH_IMM: 346 326 /* If no credit, return. */ 347 327 if (qp->s_lsn != (u32) -1 && 348 - ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 328 + ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) { 329 + qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT; 349 330 goto bail; 331 + } 350 332 ohdr->u.rc.reth.vaddr = 351 333 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 352 334 ohdr->u.rc.reth.rkey = ··· 592 570 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); 593 571 done: 594 572 ret = 1; 573 + goto unlock; 574 + 595 575 bail: 576 + qp->s_flags &= ~IPATH_S_BUSY; 577 + unlock: 596 578 spin_unlock_irqrestore(&qp->s_lock, flags); 597 579 return ret; 598 580 } ··· 632 606 633 607 spin_unlock_irqrestore(&qp->s_lock, flags); 634 608 609 + /* Don't try to send ACKs if the link isn't ACTIVE */ 635 610 dd = dev->dd; 611 + if (!(dd->ipath_flags & IPATH_LINKACTIVE)) 612 + goto done; 613 + 636 614 piobuf = ipath_getpiobuf(dd, 0, NULL); 637 615 if (!piobuf) { 638 616 /* ··· 698 668 goto done; 699 669 700 670 queue_ack: 701 - dev->n_rc_qacks++; 702 - qp->s_flags |= IPATH_S_ACK_PENDING; 703 - qp->s_nak_state = qp->r_nak_state; 704 - qp->s_ack_psn = qp->r_ack_psn; 671 + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) { 672 + dev->n_rc_qacks++; 673 + qp->s_flags |= IPATH_S_ACK_PENDING; 674 + qp->s_nak_state = qp->r_nak_state; 675 + qp->s_ack_psn = qp->r_ack_psn; 676 + 677 + /* Schedule the send tasklet. 
*/ 678 + ipath_schedule_send(qp); 679 + } 705 680 spin_unlock_irqrestore(&qp->s_lock, flags); 706 - 707 - /* Call ipath_do_rc_send() in another thread. */ 708 - tasklet_hi_schedule(&qp->s_task); 709 - 710 681 done: 711 682 return; 712 683 } ··· 766 735 /* 767 736 * Set the state to restart in the middle of a request. 768 737 * Don't change the s_sge, s_cur_sge, or s_cur_size. 769 - * See ipath_do_rc_send(). 738 + * See ipath_make_rc_req(). 770 739 */ 771 740 switch (opcode) { 772 741 case IB_WR_SEND: ··· 832 801 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; 833 802 834 803 reset_psn(qp, psn); 835 - tasklet_hi_schedule(&qp->s_task); 804 + ipath_schedule_send(qp); 836 805 837 806 bail: 838 807 return; ··· 840 809 841 810 static inline void update_last_psn(struct ipath_qp *qp, u32 psn) 842 811 { 843 - if (qp->s_last_psn != psn) { 844 - qp->s_last_psn = psn; 845 - if (qp->s_wait_credit) { 846 - qp->s_wait_credit = 0; 847 - tasklet_hi_schedule(&qp->s_task); 848 - } 849 - } 812 + qp->s_last_psn = psn; 850 813 } 851 814 852 815 /** ··· 940 915 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { 941 916 qp->s_num_rd_atomic--; 942 917 /* Restart sending task if fence is complete */ 943 - if ((qp->s_flags & IPATH_S_FENCE_PENDING) && 944 - !qp->s_num_rd_atomic) { 945 - qp->s_flags &= ~IPATH_S_FENCE_PENDING; 946 - tasklet_hi_schedule(&qp->s_task); 947 - } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) { 948 - qp->s_flags &= ~IPATH_S_RDMAR_PENDING; 949 - tasklet_hi_schedule(&qp->s_task); 950 - } 918 + if (((qp->s_flags & IPATH_S_FENCE_PENDING) && 919 + !qp->s_num_rd_atomic) || 920 + qp->s_flags & IPATH_S_RDMAR_PENDING) 921 + ipath_schedule_send(qp); 951 922 } 952 923 /* Post a send completion queue entry if requested. */ 953 924 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || ··· 977 956 } else { 978 957 if (++qp->s_last >= qp->s_size) 979 958 qp->s_last = 0; 959 + if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur) 960 + qp->s_draining = 0; 980 961 if (qp->s_last == qp->s_tail) 981 962 break; 982 963 wqe = get_swqe_ptr(qp, qp->s_last); ··· 1002 979 */ 1003 980 if (ipath_cmp24(qp->s_psn, psn) <= 0) { 1004 981 reset_psn(qp, psn + 1); 1005 - tasklet_hi_schedule(&qp->s_task); 982 + ipath_schedule_send(qp); 1006 983 } 1007 984 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { 1008 985 qp->s_state = OP(SEND_LAST); ··· 1041 1018 ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & 1042 1019 IPATH_AETH_CREDIT_MASK]; 1043 1020 ipath_insert_rnr_queue(qp); 1021 + ipath_schedule_send(qp); 1044 1022 goto bail; 1045 1023 1046 1024 case 3: /* NAK */ ··· 1131 1107 u64 val; 1132 1108 1133 1109 spin_lock_irqsave(&qp->s_lock, flags); 1110 + 1111 + /* Double check we can process this now that we hold the s_lock. */ 1112 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 1113 + goto ack_done; 1134 1114 1135 1115 /* Ignore invalid responses. */ 1136 1116 if (ipath_cmp24(psn, qp->s_next_psn) >= 0) ··· 1371 1343 psn &= IPATH_PSN_MASK; 1372 1344 e = NULL; 1373 1345 old_req = 1; 1346 + 1374 1347 spin_lock_irqsave(&qp->s_lock, flags); 1348 + /* Double check we can process this now that we hold the s_lock. 
*/ 1349 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 1350 + goto unlock_done; 1351 + 1375 1352 for (i = qp->r_head_ack_queue; ; i = prev) { 1376 1353 if (i == qp->s_tail_ack_queue) 1377 1354 old_req = 0; ··· 1504 1471 break; 1505 1472 } 1506 1473 qp->r_nak_state = 0; 1507 - tasklet_hi_schedule(&qp->s_task); 1474 + ipath_schedule_send(qp); 1508 1475 1509 1476 unlock_done: 1510 1477 spin_unlock_irqrestore(&qp->s_lock, flags); ··· 1536 1503 1537 1504 static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) 1538 1505 { 1539 - unsigned long flags; 1540 1506 unsigned next; 1541 1507 1542 1508 next = n + 1; 1543 1509 if (next > IPATH_MAX_RDMA_ATOMIC) 1544 1510 next = 0; 1545 - spin_lock_irqsave(&qp->s_lock, flags); 1546 1511 if (n == qp->s_tail_ack_queue) { 1547 1512 qp->s_tail_ack_queue = next; 1548 1513 qp->s_ack_state = OP(ACKNOWLEDGE); 1549 1514 } 1550 - spin_unlock_irqrestore(&qp->s_lock, flags); 1551 1515 } 1552 1516 1553 1517 /** ··· 1573 1543 int diff; 1574 1544 struct ib_reth *reth; 1575 1545 int header_in_data; 1546 + unsigned long flags; 1576 1547 1577 1548 /* Validate the SLID. See Ch. 9.6.1.5 */ 1578 1549 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) ··· 1721 1690 goto nack_inv; 1722 1691 ipath_copy_sge(&qp->r_sge, data, tlen); 1723 1692 qp->r_msn++; 1724 - if (!qp->r_wrid_valid) 1693 + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) 1725 1694 break; 1726 - qp->r_wrid_valid = 0; 1727 1695 wc.wr_id = qp->r_wr_id; 1728 1696 wc.status = IB_WC_SUCCESS; 1729 1697 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) || ··· 1794 1764 next = qp->r_head_ack_queue + 1; 1795 1765 if (next > IPATH_MAX_RDMA_ATOMIC) 1796 1766 next = 0; 1767 + spin_lock_irqsave(&qp->s_lock, flags); 1768 + /* Double check we can process this while holding the s_lock. */ 1769 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 1770 + goto unlock; 1797 1771 if (unlikely(next == qp->s_tail_ack_queue)) { 1798 1772 if (!qp->s_ack_queue[next].sent) 1799 - goto nack_inv; 1773 + goto nack_inv_unlck; 1800 1774 ipath_update_ack_queue(qp, next); 1801 1775 } 1802 1776 e = &qp->s_ack_queue[qp->r_head_ack_queue]; ··· 1821 1787 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, 1822 1788 rkey, IB_ACCESS_REMOTE_READ); 1823 1789 if (unlikely(!ok)) 1824 - goto nack_acc; 1790 + goto nack_acc_unlck; 1825 1791 /* 1826 1792 * Update the next expected PSN. We add 1 later 1827 1793 * below, so only add the remainder here. ··· 1848 1814 qp->r_psn++; 1849 1815 qp->r_state = opcode; 1850 1816 qp->r_nak_state = 0; 1851 - barrier(); 1852 1817 qp->r_head_ack_queue = next; 1853 1818 1854 - /* Call ipath_do_rc_send() in another thread. */ 1855 - tasklet_hi_schedule(&qp->s_task); 1819 + /* Schedule the send tasklet. */ 1820 + ipath_schedule_send(qp); 1856 1821 1857 - goto done; 1822 + goto unlock; 1858 1823 } 1859 1824 1860 1825 case OP(COMPARE_SWAP): ··· 1872 1839 next = qp->r_head_ack_queue + 1; 1873 1840 if (next > IPATH_MAX_RDMA_ATOMIC) 1874 1841 next = 0; 1842 + spin_lock_irqsave(&qp->s_lock, flags); 1843 + /* Double check we can process this while holding the s_lock. 
*/ 1844 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) 1845 + goto unlock; 1875 1846 if (unlikely(next == qp->s_tail_ack_queue)) { 1876 1847 if (!qp->s_ack_queue[next].sent) 1877 - goto nack_inv; 1848 + goto nack_inv_unlck; 1878 1849 ipath_update_ack_queue(qp, next); 1879 1850 } 1880 1851 if (!header_in_data) ··· 1888 1851 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | 1889 1852 be32_to_cpu(ateth->vaddr[1]); 1890 1853 if (unlikely(vaddr & (sizeof(u64) - 1))) 1891 - goto nack_inv; 1854 + goto nack_inv_unlck; 1892 1855 rkey = be32_to_cpu(ateth->rkey); 1893 1856 /* Check rkey & NAK */ 1894 1857 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, 1895 1858 sizeof(u64), vaddr, rkey, 1896 1859 IB_ACCESS_REMOTE_ATOMIC))) 1897 - goto nack_acc; 1860 + goto nack_acc_unlck; 1898 1861 /* Perform atomic OP and save result. */ 1899 1862 maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 1900 1863 sdata = be64_to_cpu(ateth->swap_data); ··· 1911 1874 qp->r_psn++; 1912 1875 qp->r_state = opcode; 1913 1876 qp->r_nak_state = 0; 1914 - barrier(); 1915 1877 qp->r_head_ack_queue = next; 1916 1878 1917 - /* Call ipath_do_rc_send() in another thread. */ 1918 - tasklet_hi_schedule(&qp->s_task); 1879 + /* Schedule the send tasklet. */ 1880 + ipath_schedule_send(qp); 1919 1881 1920 - goto done; 1882 + goto unlock; 1921 1883 } 1922 1884 1923 1885 default: ··· 1937 1901 qp->r_ack_psn = qp->r_psn; 1938 1902 goto send_ack; 1939 1903 1904 + nack_inv_unlck: 1905 + spin_unlock_irqrestore(&qp->s_lock, flags); 1940 1906 nack_inv: 1941 1907 ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR); 1942 1908 qp->r_nak_state = IB_NAK_INVALID_REQUEST; 1943 1909 qp->r_ack_psn = qp->r_psn; 1944 1910 goto send_ack; 1945 1911 1912 + nack_acc_unlck: 1913 + spin_unlock_irqrestore(&qp->s_lock, flags); 1946 1914 nack_acc: 1947 1915 ipath_rc_error(qp, IB_WC_LOC_PROT_ERR); 1948 1916 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; 1949 1917 qp->r_ack_psn = qp->r_psn; 1950 1918 send_ack: 1951 1919 send_rc_ack(qp); 1920 + goto done; 1952 1921 1922 + unlock: 1923 + spin_unlock_irqrestore(&qp->s_lock, flags); 1953 1924 done: 1954 1925 return; 1955 1926 }
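
A locking pattern that repeats throughout the RC receive path above: the QP
state is re-checked once s_lock is actually held, since another CPU can move
the QP to the error or reset state between the unlocked fast-path check and
the lock acquisition. Stripped to its shape (labels as in the patch, response
handling elided):

        spin_lock_irqsave(&qp->s_lock, flags);
        /* Double check we can process this now that we hold the s_lock. */
        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
                goto unlock;
        /* ... queue the ACK/response and call ipath_schedule_send(qp) ... */
unlock:
        spin_unlock_irqrestore(&qp->s_lock, flags);
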
+115 -53
drivers/infiniband/hw/ipath/ipath_ruc.c
··· 78 78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device 79 79 * @qp: the QP 80 80 * 81 + * Called with the QP s_lock held and interrupts disabled. 81 82 * XXX Use a simple list for now. We might need a priority 82 83 * queue if we have lots of QPs waiting for RNR timeouts 83 84 * but that should be rare. ··· 86 85 void ipath_insert_rnr_queue(struct ipath_qp *qp) 87 86 { 88 87 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 89 - unsigned long flags; 90 88 91 - spin_lock_irqsave(&dev->pending_lock, flags); 89 + /* We already did a spin_lock_irqsave(), so just use spin_lock */ 90 + spin_lock(&dev->pending_lock); 92 91 if (list_empty(&dev->rnrwait)) 93 92 list_add(&qp->timerwait, &dev->rnrwait); 94 93 else { ··· 110 109 nqp->s_rnr_timeout -= qp->s_rnr_timeout; 111 110 list_add(&qp->timerwait, l); 112 111 } 113 - spin_unlock_irqrestore(&dev->pending_lock, flags); 112 + spin_unlock(&dev->pending_lock); 114 113 } 115 114 116 115 /** ··· 186 185 } 187 186 188 187 spin_lock_irqsave(&rq->lock, flags); 188 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { 189 + ret = 0; 190 + goto unlock; 191 + } 192 + 189 193 wq = rq->wq; 190 194 tail = wq->tail; 191 195 /* Validate tail before using it since it is user writable. */ ··· 198 192 tail = 0; 199 193 do { 200 194 if (unlikely(tail == wq->head)) { 201 - spin_unlock_irqrestore(&rq->lock, flags); 202 195 ret = 0; 203 - goto bail; 196 + goto unlock; 204 197 } 205 198 /* Make sure entry is read after head index is read. */ 206 199 smp_rmb(); ··· 212 207 wq->tail = tail; 213 208 214 209 ret = 1; 215 - qp->r_wrid_valid = 1; 210 + set_bit(IPATH_R_WRID_VALID, &qp->r_aflags); 216 211 if (handler) { 217 212 u32 n; 218 213 ··· 239 234 goto bail; 240 235 } 241 236 } 237 + unlock: 242 238 spin_unlock_irqrestore(&rq->lock, flags); 243 - 244 239 bail: 245 240 return ret; 246 241 } ··· 268 263 atomic64_t *maddr; 269 264 enum ib_wc_status send_status; 270 265 266 + /* 267 + * Note that we check the responder QP state after 268 + * checking the requester's state. 269 + */ 271 270 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); 272 - if (!qp) { 273 - dev->n_pkt_drops++; 274 - return; 275 - } 276 271 277 - again: 278 272 spin_lock_irqsave(&sqp->s_lock, flags); 279 273 280 - if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || 281 - sqp->s_rnr_timeout) { 282 - spin_unlock_irqrestore(&sqp->s_lock, flags); 283 - goto done; 284 - } 274 + /* Return if we are already busy processing a work request. */ 275 + if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || 276 + !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) 277 + goto unlock; 285 278 286 - /* Get the next send request. */ 287 - if (sqp->s_last == sqp->s_head) { 288 - /* Send work queue is empty. */ 289 - spin_unlock_irqrestore(&sqp->s_lock, flags); 290 - goto done; 279 + sqp->s_flags |= IPATH_S_BUSY; 280 + 281 + again: 282 + if (sqp->s_last == sqp->s_head) 283 + goto clr_busy; 284 + wqe = get_swqe_ptr(sqp, sqp->s_last); 285 + 286 + /* Return if it is not OK to start a new work reqeust. */ 287 + if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { 288 + if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND)) 289 + goto clr_busy; 290 + /* We are in the error state, flush the work request. */ 291 + send_status = IB_WC_WR_FLUSH_ERR; 292 + goto flush_send; 291 293 } 292 294 293 295 /* 294 296 * We can rely on the entry not changing without the s_lock 295 297 * being held until we update s_last. 
298 + * We increment s_cur to indicate s_last is in progress. 296 299 */ 297 - wqe = get_swqe_ptr(sqp, sqp->s_last); 300 + if (sqp->s_last == sqp->s_cur) { 301 + if (++sqp->s_cur >= sqp->s_size) 302 + sqp->s_cur = 0; 303 + } 298 304 spin_unlock_irqrestore(&sqp->s_lock, flags); 305 + 306 + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { 307 + dev->n_pkt_drops++; 308 + /* 309 + * For RC, the requester would timeout and retry so 310 + * shortcut the timeouts and just signal too many retries. 311 + */ 312 + if (sqp->ibqp.qp_type == IB_QPT_RC) 313 + send_status = IB_WC_RETRY_EXC_ERR; 314 + else 315 + send_status = IB_WC_SUCCESS; 316 + goto serr; 317 + } 299 318 300 319 memset(&wc, 0, sizeof wc); 301 320 send_status = IB_WC_SUCCESS; ··· 425 396 sqp->s_len -= len; 426 397 } 427 398 428 - if (wqe->wr.opcode == IB_WR_RDMA_WRITE || 429 - wqe->wr.opcode == IB_WR_RDMA_READ) 399 + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) 430 400 goto send_comp; 431 401 432 402 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) ··· 445 417 wqe->wr.send_flags & IB_SEND_SOLICITED); 446 418 447 419 send_comp: 420 + spin_lock_irqsave(&sqp->s_lock, flags); 421 + flush_send: 448 422 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 449 423 ipath_send_complete(sqp, wqe, send_status); 450 424 goto again; ··· 467 437 sqp->s_rnr_retry--; 468 438 spin_lock_irqsave(&sqp->s_lock, flags); 469 439 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK)) 470 - goto unlock; 440 + goto clr_busy; 441 + sqp->s_flags |= IPATH_S_WAITING; 471 442 dev->n_rnr_naks++; 472 443 sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer]; 473 444 ipath_insert_rnr_queue(sqp); 474 - goto unlock; 445 + goto clr_busy; 475 446 476 447 inv_err: 477 448 send_status = IB_WC_REM_INV_REQ_ERR; ··· 504 473 } 505 474 goto done; 506 475 } 476 + clr_busy: 477 + sqp->s_flags &= ~IPATH_S_BUSY; 507 478 unlock: 508 479 spin_unlock_irqrestore(&sqp->s_lock, flags); 509 480 done: 510 - if (atomic_dec_and_test(&qp->refcount)) 481 + if (qp && atomic_dec_and_test(&qp->refcount)) 511 482 wake_up(&qp->wait); 512 483 } 513 484 514 485 static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp) 515 486 { 516 487 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) || 517 - qp->ibqp.qp_type == IB_QPT_SMI) { 488 + qp->ibqp.qp_type == IB_QPT_SMI) { 518 489 unsigned long flags; 519 490 520 491 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); ··· 534 501 * @dev: the device we ran out of buffers on 535 502 * 536 503 * Called when we run out of PIO buffers. 504 + * If we are now in the error state, return zero to flush the 505 + * send work request. 537 506 */ 538 - static void ipath_no_bufs_available(struct ipath_qp *qp, 507 + static int ipath_no_bufs_available(struct ipath_qp *qp, 539 508 struct ipath_ibdev *dev) 540 509 { 541 510 unsigned long flags; 511 + int ret = 1; 542 512 543 513 /* 544 514 * Note that as soon as want_buffer() is called and 545 515 * possibly before it returns, ipath_ib_piobufavail() 546 - * could be called. If we are still in the tasklet function, 547 - * tasklet_hi_schedule() will not call us until the next time 548 - * tasklet_hi_schedule() is called. 549 - * We leave the busy flag set so that another post send doesn't 550 - * try to put the same QP on the piowait list again. 516 + * could be called. Therefore, put QP on the piowait list before 517 + * enabling the PIO avail interrupt. 
551 518 */ 552 - spin_lock_irqsave(&dev->pending_lock, flags); 553 - list_add_tail(&qp->piowait, &dev->piowait); 554 - spin_unlock_irqrestore(&dev->pending_lock, flags); 555 - want_buffer(dev->dd, qp); 556 - dev->n_piowait++; 519 + spin_lock_irqsave(&qp->s_lock, flags); 520 + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { 521 + dev->n_piowait++; 522 + qp->s_flags |= IPATH_S_WAITING; 523 + qp->s_flags &= ~IPATH_S_BUSY; 524 + spin_lock(&dev->pending_lock); 525 + if (list_empty(&qp->piowait)) 526 + list_add_tail(&qp->piowait, &dev->piowait); 527 + spin_unlock(&dev->pending_lock); 528 + } else 529 + ret = 0; 530 + spin_unlock_irqrestore(&qp->s_lock, flags); 531 + if (ret) 532 + want_buffer(dev->dd, qp); 533 + return ret; 557 534 } 558 535 559 536 /** ··· 639 596 struct ipath_qp *qp = (struct ipath_qp *)data; 640 597 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 641 598 int (*make_req)(struct ipath_qp *qp); 642 - 643 - if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy)) 644 - goto bail; 599 + unsigned long flags; 645 600 646 601 if ((qp->ibqp.qp_type == IB_QPT_RC || 647 602 qp->ibqp.qp_type == IB_QPT_UC) && 648 603 qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { 649 604 ipath_ruc_loopback(qp); 650 - goto clear; 605 + goto bail; 651 606 } 652 607 653 608 if (qp->ibqp.qp_type == IB_QPT_RC) ··· 654 613 make_req = ipath_make_uc_req; 655 614 else 656 615 make_req = ipath_make_ud_req; 616 + 617 + spin_lock_irqsave(&qp->s_lock, flags); 618 + 619 + /* Return if we are already busy processing a work request. */ 620 + if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) || 621 + !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) { 622 + spin_unlock_irqrestore(&qp->s_lock, flags); 623 + goto bail; 624 + } 625 + 626 + qp->s_flags |= IPATH_S_BUSY; 627 + 628 + spin_unlock_irqrestore(&qp->s_lock, flags); 657 629 658 630 again: 659 631 /* Check for a constructed packet to be sent. */ ··· 677 623 */ 678 624 if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, 679 625 qp->s_cur_sge, qp->s_cur_size)) { 680 - ipath_no_bufs_available(qp, dev); 681 - goto bail; 626 + if (ipath_no_bufs_available(qp, dev)) 627 + goto bail; 682 628 } 683 629 dev->n_unicast_xmit++; 684 630 /* Record that we sent the packet and s_hdr is empty. */ ··· 687 633 688 634 if (make_req(qp)) 689 635 goto again; 690 - clear: 691 - clear_bit(IPATH_S_BUSY, &qp->s_busy); 636 + 692 637 bail:; 693 638 } 694 639 640 + /* 641 + * This should be called with s_lock held. 642 + */ 695 643 void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, 696 644 enum ib_wc_status status) 697 645 { 698 - unsigned long flags; 699 - u32 last; 646 + u32 old_last, last; 647 + 648 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) 649 + return; 700 650 701 651 /* See ch. 11.2.4.1 and 10.7.3.1 */ 702 652 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || ··· 719 661 status != IB_WC_SUCCESS); 720 662 } 721 663 722 - spin_lock_irqsave(&qp->s_lock, flags); 723 - last = qp->s_last; 664 + old_last = last = qp->s_last; 724 665 if (++last >= qp->s_size) 725 666 last = 0; 726 667 qp->s_last = last; 727 - spin_unlock_irqrestore(&qp->s_lock, flags); 668 + if (qp->s_cur == old_last) 669 + qp->s_cur = last; 670 + if (qp->s_tail == old_last) 671 + qp->s_tail = last; 672 + if (qp->state == IB_QPS_SQD && last == qp->s_cur) 673 + qp->s_draining = 0; 728 674 }
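
ipath_do_send() and the loopback path above now serialize on s_flags under
s_lock instead of the old atomic s_busy bit: the tasklet bails out if the QP
is already busy, waiting on something, or in a state that allows neither
sending nor flushing, and otherwise marks itself busy before dropping the
lock to build and post packets. Condensed from the ipath_ruc.c hunk, with
the bail label written as a plain return:

        spin_lock_irqsave(&qp->s_lock, flags);
        /* Return if we are already busy processing a work request. */
        if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
            !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
                spin_unlock_irqrestore(&qp->s_lock, flags);
                return;
        }
        qp->s_flags |= IPATH_S_BUSY;
        spin_unlock_irqrestore(&qp->s_lock, flags);
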
+39 -20
drivers/infiniband/hw/ipath/ipath_uc.c
··· 1 1 /* 2 - * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. 2 + * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved. 3 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 4 * 5 5 * This software is available to you under a choice of one of two ··· 47 47 { 48 48 struct ipath_other_headers *ohdr; 49 49 struct ipath_swqe *wqe; 50 + unsigned long flags; 50 51 u32 hwords; 51 52 u32 bth0; 52 53 u32 len; 53 54 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 54 55 int ret = 0; 55 56 56 - if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) 57 + spin_lock_irqsave(&qp->s_lock, flags); 58 + 59 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) { 60 + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) 61 + goto bail; 62 + /* We are in the error state, flush the work request. */ 63 + if (qp->s_last == qp->s_head) 64 + goto bail; 65 + /* If DMAs are in progress, we can't flush immediately. */ 66 + if (atomic_read(&qp->s_dma_busy)) { 67 + qp->s_flags |= IPATH_S_WAIT_DMA; 68 + goto bail; 69 + } 70 + wqe = get_swqe_ptr(qp, qp->s_last); 71 + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 57 72 goto done; 73 + } 58 74 59 75 ohdr = &qp->s_hdr.u.oth; 60 76 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) ··· 85 69 qp->s_wqe = NULL; 86 70 switch (qp->s_state) { 87 71 default: 72 + if (!(ib_ipath_state_ops[qp->state] & 73 + IPATH_PROCESS_NEXT_SEND_OK)) 74 + goto bail; 88 75 /* Check if send work queue is empty. */ 89 76 if (qp->s_cur == qp->s_head) 90 - goto done; 77 + goto bail; 91 78 /* 92 79 * Start a new request. 93 80 */ ··· 153 134 break; 154 135 155 136 default: 156 - goto done; 137 + goto bail; 157 138 } 158 139 break; 159 140 ··· 213 194 ipath_make_ruc_header(to_idev(qp->ibqp.device), 214 195 qp, ohdr, bth0 | (qp->s_state << 24), 215 196 qp->s_next_psn++ & IPATH_PSN_MASK); 216 - ret = 1; 217 - 218 197 done: 198 + ret = 1; 199 + goto unlock; 200 + 201 + bail: 202 + qp->s_flags &= ~IPATH_S_BUSY; 203 + unlock: 204 + spin_unlock_irqrestore(&qp->s_lock, flags); 219 205 return ret; 220 206 } 221 207 ··· 282 258 */ 283 259 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 284 260 285 - wc.imm_data = 0; 286 - wc.wc_flags = 0; 261 + memset(&wc, 0, sizeof wc); 287 262 288 263 /* Compare the PSN verses the expected PSN. */ 289 264 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { ··· 345 322 case OP(SEND_ONLY): 346 323 case OP(SEND_ONLY_WITH_IMMEDIATE): 347 324 send_first: 348 - if (qp->r_reuse_sge) { 349 - qp->r_reuse_sge = 0; 325 + if (qp->r_flags & IPATH_R_REUSE_SGE) { 326 + qp->r_flags &= ~IPATH_R_REUSE_SGE; 350 327 qp->r_sge = qp->s_rdma_read_sge; 351 328 } else if (!ipath_get_rwqe(qp, 0)) { 352 329 dev->n_pkt_drops++; ··· 363 340 case OP(SEND_MIDDLE): 364 341 /* Check for invalid length PMTU or posted rwqe len. */ 365 342 if (unlikely(tlen != (hdrsize + pmtu + 4))) { 366 - qp->r_reuse_sge = 1; 343 + qp->r_flags |= IPATH_R_REUSE_SGE; 367 344 dev->n_pkt_drops++; 368 345 goto done; 369 346 } 370 347 qp->r_rcv_len += pmtu; 371 348 if (unlikely(qp->r_rcv_len > qp->r_len)) { 372 - qp->r_reuse_sge = 1; 349 + qp->r_flags |= IPATH_R_REUSE_SGE; 373 350 dev->n_pkt_drops++; 374 351 goto done; 375 352 } ··· 395 372 /* Check for invalid length. 
*/ 396 373 /* XXX LAST len should be >= 1 */ 397 374 if (unlikely(tlen < (hdrsize + pad + 4))) { 398 - qp->r_reuse_sge = 1; 375 + qp->r_flags |= IPATH_R_REUSE_SGE; 399 376 dev->n_pkt_drops++; 400 377 goto done; 401 378 } ··· 403 380 tlen -= (hdrsize + pad + 4); 404 381 wc.byte_len = tlen + qp->r_rcv_len; 405 382 if (unlikely(wc.byte_len > qp->r_len)) { 406 - qp->r_reuse_sge = 1; 383 + qp->r_flags |= IPATH_R_REUSE_SGE; 407 384 dev->n_pkt_drops++; 408 385 goto done; 409 386 } ··· 413 390 wc.wr_id = qp->r_wr_id; 414 391 wc.status = IB_WC_SUCCESS; 415 392 wc.opcode = IB_WC_RECV; 416 - wc.vendor_err = 0; 417 393 wc.qp = &qp->ibqp; 418 394 wc.src_qp = qp->remote_qpn; 419 - wc.pkey_index = 0; 420 395 wc.slid = qp->remote_ah_attr.dlid; 421 396 wc.sl = qp->remote_ah_attr.sl; 422 - wc.dlid_path_bits = 0; 423 - wc.port_num = 0; 424 397 /* Signal completion event if the solicited bit is set. */ 425 398 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 426 399 (ohdr->bth[0] & ··· 507 488 dev->n_pkt_drops++; 508 489 goto done; 509 490 } 510 - if (qp->r_reuse_sge) 511 - qp->r_reuse_sge = 0; 491 + if (qp->r_flags & IPATH_R_REUSE_SGE) 492 + qp->r_flags &= ~IPATH_R_REUSE_SGE; 512 493 else if (!ipath_get_rwqe(qp, 1)) { 513 494 dev->n_pkt_drops++; 514 495 goto done;
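
The same flush preamble now guards all three request builders
(ipath_make_rc_req(), ipath_make_uc_req() above, and ipath_make_ud_req()
below; the UD variant tests IPATH_PROCESS_NEXT_SEND_OK): if the QP can no
longer send but is flushable, complete the oldest work request with
IB_WC_WR_FLUSH_ERR from the tasklet, unless send DMA is still outstanding,
in which case set IPATH_S_WAIT_DMA and let the DMA callback reschedule the
flush. Condensed:

        if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
                if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
                        goto bail;
                /* We are in the error state, flush the work request. */
                if (qp->s_last == qp->s_head)
                        goto bail;
                /* If DMAs are in progress, we can't flush immediately. */
                if (atomic_read(&qp->s_dma_busy)) {
                        qp->s_flags |= IPATH_S_WAIT_DMA;
                        goto bail;
                }
                wqe = get_swqe_ptr(qp, qp->s_last);
                ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
                goto done;
        }
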
+48 -18
drivers/infiniband/hw/ipath/ipath_ud.c
··· 65 65 u32 length; 66 66 67 67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); 68 - if (!qp) { 68 + if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) { 69 69 dev->n_pkt_drops++; 70 - goto send_comp; 70 + goto done; 71 71 } 72 72 73 73 rsge.sg_list = NULL; ··· 91 91 * present on the wire. 92 92 */ 93 93 length = swqe->length; 94 + memset(&wc, 0, sizeof wc); 94 95 wc.byte_len = length + sizeof(struct ib_grh); 95 96 96 97 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 97 98 wc.wc_flags = IB_WC_WITH_IMM; 98 99 wc.imm_data = swqe->wr.ex.imm_data; 99 - } else { 100 - wc.wc_flags = 0; 101 - wc.imm_data = 0; 102 100 } 103 101 104 102 /* ··· 227 229 } 228 230 wc.status = IB_WC_SUCCESS; 229 231 wc.opcode = IB_WC_RECV; 230 - wc.vendor_err = 0; 231 232 wc.qp = &qp->ibqp; 232 233 wc.src_qp = sqp->ibqp.qp_num; 233 234 /* XXX do we know which pkey matched? Only needed for GSI. */ ··· 245 248 kfree(rsge.sg_list); 246 249 if (atomic_dec_and_test(&qp->refcount)) 247 250 wake_up(&qp->wait); 248 - send_comp: 249 - ipath_send_complete(sqp, swqe, IB_WC_SUCCESS); 251 + done:; 250 252 } 251 253 252 254 /** ··· 260 264 struct ipath_other_headers *ohdr; 261 265 struct ib_ah_attr *ah_attr; 262 266 struct ipath_swqe *wqe; 267 + unsigned long flags; 263 268 u32 nwords; 264 269 u32 extra_bytes; 265 270 u32 bth0; ··· 268 271 u16 lid; 269 272 int ret = 0; 270 273 271 - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) 272 - goto bail; 274 + spin_lock_irqsave(&qp->s_lock, flags); 275 + 276 + if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) { 277 + if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND)) 278 + goto bail; 279 + /* We are in the error state, flush the work request. */ 280 + if (qp->s_last == qp->s_head) 281 + goto bail; 282 + /* If DMAs are in progress, we can't flush immediately. */ 283 + if (atomic_read(&qp->s_dma_busy)) { 284 + qp->s_flags |= IPATH_S_WAIT_DMA; 285 + goto bail; 286 + } 287 + wqe = get_swqe_ptr(qp, qp->s_last); 288 + ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); 289 + goto done; 290 + } 273 291 274 292 if (qp->s_cur == qp->s_head) 275 293 goto bail; 276 294 277 295 wqe = get_swqe_ptr(qp, qp->s_cur); 296 + if (++qp->s_cur >= qp->s_size) 297 + qp->s_cur = 0; 278 298 279 299 /* Construct the header. */ 280 300 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; ··· 302 288 dev->n_unicast_xmit++; 303 289 } else { 304 290 dev->n_unicast_xmit++; 305 - lid = ah_attr->dlid & 306 - ~((1 << dev->dd->ipath_lmc) - 1); 291 + lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1); 307 292 if (unlikely(lid == dev->dd->ipath_lid)) { 293 + /* 294 + * If DMAs are in progress, we can't generate 295 + * a completion for the loopback packet since 296 + * it would be out of order. 297 + * XXX Instead of waiting, we could queue a 298 + * zero length descriptor so we get a callback. 
299 + */ 300 + if (atomic_read(&qp->s_dma_busy)) { 301 + qp->s_flags |= IPATH_S_WAIT_DMA; 302 + goto bail; 303 + } 304 + spin_unlock_irqrestore(&qp->s_lock, flags); 308 305 ipath_ud_loopback(qp, wqe); 306 + spin_lock_irqsave(&qp->s_lock, flags); 307 + ipath_send_complete(qp, wqe, IB_WC_SUCCESS); 309 308 goto done; 310 309 } 311 310 } ··· 395 368 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 396 369 397 370 done: 398 - if (++qp->s_cur >= qp->s_size) 399 - qp->s_cur = 0; 400 371 ret = 1; 372 + goto unlock; 401 373 402 374 bail: 375 + qp->s_flags &= ~IPATH_S_BUSY; 376 + unlock: 377 + spin_unlock_irqrestore(&qp->s_lock, flags); 403 378 return ret; 404 379 } 405 380 ··· 535 506 /* 536 507 * Get the next work request entry to find where to put the data. 537 508 */ 538 - if (qp->r_reuse_sge) 539 - qp->r_reuse_sge = 0; 509 + if (qp->r_flags & IPATH_R_REUSE_SGE) 510 + qp->r_flags &= ~IPATH_R_REUSE_SGE; 540 511 else if (!ipath_get_rwqe(qp, 0)) { 541 512 /* 542 513 * Count VL15 packets dropped due to no receive buffer. ··· 552 523 } 553 524 /* Silently drop packets which are too big. */ 554 525 if (wc.byte_len > qp->r_len) { 555 - qp->r_reuse_sge = 1; 526 + qp->r_flags |= IPATH_R_REUSE_SGE; 556 527 dev->n_pkt_drops++; 557 528 goto bail; 558 529 } ··· 564 535 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); 565 536 ipath_copy_sge(&qp->r_sge, data, 566 537 wc.byte_len - sizeof(struct ib_grh)); 567 - qp->r_wrid_valid = 0; 538 + if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) 539 + goto bail; 568 540 wc.wr_id = qp->r_wr_id; 569 541 wc.status = IB_WC_SUCCESS; 570 542 wc.opcode = IB_WC_RECV;
-2
drivers/infiniband/hw/ipath/ipath_user_sdma.h
··· 45 45 int ipath_user_sdma_make_progress(struct ipath_devdata *dd, 46 46 struct ipath_user_sdma_queue *pq); 47 47 48 - int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq, 49 - u32 counter); 50 48 void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, 51 49 struct ipath_user_sdma_queue *pq); 52 50
+117 -57
drivers/infiniband/hw/ipath/ipath_verbs.c
··· 111 111 module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); 112 112 MODULE_PARM_DESC(disable_sma, "Disable the SMA"); 113 113 114 + /* 115 + * Note that it is OK to post send work requests in the SQE and ERR 116 + * states; ipath_do_send() will process them and generate error 117 + * completions as per IB 1.2 C10-96. 118 + */ 114 119 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { 115 120 [IB_QPS_RESET] = 0, 116 121 [IB_QPS_INIT] = IPATH_POST_RECV_OK, 117 122 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 118 123 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 119 - IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, 124 + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK | 125 + IPATH_PROCESS_NEXT_SEND_OK, 120 126 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 121 - IPATH_POST_SEND_OK, 122 - [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 123 - [IB_QPS_ERR] = 0, 127 + IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, 128 + [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 129 + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, 130 + [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV | 131 + IPATH_POST_SEND_OK | IPATH_FLUSH_SEND, 124 132 }; 125 133 126 134 struct ipath_ucontext { ··· 238 230 } 239 231 } 240 232 241 - static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr) 242 - { 243 - struct ib_wc wc; 244 - 245 - memset(&wc, 0, sizeof(wc)); 246 - wc.wr_id = wr->wr_id; 247 - wc.status = IB_WC_WR_FLUSH_ERR; 248 - wc.opcode = ib_ipath_wc_opcode[wr->opcode]; 249 - wc.qp = &qp->ibqp; 250 - ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1); 251 - } 252 - 253 233 /* 254 234 * Count the number of DMA descriptors needed to send length bytes of data. 255 235 * Don't modify the ipath_sge_state to get the count. ··· 343 347 spin_lock_irqsave(&qp->s_lock, flags); 344 348 345 349 /* Check that state is OK to post send. */ 346 - if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { 347 - if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) 348 - goto bail_inval; 349 - /* C10-96 says generate a flushed completion entry. */ 350 - ipath_flush_wqe(qp, wr); 351 - ret = 0; 352 - goto bail; 353 - } 350 + if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) 351 + goto bail_inval; 354 352 355 353 /* IB spec says that num_sge == 0 is OK. */ 356 354 if (wr->num_sge > qp->s_max_sge) ··· 667 677 static void ipath_ib_timer(struct ipath_ibdev *dev) 668 678 { 669 679 struct ipath_qp *resend = NULL; 680 + struct ipath_qp *rnr = NULL; 670 681 struct list_head *last; 671 682 struct ipath_qp *qp; 672 683 unsigned long flags; ··· 694 703 if (--qp->s_rnr_timeout == 0) { 695 704 do { 696 705 list_del_init(&qp->timerwait); 697 - tasklet_hi_schedule(&qp->s_task); 706 + qp->timer_next = rnr; 707 + rnr = qp; 708 + atomic_inc(&qp->refcount); 698 709 if (list_empty(last)) 699 710 break; 700 711 qp = list_entry(last->next, struct ipath_qp, ··· 736 743 spin_unlock_irqrestore(&dev->pending_lock, flags); 737 744 738 745 /* XXX What if timer fires again while this is running? 
*/ 739 - for (qp = resend; qp != NULL; qp = qp->timer_next) { 746 + while (resend != NULL) { 747 + qp = resend; 748 + resend = qp->timer_next; 749 + 740 750 spin_lock_irqsave(&qp->s_lock, flags); 741 - if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { 751 + if (qp->s_last != qp->s_tail && 752 + ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) { 742 753 dev->n_timeouts++; 743 754 ipath_restart_rc(qp, qp->s_last_psn + 1); 744 755 } 756 + spin_unlock_irqrestore(&qp->s_lock, flags); 757 + 758 + /* Notify ipath_destroy_qp() if it is waiting. */ 759 + if (atomic_dec_and_test(&qp->refcount)) 760 + wake_up(&qp->wait); 761 + } 762 + while (rnr != NULL) { 763 + qp = rnr; 764 + rnr = qp->timer_next; 765 + 766 + spin_lock_irqsave(&qp->s_lock, flags); 767 + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) 768 + ipath_schedule_send(qp); 745 769 spin_unlock_irqrestore(&qp->s_lock, flags); 746 770 747 771 /* Notify ipath_destroy_qp() if it is waiting. */ ··· 1020 1010 struct ipath_verbs_txreq *tx = cookie; 1021 1011 struct ipath_qp *qp = tx->qp; 1022 1012 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 1013 + unsigned int flags; 1014 + enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? 1015 + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; 1023 1016 1024 - /* Generate a completion queue entry if needed */ 1025 - if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { 1026 - enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? 1027 - IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; 1028 - 1017 + if (atomic_dec_and_test(&qp->s_dma_busy)) { 1018 + spin_lock_irqsave(&qp->s_lock, flags); 1019 + if (tx->wqe) 1020 + ipath_send_complete(qp, tx->wqe, ibs); 1021 + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && 1022 + qp->s_last != qp->s_head) || 1023 + (qp->s_flags & IPATH_S_WAIT_DMA)) 1024 + ipath_schedule_send(qp); 1025 + spin_unlock_irqrestore(&qp->s_lock, flags); 1026 + wake_up(&qp->wait_dma); 1027 + } else if (tx->wqe) { 1028 + spin_lock_irqsave(&qp->s_lock, flags); 1029 1029 ipath_send_complete(qp, tx->wqe, ibs); 1030 + spin_unlock_irqrestore(&qp->s_lock, flags); 1030 1031 } 1031 1032 1032 1033 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) ··· 1046 1025 1047 1026 if (atomic_dec_and_test(&qp->refcount)) 1048 1027 wake_up(&qp->wait); 1028 + } 1029 + 1030 + static void decrement_dma_busy(struct ipath_qp *qp) 1031 + { 1032 + unsigned int flags; 1033 + 1034 + if (atomic_dec_and_test(&qp->s_dma_busy)) { 1035 + spin_lock_irqsave(&qp->s_lock, flags); 1036 + if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND && 1037 + qp->s_last != qp->s_head) || 1038 + (qp->s_flags & IPATH_S_WAIT_DMA)) 1039 + ipath_schedule_send(qp); 1040 + spin_unlock_irqrestore(&qp->s_lock, flags); 1041 + wake_up(&qp->wait_dma); 1042 + } 1049 1043 } 1050 1044 1051 1045 /* ··· 1101 1065 if (tx) { 1102 1066 qp->s_tx = NULL; 1103 1067 /* resend previously constructed packet */ 1068 + atomic_inc(&qp->s_dma_busy); 1104 1069 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); 1105 - if (ret) 1070 + if (ret) { 1106 1071 qp->s_tx = tx; 1072 + decrement_dma_busy(qp); 1073 + } 1107 1074 goto bail; 1108 1075 } 1109 1076 ··· 1157 1118 tx->txreq.sg_count = ndesc; 1158 1119 tx->map_len = (hdrwords + 2) << 2; 1159 1120 tx->txreq.map_addr = &tx->hdr; 1121 + atomic_inc(&qp->s_dma_busy); 1160 1122 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); 1161 1123 if (ret) { 1162 1124 /* save ss and length in dwords */ 1163 1125 tx->ss = ss; 1164 1126 tx->len = dwords; 1165 1127 qp->s_tx = tx; 1128 + decrement_dma_busy(qp); 1166 1129 } 1167 1130 
goto bail; 1168 1131 } ··· 1185 1144 memcpy(piobuf, hdr, hdrwords << 2); 1186 1145 ipath_copy_from_sge(piobuf + hdrwords, ss, len); 1187 1146 1147 + atomic_inc(&qp->s_dma_busy); 1188 1148 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); 1189 1149 /* 1190 1150 * If we couldn't queue the DMA request, save the info ··· 1196 1154 tx->ss = NULL; 1197 1155 tx->len = 0; 1198 1156 qp->s_tx = tx; 1157 + decrement_dma_busy(qp); 1199 1158 } 1200 1159 dev->n_unaligned++; 1201 1160 goto bail; ··· 1220 1177 unsigned flush_wc; 1221 1178 u32 control; 1222 1179 int ret; 1180 + unsigned int flags; 1223 1181 1224 1182 piobuf = ipath_getpiobuf(dd, plen, NULL); 1225 1183 if (unlikely(piobuf == NULL)) { ··· 1291 1247 } 1292 1248 copy_io(piobuf, ss, len, flush_wc); 1293 1249 done: 1294 - if (qp->s_wqe) 1250 + if (qp->s_wqe) { 1251 + spin_lock_irqsave(&qp->s_lock, flags); 1295 1252 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1253 + spin_unlock_irqrestore(&qp->s_lock, flags); 1254 + } 1296 1255 ret = 0; 1297 1256 bail: 1298 1257 return ret; ··· 1328 1281 * can defer SDMA restart until link goes ACTIVE without 1329 1282 * worrying about just how we got there. 1330 1283 */ 1331 - if (qp->ibqp.qp_type == IB_QPT_SMI) 1284 + if (qp->ibqp.qp_type == IB_QPT_SMI || 1285 + !(dd->ipath_flags & IPATH_HAS_SEND_DMA)) 1332 1286 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1333 - plen, dwords); 1334 - /* All non-VL15 packets are dropped if link is not ACTIVE */ 1335 - else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) { 1336 - if (qp->s_wqe) 1337 - ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1338 - ret = 0; 1339 - } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 1340 - ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, 1341 1287 plen, dwords); 1342 1288 else 1343 - ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1289 + ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len, 1344 1290 plen, dwords); 1345 1291 1346 1292 return ret; ··· 1441 1401 * This is called from ipath_intr() at interrupt level when a PIO buffer is 1442 1402 * available after ipath_verbs_send() returned an error that no buffers were 1443 1403 * available. Return 1 if we consumed all the PIO buffers and we still have 1444 - * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and 1404 + * QPs waiting for buffers (for now, just restart the send tasklet and 1445 1405 * return zero). 
1446 1406 */ 1447 1407 int ipath_ib_piobufavail(struct ipath_ibdev *dev) 1448 1408 { 1409 + struct list_head *list; 1410 + struct ipath_qp *qplist; 1449 1411 struct ipath_qp *qp; 1450 1412 unsigned long flags; 1451 1413 1452 1414 if (dev == NULL) 1453 1415 goto bail; 1454 1416 1417 + list = &dev->piowait; 1418 + qplist = NULL; 1419 + 1455 1420 spin_lock_irqsave(&dev->pending_lock, flags); 1456 - while (!list_empty(&dev->piowait)) { 1457 - qp = list_entry(dev->piowait.next, struct ipath_qp, 1458 - piowait); 1421 + while (!list_empty(list)) { 1422 + qp = list_entry(list->next, struct ipath_qp, piowait); 1459 1423 list_del_init(&qp->piowait); 1460 - clear_bit(IPATH_S_BUSY, &qp->s_busy); 1461 - tasklet_hi_schedule(&qp->s_task); 1424 + qp->pio_next = qplist; 1425 + qplist = qp; 1426 + atomic_inc(&qp->refcount); 1462 1427 } 1463 1428 spin_unlock_irqrestore(&dev->pending_lock, flags); 1429 + 1430 + while (qplist != NULL) { 1431 + qp = qplist; 1432 + qplist = qp->pio_next; 1433 + 1434 + spin_lock_irqsave(&qp->s_lock, flags); 1435 + if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) 1436 + ipath_schedule_send(qp); 1437 + spin_unlock_irqrestore(&qp->s_lock, flags); 1438 + 1439 + /* Notify ipath_destroy_qp() if it is waiting. */ 1440 + if (atomic_dec_and_test(&qp->refcount)) 1441 + wake_up(&qp->wait); 1442 + } 1464 1443 1465 1444 bail: 1466 1445 return 0; ··· 2202 2143 void ipath_unregister_ib_device(struct ipath_ibdev *dev) 2203 2144 { 2204 2145 struct ib_device *ibdev = &dev->ibdev; 2205 - 2206 - disable_timer(dev->dd); 2146 + u32 qps_inuse; 2207 2147 2208 2148 ib_unregister_device(ibdev); 2149 + 2150 + disable_timer(dev->dd); 2209 2151 2210 2152 if (!list_empty(&dev->pending[0]) || 2211 2153 !list_empty(&dev->pending[1]) || ··· 2222 2162 * Note that ipath_unregister_ib_device() can be called before all 2223 2163 * the QPs are destroyed! 2224 2164 */ 2225 - ipath_free_all_qps(&dev->qp_table); 2165 + qps_inuse = ipath_free_all_qps(&dev->qp_table); 2166 + if (qps_inuse) 2167 + ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n", 2168 + qps_inuse); 2226 2169 kfree(dev->qp_table.table); 2227 2170 kfree(dev->lk_table.table); 2228 2171 kfree(dev->txreq_bufs); ··· 2276 2213 "RC OTH NAKs %d\n" 2277 2214 "RC timeouts %d\n" 2278 2215 "RC RDMA dup %d\n" 2279 - "RC stalls %d\n" 2280 2216 "piobuf wait %d\n" 2281 - "no piobuf %d\n" 2282 2217 "unaligned %d\n" 2283 2218 "PKT drops %d\n" 2284 2219 "WQE errs %d\n", 2285 2220 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, 2286 2221 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, 2287 2222 dev->n_other_naks, dev->n_timeouts, 2288 - dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, 2289 - dev->n_no_piobuf, dev->n_unaligned, 2223 + dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned, 2290 2224 dev->n_pkt_drops, dev->n_wqe_errs); 2291 2225 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { 2292 2226 const struct ipath_opcode_stats *si = &dev->opstats[i];
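
The out-of-order completion problem described in the commit message is closed
in the send DMA callback above: every descriptor handed to the DMA engine
bumps s_dma_busy, completions are only generated with s_lock held so
ipath_send_complete() can advance s_last/s_cur/s_tail consistently, and the
final callback re-arms the send tasklet for any pending flush and wakes
wait_dma so the reset/destroy paths can proceed. Condensed from
sdma_complete():

        if (atomic_dec_and_test(&qp->s_dma_busy)) {
                spin_lock_irqsave(&qp->s_lock, flags);
                if (tx->wqe)
                        ipath_send_complete(qp, tx->wqe, ibs);
                if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
                     qp->s_last != qp->s_head) ||
                    (qp->s_flags & IPATH_S_WAIT_DMA))
                        ipath_schedule_send(qp);
                spin_unlock_irqrestore(&qp->s_lock, flags);
                wake_up(&qp->wait_dma);
        } else if (tx->wqe) {
                spin_lock_irqsave(&qp->s_lock, flags);
                ipath_send_complete(qp, tx->wqe, ibs);
                spin_unlock_irqrestore(&qp->s_lock, flags);
        }
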
+48 -9
drivers/infiniband/hw/ipath/ipath_verbs.h
··· 74 74 #define IPATH_POST_RECV_OK 0x02 75 75 #define IPATH_PROCESS_RECV_OK 0x04 76 76 #define IPATH_PROCESS_SEND_OK 0x08 77 + #define IPATH_PROCESS_NEXT_SEND_OK 0x10 78 + #define IPATH_FLUSH_SEND 0x20 79 + #define IPATH_FLUSH_RECV 0x40 80 + #define IPATH_PROCESS_OR_FLUSH_SEND \ 81 + (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND) 77 82 78 83 /* IB Performance Manager status values */ 79 84 #define IB_PMA_SAMPLE_STATUS_DONE 0x00 ··· 358 353 struct ib_qp ibqp; 359 354 struct ipath_qp *next; /* link list for QPN hash table */ 360 355 struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ 356 + struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */ 361 357 struct list_head piowait; /* link for wait PIO buf */ 362 358 struct list_head timerwait; /* link for waiting for timeouts */ 363 359 struct ib_ah_attr remote_ah_attr; 364 360 struct ipath_ib_header s_hdr; /* next packet header to send */ 365 361 atomic_t refcount; 366 362 wait_queue_head_t wait; 363 + wait_queue_head_t wait_dma; 367 364 struct tasklet_struct s_task; 368 365 struct ipath_mmap_info *ip; 369 366 struct ipath_sge_state *s_cur_sge; ··· 376 369 struct ipath_sge_state s_rdma_read_sge; 377 370 struct ipath_sge_state r_sge; /* current receive data */ 378 371 spinlock_t s_lock; 379 - unsigned long s_busy; 372 + atomic_t s_dma_busy; 380 373 u16 s_pkt_delay; 381 374 u16 s_hdrwords; /* size of s_hdr in 32 bit words */ 382 375 u32 s_cur_size; /* size of send packet in bytes */ ··· 390 383 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ 391 384 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ 392 385 u64 r_wr_id; /* ID for current receive WQE */ 386 + unsigned long r_aflags; 393 387 u32 r_len; /* total length of r_sge */ 394 388 u32 r_rcv_len; /* receive data len processed */ 395 389 u32 r_psn; /* expected rcv packet sequence number */ ··· 402 394 u8 r_state; /* opcode of last packet received */ 403 395 u8 r_nak_state; /* non-zero if NAK is pending */ 404 396 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ 405 - u8 r_reuse_sge; /* for UC receive errors */ 406 - u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */ 397 + u8 r_flags; 407 398 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ 408 399 u8 r_head_ack_queue; /* index into s_ack_queue[] */ 409 400 u8 qp_access_flags; ··· 411 404 u8 s_rnr_retry_cnt; 412 405 u8 s_retry; /* requester retry counter */ 413 406 u8 s_rnr_retry; /* requester RNR retry counter */ 414 - u8 s_wait_credit; /* limit number of unacked packets sent */ 415 407 u8 s_pkey_index; /* PKEY index to use */ 416 408 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ 417 409 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ 418 410 u8 s_tail_ack_queue; /* index into s_ack_queue[] */ 419 411 u8 s_flags; 420 412 u8 s_dmult; 413 + u8 s_draining; 421 414 u8 timeout; /* Timeout for this QP */ 422 415 enum ib_mtu path_mtu; 423 416 u32 remote_qpn; ··· 435 428 struct ipath_sge r_sg_list[0]; /* verified SGEs */ 436 429 }; 437 430 438 - /* Bit definition for s_busy. */ 439 - #define IPATH_S_BUSY 0 431 + /* 432 + * Atomic bit definitions for r_aflags. 433 + */ 434 + #define IPATH_R_WRID_VALID 0 435 + 436 + /* 437 + * Bit definitions for r_flags. 438 + */ 439 + #define IPATH_R_REUSE_SGE 0x01 440 440 441 441 /* 442 442 * Bit definitions for s_flags. 
443 + * 444 + * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs 445 + * before processing the next SWQE 446 + * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs 447 + * before processing the next SWQE 448 + * IPATH_S_WAITING - waiting for RNR timeout or send buffer available. 449 + * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE 450 + * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating 451 + * next send completion entry not via send DMA. 443 452 */ 444 453 #define IPATH_S_SIGNAL_REQ_WR 0x01 445 454 #define IPATH_S_FENCE_PENDING 0x02 446 455 #define IPATH_S_RDMAR_PENDING 0x04 447 456 #define IPATH_S_ACK_PENDING 0x08 457 + #define IPATH_S_BUSY 0x10 458 + #define IPATH_S_WAITING 0x20 459 + #define IPATH_S_WAIT_SSN_CREDIT 0x40 460 + #define IPATH_S_WAIT_DMA 0x80 461 + 462 + #define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \ 463 + IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA) 448 464 449 465 #define IPATH_PSN_CREDIT 512 450 466 ··· 603 573 u32 n_rnr_naks; 604 574 u32 n_other_naks; 605 575 u32 n_timeouts; 606 - u32 n_rc_stalls; 607 576 u32 n_pkt_drops; 608 577 u32 n_vl15_dropped; 609 578 u32 n_wqe_errs; 610 579 u32 n_rdma_dup_busy; 611 580 u32 n_piowait; 612 - u32 n_no_piobuf; 613 581 u32 n_unaligned; 614 582 u32 port_cap_flags; 615 583 u32 pma_sample_start; ··· 685 657 return container_of(ibdev, struct ipath_ibdev, ibdev); 686 658 } 687 659 660 + /* 661 + * This must be called with s_lock held. 662 + */ 663 + static inline void ipath_schedule_send(struct ipath_qp *qp) 664 + { 665 + if (qp->s_flags & IPATH_S_ANY_WAIT) 666 + qp->s_flags &= ~IPATH_S_ANY_WAIT; 667 + if (!(qp->s_flags & IPATH_S_BUSY)) 668 + tasklet_hi_schedule(&qp->s_task); 669 + } 670 + 688 671 int ipath_process_mad(struct ib_device *ibdev, 689 672 int mad_flags, 690 673 u8 port_num, ··· 745 706 int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 746 707 int attr_mask, struct ib_qp_init_attr *init_attr); 747 708 748 - void ipath_free_all_qps(struct ipath_qp_table *qpt); 709 + unsigned ipath_free_all_qps(struct ipath_qp_table *qpt); 749 710 750 711 int ipath_init_qp_table(struct ipath_ibdev *idev, int size); 751 712
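
Most of the former tasklet_hi_schedule() call sites now go through
ipath_schedule_send(), which must be called with s_lock held; it clears the
IPATH_S_ANY_WAIT flags and only kicks the tasklet when IPATH_S_BUSY is not
set. A typical caller, following the pattern used in ipath_ib_piobufavail()
and ipath_ib_timer() in ipath_verbs.c:

        spin_lock_irqsave(&qp->s_lock, flags);
        if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
                ipath_schedule_send(qp);
        spin_unlock_irqrestore(&qp->s_lock, flags);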