Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/cxgb3: Correctly serialize peer abort path

Open MPI and other stress testing exposed a few bad bugs in handling
aborts in the middle of a normal close. Fix these by:

- serializing abort reply and peer abort processing with disconnect
processing

- warning (and ignoring) if ep timer is stopped when it wasn't running

- cleaning up disconnect path to correctly deal with aborting and
dead endpoints

- in iwch_modify_qp(), taking a ref on the ep before releasing the qp
lock if iwch_ep_disconnect() will be called. The ref is dropped
after calling disconnect.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Steve Wise; committed by Roland Dreier.
989a1780 e463c7b1

+72 -35
+66 -34
drivers/infiniband/hw/cxgb3/iwch_cm.c
··· 125 125 static void stop_ep_timer(struct iwch_ep *ep) 126 126 { 127 127 PDBG("%s ep %p\n", __func__, ep); 128 + if (!timer_pending(&ep->timer)) { 129 + printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n", 130 + __func__, ep, ep->com.state); 131 + WARN_ON(1); 132 + return; 133 + } 128 134 del_timer_sync(&ep->timer); 129 135 put_ep(&ep->com); 130 136 } ··· 1089 1083 static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) 1090 1084 { 1091 1085 struct iwch_ep *ep = ctx; 1086 + unsigned long flags; 1087 + int release = 0; 1092 1088 1093 1089 PDBG("%s ep %p\n", __func__, ep); 1090 + BUG_ON(!ep); 1094 1091 1095 1092 /* 1096 1093 * We get 2 abort replies from the HW. The first one must ··· 1104 1095 return CPL_RET_BUF_DONE; 1105 1096 } 1106 1097 1107 - close_complete_upcall(ep); 1108 - state_set(&ep->com, DEAD); 1109 - release_ep_resources(ep); 1098 + spin_lock_irqsave(&ep->com.lock, flags); 1099 + switch (ep->com.state) { 1100 + case ABORTING: 1101 + close_complete_upcall(ep); 1102 + __state_set(&ep->com, DEAD); 1103 + release = 1; 1104 + break; 1105 + default: 1106 + printk(KERN_ERR "%s ep %p state %d\n", 1107 + __func__, ep, ep->com.state); 1108 + break; 1109 + } 1110 + spin_unlock_irqrestore(&ep->com.lock, flags); 1111 + 1112 + if (release) 1113 + release_ep_resources(ep); 1110 1114 return CPL_RET_BUF_DONE; 1111 1115 } 1112 1116 ··· 1492 1470 struct sk_buff *rpl_skb; 1493 1471 struct iwch_qp_attributes attrs; 1494 1472 int ret; 1495 - int state; 1473 + int release = 0; 1474 + unsigned long flags; 1496 1475 1497 1476 if (is_neg_adv_abort(req->status)) { 1498 1477 PDBG("%s neg_adv_abort ep %p tid %d\n", __func__, ep, ··· 1511 1488 return CPL_RET_BUF_DONE; 1512 1489 } 1513 1490 1514 - state = state_read(&ep->com); 1515 - PDBG("%s ep %p state %u\n", __func__, ep, state); 1516 - switch (state) { 1491 + spin_lock_irqsave(&ep->com.lock, flags); 1492 + PDBG("%s ep %p state %u\n", __func__, ep, ep->com.state); 1493 + switch 
(ep->com.state) { 1517 1494 case CONNECTING: 1518 1495 break; 1519 1496 case MPA_REQ_WAIT: ··· 1559 1536 break; 1560 1537 case DEAD: 1561 1538 PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__); 1539 + spin_unlock_irqrestore(&ep->com.lock, flags); 1562 1540 return CPL_RET_BUF_DONE; 1563 1541 default: 1564 1542 BUG_ON(1); 1565 1543 break; 1566 1544 } 1567 1545 dst_confirm(ep->dst); 1546 + if (ep->com.state != ABORTING) { 1547 + __state_set(&ep->com, DEAD); 1548 + release = 1; 1549 + } 1550 + spin_unlock_irqrestore(&ep->com.lock, flags); 1568 1551 1569 1552 rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL); 1570 1553 if (!rpl_skb) { 1571 1554 printk(KERN_ERR MOD "%s - cannot allocate skb!\n", 1572 1555 __func__); 1573 - dst_release(ep->dst); 1574 - l2t_release(L2DATA(ep->com.tdev), ep->l2t); 1575 - put_ep(&ep->com); 1576 - return CPL_RET_BUF_DONE; 1556 + release = 1; 1557 + goto out; 1577 1558 } 1578 1559 rpl_skb->priority = CPL_PRIORITY_DATA; 1579 1560 rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); ··· 1586 1559 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); 1587 1560 rpl->cmd = CPL_ABORT_NO_RST; 1588 1561 cxgb3_ofld_send(ep->com.tdev, rpl_skb); 1589 - if (state != ABORTING) { 1590 - state_set(&ep->com, DEAD); 1562 + out: 1563 + if (release) 1591 1564 release_ep_resources(ep); 1592 - } 1593 1565 return CPL_RET_BUF_DONE; 1594 1566 } 1595 1567 ··· 1687 1661 struct iwch_ep *ep = (struct iwch_ep *)arg; 1688 1662 struct iwch_qp_attributes attrs; 1689 1663 unsigned long flags; 1664 + int abort = 1; 1690 1665 1691 1666 spin_lock_irqsave(&ep->com.lock, flags); 1692 1667 PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, 1693 1668 ep->com.state); 1694 1669 switch (ep->com.state) { 1695 1670 case MPA_REQ_SENT: 1671 + __state_set(&ep->com, ABORTING); 1696 1672 connect_reply_upcall(ep, -ETIMEDOUT); 1697 1673 break; 1698 1674 case MPA_REQ_WAIT: 1675 + __state_set(&ep->com, ABORTING); 1699 1676 break; 1700 1677 case CLOSING: 1701 1678 
case MORIBUND: ··· 1708 1679 ep->com.qp, IWCH_QP_ATTR_NEXT_STATE, 1709 1680 &attrs, 1); 1710 1681 } 1682 + __state_set(&ep->com, ABORTING); 1711 1683 break; 1712 1684 default: 1713 - BUG(); 1685 + printk(KERN_ERR "%s unexpected state ep %p state %u\n", 1686 + __func__, ep, ep->com.state); 1687 + WARN_ON(1); 1688 + abort = 0; 1714 1689 } 1715 - __state_set(&ep->com, CLOSING); 1716 1690 spin_unlock_irqrestore(&ep->com.lock, flags); 1717 - abort_connection(ep, NULL, GFP_ATOMIC); 1691 + if (abort) 1692 + abort_connection(ep, NULL, GFP_ATOMIC); 1718 1693 put_ep(&ep->com); 1719 1694 } 1720 1695 ··· 2001 1968 PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep, 2002 1969 states[ep->com.state], abrupt); 2003 1970 2004 - if (ep->com.state == DEAD) { 2005 - PDBG("%s already dead ep %p\n", __func__, ep); 2006 - goto out; 2007 - } 2008 - 2009 - if (abrupt) { 2010 - if (ep->com.state != ABORTING) { 2011 - ep->com.state = ABORTING; 2012 - close = 1; 2013 - } 2014 - goto out; 2015 - } 2016 - 2017 1971 switch (ep->com.state) { 2018 1972 case MPA_REQ_WAIT: 2019 1973 case MPA_REQ_SENT: 2020 1974 case MPA_REQ_RCVD: 2021 1975 case MPA_REP_SENT: 2022 1976 case FPDU_MODE: 2023 - start_ep_timer(ep); 2024 - ep->com.state = CLOSING; 2025 1977 close = 1; 1978 + if (abrupt) 1979 + ep->com.state = ABORTING; 1980 + else { 1981 + ep->com.state = CLOSING; 1982 + start_ep_timer(ep); 1983 + } 2026 1984 break; 2027 1985 case CLOSING: 2028 - ep->com.state = MORIBUND; 2029 1986 close = 1; 1987 + if (abrupt) { 1988 + stop_ep_timer(ep); 1989 + ep->com.state = ABORTING; 1990 + } else 1991 + ep->com.state = MORIBUND; 2030 1992 break; 2031 1993 case MORIBUND: 1994 + case ABORTING: 1995 + case DEAD: 1996 + PDBG("%s ignoring disconnect ep %p state %u\n", 1997 + __func__, ep, ep->com.state); 2032 1998 break; 2033 1999 default: 2034 2000 BUG(); 2035 2001 break; 2036 2002 } 2037 - out: 2003 + 2038 2004 spin_unlock_irqrestore(&ep->com.lock, flags); 2039 2005 if (close) { 2040 2006 if (abrupt)
+1
drivers/infiniband/hw/cxgb3/iwch_cm.h
··· 56 56 #define put_ep(ep) { \ 57 57 PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ 58 58 ep, atomic_read(&((ep)->kref.refcount))); \ 59 + WARN_ON(atomic_read(&((ep)->kref.refcount)) < 1); \ 59 60 kref_put(&((ep)->kref), __free_ep); \ 60 61 } 61 62
+5 -1
drivers/infiniband/hw/cxgb3/iwch_qp.c
··· 832 832 abort=0; 833 833 disconnect = 1; 834 834 ep = qhp->ep; 835 + get_ep(&ep->com); 835 836 } 836 837 flush_qp(qhp, &flag); 837 838 break; ··· 849 848 abort=1; 850 849 disconnect = 1; 851 850 ep = qhp->ep; 851 + get_ep(&ep->com); 852 852 } 853 853 goto err; 854 854 break; ··· 931 929 * on the EP. This can be a normal close (RTS->CLOSING) or 932 930 * an abnormal close (RTS/CLOSING->ERROR). 933 931 */ 934 - if (disconnect) 932 + if (disconnect) { 935 933 iwch_ep_disconnect(ep, abort, GFP_KERNEL); 934 + put_ep(&ep->com); 935 + } 936 936 937 937 /* 938 938 * If free is 1, then we've disassociated the EP from the QP