IB/mad: include GID/class when matching receives

Received responses are currently matched against sent requests based
on TID only. According to the spec, responses should match based on
the combination of TID, management class, and requester LID/GID.

Without the additional qualification, an agent that is responding to
two requests, both of which have the same TID, can match RMPP ACKs
with the incorrect transaction. This problem can occur on the SM node
when responding to SA queries.

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

authored by

Jack Morgenstein and committed by
Roland Dreier
fa9656bb e1f7868c

+67 -29
+52 -6
drivers/infiniband/core/mad.c
··· 1618 1618 (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); 1619 1619 } 1620 1620 1621 + static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr, 1622 + struct ib_mad_recv_wc *rwc) 1623 + { 1624 + return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class == 1625 + rwc->recv_buf.mad->mad_hdr.mgmt_class; 1626 + } 1627 + 1628 + static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr, 1629 + struct ib_mad_recv_wc *rwc ) 1630 + { 1631 + struct ib_ah_attr attr; 1632 + u8 send_resp, rcv_resp; 1633 + 1634 + send_resp = ((struct ib_mad *)(wr->send_buf.mad))-> 1635 + mad_hdr.method & IB_MGMT_METHOD_RESP; 1636 + rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP; 1637 + 1638 + if (!send_resp && rcv_resp) 1639 + /* is request/response. GID/LIDs are both local (same). */ 1640 + return 1; 1641 + 1642 + if (send_resp == rcv_resp) 1643 + /* both requests, or both responses. GIDs different */ 1644 + return 0; 1645 + 1646 + if (ib_query_ah(wr->send_buf.ah, &attr)) 1647 + /* Assume not equal, to avoid false positives. */ 1648 + return 0; 1649 + 1650 + if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH)) 1651 + return attr.dlid == rwc->wc->slid; 1652 + else if ((attr.ah_flags & IB_AH_GRH) && 1653 + (rwc->wc->wc_flags & IB_WC_GRH)) 1654 + return memcmp(attr.grh.dgid.raw, 1655 + rwc->recv_buf.grh->sgid.raw, 16) == 0; 1656 + else 1657 + /* one has GID, other does not. Assume different */ 1658 + return 0; 1659 + } 1621 1660 struct ib_mad_send_wr_private* 1622 - ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid) 1661 + ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, 1662 + struct ib_mad_recv_wc *mad_recv_wc) 1623 1663 { 1624 1664 struct ib_mad_send_wr_private *mad_send_wr; 1665 + struct ib_mad *mad; 1666 + 1667 + mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad; 1625 1668 1626 1669 list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list, 1627 1670 agent_list) { 1628 - if (mad_send_wr->tid == tid) 1671 + if ((mad_send_wr->tid == mad->mad_hdr.tid) && 1672 + rcv_has_same_class(mad_send_wr, mad_recv_wc) && 1673 + rcv_has_same_gid(mad_send_wr, mad_recv_wc)) 1629 1674 return mad_send_wr; 1630 1675 } 1631 1676
··· 1681 1636 list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list, 1682 1637 agent_list) { 1683 1638 if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) && 1684 - mad_send_wr->tid == tid && mad_send_wr->timeout) { 1639 + mad_send_wr->tid == mad->mad_hdr.tid && 1640 + mad_send_wr->timeout && 1641 + rcv_has_same_class(mad_send_wr, mad_recv_wc) && 1642 + rcv_has_same_gid(mad_send_wr, mad_recv_wc)) { 1685 1643 /* Verify request has not been canceled */ 1686 1644 return (mad_send_wr->status == IB_WC_SUCCESS) ? 1687 1645 mad_send_wr : NULL;
··· 1709 1661 struct ib_mad_send_wr_private *mad_send_wr; 1710 1662 struct ib_mad_send_wc mad_send_wc; 1711 1663 unsigned long flags; 1712 - __be64 tid; 1713 1664 1714 1665 INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); 1715 1666 list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
··· 1724 1677 1725 1678 /* Complete corresponding request */ 1726 1679 if (response_mad(mad_recv_wc->recv_buf.mad)) { 1727 - tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid; 1728 1680 spin_lock_irqsave(&mad_agent_priv->lock, flags); 1729 - mad_send_wr = ib_find_send_mad(mad_agent_priv, tid); 1681 + mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); 1730 1682 if (!mad_send_wr) { 1731 1683 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1732 1684 ib_free_recv_mad(mad_recv_wc);
+2 -1
drivers/infiniband/core/mad_priv.h
··· 216 216 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr); 217 217 218 218 struct ib_mad_send_wr_private * 219 - ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid); 219 + ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, 220 + struct ib_mad_recv_wc *mad_recv_wc); 220 221 221 222 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr, 222 223 struct ib_mad_send_wc *mad_send_wc);
+13 -22
drivers/infiniband/core/mad_rmpp.c
··· 562 562 return ib_send_mad(mad_send_wr); 563 563 } 564 564 565 - static void abort_send(struct ib_mad_agent_private *agent, __be64 tid, 566 - u8 rmpp_status) 565 + static void abort_send(struct ib_mad_agent_private *agent, 566 + struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status) 567 567 { 568 568 struct ib_mad_send_wr_private *mad_send_wr; 569 569 struct ib_mad_send_wc wc; 570 570 unsigned long flags; 571 571 572 572 spin_lock_irqsave(&agent->lock, flags); 573 - mad_send_wr = ib_find_send_mad(agent, tid); 573 + mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); 574 574 if (!mad_send_wr) 575 575 goto out; /* Unmatched send */ 576 576
··· 612 612 613 613 rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; 614 614 if (rmpp_mad->rmpp_hdr.rmpp_status) { 615 - abort_send(agent, rmpp_mad->mad_hdr.tid, 616 - IB_MGMT_RMPP_STATUS_BAD_STATUS); 615 + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); 617 616 nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); 618 617 return; 619 618 }
··· 620 621 seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num); 621 622 newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); 622 623 if (newwin < seg_num) { 623 - abort_send(agent, rmpp_mad->mad_hdr.tid, 624 - IB_MGMT_RMPP_STATUS_W2S); 624 + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); 625 625 nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S); 626 626 return; 627 627 } 628 628 629 629 spin_lock_irqsave(&agent->lock, flags); 630 - mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid); 630 + mad_send_wr = ib_find_send_mad(agent, mad_recv_wc); 631 631 if (!mad_send_wr) 632 632 goto out; /* Unmatched ACK */ 633 633
··· 637 639 if (seg_num > mad_send_wr->send_buf.seg_count || 638 640 seg_num > mad_send_wr->newwin) { 639 641 spin_unlock_irqrestore(&agent->lock, flags); 640 - abort_send(agent, rmpp_mad->mad_hdr.tid, 641 - IB_MGMT_RMPP_STATUS_S2B); 642 + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); 642 643 nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B); 643 644 return; 644 645 }
··· 725 728 rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad; 726 729 727 730 if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) { 728 - abort_send(agent, rmpp_mad->mad_hdr.tid, 729 - IB_MGMT_RMPP_STATUS_BAD_STATUS); 731 + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); 730 732 nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); 731 733 } else 732 - abort_send(agent, rmpp_mad->mad_hdr.tid, 733 - rmpp_mad->rmpp_hdr.rmpp_status); 734 + abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); 734 735 } 735 736 736 737 static void process_rmpp_abort(struct ib_mad_agent_private *agent,
··· 740 745 741 746 if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN || 742 747 rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) { 743 - abort_send(agent, rmpp_mad->mad_hdr.tid, 744 - IB_MGMT_RMPP_STATUS_BAD_STATUS); 748 + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); 745 749 nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS); 746 750 } else 747 - abort_send(agent, rmpp_mad->mad_hdr.tid, 748 - rmpp_mad->rmpp_hdr.rmpp_status); 751 + abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status); 749 752 } 750 753 751 754 struct ib_mad_recv_wc *
··· 757 764 return mad_recv_wc; 758 765 759 766 if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) { 760 - abort_send(agent, rmpp_mad->mad_hdr.tid, 761 - IB_MGMT_RMPP_STATUS_UNV); 767 + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); 762 768 nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV); 763 769 goto out; 764 770 }
··· 775 783 process_rmpp_abort(agent, mad_recv_wc); 776 784 break; 777 785 default: 778 - abort_send(agent, rmpp_mad->mad_hdr.tid, 779 - IB_MGMT_RMPP_STATUS_BADT); 786 + abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); 780 787 nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT); 781 788 break; 782 789 }