IB/mad: include GID/class when matching receives

Received responses are currently matched against sent requests based
on the TID alone. According to the spec, a response should be matched
on the combination of TID, management class, and requester LID/GID.

Without this additional qualification, an agent responding to two
requests that carry the same TID can match an RMPP ACK to the wrong
transaction. This can occur on the SM node when it responds to SA
queries.
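In effect, the lookup key widens from the bare 64-bit TID to a
(TID, management class, requester) tuple. A condensed sketch of the
resulting predicate (the mad_matches() wrapper is hypothetical; the
two rcv_has_same_*() helpers are the ones this patch adds to mad.c):

	/* Hypothetical wrapper: a received response completes a pending
	 * send only if all three qualifiers agree, not just the TID. */
	static int mad_matches(struct ib_mad_send_wr_private *wr,
			       struct ib_mad_recv_wc *rwc)
	{
		struct ib_mad *mad = (struct ib_mad *)rwc->recv_buf.mad;

		return wr->tid == mad->mad_hdr.tid &&	/* same transaction */
		       rcv_has_same_class(wr, rwc) &&	/* same mgmt class  */
		       rcv_has_same_gid(wr, rwc);	/* same requester   */
	}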

Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il>
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>

Authored by Jack Morgenstein, committed by Roland Dreier
fa9656bb e1f7868c

3 files changed, +67 -29
drivers/infiniband/core/mad.c  (+52 -6)
···
 	       (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
 }
 
+static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
+				     struct ib_mad_recv_wc *rwc)
+{
+	return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class ==
+		rwc->recv_buf.mad->mad_hdr.mgmt_class;
+}
+
+static inline int rcv_has_same_gid(struct ib_mad_send_wr_private *wr,
+				   struct ib_mad_recv_wc *rwc)
+{
+	struct ib_ah_attr attr;
+	u8 send_resp, rcv_resp;
+
+	send_resp = ((struct ib_mad *)(wr->send_buf.mad))->
+		     mad_hdr.method & IB_MGMT_METHOD_RESP;
+	rcv_resp = rwc->recv_buf.mad->mad_hdr.method & IB_MGMT_METHOD_RESP;
+
+	if (!send_resp && rcv_resp)
+		/* is request/response. GID/LIDs are both local (same). */
+		return 1;
+
+	if (send_resp == rcv_resp)
+		/* both requests, or both responses. GIDs different */
+		return 0;
+
+	if (ib_query_ah(wr->send_buf.ah, &attr))
+		/* Assume not equal, to avoid false positives. */
+		return 0;
+
+	if (!(attr.ah_flags & IB_AH_GRH) && !(rwc->wc->wc_flags & IB_WC_GRH))
+		return attr.dlid == rwc->wc->slid;
+	else if ((attr.ah_flags & IB_AH_GRH) &&
+		 (rwc->wc->wc_flags & IB_WC_GRH))
+		return memcmp(attr.grh.dgid.raw,
+			      rwc->recv_buf.grh->sgid.raw, 16) == 0;
+	else
+		/* one has GID, other does not. Assume different */
+		return 0;
+}
 struct ib_mad_send_wr_private*
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+		 struct ib_mad_recv_wc *mad_recv_wc)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
+	struct ib_mad *mad;
+
+	mad = (struct ib_mad *)mad_recv_wc->recv_buf.mad;
 
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
 			    agent_list) {
-		if (mad_send_wr->tid == tid)
+		if ((mad_send_wr->tid == mad->mad_hdr.tid) &&
+		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
+		    rcv_has_same_gid(mad_send_wr, mad_recv_wc))
 			return mad_send_wr;
 	}
···
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
 			    agent_list) {
 		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
-		    mad_send_wr->tid == tid && mad_send_wr->timeout) {
+		    mad_send_wr->tid == mad->mad_hdr.tid &&
+		    mad_send_wr->timeout &&
+		    rcv_has_same_class(mad_send_wr, mad_recv_wc) &&
+		    rcv_has_same_gid(mad_send_wr, mad_recv_wc)) {
 			/* Verify request has not been canceled */
 			return (mad_send_wr->status == IB_WC_SUCCESS) ?
 				mad_send_wr : NULL;
···
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc mad_send_wc;
 	unsigned long flags;
-	__be64 tid;
 
 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
···
 	/* Complete corresponding request */
 	if (response_mad(mad_recv_wc->recv_buf.mad)) {
-		tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid;
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
-		mad_send_wr = ib_find_send_mad(mad_agent_priv, tid);
+		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
 		if (!mad_send_wr) {
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 			ib_free_recv_mad(mad_recv_wc);
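The direction handling in rcv_has_same_gid() is the subtle part. When
the local agent sent the request and is now receiving the response,
both MADs go through the same local port, so the helper declares a
match without comparing addresses at all. Two requests (or two
responses) can never belong to the same transaction pair, so that case
fails outright. Only in the remaining case, where the local agent sent
a response and is receiving traffic from the requester (e.g. RMPP flow
control), does it query the send-side address handle and compare the
peer's LID, or its GID when both sides carry a GRH, against the source
of the receive. If ib_query_ah() fails, or if one side has a GRH and
the other does not, the helper conservatively assumes no match to
avoid false positives.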
drivers/infiniband/core/mad_priv.h  (+2 -1)
···
 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr);
 
 struct ib_mad_send_wr_private *
-ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid);
+ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
+		 struct ib_mad_recv_wc *mad_recv_wc);
 
 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 			     struct ib_mad_send_wc *mad_send_wc);
drivers/infiniband/core/mad_rmpp.c  (+13 -22)
···
 	return ib_send_mad(mad_send_wr);
 }
 
-static void abort_send(struct ib_mad_agent_private *agent, __be64 tid,
-		       u8 rmpp_status)
+static void abort_send(struct ib_mad_agent_private *agent,
+		       struct ib_mad_recv_wc *mad_recv_wc, u8 rmpp_status)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
 	struct ib_mad_send_wc wc;
 	unsigned long flags;
 
 	spin_lock_irqsave(&agent->lock, flags);
-	mad_send_wr = ib_find_send_mad(agent, tid);
+	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
 	if (!mad_send_wr)
 		goto out;	/* Unmatched send */
···
 	rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
 	if (rmpp_mad->rmpp_hdr.rmpp_status) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		return;
 	}
···
 	seg_num = be32_to_cpu(rmpp_mad->rmpp_hdr.seg_num);
 	newwin = be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 	if (newwin < seg_num) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_W2S);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_W2S);
 		return;
 	}
 
 	spin_lock_irqsave(&agent->lock, flags);
-	mad_send_wr = ib_find_send_mad(agent, rmpp_mad->mad_hdr.tid);
+	mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
 	if (!mad_send_wr)
 		goto out;	/* Unmatched ACK */
···
 	if (seg_num > mad_send_wr->send_buf.seg_count ||
 	    seg_num > mad_send_wr->newwin) {
 		spin_unlock_irqrestore(&agent->lock, flags);
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_S2B);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_S2B);
 		return;
 	}
···
 	rmpp_mad = (struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad;
 
 	if (rmpp_mad->rmpp_hdr.rmpp_status != IB_MGMT_RMPP_STATUS_RESX) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 	} else
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   rmpp_mad->rmpp_hdr.rmpp_status);
+		abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
 }
 
 static void process_rmpp_abort(struct ib_mad_agent_private *agent,
···
 	if (rmpp_mad->rmpp_hdr.rmpp_status < IB_MGMT_RMPP_STATUS_ABORT_MIN ||
 	    rmpp_mad->rmpp_hdr.rmpp_status > IB_MGMT_RMPP_STATUS_ABORT_MAX) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BAD_STATUS);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BAD_STATUS);
 	} else
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   rmpp_mad->rmpp_hdr.rmpp_status);
+		abort_send(agent, mad_recv_wc, rmpp_mad->rmpp_hdr.rmpp_status);
 }
 
 struct ib_mad_recv_wc *
···
 		return mad_recv_wc;
 
 	if (rmpp_mad->rmpp_hdr.rmpp_version != IB_MGMT_RMPP_VERSION) {
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_UNV);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_UNV);
 		goto out;
 	}
···
 		process_rmpp_abort(agent, mad_recv_wc);
 		break;
 	default:
-		abort_send(agent, rmpp_mad->mad_hdr.tid,
-			   IB_MGMT_RMPP_STATUS_BADT);
+		abort_send(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
 		nack_recv(agent, mad_recv_wc, IB_MGMT_RMPP_STATUS_BADT);
 		break;
 	}