Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/core: Add LAG functionality

Add support for getting the RoCE LAG xmit slave by building an skb of the
RoCE packet and calling master_get_xmit_slave. If the driver wants the slave
to be selected under the assumption that all slaves are available, it needs
to set RDMA_LAG_FLAGS_HASH_ALL_SLAVES in flags.

Link: https://lore.kernel.org/r/20200430192146.12863-14-maorg@mellanox.com
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

authored by

Maor Gottlieb and committed by
Jason Gunthorpe
bd3920ea fa5d010c

+161 -1
+1 -1
drivers/infiniband/core/Makefile
··· 12 12 roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \ 13 13 multicast.o mad.o smi.o agent.o mad_rmpp.o \ 14 14 nldev.o restrack.o counters.o ib_core_uverbs.o \ 15 - trace.o 15 + trace.o lag.o 16 16 17 17 ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o 18 18 ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
+136
drivers/infiniband/core/lag.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 + /* 3 + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. 4 + */ 5 + 6 + #include <rdma/ib_verbs.h> 7 + #include <rdma/ib_cache.h> 8 + #include <rdma/lag.h> 9 + 10 + static struct sk_buff *rdma_build_skb(struct ib_device *device, 11 + struct net_device *netdev, 12 + struct rdma_ah_attr *ah_attr, 13 + gfp_t flags) 14 + { 15 + struct ipv6hdr *ip6h; 16 + struct sk_buff *skb; 17 + struct ethhdr *eth; 18 + struct iphdr *iph; 19 + struct udphdr *uh; 20 + u8 smac[ETH_ALEN]; 21 + bool is_ipv4; 22 + int hdr_len; 23 + 24 + is_ipv4 = ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw); 25 + hdr_len = ETH_HLEN + sizeof(struct udphdr) + LL_RESERVED_SPACE(netdev); 26 + hdr_len += is_ipv4 ? sizeof(struct iphdr) : sizeof(struct ipv6hdr); 27 + 28 + skb = alloc_skb(hdr_len, flags); 29 + if (!skb) 30 + return NULL; 31 + 32 + skb->dev = netdev; 33 + skb_reserve(skb, hdr_len); 34 + skb_push(skb, sizeof(struct udphdr)); 35 + skb_reset_transport_header(skb); 36 + uh = udp_hdr(skb); 37 + uh->source = htons(0xC000); 38 + uh->dest = htons(ROCE_V2_UDP_DPORT); 39 + uh->len = htons(sizeof(struct udphdr)); 40 + 41 + if (is_ipv4) { 42 + skb_push(skb, sizeof(struct iphdr)); 43 + skb_reset_network_header(skb); 44 + iph = ip_hdr(skb); 45 + iph->frag_off = 0; 46 + iph->version = 4; 47 + iph->protocol = IPPROTO_UDP; 48 + iph->ihl = 0x5; 49 + iph->tot_len = htons(sizeof(struct udphdr) + sizeof(struct 50 + iphdr)); 51 + memcpy(&iph->saddr, ah_attr->grh.sgid_attr->gid.raw + 12, 52 + sizeof(struct in_addr)); 53 + memcpy(&iph->daddr, ah_attr->grh.dgid.raw + 12, 54 + sizeof(struct in_addr)); 55 + } else { 56 + skb_push(skb, sizeof(struct ipv6hdr)); 57 + skb_reset_network_header(skb); 58 + ip6h = ipv6_hdr(skb); 59 + ip6h->version = 6; 60 + ip6h->nexthdr = IPPROTO_UDP; 61 + memcpy(&ip6h->flow_lbl, &ah_attr->grh.flow_label, 62 + sizeof(*ip6h->flow_lbl)); 63 + memcpy(&ip6h->saddr, ah_attr->grh.sgid_attr->gid.raw, 64 + 
sizeof(struct in6_addr)); 65 + memcpy(&ip6h->daddr, ah_attr->grh.dgid.raw, 66 + sizeof(struct in6_addr)); 67 + } 68 + 69 + skb_push(skb, sizeof(struct ethhdr)); 70 + skb_reset_mac_header(skb); 71 + eth = eth_hdr(skb); 72 + skb->protocol = eth->h_proto = htons(is_ipv4 ? ETH_P_IP : ETH_P_IPV6); 73 + rdma_read_gid_l2_fields(ah_attr->grh.sgid_attr, NULL, smac); 74 + memcpy(eth->h_source, smac, ETH_ALEN); 75 + memcpy(eth->h_dest, ah_attr->roce.dmac, ETH_ALEN); 76 + 77 + return skb; 78 + } 79 + 80 + static struct net_device *rdma_get_xmit_slave_udp(struct ib_device *device, 81 + struct net_device *master, 82 + struct rdma_ah_attr *ah_attr, 83 + gfp_t flags) 84 + { 85 + struct net_device *slave; 86 + struct sk_buff *skb; 87 + 88 + skb = rdma_build_skb(device, master, ah_attr, flags); 89 + if (!skb) 90 + return ERR_PTR(-ENOMEM); 91 + 92 + rcu_read_lock(); 93 + slave = netdev_get_xmit_slave(master, skb, 94 + !!(device->lag_flags & 95 + RDMA_LAG_FLAGS_HASH_ALL_SLAVES)); 96 + if (slave) 97 + dev_hold(slave); 98 + rcu_read_unlock(); 99 + kfree_skb(skb); 100 + return slave; 101 + } 102 + 103 + void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave) 104 + { 105 + if (xmit_slave) 106 + dev_put(xmit_slave); 107 + } 108 + 109 + struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, 110 + struct rdma_ah_attr *ah_attr, 111 + gfp_t flags) 112 + { 113 + struct net_device *slave = NULL; 114 + struct net_device *master; 115 + 116 + if (!(ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE && 117 + ah_attr->grh.sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)) 118 + return NULL; 119 + 120 + rcu_read_lock(); 121 + master = rdma_read_gid_attr_ndev_rcu(ah_attr->grh.sgid_attr); 122 + if (IS_ERR(master)) { 123 + rcu_read_unlock(); 124 + return master; 125 + } 126 + dev_hold(master); 127 + rcu_read_unlock(); 128 + 129 + if (!netif_is_bond_master(master)) 130 + goto put; 131 + 132 + slave = rdma_get_xmit_slave_udp(device, master, ah_attr, flags); 133 + put: 134 + dev_put(master); 
135 + return slave; 136 + }
+1
include/rdma/ib_verbs.h
··· 2714 2714 /* Used by iWarp CM */ 2715 2715 char iw_ifname[IFNAMSIZ]; 2716 2716 u32 iw_driver_flags; 2717 + u32 lag_flags; 2717 2718 }; 2718 2719 2719 2720 struct ib_client_nl_info;
+23
include/rdma/lag.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* 3 + * Copyright (c) 2020 Mellanox Technologies. All rights reserved. 4 + */ 5 + 6 + #ifndef _RDMA_LAG_H_ 7 + #define _RDMA_LAG_H_ 8 + 9 + #include <net/lag.h> 10 + 11 + struct ib_device; 12 + struct rdma_ah_attr; 13 + 14 + enum rdma_lag_flags { 15 + RDMA_LAG_FLAGS_HASH_ALL_SLAVES = 1 << 0 16 + }; 17 + 18 + void rdma_lag_put_ah_roce_slave(struct net_device *xmit_slave); 19 + struct net_device *rdma_lag_get_ah_roce_slave(struct ib_device *device, 20 + struct rdma_ah_attr *ah_attr, 21 + gfp_t flags); 22 + 23 + #endif /* _RDMA_LAG_H_ */