Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

IB/core: Add gid_type to gid attribute

In order to support multiple GID types, we need to store the gid_type
with each GID. This is also aligned with the RoCE v2 annex: "RoCEv2 PORT
GID table entries shall have a 'GID type' attribute that denotes the L3
Address type". The currently supported GID type is IB_GID_TYPE_IB, which
is also the RoCE v1 GID type.

This implies that gid_type should be added to roce_gid_table meta-data.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>

authored by

Matan Barak and committed by
Doug Ledford
b39ffa1d cee3c4d0

+182 -59
+95 -43
drivers/infiniband/core/cache.c
··· 64 64 GID_ATTR_FIND_MASK_GID = 1UL << 0, 65 65 GID_ATTR_FIND_MASK_NETDEV = 1UL << 1, 66 66 GID_ATTR_FIND_MASK_DEFAULT = 1UL << 2, 67 + GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, 67 68 }; 68 69 69 70 enum gid_table_entry_props { ··· 125 124 ib_dispatch_event(&event); 126 125 } 127 126 } 127 + 128 + static const char * const gid_type_str[] = { 129 + [IB_GID_TYPE_IB] = "IB/RoCE v1", 130 + }; 131 + 132 + const char *ib_cache_gid_type_str(enum ib_gid_type gid_type) 133 + { 134 + if (gid_type < ARRAY_SIZE(gid_type_str) && gid_type_str[gid_type]) 135 + return gid_type_str[gid_type]; 136 + 137 + return "Invalid GID type"; 138 + } 139 + EXPORT_SYMBOL(ib_cache_gid_type_str); 128 140 129 141 /* This function expects that rwlock will be write locked in all 130 142 * scenarios and that lock will be locked in sleep-able (RoCE) ··· 247 233 if (found >= 0) 248 234 continue; 249 235 236 + if (mask & GID_ATTR_FIND_MASK_GID_TYPE && 237 + attr->gid_type != val->gid_type) 238 + continue; 239 + 250 240 if (mask & GID_ATTR_FIND_MASK_GID && 251 241 memcmp(gid, &data->gid, sizeof(*gid))) 252 242 continue; ··· 314 296 write_lock_irq(&table->rwlock); 315 297 316 298 ix = find_gid(table, gid, attr, false, GID_ATTR_FIND_MASK_GID | 299 + GID_ATTR_FIND_MASK_GID_TYPE | 317 300 GID_ATTR_FIND_MASK_NETDEV, &empty); 318 301 if (ix >= 0) 319 302 goto out_unlock; ··· 348 329 349 330 ix = find_gid(table, gid, attr, false, 350 331 GID_ATTR_FIND_MASK_GID | 332 + GID_ATTR_FIND_MASK_GID_TYPE | 351 333 GID_ATTR_FIND_MASK_NETDEV | 352 334 GID_ATTR_FIND_MASK_DEFAULT, 353 335 NULL); ··· 447 427 448 428 static int ib_cache_gid_find(struct ib_device *ib_dev, 449 429 const union ib_gid *gid, 430 + enum ib_gid_type gid_type, 450 431 struct net_device *ndev, u8 *port, 451 432 u16 *index) 452 433 { 453 - unsigned long mask = GID_ATTR_FIND_MASK_GID; 454 - struct ib_gid_attr gid_attr_val = {.ndev = ndev}; 434 + unsigned long mask = GID_ATTR_FIND_MASK_GID | 435 + GID_ATTR_FIND_MASK_GID_TYPE; 436 + struct ib_gid_attr 
gid_attr_val = {.ndev = ndev, .gid_type = gid_type}; 455 437 456 438 if (ndev) 457 439 mask |= GID_ATTR_FIND_MASK_NETDEV; ··· 464 442 465 443 int ib_find_cached_gid_by_port(struct ib_device *ib_dev, 466 444 const union ib_gid *gid, 445 + enum ib_gid_type gid_type, 467 446 u8 port, struct net_device *ndev, 468 447 u16 *index) 469 448 { 470 449 int local_index; 471 450 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 472 451 struct ib_gid_table *table; 473 - unsigned long mask = GID_ATTR_FIND_MASK_GID; 474 - struct ib_gid_attr val = {.ndev = ndev}; 452 + unsigned long mask = GID_ATTR_FIND_MASK_GID | 453 + GID_ATTR_FIND_MASK_GID_TYPE; 454 + struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type}; 475 455 unsigned long flags; 476 456 477 457 if (port < rdma_start_port(ib_dev) || ··· 631 607 632 608 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, 633 609 struct net_device *ndev, 610 + unsigned long gid_type_mask, 634 611 enum ib_cache_gid_default_mode mode) 635 612 { 636 613 struct ib_gid_table **ports_table = ib_dev->cache.gid_cache; 637 614 union ib_gid gid; 638 615 struct ib_gid_attr gid_attr; 616 + struct ib_gid_attr zattr_type = zattr; 639 617 struct ib_gid_table *table; 640 - int ix; 641 - union ib_gid current_gid; 642 - struct ib_gid_attr current_gid_attr = {}; 618 + unsigned int gid_type; 643 619 644 620 table = ports_table[port - rdma_start_port(ib_dev)]; 645 621 ··· 647 623 memset(&gid_attr, 0, sizeof(gid_attr)); 648 624 gid_attr.ndev = ndev; 649 625 650 - mutex_lock(&table->lock); 651 - write_lock_irq(&table->rwlock); 652 - ix = find_gid(table, NULL, NULL, true, GID_ATTR_FIND_MASK_DEFAULT, NULL); 626 + for (gid_type = 0; gid_type < IB_GID_TYPE_SIZE; ++gid_type) { 627 + int ix; 628 + union ib_gid current_gid; 629 + struct ib_gid_attr current_gid_attr = {}; 653 630 654 - /* Coudn't find default GID location */ 655 - WARN_ON(ix < 0); 631 + if (1UL << gid_type & ~gid_type_mask) 632 + continue; 656 633 657 - if 
(!__ib_cache_gid_get(ib_dev, port, ix, 658 - &current_gid, &current_gid_attr) && 659 - mode == IB_CACHE_GID_DEFAULT_MODE_SET && 660 - !memcmp(&gid, &current_gid, sizeof(gid)) && 661 - !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr))) 662 - goto unlock; 634 + gid_attr.gid_type = gid_type; 663 635 664 - if (memcmp(&current_gid, &zgid, sizeof(current_gid)) || 665 - memcmp(&current_gid_attr, &zattr, 666 - sizeof(current_gid_attr))) { 667 - if (del_gid(ib_dev, port, table, ix, true)) { 668 - pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", 669 - ix, gid.raw); 670 - goto unlock; 671 - } else { 672 - dispatch_gid_change_event(ib_dev, port); 636 + mutex_lock(&table->lock); 637 + write_lock_irq(&table->rwlock); 638 + ix = find_gid(table, NULL, &gid_attr, true, 639 + GID_ATTR_FIND_MASK_GID_TYPE | 640 + GID_ATTR_FIND_MASK_DEFAULT, 641 + NULL); 642 + 643 + /* Coudn't find default GID location */ 644 + WARN_ON(ix < 0); 645 + 646 + zattr_type.gid_type = gid_type; 647 + 648 + if (!__ib_cache_gid_get(ib_dev, port, ix, 649 + &current_gid, &current_gid_attr) && 650 + mode == IB_CACHE_GID_DEFAULT_MODE_SET && 651 + !memcmp(&gid, &current_gid, sizeof(gid)) && 652 + !memcmp(&gid_attr, &current_gid_attr, sizeof(gid_attr))) 653 + goto release; 654 + 655 + if (memcmp(&current_gid, &zgid, sizeof(current_gid)) || 656 + memcmp(&current_gid_attr, &zattr_type, 657 + sizeof(current_gid_attr))) { 658 + if (del_gid(ib_dev, port, table, ix, true)) { 659 + pr_warn("ib_cache_gid: can't delete index %d for default gid %pI6\n", 660 + ix, gid.raw); 661 + goto release; 662 + } else { 663 + dispatch_gid_change_event(ib_dev, port); 664 + } 673 665 } 674 - } 675 666 676 - if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { 677 - if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) { 678 - pr_warn("ib_cache_gid: unable to add default gid %pI6\n", 679 - gid.raw); 680 - } else { 681 - dispatch_gid_change_event(ib_dev, port); 667 + if (mode == IB_CACHE_GID_DEFAULT_MODE_SET) { 668 + 
if (add_gid(ib_dev, port, table, ix, &gid, &gid_attr, true)) 669 + pr_warn("ib_cache_gid: unable to add default gid %pI6\n", 670 + gid.raw); 671 + else 672 + dispatch_gid_change_event(ib_dev, port); 682 673 } 683 - } 684 674 685 - unlock: 686 - if (current_gid_attr.ndev) 687 - dev_put(current_gid_attr.ndev); 688 - write_unlock_irq(&table->rwlock); 689 - mutex_unlock(&table->lock); 675 + release: 676 + if (current_gid_attr.ndev) 677 + dev_put(current_gid_attr.ndev); 678 + write_unlock_irq(&table->rwlock); 679 + mutex_unlock(&table->lock); 680 + } 690 681 } 691 682 692 683 static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port, 693 684 struct ib_gid_table *table) 694 685 { 695 - if (rdma_protocol_roce(ib_dev, port)) { 696 - struct ib_gid_table_entry *entry = &table->data_vec[0]; 686 + unsigned int i; 687 + unsigned long roce_gid_type_mask; 688 + unsigned int num_default_gids; 689 + unsigned int current_gid = 0; 690 + 691 + roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 692 + num_default_gids = hweight_long(roce_gid_type_mask); 693 + for (i = 0; i < num_default_gids && i < table->sz; i++) { 694 + struct ib_gid_table_entry *entry = 695 + &table->data_vec[i]; 697 696 698 697 entry->props |= GID_TABLE_ENTRY_DEFAULT; 698 + current_gid = find_next_bit(&roce_gid_type_mask, 699 + BITS_PER_LONG, 700 + current_gid); 701 + entry->attr.gid_type = current_gid++; 699 702 } 700 703 701 704 return 0; ··· 845 794 846 795 int ib_find_cached_gid(struct ib_device *device, 847 796 const union ib_gid *gid, 797 + enum ib_gid_type gid_type, 848 798 struct net_device *ndev, 849 799 u8 *port_num, 850 800 u16 *index) 851 801 { 852 - return ib_cache_gid_find(device, gid, ndev, port_num, index); 802 + return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index); 853 803 } 854 804 EXPORT_SYMBOL(ib_find_cached_gid); 855 805
+1 -1
drivers/infiniband/core/cm.c
··· 364 364 read_lock_irqsave(&cm.device_lock, flags); 365 365 list_for_each_entry(cm_dev, &cm.device_list, list) { 366 366 if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid, 367 - ndev, &p, NULL)) { 367 + IB_GID_TYPE_IB, ndev, &p, NULL)) { 368 368 port = cm_dev->port[p-1]; 369 369 break; 370 370 }
+2 -1
drivers/infiniband/core/cma.c
··· 456 456 if (dev_type == ARPHRD_ETHER) 457 457 ndev = dev_get_by_index(&init_net, bound_if_index); 458 458 459 - ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL); 459 + ret = ib_find_cached_gid_by_port(device, gid, IB_GID_TYPE_IB, port, 460 + ndev, NULL); 460 461 461 462 if (ndev) 462 463 dev_put(ndev);
+4
drivers/infiniband/core/core_priv.h
··· 70 70 IB_CACHE_GID_DEFAULT_MODE_DELETE 71 71 }; 72 72 73 + const char *ib_cache_gid_type_str(enum ib_gid_type gid_type); 74 + 73 75 void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port, 74 76 struct net_device *ndev, 77 + unsigned long gid_type_mask, 75 78 enum ib_cache_gid_default_mode mode); 76 79 77 80 int ib_cache_gid_add(struct ib_device *ib_dev, u8 port, ··· 90 87 void roce_gid_mgmt_cleanup(void); 91 88 92 89 int roce_rescan_device(struct ib_device *ib_dev); 90 + unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); 93 91 94 92 int ib_cache_setup_one(struct ib_device *device); 95 93 void ib_cache_cleanup_one(struct ib_device *device);
+7 -2
drivers/infiniband/core/device.c
··· 815 815 * a specified GID value occurs. 816 816 * @device: The device to query. 817 817 * @gid: The GID value to search for. 818 + * @gid_type: Type of GID. 818 819 * @ndev: The ndev related to the GID to search for. 819 820 * @port_num: The port number of the device where the GID value was found. 820 821 * @index: The index into the GID table where the GID was found. This 821 822 * parameter may be NULL. 822 823 */ 823 824 int ib_find_gid(struct ib_device *device, union ib_gid *gid, 824 - struct net_device *ndev, u8 *port_num, u16 *index) 825 + enum ib_gid_type gid_type, struct net_device *ndev, 826 + u8 *port_num, u16 *index) 825 827 { 826 828 union ib_gid tmp_gid; 827 829 int ret, port, i; 828 830 829 831 for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) { 830 832 if (rdma_cap_roce_gid_table(device, port)) { 831 - if (!ib_find_cached_gid_by_port(device, gid, port, 833 + if (!ib_find_cached_gid_by_port(device, gid, gid_type, port, 832 834 ndev, index)) { 833 835 *port_num = port; 834 836 return 0; 835 837 } 836 838 } 839 + 840 + if (gid_type != IB_GID_TYPE_IB) 841 + continue; 837 842 838 843 for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) { 839 844 ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
+1 -1
drivers/infiniband/core/multicast.c
··· 729 729 u16 gid_index; 730 730 u8 p; 731 731 732 - ret = ib_find_cached_gid(device, &rec->port_gid, 732 + ret = ib_find_cached_gid(device, &rec->port_gid, IB_GID_TYPE_IB, 733 733 NULL, &p, &gid_index); 734 734 if (ret) 735 735 return ret;
+52 -8
drivers/infiniband/core/roce_gid_mgmt.c
··· 67 67 struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ]; 68 68 }; 69 69 70 + static const struct { 71 + bool (*is_supported)(const struct ib_device *device, u8 port_num); 72 + enum ib_gid_type gid_type; 73 + } PORT_CAP_TO_GID_TYPE[] = { 74 + {rdma_protocol_roce, IB_GID_TYPE_ROCE}, 75 + }; 76 + 77 + #define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE) 78 + 79 + unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port) 80 + { 81 + int i; 82 + unsigned int ret_flags = 0; 83 + 84 + if (!rdma_protocol_roce(ib_dev, port)) 85 + return 1UL << IB_GID_TYPE_IB; 86 + 87 + for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) 88 + if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port)) 89 + ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type; 90 + 91 + return ret_flags; 92 + } 93 + EXPORT_SYMBOL(roce_gid_type_mask_support); 94 + 70 95 static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, 71 96 u8 port, union ib_gid *gid, 72 97 struct ib_gid_attr *gid_attr) 73 98 { 74 - switch (gid_op) { 75 - case GID_ADD: 76 - ib_cache_gid_add(ib_dev, port, gid, gid_attr); 77 - break; 78 - case GID_DEL: 79 - ib_cache_gid_del(ib_dev, port, gid, gid_attr); 80 - break; 99 + int i; 100 + unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 101 + 102 + for (i = 0; i < IB_GID_TYPE_SIZE; i++) { 103 + if ((1UL << i) & gid_type_mask) { 104 + gid_attr->gid_type = i; 105 + switch (gid_op) { 106 + case GID_ADD: 107 + ib_cache_gid_add(ib_dev, port, 108 + gid, gid_attr); 109 + break; 110 + case GID_DEL: 111 + ib_cache_gid_del(ib_dev, port, 112 + gid, gid_attr); 113 + break; 114 + } 115 + } 81 116 } 82 117 } 83 118 ··· 238 203 u8 port, struct net_device *event_ndev, 239 204 struct net_device *rdma_ndev) 240 205 { 206 + unsigned long gid_type_mask; 207 + 241 208 rcu_read_lock(); 242 209 if (!rdma_ndev || 243 210 ((rdma_ndev != event_ndev && ··· 252 215 } 253 216 rcu_read_unlock(); 254 217 255 - ib_cache_gid_set_default_gid(ib_dev, port, 
rdma_ndev, 218 + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 219 + 220 + ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask, 256 221 IB_CACHE_GID_DEFAULT_MODE_SET); 257 222 } 258 223 ··· 276 237 if (is_upper_dev_rcu(rdma_ndev, event_ndev) && 277 238 is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) == 278 239 BONDING_SLAVE_STATE_INACTIVE) { 240 + unsigned long gid_type_mask; 241 + 279 242 rcu_read_unlock(); 280 243 244 + gid_type_mask = roce_gid_type_mask_support(ib_dev, port); 245 + 281 246 ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, 247 + gid_type_mask, 282 248 IB_CACHE_GID_DEFAULT_MODE_DELETE); 283 249 } else { 284 250 rcu_read_unlock();
+3 -2
drivers/infiniband/core/sa_query.c
··· 1014 1014 ah_attr->ah_flags = IB_AH_GRH; 1015 1015 ah_attr->grh.dgid = rec->dgid; 1016 1016 1017 - ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num, 1018 - &gid_index); 1017 + ret = ib_find_cached_gid(device, &rec->sgid, rec->gid_type, ndev, 1018 + &port_num, &gid_index); 1019 1019 if (ret) { 1020 1020 if (ndev) 1021 1021 dev_put(ndev); ··· 1157 1157 mad->data, &rec); 1158 1158 rec.net = NULL; 1159 1159 rec.ifindex = 0; 1160 + rec.gid_type = IB_GID_TYPE_IB; 1160 1161 memset(rec.dmac, 0, ETH_ALEN); 1161 1162 query->callback(status, &rec, query->context); 1162 1163 } else
+1
drivers/infiniband/core/uverbs_marshall.c
··· 144 144 memset(dst->dmac, 0, sizeof(dst->dmac)); 145 145 dst->net = NULL; 146 146 dst->ifindex = 0; 147 + dst->gid_type = IB_GID_TYPE_IB; 147 148 } 148 149 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
+1
drivers/infiniband/core/verbs.c
··· 381 381 382 382 if (!rdma_cap_eth_ah(device, port_num)) { 383 383 ret = ib_find_cached_gid_by_port(device, &grh->dgid, 384 + IB_GID_TYPE_IB, 384 385 port_num, NULL, 385 386 &gid_index); 386 387 if (ret)
+4
include/rdma/ib_cache.h
··· 60 60 * a specified GID value occurs. 61 61 * @device: The device to query. 62 62 * @gid: The GID value to search for. 63 + * @gid_type: The GID type to search for. 63 64 * @ndev: In RoCE, the net device of the device. NULL means ignore. 64 65 * @port_num: The port number of the device where the GID value was found. 65 66 * @index: The index into the cached GID table where the GID was found. This ··· 71 70 */ 72 71 int ib_find_cached_gid(struct ib_device *device, 73 72 const union ib_gid *gid, 73 + enum ib_gid_type gid_type, 74 74 struct net_device *ndev, 75 75 u8 *port_num, 76 76 u16 *index); ··· 81 79 * GID value occurs 82 80 * @device: The device to query. 83 81 * @gid: The GID value to search for. 82 + * @gid_type: The GID type to search for. 84 83 * @port_num: The port number of the device where the GID value sould be 85 84 * searched. 86 85 * @ndev: In RoCE, the net device of the device. Null means ignore. ··· 93 90 */ 94 91 int ib_find_cached_gid_by_port(struct ib_device *device, 95 92 const union ib_gid *gid, 93 + enum ib_gid_type gid_type, 96 94 u8 port_num, 97 95 struct net_device *ndev, 98 96 u16 *index);
+1
include/rdma/ib_sa.h
··· 160 160 int ifindex; 161 161 /* ignored in IB */ 162 162 struct net *net; 163 + enum ib_gid_type gid_type; 163 164 }; 164 165 165 166 static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
+10 -1
include/rdma/ib_verbs.h
··· 69 69 70 70 extern union ib_gid zgid; 71 71 72 + enum ib_gid_type { 73 + /* If link layer is Ethernet, this is RoCE V1 */ 74 + IB_GID_TYPE_IB = 0, 75 + IB_GID_TYPE_ROCE = 0, 76 + IB_GID_TYPE_SIZE 77 + }; 78 + 72 79 struct ib_gid_attr { 80 + enum ib_gid_type gid_type; 73 81 struct net_device *ndev; 74 82 }; 75 83 ··· 2253 2245 struct ib_port_modify *port_modify); 2254 2246 2255 2247 int ib_find_gid(struct ib_device *device, union ib_gid *gid, 2256 - struct net_device *ndev, u8 *port_num, u16 *index); 2248 + enum ib_gid_type gid_type, struct net_device *ndev, 2249 + u8 *port_num, u16 *index); 2257 2250 2258 2251 int ib_find_pkey(struct ib_device *device, 2259 2252 u8 port_num, u16 pkey, u16 *index);