Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/core: Add an rb_tree that stores cm_ids sorted by ifindex and remote IP

Add to the CMA a tree that keeps track of all rdma_id_private channels
that were created while in RoCE mode.

The IDs are sorted first by their netdevice ifindex and then by their
destination IP address. IDs with a matching ifindex and destination IP share
the same node in the tree, since each node's data is a list of all IDs with
that destination IP.

The tree allows fast and efficient lookup of IDs using an ifindex and an
IP address, which is useful for promptly identifying relevant net_events.

Link: https://lore.kernel.org/r/2fac52c86cc918c634ab24b3867d4aed992f54ec.1654601342.git.leonro@nvidia.com
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>

authored by

Patrisious Haddad and committed by
Leon Romanovsky
fc008bdb 158e71bb

+138 -12
+137 -12
drivers/infiniband/core/cma.c
··· 11 11 #include <linux/in6.h> 12 12 #include <linux/mutex.h> 13 13 #include <linux/random.h> 14 + #include <linux/rbtree.h> 14 15 #include <linux/igmp.h> 15 16 #include <linux/xarray.h> 16 17 #include <linux/inetdevice.h> ··· 169 168 static LIST_HEAD(dev_list); 170 169 static LIST_HEAD(listen_any_list); 171 170 static DEFINE_MUTEX(lock); 171 + static struct rb_root id_table = RB_ROOT; 172 + /* Serialize operations of id_table tree */ 173 + static DEFINE_SPINLOCK(id_table_lock); 172 174 static struct workqueue_struct *cma_wq; 173 175 static unsigned int cma_pernet_id; 174 176 ··· 205 201 return NULL; 206 202 } 207 203 } 204 + 205 + struct id_table_entry { 206 + struct list_head id_list; 207 + struct rb_node rb_node; 208 + }; 208 209 209 210 struct cma_device { 210 211 struct list_head list; ··· 429 420 return hdr->ip_version >> 4; 430 421 } 431 422 432 - static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) 423 + static void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver) 433 424 { 434 425 hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF); 426 + } 427 + 428 + static struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) 429 + { 430 + return (struct sockaddr *)&id_priv->id.route.addr.src_addr; 431 + } 432 + 433 + static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) 434 + { 435 + return (struct sockaddr *)&id_priv->id.route.addr.dst_addr; 435 436 } 436 437 437 438 static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join) ··· 462 443 rtnl_unlock(); 463 444 } 464 445 return (in_dev) ? 
0 : -ENODEV; 446 + } 447 + 448 + static int compare_netdev_and_ip(int ifindex_a, struct sockaddr *sa, 449 + struct id_table_entry *entry_b) 450 + { 451 + struct rdma_id_private *id_priv = list_first_entry( 452 + &entry_b->id_list, struct rdma_id_private, id_list_entry); 453 + int ifindex_b = id_priv->id.route.addr.dev_addr.bound_dev_if; 454 + struct sockaddr *sb = cma_dst_addr(id_priv); 455 + 456 + if (ifindex_a != ifindex_b) 457 + return (ifindex_a > ifindex_b) ? 1 : -1; 458 + 459 + if (sa->sa_family != sb->sa_family) 460 + return sa->sa_family - sb->sa_family; 461 + 462 + if (sa->sa_family == AF_INET) 463 + return memcmp((char *)&((struct sockaddr_in *)sa)->sin_addr, 464 + (char *)&((struct sockaddr_in *)sb)->sin_addr, 465 + sizeof(((struct sockaddr_in *)sa)->sin_addr)); 466 + 467 + return ipv6_addr_cmp(&((struct sockaddr_in6 *)sa)->sin6_addr, 468 + &((struct sockaddr_in6 *)sb)->sin6_addr); 469 + } 470 + 471 + static int cma_add_id_to_tree(struct rdma_id_private *node_id_priv) 472 + { 473 + struct rb_node **new, *parent = NULL; 474 + struct id_table_entry *this, *node; 475 + unsigned long flags; 476 + int result; 477 + 478 + node = kzalloc(sizeof(*node), GFP_KERNEL); 479 + if (!node) 480 + return -ENOMEM; 481 + 482 + spin_lock_irqsave(&id_table_lock, flags); 483 + new = &id_table.rb_node; 484 + while (*new) { 485 + this = container_of(*new, struct id_table_entry, rb_node); 486 + result = compare_netdev_and_ip( 487 + node_id_priv->id.route.addr.dev_addr.bound_dev_if, 488 + cma_dst_addr(node_id_priv), this); 489 + 490 + parent = *new; 491 + if (result < 0) 492 + new = &((*new)->rb_left); 493 + else if (result > 0) 494 + new = &((*new)->rb_right); 495 + else { 496 + list_add_tail(&node_id_priv->id_list_entry, 497 + &this->id_list); 498 + kfree(node); 499 + goto unlock; 500 + } 501 + } 502 + 503 + INIT_LIST_HEAD(&node->id_list); 504 + list_add_tail(&node_id_priv->id_list_entry, &node->id_list); 505 + 506 + rb_link_node(&node->rb_node, parent, new); 507 + 
rb_insert_color(&node->rb_node, &id_table); 508 + 509 + unlock: 510 + spin_unlock_irqrestore(&id_table_lock, flags); 511 + return 0; 512 + } 513 + 514 + static struct id_table_entry * 515 + node_from_ndev_ip(struct rb_root *root, int ifindex, struct sockaddr *sa) 516 + { 517 + struct rb_node *node = root->rb_node; 518 + struct id_table_entry *data; 519 + int result; 520 + 521 + while (node) { 522 + data = container_of(node, struct id_table_entry, rb_node); 523 + result = compare_netdev_and_ip(ifindex, sa, data); 524 + if (result < 0) 525 + node = node->rb_left; 526 + else if (result > 0) 527 + node = node->rb_right; 528 + else 529 + return data; 530 + } 531 + 532 + return NULL; 533 + } 534 + 535 + static void cma_remove_id_from_tree(struct rdma_id_private *id_priv) 536 + { 537 + struct id_table_entry *data; 538 + unsigned long flags; 539 + 540 + spin_lock_irqsave(&id_table_lock, flags); 541 + if (list_empty(&id_priv->id_list_entry)) 542 + goto out; 543 + 544 + data = node_from_ndev_ip(&id_table, 545 + id_priv->id.route.addr.dev_addr.bound_dev_if, 546 + cma_dst_addr(id_priv)); 547 + if (!data) 548 + goto out; 549 + 550 + list_del_init(&id_priv->id_list_entry); 551 + if (list_empty(&data->id_list)) { 552 + rb_erase(&data->rb_node, &id_table); 553 + kfree(data); 554 + } 555 + out: 556 + spin_unlock_irqrestore(&id_table_lock, flags); 465 557 } 466 558 467 559 static void _cma_attach_to_dev(struct rdma_id_private *id_priv, ··· 609 479 id_priv->id.route.addr.dev_addr.sgid_attr = NULL; 610 480 } 611 481 mutex_unlock(&lock); 612 - } 613 - 614 - static inline struct sockaddr *cma_src_addr(struct rdma_id_private *id_priv) 615 - { 616 - return (struct sockaddr *) &id_priv->id.route.addr.src_addr; 617 - } 618 - 619 - static inline struct sockaddr *cma_dst_addr(struct rdma_id_private *id_priv) 620 - { 621 - return (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 622 482 } 623 483 624 484 static inline unsigned short cma_family(struct rdma_id_private *id_priv) ··· 981 861 
refcount_set(&id_priv->refcount, 1); 982 862 mutex_init(&id_priv->handler_mutex); 983 863 INIT_LIST_HEAD(&id_priv->device_item); 864 + INIT_LIST_HEAD(&id_priv->id_list_entry); 984 865 INIT_LIST_HEAD(&id_priv->listen_list); 985 866 INIT_LIST_HEAD(&id_priv->mc_list); 986 867 get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); ··· 2004 1883 cma_cancel_operation(id_priv, state); 2005 1884 2006 1885 rdma_restrack_del(&id_priv->res); 1886 + cma_remove_id_from_tree(id_priv); 2007 1887 if (id_priv->cma_dev) { 2008 1888 if (rdma_cap_ib_cm(id_priv->id.device, 1)) { 2009 1889 if (id_priv->cm_id.ib) ··· 3294 3172 cma_id_get(id_priv); 3295 3173 if (rdma_cap_ib_sa(id->device, id->port_num)) 3296 3174 ret = cma_resolve_ib_route(id_priv, timeout_ms); 3297 - else if (rdma_protocol_roce(id->device, id->port_num)) 3175 + else if (rdma_protocol_roce(id->device, id->port_num)) { 3298 3176 ret = cma_resolve_iboe_route(id_priv); 3177 + if (!ret) 3178 + cma_add_id_to_tree(id_priv); 3179 + } 3299 3180 else if (rdma_protocol_iwarp(id->device, id->port_num)) 3300 3181 ret = cma_resolve_iw_route(id_priv); 3301 3182 else
+1
drivers/infiniband/core/cma_priv.h
··· 64 64 struct list_head listen_item; 65 65 struct list_head listen_list; 66 66 }; 67 + struct list_head id_list_entry; 67 68 struct cma_device *cma_dev; 68 69 struct list_head mc_list; 69 70