Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: lpm_trie: check left child of last leftmost node for NULL

If the leftmost parent node of the tree does not have a child
on the left side, then trie_get_next_key (and bpftool map dump) will
not look at the child on the right. This leads to the traversal
missing elements.

Lookup is not affected.

Update selftest to handle this case.

Reproducer:

bpftool map create /sys/fs/bpf/lpm type lpm_trie key 6 \
value 1 entries 256 name test_lpm flags 1
bpftool map update pinned /sys/fs/bpf/lpm key 8 0 0 0 0 0 value 1
bpftool map update pinned /sys/fs/bpf/lpm key 16 0 0 0 0 128 value 2
bpftool map dump pinned /sys/fs/bpf/lpm

Returns only 1 element. (2 expected)

Fixes: b471f2f1de8b ("bpf: implement MAP_GET_NEXT_KEY command for LPM_TRIE")
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

authored by

Jonathan Lemon and committed by
Daniel Borkmann
da2577fd dce5cccc

+45 -5
+7 -2
kernel/bpf/lpm_trie.c
··· 716 716 * have exact two children, so this function will never return NULL. 717 717 */ 718 718 for (node = search_root; node;) { 719 - if (!(node->flags & LPM_TREE_NODE_FLAG_IM)) 719 + if (node->flags & LPM_TREE_NODE_FLAG_IM) { 720 + node = rcu_dereference(node->child[0]); 721 + } else { 720 722 next_node = node; 721 - node = rcu_dereference(node->child[0]); 723 + node = rcu_dereference(node->child[0]); 724 + if (!node) 725 + node = rcu_dereference(next_node->child[1]); 726 + } 722 727 } 723 728 do_copy: 724 729 next_key->prefixlen = next_node->prefixlen;
+38 -3
tools/testing/selftests/bpf/test_lpm_map.c
··· 573 573 574 574 /* add one more element (total two) */ 575 575 key_p->prefixlen = 24; 576 - inet_pton(AF_INET, "192.168.0.0", key_p->data); 576 + inet_pton(AF_INET, "192.168.128.0", key_p->data); 577 577 assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 578 578 579 579 memset(key_p, 0, key_size); 580 580 assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 581 581 assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 582 - key_p->data[1] == 168 && key_p->data[2] == 0); 582 + key_p->data[1] == 168 && key_p->data[2] == 128); 583 583 584 584 memset(next_key_p, 0, key_size); 585 585 assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); ··· 592 592 593 593 /* Add one more element (total three) */ 594 594 key_p->prefixlen = 24; 595 - inet_pton(AF_INET, "192.168.128.0", key_p->data); 595 + inet_pton(AF_INET, "192.168.0.0", key_p->data); 596 596 assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 597 597 598 598 memset(key_p, 0, key_size); ··· 625 625 key_p->data[1] == 168 && key_p->data[2] == 0); 626 626 627 627 memset(next_key_p, 0, key_size); 628 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 629 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 630 + next_key_p->data[1] == 168 && next_key_p->data[2] == 1); 631 + 632 + memcpy(key_p, next_key_p, key_size); 633 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 634 + assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 635 + next_key_p->data[1] == 168 && next_key_p->data[2] == 128); 636 + 637 + memcpy(key_p, next_key_p, key_size); 638 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 639 + assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 && 640 + next_key_p->data[1] == 168); 641 + 642 + memcpy(key_p, next_key_p, key_size); 643 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 && 644 + errno == ENOENT); 645 + 646 + /* Add one more element (total five) */ 647 + key_p->prefixlen = 
28; 648 + inet_pton(AF_INET, "192.168.1.128", key_p->data); 649 + assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0); 650 + 651 + memset(key_p, 0, key_size); 652 + assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0); 653 + assert(key_p->prefixlen == 24 && key_p->data[0] == 192 && 654 + key_p->data[1] == 168 && key_p->data[2] == 0); 655 + 656 + memset(next_key_p, 0, key_size); 657 + assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 658 + assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 && 659 + next_key_p->data[1] == 168 && next_key_p->data[2] == 1 && 660 + next_key_p->data[3] == 128); 661 + 662 + memcpy(key_p, next_key_p, key_size); 628 663 assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0); 629 664 assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 && 630 665 next_key_p->data[1] == 168 && next_key_p->data[2] == 1);