Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfrm: interface: support collect metadata mode

This commit adds support for 'collect_md' mode on xfrm interfaces.

Each net can have at most one collect_md device, created by providing the
IFLA_XFRM_COLLECT_METADATA flag at creation. This device cannot be
altered and has no if_id or link device attributes.

On transmit to this device, the if_id is fetched from the attached dst
metadata on the skb. If present, the link property is also fetched from
the metadata. The dst metadata type used is METADATA_XFRM which holds
these properties.

On the receive side, xfrmi_rcv_cb() populates a dst metadata for each
packet received and attaches it to the skb. The if_id used in this case is
fetched from the xfrm state, and the link is fetched from the incoming
device. This information can later be used by upper layers such as tc,
ebpf, and ip rules.

Because the skb is scrubbed in xfrmi_rcv_cb(), the attachment of the dst
metadata is postponed until after scrubbing. Similarly, xfrm_input() is
adapted to avoid dropping metadata dsts by only dropping 'valid'
(skb_valid_dst(skb) == true) dsts.

Policy matching on packets arriving from collect_md xfrmi devices is
done by using the xfrm state existing in the skb's sec_path.
The xfrm_if_cb.decode_cb() interface implemented by xfrmi_decode_session()
is changed to keep the details of the if_id extraction tucked away
in xfrm_interface.c.

Reviewed-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Eyal Birger <eyal.birger@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

authored by

Eyal Birger and committed by
Steffen Klassert
abc340b3 5182a5d4

+121 -29
+9 -2
include/net/xfrm.h
··· 312 312 struct net *net; 313 313 }; 314 314 315 + struct xfrm_if_decode_session_result { 316 + struct net *net; 317 + u32 if_id; 318 + }; 319 + 315 320 struct xfrm_if_cb { 316 - struct xfrm_if *(*decode_session)(struct sk_buff *skb, 317 - unsigned short family); 321 + bool (*decode_session)(struct sk_buff *skb, 322 + unsigned short family, 323 + struct xfrm_if_decode_session_result *res); 318 324 }; 319 325 320 326 void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); ··· 991 985 struct xfrm_if_parms { 992 986 int link; /* ifindex of underlying L2 interface */ 993 987 u32 if_id; /* interface identifyer */ 988 + bool collect_md; 994 989 }; 995 990 996 991 struct xfrm_if {
+1
include/uapi/linux/if_link.h
··· 694 694 IFLA_XFRM_UNSPEC, 695 695 IFLA_XFRM_LINK, 696 696 IFLA_XFRM_IF_ID, 697 + IFLA_XFRM_COLLECT_METADATA, 697 698 __IFLA_XFRM_MAX 698 699 }; 699 700
+5 -2
net/xfrm/xfrm_input.c
··· 20 20 #include <net/xfrm.h> 21 21 #include <net/ip_tunnels.h> 22 22 #include <net/ip6_tunnel.h> 23 + #include <net/dst_metadata.h> 23 24 24 25 #include "xfrm_inout.h" 25 26 ··· 721 720 sp = skb_sec_path(skb); 722 721 if (sp) 723 722 sp->olen = 0; 724 - skb_dst_drop(skb); 723 + if (skb_valid_dst(skb)) 724 + skb_dst_drop(skb); 725 725 gro_cells_receive(&gro_cells, skb); 726 726 return 0; 727 727 } else { ··· 740 738 sp = skb_sec_path(skb); 741 739 if (sp) 742 740 sp->olen = 0; 743 - skb_dst_drop(skb); 741 + if (skb_valid_dst(skb)) 742 + skb_dst_drop(skb); 744 743 gro_cells_receive(&gro_cells, skb); 745 744 return err; 746 745 }
+101 -20
net/xfrm/xfrm_interface.c
··· 41 41 #include <net/addrconf.h> 42 42 #include <net/xfrm.h> 43 43 #include <net/net_namespace.h> 44 + #include <net/dst_metadata.h> 44 45 #include <net/netns/generic.h> 45 46 #include <linux/etherdevice.h> 46 47 ··· 57 56 struct xfrmi_net { 58 57 /* lists for storing interfaces in use */ 59 58 struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE]; 59 + struct xfrm_if __rcu *collect_md_xfrmi; 60 60 }; 61 61 62 62 #define for_each_xfrmi_rcu(start, xi) \ ··· 79 77 return xi; 80 78 } 81 79 80 + xi = rcu_dereference(xfrmn->collect_md_xfrmi); 81 + if (xi && (xi->dev->flags & IFF_UP)) 82 + return xi; 83 + 82 84 return NULL; 83 85 } 84 86 85 - static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, 86 - unsigned short family) 87 + static bool xfrmi_decode_session(struct sk_buff *skb, 88 + unsigned short family, 89 + struct xfrm_if_decode_session_result *res) 87 90 { 88 91 struct net_device *dev; 92 + struct xfrm_if *xi; 89 93 int ifindex = 0; 90 94 91 95 if (!secpath_exists(skb) || !skb->dev) 92 - return NULL; 96 + return false; 93 97 94 98 switch (family) { 95 99 case AF_INET6: ··· 115 107 } 116 108 117 109 if (!dev || !(dev->flags & IFF_UP)) 118 - return NULL; 110 + return false; 119 111 if (dev->netdev_ops != &xfrmi_netdev_ops) 120 - return NULL; 112 + return false; 121 113 122 - return netdev_priv(dev); 114 + xi = netdev_priv(dev); 115 + res->net = xi->net; 116 + 117 + if (xi->p.collect_md) 118 + res->if_id = xfrm_input_state(skb)->if_id; 119 + else 120 + res->if_id = xi->p.if_id; 121 + return true; 123 122 } 124 123 125 124 static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) ··· 172 157 if (err < 0) 173 158 goto out; 174 159 175 - xfrmi_link(xfrmn, xi); 160 + if (xi->p.collect_md) 161 + rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi); 162 + else 163 + xfrmi_link(xfrmn, xi); 176 164 177 165 return 0; 178 166 ··· 203 185 struct xfrm_if *xi = netdev_priv(dev); 204 186 struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); 205 187 206 - 
xfrmi_unlink(xfrmn, xi); 188 + if (xi->p.collect_md) 189 + RCU_INIT_POINTER(xfrmn->collect_md_xfrmi, NULL); 190 + else 191 + xfrmi_unlink(xfrmn, xi); 207 192 } 208 193 209 194 static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) ··· 235 214 struct xfrm_state *x; 236 215 struct xfrm_if *xi; 237 216 bool xnet; 217 + int link; 238 218 239 219 if (err && !secpath_exists(skb)) 240 220 return 0; ··· 246 224 if (!xi) 247 225 return 1; 248 226 227 + link = skb->dev->ifindex; 249 228 dev = xi->dev; 250 229 skb->dev = dev; 251 230 ··· 277 254 } 278 255 279 256 xfrmi_scrub_packet(skb, xnet); 257 + if (xi->p.collect_md) { 258 + struct metadata_dst *md_dst; 259 + 260 + md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC); 261 + if (!md_dst) 262 + return -ENOMEM; 263 + 264 + md_dst->u.xfrm_info.if_id = x->if_id; 265 + md_dst->u.xfrm_info.link = link; 266 + skb_dst_set(skb, (struct dst_entry *)md_dst); 267 + } 280 268 dev_sw_netstats_rx_add(dev, skb->len); 281 269 282 270 return 0; ··· 303 269 struct net_device *tdev; 304 270 struct xfrm_state *x; 305 271 int err = -1; 272 + u32 if_id; 306 273 int mtu; 307 274 275 + if (xi->p.collect_md) { 276 + struct xfrm_md_info *md_info = skb_xfrm_md_info(skb); 277 + 278 + if (unlikely(!md_info)) 279 + return -EINVAL; 280 + 281 + if_id = md_info->if_id; 282 + fl->flowi_oif = md_info->link; 283 + } else { 284 + if_id = xi->p.if_id; 285 + } 286 + 308 287 dst_hold(dst); 309 - dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id); 288 + dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id); 310 289 if (IS_ERR(dst)) { 311 290 err = PTR_ERR(dst); 312 291 dst = NULL; ··· 330 283 if (!x) 331 284 goto tx_err_link_failure; 332 285 333 - if (x->if_id != xi->p.if_id) 286 + if (x->if_id != if_id) 334 287 goto tx_err_link_failure; 335 288 336 289 tdev = dst->dev; ··· 680 633 681 634 if (data[IFLA_XFRM_IF_ID]) 682 635 parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]); 636 + 637 + if (data[IFLA_XFRM_COLLECT_METADATA]) 638 
+ parms->collect_md = true; 683 639 } 684 640 685 641 static int xfrmi_newlink(struct net *src_net, struct net_device *dev, ··· 695 645 int err; 696 646 697 647 xfrmi_netlink_parms(data, &p); 698 - if (!p.if_id) { 699 - NL_SET_ERR_MSG(extack, "if_id must be non zero"); 700 - return -EINVAL; 701 - } 648 + if (p.collect_md) { 649 + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); 702 650 703 - xi = xfrmi_locate(net, &p); 704 - if (xi) 705 - return -EEXIST; 651 + if (p.link || p.if_id) { 652 + NL_SET_ERR_MSG(extack, "link and if_id must be zero"); 653 + return -EINVAL; 654 + } 655 + 656 + if (rtnl_dereference(xfrmn->collect_md_xfrmi)) 657 + return -EEXIST; 658 + 659 + } else { 660 + if (!p.if_id) { 661 + NL_SET_ERR_MSG(extack, "if_id must be non zero"); 662 + return -EINVAL; 663 + } 664 + 665 + xi = xfrmi_locate(net, &p); 666 + if (xi) 667 + return -EEXIST; 668 + } 706 669 707 670 xi = netdev_priv(dev); 708 671 xi->p = p; ··· 745 682 return -EINVAL; 746 683 } 747 684 685 + if (p.collect_md) { 686 + NL_SET_ERR_MSG(extack, "collect_md can't be changed"); 687 + return -EINVAL; 688 + } 689 + 748 690 xi = xfrmi_locate(net, &p); 749 691 if (!xi) { 750 692 xi = netdev_priv(dev); 751 693 } else { 752 694 if (xi->dev != dev) 753 695 return -EEXIST; 696 + if (xi->p.collect_md) { 697 + NL_SET_ERR_MSG(extack, 698 + "device can't be changed to collect_md"); 699 + return -EINVAL; 700 + } 754 701 } 755 702 756 703 return xfrmi_update(xi, &p); ··· 773 700 nla_total_size(4) + 774 701 /* IFLA_XFRM_IF_ID */ 775 702 nla_total_size(4) + 703 + /* IFLA_XFRM_COLLECT_METADATA */ 704 + nla_total_size(0) + 776 705 0; 777 706 } 778 707 ··· 784 709 struct xfrm_if_parms *parm = &xi->p; 785 710 786 711 if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) || 787 - nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id)) 712 + nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id) || 713 + (xi->p.collect_md && nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA))) 788 714 goto nla_put_failure; 789 715 return 0; 790 716 
··· 801 725 } 802 726 803 727 static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { 804 - [IFLA_XFRM_LINK] = { .type = NLA_U32 }, 805 - [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, 728 + [IFLA_XFRM_UNSPEC] = { .strict_start_type = IFLA_XFRM_COLLECT_METADATA }, 729 + [IFLA_XFRM_LINK] = { .type = NLA_U32 }, 730 + [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, 731 + [IFLA_XFRM_COLLECT_METADATA] = { .type = NLA_FLAG }, 806 732 }; 807 733 808 734 static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { ··· 840 762 xip = &xi->next) 841 763 unregister_netdevice_queue(xi->dev, &list); 842 764 } 765 + xi = rtnl_dereference(xfrmn->collect_md_xfrmi); 766 + if (xi) 767 + unregister_netdevice_queue(xi->dev, &list); 843 768 } 844 769 unregister_netdevice_many(&list); 845 770 rtnl_unlock();
+5 -5
net/xfrm/xfrm_policy.c
··· 3515 3515 int xerr_idx = -1; 3516 3516 const struct xfrm_if_cb *ifcb; 3517 3517 struct sec_path *sp; 3518 - struct xfrm_if *xi; 3519 3518 u32 if_id = 0; 3520 3519 3521 3520 rcu_read_lock(); 3522 3521 ifcb = xfrm_if_get_cb(); 3523 3522 3524 3523 if (ifcb) { 3525 - xi = ifcb->decode_session(skb, family); 3526 - if (xi) { 3527 - if_id = xi->p.if_id; 3528 - net = xi->net; 3524 + struct xfrm_if_decode_session_result r; 3525 + 3526 + if (ifcb->decode_session(skb, family, &r)) { 3527 + if_id = r.if_id; 3528 + net = r.net; 3529 3529 } 3530 3530 } 3531 3531 rcu_read_unlock();