Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mctp: Implement extended addressing

This change allows an extended address struct - struct sockaddr_mctp_ext
- to be passed to sendmsg/recvmsg. This allows userspace to specify
output ifindex and physical address information (for sendmsg) or receive
the input ifindex/physaddr for incoming messages (for recvmsg). This is
typically used by userspace for MCTP address discovery and assignment
operations.

The extended addressing facility is conditional on a new sockopt,
MCTP_OPT_ADDR_EXT; userspace must explicitly enable extended addressing
before the kernel will consume/populate the extended address data.

Includes a fix for an uninitialised var:
Reported-by: kernel test robot <lkp@intel.com>

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jeremy Kerr and committed by
David S. Miller
99ce45d5 971f5c40

+170 -39
+1
include/linux/socket.h
··· 365 365 #define SOL_TLS 282 366 366 #define SOL_XDP 283 367 367 #define SOL_MPTCP 284 368 + #define SOL_MCTP 285 368 369 369 370 /* IPX options */ 370 371 #define IPX_TYPE 1
+9 -4
include/net/mctp.h
··· 11 11 12 12 #include <linux/bits.h> 13 13 #include <linux/mctp.h> 14 + #include <linux/netdevice.h> 14 15 #include <net/net_namespace.h> 15 16 #include <net/sock.h> 16 17 ··· 58 57 unsigned int bind_net; 59 58 mctp_eid_t bind_addr; 60 59 __u8 bind_type; 60 + 61 + /* sendmsg()/recvmsg() uses struct sockaddr_mctp_ext */ 62 + bool addr_ext; 61 63 62 64 /* list of mctp_sk_key, for incoming tag lookup. updates protected 63 65 * by sk->net->keys_lock ··· 157 153 struct mctp_skb_cb { 158 154 unsigned int magic; 159 155 unsigned int net; 156 + int ifindex; /* extended/direct addressing if set */ 160 157 mctp_eid_t src; 158 + unsigned char halen; 159 + unsigned char haddr[MAX_ADDR_LEN]; 161 160 }; 162 161 163 162 /* skb control-block accessors with a little extra debugging for initial ··· 184 177 { 185 178 struct mctp_skb_cb *cb = (void *)skb->cb; 186 179 180 + BUILD_BUG_ON(sizeof(struct mctp_skb_cb) > sizeof(skb->cb)); 187 181 WARN_ON(cb->magic != 0x4d435450); 188 182 return (void *)(skb->cb); 189 183 } ··· 197 189 * 198 190 * Updates to the route table are performed under rtnl; all reads under RCU, 199 191 * so routes cannot be referenced over a RCU grace period. Specifically: A 200 - * caller cannot block between mctp_route_lookup and passing the route to 201 - * mctp_do_route. 192 + * caller cannot block between mctp_route_lookup and mctp_route_release() 202 193 */ 203 194 struct mctp_route { 204 195 mctp_eid_t min, max; ··· 216 209 /* route interfaces */ 217 210 struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, 218 211 mctp_eid_t daddr); 219 - 220 - int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb); 221 212 222 213 int mctp_local_output(struct sock *sk, struct mctp_route *rt, 223 214 struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag);
+11
include/uapi/linux/mctp.h
··· 11 11 12 12 #include <linux/types.h> 13 13 #include <linux/socket.h> 14 + #include <linux/netdevice.h> 14 15 15 16 typedef __u8 mctp_eid_t; 16 17 ··· 29 28 __u8 __smctp_pad1; 30 29 }; 31 30 31 + struct sockaddr_mctp_ext { 32 + struct sockaddr_mctp smctp_base; 33 + int smctp_ifindex; 34 + __u8 smctp_halen; 35 + __u8 __smctp_pad0[3]; 36 + __u8 smctp_haddr[MAX_ADDR_LEN]; 37 + }; 38 + 32 39 #define MCTP_NET_ANY 0x0 33 40 34 41 #define MCTP_ADDR_NULL 0x00 ··· 44 35 45 36 #define MCTP_TAG_MASK 0x07 46 37 #define MCTP_TAG_OWNER 0x08 38 + 39 + #define MCTP_OPT_ADDR_EXT 1 47 40 48 41 #endif /* __UAPI_MCTP_H */
+76 -10
net/mctp/af_mctp.c
··· 77 77 const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr); 78 78 int rc, addrlen = msg->msg_namelen; 79 79 struct sock *sk = sock->sk; 80 + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 80 81 struct mctp_skb_cb *cb; 81 82 struct mctp_route *rt; 82 83 struct sk_buff *skb; ··· 101 100 if (addr->smctp_network == MCTP_NET_ANY) 102 101 addr->smctp_network = mctp_default_net(sock_net(sk)); 103 102 104 - rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, 105 - addr->smctp_addr.s_addr); 106 - if (!rt) 107 - return -EHOSTUNREACH; 108 - 109 103 skb = sock_alloc_send_skb(sk, hlen + 1 + len, 110 104 msg->msg_flags & MSG_DONTWAIT, &rc); 111 105 if (!skb) ··· 112 116 *(u8 *)skb_put(skb, 1) = addr->smctp_type; 113 117 114 118 rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len); 115 - if (rc < 0) { 116 - kfree_skb(skb); 117 - return rc; 118 - } 119 + if (rc < 0) 120 + goto err_free; 119 121 120 122 /* set up cb */ 121 123 cb = __mctp_cb(skb); 122 124 cb->net = addr->smctp_network; 123 125 126 + /* direct addressing */ 127 + if (msk->addr_ext && addrlen >= sizeof(struct sockaddr_mctp_ext)) { 128 + DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, 129 + extaddr, msg->msg_name); 130 + 131 + if (extaddr->smctp_halen > sizeof(cb->haddr)) { 132 + rc = -EINVAL; 133 + goto err_free; 134 + } 135 + 136 + cb->ifindex = extaddr->smctp_ifindex; 137 + cb->halen = extaddr->smctp_halen; 138 + memcpy(cb->haddr, extaddr->smctp_haddr, cb->halen); 139 + 140 + rt = NULL; 141 + } else { 142 + rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, 143 + addr->smctp_addr.s_addr); 144 + if (!rt) { 145 + rc = -EHOSTUNREACH; 146 + goto err_free; 147 + } 148 + } 149 + 124 150 rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, 125 151 addr->smctp_tag); 126 152 127 153 return rc ? 
: len; 154 + 155 + err_free: 156 + kfree_skb(skb); 157 + return rc; 128 158 } 129 159 130 160 static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, ··· 158 136 { 159 137 DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); 160 138 struct sock *sk = sock->sk; 139 + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 161 140 struct sk_buff *skb; 162 141 size_t msglen; 163 142 u8 type; ··· 204 181 addr->smctp_tag = hdr->flags_seq_tag & 205 182 (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); 206 183 msg->msg_namelen = sizeof(*addr); 184 + 185 + if (msk->addr_ext) { 186 + DECLARE_SOCKADDR(struct sockaddr_mctp_ext *, ae, 187 + msg->msg_name); 188 + msg->msg_namelen = sizeof(*ae); 189 + ae->smctp_ifindex = cb->ifindex; 190 + ae->smctp_halen = cb->halen; 191 + memset(ae->smctp_haddr, 0x0, sizeof(ae->smctp_haddr)); 192 + memcpy(ae->smctp_haddr, cb->haddr, cb->halen); 193 + } 207 194 } 208 195 209 196 rc = len; ··· 229 196 static int mctp_setsockopt(struct socket *sock, int level, int optname, 230 197 sockptr_t optval, unsigned int optlen) 231 198 { 232 - return -EINVAL; 199 + struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); 200 + int val; 201 + 202 + if (level != SOL_MCTP) 203 + return -EINVAL; 204 + 205 + if (optname == MCTP_OPT_ADDR_EXT) { 206 + if (optlen != sizeof(int)) 207 + return -EINVAL; 208 + if (copy_from_sockptr(&val, optval, sizeof(int))) 209 + return -EFAULT; 210 + msk->addr_ext = val; 211 + return 0; 212 + } 213 + 214 + return -ENOPROTOOPT; 233 215 } 234 216 235 217 static int mctp_getsockopt(struct socket *sock, int level, int optname, 236 218 char __user *optval, int __user *optlen) 237 219 { 220 + struct mctp_sock *msk = container_of(sock->sk, struct mctp_sock, sk); 221 + int len, val; 222 + 223 + if (level != SOL_MCTP) 224 + return -EINVAL; 225 + 226 + if (get_user(len, optlen)) 227 + return -EFAULT; 228 + 229 + if (optname == MCTP_OPT_ADDR_EXT) { 230 + if (len != sizeof(int)) 231 + return -EINVAL; 
232 + val = !!msk->addr_ext; 233 + if (copy_to_user(optval, &val, len)) 234 + return -EFAULT; 235 + return 0; 236 + } 237 + 238 238 return -EINVAL; 239 239 } 240 240
+73 -25
net/mctp/route.c
··· 434 434 435 435 static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) 436 436 { 437 + struct mctp_skb_cb *cb = mctp_cb(skb); 437 438 struct mctp_hdr *hdr = mctp_hdr(skb); 438 439 char daddr_buf[MAX_ADDR_LEN]; 439 440 char *daddr = NULL; ··· 449 448 return -EMSGSIZE; 450 449 } 451 450 452 - /* If lookup fails let the device handle daddr==NULL */ 453 - if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) 454 - daddr = daddr_buf; 451 + if (cb->ifindex) { 452 + /* direct route; use the hwaddr we stashed in sendmsg */ 453 + daddr = cb->haddr; 454 + } else { 455 + /* If lookup fails let the device handle daddr==NULL */ 456 + if (mctp_neigh_lookup(route->dev, hdr->dest, daddr_buf) == 0) 457 + daddr = daddr_buf; 458 + } 455 459 456 460 rc = dev_hard_header(skb, skb->dev, ntohs(skb->protocol), 457 461 daddr, skb->dev->dev_addr, skb->len); ··· 655 649 return NULL; 656 650 } 657 651 658 - /* sends a skb to rt and releases the route. */ 659 - int mctp_do_route(struct mctp_route *rt, struct sk_buff *skb) 660 - { 661 - int rc; 662 - 663 - rc = rt->output(rt, skb); 664 - mctp_route_release(rt); 665 - return rc; 666 - } 667 - 668 652 static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, 669 653 unsigned int mtu, u8 tag) 670 654 { ··· 721 725 /* copy message payload */ 722 726 skb_copy_bits(skb, pos, skb_transport_header(skb2), size); 723 727 724 - /* do route, but don't drop the rt reference */ 728 + /* do route */ 725 729 rc = rt->output(rt, skb2); 726 730 if (rc) 727 731 break; ··· 730 734 pos += size; 731 735 } 732 736 733 - mctp_route_release(rt); 734 737 consume_skb(skb); 735 738 return rc; 736 739 } ··· 739 744 { 740 745 struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 741 746 struct mctp_skb_cb *cb = mctp_cb(skb); 747 + struct mctp_route tmp_rt; 748 + struct net_device *dev; 742 749 struct mctp_hdr *hdr; 743 750 unsigned long flags; 744 751 unsigned int mtu; 745 752 mctp_eid_t saddr; 753 + bool 
ext_rt; 746 754 int rc; 747 755 u8 tag; 748 756 749 - if (WARN_ON(!rt->dev)) 757 + rc = -ENODEV; 758 + 759 + if (rt) { 760 + ext_rt = false; 761 + dev = NULL; 762 + 763 + if (WARN_ON(!rt->dev)) 764 + goto out_release; 765 + 766 + } else if (cb->ifindex) { 767 + ext_rt = true; 768 + rt = &tmp_rt; 769 + 770 + rcu_read_lock(); 771 + dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex); 772 + if (!dev) { 773 + rcu_read_unlock(); 774 + return rc; 775 + } 776 + 777 + rt->dev = __mctp_dev_get(dev); 778 + rcu_read_unlock(); 779 + 780 + if (!rt->dev) 781 + goto out_release; 782 + 783 + /* establish temporary route - we set up enough to keep 784 + * mctp_route_output happy 785 + */ 786 + rt->output = mctp_route_output; 787 + rt->mtu = 0; 788 + 789 + } else { 750 790 return -EINVAL; 791 + } 751 792 752 793 spin_lock_irqsave(&rt->dev->addrs_lock, flags); 753 794 if (rt->dev->num_addrs == 0) { ··· 796 765 spin_unlock_irqrestore(&rt->dev->addrs_lock, flags); 797 766 798 767 if (rc) 799 - return rc; 768 + goto out_release; 800 769 801 770 if (req_tag & MCTP_HDR_FLAG_TO) { 802 771 rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag); 803 772 if (rc) 804 - return rc; 773 + goto out_release; 805 774 tag |= MCTP_HDR_FLAG_TO; 806 775 } else { 807 776 tag = req_tag; 808 777 } 809 - 810 778 811 779 skb->protocol = htons(ETH_P_MCTP); 812 780 skb->priority = 0; ··· 826 796 mtu = mctp_route_mtu(rt); 827 797 828 798 if (skb->len + sizeof(struct mctp_hdr) <= mtu) { 829 - hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | 830 - tag; 831 - return mctp_do_route(rt, skb); 799 + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | 800 + MCTP_HDR_FLAG_EOM | tag; 801 + rc = rt->output(rt, skb); 832 802 } else { 833 - return mctp_do_fragment_route(rt, skb, mtu, tag); 803 + rc = mctp_do_fragment_route(rt, skb, mtu, tag); 834 804 } 805 + 806 + out_release: 807 + if (!ext_rt) 808 + mctp_route_release(rt); 809 + 810 + if (dev) 811 + dev_put(dev); 812 + 813 + return rc; 814 + 835 815 } 836 816 837 817 /* 
route management */ ··· 982 942 if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) 983 943 goto err_drop; 984 944 985 - cb = __mctp_cb(skb); 945 + /* MCTP drivers must populate halen/haddr */ 946 + if (dev->type == ARPHRD_MCTP) { 947 + cb = mctp_cb(skb); 948 + } else { 949 + cb = __mctp_cb(skb); 950 + cb->halen = 0; 951 + } 986 952 cb->net = READ_ONCE(mdev->net); 953 + cb->ifindex = dev->ifindex; 987 954 988 955 rt = mctp_route_lookup(net, cb->net, mh->dest); 989 956 ··· 1001 954 if (!rt) 1002 955 goto err_drop; 1003 956 1004 - mctp_do_route(rt, skb); 957 + rt->output(rt, skb); 958 + mctp_route_release(rt); 1005 959 1006 960 return NET_RX_SUCCESS; 1007 961