Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mctp: Populate socket implementation

Start filling out the socket syscalls: bind, sendmsg & recvmsg.

This requires an input route implementation, so we extend
mctp_route_input to allow lookups on binds & message tags. This only
handles single-packet messages at present; fragmentation will be added
in a future change.

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jeremy Kerr and committed by
David S. Miller
833ef3b9 831119f8

+491 -10
+59
include/net/mctp.h
··· 12 12 #include <linux/bits.h> 13 13 #include <linux/mctp.h> 14 14 #include <net/net_namespace.h> 15 + #include <net/sock.h> 15 16 16 17 /* MCTP packet definitions */ 17 18 struct mctp_hdr { ··· 46 45 { 47 46 return (struct mctp_hdr *)skb_network_header(skb); 48 47 } 48 + 49 + /* socket implementation */ 50 + struct mctp_sock { 51 + struct sock sk; 52 + 53 + /* bind() params */ 54 + int bind_net; 55 + mctp_eid_t bind_addr; 56 + __u8 bind_type; 57 + 58 + /* list of mctp_sk_key, for incoming tag lookup. updates protected 59 + * by sk->net->keys_lock 60 + */ 61 + struct hlist_head keys; 62 + }; 63 + 64 + /* Key for matching incoming packets to sockets or reassembly contexts. 65 + * Packets are matched on (src,dest,tag). 66 + * 67 + * Lifetime requirements: 68 + * 69 + * - keys are free()ed via RCU 70 + * 71 + * - a mctp_sk_key contains a reference to a struct sock; this is valid 72 + * for the life of the key. On sock destruction (through unhash), the key is 73 + * removed from lists (see below), and will not be observable after a RCU 74 + * grace period. 75 + * 76 + * any RX occurring within that grace period may still queue to the socket, 77 + * but will hit the SOCK_DEAD case before the socket is freed. 78 + * 79 + * - these mctp_sk_keys appear on two lists: 80 + * 1) the struct mctp_sock->keys list 81 + * 2) the struct netns_mctp->keys list 82 + * 83 + * updates to either list are performed under the netns_mctp->keys 84 + * lock. 85 + * 86 + * - there is a single destruction path for a mctp_sk_key - through socket 87 + * unhash (see mctp_sk_unhash). This performs the list removal under 88 + * keys_lock. 
89 + */ 90 + struct mctp_sk_key { 91 + mctp_eid_t peer_addr; 92 + mctp_eid_t local_addr; 93 + __u8 tag; /* incoming tag match; invert TO for local */ 94 + 95 + /* we hold a ref to sk when set */ 96 + struct sock *sk; 97 + 98 + /* routing lookup list */ 99 + struct hlist_node hlist; 100 + 101 + /* per-socket list */ 102 + struct hlist_node sklist; 103 + 104 + struct rcu_head rcu; 105 + }; 49 106 50 107 struct mctp_skb_cb { 51 108 unsigned int magic;
+13
include/net/netns/mctp.h
··· 12 12 /* Only updated under RTNL, entries freed via RCU */ 13 13 struct list_head routes; 14 14 15 + /* Bound sockets: list of sockets bound by type. 16 + * This list is updated from non-atomic contexts (under bind_lock), 17 + * and read (under rcu) in packet rx 18 + */ 19 + struct mutex bind_lock; 20 + struct hlist_head binds; 21 + 22 + /* tag allocations. This list is read and updated from atomic contexts, 23 + * but elements are free()ed after a RCU grace-period 24 + */ 25 + spinlock_t keys_lock; 26 + struct hlist_head keys; 27 + 15 28 /* neighbour table */ 16 29 struct mutex neigh_lock; 17 30 struct list_head neighbours;
+196 -7
net/mctp/af_mctp.c
··· 18 18 19 19 /* socket implementation */ 20 20 21 - struct mctp_sock { 22 - struct sock sk; 23 - }; 24 - 25 21 static int mctp_release(struct socket *sock) 26 22 { 27 23 struct sock *sk = sock->sk; ··· 32 36 33 37 static int mctp_bind(struct socket *sock, struct sockaddr *addr, int addrlen) 34 38 { 35 - return 0; 39 + struct sock *sk = sock->sk; 40 + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 41 + struct sockaddr_mctp *smctp; 42 + int rc; 43 + 44 + if (addrlen < sizeof(*smctp)) 45 + return -EINVAL; 46 + 47 + if (addr->sa_family != AF_MCTP) 48 + return -EAFNOSUPPORT; 49 + 50 + if (!capable(CAP_NET_BIND_SERVICE)) 51 + return -EACCES; 52 + 53 + /* it's a valid sockaddr for MCTP, cast and do protocol checks */ 54 + smctp = (struct sockaddr_mctp *)addr; 55 + 56 + lock_sock(sk); 57 + 58 + /* TODO: allow rebind */ 59 + if (sk_hashed(sk)) { 60 + rc = -EADDRINUSE; 61 + goto out_release; 62 + } 63 + msk->bind_net = smctp->smctp_network; 64 + msk->bind_addr = smctp->smctp_addr.s_addr; 65 + msk->bind_type = smctp->smctp_type & 0x7f; /* ignore the IC bit */ 66 + 67 + rc = sk->sk_prot->hash(sk); 68 + 69 + out_release: 70 + release_sock(sk); 71 + 72 + return rc; 36 73 } 37 74 38 75 static int mctp_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 39 76 { 40 - return 0; 77 + DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); 78 + const int hlen = MCTP_HEADER_MAXLEN + sizeof(struct mctp_hdr); 79 + int rc, addrlen = msg->msg_namelen; 80 + struct sock *sk = sock->sk; 81 + struct mctp_skb_cb *cb; 82 + struct mctp_route *rt; 83 + struct sk_buff *skb; 84 + 85 + if (addr) { 86 + if (addrlen < sizeof(struct sockaddr_mctp)) 87 + return -EINVAL; 88 + if (addr->smctp_family != AF_MCTP) 89 + return -EINVAL; 90 + if (addr->smctp_tag & ~(MCTP_TAG_MASK | MCTP_TAG_OWNER)) 91 + return -EINVAL; 92 + 93 + } else { 94 + /* TODO: connect()ed sockets */ 95 + return -EDESTADDRREQ; 96 + } 97 + 98 + if (!capable(CAP_NET_RAW)) 99 + return -EACCES; 100 + 101 + 
rt = mctp_route_lookup(sock_net(sk), addr->smctp_network, 102 + addr->smctp_addr.s_addr); 103 + if (!rt) 104 + return -EHOSTUNREACH; 105 + 106 + skb = sock_alloc_send_skb(sk, hlen + 1 + len, 107 + msg->msg_flags & MSG_DONTWAIT, &rc); 108 + if (!skb) 109 + return rc; 110 + 111 + skb_reserve(skb, hlen); 112 + 113 + /* set type as first byte in payload */ 114 + *(u8 *)skb_put(skb, 1) = addr->smctp_type; 115 + 116 + rc = memcpy_from_msg((void *)skb_put(skb, len), msg, len); 117 + if (rc < 0) { 118 + kfree_skb(skb); 119 + return rc; 120 + } 121 + 122 + /* set up cb */ 123 + cb = __mctp_cb(skb); 124 + cb->net = addr->smctp_network; 125 + 126 + rc = mctp_local_output(sk, rt, skb, addr->smctp_addr.s_addr, 127 + addr->smctp_tag); 128 + 129 + return rc ? : len; 41 130 } 42 131 43 132 static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 44 133 int flags) 45 134 { 46 - return 0; 135 + DECLARE_SOCKADDR(struct sockaddr_mctp *, addr, msg->msg_name); 136 + struct sock *sk = sock->sk; 137 + struct sk_buff *skb; 138 + size_t msglen; 139 + u8 type; 140 + int rc; 141 + 142 + if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK)) 143 + return -EOPNOTSUPP; 144 + 145 + skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &rc); 146 + if (!skb) 147 + return rc; 148 + 149 + if (!skb->len) { 150 + rc = 0; 151 + goto out_free; 152 + } 153 + 154 + /* extract message type, remove from data */ 155 + type = *((u8 *)skb->data); 156 + msglen = skb->len - 1; 157 + 158 + if (len < msglen) 159 + msg->msg_flags |= MSG_TRUNC; 160 + else 161 + len = msglen; 162 + 163 + rc = skb_copy_datagram_msg(skb, 1, msg, len); 164 + if (rc < 0) 165 + goto out_free; 166 + 167 + sock_recv_ts_and_drops(msg, sk, skb); 168 + 169 + if (addr) { 170 + struct mctp_skb_cb *cb = mctp_cb(skb); 171 + /* TODO: expand mctp_skb_cb for header fields? 
*/ 172 + struct mctp_hdr *hdr = mctp_hdr(skb); 173 + 174 + hdr = mctp_hdr(skb); 175 + addr = msg->msg_name; 176 + addr->smctp_family = AF_MCTP; 177 + addr->smctp_network = cb->net; 178 + addr->smctp_addr.s_addr = hdr->src; 179 + addr->smctp_type = type; 180 + addr->smctp_tag = hdr->flags_seq_tag & 181 + (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); 182 + msg->msg_namelen = sizeof(*addr); 183 + } 184 + 185 + rc = len; 186 + 187 + if (flags & MSG_TRUNC) 188 + rc = msglen; 189 + 190 + out_free: 191 + skb_free_datagram(sk, skb); 192 + return rc; 47 193 } 48 194 49 195 static int mctp_setsockopt(struct socket *sock, int level, int optname, ··· 221 83 .sendpage = sock_no_sendpage, 222 84 }; 223 85 86 + static int mctp_sk_init(struct sock *sk) 87 + { 88 + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 89 + 90 + INIT_HLIST_HEAD(&msk->keys); 91 + return 0; 92 + } 93 + 224 94 static void mctp_sk_close(struct sock *sk, long timeout) 225 95 { 226 96 sk_common_release(sk); 97 + } 98 + 99 + static int mctp_sk_hash(struct sock *sk) 100 + { 101 + struct net *net = sock_net(sk); 102 + 103 + mutex_lock(&net->mctp.bind_lock); 104 + sk_add_node_rcu(sk, &net->mctp.binds); 105 + mutex_unlock(&net->mctp.bind_lock); 106 + 107 + return 0; 108 + } 109 + 110 + static void mctp_sk_unhash(struct sock *sk) 111 + { 112 + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 113 + struct net *net = sock_net(sk); 114 + struct mctp_sk_key *key; 115 + struct hlist_node *tmp; 116 + unsigned long flags; 117 + 118 + /* remove from any type-based binds */ 119 + mutex_lock(&net->mctp.bind_lock); 120 + sk_del_node_init_rcu(sk); 121 + mutex_unlock(&net->mctp.bind_lock); 122 + 123 + /* remove tag allocations */ 124 + spin_lock_irqsave(&net->mctp.keys_lock, flags); 125 + hlist_for_each_entry_safe(key, tmp, &msk->keys, sklist) { 126 + hlist_del_rcu(&key->sklist); 127 + hlist_del_rcu(&key->hlist); 128 + kfree_rcu(key, rcu); 129 + } 130 + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); 
131 + 132 + synchronize_rcu(); 227 133 } 228 134 229 135 static struct proto mctp_proto = { 230 136 .name = "MCTP", 231 137 .owner = THIS_MODULE, 232 138 .obj_size = sizeof(struct mctp_sock), 139 + .init = mctp_sk_init, 233 140 .close = mctp_sk_close, 141 + .hash = mctp_sk_hash, 142 + .unhash = mctp_sk_unhash, 234 143 }; 235 144 236 145 static int mctp_pf_create(struct net *net, struct socket *sock, ··· 331 146 static __init int mctp_init(void) 332 147 { 333 148 int rc; 149 + 150 + /* ensure our uapi tag definitions match the header format */ 151 + BUILD_BUG_ON(MCTP_TAG_OWNER != MCTP_HDR_FLAG_TO); 152 + BUILD_BUG_ON(MCTP_TAG_MASK != MCTP_HDR_TAG_MASK); 334 153 335 154 pr_info("mctp: management component transport protocol core\n"); 336 155
+223 -3
net/mctp/route.c
··· 30 30 return 0; 31 31 } 32 32 33 + static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) 34 + { 35 + struct mctp_skb_cb *cb = mctp_cb(skb); 36 + struct mctp_hdr *mh; 37 + struct sock *sk; 38 + u8 type; 39 + 40 + WARN_ON(!rcu_read_lock_held()); 41 + 42 + /* TODO: look up in skb->cb? */ 43 + mh = mctp_hdr(skb); 44 + 45 + if (!skb_headlen(skb)) 46 + return NULL; 47 + 48 + type = (*(u8 *)skb->data) & 0x7f; 49 + 50 + sk_for_each_rcu(sk, &net->mctp.binds) { 51 + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 52 + 53 + if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net) 54 + continue; 55 + 56 + if (msk->bind_type != type) 57 + continue; 58 + 59 + if (msk->bind_addr != MCTP_ADDR_ANY && 60 + msk->bind_addr != mh->dest) 61 + continue; 62 + 63 + return msk; 64 + } 65 + 66 + return NULL; 67 + } 68 + 69 + static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, 70 + mctp_eid_t peer, u8 tag) 71 + { 72 + if (key->local_addr != local) 73 + return false; 74 + 75 + if (key->peer_addr != peer) 76 + return false; 77 + 78 + if (key->tag != tag) 79 + return false; 80 + 81 + return true; 82 + } 83 + 84 + static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, 85 + mctp_eid_t peer) 86 + { 87 + struct mctp_sk_key *key, *ret; 88 + struct mctp_hdr *mh; 89 + u8 tag; 90 + 91 + WARN_ON(!rcu_read_lock_held()); 92 + 93 + mh = mctp_hdr(skb); 94 + tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); 95 + 96 + ret = NULL; 97 + 98 + hlist_for_each_entry_rcu(key, &net->mctp.keys, hlist) { 99 + if (mctp_key_match(key, mh->dest, peer, tag)) { 100 + ret = key; 101 + break; 102 + } 103 + } 104 + 105 + return ret; 106 + } 107 + 33 108 static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) 34 109 { 35 - /* -> to local stack */ 36 - /* TODO: socket lookup, reassemble */ 110 + struct net *net = dev_net(skb->dev); 111 + struct mctp_sk_key *key; 112 + struct mctp_sock *msk; 113 + 
struct mctp_hdr *mh; 114 + 115 + msk = NULL; 116 + 117 + /* we may be receiving a locally-routed packet; drop source sk 118 + * accounting 119 + */ 120 + skb_orphan(skb); 121 + 122 + /* ensure we have enough data for a header and a type */ 123 + if (skb->len < sizeof(struct mctp_hdr) + 1) 124 + goto drop; 125 + 126 + /* grab header, advance data ptr */ 127 + mh = mctp_hdr(skb); 128 + skb_pull(skb, sizeof(struct mctp_hdr)); 129 + 130 + if (mh->ver != 1) 131 + goto drop; 132 + 133 + /* TODO: reassembly */ 134 + if ((mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM)) 135 + != (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM)) 136 + goto drop; 137 + 138 + rcu_read_lock(); 139 + /* 1. lookup socket matching (src,dest,tag) */ 140 + key = mctp_lookup_key(net, skb, mh->src); 141 + 142 + /* 2. lookup socket matching (BCAST,dest,tag) */ 143 + if (!key) 144 + key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY); 145 + 146 + /* 3. SOM? -> lookup bound socket, conditionally (!EOM) create 147 + * mapping for future (1)/(2). 
148 + */ 149 + if (key) 150 + msk = container_of(key->sk, struct mctp_sock, sk); 151 + else if (!msk && (mh->flags_seq_tag & MCTP_HDR_FLAG_SOM)) 152 + msk = mctp_lookup_bind(net, skb); 153 + 154 + if (!msk) 155 + goto unlock_drop; 156 + 157 + sock_queue_rcv_skb(&msk->sk, skb); 158 + 159 + rcu_read_unlock(); 160 + 161 + return 0; 162 + 163 + unlock_drop: 164 + rcu_read_unlock(); 165 + drop: 37 166 kfree_skb(skb); 38 167 return 0; 39 168 } ··· 220 91 return rt; 221 92 } 222 93 94 + /* tag management */ 95 + static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, 96 + struct mctp_sock *msk) 97 + { 98 + struct netns_mctp *mns = &net->mctp; 99 + 100 + lockdep_assert_held(&mns->keys_lock); 101 + 102 + key->sk = &msk->sk; 103 + 104 + /* we hold the net->mctp.keys_lock here, allowing updates to both 105 + * the net and sk 106 + */ 107 + hlist_add_head_rcu(&key->hlist, &mns->keys); 108 + hlist_add_head_rcu(&key->sklist, &msk->keys); 109 + } 110 + 111 + /* Allocate a locally-owned tag value for (saddr, daddr), and reserve 112 + * it for the socket msk 113 + */ 114 + static int mctp_alloc_local_tag(struct mctp_sock *msk, 115 + mctp_eid_t saddr, mctp_eid_t daddr, u8 *tagp) 116 + { 117 + struct net *net = sock_net(&msk->sk); 118 + struct netns_mctp *mns = &net->mctp; 119 + struct mctp_sk_key *key, *tmp; 120 + unsigned long flags; 121 + int rc = -EAGAIN; 122 + u8 tagbits; 123 + 124 + /* be optimistic, alloc now */ 125 + key = kzalloc(sizeof(*key), GFP_KERNEL); 126 + if (!key) 127 + return -ENOMEM; 128 + key->local_addr = saddr; 129 + key->peer_addr = daddr; 130 + 131 + /* 8 possible tag values */ 132 + tagbits = 0xff; 133 + 134 + spin_lock_irqsave(&mns->keys_lock, flags); 135 + 136 + /* Walk through the existing keys, looking for potential conflicting 137 + * tags. 
If we find a conflict, clear that bit from tagbits 138 + */ 139 + hlist_for_each_entry(tmp, &mns->keys, hlist) { 140 + /* if we don't own the tag, it can't conflict */ 141 + if (tmp->tag & MCTP_HDR_FLAG_TO) 142 + continue; 143 + 144 + if ((tmp->peer_addr == daddr || 145 + tmp->peer_addr == MCTP_ADDR_ANY) && 146 + tmp->local_addr == saddr) 147 + tagbits &= ~(1 << tmp->tag); 148 + 149 + if (!tagbits) 150 + break; 151 + } 152 + 153 + if (tagbits) { 154 + key->tag = __ffs(tagbits); 155 + mctp_reserve_tag(net, key, msk); 156 + *tagp = key->tag; 157 + rc = 0; 158 + } 159 + 160 + spin_unlock_irqrestore(&mns->keys_lock, flags); 161 + 162 + if (!tagbits) 163 + kfree(key); 164 + 165 + return rc; 166 + } 167 + 223 168 /* routing lookups */ 224 169 static bool mctp_rt_match_eid(struct mctp_route *rt, 225 170 unsigned int net, mctp_eid_t eid) ··· 343 140 int mctp_local_output(struct sock *sk, struct mctp_route *rt, 344 141 struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) 345 142 { 143 + struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); 346 144 struct mctp_skb_cb *cb = mctp_cb(skb); 347 145 struct mctp_hdr *hdr; 348 146 unsigned long flags; 349 147 mctp_eid_t saddr; 350 148 int rc; 149 + u8 tag; 351 150 352 151 if (WARN_ON(!rt->dev)) 353 152 return -EINVAL; ··· 367 162 if (rc) 368 163 return rc; 369 164 165 + if (req_tag & MCTP_HDR_FLAG_TO) { 166 + rc = mctp_alloc_local_tag(msk, saddr, daddr, &tag); 167 + if (rc) 168 + return rc; 169 + tag |= MCTP_HDR_FLAG_TO; 170 + } else { 171 + tag = req_tag; 172 + } 173 + 370 174 /* TODO: we have the route MTU here; packetise */ 371 175 372 176 skb_reset_transport_header(skb); ··· 385 171 hdr->ver = 1; 386 172 hdr->dest = daddr; 387 173 hdr->src = saddr; 388 - hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM; /* TODO */ 174 + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM | /* TODO */ 175 + tag; 389 176 177 + skb->dev = rt->dev->dev; 390 178 skb->protocol = htons(ETH_P_MCTP); 391 179 skb->priority = 0; 
392 180 ··· 745 529 struct netns_mctp *ns = &net->mctp; 746 530 747 531 INIT_LIST_HEAD(&ns->routes); 532 + INIT_HLIST_HEAD(&ns->binds); 533 + mutex_init(&ns->bind_lock); 534 + INIT_HLIST_HEAD(&ns->keys); 535 + spin_lock_init(&ns->keys_lock); 748 536 return 0; 749 537 } 750 538