Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2#include <linux/types.h>
3#include <linux/skbuff.h>
4#include <linux/socket.h>
5#include <linux/sysctl.h>
6#include <linux/net.h>
7#include <linux/module.h>
8#include <linux/if_arp.h>
9#include <linux/ipv6.h>
10#include <linux/mpls.h>
11#include <linux/netconf.h>
12#include <linux/nospec.h>
13#include <linux/vmalloc.h>
14#include <linux/percpu.h>
15#include <net/gso.h>
16#include <net/ip.h>
17#include <net/dst.h>
18#include <net/sock.h>
19#include <net/arp.h>
20#include <net/ip_fib.h>
21#include <net/netevent.h>
22#include <net/ip_tunnels.h>
23#include <net/netns/generic.h>
24#if IS_ENABLED(CONFIG_IPV6)
25#include <net/ipv6.h>
26#endif
27#include <net/ipv6_stubs.h>
28#include <net/rtnh.h>
29#include "internal.h"
30
31/* max memory we will use for mpls_route */
32#define MAX_MPLS_ROUTE_MEM 4096
33
34/* Maximum number of labels to look ahead at when selecting a path of
35 * a multipath route
36 */
37#define MAX_MP_SELECT_LABELS 4
38
39#define MPLS_NEIGH_TABLE_UNSPEC (NEIGH_LINK_TABLE + 1)
40
41static int label_limit = (1 << 20) - 1;
42static int ttl_max = 255;
43
44#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
/* Extra header length needed to encapsulate MPLS in a GRE tunnel:
 * always exactly one MPLS shim header; @e is unused.
 */
static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
{
	return sizeof(struct mpls_shim_hdr);
}
49
/* Encap ops registered for TUNNEL_ENCAP_MPLS; only the header-length
 * callback is needed (no build_header/err_handler).
 */
static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
	.encap_hlen	= ipgre_mpls_encap_hlen,
};
53
/* Register the MPLS encap ops with the IP tunnel core.
 * Returns 0 on success or a negative errno.
 */
static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}
58
/* Unregister the MPLS encap ops from the IP tunnel core. */
static void ipgre_tunnel_encap_del_mpls_ops(void)
{
	ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
}
63#else
/* CONFIG_NET_IP_TUNNEL disabled: registration is a successful no-op. */
static int ipgre_tunnel_encap_add_mpls_ops(void)
{
	return 0;
}
68
/* CONFIG_NET_IP_TUNNEL disabled: unregistration is a no-op. */
static void ipgre_tunnel_encap_del_mpls_ops(void)
{
}
72#endif
73
74static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
75 struct nlmsghdr *nlh, struct net *net, u32 portid,
76 unsigned int nlm_flags);
77
/* Look up the route for @index in the platform label table.
 *
 * NOTE(review): unlike mpls_route_input_rcu() this performs no bounds
 * check, so the caller must have validated @index against
 * net->mpls.platform_labels (e.g. via mpls_label_ok()).
 * mpls_dereference() is defined in internal.h (not visible here) —
 * presumably it asserts the appropriate lock (RTNL) is held; confirm
 * the locking contract against that header.
 */
static struct mpls_route *mpls_route_input(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	return mpls_dereference(net, platform_label[index]);
}
85
/* RCU-protected route lookup for the packet fast path.
 *
 * Returns NULL when @index is beyond the configured platform label
 * range or no route is installed for it.  Caller must hold
 * rcu_read_lock().
 */
static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned int index)
{
	struct mpls_route __rcu **platform_label;

	/* The platform label table may be smaller than the full 20-bit
	 * label space; an out-of-range label simply has no route.
	 */
	if (index >= net->mpls.platform_labels)
		return NULL;

	platform_label = rcu_dereference(net->mpls.platform_label);
	return rcu_dereference(platform_label[index]);
}
96
97bool mpls_output_possible(const struct net_device *dev)
98{
99 return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
100}
101EXPORT_SYMBOL_GPL(mpls_output_possible);
102
/* Address of the "via" (neighbour address) bytes stored after the
 * labels of @nh; the per-route offset accounts for the route's
 * maximum label count.
 */
static u8 *__mpls_nh_via(struct mpls_route *rt, struct mpls_nh *nh)
{
	return (u8 *)nh + rt->rt_via_offset;
}
107
/* Const-qualified wrapper around __mpls_nh_via() for read-only users.
 * The casts only strip const; no mutation happens here.
 */
static const u8 *mpls_nh_via(const struct mpls_route *rt,
			     const struct mpls_nh *nh)
{
	return __mpls_nh_via((struct mpls_route *)rt, (struct mpls_nh *)nh);
}
113
/* Bytes of MPLS shim headers this nexthop will push on transmit. */
static unsigned int mpls_nh_header_size(const struct mpls_nh *nh)
{
	/* The size of the layer 2.5 labels to be added for this route */
	return nh->nh_labels * sizeof(struct mpls_shim_hdr);
}
119
/* MTU used for the MPLS "packet too big" check on @dev. */
unsigned int mpls_dev_mtu(const struct net_device *dev)
{
	/* The amount of data the layer 2 frame can hold */
	return dev->mtu;
}
EXPORT_SYMBOL_GPL(mpls_dev_mtu);
126
127bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
128{
129 if (skb->len <= mtu)
130 return false;
131
132 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
133 return false;
134
135 return true;
136}
137EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
138
/* Bump transmit unicast counters for a forwarded packet, keyed on the
 * post-forwarding skb->protocol: MPLS packets count against the MPLS
 * device stats, while packets that had their last label popped count
 * against the IPv4/IPv6 MIBs instead.
 *
 * Runs under RCU (uses the *_rcu device lookups).
 */
void mpls_stats_inc_outucastpkts(struct net *net,
				 struct net_device *dev,
				 const struct sk_buff *skb)
{
	struct mpls_dev *mdev;

	if (skb->protocol == htons(ETH_P_MPLS_UC)) {
		mdev = mpls_dev_rcu(dev);
		if (mdev)
			MPLS_INC_STATS_LEN(mdev, skb->len,
					   tx_packets,
					   tx_bytes);
	} else if (skb->protocol == htons(ETH_P_IP)) {
		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
#if IS_ENABLED(CONFIG_IPV6)
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct inet6_dev *in6dev = in6_dev_rcu(dev);

		if (in6dev)
			IP6_UPD_PO_STATS(net, in6dev,
					 IPSTATS_MIB_OUT, skb->len);
#endif
	}
}
EXPORT_SYMBOL_GPL(mpls_stats_inc_outucastpkts);
164
/* Compute the multipath load-balancing hash for @skb.
 *
 * Walks up to MAX_MP_SELECT_LABELS entries of the label stack, mixing
 * each unreserved label into the hash (reserved labels are excluded
 * per RFC 6790).  If an entropy label indicator (ELI) is seen, the
 * label that follows it is the entropy label and terminates the walk.
 * At the bottom of stack, the IPv4/IPv6 addresses and protocol of the
 * payload (sniffed via the IP version nibble) are mixed in as well.
 *
 * May pull skb data; returns the 32-bit hash (0 if nothing hashable).
 */
static u32 mpls_multipath_hash(struct mpls_route *rt, struct sk_buff *skb)
{
	struct mpls_entry_decoded dec;
	unsigned int mpls_hdr_len = 0;
	struct mpls_shim_hdr *hdr;
	bool eli_seen = false;
	int label_index;
	u32 hash = 0;

	for (label_index = 0; label_index < MAX_MP_SELECT_LABELS;
	     label_index++) {
		/* Stop if the skb does not actually contain this entry */
		mpls_hdr_len += sizeof(*hdr);
		if (!pskb_may_pull(skb, mpls_hdr_len))
			break;

		/* Read and decode the current label */
		hdr = mpls_hdr(skb) + label_index;
		dec = mpls_entry_decode(hdr);

		/* RFC6790 - reserved labels MUST NOT be used as keys
		 * for the load-balancing function
		 */
		if (likely(dec.label >= MPLS_LABEL_FIRST_UNRESERVED)) {
			hash = jhash_1word(dec.label, hash);

			/* The entropy label follows the entropy label
			 * indicator, so this means that the entropy
			 * label was just added to the hash - no need to
			 * go any deeper either in the label stack or in the
			 * payload
			 */
			if (eli_seen)
				break;
		} else if (dec.label == MPLS_LABEL_ENTROPY) {
			eli_seen = true;
		}

		if (!dec.bos)
			continue;

		/* found bottom label; does skb have room for a header? */
		if (pskb_may_pull(skb, mpls_hdr_len + sizeof(struct iphdr))) {
			const struct iphdr *v4hdr;

			v4hdr = (const struct iphdr *)(hdr + 1);
			if (v4hdr->version == 4) {
				hash = jhash_3words(ntohl(v4hdr->saddr),
						    ntohl(v4hdr->daddr),
						    v4hdr->protocol, hash);
			} else if (v4hdr->version == 6 &&
				   pskb_may_pull(skb, mpls_hdr_len +
						 sizeof(struct ipv6hdr))) {
				const struct ipv6hdr *v6hdr;

				v6hdr = (const struct ipv6hdr *)(hdr + 1);
				hash = __ipv6_addr_jhash(&v6hdr->saddr, hash);
				hash = __ipv6_addr_jhash(&v6hdr->daddr, hash);
				hash = jhash_1word(v6hdr->nexthdr, hash);
			}
		}

		break;
	}

	return hash;
}
231
/* Return the @index'th nexthop of @rt.  Nexthops are laid out as a
 * flat array of rt_nh_size-byte slots directly after the route.
 */
static struct mpls_nh *mpls_get_nexthop(struct mpls_route *rt, u8 index)
{
	return (struct mpls_nh *)((u8 *)rt->rt_nh + index * rt->rt_nh_size);
}
236
237/* number of alive nexthops (rt->rt_nhn_alive) and the flags for
238 * a next hop (nh->nh_flags) are modified by netdev event handlers.
239 * Since those fields can change at any moment, use READ_ONCE to
240 * access both.
241 */
/* Pick the nexthop for @skb on multipath route @rt.
 *
 * Single-nexthop routes short-circuit without hashing.  Otherwise the
 * packet hash is taken modulo the number of alive nexthops; when all
 * nexthops are alive the index maps directly onto the array, and when
 * some are dead the alive nexthops are walked to find the nh_index'th
 * live one.  Returns NULL if no nexthop is alive.
 *
 * number of alive nexthops (rt->rt_nhn_alive) and the flags for
 * a next hop (nh->nh_flags) are modified by netdev event handlers.
 * Since those fields can change at any moment, use READ_ONCE to
 * access both.
 */
static const struct mpls_nh *mpls_select_multipath(struct mpls_route *rt,
						   struct sk_buff *skb)
{
	u32 hash = 0;
	int nh_index = 0;
	int n = 0;
	u8 alive;

	/* No need to look further into packet if there's only
	 * one path
	 */
	if (rt->rt_nhn == 1)
		return rt->rt_nh;

	alive = READ_ONCE(rt->rt_nhn_alive);
	if (alive == 0)
		return NULL;

	hash = mpls_multipath_hash(rt, skb);
	nh_index = hash % alive;
	/* Fast path: every nexthop is alive, index directly */
	if (alive == rt->rt_nhn)
		goto out;
	for_nexthops(rt) {
		unsigned int nh_flags = READ_ONCE(nh->nh_flags);

		if (nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			continue;
		if (n == nh_index)
			return nh;
		n++;
	} endfor_nexthops(rt);

out:
	return mpls_get_nexthop(rt, nh_index);
}
277
/* Hand a packet off the MPLS data plane after the last label is popped.
 *
 * Determines the payload protocol (from the route, or by sniffing the
 * IP version nibble when the route says MPT_UNSPEC), sets
 * skb->protocol accordingly and writes the outgoing TTL/hop limit —
 * either propagated from the MPLS header or decremented in the IP
 * header, per route/netns policy.  For IPv4 the header checksum is
 * incrementally fixed up to match the new TTL.
 *
 * Returns true on success, false if the payload type could not be
 * determined or the packet is too short.
 */
static bool mpls_egress(struct net *net, struct mpls_route *rt,
			struct sk_buff *skb, struct mpls_entry_decoded dec)
{
	enum mpls_payload_type payload_type;
	bool success = false;

	/* The IPv4 code below accesses through the IPv4 header
	 * checksum, which is 12 bytes into the packet.
	 * The IPv6 code below accesses through the IPv6 hop limit
	 * which is 8 bytes into the packet.
	 *
	 * For all supported cases there should always be at least 12
	 * bytes of packet data present.  The IPv4 header is 20 bytes
	 * without options and the IPv6 header is always 40 bytes
	 * long.
	 */
	if (!pskb_may_pull(skb, 12))
		return false;

	payload_type = rt->rt_payload_type;
	if (payload_type == MPT_UNSPEC)
		/* Version nibble 4/6 matches MPT_IPV4/MPT_IPV6 */
		payload_type = ip_hdr(skb)->version;

	switch (payload_type) {
	case MPT_IPV4: {
		struct iphdr *hdr4 = ip_hdr(skb);
		u8 new_ttl;
		skb->protocol = htons(ETH_P_IP);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * TTL, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			new_ttl = dec.ttl;
		else
			new_ttl = hdr4->ttl ? hdr4->ttl - 1 : 0;

		/* Incremental checksum update for the TTL byte */
		csum_replace2(&hdr4->check,
			      htons(hdr4->ttl << 8),
			      htons(new_ttl << 8));
		hdr4->ttl = new_ttl;
		success = true;
		break;
	}
	case MPT_IPV6: {
		struct ipv6hdr *hdr6 = ipv6_hdr(skb);
		skb->protocol = htons(ETH_P_IPV6);

		/* If propagating TTL, take the decremented TTL from
		 * the incoming MPLS header, otherwise decrement the
		 * hop limit, but only if not 0 to avoid underflow.
		 */
		if (rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED ||
		    (rt->rt_ttl_propagate == MPLS_TTL_PROP_DEFAULT &&
		     net->mpls.ip_ttl_propagate))
			hdr6->hop_limit = dec.ttl;
		else if (hdr6->hop_limit)
			hdr6->hop_limit = hdr6->hop_limit - 1;
		success = true;
		break;
	}
	case MPT_UNSPEC:
		/* Should have decided which protocol it is by now */
		break;
	}

	return success;
}
349
/* Receive handler for ETH_P_MPLS_UC packets: the MPLS forwarding fast
 * path.  Decodes the top label, looks up the route, selects a nexthop,
 * rewrites the label stack (or hands off to mpls_egress() at the
 * penultimate hop) and transmits via neigh_xmit().
 *
 * Always consumes @skb.  Returns 0 on transmit attempt, NET_RX_DROP on
 * any failure; per-device counters record the failure class.
 */
static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
			struct packet_type *pt, struct net_device *orig_dev)
{
	struct net *net = dev_net_rcu(dev);
	struct mpls_shim_hdr *hdr;
	const struct mpls_nh *nh;
	struct mpls_route *rt;
	struct mpls_entry_decoded dec;
	struct net_device *out_dev;
	struct mpls_dev *out_mdev;
	struct mpls_dev *mdev;
	unsigned int hh_len;
	unsigned int new_header_size;
	unsigned int mtu;
	int err;

	/* Careful this entire function runs inside of an rcu critical section */

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		goto drop;

	MPLS_INC_STATS_LEN(mdev, skb->len, rx_packets,
			   rx_bytes);

	/* Per-device sysctl gate for accepting MPLS input */
	if (!mdev->input_enabled) {
		MPLS_INC_STATS(mdev, rx_dropped);
		goto drop;
	}

	if (skb->pkt_type != PACKET_HOST)
		goto err;

	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto err;

	if (!pskb_may_pull(skb, sizeof(*hdr)))
		goto err;

	skb_dst_drop(skb);

	/* Read and decode the label */
	hdr = mpls_hdr(skb);
	dec = mpls_entry_decode(hdr);

	rt = mpls_route_input_rcu(net, dec.label);
	if (!rt) {
		MPLS_INC_STATS(mdev, rx_noroute);
		goto drop;
	}

	/* mpls_select_multipath() may pull skb data for hashing, so it
	 * must run before the label is popped below.
	 */
	nh = mpls_select_multipath(rt, skb);
	if (!nh)
		goto err;

	/* Pop the label */
	skb_pull(skb, sizeof(*hdr));
	skb_reset_network_header(skb);

	skb_orphan(skb);

	if (skb_warn_if_lro(skb))
		goto err;

	skb_forward_csum(skb);

	/* Verify ttl is valid */
	if (dec.ttl <= 1)
		goto err;

	/* Find the output device */
	out_dev = nh->nh_dev;
	if (!mpls_output_possible(out_dev))
		goto tx_err;

	/* Verify the destination can hold the packet */
	new_header_size = mpls_nh_header_size(nh);
	mtu = mpls_dev_mtu(out_dev);
	if (mpls_pkt_too_big(skb, mtu - new_header_size))
		goto tx_err;

	hh_len = LL_RESERVED_SPACE(out_dev);
	if (!out_dev->header_ops)
		hh_len = 0;

	/* Ensure there is enough space for the headers in the skb */
	if (skb_cow(skb, hh_len + new_header_size))
		goto tx_err;

	skb->dev = out_dev;
	skb->protocol = htons(ETH_P_MPLS_UC);

	dec.ttl -= 1;
	if (unlikely(!new_header_size && dec.bos)) {
		/* Penultimate hop popping */
		if (!mpls_egress(net, rt, skb, dec))
			goto err;
	} else {
		bool bos;
		int i;
		skb_push(skb, new_header_size);
		skb_reset_network_header(skb);
		/* Push the new labels, innermost first so the bottom-of-
		 * stack bit lands on the last entry only.
		 */
		hdr = mpls_hdr(skb);
		bos = dec.bos;
		for (i = nh->nh_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(nh->nh_label[i],
						   dec.ttl, 0, bos);
			bos = false;
		}
	}

	mpls_stats_inc_outucastpkts(net, out_dev, skb);

	/* If via wasn't specified then send out using device address */
	if (nh->nh_via_table == MPLS_NEIGH_TABLE_UNSPEC)
		err = neigh_xmit(NEIGH_LINK_TABLE, out_dev,
				 out_dev->dev_addr, skb);
	else
		err = neigh_xmit(nh->nh_via_table, out_dev,
				 mpls_nh_via(rt, nh), skb);
	if (err)
		net_dbg_ratelimited("%s: packet transmission failed: %d\n",
				    __func__, err);
	return 0;

tx_err:
	out_mdev = out_dev ? mpls_dev_rcu(out_dev) : NULL;
	if (out_mdev)
		MPLS_INC_STATS(out_mdev, tx_errors);
	goto drop;
err:
	MPLS_INC_STATS(mdev, rx_errors);
drop:
	kfree_skb(skb);
	return NET_RX_DROP;
}
487
/* Hooks mpls_forward() into the RX path for unicast MPLS frames. */
static struct packet_type mpls_packet_type __read_mostly = {
	.type = cpu_to_be16(ETH_P_MPLS_UC),
	.func = mpls_forward,
};
492
/* Netlink validation policy for RTM_{NEW,DEL,GET}ROUTE on AF_MPLS.
 * Attributes not listed (e.g. RTA_VIA, RTA_NEWDST) are validated
 * by hand in the handlers.
 */
static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
	[RTA_DST]		= { .type = NLA_U32 },
	[RTA_OIF]		= { .type = NLA_U32 },
	[RTA_TTL_PROPAGATE]	= { .type = NLA_U8 },
};
498
/* Parsed form of a route add/delete request, decoupled from netlink. */
struct mpls_route_config {
	u32			rc_protocol;	/* routing protocol (rtm_protocol) */
	u32			rc_ifindex;	/* output interface, 0 = unspecified */
	u8			rc_via_table;	/* NEIGH_*_TABLE for the via address */
	u8			rc_via_alen;	/* length of rc_via in bytes */
	u8			rc_via[MAX_VIA_ALEN];	/* nexthop (via) address */
	u32			rc_label;	/* incoming label (table index) */
	u8			rc_ttl_propagate;	/* MPLS_TTL_PROP_* policy */
	u8			rc_output_labels;	/* count of labels to push */
	u32			rc_output_label[MAX_NEW_LABELS];
	u32			rc_nlflags;	/* NLM_F_* request flags */
	enum mpls_payload_type	rc_payload_type;
	struct nl_info		rc_nlinfo;	/* netns + notification info */
	struct rtnexthop	*rc_mp;		/* multipath nexthops, or NULL */
	int			rc_mp_len;	/* byte length of rc_mp */
};
515
516/* all nexthops within a route have the same size based on max
517 * number of labels and max via length for a hop
518 */
/* all nexthops within a route have the same size based on max
 * number of labels and max via length for a hop
 *
 * Allocate a route with @num_nh nexthop slots.  Returns the route or
 * an ERR_PTR: -EINVAL if the total would exceed MAX_MPLS_ROUTE_MEM,
 * -ENOMEM on allocation failure.  All nexthops start out alive.
 */
static struct mpls_route *mpls_rt_alloc(u8 num_nh, u8 max_alen, u8 max_labels)
{
	u8 nh_size = MPLS_NH_SIZE(max_labels, max_alen);
	struct mpls_route *rt;
	size_t size;

	/* Cap total route memory; also bounds attacker-supplied nh counts */
	size = sizeof(*rt) + num_nh * nh_size;
	if (size > MAX_MPLS_ROUTE_MEM)
		return ERR_PTR(-EINVAL);

	rt = kzalloc(size, GFP_KERNEL);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	rt->rt_nhn = num_nh;
	rt->rt_nhn_alive = num_nh;
	rt->rt_nh_size = nh_size;
	rt->rt_via_offset = MPLS_NH_VIA_OFF(max_labels);

	return rt;
}
540
/* RCU callback: drop each nexthop's device reference, then free the
 * route.  Runs after all readers that could see the route are done.
 */
static void mpls_rt_free_rcu(struct rcu_head *head)
{
	struct mpls_route *rt;

	rt = container_of(head, struct mpls_route, rt_rcu);

	change_nexthops(rt) {
		/* netdev_put() tolerates a NULL nh_dev */
		netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	} endfor_nexthops(rt);

	kfree(rt);
}
553
554static void mpls_rt_free(struct mpls_route *rt)
555{
556 if (rt)
557 call_rcu(&rt->rt_rcu, mpls_rt_free_rcu);
558}
559
560static void mpls_notify_route(struct net *net, unsigned index,
561 struct mpls_route *old, struct mpls_route *new,
562 const struct nl_info *info)
563{
564 struct nlmsghdr *nlh = info ? info->nlh : NULL;
565 unsigned portid = info ? info->portid : 0;
566 int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
567 struct mpls_route *rt = new ? new : old;
568 unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
569 /* Ignore reserved labels for now */
570 if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
571 rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
572}
573
/* Install @new at label @index (NULL deletes), notify userspace, and
 * free the replaced route via RCU once readers are done.
 *
 * NOTE(review): mpls_dereference() is defined in internal.h — this
 * path presumably runs with RTNL held; confirm against that header.
 */
static void mpls_route_update(struct net *net, unsigned index,
			      struct mpls_route *new,
			      const struct nl_info *info)
{
	struct mpls_route __rcu **platform_label;
	struct mpls_route *rt;

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	rt = mpls_dereference(net, platform_label[index]);
	rcu_assign_pointer(platform_label[index], new);

	mpls_notify_route(net, index, rt, new, info);

	/* If we removed a route free it now */
	mpls_rt_free(rt);
}
590
591static unsigned int find_free_label(struct net *net)
592{
593 unsigned int index;
594
595 for (index = MPLS_LABEL_FIRST_UNRESERVED;
596 index < net->mpls.platform_labels;
597 index++) {
598 if (!mpls_route_input(net, index))
599 return index;
600 }
601
602 return LABEL_NOT_SPECIFIED;
603}
604
605#if IS_ENABLED(CONFIG_INET)
/* Resolve the output device for an IPv4 via address by doing a FIB
 * lookup.  On success takes a tracked reference on the device (stored
 * against @nh's tracker) and returns it; otherwise returns an ERR_PTR.
 */
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	struct net_device *dev;
	struct rtable *rt;
	struct in_addr daddr;

	/* @addr is a raw byte buffer; copy to get proper alignment */
	memcpy(&daddr, addr, sizeof(struct in_addr));
	rt = ip_route_output(net, daddr.s_addr, 0, 0, 0, RT_SCOPE_UNIVERSE);
	if (IS_ERR(rt))
		return ERR_CAST(rt);

	/* Hold the device before releasing the route that pins it */
	dev = rt->dst.dev;
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	ip_rt_put(rt);

	return dev;
}
625#else
/* CONFIG_INET disabled: IPv4 via addresses are unsupported. */
static struct net_device *inet_fib_lookup_dev(struct net *net,
					      struct mpls_nh *nh,
					      const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
632#endif
633
634#if IS_ENABLED(CONFIG_IPV6)
/* Resolve the output device for an IPv6 via address via the IPv6 stub
 * (IPv6 may be a module).  On success takes a tracked device reference
 * and returns the device; otherwise returns an ERR_PTR.
 */
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	struct net_device *dev;
	struct dst_entry *dst;
	struct flowi6 fl6;

	/* IPv6 module not loaded */
	if (!ipv6_stub)
		return ERR_PTR(-EAFNOSUPPORT);

	memset(&fl6, 0, sizeof(fl6));
	memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
	dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
	if (IS_ERR(dst))
		return ERR_CAST(dst);

	/* Hold the device before releasing the dst that pins it */
	dev = dst->dev;
	netdev_hold(dev, &nh->nh_dev_tracker, GFP_KERNEL);
	dst_release(dst);

	return dev;
}
658#else
/* CONFIG_IPV6 disabled: IPv6 via addresses are unsupported. */
static struct net_device *inet6_fib_lookup_dev(struct net *net,
					       struct mpls_nh *nh,
					       const void *addr)
{
	return ERR_PTR(-EAFNOSUPPORT);
}
665#endif
666
/* Determine the output device for nexthop @nh.
 *
 * If @oif is given, it names the device directly; otherwise the
 * device is derived from a FIB lookup on the via address (IPv4/IPv6).
 * A link-scoped via with no @oif yields -ENODEV.  On success the
 * device reference is already tracked and nh->nh_dev is set; on
 * failure an ERR_PTR is returned and nh_dev is untouched.
 */
static struct net_device *find_outdev(struct net *net,
				      struct mpls_route *rt,
				      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;

	if (!oif) {
		switch (nh->nh_via_table) {
		case NEIGH_ARP_TABLE:
			dev = inet_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_ND_TABLE:
			dev = inet6_fib_lookup_dev(net, nh, mpls_nh_via(rt, nh));
			break;
		case NEIGH_LINK_TABLE:
			/* no way to infer a device; dev stays NULL */
			break;
		}
	} else {
		dev = netdev_get_by_index(net, oif,
					  &nh->nh_dev_tracker, GFP_KERNEL);
	}

	if (!dev)
		return ERR_PTR(-ENODEV);

	if (IS_ERR(dev))
		return dev;

	nh->nh_dev = dev;

	return dev;
}
699
/* Resolve and validate the output device for @nh.
 *
 * Finds the device (via @oif or a FIB lookup), verifies it is
 * MPLS-capable and — for link-scoped vias — that the via length
 * matches the device address length, then records DEAD/LINKDOWN
 * flags from the current device state.  Returns 0 or a negative
 * errno; on failure the tracked device reference is released.
 */
static int mpls_nh_assign_dev(struct net *net, struct mpls_route *rt,
			      struct mpls_nh *nh, int oif)
{
	struct net_device *dev = NULL;
	int err = -ENODEV;

	dev = find_outdev(net, rt, nh, oif);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto errout;
	}

	/* Ensure this is a supported device */
	err = -EINVAL;
	if (!mpls_dev_get(net, dev))
		goto errout_put;

	/* A link-layer via must be a full hardware address */
	if ((nh->nh_via_table == NEIGH_LINK_TABLE) &&
	    (dev->addr_len != nh->nh_via_alen))
		goto errout_put;

	if (!(dev->flags & IFF_UP)) {
		nh->nh_flags |= RTNH_F_DEAD;
	} else {
		unsigned int flags;

		flags = netif_get_flags(dev);
		if (!(flags & (IFF_RUNNING | IFF_LOWER_UP)))
			nh->nh_flags |= RTNH_F_LINKDOWN;
	}

	return 0;

errout_put:
	netdev_put(nh->nh_dev, &nh->nh_dev_tracker);
	nh->nh_dev = NULL;
errout:
	return err;
}
739
/* Parse an RTA_VIA attribute into (@via_alen, @via_table, @via_addr).
 *
 * Validates the attribute length, maps the address family to a
 * neighbour table, and enforces the expected address size (4 for
 * IPv4, 16 for IPv6; AF_PACKET accepts any length up to
 * MAX_VIA_ALEN).  Returns 0 or -EINVAL with an extack message.
 */
static int nla_get_via(const struct nlattr *nla, u8 *via_alen, u8 *via_table,
		       u8 via_addr[], struct netlink_ext_ack *extack)
{
	struct rtvia *via = nla_data(nla);
	int err = -EINVAL;
	int alen;

	if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr)) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid attribute length for RTA_VIA");
		goto errout;
	}
	alen = nla_len(nla) -
			offsetof(struct rtvia, rtvia_addr);
	if (alen > MAX_VIA_ALEN) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid address length for RTA_VIA");
		goto errout;
	}

	/* Validate the address family */
	switch (via->rtvia_family) {
	case AF_PACKET:
		*via_table = NEIGH_LINK_TABLE;
		break;
	case AF_INET:
		*via_table = NEIGH_ARP_TABLE;
		if (alen != 4)
			goto errout;
		break;
	case AF_INET6:
		*via_table = NEIGH_ND_TABLE;
		if (alen != 16)
			goto errout;
		break;
	default:
		/* Unsupported address family */
		goto errout;
	}

	memcpy(via_addr, via->rtvia_addr, alen);
	*via_alen = alen;
	err = 0;

errout:
	return err;
}
787
/* Populate the single nexthop of @rt from a parsed route config
 * (non-multipath case): copies output labels and via address, then
 * resolves the output device.  Decrements rt_nhn_alive if the device
 * is dead or link-down.  Returns 0 or a negative errno.
 */
static int mpls_nh_build_from_cfg(struct mpls_route_config *cfg,
				  struct mpls_route *rt)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_nh *nh = rt->rt_nh;
	int err;
	int i;

	if (!nh)
		return -ENOMEM;

	nh->nh_labels = cfg->rc_output_labels;
	for (i = 0; i < nh->nh_labels; i++)
		nh->nh_label[i] = cfg->rc_output_label[i];

	nh->nh_via_table = cfg->rc_via_table;
	memcpy(__mpls_nh_via(rt, nh), cfg->rc_via, cfg->rc_via_alen);
	nh->nh_via_alen = cfg->rc_via_alen;

	err = mpls_nh_assign_dev(net, rt, nh, cfg->rc_ifindex);
	if (err)
		goto errout;

	/* Keep the alive count in sync with the device state just read */
	if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
		rt->rt_nhn_alive--;

	return 0;

errout:
	return err;
}
819
/* Populate one nexthop of a multipath route from netlink attributes.
 *
 * @newdst carries the labels to push (optional), @via the nexthop
 * address (optional; absent means "use the device address" —
 * MPLS_NEIGH_TABLE_UNSPEC).  Finishes by resolving the output device.
 * Returns 0 or a negative errno with extack set by the parsers.
 */
static int mpls_nh_build(struct net *net, struct mpls_route *rt,
			 struct mpls_nh *nh, int oif, struct nlattr *via,
			 struct nlattr *newdst, u8 max_labels,
			 struct netlink_ext_ack *extack)
{
	int err = -ENOMEM;

	if (!nh)
		goto errout;

	if (newdst) {
		err = nla_get_labels(newdst, max_labels, &nh->nh_labels,
				     nh->nh_label, extack);
		if (err)
			goto errout;
	}

	if (via) {
		err = nla_get_via(via, &nh->nh_via_alen, &nh->nh_via_table,
				  __mpls_nh_via(rt, nh), extack);
		if (err)
			goto errout;
	} else {
		/* No via: transmit to the output device's own address */
		nh->nh_via_table = MPLS_NEIGH_TABLE_UNSPEC;
	}

	err = mpls_nh_assign_dev(net, rt, nh, oif);
	if (err)
		goto errout;

	return 0;

errout:
	return err;
}
855
/* Pre-scan a multipath nexthop list: count the nexthops and find the
 * largest via length and label count, so the route can be allocated
 * with uniformly sized nexthop slots.
 *
 * Returns the nexthop count, or 0 on any invalid configuration
 * (bad labels, trailing bytes, or more than 255 nexthops).
 * NOTE(review): @cfg_via_alen is currently unused here.
 */
static u8 mpls_count_nexthops(struct rtnexthop *rtnh, int len,
			      u8 cfg_via_alen, u8 *max_via_alen,
			      u8 *max_labels)
{
	int remaining = len;
	u8 nhs = 0;

	*max_via_alen = 0;
	*max_labels = 0;

	while (rtnh_ok(rtnh, remaining)) {
		struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
		int attrlen;
		u8 n_labels = 0;

		attrlen = rtnh_attrlen(rtnh);
		nla = nla_find(attrs, attrlen, RTA_VIA);
		if (nla && nla_len(nla) >=
		    offsetof(struct rtvia, rtvia_addr)) {
			int via_alen = nla_len(nla) -
				offsetof(struct rtvia, rtvia_addr);

			/* Oversized vias are rejected later by nla_get_via() */
			if (via_alen <= MAX_VIA_ALEN)
				*max_via_alen = max_t(u16, *max_via_alen,
						      via_alen);
		}

		nla = nla_find(attrs, attrlen, RTA_NEWDST);
		if (nla &&
		    nla_get_labels(nla, MAX_NEW_LABELS, &n_labels,
				   NULL, NULL) != 0)
			return 0;

		*max_labels = max_t(u8, *max_labels, n_labels);

		/* number of nexthops is tracked by a u8.
		 * Check for overflow.
		 */
		if (nhs == 255)
			return 0;
		nhs++;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	return remaining > 0 ? 0 : nhs;
}
904
/* Populate all nexthops of a multipath route from the request's
 * rtnexthop list, rebuilding rt_nhn as each hop is validated.
 * Rejects weighted nexthops and any nexthop flags.  Returns 0 or a
 * negative errno; on failure the caller frees the route.
 */
static int mpls_nh_build_multi(struct mpls_route_config *cfg,
			       struct mpls_route *rt, u8 max_labels,
			       struct netlink_ext_ack *extack)
{
	struct rtnexthop *rtnh = cfg->rc_mp;
	struct nlattr *nla_via, *nla_newdst;
	int remaining = cfg->rc_mp_len;
	int err = 0;

	/* Recounted below as hops are accepted */
	rt->rt_nhn = 0;

	change_nexthops(rt) {
		int attrlen;

		nla_via = NULL;
		nla_newdst = NULL;

		err = -EINVAL;
		if (!rtnh_ok(rtnh, remaining))
			goto errout;

		/* neither weighted multipath nor any flags
		 * are supported
		 */
		if (rtnh->rtnh_hops || rtnh->rtnh_flags)
			goto errout;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *attrs = rtnh_attrs(rtnh);

			nla_via = nla_find(attrs, attrlen, RTA_VIA);
			nla_newdst = nla_find(attrs, attrlen, RTA_NEWDST);
		}

		err = mpls_nh_build(cfg->rc_nlinfo.nl_net, rt, nh,
				    rtnh->rtnh_ifindex, nla_via, nla_newdst,
				    max_labels, extack);
		if (err)
			goto errout;

		/* Account for hops that start out dead or link-down */
		if (nh->nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))
			rt->rt_nhn_alive--;

		rtnh = rtnh_next(rtnh, &remaining);
		rt->rt_nhn++;
	} endfor_nexthops(rt);

	return 0;

errout:
	return err;
}
958
/* Validate a user-supplied label @*index: it must be unreserved and
 * within the configured platform label range.  On success the index
 * is sanitized with array_index_nospec() (Spectre-v1 hardening) —
 * callers must use the written-back value for array indexing.
 */
static bool mpls_label_ok(struct net *net, unsigned int *index,
			  struct netlink_ext_ack *extack)
{
	/* Reserved labels may not be set */
	if (*index < MPLS_LABEL_FIRST_UNRESERVED) {
		NL_SET_ERR_MSG(extack,
			       "Invalid label - must be MPLS_LABEL_FIRST_UNRESERVED or higher");
		return false;
	}

	/* The full 20 bit range may not be supported. */
	if (*index >= net->mpls.platform_labels) {
		NL_SET_ERR_MSG(extack,
			       "Label >= configured maximum in platform_labels");
		return false;
	}

	/* Must follow the bounds check above to be effective */
	*index = array_index_nospec(*index, net->mpls.platform_labels);

	return true;
}
980
/* Install (or replace) the route described by @cfg.
 *
 * Validates the label and NLM_F_* flag combination against the
 * existing table entry, sizes and allocates the route from either the
 * multipath nexthop list or the single-nexthop config, then publishes
 * it via mpls_route_update().  Returns 0 or a negative errno.
 */
static int mpls_route_add(struct mpls_route_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct net *net = cfg->rc_nlinfo.nl_net;
	struct mpls_route *rt, *old;
	int err = -EINVAL;
	u8 max_via_alen;
	unsigned index;
	u8 max_labels;
	u8 nhs;

	index = cfg->rc_label;

	/* If a label was not specified during insert pick one */
	if ((index == LABEL_NOT_SPECIFIED) &&
	    (cfg->rc_nlflags & NLM_F_CREATE)) {
		index = find_free_label(net);
	}

	/* Also rejects a failed find_free_label() (LABEL_NOT_SPECIFIED) */
	if (!mpls_label_ok(net, &index, extack))
		goto errout;

	/* Append makes no sense with mpls */
	err = -EOPNOTSUPP;
	if (cfg->rc_nlflags & NLM_F_APPEND) {
		NL_SET_ERR_MSG(extack, "MPLS does not support route append");
		goto errout;
	}

	err = -EEXIST;
	old = mpls_route_input(net, index);
	if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
		goto errout;

	err = -EEXIST;
	if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
		goto errout;

	err = -ENOENT;
	if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
		goto errout;

	err = -EINVAL;
	if (cfg->rc_mp) {
		nhs = mpls_count_nexthops(cfg->rc_mp, cfg->rc_mp_len,
					  cfg->rc_via_alen, &max_via_alen,
					  &max_labels);
	} else {
		max_via_alen = cfg->rc_via_alen;
		max_labels = cfg->rc_output_labels;
		nhs = 1;
	}

	if (nhs == 0) {
		NL_SET_ERR_MSG(extack, "Route does not contain a nexthop");
		goto errout;
	}

	rt = mpls_rt_alloc(nhs, max_via_alen, max_labels);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		goto errout;
	}

	rt->rt_protocol = cfg->rc_protocol;
	rt->rt_payload_type = cfg->rc_payload_type;
	rt->rt_ttl_propagate = cfg->rc_ttl_propagate;

	if (cfg->rc_mp)
		err = mpls_nh_build_multi(cfg, rt, max_labels, extack);
	else
		err = mpls_nh_build_from_cfg(cfg, rt);
	if (err)
		goto freert;

	/* Publishes the route and frees any replaced one via RCU */
	mpls_route_update(net, index, rt, &cfg->rc_nlinfo);

	return 0;

freert:
	mpls_rt_free(rt);
errout:
	return err;
}
1065
1066static int mpls_route_del(struct mpls_route_config *cfg,
1067 struct netlink_ext_ack *extack)
1068{
1069 struct net *net = cfg->rc_nlinfo.nl_net;
1070 unsigned index;
1071 int err = -EINVAL;
1072
1073 index = cfg->rc_label;
1074
1075 if (!mpls_label_ok(net, &index, extack))
1076 goto errout;
1077
1078 mpls_route_update(net, index, NULL, &cfg->rc_nlinfo);
1079
1080 err = 0;
1081errout:
1082 return err;
1083}
1084
/* Aggregate per-CPU MPLS device statistics into @stats.
 * Each CPU's counters are snapshotted under its u64_stats seqcount
 * (retrying if a writer raced) before being summed.
 */
static void mpls_get_stats(struct mpls_dev *mdev,
			   struct mpls_link_stats *stats)
{
	struct mpls_pcpu_stats *p;
	int i;

	memset(stats, 0, sizeof(*stats));

	for_each_possible_cpu(i) {
		struct mpls_link_stats local;
		unsigned int start;

		p = per_cpu_ptr(mdev->stats, i);
		do {
			start = u64_stats_fetch_begin(&p->syncp);
			/* struct copy gives a consistent snapshot */
			local = p->stats;
		} while (u64_stats_fetch_retry(&p->syncp, start));

		stats->rx_packets	+= local.rx_packets;
		stats->rx_bytes		+= local.rx_bytes;
		stats->tx_packets	+= local.tx_packets;
		stats->tx_bytes		+= local.tx_bytes;
		stats->rx_errors	+= local.rx_errors;
		stats->tx_errors	+= local.tx_errors;
		stats->rx_dropped	+= local.rx_dropped;
		stats->tx_dropped	+= local.tx_dropped;
		stats->rx_noroute	+= local.rx_noroute;
	}
}
1114
/* Per-AF stats fill callback: emit an MPLS_STATS_LINK attribute with
 * the device's aggregated link statistics.  Returns 0, -ENODATA if
 * the device has no MPLS state, or -EMSGSIZE if the skb is full.
 */
static int mpls_fill_stats_af(struct sk_buff *skb,
			      const struct net_device *dev)
{
	struct mpls_link_stats *stats;
	struct mpls_dev *mdev;
	struct nlattr *nla;

	mdev = mpls_dev_rcu(dev);
	if (!mdev)
		return -ENODATA;

	nla = nla_reserve_64bit(skb, MPLS_STATS_LINK,
				sizeof(struct mpls_link_stats),
				MPLS_STATS_UNSPEC);
	if (!nla)
		return -EMSGSIZE;

	/* Fill the reserved attribute payload in place */
	stats = nla_data(nla);
	mpls_get_stats(mdev, stats);

	return 0;
}
1137
1138static size_t mpls_get_stats_af_size(const struct net_device *dev)
1139{
1140 struct mpls_dev *mdev;
1141
1142 mdev = mpls_dev_rcu(dev);
1143 if (!mdev)
1144 return 0;
1145
1146 return nla_total_size_64bit(sizeof(struct mpls_link_stats));
1147}
1148
/* Build one RTM_*NETCONF message for @mdev into @skb.
 *
 * @type selects which config attribute to include (NETCONFA_ALL emits
 * every one; currently only NETCONFA_INPUT exists).  Returns 0 on
 * success or -EMSGSIZE, cancelling the partial message.
 */
static int mpls_netconf_fill_devconf(struct sk_buff *skb, struct mpls_dev *mdev,
				     u32 portid, u32 seq, int event,
				     unsigned int flags, int type)
{
	struct nlmsghdr  *nlh;
	struct netconfmsg *ncm;
	bool all = false;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	if (type == NETCONFA_ALL)
		all = true;

	ncm = nlmsg_data(nlh);
	ncm->ncm_family = AF_MPLS;

	if (nla_put_s32(skb, NETCONFA_IFINDEX, mdev->dev->ifindex) < 0)
		goto nla_put_failure;

	/* input_enabled is toggled via sysctl; READ_ONCE pairs with that */
	if ((all || type == NETCONFA_INPUT) &&
	    nla_put_s32(skb, NETCONFA_INPUT,
			READ_ONCE(mdev->input_enabled)) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1183
1184static int mpls_netconf_msgsize_devconf(int type)
1185{
1186 int size = NLMSG_ALIGN(sizeof(struct netconfmsg))
1187 + nla_total_size(4); /* NETCONFA_IFINDEX */
1188 bool all = false;
1189
1190 if (type == NETCONFA_ALL)
1191 all = true;
1192
1193 if (all || type == NETCONFA_INPUT)
1194 size += nla_total_size(4);
1195
1196 return size;
1197}
1198
/* Broadcast a netconf change for @mdev to RTNLGRP_MPLS_NETCONF
 * listeners.  Best effort: on failure the group's socket error is
 * set instead.
 */
static void mpls_netconf_notify_devconf(struct net *net, int event,
					int type, struct mpls_dev *mdev)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(mpls_netconf_msgsize_devconf(type), GFP_KERNEL);
	if (!skb)
		goto errout;

	err = mpls_netconf_fill_devconf(skb, mdev, 0, 0, event, 0, type);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}

	rtnl_notify(skb, net, 0, RTNLGRP_MPLS_NETCONF, NULL, GFP_KERNEL);
	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_MPLS_NETCONF, err);
}
1222
/* Netlink policy for RTM_GETNETCONF requests on AF_MPLS. */
static const struct nla_policy devconf_mpls_policy[NETCONFA_MAX + 1] = {
	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },
};
1226
/* Validate and parse an RTM_GETNETCONF request into @tb.
 *
 * Non-strict sockets get plain deprecated parsing; strict-checking
 * sockets additionally reject any attribute other than
 * NETCONFA_IFINDEX.  Returns 0 or a negative errno with extack set.
 */
static int mpls_netconf_valid_get_req(struct sk_buff *skb,
				      const struct nlmsghdr *nlh,
				      struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid header for netconf get request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct netconfmsg),
					      tb, NETCONFA_MAX,
					      devconf_mpls_policy, extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct netconfmsg),
					    tb, NETCONFA_MAX,
					    devconf_mpls_policy, extack);
	if (err)
		return err;

	for (i = 0; i <= NETCONFA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETCONFA_IFINDEX:
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
			return -EINVAL;
		}
	}

	return 0;
}
1266
/* RTM_GETNETCONF handler: report the MPLS netconf state of the single
 * device identified by the mandatory NETCONFA_IFINDEX attribute back to
 * the requesting socket.
 */
static int mpls_netconf_get_devconf(struct sk_buff *in_skb,
				    struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[NETCONFA_MAX + 1];
	struct net_device *dev;
	struct mpls_dev *mdev;
	struct sk_buff *skb;
	int ifindex;
	int err;

	err = mpls_netconf_valid_get_req(in_skb, nlh, tb, extack);
	if (err < 0)
		goto errout;

	if (!tb[NETCONFA_IFINDEX]) {
		err = -EINVAL;
		goto errout;
	}

	ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);

	/* allocate with GFP_KERNEL before entering the RCU section */
	skb = nlmsg_new(mpls_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	rcu_read_lock();

	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		err = -EINVAL;
		goto errout_unlock;
	}

	mdev = mpls_dev_rcu(dev);
	if (!mdev) {
		err = -EINVAL;
		goto errout_unlock;
	}

	err = mpls_netconf_fill_devconf(skb, mdev,
					NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, RTM_NEWNETCONF, 0,
					NETCONFA_ALL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in mpls_netconf_msgsize_devconf() */
		WARN_ON(err == -EMSGSIZE);
		goto errout_unlock;
	}

	/* rtnl_unicast consumes skb regardless of outcome */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);

	rcu_read_unlock();
errout:
	return err;

errout_unlock:
	rcu_read_unlock();
	kfree_skb(skb);
	goto errout;
}
1331
/* RTM_GETNETCONF dump handler: walk all net devices (resumable across
 * calls via the ifindex saved in cb->ctx) and emit one netconf record per
 * device that has MPLS state attached.
 */
static int mpls_netconf_dump_devconf(struct sk_buff *skb,
				     struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct {
		unsigned long ifindex;	/* dump resume point */
	} *ctx = (void *)cb->ctx;
	struct net_device *dev;
	struct mpls_dev *mdev;
	int err = 0;

	if (cb->strict_check) {
		struct netlink_ext_ack *extack = cb->extack;
		struct netconfmsg *ncm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ncm))) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf dump request");
			return -EINVAL;
		}

		/* netconf dump requests carry no attributes */
		if (nlmsg_attrlen(nlh, sizeof(*ncm))) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid data after header in netconf dump request");
			return -EINVAL;
		}
	}

	rcu_read_lock();
	for_each_netdev_dump(net, dev, ctx->ifindex) {
		mdev = mpls_dev_rcu(dev);
		if (!mdev)
			continue;
		err = mpls_netconf_fill_devconf(skb, mdev,
						NETLINK_CB(cb->skb).portid,
						nlh->nlmsg_seq,
						RTM_NEWNETCONF,
						NLM_F_MULTI,
						NETCONFA_ALL);
		if (err < 0)
			break;
	}
	rcu_read_unlock();

	return err;
}
1377
/* Encode a struct mpls_dev field offset as a fake pointer in the sysctl
 * template's .data; relocated to a real per-device address in
 * mpls_dev_sysctl_register().
 */
#define MPLS_PERDEV_SYSCTL_OFFSET(field)	\
	(&((struct mpls_dev *)0)->field)
1380
1381static int mpls_conf_proc(const struct ctl_table *ctl, int write,
1382 void *buffer, size_t *lenp, loff_t *ppos)
1383{
1384 int oval = *(int *)ctl->data;
1385 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1386
1387 if (write) {
1388 struct mpls_dev *mdev = ctl->extra1;
1389 int i = (int *)ctl->data - (int *)mdev;
1390 struct net *net = ctl->extra2;
1391 int val = *(int *)ctl->data;
1392
1393 if (i == offsetof(struct mpls_dev, input_enabled) &&
1394 val != oval) {
1395 mpls_netconf_notify_devconf(net, RTM_NEWNETCONF,
1396 NETCONFA_INPUT, mdev);
1397 }
1398 }
1399
1400 return ret;
1401}
1402
/* Template for the per-device "net/mpls/conf/<ifname>" sysctl directory.
 * .data holds a field offset (MPLS_PERDEV_SYSCTL_OFFSET) until it is made
 * absolute in mpls_dev_sysctl_register().
 */
static const struct ctl_table mpls_dev_table[] = {
	{
		.procname	= "input",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_conf_proc,
		.data		= MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
	},
};
1412
/* Register the "net/mpls/conf/<ifname>" sysctls for @mdev and broadcast
 * the device's initial netconf state.  Returns 0 on success or -ENOBUFS
 * on any failure (mdev->sysctl is left NULL in that case).
 */
static int mpls_dev_sysctl_register(struct net_device *dev,
				    struct mpls_dev *mdev)
{
	char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
	size_t table_size = ARRAY_SIZE(mpls_dev_table);
	struct net *net = dev_net(dev);
	struct ctl_table *table;
	int i;

	table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
	if (!table)
		goto out;

	/* Table data contains only offsets relative to the base of
	 * the mdev at this point, so make them absolute.
	 */
	for (i = 0; i < table_size; i++) {
		table[i].data = (char *)mdev + (uintptr_t)table[i].data;
		table[i].extra1 = mdev;	/* back-pointer for mpls_conf_proc() */
		table[i].extra2 = net;
	}

	snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);

	mdev->sysctl = register_net_sysctl_sz(net, path, table, table_size);
	if (!mdev->sysctl)
		goto free;

	mpls_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_ALL, mdev);
	return 0;

free:
	kfree(table);
out:
	mdev->sysctl = NULL;
	return -ENOBUFS;
}
1450
/* Tear down the sysctls created by mpls_dev_sysctl_register() (freeing the
 * kmemdup'd table) and broadcast RTM_DELNETCONF.  Safe to call when
 * registration failed: mdev->sysctl == NULL is a no-op.
 */
static void mpls_dev_sysctl_unregister(struct net_device *dev,
				       struct mpls_dev *mdev)
{
	struct net *net = dev_net(dev);
	const struct ctl_table *table;

	if (!mdev->sysctl)
		return;

	table = mdev->sysctl->ctl_table_arg;
	unregister_net_sysctl_table(mdev->sysctl);
	kfree(table);

	mpls_netconf_notify_devconf(net, RTM_DELNETCONF, 0, mdev);
}
1466
/* Allocate and attach per-device MPLS state for @dev (NETDEV_REGISTER
 * path).  On success the mdev is published via dev->mpls_ptr for RCU
 * readers; on failure an ERR_PTR is returned and nothing is attached.
 */
static struct mpls_dev *mpls_add_dev(struct net_device *dev)
{
	struct mpls_dev *mdev;
	int err = -ENOMEM;
	int i;

	mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
	if (!mdev)
		return ERR_PTR(err);

	mdev->stats = alloc_percpu(struct mpls_pcpu_stats);
	if (!mdev->stats)
		goto free;

	/* initialise the u64 stats seqcounts on every possible CPU */
	for_each_possible_cpu(i) {
		struct mpls_pcpu_stats *mpls_stats;

		mpls_stats = per_cpu_ptr(mdev->stats, i);
		u64_stats_init(&mpls_stats->syncp);
	}

	mdev->dev = dev;

	err = mpls_dev_sysctl_register(dev, mdev);
	if (err)
		goto free;

	rcu_assign_pointer(dev->mpls_ptr, mdev);

	return mdev;

free:
	free_percpu(mdev->stats);
	kfree(mdev);
	return ERR_PTR(err);
}
1503
/* RCU callback: free per-device MPLS state once no reader can still see
 * the old dev->mpls_ptr value.
 */
static void mpls_dev_destroy_rcu(struct rcu_head *head)
{
	struct mpls_dev *mdev = container_of(head, struct mpls_dev, rcu);

	free_percpu(mdev->stats);
	kfree(mdev);
}
1511
/* React to @dev going down (NETDEV_DOWN/NETDEV_CHANGE) or away
 * (NETDEV_UNREGISTER) for every route in the platform label table:
 * nexthops using @dev are flagged RTNH_F_DEAD and/or RTNH_F_LINKDOWN and
 * rt_nhn_alive is recomputed.  On UNREGISTER a route whose nexthops are
 * all gone is deleted outright; otherwise the route is duplicated
 * (kmemdup) so nh_dev can be cleared on the copy without disturbing
 * concurrent RCU readers of the original.
 */
static int mpls_ifdown(struct net_device *dev, int event)
{
	struct net *net = dev_net(dev);
	unsigned int index;

	for (index = 0; index < net->mpls.platform_labels; index++) {
		struct mpls_route *rt;
		bool nh_del = false;
		u8 alive = 0;

		rt = mpls_route_input(net, index);
		if (!rt)
			continue;

		if (event == NETDEV_UNREGISTER) {
			u8 deleted = 0;

			for_nexthops(rt) {
				if (!nh->nh_dev || nh->nh_dev == dev)
					deleted++;
				if (nh->nh_dev == dev)
					nh_del = true;
			} endfor_nexthops(rt);

			/* if there are no more nexthops, delete the route */
			if (deleted == rt->rt_nhn) {
				mpls_route_update(net, index, NULL, NULL);
				continue;
			}

			if (nh_del) {
				size_t size = sizeof(*rt) + rt->rt_nhn *
					rt->rt_nh_size;
				struct mpls_route *orig = rt;

				rt = kmemdup(orig, size, GFP_KERNEL);
				if (!rt)
					return -ENOMEM;
			}
		}

		change_nexthops(rt) {
			unsigned int nh_flags = nh->nh_flags;

			if (nh->nh_dev != dev) {
				/* the kmemdup'd copy needs its own device
				 * references for surviving nexthops
				 */
				if (nh_del)
					netdev_hold(nh->nh_dev, &nh->nh_dev_tracker,
						    GFP_KERNEL);
				goto next;
			}

			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				nh_flags |= RTNH_F_DEAD;
				fallthrough;
			case NETDEV_CHANGE:
				nh_flags |= RTNH_F_LINKDOWN;
				break;
			}
			if (event == NETDEV_UNREGISTER)
				nh->nh_dev = NULL;

			if (nh->nh_flags != nh_flags)
				WRITE_ONCE(nh->nh_flags, nh_flags);
next:
			if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
				alive++;
		} endfor_nexthops(rt);

		WRITE_ONCE(rt->rt_nhn_alive, alive);

		/* publish the modified copy in place of the original */
		if (nh_del)
			mpls_route_update(net, index, rt, NULL);
	}

	return 0;
}
1590
/* A device came (back) up: clear @flags (RTNH_F_DEAD and/or
 * RTNH_F_LINKDOWN) on every nexthop using @dev and recompute each
 * route's rt_nhn_alive count.
 */
static void mpls_ifup(struct net_device *dev, unsigned int flags)
{
	struct net *net = dev_net(dev);
	unsigned int index;
	u8 alive;

	for (index = 0; index < net->mpls.platform_labels; index++) {
		struct mpls_route *rt;

		rt = mpls_route_input(net, index);
		if (!rt)
			continue;

		alive = 0;
		change_nexthops(rt) {
			unsigned int nh_flags = nh->nh_flags;

			/* none of the cleared flags set: already alive */
			if (!(nh_flags & flags)) {
				alive++;
				continue;
			}
			if (nh->nh_dev != dev)
				continue;
			alive++;
			nh_flags &= ~flags;
			WRITE_ONCE(nh->nh_flags, nh_flags);
		} endfor_nexthops(rt);

		WRITE_ONCE(rt->rt_nhn_alive, alive);
	}
}
1622
/* netdevice notifier: create/destroy per-device MPLS state and keep
 * route nexthop liveness in sync with device state, all under the
 * per-netns platform_mutex.
 */
static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
			   void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	struct mpls_dev *mdev;
	unsigned int flags;
	int err;

	mutex_lock(&net->mpls.platform_mutex);

	if (event == NETDEV_REGISTER) {
		mdev = mpls_add_dev(dev);
		if (IS_ERR(mdev)) {
			err = PTR_ERR(mdev);
			goto err;
		}

		goto out;
	}

	/* devices without MPLS state are ignored for all other events */
	mdev = mpls_dev_get(net, dev);
	if (!mdev)
		goto out;

	switch (event) {

	case NETDEV_DOWN:
		err = mpls_ifdown(dev, event);
		if (err)
			goto err;
		break;
	case NETDEV_UP:
		flags = netif_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
		else
			mpls_ifup(dev, RTNH_F_DEAD);
		break;
	case NETDEV_CHANGE:
		flags = netif_get_flags(dev);
		if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
			mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
		} else {
			err = mpls_ifdown(dev, event);
			if (err)
				goto err;
		}
		break;
	case NETDEV_UNREGISTER:
		err = mpls_ifdown(dev, event);
		if (err)
			goto err;

		mdev = mpls_dev_get(net, dev);
		if (mdev) {
			mpls_dev_sysctl_unregister(dev, mdev);
			RCU_INIT_POINTER(dev->mpls_ptr, NULL);
			/* free only after readers are done with mdev */
			call_rcu(&mdev->rcu, mpls_dev_destroy_rcu);
		}
		break;
	case NETDEV_CHANGENAME:
		/* sysctl path embeds the device name: re-register */
		mdev = mpls_dev_get(net, dev);
		if (mdev) {
			mpls_dev_sysctl_unregister(dev, mdev);
			err = mpls_dev_sysctl_register(dev, mdev);
			if (err)
				goto err;
		}
		break;
	}

out:
	mutex_unlock(&net->mpls.platform_mutex);
	return NOTIFY_OK;

err:
	mutex_unlock(&net->mpls.platform_mutex);
	return notifier_from_errno(err);
}
1703
/* Registered with the netdevice notifier chain at module init */
static struct notifier_block mpls_dev_notifier = {
	.notifier_call = mpls_dev_notify,
};
1707
/* Emit an RTA_VIA attribute (struct rtvia: family + raw address bytes)
 * for a nexthop address.  The neighbour table id selects the address
 * family; ids past the three initialized entries map to 0 (AF_UNSPEC,
 * since the lookup array is zero-filled beyond its initializers).
 */
static int nla_put_via(struct sk_buff *skb,
		       u8 table, const void *addr, int alen)
{
	static const int table_to_family[NEIGH_NR_TABLES + 1] = {
		AF_INET, AF_INET6, AF_PACKET,
	};
	struct nlattr *nla;
	struct rtvia *via;
	int family = AF_UNSPEC;

	/* alen + 2: address bytes plus the rtvia_family field */
	nla = nla_reserve(skb, RTA_VIA, alen + 2);
	if (!nla)
		return -EMSGSIZE;

	if (table <= NEIGH_NR_TABLES)
		family = table_to_family[table];

	via = nla_data(nla);
	via->rtvia_family = family;
	memcpy(via->rtvia_addr, addr, alen);
	return 0;
}
1730
1731int nla_put_labels(struct sk_buff *skb, int attrtype,
1732 u8 labels, const u32 label[])
1733{
1734 struct nlattr *nla;
1735 struct mpls_shim_hdr *nla_label;
1736 bool bos;
1737 int i;
1738 nla = nla_reserve(skb, attrtype, labels*4);
1739 if (!nla)
1740 return -EMSGSIZE;
1741
1742 nla_label = nla_data(nla);
1743 bos = true;
1744 for (i = labels - 1; i >= 0; i--) {
1745 nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
1746 bos = false;
1747 }
1748
1749 return 0;
1750}
1751EXPORT_SYMBOL_GPL(nla_put_labels);
1752
/* Parse an MPLS label-stack attribute into @label[] (at most @max_labels
 * entries), validating that TTL and TC are zero in every entry, that the
 * bottom-of-stack bit is set exactly on the final entry, and that the
 * implicit-NULL label is not used.  When @label is NULL only the label
 * count is returned via @labels.  Returns 0 or -EINVAL (with extack set).
 */
int nla_get_labels(const struct nlattr *nla, u8 max_labels, u8 *labels,
		   u32 label[], struct netlink_ext_ack *extack)
{
	unsigned len = nla_len(nla);
	struct mpls_shim_hdr *nla_label;
	u8 nla_labels;
	bool bos;
	int i;

	/* len needs to be an even multiple of 4 (the label size). Number
	 * of labels is a u8 so check for overflow.
	 */
	if (len & 3 || len / 4 > 255) {
		NL_SET_ERR_MSG_ATTR(extack, nla,
				    "Invalid length for labels attribute");
		return -EINVAL;
	}

	/* Limit the number of new labels allowed */
	nla_labels = len/4;
	if (nla_labels > max_labels) {
		NL_SET_ERR_MSG(extack, "Too many labels");
		return -EINVAL;
	}

	/* when label == NULL, caller wants number of labels */
	if (!label)
		goto out;

	nla_label = nla_data(nla);
	bos = true;
	/* iterate from the bottom of the stack upwards */
	for (i = nla_labels - 1; i >= 0; i--, bos = false) {
		struct mpls_entry_decoded dec;
		dec = mpls_entry_decode(nla_label + i);

		/* Ensure the bottom of stack flag is properly set
		 * and ttl and tc are both clear.
		 */
		if (dec.ttl) {
			NL_SET_ERR_MSG_ATTR(extack, nla,
					    "TTL in label must be 0");
			return -EINVAL;
		}

		if (dec.tc) {
			NL_SET_ERR_MSG_ATTR(extack, nla,
					    "Traffic class in label must be 0");
			return -EINVAL;
		}

		if (dec.bos != bos) {
			NL_SET_BAD_ATTR(extack, nla);
			if (bos) {
				NL_SET_ERR_MSG(extack,
					       "BOS bit must be set in first label");
			} else {
				NL_SET_ERR_MSG(extack,
					       "BOS bit can only be set in first label");
			}
			return -EINVAL;
		}

		switch (dec.label) {
		case MPLS_LABEL_IMPLNULL:
			/* RFC3032: This is a label that an LSR may
			 * assign and distribute, but which never
			 * actually appears in the encapsulation.
			 */
			NL_SET_ERR_MSG_ATTR(extack, nla,
					    "Implicit NULL Label (3) can not be used in encapsulation");
			return -EINVAL;
		}

		label[i] = dec.label;
	}
out:
	*labels = nla_labels;
	return 0;
}
EXPORT_SYMBOL_GPL(nla_get_labels);
1833
/* Translate an RTM_NEWROUTE/RTM_DELROUTE message into a
 * struct mpls_route_config.  The rtmsg header must carry the fixed MPLS
 * values (AF_MPLS, dst_len 20, main table, universe scope, unicast);
 * recognised attributes are copied into @cfg, anything else is rejected.
 * Returns 0 or -EINVAL with extack set.
 */
static int rtm_to_route_config(struct sk_buff *skb,
			       struct nlmsghdr *nlh,
			       struct mpls_route_config *cfg,
			       struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int index;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
				     rtm_mpls_policy, extack);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);

	if (rtm->rtm_family != AF_MPLS) {
		NL_SET_ERR_MSG(extack, "Invalid address family in rtmsg");
		goto errout;
	}
	if (rtm->rtm_dst_len != 20) {
		NL_SET_ERR_MSG(extack, "rtm_dst_len must be 20 for MPLS");
		goto errout;
	}
	if (rtm->rtm_src_len != 0) {
		NL_SET_ERR_MSG(extack, "rtm_src_len must be 0 for MPLS");
		goto errout;
	}
	if (rtm->rtm_tos != 0) {
		NL_SET_ERR_MSG(extack, "rtm_tos must be 0 for MPLS");
		goto errout;
	}
	if (rtm->rtm_table != RT_TABLE_MAIN) {
		NL_SET_ERR_MSG(extack,
			       "MPLS only supports the main route table");
		goto errout;
	}
	/* Any value is acceptable for rtm_protocol */

	/* As mpls uses destination specific addresses
	 * (or source specific address in the case of multicast)
	 * all addresses have universal scope.
	 */
	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE) {
		NL_SET_ERR_MSG(extack,
			       "Invalid route scope - MPLS only supports UNIVERSE");
		goto errout;
	}
	if (rtm->rtm_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack,
			       "Invalid route type - MPLS only supports UNICAST");
		goto errout;
	}
	if (rtm->rtm_flags != 0) {
		NL_SET_ERR_MSG(extack, "rtm_flags must be 0 for MPLS");
		goto errout;
	}

	/* defaults before attribute processing */
	cfg->rc_label		= LABEL_NOT_SPECIFIED;
	cfg->rc_protocol	= rtm->rtm_protocol;
	cfg->rc_via_table	= MPLS_NEIGH_TABLE_UNSPEC;
	cfg->rc_ttl_propagate	= MPLS_TTL_PROP_DEFAULT;
	cfg->rc_nlflags		= nlh->nlmsg_flags;
	cfg->rc_nlinfo.portid	= NETLINK_CB(skb).portid;
	cfg->rc_nlinfo.nlh	= nlh;
	cfg->rc_nlinfo.nl_net	= sock_net(skb->sk);

	for (index = 0; index <= RTA_MAX; index++) {
		struct nlattr *nla = tb[index];
		if (!nla)
			continue;

		switch (index) {
		case RTA_OIF:
			cfg->rc_ifindex = nla_get_u32(nla);
			break;
		case RTA_NEWDST:
			if (nla_get_labels(nla, MAX_NEW_LABELS,
					   &cfg->rc_output_labels,
					   cfg->rc_output_label, extack))
				goto errout;
			break;
		case RTA_DST:
		{
			u8 label_count;
			if (nla_get_labels(nla, 1, &label_count,
					   &cfg->rc_label, extack))
				goto errout;

			if (!mpls_label_ok(cfg->rc_nlinfo.nl_net,
					   &cfg->rc_label, extack))
				goto errout;
			break;
		}
		case RTA_GATEWAY:
			NL_SET_ERR_MSG(extack, "MPLS does not support RTA_GATEWAY attribute");
			goto errout;
		case RTA_VIA:
		{
			if (nla_get_via(nla, &cfg->rc_via_alen,
					&cfg->rc_via_table, cfg->rc_via,
					extack))
				goto errout;
			break;
		}
		case RTA_MULTIPATH:
		{
			cfg->rc_mp = nla_data(nla);
			cfg->rc_mp_len = nla_len(nla);
			break;
		}
		case RTA_TTL_PROPAGATE:
		{
			u8 ttl_propagate = nla_get_u8(nla);

			if (ttl_propagate > 1) {
				NL_SET_ERR_MSG_ATTR(extack, nla,
						    "RTA_TTL_PROPAGATE can only be 0 or 1");
				goto errout;
			}
			cfg->rc_ttl_propagate = ttl_propagate ?
				MPLS_TTL_PROP_ENABLED :
				MPLS_TTL_PROP_DISABLED;
			break;
		}
		default:
			NL_SET_ERR_MSG_ATTR(extack, nla, "Unknown attribute");
			/* Unsupported attribute */
			goto errout;
		}
	}

	err = 0;
errout:
	return err;
}
1972
1973static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1974 struct netlink_ext_ack *extack)
1975{
1976 struct net *net = sock_net(skb->sk);
1977 struct mpls_route_config *cfg;
1978 int err;
1979
1980 cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
1981 if (!cfg)
1982 return -ENOMEM;
1983
1984 err = rtm_to_route_config(skb, nlh, cfg, extack);
1985 if (err < 0)
1986 goto out;
1987
1988 mutex_lock(&net->mpls.platform_mutex);
1989 err = mpls_route_del(cfg, extack);
1990 mutex_unlock(&net->mpls.platform_mutex);
1991out:
1992 kfree(cfg);
1993
1994 return err;
1995}
1996
1997
1998static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
1999 struct netlink_ext_ack *extack)
2000{
2001 struct net *net = sock_net(skb->sk);
2002 struct mpls_route_config *cfg;
2003 int err;
2004
2005 cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
2006 if (!cfg)
2007 return -ENOMEM;
2008
2009 err = rtm_to_route_config(skb, nlh, cfg, extack);
2010 if (err < 0)
2011 goto out;
2012
2013 mutex_lock(&net->mpls.platform_mutex);
2014 err = mpls_route_add(cfg, extack);
2015 mutex_unlock(&net->mpls.platform_mutex);
2016out:
2017 kfree(cfg);
2018
2019 return err;
2020}
2021
/* Fill one RTM_NEWROUTE/RTM_DELROUTE message for the route bound to
 * @label.  Single-nexthop routes are encoded flat (RTA_OIF/RTA_VIA/
 * RTA_NEWDST); multipath routes nest one rtnexthop per nexthop inside
 * RTA_MULTIPATH.  Returns 0 or -EMSGSIZE (message cancelled).
 */
static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
			   u32 label, struct mpls_route *rt, int flags)
{
	struct net_device *dev;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_MPLS;
	rtm->rtm_dst_len = 20;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = 0;
	rtm->rtm_table = RT_TABLE_MAIN;
	rtm->rtm_protocol = rt->rt_protocol;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;

	if (nla_put_labels(skb, RTA_DST, 1, &label))
		goto nla_put_failure;

	if (rt->rt_ttl_propagate != MPLS_TTL_PROP_DEFAULT) {
		bool ttl_propagate =
			rt->rt_ttl_propagate == MPLS_TTL_PROP_ENABLED;

		if (nla_put_u8(skb, RTA_TTL_PROPAGATE,
			       ttl_propagate))
			goto nla_put_failure;
	}
	if (rt->rt_nhn == 1) {
		const struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_labels &&
		    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
				   nh->nh_label))
			goto nla_put_failure;
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
		    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
				nh->nh_via_alen))
			goto nla_put_failure;
		dev = nh->nh_dev;
		if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
			goto nla_put_failure;
		if (nh->nh_flags & RTNH_F_LINKDOWN)
			rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (nh->nh_flags & RTNH_F_DEAD)
			rtm->rtm_flags |= RTNH_F_DEAD;
	} else {
		struct rtnexthop *rtnh;
		struct nlattr *mp;
		u8 linkdown = 0;
		u8 dead = 0;

		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
		if (!mp)
			goto nla_put_failure;

		for_nexthops(rt) {
			/* skip nexthops whose device was unregistered */
			dev = nh->nh_dev;
			if (!dev)
				continue;

			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
			if (!rtnh)
				goto nla_put_failure;

			rtnh->rtnh_ifindex = dev->ifindex;
			if (nh->nh_flags & RTNH_F_LINKDOWN) {
				rtnh->rtnh_flags |= RTNH_F_LINKDOWN;
				linkdown++;
			}
			if (nh->nh_flags & RTNH_F_DEAD) {
				rtnh->rtnh_flags |= RTNH_F_DEAD;
				dead++;
			}

			if (nh->nh_labels && nla_put_labels(skb, RTA_NEWDST,
							    nh->nh_labels,
							    nh->nh_label))
				goto nla_put_failure;
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
			    nla_put_via(skb, nh->nh_via_table,
					mpls_nh_via(rt, nh),
					nh->nh_via_alen))
				goto nla_put_failure;

			/* length of rtnetlink header + attributes */
			rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
		} endfor_nexthops(rt);

		/* route-level flags only when every nexthop has them */
		if (linkdown == rt->rt_nhn)
			rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (dead == rt->rt_nhn)
			rtm->rtm_flags |= RTNH_F_DEAD;

		nla_nest_end(skb, mp);
	}

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2131
2132#if IS_ENABLED(CONFIG_INET)
/* With CONFIG_INET the generic IPv4 FIB dump validator already implements
 * the strict-check semantics required here; delegate to it.
 */
static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
				   struct fib_dump_filter *filter,
				   struct netlink_callback *cb)
{
	return ip_valid_fib_dump_req(net, nlh, filter, cb);
}
2139#else
2140static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
2141 struct fib_dump_filter *filter,
2142 struct netlink_callback *cb)
2143{
2144 struct netlink_ext_ack *extack = cb->extack;
2145 struct nlattr *tb[RTA_MAX + 1];
2146 struct rtmsg *rtm;
2147 int err, i;
2148
2149 rtm = nlmsg_payload(nlh, sizeof(*rtm));
2150 if (!rtm) {
2151 NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request");
2152 return -EINVAL;
2153 }
2154
2155 if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
2156 rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type ||
2157 rtm->rtm_flags) {
2158 NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for FIB dump request");
2159 return -EINVAL;
2160 }
2161
2162 if (rtm->rtm_protocol) {
2163 filter->protocol = rtm->rtm_protocol;
2164 filter->filter_set = 1;
2165 cb->answer_flags = NLM_F_DUMP_FILTERED;
2166 }
2167
2168 err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
2169 rtm_mpls_policy, extack);
2170 if (err < 0)
2171 return err;
2172
2173 for (i = 0; i <= RTA_MAX; ++i) {
2174 int ifindex;
2175
2176 if (i == RTA_OIF) {
2177 ifindex = nla_get_u32(tb[i]);
2178 filter->dev = dev_get_by_index_rcu(net, ifindex);
2179 if (!filter->dev)
2180 return -ENODEV;
2181 filter->filter_set = 1;
2182 } else if (tb[i]) {
2183 NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in dump request");
2184 return -EINVAL;
2185 }
2186 }
2187
2188 return 0;
2189}
2190#endif
2191
2192static bool mpls_rt_uses_dev(struct mpls_route *rt,
2193 const struct net_device *dev)
2194{
2195 if (rt->rt_nhn == 1) {
2196 struct mpls_nh *nh = rt->rt_nh;
2197
2198 if (nh->nh_dev == dev)
2199 return true;
2200 } else {
2201 for_nexthops(rt) {
2202 if (nh->nh_dev == dev)
2203 return true;
2204 } endfor_nexthops(rt);
2205 }
2206
2207 return false;
2208}
2209
/* RTM_GETROUTE dump handler: walk the platform label table under RCU,
 * starting from the label saved in cb->args[0], applying any device/
 * protocol filter from the request, and emit one route record per label.
 */
static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	struct mpls_route __rcu **platform_label;
	struct fib_dump_filter filter = {
		.rtnl_held = false,
	};
	unsigned int flags = NLM_F_MULTI;
	size_t platform_labels;
	unsigned int index;
	int err;

	rcu_read_lock();

	if (cb->strict_check) {
		err = mpls_valid_fib_dump_req(net, nlh, &filter, cb);
		if (err < 0)
			goto err;

		/* for MPLS, there is only 1 table with fixed type and flags.
		 * If either are set in the filter then return nothing.
		 */
		if ((filter.table_id && filter.table_id != RT_TABLE_MAIN) ||
		    (filter.rt_type && filter.rt_type != RTN_UNICAST) ||
		     filter.flags)
			goto unlock;
	}

	/* labels below the reserved range are never dumped */
	index = cb->args[0];
	if (index < MPLS_LABEL_FIRST_UNRESERVED)
		index = MPLS_LABEL_FIRST_UNRESERVED;

	platform_label = rcu_dereference(net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;

	if (filter.filter_set)
		flags |= NLM_F_DUMP_FILTERED;

	for (; index < platform_labels; index++) {
		struct mpls_route *rt;

		rt = rcu_dereference(platform_label[index]);
		if (!rt)
			continue;

		if ((filter.dev && !mpls_rt_uses_dev(rt, filter.dev)) ||
		    (filter.protocol && rt->rt_protocol != filter.protocol))
			continue;

		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
				    index, rt, flags) < 0)
			break;
	}
	/* remember where to resume on the next dump call */
	cb->args[0] = index;

unlock:
	rcu_read_unlock();
	return skb->len;

err:
	rcu_read_unlock();
	return err;
}
2275
/* Upper-bound payload size of the netlink message mpls_dump_route() will
 * build for @rt; mirrors its attribute layout (flat for one nexthop,
 * nested RTA_MULTIPATH otherwise).
 */
static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
{
	size_t payload =
		NLMSG_ALIGN(sizeof(struct rtmsg))
		+ nla_total_size(4)			/* RTA_DST */
		+ nla_total_size(1);			/* RTA_TTL_PROPAGATE */

	if (rt->rt_nhn == 1) {
		struct mpls_nh *nh = rt->rt_nh;

		if (nh->nh_dev)
			payload += nla_total_size(4); /* RTA_OIF */
		if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC) /* RTA_VIA */
			payload += nla_total_size(2 + nh->nh_via_alen);
		if (nh->nh_labels) /* RTA_NEWDST */
			payload += nla_total_size(nh->nh_labels * 4);
	} else {
		/* each nexthop is packed in an attribute */
		size_t nhsize = 0;

		for_nexthops(rt) {
			if (!nh->nh_dev)
				continue;
			nhsize += nla_total_size(sizeof(struct rtnexthop));
			/* RTA_VIA */
			if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC)
				nhsize += nla_total_size(2 + nh->nh_via_alen);
			if (nh->nh_labels)
				nhsize += nla_total_size(nh->nh_labels * 4);
		} endfor_nexthops(rt);
		/* nested attribute */
		payload += nla_total_size(nhsize);
	}

	return payload;
}
2312
/* Notify RTNLGRP_MPLS_ROUTE listeners of a route add/delete for @label.
 * On allocation/fill failure the error is recorded on the group via
 * rtnl_set_sk_err().
 */
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
		       struct nlmsghdr *nlh, struct net *net, u32 portid,
		       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	u32 seq = nlh ? nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
	if (skb == NULL)
		goto errout;

	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);

	return;
errout:
	rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
}
2338
/* Validate an RTM_GETROUTE request and parse its attributes into @tb.
 * Non-strict sockets get a plain deprecated parse; strict sockets have
 * the rtmsg header fields, flags (only RTM_F_FIB_MATCH allowed) and the
 * attribute set (RTA_DST/RTA_NEWDST only) checked as well.
 */
static int mpls_valid_getroute_req(struct sk_buff *skb,
				   const struct nlmsghdr *nlh,
				   struct nlattr **tb,
				   struct netlink_ext_ack *extack)
{
	struct rtmsg *rtm;
	int i, err;

	rtm = nlmsg_payload(nlh, sizeof(*rtm));
	if (!rtm) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid header for get route request");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
					      rtm_mpls_policy, extack);

	if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) ||
	    rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table ||
	    rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
		return -EINVAL;
	}
	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Invalid flags for get route request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
					    rtm_mpls_policy, extack);
	if (err)
		return err;

	/* label attributes are only meaningful with dst_len == 20 */
	if ((tb[RTA_DST] || tb[RTA_NEWDST]) && !rtm->rtm_dst_len) {
		NL_SET_ERR_MSG_MOD(extack, "rtm_dst_len must be 20 for MPLS");
		return -EINVAL;
	}

	for (i = 0; i <= RTA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case RTA_DST:
		case RTA_NEWDST:
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
			return -EINVAL;
		}
	}

	return 0;
}
2396
/* RTM_GETROUTE handler.  Looks up the route for the RTA_DST label; with
 * RTM_F_FIB_MATCH the stored route is dumped as-is, otherwise a dummy skb
 * carrying the optional RTA_NEWDST label stack is built so the multipath
 * selector can pick the concrete nexthop to report.  Runs under the
 * per-netns platform_mutex.
 */
static int mpls_getroute(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	u32 portid = NETLINK_CB(in_skb).portid;
	u32 in_label = LABEL_NOT_SPECIFIED;
	struct nlattr *tb[RTA_MAX + 1];
	struct mpls_route *rt = NULL;
	u32 labels[MAX_NEW_LABELS];
	struct mpls_shim_hdr *hdr;
	unsigned int hdr_size = 0;
	const struct mpls_nh *nh;
	struct net_device *dev;
	struct rtmsg *rtm, *r;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	u8 n_labels;
	int err;

	mutex_lock(&net->mpls.platform_mutex);

	err = mpls_valid_getroute_req(in_skb, in_nlh, tb, extack);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(in_nlh);

	if (tb[RTA_DST]) {
		u8 label_count;

		if (nla_get_labels(tb[RTA_DST], 1, &label_count,
				   &in_label, extack)) {
			err = -EINVAL;
			goto errout;
		}

		if (!mpls_label_ok(net, &in_label, extack)) {
			err = -EINVAL;
			goto errout;
		}
	}

	if (in_label < net->mpls.platform_labels)
		rt = mpls_route_input(net, in_label);
	if (!rt) {
		err = -ENETUNREACH;
		goto errout;
	}

	if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
		/* report the route itself, not a selected nexthop */
		skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
		if (!skb) {
			err = -ENOBUFS;
			goto errout;
		}

		err = mpls_dump_route(skb, portid, in_nlh->nlmsg_seq,
				      RTM_NEWROUTE, in_label, rt, 0);
		if (err < 0) {
			/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
			WARN_ON(err == -EMSGSIZE);
			goto errout_free;
		}

		err = rtnl_unicast(skb, net, portid);
		goto errout;
	}

	if (tb[RTA_NEWDST]) {
		if (nla_get_labels(tb[RTA_NEWDST], MAX_NEW_LABELS, &n_labels,
				   labels, extack) != 0) {
			err = -EINVAL;
			goto errout;
		}

		hdr_size = n_labels * sizeof(struct mpls_shim_hdr);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		err = -ENOBUFS;
		goto errout;
	}

	skb->protocol = htons(ETH_P_MPLS_UC);

	if (hdr_size) {
		bool bos;
		int i;

		if (skb_cow(skb, hdr_size)) {
			err = -ENOBUFS;
			goto errout_free;
		}

		skb_reserve(skb, hdr_size);
		skb_push(skb, hdr_size);
		skb_reset_network_header(skb);

		/* Push new labels */
		hdr = mpls_hdr(skb);
		bos = true;
		for (i = n_labels - 1; i >= 0; i--) {
			hdr[i] = mpls_entry_encode(labels[i],
						   1, 0, bos);
			bos = false;
		}
	}

	/* hash the synthetic label stack to pick the nexthop */
	nh = mpls_select_multipath(rt, skb);
	if (!nh) {
		err = -ENETUNREACH;
		goto errout_free;
	}

	/* strip the labels again; the skb is now reused for the reply */
	if (hdr_size) {
		skb_pull(skb, hdr_size);
		skb_reset_network_header(skb);
	}

	nlh = nlmsg_put(skb, portid, in_nlh->nlmsg_seq,
			RTM_NEWROUTE, sizeof(*r), 0);
	if (!nlh) {
		err = -EMSGSIZE;
		goto errout_free;
	}

	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_MPLS;
	r->rtm_dst_len	= 20;
	r->rtm_src_len	= 0;
	r->rtm_table	= RT_TABLE_MAIN;
	r->rtm_type	= RTN_UNICAST;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = rt->rt_protocol;
	r->rtm_flags	= 0;

	if (nla_put_labels(skb, RTA_DST, 1, &in_label))
		goto nla_put_failure;

	if (nh->nh_labels &&
	    nla_put_labels(skb, RTA_NEWDST, nh->nh_labels,
			   nh->nh_label))
		goto nla_put_failure;

	if (nh->nh_via_table != MPLS_NEIGH_TABLE_UNSPEC &&
	    nla_put_via(skb, nh->nh_via_table, mpls_nh_via(rt, nh),
			nh->nh_via_alen))
		goto nla_put_failure;
	dev = nh->nh_dev;
	if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);

	err = rtnl_unicast(skb, net, portid);
errout:
	mutex_unlock(&net->mpls.platform_mutex);
	return err;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	err = -EMSGSIZE;
errout_free:
	mutex_unlock(&net->mpls.platform_mutex);
	kfree_skb(skb);
	return err;
}
2565
/* Grow or shrink the per-namespace platform label table to @limit entries.
 *
 * A new table is allocated and the predefined IPv4/IPv6 explicit-null
 * routes are pre-built *before* taking platform_mutex, so the critical
 * section itself cannot fail.  Routes beyond the new limit are deleted,
 * the survivors are copied across, and the new table is published with
 * rcu_assign_pointer() so concurrent RCU readers always see a coherent
 * table.  Returns 0 on success or -ENOMEM.
 */
static int resize_platform_label_table(struct net *net, size_t limit)
{
	size_t size = sizeof(struct mpls_route *) * limit;
	size_t old_limit;
	size_t cp_size;
	struct mpls_route __rcu **labels = NULL, **old;
	struct mpls_route *rt0 = NULL, *rt2 = NULL;
	unsigned index;

	if (size) {
		labels = kvzalloc(size, GFP_KERNEL);
		if (!labels)
			goto nolabels;
	}

	/* In case the predefined labels need to be populated */
	if (limit > MPLS_LABEL_IPV4NULL) {
		struct net_device *lo = net->loopback_dev;

		rt0 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt0))
			goto nort0;

		rt0->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt0->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt0->rt_protocol = RTPROT_KERNEL;
		rt0->rt_payload_type = MPT_IPV4;
		rt0->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt0->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt0->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt0, rt0->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}
	if (limit > MPLS_LABEL_IPV6NULL) {
		struct net_device *lo = net->loopback_dev;

		rt2 = mpls_rt_alloc(1, lo->addr_len, 0);
		if (IS_ERR(rt2))
			goto nort2;

		rt2->rt_nh->nh_dev = lo;
		netdev_hold(lo, &rt2->rt_nh->nh_dev_tracker, GFP_KERNEL);
		rt2->rt_protocol = RTPROT_KERNEL;
		rt2->rt_payload_type = MPT_IPV6;
		rt2->rt_ttl_propagate = MPLS_TTL_PROP_DEFAULT;
		rt2->rt_nh->nh_via_table = NEIGH_LINK_TABLE;
		rt2->rt_nh->nh_via_alen = lo->addr_len;
		memcpy(__mpls_nh_via(rt2, rt2->rt_nh), lo->dev_addr,
		       lo->addr_len);
	}

	mutex_lock(&net->mpls.platform_mutex);

	/* Remember the original table */
	old = mpls_dereference(net, net->mpls.platform_label);
	old_limit = net->mpls.platform_labels;

	/* Free any labels beyond the new table */
	for (index = limit; index < old_limit; index++)
		mpls_route_update(net, index, NULL, NULL);

	/* Copy over the old labels */
	cp_size = size;
	if (old_limit < limit)
		cp_size = old_limit * sizeof(struct mpls_route *);

	memcpy(labels, old, cp_size);

	/* If needed set the predefined labels */
	if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
	    (limit > MPLS_LABEL_IPV6NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
		rt2 = NULL;	/* ownership moved into the table */
	}

	if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
	    (limit > MPLS_LABEL_IPV4NULL)) {
		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
		rt0 = NULL;	/* ownership moved into the table */
	}

	/* Update the global pointers */
	net->mpls.platform_labels = limit;
	rcu_assign_pointer(net->mpls.platform_label, labels);

	mutex_unlock(&net->mpls.platform_mutex);

	/* Frees are no-ops when ownership was transferred above */
	mpls_rt_free(rt2);
	mpls_rt_free(rt0);

	/* Wait for RCU readers of the old table before freeing it */
	if (old) {
		synchronize_rcu();
		kvfree(old);
	}
	return 0;

nort2:
	mpls_rt_free(rt0);
nort0:
	kvfree(labels);
nolabels:
	return -ENOMEM;
}
2669
2670static int mpls_platform_labels(const struct ctl_table *table, int write,
2671 void *buffer, size_t *lenp, loff_t *ppos)
2672{
2673 struct net *net = table->data;
2674 int platform_labels = net->mpls.platform_labels;
2675 int ret;
2676 struct ctl_table tmp = {
2677 .procname = table->procname,
2678 .data = &platform_labels,
2679 .maxlen = sizeof(int),
2680 .mode = table->mode,
2681 .extra1 = SYSCTL_ZERO,
2682 .extra2 = &label_limit,
2683 };
2684
2685 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2686
2687 if (write && ret == 0)
2688 ret = resize_platform_label_table(net, platform_labels);
2689
2690 return ret;
2691}
2692
/* Encode the offset of a per-namespace field as a fake pointer; the
 * offsets are rebased onto the real struct net in mpls_net_init().
 */
#define MPLS_NS_SYSCTL_OFFSET(field) \
	(&((struct net *)0)->field)

/* Template for the per-namespace "net/mpls" sysctl directory.
 * "platform_labels" deliberately has .data == NULL so that after
 * rebasing it points at the struct net itself, which is what
 * mpls_platform_labels() expects in table->data.
 */
static const struct ctl_table mpls_table[] = {
	{
		.procname	= "platform_labels",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= mpls_platform_labels,
	},
	{
		.procname	= "ip_ttl_propagate",
		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.ip_ttl_propagate),
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "default_ttl",
		.data		= MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl),
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &ttl_max,
	},
};
2723
/* Per-namespace init: start with an empty label table, default TTL
 * behaviour, and register the "net/mpls" sysctl directory.
 */
static __net_init int mpls_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(mpls_table);
	struct ctl_table *table;
	int i;

	mutex_init(&net->mpls.platform_mutex);
	net->mpls.platform_labels = 0;
	net->mpls.platform_label = NULL;
	net->mpls.ip_ttl_propagate = 1;
	net->mpls.default_ttl = 255;

	table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
	if (table == NULL)
		return -ENOMEM;

	/* Table data contains only offsets relative to the base of
	 * struct net at this point, so make them absolute.  The NULL
	 * offset of "platform_labels" becomes a pointer to @net itself,
	 * which is what mpls_platform_labels() reads from table->data.
	 */
	for (i = 0; i < table_size; i++)
		table[i].data = (char *)net + (uintptr_t)table[i].data;

	net->mpls.ctl = register_net_sysctl_sz(net, "net/mpls", table,
					       table_size);
	if (net->mpls.ctl == NULL) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}
2755
/* Per-namespace teardown: remove the sysctls, then free every route
 * and the platform label table itself.
 */
static __net_exit void mpls_net_exit(struct net *net)
{
	struct mpls_route __rcu **platform_label;
	size_t platform_labels;
	const struct ctl_table *table;
	unsigned int index;

	/* The sysctl table was kmemdup'ed in mpls_net_init(); free our copy */
	table = net->mpls.ctl->ctl_table_arg;
	unregister_net_sysctl_table(net->mpls.ctl);
	kfree(table);

	/* An rcu grace period has passed since there was a device in
	 * the network namespace (and thus the last in flight packet)
	 * left this network namespace. This is because
	 * unregister_netdevice_many and netdev_run_todo has completed
	 * for each network device that was in this network namespace.
	 *
	 * As such no additional rcu synchronization is necessary when
	 * freeing the platform_label table.
	 */
	mutex_lock(&net->mpls.platform_mutex);

	platform_label = mpls_dereference(net, net->mpls.platform_label);
	platform_labels = net->mpls.platform_labels;

	for (index = 0; index < platform_labels; index++) {
		struct mpls_route *rt;

		rt = mpls_dereference(net, platform_label[index]);
		mpls_notify_route(net, index, rt, NULL, NULL);
		mpls_rt_free(rt);
	}

	mutex_unlock(&net->mpls.platform_mutex);

	kvfree(platform_label);
}
2793
/* Per-network-namespace lifecycle hooks */
static struct pernet_operations mpls_net_ops = {
	.init = mpls_net_init,
	.exit = mpls_net_exit,
};
2798
/* AF_MPLS hooks so per-link MPLS stats appear in RTM_GETSTATS dumps */
static struct rtnl_af_ops mpls_af_ops __read_mostly = {
	.family		   = AF_MPLS,
	.fill_stats_af	   = mpls_fill_stats_af,
	.get_stats_af_size = mpls_get_stats_af_size,
};
2804
/* rtnetlink message handlers for PF_MPLS; all doit/dumpit callbacks run
 * without the RTNL lock (serialization is done internally).
 */
static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_module = {
	{THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL,
	 RTNL_FLAG_DOIT_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
	{THIS_MODULE, PF_MPLS, RTM_GETNETCONF,
	 mpls_netconf_get_devconf, mpls_netconf_dump_devconf,
	 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
};
2816
2817static int __init mpls_init(void)
2818{
2819 int err;
2820
2821 BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);
2822
2823 err = register_pernet_subsys(&mpls_net_ops);
2824 if (err)
2825 goto out;
2826
2827 err = register_netdevice_notifier(&mpls_dev_notifier);
2828 if (err)
2829 goto out_unregister_pernet;
2830
2831 dev_add_pack(&mpls_packet_type);
2832
2833 err = rtnl_af_register(&mpls_af_ops);
2834 if (err)
2835 goto out_unregister_dev_type;
2836
2837 err = rtnl_register_many(mpls_rtnl_msg_handlers);
2838 if (err)
2839 goto out_unregister_rtnl_af;
2840
2841 err = ipgre_tunnel_encap_add_mpls_ops();
2842 if (err) {
2843 pr_err("Can't add mpls over gre tunnel ops\n");
2844 goto out_unregister_rtnl;
2845 }
2846
2847 err = 0;
2848out:
2849 return err;
2850
2851out_unregister_rtnl:
2852 rtnl_unregister_many(mpls_rtnl_msg_handlers);
2853out_unregister_rtnl_af:
2854 rtnl_af_unregister(&mpls_af_ops);
2855out_unregister_dev_type:
2856 dev_remove_pack(&mpls_packet_type);
2857out_unregister_pernet:
2858 unregister_pernet_subsys(&mpls_net_ops);
2859 goto out;
2860}
2861module_init(mpls_init);
2862
/* Module unload: tear everything down in the reverse order of mpls_init(). */
static void __exit mpls_exit(void)
{
	rtnl_unregister_all(PF_MPLS);
	rtnl_af_unregister(&mpls_af_ops);
	dev_remove_pack(&mpls_packet_type);
	unregister_netdevice_notifier(&mpls_dev_notifier);
	unregister_pernet_subsys(&mpls_net_ops);
	ipgre_tunnel_encap_del_mpls_ops();
}
module_exit(mpls_exit);
2873
2874MODULE_DESCRIPTION("MultiProtocol Label Switching");
2875MODULE_LICENSE("GPL v2");
2876MODULE_ALIAS_NETPROTO(PF_MPLS);