Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
fork
Configure Feed
Select the types of activity you want to include in your feed.
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27#include <linux/capability.h>
28#include <linux/errno.h>
29#include <linux/export.h>
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
38#include <linux/mroute6.h>
39#include <linux/init.h>
40#include <linux/if_arp.h>
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
43#include <linux/nsproxy.h>
44#include <linux/slab.h>
45#include <net/net_namespace.h>
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
56#include <net/netevent.h>
57#include <net/netlink.h>
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/*
 * Debug verbosity for this file.  With RT6_DEBUG at 3 or above, RDBG()
 * and RT6_TRACE() expand to printk() calls; below that they expand to
 * nothing (RT6_TRACE() to an empty statement).
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
80static unsigned int ip6_mtu(const struct dst_entry *dst);
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
85static int ip6_dst_gc(struct dst_ops *ops);
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
92#ifdef CONFIG_IPV6_ROUTE_INFO
93static struct rt6_info *rt6_add_route_info(struct net *net,
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
96 unsigned pref);
97static struct rt6_info *rt6_get_route_info(struct net *net,
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
100#endif
101
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
140static struct dst_ops ip6_dst_ops_template = {
141 .family = AF_INET6,
142 .protocol = cpu_to_be16(ETH_P_IPV6),
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
146 .default_advmss = ip6_default_advmss,
147 .mtu = ip6_mtu,
148 .cow_metrics = ipv6_cow_metrics,
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
154 .local_out = __ip6_local_out,
155 .neigh_lookup = ip6_neigh_lookup,
156};
157
158static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
159{
160 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161
162 return mtu ? : dst->dev->mtu;
163}
164
165static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166{
167}
168
169static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 unsigned long old)
171{
172 return NULL;
173}
174
175static struct dst_ops ip6_dst_blackhole_ops = {
176 .family = AF_INET6,
177 .protocol = cpu_to_be16(ETH_P_IPV6),
178 .destroy = ip6_dst_destroy,
179 .check = ip6_dst_check,
180 .mtu = ip6_blackhole_mtu,
181 .default_advmss = ip6_default_advmss,
182 .update_pmtu = ip6_rt_blackhole_update_pmtu,
183 .cow_metrics = ip6_rt_blackhole_cow_metrics,
184 .neigh_lookup = ip6_neigh_lookup,
185};
186
187static const u32 ip6_template_metrics[RTAX_MAX] = {
188 [RTAX_HOPLIMIT - 1] = 255,
189};
190
191static struct rt6_info ip6_null_entry_template = {
192 .dst = {
193 .__refcnt = ATOMIC_INIT(1),
194 .__use = 1,
195 .obsolete = -1,
196 .error = -ENETUNREACH,
197 .input = ip6_pkt_discard,
198 .output = ip6_pkt_discard_out,
199 },
200 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
201 .rt6i_protocol = RTPROT_KERNEL,
202 .rt6i_metric = ~(u32) 0,
203 .rt6i_ref = ATOMIC_INIT(1),
204};
205
206#ifdef CONFIG_IPV6_MULTIPLE_TABLES
207
208static int ip6_pkt_prohibit(struct sk_buff *skb);
209static int ip6_pkt_prohibit_out(struct sk_buff *skb);
210
211static struct rt6_info ip6_prohibit_entry_template = {
212 .dst = {
213 .__refcnt = ATOMIC_INIT(1),
214 .__use = 1,
215 .obsolete = -1,
216 .error = -EACCES,
217 .input = ip6_pkt_prohibit,
218 .output = ip6_pkt_prohibit_out,
219 },
220 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
221 .rt6i_protocol = RTPROT_KERNEL,
222 .rt6i_metric = ~(u32) 0,
223 .rt6i_ref = ATOMIC_INIT(1),
224};
225
226static struct rt6_info ip6_blk_hole_entry_template = {
227 .dst = {
228 .__refcnt = ATOMIC_INIT(1),
229 .__use = 1,
230 .obsolete = -1,
231 .error = -EINVAL,
232 .input = dst_discard,
233 .output = dst_discard,
234 },
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
236 .rt6i_protocol = RTPROT_KERNEL,
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
239};
240
241#endif
242
243/* allocate dst with ip6_dst_ops */
244static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
245 struct net_device *dev,
246 int flags)
247{
248 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
249
250 if (rt != NULL)
251 memset(&rt->rt6i_table, 0,
252 sizeof(*rt) - sizeof(struct dst_entry));
253
254 return rt;
255}
256
257static void ip6_dst_destroy(struct dst_entry *dst)
258{
259 struct rt6_info *rt = (struct rt6_info *)dst;
260 struct inet6_dev *idev = rt->rt6i_idev;
261 struct inet_peer *peer = rt->rt6i_peer;
262
263 if (!(rt->dst.flags & DST_HOST))
264 dst_destroy_metrics_generic(dst);
265
266 if (idev != NULL) {
267 rt->rt6i_idev = NULL;
268 in6_dev_put(idev);
269 }
270 if (peer) {
271 rt->rt6i_peer = NULL;
272 inet_putpeer(peer);
273 }
274}
275
276static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277
278static u32 rt6_peer_genid(void)
279{
280 return atomic_read(&__rt6_peer_genid);
281}
282
283void rt6_bind_peer(struct rt6_info *rt, int create)
284{
285 struct inet_peer *peer;
286
287 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 inet_putpeer(peer);
290 else
291 rt->rt6i_peer_genid = rt6_peer_genid();
292}
293
294static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 int how)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
299 struct net_device *loopback_dev =
300 dev_net(dev)->loopback_dev;
301
302 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
303 struct inet6_dev *loopback_idev =
304 in6_dev_get(loopback_dev);
305 if (loopback_idev != NULL) {
306 rt->rt6i_idev = loopback_idev;
307 in6_dev_put(idev);
308 }
309 }
310}
311
312static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313{
314 return (rt->rt6i_flags & RTF_EXPIRES) &&
315 time_after(jiffies, rt->rt6i_expires);
316}
317
318static inline int rt6_need_strict(const struct in6_addr *daddr)
319{
320 return ipv6_addr_type(daddr) &
321 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
322}
323
324/*
325 * Route lookup. Any table->tb6_lock is implied.
326 */
327
328static inline struct rt6_info *rt6_device_match(struct net *net,
329 struct rt6_info *rt,
330 const struct in6_addr *saddr,
331 int oif,
332 int flags)
333{
334 struct rt6_info *local = NULL;
335 struct rt6_info *sprt;
336
337 if (!oif && ipv6_addr_any(saddr))
338 goto out;
339
340 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
341 struct net_device *dev = sprt->rt6i_dev;
342
343 if (oif) {
344 if (dev->ifindex == oif)
345 return sprt;
346 if (dev->flags & IFF_LOOPBACK) {
347 if (sprt->rt6i_idev == NULL ||
348 sprt->rt6i_idev->dev->ifindex != oif) {
349 if (flags & RT6_LOOKUP_F_IFACE && oif)
350 continue;
351 if (local && (!oif ||
352 local->rt6i_idev->dev->ifindex == oif))
353 continue;
354 }
355 local = sprt;
356 }
357 } else {
358 if (ipv6_chk_addr(net, saddr, dev,
359 flags & RT6_LOOKUP_F_IFACE))
360 return sprt;
361 }
362 }
363
364 if (oif) {
365 if (local)
366 return local;
367
368 if (flags & RT6_LOOKUP_F_IFACE)
369 return net->ipv6.ip6_null_entry;
370 }
371out:
372 return rt;
373}
374
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour behind @rt is not in a NUD_VALID state and the last
 * update is older than the interface's rtr_probe_interval, send a
 * neighbour solicitation to the solicited-node multicast address of the
 * neighbour.  Accepts a NULL @rt, in which case nothing happens.
 *
 * Okay, this does not seem to be appropriate for now, however, we need
 * to check if it is really so.  Router Reachability Probe MUST be
 * rate-limited to no more than one per minute.
 *
 * neigh->updated is modified here, so the neighbour lock is taken for
 * writing; a reader lock does not serialise two probers against each
 * other and would let both pass the rate-limit test.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;
	int send_probe = 0;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		/* stamp first so concurrent callers are rate-limited */
		neigh->updated = jiffies;
		send_probe = 1;
	}
	write_unlock_bh(&neigh->lock);

	/* the solicitation itself is sent without holding neigh->lock */
	if (send_probe) {
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414
415/*
416 * Default Router Selection (RFC 2461 6.3.6)
417 */
418static inline int rt6_check_dev(struct rt6_info *rt, int oif)
419{
420 struct net_device *dev = rt->rt6i_dev;
421 if (!oif || dev->ifindex == oif)
422 return 2;
423 if ((dev->flags & IFF_LOOPBACK) &&
424 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 return 1;
426 return 0;
427}
428
429static inline int rt6_check_neigh(struct rt6_info *rt)
430{
431 struct neighbour *neigh;
432 int m;
433
434 rcu_read_lock();
435 neigh = dst_get_neighbour(&rt->dst);
436 if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 !(rt->rt6i_flags & RTF_GATEWAY))
438 m = 1;
439 else if (neigh) {
440 read_lock_bh(&neigh->lock);
441 if (neigh->nud_state & NUD_VALID)
442 m = 2;
443#ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh->nud_state & NUD_FAILED)
445 m = 0;
446#endif
447 else
448 m = 1;
449 read_unlock_bh(&neigh->lock);
450 } else
451 m = 0;
452 rcu_read_unlock();
453 return m;
454}
455
456static int rt6_score_route(struct rt6_info *rt, int oif,
457 int strict)
458{
459 int m, n;
460
461 m = rt6_check_dev(rt, oif);
462 if (!m && (strict & RT6_LOOKUP_F_IFACE))
463 return -1;
464#ifdef CONFIG_IPV6_ROUTER_PREF
465 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466#endif
467 n = rt6_check_neigh(rt);
468 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
469 return -1;
470 return m;
471}
472
473static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 int *mpri, struct rt6_info *match)
475{
476 int m;
477
478 if (rt6_check_expired(rt))
479 goto out;
480
481 m = rt6_score_route(rt, oif, strict);
482 if (m < 0)
483 goto out;
484
485 if (m > *mpri) {
486 if (strict & RT6_LOOKUP_F_REACHABLE)
487 rt6_probe(match);
488 *mpri = m;
489 match = rt;
490 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 rt6_probe(rt);
492 }
493
494out:
495 return match;
496}
497
498static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 struct rt6_info *rr_head,
500 u32 metric, int oif, int strict)
501{
502 struct rt6_info *rt, *match;
503 int mpri = -1;
504
505 match = NULL;
506 for (rt = rr_head; rt && rt->rt6i_metric == metric;
507 rt = rt->dst.rt6_next)
508 match = find_match(rt, oif, strict, &mpri, match);
509 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
510 rt = rt->dst.rt6_next)
511 match = find_match(rt, oif, strict, &mpri, match);
512
513 return match;
514}
515
516static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517{
518 struct rt6_info *match, *rt0;
519 struct net *net;
520
521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
522 __func__, fn->leaf, oif);
523
524 rt0 = fn->rr_ptr;
525 if (!rt0)
526 fn->rr_ptr = rt0 = fn->leaf;
527
528 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
529
530 if (!match &&
531 (strict & RT6_LOOKUP_F_REACHABLE)) {
532 struct rt6_info *next = rt0->dst.rt6_next;
533
534 /* no entries matched; do round-robin */
535 if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 next = fn->leaf;
537
538 if (next != rt0)
539 fn->rr_ptr = next;
540 }
541
542 RT6_TRACE("%s() => %p\n",
543 __func__, match);
544
545 net = dev_net(rt0->rt6i_dev);
546 return match ? match : net->ipv6.ip6_null_entry;
547}
548
549#ifdef CONFIG_IPV6_ROUTE_INFO
550int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
551 const struct in6_addr *gwaddr)
552{
553 struct net *net = dev_net(dev);
554 struct route_info *rinfo = (struct route_info *) opt;
555 struct in6_addr prefix_buf, *prefix;
556 unsigned int pref;
557 unsigned long lifetime;
558 struct rt6_info *rt;
559
560 if (len < sizeof(struct route_info)) {
561 return -EINVAL;
562 }
563
564 /* Sanity check for prefix_len and length */
565 if (rinfo->length > 3) {
566 return -EINVAL;
567 } else if (rinfo->prefix_len > 128) {
568 return -EINVAL;
569 } else if (rinfo->prefix_len > 64) {
570 if (rinfo->length < 2) {
571 return -EINVAL;
572 }
573 } else if (rinfo->prefix_len > 0) {
574 if (rinfo->length < 1) {
575 return -EINVAL;
576 }
577 }
578
579 pref = rinfo->route_pref;
580 if (pref == ICMPV6_ROUTER_PREF_INVALID)
581 return -EINVAL;
582
583 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
584
585 if (rinfo->length == 3)
586 prefix = (struct in6_addr *)rinfo->prefix;
587 else {
588 /* this function is safe */
589 ipv6_addr_prefix(&prefix_buf,
590 (struct in6_addr *)rinfo->prefix,
591 rinfo->prefix_len);
592 prefix = &prefix_buf;
593 }
594
595 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
596 dev->ifindex);
597
598 if (rt && !lifetime) {
599 ip6_del_rt(rt);
600 rt = NULL;
601 }
602
603 if (!rt && lifetime)
604 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
605 pref);
606 else if (rt)
607 rt->rt6i_flags = RTF_ROUTEINFO |
608 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
609
610 if (rt) {
611 if (!addrconf_finite_timeout(lifetime)) {
612 rt->rt6i_flags &= ~RTF_EXPIRES;
613 } else {
614 rt->rt6i_expires = jiffies + HZ * lifetime;
615 rt->rt6i_flags |= RTF_EXPIRES;
616 }
617 dst_release(&rt->dst);
618 }
619 return 0;
620}
621#endif
622
/*
 * BACKTRACK(net, saddr) - climb the fib6 tree after a failed lookup.
 *
 * Expects the expanding function to provide the locals "rt" and "fn" and
 * the labels "out" and "restart".  When rt is the null entry, the macro
 * walks from fn towards the root: it jumps to "out" on reaching a node
 * flagged RTN_TL_ROOT, descends into the parent's source subtree (looked
 * up with saddr) when there is one that fn is not itself the root of,
 * and jumps to "restart" as soon as it lands on a node flagged
 * RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (!FIB6_SUBTREE(pn) || FIB6_SUBTREE(pn) == fn) \
				fn = pn; \
			else \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
640
641static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 struct fib6_table *table,
643 struct flowi6 *fl6, int flags)
644{
645 struct fib6_node *fn;
646 struct rt6_info *rt;
647
648 read_lock_bh(&table->tb6_lock);
649 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
650restart:
651 rt = fn->leaf;
652 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 BACKTRACK(net, &fl6->saddr);
654out:
655 dst_use(&rt->dst, jiffies);
656 read_unlock_bh(&table->tb6_lock);
657 return rt;
658
659}
660
661struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 const struct in6_addr *saddr, int oif, int strict)
663{
664 struct flowi6 fl6 = {
665 .flowi6_oif = oif,
666 .daddr = *daddr,
667 };
668 struct dst_entry *dst;
669 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
670
671 if (saddr) {
672 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
673 flags |= RT6_LOOKUP_F_HAS_SADDR;
674 }
675
676 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
677 if (dst->error == 0)
678 return (struct rt6_info *) dst;
679
680 dst_release(dst);
681
682 return NULL;
683}
684
685EXPORT_SYMBOL(rt6_lookup);
686
687/* ip6_ins_rt is called with FREE table->tb6_lock.
688 It takes new route entry, the addition fails by any reason the
689 route is freed. In any case, if caller does not hold it, it may
690 be destroyed.
691 */
692
693static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
694{
695 int err;
696 struct fib6_table *table;
697
698 table = rt->rt6i_table;
699 write_lock_bh(&table->tb6_lock);
700 err = fib6_add(&table->tb6_root, rt, info);
701 write_unlock_bh(&table->tb6_lock);
702
703 return err;
704}
705
706int ip6_ins_rt(struct rt6_info *rt)
707{
708 struct nl_info info = {
709 .nl_net = dev_net(rt->rt6i_dev),
710 };
711 return __ip6_ins_rt(rt, &info);
712}
713
714static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
715 const struct in6_addr *daddr,
716 const struct in6_addr *saddr)
717{
718 struct rt6_info *rt;
719
720 /*
721 * Clone the route.
722 */
723
724 rt = ip6_rt_copy(ort, daddr);
725
726 if (rt) {
727 struct neighbour *neigh;
728 int attempts = !in_softirq();
729
730 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
731 if (ort->rt6i_dst.plen != 128 &&
732 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
733 rt->rt6i_flags |= RTF_ANYCAST;
734 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
735 }
736
737 rt->rt6i_flags |= RTF_CACHE;
738
739#ifdef CONFIG_IPV6_SUBTREES
740 if (rt->rt6i_src.plen && saddr) {
741 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
742 rt->rt6i_src.plen = 128;
743 }
744#endif
745
746 retry:
747 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
748 if (IS_ERR(neigh)) {
749 struct net *net = dev_net(rt->rt6i_dev);
750 int saved_rt_min_interval =
751 net->ipv6.sysctl.ip6_rt_gc_min_interval;
752 int saved_rt_elasticity =
753 net->ipv6.sysctl.ip6_rt_gc_elasticity;
754
755 if (attempts-- > 0) {
756 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
757 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
758
759 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
760
761 net->ipv6.sysctl.ip6_rt_gc_elasticity =
762 saved_rt_elasticity;
763 net->ipv6.sysctl.ip6_rt_gc_min_interval =
764 saved_rt_min_interval;
765 goto retry;
766 }
767
768 if (net_ratelimit())
769 printk(KERN_WARNING
770 "ipv6: Neighbour table overflow.\n");
771 dst_free(&rt->dst);
772 return NULL;
773 }
774 dst_set_neighbour(&rt->dst, neigh);
775
776 }
777
778 return rt;
779}
780
781static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
782 const struct in6_addr *daddr)
783{
784 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
785
786 if (rt) {
787 rt->rt6i_flags |= RTF_CACHE;
788 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
789 }
790 return rt;
791}
792
793static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
794 struct flowi6 *fl6, int flags)
795{
796 struct fib6_node *fn;
797 struct rt6_info *rt, *nrt;
798 int strict = 0;
799 int attempts = 3;
800 int err;
801 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
802
803 strict |= flags & RT6_LOOKUP_F_IFACE;
804
805relookup:
806 read_lock_bh(&table->tb6_lock);
807
808restart_2:
809 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
810
811restart:
812 rt = rt6_select(fn, oif, strict | reachable);
813
814 BACKTRACK(net, &fl6->saddr);
815 if (rt == net->ipv6.ip6_null_entry ||
816 rt->rt6i_flags & RTF_CACHE)
817 goto out;
818
819 dst_hold(&rt->dst);
820 read_unlock_bh(&table->tb6_lock);
821
822 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
823 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
824 else if (!(rt->dst.flags & DST_HOST))
825 nrt = rt6_alloc_clone(rt, &fl6->daddr);
826 else
827 goto out2;
828
829 dst_release(&rt->dst);
830 rt = nrt ? : net->ipv6.ip6_null_entry;
831
832 dst_hold(&rt->dst);
833 if (nrt) {
834 err = ip6_ins_rt(nrt);
835 if (!err)
836 goto out2;
837 }
838
839 if (--attempts <= 0)
840 goto out2;
841
842 /*
843 * Race condition! In the gap, when table->tb6_lock was
844 * released someone could insert this route. Relookup.
845 */
846 dst_release(&rt->dst);
847 goto relookup;
848
849out:
850 if (reachable) {
851 reachable = 0;
852 goto restart_2;
853 }
854 dst_hold(&rt->dst);
855 read_unlock_bh(&table->tb6_lock);
856out2:
857 rt->dst.lastuse = jiffies;
858 rt->dst.__use++;
859
860 return rt;
861}
862
863static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
864 struct flowi6 *fl6, int flags)
865{
866 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
867}
868
869void ip6_route_input(struct sk_buff *skb)
870{
871 const struct ipv6hdr *iph = ipv6_hdr(skb);
872 struct net *net = dev_net(skb->dev);
873 int flags = RT6_LOOKUP_F_HAS_SADDR;
874 struct flowi6 fl6 = {
875 .flowi6_iif = skb->dev->ifindex,
876 .daddr = iph->daddr,
877 .saddr = iph->saddr,
878 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
879 .flowi6_mark = skb->mark,
880 .flowi6_proto = iph->nexthdr,
881 };
882
883 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
884 flags |= RT6_LOOKUP_F_IFACE;
885
886 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
887}
888
889static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
890 struct flowi6 *fl6, int flags)
891{
892 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
893}
894
895struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
896 struct flowi6 *fl6)
897{
898 int flags = 0;
899
900 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
901 flags |= RT6_LOOKUP_F_IFACE;
902
903 if (!ipv6_addr_any(&fl6->saddr))
904 flags |= RT6_LOOKUP_F_HAS_SADDR;
905 else if (sk)
906 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
907
908 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
909}
910
911EXPORT_SYMBOL(ip6_route_output);
912
913struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
914{
915 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
916 struct dst_entry *new = NULL;
917
918 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
919 if (rt) {
920 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
921
922 new = &rt->dst;
923
924 new->__use = 1;
925 new->input = dst_discard;
926 new->output = dst_discard;
927
928 if (dst_metrics_read_only(&ort->dst))
929 new->_metrics = ort->dst._metrics;
930 else
931 dst_copy_metrics(new, &ort->dst);
932 rt->rt6i_idev = ort->rt6i_idev;
933 if (rt->rt6i_idev)
934 in6_dev_hold(rt->rt6i_idev);
935 rt->rt6i_expires = 0;
936
937 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
938 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
939 rt->rt6i_metric = 0;
940
941 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
942#ifdef CONFIG_IPV6_SUBTREES
943 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
944#endif
945
946 dst_free(new);
947 }
948
949 dst_release(dst_orig);
950 return new ? new : ERR_PTR(-ENOMEM);
951}
952
953/*
954 * Destination cache support functions
955 */
956
957static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
958{
959 struct rt6_info *rt;
960
961 rt = (struct rt6_info *) dst;
962
963 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
964 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
965 if (!rt->rt6i_peer)
966 rt6_bind_peer(rt, 0);
967 rt->rt6i_peer_genid = rt6_peer_genid();
968 }
969 return dst;
970 }
971 return NULL;
972}
973
974static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
975{
976 struct rt6_info *rt = (struct rt6_info *) dst;
977
978 if (rt) {
979 if (rt->rt6i_flags & RTF_CACHE) {
980 if (rt6_check_expired(rt)) {
981 ip6_del_rt(rt);
982 dst = NULL;
983 }
984 } else {
985 dst_release(dst);
986 dst = NULL;
987 }
988 }
989 return dst;
990}
991
992static void ip6_link_failure(struct sk_buff *skb)
993{
994 struct rt6_info *rt;
995
996 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
997
998 rt = (struct rt6_info *) skb_dst(skb);
999 if (rt) {
1000 if (rt->rt6i_flags&RTF_CACHE) {
1001 dst_set_expires(&rt->dst, 0);
1002 rt->rt6i_flags |= RTF_EXPIRES;
1003 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1004 rt->rt6i_node->fn_sernum = -1;
1005 }
1006}
1007
1008static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1009{
1010 struct rt6_info *rt6 = (struct rt6_info*)dst;
1011
1012 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1013 rt6->rt6i_flags |= RTF_MODIFIED;
1014 if (mtu < IPV6_MIN_MTU) {
1015 u32 features = dst_metric(dst, RTAX_FEATURES);
1016 mtu = IPV6_MIN_MTU;
1017 features |= RTAX_FEATURE_ALLFRAG;
1018 dst_metric_set(dst, RTAX_FEATURES, features);
1019 }
1020 dst_metric_set(dst, RTAX_MTU, mtu);
1021 }
1022}
1023
1024static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1025{
1026 struct net_device *dev = dst->dev;
1027 unsigned int mtu = dst_mtu(dst);
1028 struct net *net = dev_net(dev);
1029
1030 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1031
1032 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1033 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1034
1035 /*
1036 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1037 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1038 * IPV6_MAXPLEN is also valid and means: "any MSS,
1039 * rely only on pmtu discovery"
1040 */
1041 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1042 mtu = IPV6_MAXPLEN;
1043 return mtu;
1044}
1045
1046static unsigned int ip6_mtu(const struct dst_entry *dst)
1047{
1048 struct inet6_dev *idev;
1049 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1050
1051 if (mtu)
1052 return mtu;
1053
1054 mtu = IPV6_MIN_MTU;
1055
1056 rcu_read_lock();
1057 idev = __in6_dev_get(dst->dev);
1058 if (idev)
1059 mtu = idev->cnf.mtu6;
1060 rcu_read_unlock();
1061
1062 return mtu;
1063}
1064
1065static struct dst_entry *icmp6_dst_gc_list;
1066static DEFINE_SPINLOCK(icmp6_dst_lock);
1067
1068struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1069 struct neighbour *neigh,
1070 const struct in6_addr *addr)
1071{
1072 struct rt6_info *rt;
1073 struct inet6_dev *idev = in6_dev_get(dev);
1074 struct net *net = dev_net(dev);
1075
1076 if (unlikely(idev == NULL))
1077 return NULL;
1078
1079 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1080 if (unlikely(rt == NULL)) {
1081 in6_dev_put(idev);
1082 goto out;
1083 }
1084
1085 if (neigh)
1086 neigh_hold(neigh);
1087 else {
1088 neigh = ndisc_get_neigh(dev, addr);
1089 if (IS_ERR(neigh))
1090 neigh = NULL;
1091 }
1092
1093 rt->dst.flags |= DST_HOST;
1094 rt->dst.output = ip6_output;
1095 dst_set_neighbour(&rt->dst, neigh);
1096 atomic_set(&rt->dst.__refcnt, 1);
1097 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1098 rt->rt6i_dst.plen = 128;
1099 rt->rt6i_idev = idev;
1100 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1101
1102 spin_lock_bh(&icmp6_dst_lock);
1103 rt->dst.next = icmp6_dst_gc_list;
1104 icmp6_dst_gc_list = &rt->dst;
1105 spin_unlock_bh(&icmp6_dst_lock);
1106
1107 fib6_force_start_gc(net);
1108
1109out:
1110 return &rt->dst;
1111}
1112
1113int icmp6_dst_gc(void)
1114{
1115 struct dst_entry *dst, **pprev;
1116 int more = 0;
1117
1118 spin_lock_bh(&icmp6_dst_lock);
1119 pprev = &icmp6_dst_gc_list;
1120
1121 while ((dst = *pprev) != NULL) {
1122 if (!atomic_read(&dst->__refcnt)) {
1123 *pprev = dst->next;
1124 dst_free(dst);
1125 } else {
1126 pprev = &dst->next;
1127 ++more;
1128 }
1129 }
1130
1131 spin_unlock_bh(&icmp6_dst_lock);
1132
1133 return more;
1134}
1135
1136static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1137 void *arg)
1138{
1139 struct dst_entry *dst, **pprev;
1140
1141 spin_lock_bh(&icmp6_dst_lock);
1142 pprev = &icmp6_dst_gc_list;
1143 while ((dst = *pprev) != NULL) {
1144 struct rt6_info *rt = (struct rt6_info *) dst;
1145 if (func(rt, arg)) {
1146 *pprev = dst->next;
1147 dst_free(dst);
1148 } else {
1149 pprev = &dst->next;
1150 }
1151 }
1152 spin_unlock_bh(&icmp6_dst_lock);
1153}
1154
1155static int ip6_dst_gc(struct dst_ops *ops)
1156{
1157 unsigned long now = jiffies;
1158 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1159 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1160 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1161 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1162 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1163 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1164 int entries;
1165
1166 entries = dst_entries_get_fast(ops);
1167 if (time_after(rt_last_gc + rt_min_interval, now) &&
1168 entries <= rt_max_size)
1169 goto out;
1170
1171 net->ipv6.ip6_rt_gc_expire++;
1172 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1173 net->ipv6.ip6_rt_last_gc = now;
1174 entries = dst_entries_get_slow(ops);
1175 if (entries < ops->gc_thresh)
1176 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1177out:
1178 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1179 return entries > rt_max_size;
1180}
1181
1182/* Clean host part of a prefix. Not necessary in radix tree,
1183 but results in cleaner routing tables.
1184
1185 Remove it only when all the things will work!
1186 */
1187
1188int ip6_dst_hoplimit(struct dst_entry *dst)
1189{
1190 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1191 if (hoplimit == 0) {
1192 struct net_device *dev = dst->dev;
1193 struct inet6_dev *idev;
1194
1195 rcu_read_lock();
1196 idev = __in6_dev_get(dev);
1197 if (idev)
1198 hoplimit = idev->cnf.hop_limit;
1199 else
1200 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1201 rcu_read_unlock();
1202 }
1203 return hoplimit;
1204}
1205EXPORT_SYMBOL(ip6_dst_hoplimit);
1206
1207/*
1208 *
1209 */
1210
1211int ip6_route_add(struct fib6_config *cfg)
1212{
1213 int err;
1214 struct net *net = cfg->fc_nlinfo.nl_net;
1215 struct rt6_info *rt = NULL;
1216 struct net_device *dev = NULL;
1217 struct inet6_dev *idev = NULL;
1218 struct fib6_table *table;
1219 int addr_type;
1220
1221 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1222 return -EINVAL;
1223#ifndef CONFIG_IPV6_SUBTREES
1224 if (cfg->fc_src_len)
1225 return -EINVAL;
1226#endif
1227 if (cfg->fc_ifindex) {
1228 err = -ENODEV;
1229 dev = dev_get_by_index(net, cfg->fc_ifindex);
1230 if (!dev)
1231 goto out;
1232 idev = in6_dev_get(dev);
1233 if (!idev)
1234 goto out;
1235 }
1236
1237 if (cfg->fc_metric == 0)
1238 cfg->fc_metric = IP6_RT_PRIO_USER;
1239
1240 table = fib6_new_table(net, cfg->fc_table);
1241 if (table == NULL) {
1242 err = -ENOBUFS;
1243 goto out;
1244 }
1245
1246 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1247
1248 if (rt == NULL) {
1249 err = -ENOMEM;
1250 goto out;
1251 }
1252
1253 rt->dst.obsolete = -1;
1254 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1255 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1256 0;
1257
1258 if (cfg->fc_protocol == RTPROT_UNSPEC)
1259 cfg->fc_protocol = RTPROT_BOOT;
1260 rt->rt6i_protocol = cfg->fc_protocol;
1261
1262 addr_type = ipv6_addr_type(&cfg->fc_dst);
1263
1264 if (addr_type & IPV6_ADDR_MULTICAST)
1265 rt->dst.input = ip6_mc_input;
1266 else if (cfg->fc_flags & RTF_LOCAL)
1267 rt->dst.input = ip6_input;
1268 else
1269 rt->dst.input = ip6_forward;
1270
1271 rt->dst.output = ip6_output;
1272
1273 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1274 rt->rt6i_dst.plen = cfg->fc_dst_len;
1275 if (rt->rt6i_dst.plen == 128)
1276 rt->dst.flags |= DST_HOST;
1277
1278 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1279 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1280 if (!metrics) {
1281 err = -ENOMEM;
1282 goto out;
1283 }
1284 dst_init_metrics(&rt->dst, metrics, 0);
1285 }
1286#ifdef CONFIG_IPV6_SUBTREES
1287 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1288 rt->rt6i_src.plen = cfg->fc_src_len;
1289#endif
1290
1291 rt->rt6i_metric = cfg->fc_metric;
1292
1293 /* We cannot add true routes via loopback here,
1294 they would result in kernel looping; promote them to reject routes
1295 */
1296 if ((cfg->fc_flags & RTF_REJECT) ||
1297 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1298 && !(cfg->fc_flags&RTF_LOCAL))) {
1299 /* hold loopback dev/idev if we haven't done so. */
1300 if (dev != net->loopback_dev) {
1301 if (dev) {
1302 dev_put(dev);
1303 in6_dev_put(idev);
1304 }
1305 dev = net->loopback_dev;
1306 dev_hold(dev);
1307 idev = in6_dev_get(dev);
1308 if (!idev) {
1309 err = -ENODEV;
1310 goto out;
1311 }
1312 }
1313 rt->dst.output = ip6_pkt_discard_out;
1314 rt->dst.input = ip6_pkt_discard;
1315 rt->dst.error = -ENETUNREACH;
1316 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1317 goto install_route;
1318 }
1319
1320 if (cfg->fc_flags & RTF_GATEWAY) {
1321 const struct in6_addr *gw_addr;
1322 int gwa_type;
1323
1324 gw_addr = &cfg->fc_gateway;
1325 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1326 gwa_type = ipv6_addr_type(gw_addr);
1327
1328 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1329 struct rt6_info *grt;
1330
1331 /* IPv6 strictly inhibits using not link-local
1332 addresses as nexthop address.
1333 Otherwise, router will not able to send redirects.
1334 It is very good, but in some (rare!) circumstances
1335 (SIT, PtP, NBMA NOARP links) it is handy to allow
1336 some exceptions. --ANK
1337 */
1338 err = -EINVAL;
1339 if (!(gwa_type&IPV6_ADDR_UNICAST))
1340 goto out;
1341
1342 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1343
1344 err = -EHOSTUNREACH;
1345 if (grt == NULL)
1346 goto out;
1347 if (dev) {
1348 if (dev != grt->rt6i_dev) {
1349 dst_release(&grt->dst);
1350 goto out;
1351 }
1352 } else {
1353 dev = grt->rt6i_dev;
1354 idev = grt->rt6i_idev;
1355 dev_hold(dev);
1356 in6_dev_hold(grt->rt6i_idev);
1357 }
1358 if (!(grt->rt6i_flags&RTF_GATEWAY))
1359 err = 0;
1360 dst_release(&grt->dst);
1361
1362 if (err)
1363 goto out;
1364 }
1365 err = -EINVAL;
1366 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1367 goto out;
1368 }
1369
1370 err = -ENODEV;
1371 if (dev == NULL)
1372 goto out;
1373
1374 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1375 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1376 err = -EINVAL;
1377 goto out;
1378 }
1379 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1380 rt->rt6i_prefsrc.plen = 128;
1381 } else
1382 rt->rt6i_prefsrc.plen = 0;
1383
1384 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1385 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1386 if (IS_ERR(n)) {
1387 err = PTR_ERR(n);
1388 goto out;
1389 }
1390 dst_set_neighbour(&rt->dst, n);
1391 }
1392
1393 rt->rt6i_flags = cfg->fc_flags;
1394
1395install_route:
1396 if (cfg->fc_mx) {
1397 struct nlattr *nla;
1398 int remaining;
1399
1400 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1401 int type = nla_type(nla);
1402
1403 if (type) {
1404 if (type > RTAX_MAX) {
1405 err = -EINVAL;
1406 goto out;
1407 }
1408
1409 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1410 }
1411 }
1412 }
1413
1414 rt->dst.dev = dev;
1415 rt->rt6i_idev = idev;
1416 rt->rt6i_table = table;
1417
1418 cfg->fc_nlinfo.nl_net = dev_net(dev);
1419
1420 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1421
1422out:
1423 if (dev)
1424 dev_put(dev);
1425 if (idev)
1426 in6_dev_put(idev);
1427 if (rt)
1428 dst_free(&rt->dst);
1429 return err;
1430}
1431
1432static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1433{
1434 int err;
1435 struct fib6_table *table;
1436 struct net *net = dev_net(rt->rt6i_dev);
1437
1438 if (rt == net->ipv6.ip6_null_entry)
1439 return -ENOENT;
1440
1441 table = rt->rt6i_table;
1442 write_lock_bh(&table->tb6_lock);
1443
1444 err = fib6_del(rt, info);
1445 dst_release(&rt->dst);
1446
1447 write_unlock_bh(&table->tb6_lock);
1448
1449 return err;
1450}
1451
1452int ip6_del_rt(struct rt6_info *rt)
1453{
1454 struct nl_info info = {
1455 .nl_net = dev_net(rt->rt6i_dev),
1456 };
1457 return __ip6_del_rt(rt, &info);
1458}
1459
1460static int ip6_route_del(struct fib6_config *cfg)
1461{
1462 struct fib6_table *table;
1463 struct fib6_node *fn;
1464 struct rt6_info *rt;
1465 int err = -ESRCH;
1466
1467 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1468 if (table == NULL)
1469 return err;
1470
1471 read_lock_bh(&table->tb6_lock);
1472
1473 fn = fib6_locate(&table->tb6_root,
1474 &cfg->fc_dst, cfg->fc_dst_len,
1475 &cfg->fc_src, cfg->fc_src_len);
1476
1477 if (fn) {
1478 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1479 if (cfg->fc_ifindex &&
1480 (rt->rt6i_dev == NULL ||
1481 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1482 continue;
1483 if (cfg->fc_flags & RTF_GATEWAY &&
1484 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1485 continue;
1486 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1487 continue;
1488 dst_hold(&rt->dst);
1489 read_unlock_bh(&table->tb6_lock);
1490
1491 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1492 }
1493 }
1494 read_unlock_bh(&table->tb6_lock);
1495
1496 return err;
1497}
1498
1499/*
1500 * Handle redirects
1501 */
1502struct ip6rd_flowi {
1503 struct flowi6 fl6;
1504 struct in6_addr gateway;
1505};
1506
1507static struct rt6_info *__ip6_route_redirect(struct net *net,
1508 struct fib6_table *table,
1509 struct flowi6 *fl6,
1510 int flags)
1511{
1512 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1513 struct rt6_info *rt;
1514 struct fib6_node *fn;
1515
1516 /*
1517 * Get the "current" route for this destination and
1518 * check if the redirect has come from approriate router.
1519 *
1520 * RFC 2461 specifies that redirects should only be
1521 * accepted if they come from the nexthop to the target.
1522 * Due to the way the routes are chosen, this notion
1523 * is a bit fuzzy and one might need to check all possible
1524 * routes.
1525 */
1526
1527 read_lock_bh(&table->tb6_lock);
1528 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1529restart:
1530 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1531 /*
1532 * Current route is on-link; redirect is always invalid.
1533 *
1534 * Seems, previous statement is not true. It could
1535 * be node, which looks for us as on-link (f.e. proxy ndisc)
1536 * But then router serving it might decide, that we should
1537 * know truth 8)8) --ANK (980726).
1538 */
1539 if (rt6_check_expired(rt))
1540 continue;
1541 if (!(rt->rt6i_flags & RTF_GATEWAY))
1542 continue;
1543 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1544 continue;
1545 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1546 continue;
1547 break;
1548 }
1549
1550 if (!rt)
1551 rt = net->ipv6.ip6_null_entry;
1552 BACKTRACK(net, &fl6->saddr);
1553out:
1554 dst_hold(&rt->dst);
1555
1556 read_unlock_bh(&table->tb6_lock);
1557
1558 return rt;
1559};
1560
1561static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1562 const struct in6_addr *src,
1563 const struct in6_addr *gateway,
1564 struct net_device *dev)
1565{
1566 int flags = RT6_LOOKUP_F_HAS_SADDR;
1567 struct net *net = dev_net(dev);
1568 struct ip6rd_flowi rdfl = {
1569 .fl6 = {
1570 .flowi6_oif = dev->ifindex,
1571 .daddr = *dest,
1572 .saddr = *src,
1573 },
1574 };
1575
1576 ipv6_addr_copy(&rdfl.gateway, gateway);
1577
1578 if (rt6_need_strict(dest))
1579 flags |= RT6_LOOKUP_F_IFACE;
1580
1581 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1582 flags, __ip6_route_redirect);
1583}
1584
1585void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1586 const struct in6_addr *saddr,
1587 struct neighbour *neigh, u8 *lladdr, int on_link)
1588{
1589 struct rt6_info *rt, *nrt = NULL;
1590 struct netevent_redirect netevent;
1591 struct net *net = dev_net(neigh->dev);
1592
1593 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1594
1595 if (rt == net->ipv6.ip6_null_entry) {
1596 if (net_ratelimit())
1597 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1598 "for redirect target\n");
1599 goto out;
1600 }
1601
1602 /*
1603 * We have finally decided to accept it.
1604 */
1605
1606 neigh_update(neigh, lladdr, NUD_STALE,
1607 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1608 NEIGH_UPDATE_F_OVERRIDE|
1609 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1610 NEIGH_UPDATE_F_ISROUTER))
1611 );
1612
1613 /*
1614 * Redirect received -> path was valid.
1615 * Look, redirects are sent only in response to data packets,
1616 * so that this nexthop apparently is reachable. --ANK
1617 */
1618 dst_confirm(&rt->dst);
1619
1620 /* Duplicate redirect: silently ignore. */
1621 if (neigh == dst_get_neighbour_raw(&rt->dst))
1622 goto out;
1623
1624 nrt = ip6_rt_copy(rt, dest);
1625 if (nrt == NULL)
1626 goto out;
1627
1628 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1629 if (on_link)
1630 nrt->rt6i_flags &= ~RTF_GATEWAY;
1631
1632 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1633 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1634
1635 if (ip6_ins_rt(nrt))
1636 goto out;
1637
1638 netevent.old = &rt->dst;
1639 netevent.new = &nrt->dst;
1640 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1641
1642 if (rt->rt6i_flags&RTF_CACHE) {
1643 ip6_del_rt(rt);
1644 return;
1645 }
1646
1647out:
1648 dst_release(&rt->dst);
1649}
1650
1651/*
1652 * Handle ICMP "packet too big" messages
1653 * i.e. Path MTU discovery
1654 */
1655
1656static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
1657 struct net *net, u32 pmtu, int ifindex)
1658{
1659 struct rt6_info *rt, *nrt;
1660 int allfrag = 0;
1661again:
1662 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1663 if (rt == NULL)
1664 return;
1665
1666 if (rt6_check_expired(rt)) {
1667 ip6_del_rt(rt);
1668 goto again;
1669 }
1670
1671 if (pmtu >= dst_mtu(&rt->dst))
1672 goto out;
1673
1674 if (pmtu < IPV6_MIN_MTU) {
1675 /*
1676 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1677 * MTU (1280) and a fragment header should always be included
1678 * after a node receiving Too Big message reporting PMTU is
1679 * less than the IPv6 Minimum Link MTU.
1680 */
1681 pmtu = IPV6_MIN_MTU;
1682 allfrag = 1;
1683 }
1684
1685 /* New mtu received -> path was valid.
1686 They are sent only in response to data packets,
1687 so that this nexthop apparently is reachable. --ANK
1688 */
1689 dst_confirm(&rt->dst);
1690
1691 /* Host route. If it is static, it would be better
1692 not to override it, but add new one, so that
1693 when cache entry will expire old pmtu
1694 would return automatically.
1695 */
1696 if (rt->rt6i_flags & RTF_CACHE) {
1697 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1698 if (allfrag) {
1699 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1700 features |= RTAX_FEATURE_ALLFRAG;
1701 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1702 }
1703 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1704 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1705 goto out;
1706 }
1707
1708 /* Network route.
1709 Two cases are possible:
1710 1. It is connected route. Action: COW
1711 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1712 */
1713 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
1714 nrt = rt6_alloc_cow(rt, daddr, saddr);
1715 else
1716 nrt = rt6_alloc_clone(rt, daddr);
1717
1718 if (nrt) {
1719 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1720 if (allfrag) {
1721 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1722 features |= RTAX_FEATURE_ALLFRAG;
1723 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1724 }
1725
1726 /* According to RFC 1981, detecting PMTU increase shouldn't be
1727 * happened within 5 mins, the recommended timer is 10 mins.
1728 * Here this route expiration time is set to ip6_rt_mtu_expires
1729 * which is 10 mins. After 10 mins the decreased pmtu is expired
1730 * and detecting PMTU increase will be automatically happened.
1731 */
1732 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1733 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1734
1735 ip6_ins_rt(nrt);
1736 }
1737out:
1738 dst_release(&rt->dst);
1739}
1740
1741void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
1742 struct net_device *dev, u32 pmtu)
1743{
1744 struct net *net = dev_net(dev);
1745
1746 /*
1747 * RFC 1981 states that a node "MUST reduce the size of the packets it
1748 * is sending along the path" that caused the Packet Too Big message.
1749 * Since it's not possible in the general case to determine which
1750 * interface was used to send the original packet, we update the MTU
1751 * on the interface that will be used to send future packets. We also
1752 * update the MTU on the interface that received the Packet Too Big in
1753 * case the original packet was forced out that interface with
1754 * SO_BINDTODEVICE or similar. This is the next best thing to the
1755 * correct behaviour, which would be to update the MTU on all
1756 * interfaces.
1757 */
1758 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1759 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1760}
1761
1762/*
1763 * Misc support functions
1764 */
1765
1766static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1767 const struct in6_addr *dest)
1768{
1769 struct net *net = dev_net(ort->rt6i_dev);
1770 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
1771 ort->dst.dev, 0);
1772
1773 if (rt) {
1774 rt->dst.input = ort->dst.input;
1775 rt->dst.output = ort->dst.output;
1776 rt->dst.flags |= DST_HOST;
1777
1778 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1779 rt->rt6i_dst.plen = 128;
1780 dst_copy_metrics(&rt->dst, &ort->dst);
1781 rt->dst.error = ort->dst.error;
1782 rt->rt6i_idev = ort->rt6i_idev;
1783 if (rt->rt6i_idev)
1784 in6_dev_hold(rt->rt6i_idev);
1785 rt->dst.lastuse = jiffies;
1786 rt->rt6i_expires = 0;
1787
1788 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1789 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1790 rt->rt6i_metric = 0;
1791
1792#ifdef CONFIG_IPV6_SUBTREES
1793 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1794#endif
1795 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1796 rt->rt6i_table = ort->rt6i_table;
1797 }
1798 return rt;
1799}
1800
1801#ifdef CONFIG_IPV6_ROUTE_INFO
1802static struct rt6_info *rt6_get_route_info(struct net *net,
1803 const struct in6_addr *prefix, int prefixlen,
1804 const struct in6_addr *gwaddr, int ifindex)
1805{
1806 struct fib6_node *fn;
1807 struct rt6_info *rt = NULL;
1808 struct fib6_table *table;
1809
1810 table = fib6_get_table(net, RT6_TABLE_INFO);
1811 if (table == NULL)
1812 return NULL;
1813
1814 write_lock_bh(&table->tb6_lock);
1815 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1816 if (!fn)
1817 goto out;
1818
1819 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1820 if (rt->rt6i_dev->ifindex != ifindex)
1821 continue;
1822 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1823 continue;
1824 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1825 continue;
1826 dst_hold(&rt->dst);
1827 break;
1828 }
1829out:
1830 write_unlock_bh(&table->tb6_lock);
1831 return rt;
1832}
1833
1834static struct rt6_info *rt6_add_route_info(struct net *net,
1835 const struct in6_addr *prefix, int prefixlen,
1836 const struct in6_addr *gwaddr, int ifindex,
1837 unsigned pref)
1838{
1839 struct fib6_config cfg = {
1840 .fc_table = RT6_TABLE_INFO,
1841 .fc_metric = IP6_RT_PRIO_USER,
1842 .fc_ifindex = ifindex,
1843 .fc_dst_len = prefixlen,
1844 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1845 RTF_UP | RTF_PREF(pref),
1846 .fc_nlinfo.pid = 0,
1847 .fc_nlinfo.nlh = NULL,
1848 .fc_nlinfo.nl_net = net,
1849 };
1850
1851 ipv6_addr_copy(&cfg.fc_dst, prefix);
1852 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1853
1854 /* We should treat it as a default route if prefix length is 0. */
1855 if (!prefixlen)
1856 cfg.fc_flags |= RTF_DEFAULT;
1857
1858 ip6_route_add(&cfg);
1859
1860 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1861}
1862#endif
1863
1864struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1865{
1866 struct rt6_info *rt;
1867 struct fib6_table *table;
1868
1869 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1870 if (table == NULL)
1871 return NULL;
1872
1873 write_lock_bh(&table->tb6_lock);
1874 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1875 if (dev == rt->rt6i_dev &&
1876 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1877 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1878 break;
1879 }
1880 if (rt)
1881 dst_hold(&rt->dst);
1882 write_unlock_bh(&table->tb6_lock);
1883 return rt;
1884}
1885
1886struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1887 struct net_device *dev,
1888 unsigned int pref)
1889{
1890 struct fib6_config cfg = {
1891 .fc_table = RT6_TABLE_DFLT,
1892 .fc_metric = IP6_RT_PRIO_USER,
1893 .fc_ifindex = dev->ifindex,
1894 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1895 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1896 .fc_nlinfo.pid = 0,
1897 .fc_nlinfo.nlh = NULL,
1898 .fc_nlinfo.nl_net = dev_net(dev),
1899 };
1900
1901 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1902
1903 ip6_route_add(&cfg);
1904
1905 return rt6_get_dflt_router(gwaddr, dev);
1906}
1907
1908void rt6_purge_dflt_routers(struct net *net)
1909{
1910 struct rt6_info *rt;
1911 struct fib6_table *table;
1912
1913 /* NOTE: Keep consistent with rt6_get_dflt_router */
1914 table = fib6_get_table(net, RT6_TABLE_DFLT);
1915 if (table == NULL)
1916 return;
1917
1918restart:
1919 read_lock_bh(&table->tb6_lock);
1920 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1921 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1922 dst_hold(&rt->dst);
1923 read_unlock_bh(&table->tb6_lock);
1924 ip6_del_rt(rt);
1925 goto restart;
1926 }
1927 }
1928 read_unlock_bh(&table->tb6_lock);
1929}
1930
1931static void rtmsg_to_fib6_config(struct net *net,
1932 struct in6_rtmsg *rtmsg,
1933 struct fib6_config *cfg)
1934{
1935 memset(cfg, 0, sizeof(*cfg));
1936
1937 cfg->fc_table = RT6_TABLE_MAIN;
1938 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1939 cfg->fc_metric = rtmsg->rtmsg_metric;
1940 cfg->fc_expires = rtmsg->rtmsg_info;
1941 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1942 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1943 cfg->fc_flags = rtmsg->rtmsg_flags;
1944
1945 cfg->fc_nlinfo.nl_net = net;
1946
1947 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1948 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1949 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1950}
1951
1952int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1953{
1954 struct fib6_config cfg;
1955 struct in6_rtmsg rtmsg;
1956 int err;
1957
1958 switch(cmd) {
1959 case SIOCADDRT: /* Add a route */
1960 case SIOCDELRT: /* Delete a route */
1961 if (!capable(CAP_NET_ADMIN))
1962 return -EPERM;
1963 err = copy_from_user(&rtmsg, arg,
1964 sizeof(struct in6_rtmsg));
1965 if (err)
1966 return -EFAULT;
1967
1968 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1969
1970 rtnl_lock();
1971 switch (cmd) {
1972 case SIOCADDRT:
1973 err = ip6_route_add(&cfg);
1974 break;
1975 case SIOCDELRT:
1976 err = ip6_route_del(&cfg);
1977 break;
1978 default:
1979 err = -EINVAL;
1980 }
1981 rtnl_unlock();
1982
1983 return err;
1984 }
1985
1986 return -EINVAL;
1987}
1988
1989/*
1990 * Drop the packet on the floor
1991 */
1992
1993static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1994{
1995 int type;
1996 struct dst_entry *dst = skb_dst(skb);
1997 switch (ipstats_mib_noroutes) {
1998 case IPSTATS_MIB_INNOROUTES:
1999 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2000 if (type == IPV6_ADDR_ANY) {
2001 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2002 IPSTATS_MIB_INADDRERRORS);
2003 break;
2004 }
2005 /* FALLTHROUGH */
2006 case IPSTATS_MIB_OUTNOROUTES:
2007 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2008 ipstats_mib_noroutes);
2009 break;
2010 }
2011 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2012 kfree_skb(skb);
2013 return 0;
2014}
2015
2016static int ip6_pkt_discard(struct sk_buff *skb)
2017{
2018 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2019}
2020
2021static int ip6_pkt_discard_out(struct sk_buff *skb)
2022{
2023 skb->dev = skb_dst(skb)->dev;
2024 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2025}
2026
2027#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2028
2029static int ip6_pkt_prohibit(struct sk_buff *skb)
2030{
2031 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2032}
2033
2034static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2035{
2036 skb->dev = skb_dst(skb)->dev;
2037 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2038}
2039
2040#endif
2041
2042/*
2043 * Allocate a dst for local (unicast / anycast) address.
2044 */
2045
2046struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2047 const struct in6_addr *addr,
2048 int anycast)
2049{
2050 struct net *net = dev_net(idev->dev);
2051 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
2052 net->loopback_dev, 0);
2053 struct neighbour *neigh;
2054
2055 if (rt == NULL) {
2056 if (net_ratelimit())
2057 pr_warning("IPv6: Maximum number of routes reached,"
2058 " consider increasing route/max_size.\n");
2059 return ERR_PTR(-ENOMEM);
2060 }
2061
2062 in6_dev_hold(idev);
2063
2064 rt->dst.flags |= DST_HOST;
2065 rt->dst.input = ip6_input;
2066 rt->dst.output = ip6_output;
2067 rt->rt6i_idev = idev;
2068 rt->dst.obsolete = -1;
2069
2070 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2071 if (anycast)
2072 rt->rt6i_flags |= RTF_ANYCAST;
2073 else
2074 rt->rt6i_flags |= RTF_LOCAL;
2075 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2076 if (IS_ERR(neigh)) {
2077 dst_free(&rt->dst);
2078
2079 return ERR_CAST(neigh);
2080 }
2081 dst_set_neighbour(&rt->dst, neigh);
2082
2083 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2084 rt->rt6i_dst.plen = 128;
2085 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2086
2087 atomic_set(&rt->dst.__refcnt, 1);
2088
2089 return rt;
2090}
2091
2092int ip6_route_get_saddr(struct net *net,
2093 struct rt6_info *rt,
2094 const struct in6_addr *daddr,
2095 unsigned int prefs,
2096 struct in6_addr *saddr)
2097{
2098 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2099 int err = 0;
2100 if (rt->rt6i_prefsrc.plen)
2101 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2102 else
2103 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2104 daddr, prefs, saddr);
2105 return err;
2106}
2107
2108/* remove deleted ip from prefsrc entries */
/*
 * Argument bundle for fib6_remove_prefsrc(): identifies the address whose
 * preferred-source entries are to be cleared.
 */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* restrict to this device; NULL matches any */
	struct net		*net;	/* namespace, used to skip its null entry */
	struct in6_addr		*addr;	/* address being removed */
};
2114
2115static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2116{
2117 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2118 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2119 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2120
2121 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2122 rt != net->ipv6.ip6_null_entry &&
2123 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2124 /* remove prefsrc entry */
2125 rt->rt6i_prefsrc.plen = 0;
2126 }
2127 return 0;
2128}
2129
2130void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2131{
2132 struct net *net = dev_net(ifp->idev->dev);
2133 struct arg_dev_net_ip adni = {
2134 .dev = ifp->idev->dev,
2135 .net = net,
2136 .addr = &ifp->addr,
2137 };
2138 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2139}
2140
/* Argument bundle for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace, used to skip its null entry */
};
2145
2146static int fib6_ifdown(struct rt6_info *rt, void *arg)
2147{
2148 const struct arg_dev_net *adn = arg;
2149 const struct net_device *dev = adn->dev;
2150
2151 if ((rt->rt6i_dev == dev || dev == NULL) &&
2152 rt != adn->net->ipv6.ip6_null_entry) {
2153 RT6_TRACE("deleted by ifdown %p\n", rt);
2154 return -1;
2155 }
2156 return 0;
2157}
2158
2159void rt6_ifdown(struct net *net, struct net_device *dev)
2160{
2161 struct arg_dev_net adn = {
2162 .dev = dev,
2163 .net = net,
2164 };
2165
2166 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2167 icmp6_clean_all(fib6_ifdown, &adn);
2168}
2169
/* Argument bundle for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* the new MTU */
};
2175
2176static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2177{
2178 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2179 struct inet6_dev *idev;
2180
2181 /* In IPv6 pmtu discovery is not optional,
2182 so that RTAX_MTU lock cannot disable it.
2183 We still use this lock to block changes
2184 caused by addrconf/ndisc.
2185 */
2186
2187 idev = __in6_dev_get(arg->dev);
2188 if (idev == NULL)
2189 return 0;
2190
2191 /* For administrative MTU increase, there is no way to discover
2192 IPv6 PMTU increase, so PMTU increase should be updated here.
2193 Since RFC 1981 doesn't include administrative MTU increase
2194 update PMTU increase is a MUST. (i.e. jumbo frame)
2195 */
2196 /*
2197 If new MTU is less than route PMTU, this new MTU will be the
2198 lowest MTU in the path, update the route PMTU to reflect PMTU
2199 decreases; if new MTU is greater than route PMTU, and the
2200 old MTU is the lowest MTU in the path, update the route PMTU
2201 to reflect the increase. In this case if the other nodes' MTU
2202 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2203 PMTU discouvery.
2204 */
2205 if (rt->rt6i_dev == arg->dev &&
2206 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2207 (dst_mtu(&rt->dst) >= arg->mtu ||
2208 (dst_mtu(&rt->dst) < arg->mtu &&
2209 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2210 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2211 }
2212 return 0;
2213}
2214
2215void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2216{
2217 struct rt6_mtu_change_arg arg = {
2218 .dev = dev,
2219 .mtu = mtu,
2220 };
2221
2222 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2223}
2224
2225static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2226 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2227 [RTA_OIF] = { .type = NLA_U32 },
2228 [RTA_IIF] = { .type = NLA_U32 },
2229 [RTA_PRIORITY] = { .type = NLA_U32 },
2230 [RTA_METRICS] = { .type = NLA_NESTED },
2231};
2232
2233static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2234 struct fib6_config *cfg)
2235{
2236 struct rtmsg *rtm;
2237 struct nlattr *tb[RTA_MAX+1];
2238 int err;
2239
2240 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2241 if (err < 0)
2242 goto errout;
2243
2244 err = -EINVAL;
2245 rtm = nlmsg_data(nlh);
2246 memset(cfg, 0, sizeof(*cfg));
2247
2248 cfg->fc_table = rtm->rtm_table;
2249 cfg->fc_dst_len = rtm->rtm_dst_len;
2250 cfg->fc_src_len = rtm->rtm_src_len;
2251 cfg->fc_flags = RTF_UP;
2252 cfg->fc_protocol = rtm->rtm_protocol;
2253
2254 if (rtm->rtm_type == RTN_UNREACHABLE)
2255 cfg->fc_flags |= RTF_REJECT;
2256
2257 if (rtm->rtm_type == RTN_LOCAL)
2258 cfg->fc_flags |= RTF_LOCAL;
2259
2260 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2261 cfg->fc_nlinfo.nlh = nlh;
2262 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2263
2264 if (tb[RTA_GATEWAY]) {
2265 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2266 cfg->fc_flags |= RTF_GATEWAY;
2267 }
2268
2269 if (tb[RTA_DST]) {
2270 int plen = (rtm->rtm_dst_len + 7) >> 3;
2271
2272 if (nla_len(tb[RTA_DST]) < plen)
2273 goto errout;
2274
2275 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2276 }
2277
2278 if (tb[RTA_SRC]) {
2279 int plen = (rtm->rtm_src_len + 7) >> 3;
2280
2281 if (nla_len(tb[RTA_SRC]) < plen)
2282 goto errout;
2283
2284 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2285 }
2286
2287 if (tb[RTA_PREFSRC])
2288 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2289
2290 if (tb[RTA_OIF])
2291 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2292
2293 if (tb[RTA_PRIORITY])
2294 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2295
2296 if (tb[RTA_METRICS]) {
2297 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2298 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2299 }
2300
2301 if (tb[RTA_TABLE])
2302 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2303
2304 err = 0;
2305errout:
2306 return err;
2307}
2308
2309static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2310{
2311 struct fib6_config cfg;
2312 int err;
2313
2314 err = rtm_to_fib6_config(skb, nlh, &cfg);
2315 if (err < 0)
2316 return err;
2317
2318 return ip6_route_del(&cfg);
2319}
2320
2321static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2322{
2323 struct fib6_config cfg;
2324 int err;
2325
2326 err = rtm_to_fib6_config(skb, nlh, &cfg);
2327 if (err < 0)
2328 return err;
2329
2330 return ip6_route_add(&cfg);
2331}
2332
2333static inline size_t rt6_nlmsg_size(void)
2334{
2335 return NLMSG_ALIGN(sizeof(struct rtmsg))
2336 + nla_total_size(16) /* RTA_SRC */
2337 + nla_total_size(16) /* RTA_DST */
2338 + nla_total_size(16) /* RTA_GATEWAY */
2339 + nla_total_size(16) /* RTA_PREFSRC */
2340 + nla_total_size(4) /* RTA_TABLE */
2341 + nla_total_size(4) /* RTA_IIF */
2342 + nla_total_size(4) /* RTA_OIF */
2343 + nla_total_size(4) /* RTA_PRIORITY */
2344 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2345 + nla_total_size(sizeof(struct rta_cacheinfo));
2346}
2347
2348static int rt6_fill_node(struct net *net,
2349 struct sk_buff *skb, struct rt6_info *rt,
2350 struct in6_addr *dst, struct in6_addr *src,
2351 int iif, int type, u32 pid, u32 seq,
2352 int prefix, int nowait, unsigned int flags)
2353{
2354 struct rtmsg *rtm;
2355 struct nlmsghdr *nlh;
2356 long expires;
2357 u32 table;
2358 struct neighbour *n;
2359
2360 if (prefix) { /* user wants prefix routes only */
2361 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2362 /* success since this is not a prefix route */
2363 return 1;
2364 }
2365 }
2366
2367 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2368 if (nlh == NULL)
2369 return -EMSGSIZE;
2370
2371 rtm = nlmsg_data(nlh);
2372 rtm->rtm_family = AF_INET6;
2373 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2374 rtm->rtm_src_len = rt->rt6i_src.plen;
2375 rtm->rtm_tos = 0;
2376 if (rt->rt6i_table)
2377 table = rt->rt6i_table->tb6_id;
2378 else
2379 table = RT6_TABLE_UNSPEC;
2380 rtm->rtm_table = table;
2381 NLA_PUT_U32(skb, RTA_TABLE, table);
2382 if (rt->rt6i_flags&RTF_REJECT)
2383 rtm->rtm_type = RTN_UNREACHABLE;
2384 else if (rt->rt6i_flags&RTF_LOCAL)
2385 rtm->rtm_type = RTN_LOCAL;
2386 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2387 rtm->rtm_type = RTN_LOCAL;
2388 else
2389 rtm->rtm_type = RTN_UNICAST;
2390 rtm->rtm_flags = 0;
2391 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2392 rtm->rtm_protocol = rt->rt6i_protocol;
2393 if (rt->rt6i_flags&RTF_DYNAMIC)
2394 rtm->rtm_protocol = RTPROT_REDIRECT;
2395 else if (rt->rt6i_flags & RTF_ADDRCONF)
2396 rtm->rtm_protocol = RTPROT_KERNEL;
2397 else if (rt->rt6i_flags&RTF_DEFAULT)
2398 rtm->rtm_protocol = RTPROT_RA;
2399
2400 if (rt->rt6i_flags&RTF_CACHE)
2401 rtm->rtm_flags |= RTM_F_CLONED;
2402
2403 if (dst) {
2404 NLA_PUT(skb, RTA_DST, 16, dst);
2405 rtm->rtm_dst_len = 128;
2406 } else if (rtm->rtm_dst_len)
2407 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2408#ifdef CONFIG_IPV6_SUBTREES
2409 if (src) {
2410 NLA_PUT(skb, RTA_SRC, 16, src);
2411 rtm->rtm_src_len = 128;
2412 } else if (rtm->rtm_src_len)
2413 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2414#endif
2415 if (iif) {
2416#ifdef CONFIG_IPV6_MROUTE
2417 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2418 int err = ip6mr_get_route(net, skb, rtm, nowait);
2419 if (err <= 0) {
2420 if (!nowait) {
2421 if (err == 0)
2422 return 0;
2423 goto nla_put_failure;
2424 } else {
2425 if (err == -EMSGSIZE)
2426 goto nla_put_failure;
2427 }
2428 }
2429 } else
2430#endif
2431 NLA_PUT_U32(skb, RTA_IIF, iif);
2432 } else if (dst) {
2433 struct in6_addr saddr_buf;
2434 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2435 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2436 }
2437
2438 if (rt->rt6i_prefsrc.plen) {
2439 struct in6_addr saddr_buf;
2440 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2441 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2442 }
2443
2444 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2445 goto nla_put_failure;
2446
2447 rcu_read_lock();
2448 n = dst_get_neighbour(&rt->dst);
2449 if (n)
2450 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2451 rcu_read_unlock();
2452
2453 if (rt->dst.dev)
2454 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2455
2456 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2457
2458 if (!(rt->rt6i_flags & RTF_EXPIRES))
2459 expires = 0;
2460 else if (rt->rt6i_expires - jiffies < INT_MAX)
2461 expires = rt->rt6i_expires - jiffies;
2462 else
2463 expires = INT_MAX;
2464
2465 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2466 expires, rt->dst.error) < 0)
2467 goto nla_put_failure;
2468
2469 return nlmsg_end(skb, nlh);
2470
2471nla_put_failure:
2472 nlmsg_cancel(skb, nlh);
2473 return -EMSGSIZE;
2474}
2475
2476int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2477{
2478 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2479 int prefix;
2480
2481 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2482 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2483 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2484 } else
2485 prefix = 0;
2486
2487 return rt6_fill_node(arg->net,
2488 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2489 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2490 prefix, 0, NLM_F_MULTI);
2491}
2492
2493static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2494{
2495 struct net *net = sock_net(in_skb->sk);
2496 struct nlattr *tb[RTA_MAX+1];
2497 struct rt6_info *rt;
2498 struct sk_buff *skb;
2499 struct rtmsg *rtm;
2500 struct flowi6 fl6;
2501 int err, iif = 0;
2502
2503 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2504 if (err < 0)
2505 goto errout;
2506
2507 err = -EINVAL;
2508 memset(&fl6, 0, sizeof(fl6));
2509
2510 if (tb[RTA_SRC]) {
2511 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2512 goto errout;
2513
2514 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2515 }
2516
2517 if (tb[RTA_DST]) {
2518 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2519 goto errout;
2520
2521 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2522 }
2523
2524 if (tb[RTA_IIF])
2525 iif = nla_get_u32(tb[RTA_IIF]);
2526
2527 if (tb[RTA_OIF])
2528 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2529
2530 if (iif) {
2531 struct net_device *dev;
2532 dev = __dev_get_by_index(net, iif);
2533 if (!dev) {
2534 err = -ENODEV;
2535 goto errout;
2536 }
2537 }
2538
2539 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2540 if (skb == NULL) {
2541 err = -ENOBUFS;
2542 goto errout;
2543 }
2544
2545 /* Reserve room for dummy headers, this skb can pass
2546 through good chunk of routing engine.
2547 */
2548 skb_reset_mac_header(skb);
2549 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2550
2551 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2552 skb_dst_set(skb, &rt->dst);
2553
2554 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2555 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2556 nlh->nlmsg_seq, 0, 0, 0);
2557 if (err < 0) {
2558 kfree_skb(skb);
2559 goto errout;
2560 }
2561
2562 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2563errout:
2564 return err;
2565}
2566
2567void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2568{
2569 struct sk_buff *skb;
2570 struct net *net = info->nl_net;
2571 u32 seq;
2572 int err;
2573
2574 err = -ENOBUFS;
2575 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2576
2577 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2578 if (skb == NULL)
2579 goto errout;
2580
2581 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2582 event, info->pid, seq, 0, 0, 0);
2583 if (err < 0) {
2584 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2585 WARN_ON(err == -EMSGSIZE);
2586 kfree_skb(skb);
2587 goto errout;
2588 }
2589 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2590 info->nlh, gfp_any());
2591 return;
2592errout:
2593 if (err < 0)
2594 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2595}
2596
2597static int ip6_route_dev_notify(struct notifier_block *this,
2598 unsigned long event, void *data)
2599{
2600 struct net_device *dev = (struct net_device *)data;
2601 struct net *net = dev_net(dev);
2602
2603 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2604 net->ipv6.ip6_null_entry->dst.dev = dev;
2605 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2606#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2607 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2608 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2609 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2610 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2611#endif
2612 }
2613
2614 return NOTIFY_OK;
2615}
2616
2617/*
2618 * /proc
2619 */
2620
2621#ifdef CONFIG_PROC_FS
2622
/*
 * Buffer bookkeeping for a /proc reader.
 * NOTE(review): no user of this structure is visible in this part of the
 * file; the /proc handlers below work on a seq_file instead - confirm
 * whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2631
2632static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2633{
2634 struct seq_file *m = p_arg;
2635 struct neighbour *n;
2636
2637 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2638
2639#ifdef CONFIG_IPV6_SUBTREES
2640 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2641#else
2642 seq_puts(m, "00000000000000000000000000000000 00 ");
2643#endif
2644 rcu_read_lock();
2645 n = dst_get_neighbour(&rt->dst);
2646 if (n) {
2647 seq_printf(m, "%pi6", n->primary_key);
2648 } else {
2649 seq_puts(m, "00000000000000000000000000000000");
2650 }
2651 rcu_read_unlock();
2652 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2653 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2654 rt->dst.__use, rt->rt6i_flags,
2655 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2656 return 0;
2657}
2658
2659static int ipv6_route_show(struct seq_file *m, void *v)
2660{
2661 struct net *net = (struct net *)m->private;
2662 fib6_clean_all(net, rt6_info_route, 0, m);
2663 return 0;
2664}
2665
2666static int ipv6_route_open(struct inode *inode, struct file *file)
2667{
2668 return single_open_net(inode, file, ipv6_route_show);
2669}
2670
2671static const struct file_operations ipv6_route_proc_fops = {
2672 .owner = THIS_MODULE,
2673 .open = ipv6_route_open,
2674 .read = seq_read,
2675 .llseek = seq_lseek,
2676 .release = single_release_net,
2677};
2678
2679static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2680{
2681 struct net *net = (struct net *)seq->private;
2682 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2683 net->ipv6.rt6_stats->fib_nodes,
2684 net->ipv6.rt6_stats->fib_route_nodes,
2685 net->ipv6.rt6_stats->fib_rt_alloc,
2686 net->ipv6.rt6_stats->fib_rt_entries,
2687 net->ipv6.rt6_stats->fib_rt_cache,
2688 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2689 net->ipv6.rt6_stats->fib_discarded_routes);
2690
2691 return 0;
2692}
2693
2694static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2695{
2696 return single_open_net(inode, file, rt6_stats_seq_show);
2697}
2698
2699static const struct file_operations rt6_stats_seq_fops = {
2700 .owner = THIS_MODULE,
2701 .open = rt6_stats_seq_open,
2702 .read = seq_read,
2703 .llseek = seq_lseek,
2704 .release = single_release_net,
2705};
2706#endif /* CONFIG_PROC_FS */
2707
2708#ifdef CONFIG_SYSCTL
2709
2710static
2711int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2712 void __user *buffer, size_t *lenp, loff_t *ppos)
2713{
2714 struct net *net;
2715 int delay;
2716 if (!write)
2717 return -EINVAL;
2718
2719 net = (struct net *)ctl->extra1;
2720 delay = net->ipv6.sysctl.flush_delay;
2721 proc_dointvec(ctl, write, buffer, lenp, ppos);
2722 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2723 return 0;
2724}
2725
2726ctl_table ipv6_route_table_template[] = {
2727 {
2728 .procname = "flush",
2729 .data = &init_net.ipv6.sysctl.flush_delay,
2730 .maxlen = sizeof(int),
2731 .mode = 0200,
2732 .proc_handler = ipv6_sysctl_rtcache_flush
2733 },
2734 {
2735 .procname = "gc_thresh",
2736 .data = &ip6_dst_ops_template.gc_thresh,
2737 .maxlen = sizeof(int),
2738 .mode = 0644,
2739 .proc_handler = proc_dointvec,
2740 },
2741 {
2742 .procname = "max_size",
2743 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2744 .maxlen = sizeof(int),
2745 .mode = 0644,
2746 .proc_handler = proc_dointvec,
2747 },
2748 {
2749 .procname = "gc_min_interval",
2750 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2751 .maxlen = sizeof(int),
2752 .mode = 0644,
2753 .proc_handler = proc_dointvec_jiffies,
2754 },
2755 {
2756 .procname = "gc_timeout",
2757 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2758 .maxlen = sizeof(int),
2759 .mode = 0644,
2760 .proc_handler = proc_dointvec_jiffies,
2761 },
2762 {
2763 .procname = "gc_interval",
2764 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2765 .maxlen = sizeof(int),
2766 .mode = 0644,
2767 .proc_handler = proc_dointvec_jiffies,
2768 },
2769 {
2770 .procname = "gc_elasticity",
2771 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2772 .maxlen = sizeof(int),
2773 .mode = 0644,
2774 .proc_handler = proc_dointvec,
2775 },
2776 {
2777 .procname = "mtu_expires",
2778 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2779 .maxlen = sizeof(int),
2780 .mode = 0644,
2781 .proc_handler = proc_dointvec_jiffies,
2782 },
2783 {
2784 .procname = "min_adv_mss",
2785 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2786 .maxlen = sizeof(int),
2787 .mode = 0644,
2788 .proc_handler = proc_dointvec,
2789 },
2790 {
2791 .procname = "gc_min_interval_ms",
2792 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2793 .maxlen = sizeof(int),
2794 .mode = 0644,
2795 .proc_handler = proc_dointvec_ms_jiffies,
2796 },
2797 { }
2798};
2799
2800struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2801{
2802 struct ctl_table *table;
2803
2804 table = kmemdup(ipv6_route_table_template,
2805 sizeof(ipv6_route_table_template),
2806 GFP_KERNEL);
2807
2808 if (table) {
2809 table[0].data = &net->ipv6.sysctl.flush_delay;
2810 table[0].extra1 = net;
2811 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2812 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2813 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2814 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2815 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2816 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2817 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2818 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2819 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2820 }
2821
2822 return table;
2823}
2824#endif
2825
2826static int __net_init ip6_route_net_init(struct net *net)
2827{
2828 int ret = -ENOMEM;
2829
2830 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2831 sizeof(net->ipv6.ip6_dst_ops));
2832
2833 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2834 goto out_ip6_dst_ops;
2835
2836 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2837 sizeof(*net->ipv6.ip6_null_entry),
2838 GFP_KERNEL);
2839 if (!net->ipv6.ip6_null_entry)
2840 goto out_ip6_dst_entries;
2841 net->ipv6.ip6_null_entry->dst.path =
2842 (struct dst_entry *)net->ipv6.ip6_null_entry;
2843 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2844 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2845 ip6_template_metrics, true);
2846
2847#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2848 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2849 sizeof(*net->ipv6.ip6_prohibit_entry),
2850 GFP_KERNEL);
2851 if (!net->ipv6.ip6_prohibit_entry)
2852 goto out_ip6_null_entry;
2853 net->ipv6.ip6_prohibit_entry->dst.path =
2854 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2855 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2856 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2857 ip6_template_metrics, true);
2858
2859 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2860 sizeof(*net->ipv6.ip6_blk_hole_entry),
2861 GFP_KERNEL);
2862 if (!net->ipv6.ip6_blk_hole_entry)
2863 goto out_ip6_prohibit_entry;
2864 net->ipv6.ip6_blk_hole_entry->dst.path =
2865 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2866 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2867 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2868 ip6_template_metrics, true);
2869#endif
2870
2871 net->ipv6.sysctl.flush_delay = 0;
2872 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2873 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2874 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2875 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2876 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2877 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2878 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2879
2880#ifdef CONFIG_PROC_FS
2881 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2882 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2883#endif
2884 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2885
2886 ret = 0;
2887out:
2888 return ret;
2889
2890#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2891out_ip6_prohibit_entry:
2892 kfree(net->ipv6.ip6_prohibit_entry);
2893out_ip6_null_entry:
2894 kfree(net->ipv6.ip6_null_entry);
2895#endif
2896out_ip6_dst_entries:
2897 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2898out_ip6_dst_ops:
2899 goto out;
2900}
2901
2902static void __net_exit ip6_route_net_exit(struct net *net)
2903{
2904#ifdef CONFIG_PROC_FS
2905 proc_net_remove(net, "ipv6_route");
2906 proc_net_remove(net, "rt6_stats");
2907#endif
2908 kfree(net->ipv6.ip6_null_entry);
2909#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2910 kfree(net->ipv6.ip6_prohibit_entry);
2911 kfree(net->ipv6.ip6_blk_hole_entry);
2912#endif
2913 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2914}
2915
2916static struct pernet_operations ip6_route_net_ops = {
2917 .init = ip6_route_net_init,
2918 .exit = ip6_route_net_exit,
2919};
2920
2921static struct notifier_block ip6_route_dev_notifier = {
2922 .notifier_call = ip6_route_dev_notify,
2923 .priority = 0,
2924};
2925
2926int __init ip6_route_init(void)
2927{
2928 int ret;
2929
2930 ret = -ENOMEM;
2931 ip6_dst_ops_template.kmem_cachep =
2932 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2933 SLAB_HWCACHE_ALIGN, NULL);
2934 if (!ip6_dst_ops_template.kmem_cachep)
2935 goto out;
2936
2937 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2938 if (ret)
2939 goto out_kmem_cache;
2940
2941 ret = register_pernet_subsys(&ip6_route_net_ops);
2942 if (ret)
2943 goto out_dst_entries;
2944
2945 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2946
2947 /* Registering of the loopback is done before this portion of code,
2948 * the loopback reference in rt6_info will not be taken, do it
2949 * manually for init_net */
2950 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2951 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2952 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2953 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2954 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2955 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2956 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2957 #endif
2958 ret = fib6_init();
2959 if (ret)
2960 goto out_register_subsys;
2961
2962 ret = xfrm6_init();
2963 if (ret)
2964 goto out_fib6_init;
2965
2966 ret = fib6_rules_init();
2967 if (ret)
2968 goto xfrm6_init;
2969
2970 ret = -ENOBUFS;
2971 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2972 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2973 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
2974 goto fib6_rules_init;
2975
2976 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2977 if (ret)
2978 goto fib6_rules_init;
2979
2980out:
2981 return ret;
2982
2983fib6_rules_init:
2984 fib6_rules_cleanup();
2985xfrm6_init:
2986 xfrm6_fini();
2987out_fib6_init:
2988 fib6_gc_cleanup();
2989out_register_subsys:
2990 unregister_pernet_subsys(&ip6_route_net_ops);
2991out_dst_entries:
2992 dst_entries_destroy(&ip6_dst_blackhole_ops);
2993out_kmem_cache:
2994 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2995 goto out;
2996}
2997
2998void ip6_route_cleanup(void)
2999{
3000 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3001 fib6_rules_cleanup();
3002 xfrm6_fini();
3003 fib6_gc_cleanup();
3004 unregister_pernet_subsys(&ip6_route_net_ops);
3005 dst_entries_destroy(&ip6_dst_blackhole_ops);
3006 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3007}