Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
fork
Configure Feed
Select the types of activity you want to include in your feed.
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * IPv6 output functions
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on linux/net/ipv4/ip_output.c
10 *
11 * Changes:
 12 * A.N.Kuznetsov : arithmetics in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
19 * Imran Patel : frag id should be in NBO
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57#include <net/ip_tunnels.h>
58
/* Final transmit step: handle multicast loopback/scope policy, optional
 * lwtunnel redirect, then resolve the neighbour for the route's nexthop
 * and hand the packet to neigh_output().  Consumes @skb on all paths.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the socket asked
		 * for multicast loopback and either a multicast-router socket
		 * is interested or we are a member of the group on @dev.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: deliverable locally (above) but must
			 * never hit the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local (or narrower) scoped multicast must not leave
		 * the host via a real device.
		 */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Lightweight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	/* Neighbour lookup under RCU-bh; create an entry on demand.
	 * The unlock must follow neigh_output(), which uses the
	 * reference-less neighbour.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
127
128static int
129ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
130 struct sk_buff *skb, unsigned int mtu)
131{
132 struct sk_buff *segs, *nskb;
133 netdev_features_t features;
134 int ret = 0;
135
136 /* Please see corresponding comment in ip_finish_output_gso
137 * describing the cases where GSO segment length exceeds the
138 * egress MTU.
139 */
140 features = netif_skb_features(skb);
141 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
142 if (IS_ERR_OR_NULL(segs)) {
143 kfree_skb(skb);
144 return -ENOMEM;
145 }
146
147 consume_skb(skb);
148
149 skb_list_walk_safe(segs, segs, nskb) {
150 int err;
151
152 skb_mark_not_on_list(segs);
153 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
154 if (err && ret == 0)
155 ret = err;
156 }
157
158 return ret;
159}
160
161static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
162{
163 unsigned int mtu;
164
165#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
166 /* Policy lookup after SNAT yielded a new policy */
167 if (skb_dst(skb)->xfrm) {
168 IPCB(skb)->flags |= IPSKB_REROUTED;
169 return dst_output(net, sk, skb);
170 }
171#endif
172
173 mtu = ip6_skb_dst_mtu(skb);
174 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
175 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
176
177 if ((skb->len > mtu && !skb_is_gso(skb)) ||
178 dst_allfrag(skb_dst(skb)) ||
179 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
180 return ip6_fragment(net, sk, skb, ip6_finish_output2);
181 else
182 return ip6_finish_output2(net, sk, skb);
183}
184
185static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
186{
187 int ret;
188
189 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
190 switch (ret) {
191 case NET_XMIT_SUCCESS:
192 return __ip6_finish_output(net, sk, skb);
193 case NET_XMIT_CN:
194 return __ip6_finish_output(net, sk, skb) ? : ret;
195 default:
196 kfree_skb(skb);
197 return ret;
198 }
199}
200
201int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
202{
203 struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
204 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
205
206 skb->protocol = htons(ETH_P_IPV6);
207 skb->dev = dev;
208
209 if (unlikely(idev->cnf.disable_ipv6)) {
210 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
211 kfree_skb(skb);
212 return 0;
213 }
214
215 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
216 net, sk, skb, indev, dev,
217 ip6_finish_output,
218 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
219}
220
221bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
222{
223 if (!np->autoflowlabel_set)
224 return ip6_default_np_autolabel(net);
225 else
226 return np->autoflowlabel;
227}
228
229/*
230 * xmit an sk_buff (used by TCP, SCTP and DCCP)
231 * Note : socket lock is not held for SYNACK packets, but might be modified
232 * by calls to skb_set_owner_w() and ipv6_local_error(),
233 * which are using proper atomic operations or spinlocks.
234 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Room needed in front of the payload: IPv6 header, link-layer
	 * header for the egress device, plus any extension headers in @opt.
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		/* Keep socket write-memory accounting on the new skb. */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		/* opt_flen: options of the fragmentable part. */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* opt_nflen: non-fragmentable options; may rewrite
		 * first_hop (e.g. via a routing header).
		 */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);	/* route/device default */

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* Oversized and not allowed to fragment: report EMSGSIZE locally. */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
334EXPORT_SYMBOL(ip6_xmit);
335
/* Deliver a Router Alert packet to every matching socket on the RA chain.
 * All sockets but the last get a clone; the last one consumes @skb itself.
 * Returns 1 if the packet was delivered (ownership transferred), 0 if no
 * socket matched and the caller still owns @skb.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		/* Match on the RA value and, if bound, the ingress device. */
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* rtalert_isolate: skip sockets that live in a
			 * different netns than the receiving device.
			 */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Clone for the previous match; defer the original
			 * so the final recipient avoids a copy.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
370
/* Decide what to do with a packet whose destination is an address we
 * proxy (NDISC proxy).  Returns 1 to hand the packet to local input
 * (unicast NDISC messages), -1 to drop it (link-local destination),
 * 0 to forward it normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Find the upper-layer header, skipping extension headers. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure at least the ICMPv6 type octet is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
422
/* Final step of forwarding: account the datagram, honour hardware L3
 * forward offload, clear the stale timestamp and transmit via dst_output().
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	/* Packet was already forwarded by the hardware: nothing to send. */
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	/* The reception timestamp is meaningless on the transmit path. */
	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}
441
442static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
443{
444 if (skb->len <= mtu)
445 return false;
446
447 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
448 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
449 return true;
450
451 if (skb->ignore_df)
452 return false;
453
454 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
455 return false;
456
457 return true;
458}
459
/* Forward a received IPv6 packet: policy and sanity checks, Router Alert
 * delivery, hop-limit handling, NDISC proxying, redirect generation, MTU
 * check, then NF_INET_FORWARD -> ip6_forward_finish().  Consumes @skb on
 * every path (or hands it to ip6_input()/the RA chain).
 */
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only packets unicast to us at L2 are forwarded. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A forwarded packet must not be owned by a local socket. */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have installed a new dst; reload it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Ensure private headroom before the hop-limit write below. */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated; re-read the header pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
616
617static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
618{
619 to->pkt_type = from->pkt_type;
620 to->priority = from->priority;
621 to->protocol = from->protocol;
622 skb_dst_drop(to);
623 skb_dst_set(to, dst_clone(skb_dst(from)));
624 to->dev = from->dev;
625 to->mark = from->mark;
626
627 skb_copy_hash(to, from);
628
629#ifdef CONFIG_NET_SCHED
630 to->tc_index = from->tc_index;
631#endif
632 nf_copy(to, from);
633 skb_ext_copy(to, from);
634 skb_copy_secmark(to, from);
635}
636
/* Fast-path fragmentation setup for an skb with a frag_list: detach the
 * list (its members become the follow-up fragments), insert a fragment
 * header into @skb itself (the first fragment) and initialise @iter for
 * ip6_fraglist_prepare()/ip6_fraglist_next().  @hlen is the length of
 * the unfragmentable part.  Returns 0 or -ENOMEM.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	/* Keep a copy of the unfragmentable part for later fragments. */
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* Detach the frag list from @skb. */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Open a gap after the unfragmentable part, place the fragment
	 * header there, then restore the copied headers in front of it.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);	/* first fragment: offset 0, MF set */
	fh->identification = frag_id;

	/* Shrink @skb to its head (linear data + page frags) only. */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
677
/* Turn the next frag-list member (iter->frag) into a full IPv6 fragment:
 * prepend the saved unfragmentable headers and a fragment header with the
 * running offset, and copy metadata from the first fragment @skb.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	/* Prepend fragment header, then the copied unfragmentable part. */
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Advance the payload offset by what the previous fragment sent. */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* MF is set on every fragment except the last one. */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
702
703void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
704 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
705 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
706{
707 state->prevhdr = prevhdr;
708 state->nexthdr = nexthdr;
709 state->frag_id = frag_id;
710
711 state->hlen = hlen;
712 state->mtu = mtu;
713
714 state->left = skb->len - hlen; /* Space per frame */
715 state->ptr = hlen; /* Where to start from */
716
717 state->hroom = hdr_room;
718 state->troom = needed_tailroom;
719
720 state->offset = 0;
721}
722EXPORT_SYMBOL(ip6_frag_init);
723
/* Slow-path fragmentation: allocate and build the next fragment of @skb
 * according to @state.  Returns the new fragment skb, or
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied headers: the field that used to name the upper
	 * layer protocol now points at the fragment header.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);	/* more fragments follow */
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
799
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 * Uses the fast frag-list path when the skb's geometry allows, otherwise
 * the slow copy path.  Consumes @skb; returns 0 or a negative errno.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only consult per-socket frag_size for locally generated packets
	 * (not when nested in device transmit recursion).
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	/* Locate the spot where the fragment header must be inserted. */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Save as an offset: skb_checksum_help() may relocate the head. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* Per-socket IPV6_MTU setting may shrink the budget further. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Must fit headers, a fragment header and at least 8 payload bytes. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Checksum must be finalised before the payload is split. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* Re-derive prevhdr after any head reallocation above. */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path only when every piece already has fragment
		 * geometry: sizes within the MTU, 8-byte aligned, headroom
		 * available, nothing shared/cloned.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			/* Move socket write-memory accounting onto the
			 * individual fragments.
			 */
			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Error: discard the fragments not yet transmitted. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done so far. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
982
983static inline int ip6_rt_check(const struct rt6key *rt_key,
984 const struct in6_addr *fl_addr,
985 const struct in6_addr *addr_cache)
986{
987 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
988 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
989}
990
/* Validate a socket-cached @dst against the flow @fl6.  Returns the dst
 * if it is still usable, or releases it and returns NULL so the caller
 * performs a fresh route lookup.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached IPv4 route (e.g. on a mapped-address socket) cannot
	 * serve an IPv6 flow.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1037
/* Core of the dst lookup: resolve the route for @fl6 (selecting a source
 * address first when @fl6->saddr is unspecified) and store it in *@dst.
 * On error, *@dst is released and set to NULL.  Returns 0 or -errno.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		/* Pick a source address for the flow, honouring the
		 * route's preferred source and the socket's srcprefs.
		 */
		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	/* Retry (or first attempt) with the source address filled in. */
	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source with a non-mapped destination can never be
	 * a valid combination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1155
/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 * On error, ip6_dst_lookup_tail() leaves *dst set to NULL.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	/* Start from a clean slate; the tail helper fills in *dst. */
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1174
1175/**
1176 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1177 * @net: Network namespace to perform lookup in
1178 * @sk: socket which provides route info
1179 * @fl6: flow to lookup
1180 * @final_dst: final destination address for ipsec lookup
1181 *
1182 * This function performs a route lookup on the given flow.
1183 *
1184 * It returns a valid dst pointer on success, or a pointer encoded
1185 * error code.
1186 */
1187struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1188 const struct in6_addr *final_dst)
1189{
1190 struct dst_entry *dst = NULL;
1191 int err;
1192
1193 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1194 if (err)
1195 return ERR_PTR(err);
1196 if (final_dst)
1197 fl6->daddr = *final_dst;
1198
1199 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1200}
1201EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1202
1203/**
1204 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1205 * @sk: socket which provides the dst cache and route info
1206 * @fl6: flow to lookup
1207 * @final_dst: final destination address for ipsec lookup
1208 * @connected: whether @sk is connected or not
1209 *
1210 * This function performs a route lookup on the given flow with the
1211 * possibility of using the cached route in the socket if it is valid.
1212 * It will take the socket dst lock when operating on the dst cache.
1213 * As a result, this function can only be used in process context.
1214 *
1215 * In addition, for a connected socket, cache the dst in the socket
1216 * if the current cache is not valid.
1217 *
1218 * It returns a valid dst pointer on success, or a pointer encoded
1219 * error code.
1220 */
1221struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1222 const struct in6_addr *final_dst,
1223 bool connected)
1224{
1225 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1226
1227 dst = ip6_sk_dst_check(sk, dst, fl6);
1228 if (dst)
1229 return dst;
1230
1231 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1232 if (connected && !IS_ERR(dst))
1233 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1234
1235 return dst;
1236}
1237EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1238
/**
 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
 * @skb: Packet for which lookup is done
 * @dev: Tunnel device
 * @net: Network namespace of tunnel device
 * @sock: Socket which provides route info
 * @saddr: Memory to store the src ip address
 * @info: Tunnel information
 * @protocol: IP protocol
 * @use_cache: Flag to enable cache usage
 * This function performs a route lookup on a tunnel
 *
 * It returns a valid dst pointer and stores src address to be used in
 * tunnel in param saddr on success, else a pointer encoded error code.
 */

struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	/* Fast path: reuse a cached route and source address when allowed. */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	/* Build the flow key from the tunnel metadata. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		/* Route loops back into the tunnel device itself. */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	/* Report the source address the route lookup selected. */
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1307
1308static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1309 gfp_t gfp)
1310{
1311 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1312}
1313
1314static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1315 gfp_t gfp)
1316{
1317 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1318}
1319
1320static void ip6_append_data_mtu(unsigned int *mtu,
1321 int *maxfraglen,
1322 unsigned int fragheaderlen,
1323 struct sk_buff *skb,
1324 struct rt6_info *rt,
1325 unsigned int orig_mtu)
1326{
1327 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1328 if (!skb) {
1329 /* first fragment, reserve header_len */
1330 *mtu = orig_mtu - rt->dst.header_len;
1331
1332 } else {
1333 /*
1334 * this fragment is not first, the headers
1335 * space is regarded as data space.
1336 */
1337 *mtu = orig_mtu;
1338 }
1339 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1340 + fragheaderlen - sizeof(struct frag_hdr);
1341 }
1342}
1343
/* Initialise cork state for a corked transmit: deep-copy the tx options,
 * pin the route, and compute the MTU the fragmentation code will use.
 * Returns 0 or a negative errno.  On a partial option-duplication failure
 * v6_cork->opt remains set, so a subsequent ip6_cork_release() frees
 * whatever was duplicated so far.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension-header blob individually. */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* Pin the route for the lifetime of the cork. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Choose the working MTU: device MTU when probing PMTU, else the
	 * path MTU (inner path for XFRM tunnel routes).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A smaller user-requested fragment size overrides the path MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1420
/* Core of the corked-send machinery: append @length bytes obtained via
 * @getfrag to @queue, growing the tail skb or allocating new fragments
 * as dictated by the MTU, scatter-gather capability and zerocopy state.
 * Returns 0 or a negative errno; on error, queued data stays on @queue
 * for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/* Only the very first chunk reserves space for extension headers. */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-byte-aligned payload end within the MTU. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device can't do zerocopy; fall back to copying. */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				/* GSO/zerocopy: only headers go in the linear
				 * area; the rest lands in page frags.
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: bounded, non-blocking
				 * allocation against the send buffer limit.
				 */
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the unaligned tail of the previous skb
				 * into this fragment, fixing both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* No scatter-gather: copy into the linear tailroom. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* Scatter-gather without zerocopy: copy into the
			 * socket's page frag and attach it to the skb.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* True zerocopy: pin the user pages directly. */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1764
1765int ip6_append_data(struct sock *sk,
1766 int getfrag(void *from, char *to, int offset, int len,
1767 int odd, struct sk_buff *skb),
1768 void *from, int length, int transhdrlen,
1769 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1770 struct rt6_info *rt, unsigned int flags)
1771{
1772 struct inet_sock *inet = inet_sk(sk);
1773 struct ipv6_pinfo *np = inet6_sk(sk);
1774 int exthdrlen;
1775 int err;
1776
1777 if (flags&MSG_PROBE)
1778 return 0;
1779 if (skb_queue_empty(&sk->sk_write_queue)) {
1780 /*
1781 * setup for corking
1782 */
1783 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1784 ipc6, rt, fl6);
1785 if (err)
1786 return err;
1787
1788 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1789 length += exthdrlen;
1790 transhdrlen += exthdrlen;
1791 } else {
1792 fl6 = &inet->cork.fl.u.ip6;
1793 transhdrlen = 0;
1794 }
1795
1796 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1797 &np->cork, sk_page_frag(sk), getfrag,
1798 from, length, transhdrlen, flags, ipc6);
1799}
1800EXPORT_SYMBOL_GPL(ip6_append_data);
1801
1802static void ip6_cork_release(struct inet_cork_full *cork,
1803 struct inet6_cork *v6_cork)
1804{
1805 if (v6_cork->opt) {
1806 kfree(v6_cork->opt->dst0opt);
1807 kfree(v6_cork->opt->dst1opt);
1808 kfree(v6_cork->opt->hopopt);
1809 kfree(v6_cork->opt->srcrt);
1810 kfree(v6_cork->opt);
1811 v6_cork->opt = NULL;
1812 }
1813
1814 if (cork->base.dst) {
1815 dst_release(cork->base.dst);
1816 cork->base.dst = NULL;
1817 cork->base.flags &= ~IPCORK_ALLFRAG;
1818 }
1819 memset(&cork->fl, 0, sizeof(cork->fl));
1820}
1821
/* Collapse the queued fragments into a single skb (tail fragments become
 * the frag_list of the first), push the extension headers and the IPv6
 * header, and release the cork.  Returns the finished skb, or NULL when
 * the queue is empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining fragments onto the head skb's frag_list,
	 * transferring their byte counts and memory accounting.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Push extension headers; a routing header may rewrite the
	 * destination, so work on a local copy.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1897
1898int ip6_send_skb(struct sk_buff *skb)
1899{
1900 struct net *net = sock_net(skb->sk);
1901 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1902 int err;
1903
1904 err = ip6_local_out(net, skb->sk, skb);
1905 if (err) {
1906 if (err > 0)
1907 err = net_xmit_errno(err);
1908 if (err)
1909 IP6_INC_STATS(net, rt->rt6i_idev,
1910 IPSTATS_MIB_OUTDISCARDS);
1911 }
1912
1913 return err;
1914}
1915
/* Finish the pending write queue into one skb and transmit it.
 * An empty queue is a silent success.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1927
/* Discard every skb on @queue (counting each routed one as an output
 * discard) and release the cork state.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
1944
/* Public wrapper: flush the socket's own write queue and cork state. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1951
1952struct sk_buff *ip6_make_skb(struct sock *sk,
1953 int getfrag(void *from, char *to, int offset,
1954 int len, int odd, struct sk_buff *skb),
1955 void *from, int length, int transhdrlen,
1956 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1957 struct rt6_info *rt, unsigned int flags,
1958 struct inet_cork_full *cork)
1959{
1960 struct inet6_cork v6_cork;
1961 struct sk_buff_head queue;
1962 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1963 int err;
1964
1965 if (flags & MSG_PROBE)
1966 return NULL;
1967
1968 __skb_queue_head_init(&queue);
1969
1970 cork->base.flags = 0;
1971 cork->base.addr = 0;
1972 cork->base.opt = NULL;
1973 cork->base.dst = NULL;
1974 v6_cork.opt = NULL;
1975 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1976 if (err) {
1977 ip6_cork_release(cork, &v6_cork);
1978 return ERR_PTR(err);
1979 }
1980 if (ipc6->dontfrag < 0)
1981 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1982
1983 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1984 ¤t->task_frag, getfrag, from,
1985 length + exthdrlen, transhdrlen + exthdrlen,
1986 flags, ipc6);
1987 if (err) {
1988 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1989 return ERR_PTR(err);
1990 }
1991
1992 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1993}