net/ipv6/ip6_output.c at c9a28fa7b9ac19b676deefa0a171ce7df8755c08

tjh.dev / kernel
fork
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
fork
kernel / net / ipv6 / ip6_output.c
at c9a28fa7b9ac19b676deefa0a171ce7df8755c08 1481 lines 38 kB view raw
wrap content
   1/*
   2 *	IPv6 output functions
   3 *	Linux INET6 implementation
   4 *
   5 *	Authors:
   6 *	Pedro Roque		<roque@di.fc.ul.pt>
   7 *
   8 *	$Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
   9 *
  10 *	Based on linux/net/ipv4/ip_output.c
  11 *
  12 *	This program is free software; you can redistribute it and/or
  13 *      modify it under the terms of the GNU General Public License
  14 *      as published by the Free Software Foundation; either version
  15 *      2 of the License, or (at your option) any later version.
  16 *
  17 *	Changes:
  18 *	A.N.Kuznetsov	:	airthmetics in fragmentation.
  19 *				extension headers are implemented.
  20 *				route changes now work.
  21 *				ip6_forward does not confuse sniffers.
  22 *				etc.
  23 *
  24 *      H. von Brand    :       Added missing #include <linux/string.h>
  25 *	Imran Patel	: 	frag id should be in NBO
  26 *      Kazunori MIYAZAWA @USAGI
  27 *			:       add ip6_append_data and related functions
  28 *				for datagram xmit
  29 */
  30
  31#include <linux/errno.h>
  32#include <linux/kernel.h>
  33#include <linux/string.h>
  34#include <linux/socket.h>
  35#include <linux/net.h>
  36#include <linux/netdevice.h>
  37#include <linux/if_arp.h>
  38#include <linux/in6.h>
  39#include <linux/tcp.h>
  40#include <linux/route.h>
  41#include <linux/module.h>
  42
  43#include <linux/netfilter.h>
  44#include <linux/netfilter_ipv6.h>
  45
  46#include <net/sock.h>
  47#include <net/snmp.h>
  48
  49#include <net/ipv6.h>
  50#include <net/ndisc.h>
  51#include <net/protocol.h>
  52#include <net/ip6_route.h>
  53#include <net/addrconf.h>
  54#include <net/rawv6.h>
  55#include <net/icmp.h>
  56#include <net/xfrm.h>
  57#include <net/checksum.h>
  58
  59static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  60
  61static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
  62{
  63	static u32 ipv6_fragmentation_id = 1;
  64	static DEFINE_SPINLOCK(ip6_id_lock);
  65
  66	spin_lock_bh(&ip6_id_lock);
  67	fhdr->identification = htonl(ipv6_fragmentation_id);
  68	if (++ipv6_fragmentation_id == 0)
  69		ipv6_fragmentation_id = 1;
  70	spin_unlock_bh(&ip6_id_lock);
  71}
  72
  73int __ip6_local_out(struct sk_buff *skb)
  74{
  75	int len;
  76
  77	len = skb->len - sizeof(struct ipv6hdr);
  78	if (len > IPV6_MAXPLEN)
  79		len = 0;
  80	ipv6_hdr(skb)->payload_len = htons(len);
  81
  82	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dst->dev,
  83		       dst_output);
  84}
  85
  86int ip6_local_out(struct sk_buff *skb)
  87{
  88	int err;
  89
  90	err = __ip6_local_out(skb);
  91	if (likely(err == 1))
  92		err = dst_output(skb);
  93
  94	return err;
  95}
  96EXPORT_SYMBOL_GPL(ip6_local_out);
  97
  98static int ip6_output_finish(struct sk_buff *skb)
  99{
 100	struct dst_entry *dst = skb->dst;
 101
 102	if (dst->hh)
 103		return neigh_hh_output(dst->hh, skb);
 104	else if (dst->neighbour)
 105		return dst->neighbour->output(skb);
 106
 107	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 108	kfree_skb(skb);
 109	return -EINVAL;
 110
 111}
 112
 113/* dev_loopback_xmit for use with netfilter. */
 114static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 115{
 116	skb_reset_mac_header(newskb);
 117	__skb_pull(newskb, skb_network_offset(newskb));
 118	newskb->pkt_type = PACKET_LOOPBACK;
 119	newskb->ip_summed = CHECKSUM_UNNECESSARY;
 120	BUG_TRAP(newskb->dst);
 121
 122	netif_rx(newskb);
 123	return 0;
 124}
 125
 126
 127static int ip6_output2(struct sk_buff *skb)
 128{
 129	struct dst_entry *dst = skb->dst;
 130	struct net_device *dev = dst->dev;
 131
 132	skb->protocol = htons(ETH_P_IPV6);
 133	skb->dev = dev;
 134
 135	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 136		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 137		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 138
 139		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 140		    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 141					&ipv6_hdr(skb)->saddr)) {
 142			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 143
 144			/* Do not check for IFF_ALLMULTI; multicast routing
 145			   is not supported in any case.
 146			 */
 147			if (newskb)
 148				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
 149					NULL, newskb->dev,
 150					ip6_dev_loopback_xmit);
 151
 152			if (ipv6_hdr(skb)->hop_limit == 0) {
 153				IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 154				kfree_skb(skb);
 155				return 0;
 156			}
 157		}
 158
 159		IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
 160	}
 161
 162	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
 163		       ip6_output_finish);
 164}
 165
 166static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 167{
 168	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 169
 170	return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
 171	       skb->dst->dev->mtu : dst_mtu(skb->dst);
 172}
 173
 174int ip6_output(struct sk_buff *skb)
 175{
 176	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 177				dst_allfrag(skb->dst))
 178		return ip6_fragment(skb, ip6_output2);
 179	else
 180		return ip6_output2(skb);
 181}
 182
 183/*
 184 *	xmit an sk_buff (used by TCP)
 185 */
 186
 187int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 188	     struct ipv6_txoptions *opt, int ipfragok)
 189{
 190	struct ipv6_pinfo *np = inet6_sk(sk);
 191	struct in6_addr *first_hop = &fl->fl6_dst;
 192	struct dst_entry *dst = skb->dst;
 193	struct ipv6hdr *hdr;
 194	u8  proto = fl->proto;
 195	int seg_len = skb->len;
 196	int hlimit, tclass;
 197	u32 mtu;
 198
 199	if (opt) {
 200		unsigned int head_room;
 201
 202		/* First: exthdrs may take lots of space (~8K for now)
 203		   MAX_HEADER is not enough.
 204		 */
 205		head_room = opt->opt_nflen + opt->opt_flen;
 206		seg_len += head_room;
 207		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 208
 209		if (skb_headroom(skb) < head_room) {
 210			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 211			if (skb2 == NULL) {
 212				IP6_INC_STATS(ip6_dst_idev(skb->dst),
 213					      IPSTATS_MIB_OUTDISCARDS);
 214				kfree_skb(skb);
 215				return -ENOBUFS;
 216			}
 217			kfree_skb(skb);
 218			skb = skb2;
 219			if (sk)
 220				skb_set_owner_w(skb, sk);
 221		}
 222		if (opt->opt_flen)
 223			ipv6_push_frag_opts(skb, opt, &proto);
 224		if (opt->opt_nflen)
 225			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 226	}
 227
 228	skb_push(skb, sizeof(struct ipv6hdr));
 229	skb_reset_network_header(skb);
 230	hdr = ipv6_hdr(skb);
 231
 232	/*
 233	 *	Fill in the IPv6 header
 234	 */
 235
 236	hlimit = -1;
 237	if (np)
 238		hlimit = np->hop_limit;
 239	if (hlimit < 0)
 240		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
 241	if (hlimit < 0)
 242		hlimit = ipv6_get_hoplimit(dst->dev);
 243
 244	tclass = -1;
 245	if (np)
 246		tclass = np->tclass;
 247	if (tclass < 0)
 248		tclass = 0;
 249
 250	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
 251
 252	hdr->payload_len = htons(seg_len);
 253	hdr->nexthdr = proto;
 254	hdr->hop_limit = hlimit;
 255
 256	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 257	ipv6_addr_copy(&hdr->daddr, first_hop);
 258
 259	skb->priority = sk->sk_priority;
 260	skb->mark = sk->sk_mark;
 261
 262	mtu = dst_mtu(dst);
 263	if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
 264		IP6_INC_STATS(ip6_dst_idev(skb->dst),
 265			      IPSTATS_MIB_OUTREQUESTS);
 266		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 267				dst_output);
 268	}
 269
 270	if (net_ratelimit())
 271		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 272	skb->dev = dst->dev;
 273	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 274	IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 275	kfree_skb(skb);
 276	return -EMSGSIZE;
 277}
 278
 279EXPORT_SYMBOL(ip6_xmit);
 280
 281/*
 282 *	To avoid extra problems ND packets are send through this
 283 *	routine. It's code duplication but I really want to avoid
 284 *	extra checks since ipv6_build_header is used by TCP (which
 285 *	is for us performance critical)
 286 */
 287
 288int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 289	       struct in6_addr *saddr, struct in6_addr *daddr,
 290	       int proto, int len)
 291{
 292	struct ipv6_pinfo *np = inet6_sk(sk);
 293	struct ipv6hdr *hdr;
 294	int totlen;
 295
 296	skb->protocol = htons(ETH_P_IPV6);
 297	skb->dev = dev;
 298
 299	totlen = len + sizeof(struct ipv6hdr);
 300
 301	skb_reset_network_header(skb);
 302	skb_put(skb, sizeof(struct ipv6hdr));
 303	hdr = ipv6_hdr(skb);
 304
 305	*(__be32*)hdr = htonl(0x60000000);
 306
 307	hdr->payload_len = htons(len);
 308	hdr->nexthdr = proto;
 309	hdr->hop_limit = np->hop_limit;
 310
 311	ipv6_addr_copy(&hdr->saddr, saddr);
 312	ipv6_addr_copy(&hdr->daddr, daddr);
 313
 314	return 0;
 315}
 316
 317static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 318{
 319	struct ip6_ra_chain *ra;
 320	struct sock *last = NULL;
 321
 322	read_lock(&ip6_ra_lock);
 323	for (ra = ip6_ra_chain; ra; ra = ra->next) {
 324		struct sock *sk = ra->sk;
 325		if (sk && ra->sel == sel &&
 326		    (!sk->sk_bound_dev_if ||
 327		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
 328			if (last) {
 329				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 330				if (skb2)
 331					rawv6_rcv(last, skb2);
 332			}
 333			last = sk;
 334		}
 335	}
 336
 337	if (last) {
 338		rawv6_rcv(last, skb);
 339		read_unlock(&ip6_ra_lock);
 340		return 1;
 341	}
 342	read_unlock(&ip6_ra_lock);
 343	return 0;
 344}
 345
 346static int ip6_forward_proxy_check(struct sk_buff *skb)
 347{
 348	struct ipv6hdr *hdr = ipv6_hdr(skb);
 349	u8 nexthdr = hdr->nexthdr;
 350	int offset;
 351
 352	if (ipv6_ext_hdr(nexthdr)) {
 353		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
 354		if (offset < 0)
 355			return 0;
 356	} else
 357		offset = sizeof(struct ipv6hdr);
 358
 359	if (nexthdr == IPPROTO_ICMPV6) {
 360		struct icmp6hdr *icmp6;
 361
 362		if (!pskb_may_pull(skb, (skb_network_header(skb) +
 363					 offset + 1 - skb->data)))
 364			return 0;
 365
 366		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 367
 368		switch (icmp6->icmp6_type) {
 369		case NDISC_ROUTER_SOLICITATION:
 370		case NDISC_ROUTER_ADVERTISEMENT:
 371		case NDISC_NEIGHBOUR_SOLICITATION:
 372		case NDISC_NEIGHBOUR_ADVERTISEMENT:
 373		case NDISC_REDIRECT:
 374			/* For reaction involving unicast neighbor discovery
 375			 * message destined to the proxied address, pass it to
 376			 * input function.
 377			 */
 378			return 1;
 379		default:
 380			break;
 381		}
 382	}
 383
 384	/*
 385	 * The proxying router can't forward traffic sent to a link-local
 386	 * address, so signal the sender and discard the packet. This
 387	 * behavior is clarified by the MIPv6 specification.
 388	 */
 389	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 390		dst_link_failure(skb);
 391		return -1;
 392	}
 393
 394	return 0;
 395}
 396
 397static inline int ip6_forward_finish(struct sk_buff *skb)
 398{
 399	return dst_output(skb);
 400}
 401
 402int ip6_forward(struct sk_buff *skb)
 403{
 404	struct dst_entry *dst = skb->dst;
 405	struct ipv6hdr *hdr = ipv6_hdr(skb);
 406	struct inet6_skb_parm *opt = IP6CB(skb);
 407
 408	if (ipv6_devconf.forwarding == 0)
 409		goto error;
 410
 411	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 412		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 413		goto drop;
 414	}
 415
 416	skb_forward_csum(skb);
 417
 418	/*
 419	 *	We DO NOT make any processing on
 420	 *	RA packets, pushing them to user level AS IS
 421	 *	without ane WARRANTY that application will be able
 422	 *	to interpret them. The reason is that we
 423	 *	cannot make anything clever here.
 424	 *
 425	 *	We are not end-node, so that if packet contains
 426	 *	AH/ESP, we cannot make anything.
 427	 *	Defragmentation also would be mistake, RA packets
 428	 *	cannot be fragmented, because there is no warranty
 429	 *	that different fragments will go along one path. --ANK
 430	 */
 431	if (opt->ra) {
 432		u8 *ptr = skb_network_header(skb) + opt->ra;
 433		if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 434			return 0;
 435	}
 436
 437	/*
 438	 *	check and decrement ttl
 439	 */
 440	if (hdr->hop_limit <= 1) {
 441		/* Force OUTPUT device used as source address */
 442		skb->dev = dst->dev;
 443		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 444			    0, skb->dev);
 445		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 446
 447		kfree_skb(skb);
 448		return -ETIMEDOUT;
 449	}
 450
 451	/* XXX: idev->cnf.proxy_ndp? */
 452	if (ipv6_devconf.proxy_ndp &&
 453	    pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
 454		int proxied = ip6_forward_proxy_check(skb);
 455		if (proxied > 0)
 456			return ip6_input(skb);
 457		else if (proxied < 0) {
 458			IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 459			goto drop;
 460		}
 461	}
 462
 463	if (!xfrm6_route_forward(skb)) {
 464		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 465		goto drop;
 466	}
 467	dst = skb->dst;
 468
 469	/* IPv6 specs say nothing about it, but it is clear that we cannot
 470	   send redirects to source routed frames.
 471	   We don't send redirects to frames decapsulated from IPsec.
 472	 */
 473	if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
 474	    !skb->sp) {
 475		struct in6_addr *target = NULL;
 476		struct rt6_info *rt;
 477		struct neighbour *n = dst->neighbour;
 478
 479		/*
 480		 *	incoming and outgoing devices are the same
 481		 *	send a redirect.
 482		 */
 483
 484		rt = (struct rt6_info *) dst;
 485		if ((rt->rt6i_flags & RTF_GATEWAY))
 486			target = (struct in6_addr*)&n->primary_key;
 487		else
 488			target = &hdr->daddr;
 489
 490		/* Limit redirects both by destination (here)
 491		   and by source (inside ndisc_send_redirect)
 492		 */
 493		if (xrlim_allow(dst, 1*HZ))
 494			ndisc_send_redirect(skb, n, target);
 495	} else {
 496		int addrtype = ipv6_addr_type(&hdr->saddr);
 497
 498		/* This check is security critical. */
 499		if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
 500			goto error;
 501		if (addrtype & IPV6_ADDR_LINKLOCAL) {
 502			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 503				ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
 504			goto error;
 505		}
 506	}
 507
 508	if (skb->len > dst_mtu(dst)) {
 509		/* Again, force OUTPUT device used as source address */
 510		skb->dev = dst->dev;
 511		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
 512		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 513		IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 514		kfree_skb(skb);
 515		return -EMSGSIZE;
 516	}
 517
 518	if (skb_cow(skb, dst->dev->hard_header_len)) {
 519		IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 520		goto drop;
 521	}
 522
 523	hdr = ipv6_hdr(skb);
 524
 525	/* Mangling hops number delayed to point after skb COW */
 526
 527	hdr->hop_limit--;
 528
 529	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 530	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 531		       ip6_forward_finish);
 532
 533error:
 534	IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 535drop:
 536	kfree_skb(skb);
 537	return -EINVAL;
 538}
 539
 540static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 541{
 542	to->pkt_type = from->pkt_type;
 543	to->priority = from->priority;
 544	to->protocol = from->protocol;
 545	dst_release(to->dst);
 546	to->dst = dst_clone(from->dst);
 547	to->dev = from->dev;
 548	to->mark = from->mark;
 549
 550#ifdef CONFIG_NET_SCHED
 551	to->tc_index = from->tc_index;
 552#endif
 553	nf_copy(to, from);
 554#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 555    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 556	to->nf_trace = from->nf_trace;
 557#endif
 558	skb_copy_secmark(to, from);
 559}
 560
 561int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 562{
 563	u16 offset = sizeof(struct ipv6hdr);
 564	struct ipv6_opt_hdr *exthdr =
 565				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 566	unsigned int packet_len = skb->tail - skb->network_header;
 567	int found_rhdr = 0;
 568	*nexthdr = &ipv6_hdr(skb)->nexthdr;
 569
 570	while (offset + 1 <= packet_len) {
 571
 572		switch (**nexthdr) {
 573
 574		case NEXTHDR_HOP:
 575			break;
 576		case NEXTHDR_ROUTING:
 577			found_rhdr = 1;
 578			break;
 579		case NEXTHDR_DEST:
 580#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 581			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 582				break;
 583#endif
 584			if (found_rhdr)
 585				return offset;
 586			break;
 587		default :
 588			return offset;
 589		}
 590
 591		offset += ipv6_optlen(exthdr);
 592		*nexthdr = &exthdr->nexthdr;
 593		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 594						 offset);
 595	}
 596
 597	return offset;
 598}
 599EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
 600
 601static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 602{
 603	struct net_device *dev;
 604	struct sk_buff *frag;
 605	struct rt6_info *rt = (struct rt6_info*)skb->dst;
 606	struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 607	struct ipv6hdr *tmp_hdr;
 608	struct frag_hdr *fh;
 609	unsigned int mtu, hlen, left, len;
 610	__be32 frag_id = 0;
 611	int ptr, offset = 0, err=0;
 612	u8 *prevhdr, nexthdr = 0;
 613
 614	dev = rt->u.dst.dev;
 615	hlen = ip6_find_1stfragopt(skb, &prevhdr);
 616	nexthdr = *prevhdr;
 617
 618	mtu = ip6_skb_dst_mtu(skb);
 619
 620	/* We must not fragment if the socket is set to force MTU discovery
 621	 * or if the skb it not generated by a local socket.  (This last
 622	 * check should be redundant, but it's free.)
 623	 */
 624	if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
 625		skb->dev = skb->dst->dev;
 626		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 627		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 628		kfree_skb(skb);
 629		return -EMSGSIZE;
 630	}
 631
 632	if (np && np->frag_size < mtu) {
 633		if (np->frag_size)
 634			mtu = np->frag_size;
 635	}
 636	mtu -= hlen + sizeof(struct frag_hdr);
 637
 638	if (skb_shinfo(skb)->frag_list) {
 639		int first_len = skb_pagelen(skb);
 640		int truesizes = 0;
 641
 642		if (first_len - hlen > mtu ||
 643		    ((first_len - hlen) & 7) ||
 644		    skb_cloned(skb))
 645			goto slow_path;
 646
 647		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
 648			/* Correct geometry. */
 649			if (frag->len > mtu ||
 650			    ((frag->len & 7) && frag->next) ||
 651			    skb_headroom(frag) < hlen)
 652			    goto slow_path;
 653
 654			/* Partially cloned skb? */
 655			if (skb_shared(frag))
 656				goto slow_path;
 657
 658			BUG_ON(frag->sk);
 659			if (skb->sk) {
 660				sock_hold(skb->sk);
 661				frag->sk = skb->sk;
 662				frag->destructor = sock_wfree;
 663				truesizes += frag->truesize;
 664			}
 665		}
 666
 667		err = 0;
 668		offset = 0;
 669		frag = skb_shinfo(skb)->frag_list;
 670		skb_shinfo(skb)->frag_list = NULL;
 671		/* BUILD HEADER */
 672
 673		*prevhdr = NEXTHDR_FRAGMENT;
 674		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 675		if (!tmp_hdr) {
 676			IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 677			return -ENOMEM;
 678		}
 679
 680		__skb_pull(skb, hlen);
 681		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 682		__skb_push(skb, hlen);
 683		skb_reset_network_header(skb);
 684		memcpy(skb_network_header(skb), tmp_hdr, hlen);
 685
 686		ipv6_select_ident(skb, fh);
 687		fh->nexthdr = nexthdr;
 688		fh->reserved = 0;
 689		fh->frag_off = htons(IP6_MF);
 690		frag_id = fh->identification;
 691
 692		first_len = skb_pagelen(skb);
 693		skb->data_len = first_len - skb_headlen(skb);
 694		skb->truesize -= truesizes;
 695		skb->len = first_len;
 696		ipv6_hdr(skb)->payload_len = htons(first_len -
 697						   sizeof(struct ipv6hdr));
 698
 699		dst_hold(&rt->u.dst);
 700
 701		for (;;) {
 702			/* Prepare header of the next frame,
 703			 * before previous one went down. */
 704			if (frag) {
 705				frag->ip_summed = CHECKSUM_NONE;
 706				skb_reset_transport_header(frag);
 707				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 708				__skb_push(frag, hlen);
 709				skb_reset_network_header(frag);
 710				memcpy(skb_network_header(frag), tmp_hdr,
 711				       hlen);
 712				offset += skb->len - hlen - sizeof(struct frag_hdr);
 713				fh->nexthdr = nexthdr;
 714				fh->reserved = 0;
 715				fh->frag_off = htons(offset);
 716				if (frag->next != NULL)
 717					fh->frag_off |= htons(IP6_MF);
 718				fh->identification = frag_id;
 719				ipv6_hdr(frag)->payload_len =
 720						htons(frag->len -
 721						      sizeof(struct ipv6hdr));
 722				ip6_copy_metadata(frag, skb);
 723			}
 724
 725			err = output(skb);
 726			if(!err)
 727				IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
 728
 729			if (err || !frag)
 730				break;
 731
 732			skb = frag;
 733			frag = skb->next;
 734			skb->next = NULL;
 735		}
 736
 737		kfree(tmp_hdr);
 738
 739		if (err == 0) {
 740			IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
 741			dst_release(&rt->u.dst);
 742			return 0;
 743		}
 744
 745		while (frag) {
 746			skb = frag->next;
 747			kfree_skb(frag);
 748			frag = skb;
 749		}
 750
 751		IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
 752		dst_release(&rt->u.dst);
 753		return err;
 754	}
 755
 756slow_path:
 757	left = skb->len - hlen;		/* Space per frame */
 758	ptr = hlen;			/* Where to start from */
 759
 760	/*
 761	 *	Fragment the datagram.
 762	 */
 763
 764	*prevhdr = NEXTHDR_FRAGMENT;
 765
 766	/*
 767	 *	Keep copying data until we run out.
 768	 */
 769	while(left > 0)	{
 770		len = left;
 771		/* IF: it doesn't fit, use 'mtu' - the data space left */
 772		if (len > mtu)
 773			len = mtu;
 774		/* IF: we are not sending upto and including the packet end
 775		   then align the next start on an eight byte boundary */
 776		if (len < left)	{
 777			len &= ~7;
 778		}
 779		/*
 780		 *	Allocate buffer.
 781		 */
 782
 783		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 784			NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 785			IP6_INC_STATS(ip6_dst_idev(skb->dst),
 786				      IPSTATS_MIB_FRAGFAILS);
 787			err = -ENOMEM;
 788			goto fail;
 789		}
 790
 791		/*
 792		 *	Set up data on packet
 793		 */
 794
 795		ip6_copy_metadata(frag, skb);
 796		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 797		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 798		skb_reset_network_header(frag);
 799		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 800		frag->transport_header = (frag->network_header + hlen +
 801					  sizeof(struct frag_hdr));
 802
 803		/*
 804		 *	Charge the memory for the fragment to any owner
 805		 *	it might possess
 806		 */
 807		if (skb->sk)
 808			skb_set_owner_w(frag, skb->sk);
 809
 810		/*
 811		 *	Copy the packet header into the new buffer.
 812		 */
 813		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 814
 815		/*
 816		 *	Build fragment header.
 817		 */
 818		fh->nexthdr = nexthdr;
 819		fh->reserved = 0;
 820		if (!frag_id) {
 821			ipv6_select_ident(skb, fh);
 822			frag_id = fh->identification;
 823		} else
 824			fh->identification = frag_id;
 825
 826		/*
 827		 *	Copy a block of the IP datagram.
 828		 */
 829		if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 830			BUG();
 831		left -= len;
 832
 833		fh->frag_off = htons(offset);
 834		if (left > 0)
 835			fh->frag_off |= htons(IP6_MF);
 836		ipv6_hdr(frag)->payload_len = htons(frag->len -
 837						    sizeof(struct ipv6hdr));
 838
 839		ptr += len;
 840		offset += len;
 841
 842		/*
 843		 *	Put this fragment into the sending queue.
 844		 */
 845		err = output(frag);
 846		if (err)
 847			goto fail;
 848
 849		IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
 850	}
 851	IP6_INC_STATS(ip6_dst_idev(skb->dst),
 852		      IPSTATS_MIB_FRAGOKS);
 853	kfree_skb(skb);
 854	return err;
 855
 856fail:
 857	IP6_INC_STATS(ip6_dst_idev(skb->dst),
 858		      IPSTATS_MIB_FRAGFAILS);
 859	kfree_skb(skb);
 860	return err;
 861}
 862
 863static inline int ip6_rt_check(struct rt6key *rt_key,
 864			       struct in6_addr *fl_addr,
 865			       struct in6_addr *addr_cache)
 866{
 867	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 868		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
 869}
 870
 871static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 872					  struct dst_entry *dst,
 873					  struct flowi *fl)
 874{
 875	struct ipv6_pinfo *np = inet6_sk(sk);
 876	struct rt6_info *rt = (struct rt6_info *)dst;
 877
 878	if (!dst)
 879		goto out;
 880
 881	/* Yes, checking route validity in not connected
 882	 * case is not very simple. Take into account,
 883	 * that we do not support routing by source, TOS,
 884	 * and MSG_DONTROUTE 		--ANK (980726)
 885	 *
 886	 * 1. ip6_rt_check(): If route was host route,
 887	 *    check that cached destination is current.
 888	 *    If it is network route, we still may
 889	 *    check its validity using saved pointer
 890	 *    to the last used address: daddr_cache.
 891	 *    We do not want to save whole address now,
 892	 *    (because main consumer of this service
 893	 *    is tcp, which has not this problem),
 894	 *    so that the last trick works only on connected
 895	 *    sockets.
 896	 * 2. oif also should be the same.
 897	 */
 898	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
 899#ifdef CONFIG_IPV6_SUBTREES
 900	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
 901#endif
 902	    (fl->oif && fl->oif != dst->dev->ifindex)) {
 903		dst_release(dst);
 904		dst = NULL;
 905	}
 906
 907out:
 908	return dst;
 909}
 910
 911static int ip6_dst_lookup_tail(struct sock *sk,
 912			       struct dst_entry **dst, struct flowi *fl)
 913{
 914	int err;
 915
 916	if (*dst == NULL)
 917		*dst = ip6_route_output(sk, fl);
 918
 919	if ((err = (*dst)->error))
 920		goto out_err_release;
 921
 922	if (ipv6_addr_any(&fl->fl6_src)) {
 923		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
 924		if (err)
 925			goto out_err_release;
 926	}
 927
 928#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 929		/*
 930		 * Here if the dst entry we've looked up
 931		 * has a neighbour entry that is in the INCOMPLETE
 932		 * state and the src address from the flow is
 933		 * marked as OPTIMISTIC, we release the found
 934		 * dst entry and replace it instead with the
 935		 * dst entry of the nexthop router
 936		 */
 937		if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
 938			struct inet6_ifaddr *ifp;
 939			struct flowi fl_gw;
 940			int redirect;
 941
 942			ifp = ipv6_get_ifaddr(&init_net, &fl->fl6_src,
 943					      (*dst)->dev, 1);
 944
 945			redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 946			if (ifp)
 947				in6_ifa_put(ifp);
 948
 949			if (redirect) {
 950				/*
 951				 * We need to get the dst entry for the
 952				 * default router instead
 953				 */
 954				dst_release(*dst);
 955				memcpy(&fl_gw, fl, sizeof(struct flowi));
 956				memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
 957				*dst = ip6_route_output(sk, &fl_gw);
 958				if ((err = (*dst)->error))
 959					goto out_err_release;
 960			}
 961		}
 962#endif
 963
 964	return 0;
 965
 966out_err_release:
 967	if (err == -ENETUNREACH)
 968		IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
 969	dst_release(*dst);
 970	*dst = NULL;
 971	return err;
 972}
 973
 974/**
 975 *	ip6_dst_lookup - perform route lookup on flow
 976 *	@sk: socket which provides route info
 977 *	@dst: pointer to dst_entry * for result
 978 *	@fl: flow to lookup
 979 *
 980 *	This function performs a route lookup on the given flow.
 981 *
 982 *	It returns zero on success, or a standard errno code on error.
 983 */
 984int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 985{
 986	*dst = NULL;
 987	return ip6_dst_lookup_tail(sk, dst, fl);
 988}
 989EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 990
 991/**
 992 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 993 *	@sk: socket which provides the dst cache and route info
 994 *	@dst: pointer to dst_entry * for result
 995 *	@fl: flow to lookup
 996 *
 997 *	This function performs a route lookup on the given flow with the
 998 *	possibility of using the cached route in the socket if it is valid.
 999 *	It will take the socket dst lock when operating on the dst cache.
1000 *	As a result, this function can only be used in process context.
1001 *
1002 *	It returns zero on success, or a standard errno code on error.
1003 */
1004int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1005{
1006	*dst = NULL;
1007	if (sk) {
1008		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1009		*dst = ip6_sk_dst_check(sk, *dst, fl);
1010	}
1011
1012	return ip6_dst_lookup_tail(sk, dst, fl);
1013}
1014EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1015
1016static inline int ip6_ufo_append_data(struct sock *sk,
1017			int getfrag(void *from, char *to, int offset, int len,
1018			int odd, struct sk_buff *skb),
1019			void *from, int length, int hh_len, int fragheaderlen,
1020			int transhdrlen, int mtu,unsigned int flags)
1021
1022{
1023	struct sk_buff *skb;
1024	int err;
1025
1026	/* There is support for UDP large send offload by network
1027	 * device, so create one single skb packet containing complete
1028	 * udp datagram
1029	 */
1030	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1031		skb = sock_alloc_send_skb(sk,
1032			hh_len + fragheaderlen + transhdrlen + 20,
1033			(flags & MSG_DONTWAIT), &err);
1034		if (skb == NULL)
1035			return -ENOMEM;
1036
1037		/* reserve space for Hardware header */
1038		skb_reserve(skb, hh_len);
1039
1040		/* create space for UDP/IP header */
1041		skb_put(skb,fragheaderlen + transhdrlen);
1042
1043		/* initialize network header pointer */
1044		skb_reset_network_header(skb);
1045
1046		/* initialize protocol header pointer */
1047		skb->transport_header = skb->network_header + fragheaderlen;
1048
1049		skb->ip_summed = CHECKSUM_PARTIAL;
1050		skb->csum = 0;
1051		sk->sk_sndmsg_off = 0;
1052	}
1053
1054	err = skb_append_datato_frags(sk,skb, getfrag, from,
1055				      (length - transhdrlen));
1056	if (!err) {
1057		struct frag_hdr fhdr;
1058
1059		/* specify the length of each IP datagram fragment*/
1060		skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1061					    sizeof(struct frag_hdr);
1062		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1063		ipv6_select_ident(skb, &fhdr);
1064		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1065		__skb_queue_tail(&sk->sk_write_queue, skb);
1066
1067		return 0;
1068	}
1069	/* There is not enough support do UPD LSO,
1070	 * so follow normal path
1071	 */
1072	kfree_skb(skb);
1073
1074	return err;
1075}
1076
1077int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1078	int offset, int len, int odd, struct sk_buff *skb),
1079	void *from, int length, int transhdrlen,
1080	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1081	struct rt6_info *rt, unsigned int flags)
1082{
1083	struct inet_sock *inet = inet_sk(sk);
1084	struct ipv6_pinfo *np = inet6_sk(sk);
1085	struct sk_buff *skb;
1086	unsigned int maxfraglen, fragheaderlen;
1087	int exthdrlen;
1088	int hh_len;
1089	int mtu;
1090	int copy;
1091	int err;
1092	int offset = 0;
1093	int csummode = CHECKSUM_NONE;
1094
1095	if (flags&MSG_PROBE)
1096		return 0;
1097	if (skb_queue_empty(&sk->sk_write_queue)) {
1098		/*
1099		 * setup for corking
1100		 */
1101		if (opt) {
1102			if (np->cork.opt == NULL) {
1103				np->cork.opt = kmalloc(opt->tot_len,
1104						       sk->sk_allocation);
1105				if (unlikely(np->cork.opt == NULL))
1106					return -ENOBUFS;
1107			} else if (np->cork.opt->tot_len < opt->tot_len) {
1108				printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1109				return -EINVAL;
1110			}
1111			memcpy(np->cork.opt, opt, opt->tot_len);
1112			inet->cork.flags |= IPCORK_OPT;
1113			/* need source address above miyazawa*/
1114		}
1115		dst_hold(&rt->u.dst);
1116		np->cork.rt = rt;
1117		inet->cork.fl = *fl;
1118		np->cork.hop_limit = hlimit;
1119		np->cork.tclass = tclass;
1120		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1121		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1122		if (np->frag_size < mtu) {
1123			if (np->frag_size)
1124				mtu = np->frag_size;
1125		}
1126		inet->cork.fragsize = mtu;
1127		if (dst_allfrag(rt->u.dst.path))
1128			inet->cork.flags |= IPCORK_ALLFRAG;
1129		inet->cork.length = 0;
1130		sk->sk_sndmsg_page = NULL;
1131		sk->sk_sndmsg_off = 0;
1132		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1133			    rt->rt6i_nfheader_len;
1134		length += exthdrlen;
1135		transhdrlen += exthdrlen;
1136	} else {
1137		rt = np->cork.rt;
1138		fl = &inet->cork.fl;
1139		if (inet->cork.flags & IPCORK_OPT)
1140			opt = np->cork.opt;
1141		transhdrlen = 0;
1142		exthdrlen = 0;
1143		mtu = inet->cork.fragsize;
1144	}
1145
1146	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1147
1148	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1149			(opt ? opt->opt_nflen : 0);
1150	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1151
1152	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1153		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1154			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1155			return -EMSGSIZE;
1156		}
1157	}
1158
1159	/*
1160	 * Let's try using as much space as possible.
1161	 * Use MTU if total length of the message fits into the MTU.
1162	 * Otherwise, we need to reserve fragment header and
1163	 * fragment alignment (= 8-15 octects, in total).
1164	 *
1165	 * Note that we may need to "move" the data from the tail of
1166	 * of the buffer to the new fragment when we split
1167	 * the message.
1168	 *
1169	 * FIXME: It may be fragmented into multiple chunks
1170	 *        at once if non-fragmentable extension headers
1171	 *        are too large.
1172	 * --yoshfuji
1173	 */
1174
1175	inet->cork.length += length;
1176	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1177	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
1178
1179		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1180					  fragheaderlen, transhdrlen, mtu,
1181					  flags);
1182		if (err)
1183			goto error;
1184		return 0;
1185	}
1186
1187	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1188		goto alloc_new_skb;
1189
1190	while (length > 0) {
1191		/* Check if the remaining data fits into current packet. */
1192		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1193		if (copy < length)
1194			copy = maxfraglen - skb->len;
1195
1196		if (copy <= 0) {
1197			char *data;
1198			unsigned int datalen;
1199			unsigned int fraglen;
1200			unsigned int fraggap;
1201			unsigned int alloclen;
1202			struct sk_buff *skb_prev;
1203alloc_new_skb:
1204			skb_prev = skb;
1205
1206			/* There's no room in the current skb */
1207			if (skb_prev)
1208				fraggap = skb_prev->len - maxfraglen;
1209			else
1210				fraggap = 0;
1211
1212			/*
1213			 * If remaining data exceeds the mtu,
1214			 * we know we need more fragment(s).
1215			 */
1216			datalen = length + fraggap;
1217			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1218				datalen = maxfraglen - fragheaderlen;
1219
1220			fraglen = datalen + fragheaderlen;
1221			if ((flags & MSG_MORE) &&
1222			    !(rt->u.dst.dev->features&NETIF_F_SG))
1223				alloclen = mtu;
1224			else
1225				alloclen = datalen + fragheaderlen;
1226
1227			/*
1228			 * The last fragment gets additional space at tail.
1229			 * Note: we overallocate on fragments with MSG_MODE
1230			 * because we have no idea if we're the last one.
1231			 */
1232			if (datalen == length + fraggap)
1233				alloclen += rt->u.dst.trailer_len;
1234
1235			/*
1236			 * We just reserve space for fragment header.
1237			 * Note: this may be overallocation if the message
1238			 * (without MSG_MORE) fits into the MTU.
1239			 */
1240			alloclen += sizeof(struct frag_hdr);
1241
1242			if (transhdrlen) {
1243				skb = sock_alloc_send_skb(sk,
1244						alloclen + hh_len,
1245						(flags & MSG_DONTWAIT), &err);
1246			} else {
1247				skb = NULL;
1248				if (atomic_read(&sk->sk_wmem_alloc) <=
1249				    2 * sk->sk_sndbuf)
1250					skb = sock_wmalloc(sk,
1251							   alloclen + hh_len, 1,
1252							   sk->sk_allocation);
1253				if (unlikely(skb == NULL))
1254					err = -ENOBUFS;
1255			}
1256			if (skb == NULL)
1257				goto error;
1258			/*
1259			 *	Fill in the control structures
1260			 */
1261			skb->ip_summed = csummode;
1262			skb->csum = 0;
1263			/* reserve for fragmentation */
1264			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1265
1266			/*
1267			 *	Find where to start putting bytes
1268			 */
1269			data = skb_put(skb, fraglen);
1270			skb_set_network_header(skb, exthdrlen);
1271			data += fragheaderlen;
1272			skb->transport_header = (skb->network_header +
1273						 fragheaderlen);
1274			if (fraggap) {
1275				skb->csum = skb_copy_and_csum_bits(
1276					skb_prev, maxfraglen,
1277					data + transhdrlen, fraggap, 0);
1278				skb_prev->csum = csum_sub(skb_prev->csum,
1279							  skb->csum);
1280				data += fraggap;
1281				pskb_trim_unique(skb_prev, maxfraglen);
1282			}
1283			copy = datalen - transhdrlen - fraggap;
1284			if (copy < 0) {
1285				err = -EINVAL;
1286				kfree_skb(skb);
1287				goto error;
1288			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1289				err = -EFAULT;
1290				kfree_skb(skb);
1291				goto error;
1292			}
1293
1294			offset += copy;
1295			length -= datalen - fraggap;
1296			transhdrlen = 0;
1297			exthdrlen = 0;
1298			csummode = CHECKSUM_NONE;
1299
1300			/*
1301			 * Put the packet on the pending queue
1302			 */
1303			__skb_queue_tail(&sk->sk_write_queue, skb);
1304			continue;
1305		}
1306
1307		if (copy > length)
1308			copy = length;
1309
1310		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1311			unsigned int off;
1312
1313			off = skb->len;
1314			if (getfrag(from, skb_put(skb, copy),
1315						offset, copy, off, skb) < 0) {
1316				__skb_trim(skb, off);
1317				err = -EFAULT;
1318				goto error;
1319			}
1320		} else {
1321			int i = skb_shinfo(skb)->nr_frags;
1322			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1323			struct page *page = sk->sk_sndmsg_page;
1324			int off = sk->sk_sndmsg_off;
1325			unsigned int left;
1326
1327			if (page && (left = PAGE_SIZE - off) > 0) {
1328				if (copy >= left)
1329					copy = left;
1330				if (page != frag->page) {
1331					if (i == MAX_SKB_FRAGS) {
1332						err = -EMSGSIZE;
1333						goto error;
1334					}
1335					get_page(page);
1336					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1337					frag = &skb_shinfo(skb)->frags[i];
1338				}
1339			} else if(i < MAX_SKB_FRAGS) {
1340				if (copy > PAGE_SIZE)
1341					copy = PAGE_SIZE;
1342				page = alloc_pages(sk->sk_allocation, 0);
1343				if (page == NULL) {
1344					err = -ENOMEM;
1345					goto error;
1346				}
1347				sk->sk_sndmsg_page = page;
1348				sk->sk_sndmsg_off = 0;
1349
1350				skb_fill_page_desc(skb, i, page, 0, 0);
1351				frag = &skb_shinfo(skb)->frags[i];
1352			} else {
1353				err = -EMSGSIZE;
1354				goto error;
1355			}
1356			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1357				err = -EFAULT;
1358				goto error;
1359			}
1360			sk->sk_sndmsg_off += copy;
1361			frag->size += copy;
1362			skb->len += copy;
1363			skb->data_len += copy;
1364			skb->truesize += copy;
1365			atomic_add(copy, &sk->sk_wmem_alloc);
1366		}
1367		offset += copy;
1368		length -= copy;
1369	}
1370	return 0;
1371error:
1372	inet->cork.length -= length;
1373	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1374	return err;
1375}
1376
1377static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1378{
1379	inet->cork.flags &= ~IPCORK_OPT;
1380	kfree(np->cork.opt);
1381	np->cork.opt = NULL;
1382	if (np->cork.rt) {
1383		dst_release(&np->cork.rt->u.dst);
1384		np->cork.rt = NULL;
1385		inet->cork.flags &= ~IPCORK_ALLFRAG;
1386	}
1387	memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1388}
1389
1390int ip6_push_pending_frames(struct sock *sk)
1391{
1392	struct sk_buff *skb, *tmp_skb;
1393	struct sk_buff **tail_skb;
1394	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1395	struct inet_sock *inet = inet_sk(sk);
1396	struct ipv6_pinfo *np = inet6_sk(sk);
1397	struct ipv6hdr *hdr;
1398	struct ipv6_txoptions *opt = np->cork.opt;
1399	struct rt6_info *rt = np->cork.rt;
1400	struct flowi *fl = &inet->cork.fl;
1401	unsigned char proto = fl->proto;
1402	int err = 0;
1403
1404	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1405		goto out;
1406	tail_skb = &(skb_shinfo(skb)->frag_list);
1407
1408	/* move skb->data to ip header from ext header */
1409	if (skb->data < skb_network_header(skb))
1410		__skb_pull(skb, skb_network_offset(skb));
1411	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1412		__skb_pull(tmp_skb, skb_network_header_len(skb));
1413		*tail_skb = tmp_skb;
1414		tail_skb = &(tmp_skb->next);
1415		skb->len += tmp_skb->len;
1416		skb->data_len += tmp_skb->len;
1417		skb->truesize += tmp_skb->truesize;
1418		__sock_put(tmp_skb->sk);
1419		tmp_skb->destructor = NULL;
1420		tmp_skb->sk = NULL;
1421	}
1422
1423	ipv6_addr_copy(final_dst, &fl->fl6_dst);
1424	__skb_pull(skb, skb_network_header_len(skb));
1425	if (opt && opt->opt_flen)
1426		ipv6_push_frag_opts(skb, opt, &proto);
1427	if (opt && opt->opt_nflen)
1428		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1429
1430	skb_push(skb, sizeof(struct ipv6hdr));
1431	skb_reset_network_header(skb);
1432	hdr = ipv6_hdr(skb);
1433
1434	*(__be32*)hdr = fl->fl6_flowlabel |
1435		     htonl(0x60000000 | ((int)np->cork.tclass << 20));
1436
1437	hdr->hop_limit = np->cork.hop_limit;
1438	hdr->nexthdr = proto;
1439	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1440	ipv6_addr_copy(&hdr->daddr, final_dst);
1441
1442	skb->priority = sk->sk_priority;
1443	skb->mark = sk->sk_mark;
1444
1445	skb->dst = dst_clone(&rt->u.dst);
1446	IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1447	if (proto == IPPROTO_ICMPV6) {
1448		struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1449
1450		ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1451		ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1452	}
1453
1454	err = ip6_local_out(skb);
1455	if (err) {
1456		if (err > 0)
1457			err = np->recverr ? net_xmit_errno(err) : 0;
1458		if (err)
1459			goto error;
1460	}
1461
1462out:
1463	ip6_cork_release(inet, np);
1464	return err;
1465error:
1466	goto out;
1467}
1468
1469void ip6_flush_pending_frames(struct sock *sk)
1470{
1471	struct sk_buff *skb;
1472
1473	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1474		if (skb->dst)
1475			IP6_INC_STATS(ip6_dst_idev(skb->dst),
1476				      IPSTATS_MIB_OUTDISCARDS);
1477		kfree_skb(skb);
1478	}
1479
1480	ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1481}
Configure Feed

Configure Feed