Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
fork
Configure Feed
Select the types of activity you want to include in your feed.
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * IPv6 output functions
4 * Linux INET6 implementation
5 *
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
8 *
9 * Based on linux/net/ipv4/ip_output.c
10 *
11 * Changes:
 12 * A.N.Kuznetsov : arithmetics in fragmentation.
13 * extension headers are implemented.
14 * route changes now work.
15 * ip6_forward does not confuse sniffers.
16 * etc.
17 *
18 * H. von Brand : Added missing #include <linux/string.h>
19 * Imran Patel : frag id should be in NBO
20 * Kazunori MIYAZAWA @USAGI
21 * : add ip6_append_data and related functions
22 * for datagram xmit
23 */
24
25#include <linux/errno.h>
26#include <linux/kernel.h>
27#include <linux/string.h>
28#include <linux/socket.h>
29#include <linux/net.h>
30#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/in6.h>
33#include <linux/tcp.h>
34#include <linux/route.h>
35#include <linux/module.h>
36#include <linux/slab.h>
37
38#include <linux/bpf-cgroup.h>
39#include <linux/netfilter.h>
40#include <linux/netfilter_ipv6.h>
41
42#include <net/sock.h>
43#include <net/snmp.h>
44
45#include <net/ipv6.h>
46#include <net/ndisc.h>
47#include <net/protocol.h>
48#include <net/ip6_route.h>
49#include <net/addrconf.h>
50#include <net/rawv6.h>
51#include <net/icmp.h>
52#include <net/xfrm.h>
53#include <net/checksum.h>
54#include <linux/mroute6.h>
55#include <net/l3mdev.h>
56#include <net/lwtunnel.h>
57#include <net/ip_tunnels.h>
58
/* Final transmit step: handle multicast loopback/scope policy, optional
 * lwtunnel redirect, then resolve the neighbour for the route's nexthop
 * and hand the packet to neigh_output().  Consumes @skb on all paths.
 */
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	const struct in6_addr *nexthop;
	struct neighbour *neigh;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners when the socket asked
		 * for multicast loopback and either a multicast-router socket
		 * is interested or we are a member of the group on @dev.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* hop_limit 0: deliverable locally (above) but must
			 * never hit the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		/* Node-local (or narrower) scoped multicast must not leave
		 * the host via a real device.
		 */
		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	/* Lightweight tunnel may take over transmission entirely. */
	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	/* Neighbour lookup under RCU-bh; create an entry on demand.
	 * The unlock must follow neigh_output(), which uses the
	 * reference-less neighbour.
	 */
	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
127
128static int
129ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
130 struct sk_buff *skb, unsigned int mtu)
131{
132 struct sk_buff *segs, *nskb;
133 netdev_features_t features;
134 int ret = 0;
135
136 /* Please see corresponding comment in ip_finish_output_gso
137 * describing the cases where GSO segment length exceeds the
138 * egress MTU.
139 */
140 features = netif_skb_features(skb);
141 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
142 if (IS_ERR_OR_NULL(segs)) {
143 kfree_skb(skb);
144 return -ENOMEM;
145 }
146
147 consume_skb(skb);
148
149 skb_list_walk_safe(segs, segs, nskb) {
150 int err;
151
152 skb_mark_not_on_list(segs);
153 err = ip6_fragment(net, sk, segs, ip6_finish_output2);
154 if (err && ret == 0)
155 ret = err;
156 }
157
158 return ret;
159}
160
161static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
162{
163 unsigned int mtu;
164
165#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
166 /* Policy lookup after SNAT yielded a new policy */
167 if (skb_dst(skb)->xfrm) {
168 IPCB(skb)->flags |= IPSKB_REROUTED;
169 return dst_output(net, sk, skb);
170 }
171#endif
172
173 mtu = ip6_skb_dst_mtu(skb);
174 if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
175 return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
176
177 if ((skb->len > mtu && !skb_is_gso(skb)) ||
178 dst_allfrag(skb_dst(skb)) ||
179 (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
180 return ip6_fragment(net, sk, skb, ip6_finish_output2);
181 else
182 return ip6_finish_output2(net, sk, skb);
183}
184
185static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
186{
187 int ret;
188
189 ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
190 switch (ret) {
191 case NET_XMIT_SUCCESS:
192 return __ip6_finish_output(net, sk, skb);
193 case NET_XMIT_CN:
194 return __ip6_finish_output(net, sk, skb) ? : ret;
195 default:
196 kfree_skb(skb);
197 return ret;
198 }
199}
200
201int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
202{
203 struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
204 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
205
206 skb->protocol = htons(ETH_P_IPV6);
207 skb->dev = dev;
208
209 if (unlikely(idev->cnf.disable_ipv6)) {
210 IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
211 kfree_skb(skb);
212 return 0;
213 }
214
215 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
216 net, sk, skb, indev, dev,
217 ip6_finish_output,
218 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
219}
220
221bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
222{
223 if (!np->autoflowlabel_set)
224 return ip6_default_np_autolabel(net);
225 else
226 return np->autoflowlabel;
227}
228
229/*
230 * xmit an sk_buff (used by TCP, SCTP and DCCP)
231 * Note : socket lock is not held for SYNACK packets, but might be modified
232 * by calls to skb_set_owner_w() and ipv6_local_error(),
233 * which are using proper atomic operations or spinlocks.
234 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Room needed in front of the payload: IPv6 header, link-layer
	 * header for the egress device, plus any extension headers in @opt.
	 */
	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		/* Keep socket write-memory accounting on the new skb. */
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		/* opt_flen: options of the fragmentable part. */
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		/* opt_nflen: non-fragmentable options; may rewrite
		 * first_hop (e.g. via a routing header).
		 */
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);	/* route/device default */

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	/* Oversized and not allowed to fragment: report EMSGSIZE locally. */
	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
334EXPORT_SYMBOL(ip6_xmit);
335
/* Deliver a Router Alert packet to every matching socket on the RA chain.
 * All sockets but the last get a clone; the last one consumes @skb itself.
 * Returns 1 if the packet was delivered (ownership transferred), 0 if no
 * socket matched and the caller still owns @skb.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		/* Match on the RA value and, if bound, the ingress device. */
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			/* rtalert_isolate: skip sockets that live in a
			 * different netns than the receiving device.
			 */
			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Clone for the previous match; defer the original
			 * so the final recipient avoids a copy.
			 */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
370
/* Decide what to do with a packet whose destination is an address we
 * proxy (NDISC proxy).  Returns 1 to hand the packet to local input
 * (unicast NDISC messages), -1 to drop it (link-local destination),
 * 0 to forward it normally.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	/* Find the upper-layer header, skipping extension headers. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure at least the ICMPv6 type octet is linear. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
422
/* Final step of forwarding: account the datagram, honour hardware L3
 * forward offload, clear the stale timestamp and transmit via dst_output().
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	/* Packet was already forwarded by the hardware: nothing to send. */
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	/* The reception timestamp is meaningless on the transmit path. */
	skb->tstamp = 0;
	return dst_output(net, sk, skb);
}
441
442static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
443{
444 if (skb->len <= mtu)
445 return false;
446
447 /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
448 if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
449 return true;
450
451 if (skb->ignore_df)
452 return false;
453
454 if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
455 return false;
456
457 return true;
458}
459
/* Forward a received IPv6 packet: policy and sanity checks, Router Alert
 * delivery, hop-limit handling, NDISC proxying, redirect generation, MTU
 * check, then NF_INET_FORWARD -> ip6_forward_finish().  Consumes @skb on
 * every path (or hands it to ip6_input()/the RA chain).
 */
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	/* Only packets unicast to us at L2 are forwarded. */
	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	/* A forwarded packet must not be owned by a local socket. */
	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	/* xfrm may have installed a new dst; reload it. */
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	/* Ensure private headroom before the hop-limit write below. */
	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated; re-read the header pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
616
617static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
618{
619 to->pkt_type = from->pkt_type;
620 to->priority = from->priority;
621 to->protocol = from->protocol;
622 skb_dst_drop(to);
623 skb_dst_set(to, dst_clone(skb_dst(from)));
624 to->dev = from->dev;
625 to->mark = from->mark;
626
627 skb_copy_hash(to, from);
628
629#ifdef CONFIG_NET_SCHED
630 to->tc_index = from->tc_index;
631#endif
632 nf_copy(to, from);
633 skb_ext_copy(to, from);
634 skb_copy_secmark(to, from);
635}
636
/* Fast-path fragmentation setup for an skb with a frag_list: detach the
 * list (its members become the follow-up fragments), insert a fragment
 * header into @skb itself (the first fragment) and initialise @iter for
 * ip6_fraglist_prepare()/ip6_fraglist_next().  @hlen is the length of
 * the unfragmentable part.  Returns 0 or -ENOMEM.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	/* Keep a copy of the unfragmentable part for later fragments. */
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	/* Detach the frag list from @skb. */
	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Open a gap after the unfragmentable part, place the fragment
	 * header there, then restore the copied headers in front of it.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);	/* first fragment: offset 0, MF set */
	fh->identification = frag_id;

	/* Shrink @skb to its head (linear data + page frags) only. */
	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);
677
/* Turn the next frag-list member (iter->frag) into a full IPv6 fragment:
 * prepend the saved unfragmentable headers and a fragment header with the
 * running offset, and copy metadata from the first fragment @skb.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	/* Prepend fragment header, then the copied unfragmentable part. */
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	/* Advance the payload offset by what the previous fragment sent. */
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	/* MF is set on every fragment except the last one. */
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);
702
703void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
704 unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
705 u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
706{
707 state->prevhdr = prevhdr;
708 state->nexthdr = nexthdr;
709 state->frag_id = frag_id;
710
711 state->hlen = hlen;
712 state->mtu = mtu;
713
714 state->left = skb->len - hlen; /* Space per frame */
715 state->ptr = hlen; /* Where to start from */
716
717 state->hroom = hdr_room;
718 state->troom = needed_tailroom;
719
720 state->offset = 0;
721}
722EXPORT_SYMBOL(ip6_frag_init);
723
/* Slow-path fragmentation: allocate and build the next fragment of @skb
 * according to @state.  Returns the new fragment skb, or
 * ERR_PTR(-ENOMEM) on allocation failure.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied headers: the field that used to name the upper
	 * layer protocol now points at the fragment header.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);	/* more fragments follow */
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);
799
/* Fragment @skb to fit the path MTU and emit each fragment via @output.
 * Uses the fast frag-list path when the skb's geometry allows, otherwise
 * the slow copy path.  Consumes @skb; returns 0 or a negative errno.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	/* Only consult per-socket frag_size for locally generated packets
	 * (not when nested in device transmit recursion).
	 */
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	/* Locate the spot where the fragment header must be inserted. */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Save as an offset: skb_checksum_help() may relocate the head. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	/* Per-socket IPV6_MTU setting may shrink the budget further. */
	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	/* Must fit headers, a fragment header and at least 8 payload bytes. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	/* Checksum must be finalised before the payload is split. */
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	/* Re-derive prevhdr after any head reallocation above. */
	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		/* Fast path only when every piece already has fragment
		 * geometry: sizes within the MTU, 8-byte aligned, headroom
		 * available, nothing shared/cloned.
		 */
		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			/* Move socket write-memory accounting onto the
			 * individual fragments.
			 */
			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb->tstamp = tstamp;
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		/* Error: discard the fragments not yet transmitted. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		/* Undo the ownership transfer done so far. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		frag->tstamp = tstamp;
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
982
983static inline int ip6_rt_check(const struct rt6key *rt_key,
984 const struct in6_addr *fl_addr,
985 const struct in6_addr *addr_cache)
986{
987 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
988 (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
989}
990
/* Validate a socket-cached @dst against the flow @fl6.  Returns the dst
 * if it is still usable, or releases it and returns NULL so the caller
 * performs a fresh route lookup.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached IPv4 route (e.g. on a mapped-address socket) cannot
	 * serve an IPv6 flow.
	 */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
1037
/* Core of the dst lookup: resolve the route for @fl6 (selecting a source
 * address first when @fl6->saddr is unspecified) and store it in *@dst.
 * On error, *@dst is released and set to NULL.  Returns 0 or -errno.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		/* Pick a source address for the flow, honouring the
		 * route's preferred source and the socket's srcprefs.
		 */
		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	/* Retry (or first attempt) with the source address filled in. */
	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source with a non-mapped destination can never be
	 * a valid combination.
	 */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
1155
/**
 * ip6_dst_lookup - perform route lookup on flow
 * @net: Network namespace to perform lookup in
 * @sk: socket which provides route info
 * @dst: pointer to dst_entry * for result
 * @fl6: flow to lookup
 *
 * This function performs a route lookup on the given flow.
 *
 * It returns zero on success, or a standard errno code on error.
 * On error, ip6_dst_lookup_tail() leaves *dst set to NULL.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	/* Start from a clean slate; the tail helper fills in *dst. */
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1174
1175/**
1176 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1177 * @net: Network namespace to perform lookup in
1178 * @sk: socket which provides route info
1179 * @fl6: flow to lookup
1180 * @final_dst: final destination address for ipsec lookup
1181 *
1182 * This function performs a route lookup on the given flow.
1183 *
1184 * It returns a valid dst pointer on success, or a pointer encoded
1185 * error code.
1186 */
1187struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
1188 const struct in6_addr *final_dst)
1189{
1190 struct dst_entry *dst = NULL;
1191 int err;
1192
1193 err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
1194 if (err)
1195 return ERR_PTR(err);
1196 if (final_dst)
1197 fl6->daddr = *final_dst;
1198
1199 return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
1200}
1201EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1202
1203/**
1204 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1205 * @sk: socket which provides the dst cache and route info
1206 * @fl6: flow to lookup
1207 * @final_dst: final destination address for ipsec lookup
1208 * @connected: whether @sk is connected or not
1209 *
1210 * This function performs a route lookup on the given flow with the
1211 * possibility of using the cached route in the socket if it is valid.
1212 * It will take the socket dst lock when operating on the dst cache.
1213 * As a result, this function can only be used in process context.
1214 *
1215 * In addition, for a connected socket, cache the dst in the socket
1216 * if the current cache is not valid.
1217 *
1218 * It returns a valid dst pointer on success, or a pointer encoded
1219 * error code.
1220 */
1221struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1222 const struct in6_addr *final_dst,
1223 bool connected)
1224{
1225 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1226
1227 dst = ip6_sk_dst_check(sk, dst, fl6);
1228 if (dst)
1229 return dst;
1230
1231 dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
1232 if (connected && !IS_ERR(dst))
1233 ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);
1234
1235 return dst;
1236}
1237EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1238
/**
 * ip6_dst_lookup_tunnel - perform route lookup on tunnel
 * @skb: Packet for which lookup is done
 * @dev: Tunnel device
 * @net: Network namespace of tunnel device
 * @sock: Socket which provides route info
 * @saddr: Memory to store the src ip address
 * @info: Tunnel information
 * @protocol: IP protocol
 * @use_cache: Flag to enable cache usage
 * This function performs a route lookup on a tunnel
 *
 * It returns a valid dst pointer and stores src address to be used in
 * tunnel in param saddr on success, else a pointer encoded error code.
 */

struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
					struct net_device *dev,
					struct net *net,
					struct socket *sock,
					struct in6_addr *saddr,
					const struct ip_tunnel_info *info,
					u8 protocol,
					bool use_cache)
{
	struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
	struct dst_cache *dst_cache;
#endif
	struct flowi6 fl6;
	__u8 prio;

#ifdef CONFIG_DST_CACHE
	/* Fast path: reuse a cached route and source address when allowed. */
	dst_cache = (struct dst_cache *)&info->dst_cache;
	if (use_cache) {
		dst = dst_cache_get_ip6(dst_cache, saddr);
		if (dst)
			return dst;
	}
#endif
	/* Build the flow key from the tunnel metadata. */
	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_mark = skb->mark;
	fl6.flowi6_proto = protocol;
	fl6.daddr = info->key.u.ipv6.dst;
	fl6.saddr = info->key.u.ipv6.src;
	prio = info->key.tos;
	fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
					  info->key.label);

	dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
					      NULL);
	if (IS_ERR(dst)) {
		netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
		return ERR_PTR(-ENETUNREACH);
	}
	if (dst->dev == dev) { /* is this necessary? */
		/* Route loops back into the tunnel device itself. */
		netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
		dst_release(dst);
		return ERR_PTR(-ELOOP);
	}
#ifdef CONFIG_DST_CACHE
	if (use_cache)
		dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
	/* Report the source address the route lookup selected. */
	*saddr = fl6.saddr;
	return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);
1307
1308static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1309 gfp_t gfp)
1310{
1311 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1312}
1313
1314static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1315 gfp_t gfp)
1316{
1317 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1318}
1319
1320static void ip6_append_data_mtu(unsigned int *mtu,
1321 int *maxfraglen,
1322 unsigned int fragheaderlen,
1323 struct sk_buff *skb,
1324 struct rt6_info *rt,
1325 unsigned int orig_mtu)
1326{
1327 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1328 if (!skb) {
1329 /* first fragment, reserve header_len */
1330 *mtu = orig_mtu - rt->dst.header_len;
1331
1332 } else {
1333 /*
1334 * this fragment is not first, the headers
1335 * space is regarded as data space.
1336 */
1337 *mtu = orig_mtu;
1338 }
1339 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1340 + fragheaderlen - sizeof(struct frag_hdr);
1341 }
1342}
1343
/* Initialise cork state for a corked transmit: deep-copy the tx options,
 * pin the route, and compute the MTU the fragmentation code will use.
 * Returns 0 or a negative errno.  On a partial option-duplication failure
 * v6_cork->opt remains set, so a subsequent ip6_cork_release() frees
 * whatever was duplicated so far.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		/* Deep-copy each extension-header blob individually. */
		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	/* Pin the route for the lifetime of the cork. */
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* Choose the working MTU: device MTU when probing PMTU, else the
	 * path MTU (inner path for XFRM tunnel routes).
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	/* A smaller user-requested fragment size overrides the path MTU. */
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
1420
/* Core of the corked-send machinery: append @length bytes obtained via
 * @getfrag to @queue, growing the tail skb or allocating new fragments
 * as dictated by the MTU, scatter-gather capability and zerocopy state.
 * Returns 0 or a negative errno; on error, queued data stays on @queue
 * for the caller to flush.
 */
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	/* Only the very first chunk reserves space for extension headers. */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest 8-byte-aligned payload end within the MTU. */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			/* Device can't do zerocopy; fall back to copying. */
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				/* GSO/zerocopy: only headers go in the linear
				 * area; the rest lands in page frags.
				 */
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Later fragments: bounded, non-blocking
				 * allocation against the send buffer limit.
				 */
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 * Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 * Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the unaligned tail of the previous skb
				 * into this fragment, fixing both checksums.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			/* No scatter-gather: copy into the linear tailroom. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			/* Scatter-gather without zerocopy: copy into the
			 * socket's page frag and attach it to the skb.
			 */
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			/* True zerocopy: pin the user pages directly. */
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
1764
1765int ip6_append_data(struct sock *sk,
1766 int getfrag(void *from, char *to, int offset, int len,
1767 int odd, struct sk_buff *skb),
1768 void *from, int length, int transhdrlen,
1769 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1770 struct rt6_info *rt, unsigned int flags)
1771{
1772 struct inet_sock *inet = inet_sk(sk);
1773 struct ipv6_pinfo *np = inet6_sk(sk);
1774 int exthdrlen;
1775 int err;
1776
1777 if (flags&MSG_PROBE)
1778 return 0;
1779 if (skb_queue_empty(&sk->sk_write_queue)) {
1780 /*
1781 * setup for corking
1782 */
1783 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1784 ipc6, rt, fl6);
1785 if (err)
1786 return err;
1787
1788 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1789 length += exthdrlen;
1790 transhdrlen += exthdrlen;
1791 } else {
1792 fl6 = &inet->cork.fl.u.ip6;
1793 transhdrlen = 0;
1794 }
1795
1796 return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1797 &np->cork, sk_page_frag(sk), getfrag,
1798 from, length, transhdrlen, flags, ipc6);
1799}
1800EXPORT_SYMBOL_GPL(ip6_append_data);
1801
1802static void ip6_cork_release(struct inet_cork_full *cork,
1803 struct inet6_cork *v6_cork)
1804{
1805 if (v6_cork->opt) {
1806 kfree(v6_cork->opt->dst0opt);
1807 kfree(v6_cork->opt->dst1opt);
1808 kfree(v6_cork->opt->hopopt);
1809 kfree(v6_cork->opt->srcrt);
1810 kfree(v6_cork->opt);
1811 v6_cork->opt = NULL;
1812 }
1813
1814 if (cork->base.dst) {
1815 dst_release(cork->base.dst);
1816 cork->base.dst = NULL;
1817 cork->base.flags &= ~IPCORK_ALLFRAG;
1818 }
1819 memset(&cork->fl, 0, sizeof(cork->fl));
1820}
1821
/* Collapse the queued fragments into a single skb (tail fragments become
 * the frag_list of the first), push the extension headers and the IPv6
 * header, and release the cork.  Returns the finished skb, or NULL when
 * the queue is empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining fragments onto the head skb's frag_list,
	 * transferring their byte counts and memory accounting.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	/* Push extension headers; a routing header may rewrite the
	 * destination, so work on a local copy.
	 */
	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = cork->base.mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
1897
1898int ip6_send_skb(struct sk_buff *skb)
1899{
1900 struct net *net = sock_net(skb->sk);
1901 struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1902 int err;
1903
1904 err = ip6_local_out(net, skb->sk, skb);
1905 if (err) {
1906 if (err > 0)
1907 err = net_xmit_errno(err);
1908 if (err)
1909 IP6_INC_STATS(net, rt->rt6i_idev,
1910 IPSTATS_MIB_OUTDISCARDS);
1911 }
1912
1913 return err;
1914}
1915
/* Finish the pending write queue into one skb and transmit it.
 * An empty queue is a silent success.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb = ip6_finish_skb(sk);

	return skb ? ip6_send_skb(skb) : 0;
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1927
/* Discard every skb on @queue (counting each routed one as an output
 * discard) and release the cork state.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
1944
/* Public wrapper: flush the socket's own write queue and cork state. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1951
1952struct sk_buff *ip6_make_skb(struct sock *sk,
1953 int getfrag(void *from, char *to, int offset,
1954 int len, int odd, struct sk_buff *skb),
1955 void *from, int length, int transhdrlen,
1956 struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1957 struct rt6_info *rt, unsigned int flags,
1958 struct inet_cork_full *cork)
1959{
1960 struct inet6_cork v6_cork;
1961 struct sk_buff_head queue;
1962 int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1963 int err;
1964
1965 if (flags & MSG_PROBE)
1966 return NULL;
1967
1968 __skb_queue_head_init(&queue);
1969
1970 cork->base.flags = 0;
1971 cork->base.addr = 0;
1972 cork->base.opt = NULL;
1973 cork->base.dst = NULL;
1974 v6_cork.opt = NULL;
1975 err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1976 if (err) {
1977 ip6_cork_release(cork, &v6_cork);
1978 return ERR_PTR(err);
1979 }
1980 if (ipc6->dontfrag < 0)
1981 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1982
1983 err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1984 ¤t->task_frag, getfrag, from,
1985 length + exthdrlen, transhdrlen + exthdrlen,
1986 flags, ipc6);
1987 if (err) {
1988 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1989 return ERR_PTR(err);
1990 }
1991
1992 return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1993}