Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tun: add ioctl to modify vnet header size

virtio added a mergeable buffers mode where 2 bytes of extra info are put
after the vnet header but before the actual data (tun does not need this data).
In hindsight, it would have been better to add the new info *before* the
packet: as it is, users need a lot of tricky code to skip the extra 2
bytes in the middle of the iovec, and in fact applications seem to get
it wrong, and only work with a specific iovec layout. The fact that we might
need to split the iovec also means we might, in theory, overflow the maximum
iovec size.

This patch adds a simpler way for applications to handle this,
and future-proofs the interface against further extensions,
by making the size of the virtio net header configurable
from userspace. As a result, the tun driver will simply
skip the extra 2 bytes on both input and output.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: David S. Miller <davem@davemloft.net>

+30 -4
+28 -4
drivers/net/tun.c
··· 110 110 struct tap_filter txflt; 111 111 struct socket socket; 112 112 struct socket_wq wq; 113 + 114 + int vnet_hdr_sz; 115 + 113 116 #ifdef TUN_DEBUG 114 117 int debug; 115 118 #endif ··· 566 563 } 567 564 568 565 if (tun->flags & TUN_VNET_HDR) { 569 - if ((len -= sizeof(gso)) > count) 566 + if ((len -= tun->vnet_hdr_sz) > count) 570 567 return -EINVAL; 571 568 572 569 if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso))) ··· 578 575 579 576 if (gso.hdr_len > len) 580 577 return -EINVAL; 581 - offset += sizeof(gso); 578 + offset += tun->vnet_hdr_sz; 582 579 } 583 580 584 581 if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) { ··· 721 718 722 719 if (tun->flags & TUN_VNET_HDR) { 723 720 struct virtio_net_hdr gso = { 0 }; /* no info leak */ 724 - if ((len -= sizeof(gso)) < 0) 721 + if ((len -= tun->vnet_hdr_sz) < 0) 725 722 return -EINVAL; 726 723 727 724 if (skb_is_gso(skb)) { ··· 752 749 if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total, 753 750 sizeof(gso)))) 754 751 return -EFAULT; 755 - total += sizeof(gso); 752 + total += tun->vnet_hdr_sz; 756 753 } 757 754 758 755 len = min_t(int, skb->len, len); ··· 1038 1035 tun->dev = dev; 1039 1036 tun->flags = flags; 1040 1037 tun->txflt.count = 0; 1038 + tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr); 1041 1039 1042 1040 err = -ENOMEM; 1043 1041 sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto); ··· 1181 1177 struct sock_fprog fprog; 1182 1178 struct ifreq ifr; 1183 1179 int sndbuf; 1180 + int vnet_hdr_sz; 1184 1181 int ret; 1185 1182 1186 1183 if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) ··· 1325 1320 } 1326 1321 1327 1322 tun->socket.sk->sk_sndbuf = sndbuf; 1323 + break; 1324 + 1325 + case TUNGETVNETHDRSZ: 1326 + vnet_hdr_sz = tun->vnet_hdr_sz; 1327 + if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz))) 1328 + ret = -EFAULT; 1329 + break; 1330 + 1331 + case TUNSETVNETHDRSZ: 1332 + if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) { 1333 + ret = -EFAULT; 1334 + break; 1335 + 
} 1336 + if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) { 1337 + ret = -EINVAL; 1338 + break; 1339 + } 1340 + 1341 + tun->vnet_hdr_sz = vnet_hdr_sz; 1328 1342 break; 1329 1343 1330 1344 case TUNATTACHFILTER:
+2
include/linux/if_tun.h
··· 51 51 #define TUNSETSNDBUF _IOW('T', 212, int) 52 52 #define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog) 53 53 #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) 54 + #define TUNGETVNETHDRSZ _IOR('T', 215, int) 55 + #define TUNSETVNETHDRSZ _IOW('T', 216, int) 54 56 55 57 /* TUNSETIFF ifr flags */ 56 58 #define IFF_TUN 0x0001