Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/macvtap: add vhost support

This adds support for passing a macvtap file descriptor into
vhost-net, much like we already do for tun/tap.

Most of the new code is taken from the respective patch
in the tun driver and may get consolidated in the future.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Arnd Bergmann and committed by David S. Miller
501c774c 02df55d2

+97 -24
+77 -21
drivers/net/macvtap.c
··· 58 58 static struct class *macvtap_class; 59 59 static struct cdev macvtap_cdev; 60 60 61 + static const struct proto_ops macvtap_socket_ops; 62 + 61 63 /* 62 64 * RCU usage: 63 65 * The macvtap_queue and the macvlan_dev are loosely coupled, the ··· 178 176 return -ENOLINK; 179 177 180 178 skb_queue_tail(&q->sk.sk_receive_queue, skb); 181 - wake_up(q->sk.sk_sleep); 179 + wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); 182 180 return 0; 183 181 } 184 182 ··· 244 242 return; 245 243 246 244 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 247 - wake_up_interruptible_sync(sk->sk_sleep); 245 + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); 248 246 } 249 247 250 248 static int macvtap_open(struct inode *inode, struct file *file) ··· 272 270 init_waitqueue_head(&q->sock.wait); 273 271 q->sock.type = SOCK_RAW; 274 272 q->sock.state = SS_CONNECTED; 273 + q->sock.file = file; 274 + q->sock.ops = &macvtap_socket_ops; 275 275 sock_init_data(&q->sock, &q->sk); 276 276 q->sk.sk_write_space = macvtap_sock_write_space; 277 277 ··· 391 387 392 388 rcu_read_lock_bh(); 393 389 vlan = rcu_dereference(q->vlan); 394 - macvlan_count_rx(vlan, len, ret == 0, 0); 390 + if (vlan) 391 + macvlan_count_rx(vlan, len, ret == 0, 0); 395 392 rcu_read_unlock_bh(); 396 393 397 394 return ret ? 
ret : len; 398 395 } 399 396 400 - static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, 401 - unsigned long count, loff_t pos) 397 + static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb, 398 + const struct iovec *iv, unsigned long len, 399 + int noblock) 402 400 { 403 - struct file *file = iocb->ki_filp; 404 - struct macvtap_queue *q = file->private_data; 405 - 406 401 DECLARE_WAITQUEUE(wait, current); 407 402 struct sk_buff *skb; 408 - ssize_t len, ret = 0; 409 - 410 - if (!q) { 411 - ret = -ENOLINK; 412 - goto out; 413 - } 414 - 415 - len = iov_length(iv, count); 416 - if (len < 0) { 417 - ret = -EINVAL; 418 - goto out; 419 - } 403 + ssize_t ret = 0; 420 404 421 405 add_wait_queue(q->sk.sk_sleep, &wait); 422 406 while (len) { ··· 413 421 /* Read frames from the queue */ 414 422 skb = skb_dequeue(&q->sk.sk_receive_queue); 415 423 if (!skb) { 416 - if (file->f_flags & O_NONBLOCK) { 424 + if (noblock) { 417 425 ret = -EAGAIN; 418 426 break; 419 427 } ··· 432 440 433 441 current->state = TASK_RUNNING; 434 442 remove_wait_queue(q->sk.sk_sleep, &wait); 443 + return ret; 444 + } 435 445 446 + static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, 447 + unsigned long count, loff_t pos) 448 + { 449 + struct file *file = iocb->ki_filp; 450 + struct macvtap_queue *q = file->private_data; 451 + ssize_t len, ret = 0; 452 + 453 + len = iov_length(iv, count); 454 + if (len < 0) { 455 + ret = -EINVAL; 456 + goto out; 457 + } 458 + 459 + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK); 460 + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? 
*/ 436 461 out: 437 462 return ret; 438 463 } ··· 546 537 .compat_ioctl = macvtap_compat_ioctl, 547 538 #endif 548 539 }; 540 + 541 + static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock, 542 + struct msghdr *m, size_t total_len) 543 + { 544 + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); 545 + return macvtap_get_user(q, m->msg_iov, total_len, 546 + m->msg_flags & MSG_DONTWAIT); 547 + } 548 + 549 + static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock, 550 + struct msghdr *m, size_t total_len, 551 + int flags) 552 + { 553 + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); 554 + int ret; 555 + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) 556 + return -EINVAL; 557 + ret = macvtap_do_read(q, iocb, m->msg_iov, total_len, 558 + flags & MSG_DONTWAIT); 559 + if (ret > total_len) { 560 + m->msg_flags |= MSG_TRUNC; 561 + ret = flags & MSG_TRUNC ? ret : total_len; 562 + } 563 + return ret; 564 + } 565 + 566 + /* Ops structure to mimic raw sockets with tun */ 567 + static const struct proto_ops macvtap_socket_ops = { 568 + .sendmsg = macvtap_sendmsg, 569 + .recvmsg = macvtap_recvmsg, 570 + }; 571 + 572 + /* Get an underlying socket object from tun file. Returns error unless file is 573 + * attached to a device. The returned object works like a packet socket, it 574 + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for 575 + * holding a reference to the file for as long as the socket is in use. */ 576 + struct socket *macvtap_get_socket(struct file *file) 577 + { 578 + struct macvtap_queue *q; 579 + if (file->f_op != &macvtap_fops) 580 + return ERR_PTR(-EINVAL); 581 + q = file->private_data; 582 + if (!q) 583 + return ERR_PTR(-EBADFD); 584 + return &q->sock; 585 + } 586 + EXPORT_SYMBOL_GPL(macvtap_get_socket); 549 587 550 588 static int macvtap_init(void) 551 589 {
+1 -1
drivers/vhost/Kconfig
··· 1 1 config VHOST_NET 2 2 tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" 3 - depends on NET && EVENTFD && (TUN || !TUN) && EXPERIMENTAL 3 + depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP) && EXPERIMENTAL 4 4 ---help--- 5 5 This kernel module can be loaded in host kernel to accelerate 6 6 guest networking with virtio_net. Not to be confused with virtio_net
+6 -2
drivers/vhost/net.c
··· 22 22 #include <linux/if_packet.h> 23 23 #include <linux/if_arp.h> 24 24 #include <linux/if_tun.h> 25 + #include <linux/if_macvlan.h> 25 26 26 27 #include <net/sock.h> 27 28 ··· 453 452 return ERR_PTR(r); 454 453 } 455 454 456 - static struct socket *get_tun_socket(int fd) 455 + static struct socket *get_tap_socket(int fd) 457 456 { 458 457 struct file *file = fget(fd); 459 458 struct socket *sock; 460 459 if (!file) 461 460 return ERR_PTR(-EBADF); 462 461 sock = tun_get_socket(file); 462 + if (!IS_ERR(sock)) 463 + return sock; 464 + sock = macvtap_get_socket(file); 463 465 if (IS_ERR(sock)) 464 466 fput(file); 465 467 return sock; ··· 477 473 sock = get_raw_socket(fd); 478 474 if (!IS_ERR(sock)) 479 475 return sock; 480 - sock = get_tun_socket(fd); 476 + sock = get_tap_socket(fd); 481 477 if (!IS_ERR(sock)) 482 478 return sock; 483 479 return ERR_PTR(-ENOTSOCK);
+13
include/linux/if_macvlan.h
··· 7 7 #include <linux/netlink.h> 8 8 #include <net/netlink.h> 9 9 10 + #if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE) 11 + struct socket *macvtap_get_socket(struct file *); 12 + #else 13 + #include <linux/err.h> 14 + #include <linux/errno.h> 15 + struct file; 16 + struct socket; 17 + static inline struct socket *macvtap_get_socket(struct file *f) 18 + { 19 + return ERR_PTR(-EINVAL); 20 + } 21 + #endif /* CONFIG_MACVTAP */ 22 + 10 23 struct macvlan_port; 11 24 struct macvtap_queue; 12 25