Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vsock: add multi-transports support

This patch adds the support of multiple transports in the
VSOCK core.

With the multi-transports support, we can use vsock with nested VMs
(using also different hypervisors) loading both guest->host and
host->guest transports at the same time.

Major changes:
- vsock core module can be loaded regardless of the transports
- vsock_core_init() and vsock_core_exit() are renamed to
vsock_core_register() and vsock_core_unregister()
- vsock_core_register() has a features parameter (H2G, G2H, DGRAM)
to identify which directions the transport can handle and whether it
supports DGRAM (only vmci provides this)
- each stream socket is assigned to a transport when the remote CID
is set (during the connect() or when we receive a connection request
on a listener socket).
The remote CID is used to decide which transport to use:
- remote CID <= VMADDR_CID_HOST will use the guest->host transport;
- remote CID == local CID of the guest->host transport will use the
guest->host transport for loopback (host->guest transports don't
support loopback);
- remote CID > VMADDR_CID_HOST will use the host->guest transport;
- listener sockets are not bound to any transport, since no transport
operations are done on them. This way we can create a listener
socket even if no transport is loaded, or bind it to VMADDR_CID_ANY
to listen on all transports.
- DGRAM sockets are handled as before, since only the vmci_transport
provides this feature.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Stefano Garzarella and committed by
David S. Miller
c0cfa2d8 03964257

+298 -98
+3 -2
drivers/vhost/vsock.c
··· 831 831 { 832 832 int ret; 833 833 834 - ret = vsock_core_init(&vhost_transport.transport); 834 + ret = vsock_core_register(&vhost_transport.transport, 835 + VSOCK_TRANSPORT_F_H2G); 835 836 if (ret < 0) 836 837 return ret; 837 838 return misc_register(&vhost_vsock_misc); ··· 841 840 static void __exit vhost_vsock_exit(void) 842 841 { 843 842 misc_deregister(&vhost_vsock_misc); 844 - vsock_core_exit(); 843 + vsock_core_unregister(&vhost_transport.transport); 845 844 }; 846 845 847 846 module_init(vhost_vsock_init);
+12 -6
include/net/af_vsock.h
··· 91 91 u64 data2; /* Transport-defined. */ 92 92 }; 93 93 94 + /* Transport features flags */ 95 + /* Transport provides host->guest communication */ 96 + #define VSOCK_TRANSPORT_F_H2G 0x00000001 97 + /* Transport provides guest->host communication */ 98 + #define VSOCK_TRANSPORT_F_G2H 0x00000002 99 + /* Transport provides DGRAM communication */ 100 + #define VSOCK_TRANSPORT_F_DGRAM 0x00000004 101 + 94 102 struct vsock_transport { 95 103 /* Initialize/tear-down socket. */ 96 104 int (*init)(struct vsock_sock *, struct vsock_sock *); ··· 162 154 163 155 /**** CORE ****/ 164 156 165 - int __vsock_core_init(const struct vsock_transport *t, struct module *owner); 166 - static inline int vsock_core_init(const struct vsock_transport *t) 167 - { 168 - return __vsock_core_init(t, THIS_MODULE); 169 - } 170 - void vsock_core_exit(void); 157 + int vsock_core_register(const struct vsock_transport *t, int features); 158 + void vsock_core_unregister(const struct vsock_transport *t); 171 159 172 160 /* The transport may downcast this to access transport-specific functions */ 173 161 const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk); ··· 194 190 struct sockaddr_vm *dst); 195 191 void vsock_remove_sock(struct vsock_sock *vsk); 196 192 void vsock_for_each_connected_socket(void (*fn)(struct sock *sk)); 193 + int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk); 194 + bool vsock_find_cid(unsigned int cid); 197 195 198 196 /**** TAP ****/ 199 197
+185 -60
net/vmw_vsock/af_vsock.c
··· 130 130 #define VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256) 131 131 #define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128 132 132 133 - static const struct vsock_transport *transport_single; 133 + /* Transport used for host->guest communication */ 134 + static const struct vsock_transport *transport_h2g; 135 + /* Transport used for guest->host communication */ 136 + static const struct vsock_transport *transport_g2h; 137 + /* Transport used for DGRAM communication */ 138 + static const struct vsock_transport *transport_dgram; 134 139 static DEFINE_MUTEX(vsock_register_mutex); 135 140 136 141 /**** UTILS ****/ ··· 187 182 return __vsock_bind(sk, &local_addr); 188 183 } 189 184 190 - static int __init vsock_init_tables(void) 185 + static void vsock_init_tables(void) 191 186 { 192 187 int i; 193 188 ··· 196 191 197 192 for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) 198 193 INIT_LIST_HEAD(&vsock_connected_table[i]); 199 - return 0; 200 194 } 201 195 202 196 static void __vsock_insert_bound(struct list_head *list, ··· 380 376 } 381 377 EXPORT_SYMBOL_GPL(vsock_enqueue_accept); 382 378 379 + /* Assign a transport to a socket and call the .init transport callback. 380 + * 381 + * Note: for stream socket this must be called when vsk->remote_addr is set 382 + * (e.g. during the connect() or when a connection request on a listener 383 + * socket is received). 
384 + * The vsk->remote_addr is used to decide which transport to use: 385 + * - remote CID <= VMADDR_CID_HOST will use guest->host transport; 386 + * - remote CID == local_cid (guest->host transport) will use guest->host 387 + * transport for loopback (host->guest transports don't support loopback); 388 + * - remote CID > VMADDR_CID_HOST will use host->guest transport; 389 + */ 390 + int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) 391 + { 392 + const struct vsock_transport *new_transport; 393 + struct sock *sk = sk_vsock(vsk); 394 + unsigned int remote_cid = vsk->remote_addr.svm_cid; 395 + 396 + switch (sk->sk_type) { 397 + case SOCK_DGRAM: 398 + new_transport = transport_dgram; 399 + break; 400 + case SOCK_STREAM: 401 + if (remote_cid <= VMADDR_CID_HOST || 402 + (transport_g2h && 403 + remote_cid == transport_g2h->get_local_cid())) 404 + new_transport = transport_g2h; 405 + else 406 + new_transport = transport_h2g; 407 + break; 408 + default: 409 + return -ESOCKTNOSUPPORT; 410 + } 411 + 412 + if (vsk->transport) { 413 + if (vsk->transport == new_transport) 414 + return 0; 415 + 416 + vsk->transport->release(vsk); 417 + vsk->transport->destruct(vsk); 418 + } 419 + 420 + if (!new_transport) 421 + return -ENODEV; 422 + 423 + vsk->transport = new_transport; 424 + 425 + return vsk->transport->init(vsk, psk); 426 + } 427 + EXPORT_SYMBOL_GPL(vsock_assign_transport); 428 + 429 + bool vsock_find_cid(unsigned int cid) 430 + { 431 + if (transport_g2h && cid == transport_g2h->get_local_cid()) 432 + return true; 433 + 434 + if (transport_h2g && cid == VMADDR_CID_HOST) 435 + return true; 436 + 437 + return false; 438 + } 439 + EXPORT_SYMBOL_GPL(vsock_find_cid); 440 + 383 441 static struct sock *vsock_dequeue_accept(struct sock *listener) 384 442 { 385 443 struct vsock_sock *vlistener; ··· 479 413 static int vsock_send_shutdown(struct sock *sk, int mode) 480 414 { 481 415 struct vsock_sock *vsk = vsock_sk(sk); 416 + 417 + if (!vsk->transport) 418 + 
return -ENODEV; 482 419 483 420 return vsk->transport->shutdown(vsk, mode); 484 421 } ··· 599 530 static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) 600 531 { 601 532 struct vsock_sock *vsk = vsock_sk(sk); 602 - u32 cid; 603 533 int retval; 604 534 605 535 /* First ensure this socket isn't already bound. */ ··· 608 540 /* Now bind to the provided address or select appropriate values if 609 541 * none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that 610 542 * like AF_INET prevents binding to a non-local IP address (in most 611 - * cases), we only allow binding to the local CID. 543 + * cases), we only allow binding to a local CID. 612 544 */ 613 - cid = vsk->transport->get_local_cid(); 614 - if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY) 545 + if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid)) 615 546 return -EADDRNOTAVAIL; 616 547 617 548 switch (sk->sk_socket->type) { ··· 659 592 sk->sk_type = type; 660 593 661 594 vsk = vsock_sk(sk); 662 - vsk->transport = transport_single; 663 595 vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); 664 596 vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY); 665 597 ··· 695 629 vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE; 696 630 } 697 631 698 - if (vsk->transport->init(vsk, psk) < 0) { 699 - sk_free(sk); 700 - return NULL; 701 - } 702 - 703 632 return sk; 704 633 } 705 634 ··· 710 649 /* The release call is supposed to use lock_sock_nested() 711 650 * rather than lock_sock(), if a sock lock should be acquired. 
712 651 */ 713 - vsk->transport->release(vsk); 652 + if (vsk->transport) 653 + vsk->transport->release(vsk); 654 + else if (sk->sk_type == SOCK_STREAM) 655 + vsock_remove_sock(vsk); 714 656 715 657 /* When "level" is SINGLE_DEPTH_NESTING, use the nested 716 658 * version to avoid the warning "possible recursive locking ··· 741 677 { 742 678 struct vsock_sock *vsk = vsock_sk(sk); 743 679 744 - vsk->transport->destruct(vsk); 680 + if (vsk->transport) 681 + vsk->transport->destruct(vsk); 745 682 746 683 /* When clearing these addresses, there's no need to set the family and 747 684 * possibly register the address family with the kernel. ··· 959 894 mask |= EPOLLIN | EPOLLRDNORM; 960 895 961 896 /* If there is something in the queue then we can read. */ 962 - if (transport->stream_is_active(vsk) && 897 + if (transport && transport->stream_is_active(vsk) && 963 898 !(sk->sk_shutdown & RCV_SHUTDOWN)) { 964 899 bool data_ready_now = false; 965 900 int ret = transport->notify_poll_in( ··· 1209 1144 err = 0; 1210 1145 sk = sock->sk; 1211 1146 vsk = vsock_sk(sk); 1212 - transport = vsk->transport; 1213 1147 1214 1148 lock_sock(sk); 1215 1149 ··· 1236 1172 goto out; 1237 1173 } 1238 1174 1175 + /* Set the remote address that we are connecting to. */ 1176 + memcpy(&vsk->remote_addr, remote_addr, 1177 + sizeof(vsk->remote_addr)); 1178 + 1179 + err = vsock_assign_transport(vsk, NULL); 1180 + if (err) 1181 + goto out; 1182 + 1183 + transport = vsk->transport; 1184 + 1239 1185 /* The hypervisor and well-known contexts do not have socket 1240 1186 * endpoints. 1241 1187 */ 1242 - if (!transport->stream_allow(remote_addr->svm_cid, 1188 + if (!transport || 1189 + !transport->stream_allow(remote_addr->svm_cid, 1243 1190 remote_addr->svm_port)) { 1244 1191 err = -ENETUNREACH; 1245 1192 goto out; 1246 1193 } 1247 - 1248 - /* Set the remote address that we are connecting to. 
*/ 1249 - memcpy(&vsk->remote_addr, remote_addr, 1250 - sizeof(vsk->remote_addr)); 1251 1194 1252 1195 err = vsock_auto_bind(vsk); 1253 1196 if (err) ··· 1655 1584 goto out; 1656 1585 } 1657 1586 1658 - if (sk->sk_state != TCP_ESTABLISHED || 1587 + if (!transport || sk->sk_state != TCP_ESTABLISHED || 1659 1588 !vsock_addr_bound(&vsk->local_addr)) { 1660 1589 err = -ENOTCONN; 1661 1590 goto out; ··· 1781 1710 1782 1711 lock_sock(sk); 1783 1712 1784 - if (sk->sk_state != TCP_ESTABLISHED) { 1713 + if (!transport || sk->sk_state != TCP_ESTABLISHED) { 1785 1714 /* Recvmsg is supposed to return 0 if a peer performs an 1786 1715 * orderly shutdown. Differentiate between that case and when a 1787 1716 * peer has not connected or a local shutdown occured with the ··· 1955 1884 static int vsock_create(struct net *net, struct socket *sock, 1956 1885 int protocol, int kern) 1957 1886 { 1887 + struct vsock_sock *vsk; 1958 1888 struct sock *sk; 1889 + int ret; 1959 1890 1960 1891 if (!sock) 1961 1892 return -EINVAL; ··· 1982 1909 if (!sk) 1983 1910 return -ENOMEM; 1984 1911 1985 - vsock_insert_unbound(vsock_sk(sk)); 1912 + vsk = vsock_sk(sk); 1913 + 1914 + if (sock->type == SOCK_DGRAM) { 1915 + ret = vsock_assign_transport(vsk, NULL); 1916 + if (ret < 0) { 1917 + sock_put(sk); 1918 + return ret; 1919 + } 1920 + } 1921 + 1922 + vsock_insert_unbound(vsk); 1986 1923 1987 1924 return 0; 1988 1925 } ··· 2007 1924 unsigned int cmd, void __user *ptr) 2008 1925 { 2009 1926 u32 __user *p = ptr; 1927 + u32 cid = VMADDR_CID_ANY; 2010 1928 int retval = 0; 2011 1929 2012 1930 switch (cmd) { 2013 1931 case IOCTL_VM_SOCKETS_GET_LOCAL_CID: 2014 - if (put_user(transport_single->get_local_cid(), p) != 0) 1932 + /* To be compatible with the VMCI behavior, we prioritize the 1933 + * guest CID instead of well-know host CID (VMADDR_CID_HOST). 
1934 + */ 1935 + if (transport_g2h) 1936 + cid = transport_g2h->get_local_cid(); 1937 + else if (transport_h2g) 1938 + cid = transport_h2g->get_local_cid(); 1939 + 1940 + if (put_user(cid, p) != 0) 2015 1941 retval = -EFAULT; 2016 1942 break; 2017 1943 ··· 2060 1968 .fops = &vsock_device_ops, 2061 1969 }; 2062 1970 2063 - int __vsock_core_init(const struct vsock_transport *t, struct module *owner) 1971 + static int __init vsock_init(void) 2064 1972 { 2065 - int err = mutex_lock_interruptible(&vsock_register_mutex); 1973 + int err = 0; 2066 1974 2067 - if (err) 2068 - return err; 1975 + vsock_init_tables(); 2069 1976 2070 - if (transport_single) { 2071 - err = -EBUSY; 2072 - goto err_busy; 2073 - } 2074 - 2075 - /* Transport must be the owner of the protocol so that it can't 2076 - * unload while there are open sockets. 2077 - */ 2078 - vsock_proto.owner = owner; 2079 - transport_single = t; 2080 - 1977 + vsock_proto.owner = THIS_MODULE; 2081 1978 vsock_device.minor = MISC_DYNAMIC_MINOR; 2082 1979 err = misc_register(&vsock_device); 2083 1980 if (err) { ··· 2087 2006 goto err_unregister_proto; 2088 2007 } 2089 2008 2090 - mutex_unlock(&vsock_register_mutex); 2091 2009 return 0; 2092 2010 2093 2011 err_unregister_proto: ··· 2094 2014 err_deregister_misc: 2095 2015 misc_deregister(&vsock_device); 2096 2016 err_reset_transport: 2097 - transport_single = NULL; 2098 - err_busy: 2099 - mutex_unlock(&vsock_register_mutex); 2100 2017 return err; 2101 2018 } 2102 - EXPORT_SYMBOL_GPL(__vsock_core_init); 2103 2019 2104 - void vsock_core_exit(void) 2020 + static void __exit vsock_exit(void) 2105 2021 { 2106 - mutex_lock(&vsock_register_mutex); 2107 - 2108 2022 misc_deregister(&vsock_device); 2109 2023 sock_unregister(AF_VSOCK); 2110 2024 proto_unregister(&vsock_proto); 2111 - 2112 - /* We do not want the assignment below re-ordered. 
*/ 2113 - mb(); 2114 - transport_single = NULL; 2115 - 2116 - mutex_unlock(&vsock_register_mutex); 2117 2025 } 2118 - EXPORT_SYMBOL_GPL(vsock_core_exit); 2119 2026 2120 2027 const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk) 2121 2028 { ··· 2110 2043 } 2111 2044 EXPORT_SYMBOL_GPL(vsock_core_get_transport); 2112 2045 2113 - static void __exit vsock_exit(void) 2046 + int vsock_core_register(const struct vsock_transport *t, int features) 2114 2047 { 2115 - /* Do nothing. This function makes this module removable. */ 2116 - } 2048 + const struct vsock_transport *t_h2g, *t_g2h, *t_dgram; 2049 + int err = mutex_lock_interruptible(&vsock_register_mutex); 2117 2050 2118 - module_init(vsock_init_tables); 2051 + if (err) 2052 + return err; 2053 + 2054 + t_h2g = transport_h2g; 2055 + t_g2h = transport_g2h; 2056 + t_dgram = transport_dgram; 2057 + 2058 + if (features & VSOCK_TRANSPORT_F_H2G) { 2059 + if (t_h2g) { 2060 + err = -EBUSY; 2061 + goto err_busy; 2062 + } 2063 + t_h2g = t; 2064 + } 2065 + 2066 + if (features & VSOCK_TRANSPORT_F_G2H) { 2067 + if (t_g2h) { 2068 + err = -EBUSY; 2069 + goto err_busy; 2070 + } 2071 + t_g2h = t; 2072 + } 2073 + 2074 + if (features & VSOCK_TRANSPORT_F_DGRAM) { 2075 + if (t_dgram) { 2076 + err = -EBUSY; 2077 + goto err_busy; 2078 + } 2079 + t_dgram = t; 2080 + } 2081 + 2082 + transport_h2g = t_h2g; 2083 + transport_g2h = t_g2h; 2084 + transport_dgram = t_dgram; 2085 + 2086 + err_busy: 2087 + mutex_unlock(&vsock_register_mutex); 2088 + return err; 2089 + } 2090 + EXPORT_SYMBOL_GPL(vsock_core_register); 2091 + 2092 + void vsock_core_unregister(const struct vsock_transport *t) 2093 + { 2094 + mutex_lock(&vsock_register_mutex); 2095 + 2096 + if (transport_h2g == t) 2097 + transport_h2g = NULL; 2098 + 2099 + if (transport_g2h == t) 2100 + transport_g2h = NULL; 2101 + 2102 + if (transport_dgram == t) 2103 + transport_dgram = NULL; 2104 + 2105 + mutex_unlock(&vsock_register_mutex); 2106 + } 2107 + 
EXPORT_SYMBOL_GPL(vsock_core_unregister); 2108 + 2109 + module_init(vsock_init); 2119 2110 module_exit(vsock_exit); 2120 2111 2121 2112 MODULE_AUTHOR("VMware, Inc.");
+21 -5
net/vmw_vsock/hyperv_transport.c
··· 165 165 GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, 166 166 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3); 167 167 168 + static bool hvs_check_transport(struct vsock_sock *vsk); 169 + 168 170 static bool is_valid_srv_id(const guid_t *id) 169 171 { 170 172 return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4); ··· 369 367 370 368 new->sk_state = TCP_SYN_SENT; 371 369 vnew = vsock_sk(new); 370 + 371 + hvs_addr_init(&vnew->local_addr, if_type); 372 + hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); 373 + 374 + ret = vsock_assign_transport(vnew, vsock_sk(sk)); 375 + /* Transport assigned (looking at remote_addr) must be the 376 + * same where we received the request. 377 + */ 378 + if (ret || !hvs_check_transport(vnew)) { 379 + sock_put(new); 380 + goto out; 381 + } 372 382 hvs_new = vnew->trans; 373 383 hvs_new->chan = chan; 374 384 } else { ··· 443 429 if (conn_from_host) { 444 430 new->sk_state = TCP_ESTABLISHED; 445 431 sk_acceptq_added(sk); 446 - 447 - hvs_addr_init(&vnew->local_addr, if_type); 448 - hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); 449 432 450 433 hvs_new->vm_srv_id = *if_type; 451 434 hvs_new->host_srv_id = *if_instance; ··· 891 880 892 881 }; 893 882 883 + static bool hvs_check_transport(struct vsock_sock *vsk) 884 + { 885 + return vsk->transport == &hvs_transport; 886 + } 887 + 894 888 static int hvs_probe(struct hv_device *hdev, 895 889 const struct hv_vmbus_device_id *dev_id) 896 890 { ··· 944 928 if (ret != 0) 945 929 return ret; 946 930 947 - ret = vsock_core_init(&hvs_transport); 931 + ret = vsock_core_register(&hvs_transport, VSOCK_TRANSPORT_F_G2H); 948 932 if (ret) { 949 933 vmbus_driver_unregister(&hvs_drv); 950 934 return ret; ··· 955 939 956 940 static void __exit hvs_exit(void) 957 941 { 958 - vsock_core_exit(); 942 + vsock_core_unregister(&hvs_transport); 959 943 vmbus_driver_unregister(&hvs_drv); 960 944 } 961 945
+4 -3
net/vmw_vsock/virtio_transport.c
··· 770 770 if (!virtio_vsock_workqueue) 771 771 return -ENOMEM; 772 772 773 - ret = vsock_core_init(&virtio_transport.transport); 773 + ret = vsock_core_register(&virtio_transport.transport, 774 + VSOCK_TRANSPORT_F_G2H); 774 775 if (ret) 775 776 goto out_wq; 776 777 ··· 782 781 return 0; 783 782 784 783 out_vci: 785 - vsock_core_exit(); 784 + vsock_core_unregister(&virtio_transport.transport); 786 785 out_wq: 787 786 destroy_workqueue(virtio_vsock_workqueue); 788 787 return ret; ··· 791 790 static void __exit virtio_vsock_exit(void) 792 791 { 793 792 unregister_virtio_driver(&virtio_vsock_driver); 794 - vsock_core_exit(); 793 + vsock_core_unregister(&virtio_transport.transport); 795 794 destroy_workqueue(virtio_vsock_workqueue); 796 795 } 797 796
+44 -19
net/vmw_vsock/virtio_transport_common.c
··· 453 453 454 454 vsk->trans = vvs; 455 455 vvs->vsk = vsk; 456 - if (psk) { 456 + if (psk && psk->trans) { 457 457 struct virtio_vsock_sock *ptrans = psk->trans; 458 458 459 459 vvs->peer_buf_alloc = ptrans->peer_buf_alloc; ··· 986 986 return virtio_transport_send_pkt_info(vsk, &info); 987 987 } 988 988 989 + static bool virtio_transport_space_update(struct sock *sk, 990 + struct virtio_vsock_pkt *pkt) 991 + { 992 + struct vsock_sock *vsk = vsock_sk(sk); 993 + struct virtio_vsock_sock *vvs = vsk->trans; 994 + bool space_available; 995 + 996 + /* Listener sockets are not associated with any transport, so we are 997 + * not able to take the state to see if there is space available in the 998 + * remote peer, but since they are only used to receive requests, we 999 + * can assume that there is always space available in the other peer. 1000 + */ 1001 + if (!vvs) 1002 + return true; 1003 + 1004 + /* buf_alloc and fwd_cnt is always included in the hdr */ 1005 + spin_lock_bh(&vvs->tx_lock); 1006 + vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); 1007 + vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); 1008 + space_available = virtio_transport_has_space(vsk); 1009 + spin_unlock_bh(&vvs->tx_lock); 1010 + return space_available; 1011 + } 1012 + 989 1013 /* Handle server socket */ 990 1014 static int 991 - virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) 1015 + virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt, 1016 + struct virtio_transport *t) 992 1017 { 993 1018 struct vsock_sock *vsk = vsock_sk(sk); 994 1019 struct vsock_sock *vchild; 995 1020 struct sock *child; 1021 + int ret; 996 1022 997 1023 if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) { 998 1024 virtio_transport_reset(vsk, pkt); ··· 1048 1022 vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid), 1049 1023 le32_to_cpu(pkt->hdr.src_port)); 1050 1024 1025 + ret = vsock_assign_transport(vchild, vsk); 1026 + /* Transport assigned 
(looking at remote_addr) must be the same 1027 + * where we received the request. 1028 + */ 1029 + if (ret || vchild->transport != &t->transport) { 1030 + release_sock(child); 1031 + virtio_transport_reset(vsk, pkt); 1032 + sock_put(child); 1033 + return ret; 1034 + } 1035 + 1036 + if (virtio_transport_space_update(child, pkt)) 1037 + child->sk_write_space(child); 1038 + 1051 1039 vsock_insert_connected(vchild); 1052 1040 vsock_enqueue_accept(sk, child); 1053 1041 virtio_transport_send_response(vchild, pkt); ··· 1070 1030 1071 1031 sk->sk_data_ready(sk); 1072 1032 return 0; 1073 - } 1074 - 1075 - static bool virtio_transport_space_update(struct sock *sk, 1076 - struct virtio_vsock_pkt *pkt) 1077 - { 1078 - struct vsock_sock *vsk = vsock_sk(sk); 1079 - struct virtio_vsock_sock *vvs = vsk->trans; 1080 - bool space_available; 1081 - 1082 - /* buf_alloc and fwd_cnt is always included in the hdr */ 1083 - spin_lock_bh(&vvs->tx_lock); 1084 - vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc); 1085 - vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt); 1086 - space_available = virtio_transport_has_space(vsk); 1087 - spin_unlock_bh(&vvs->tx_lock); 1088 - return space_available; 1089 1033 } 1090 1034 1091 1035 /* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex ··· 1128 1104 1129 1105 switch (sk->sk_state) { 1130 1106 case TCP_LISTEN: 1131 - virtio_transport_recv_listen(sk, pkt); 1107 + virtio_transport_recv_listen(sk, pkt, t); 1132 1108 virtio_transport_free_pkt(pkt); 1133 1109 break; 1134 1110 case TCP_SYN_SENT: ··· 1146 1122 virtio_transport_free_pkt(pkt); 1147 1123 break; 1148 1124 } 1125 + 1149 1126 release_sock(sk); 1150 1127 1151 1128 /* Release refcnt obtained when we fetched this socket out of the
+29 -3
net/vmw_vsock/vmci_transport.c
··· 57 57 static u16 vmci_transport_new_proto_supported_versions(void); 58 58 static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto, 59 59 bool old_pkt_proto); 60 + static bool vmci_check_transport(struct vsock_sock *vsk); 60 61 61 62 struct vmci_transport_recv_pkt_info { 62 63 struct work_struct work; ··· 1017 1016 pkt->dst_port); 1018 1017 vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context, 1019 1018 pkt->src_port); 1019 + 1020 + err = vsock_assign_transport(vpending, vsock_sk(sk)); 1021 + /* Transport assigned (looking at remote_addr) must be the same 1022 + * where we received the request. 1023 + */ 1024 + if (err || !vmci_check_transport(vpending)) { 1025 + vmci_transport_send_reset(sk, pkt); 1026 + sock_put(pending); 1027 + return err; 1028 + } 1020 1029 1021 1030 /* If the proposed size fits within our min/max, accept it. Otherwise 1022 1031 * propose our own size. ··· 2019 2008 return vmci_get_context_id(); 2020 2009 } 2021 2010 2022 - static const struct vsock_transport vmci_transport = { 2011 + static struct vsock_transport vmci_transport = { 2023 2012 .init = vmci_transport_socket_init, 2024 2013 .destruct = vmci_transport_destruct, 2025 2014 .release = vmci_transport_release, ··· 2049 2038 .get_local_cid = vmci_transport_get_local_cid, 2050 2039 }; 2051 2040 2041 + static bool vmci_check_transport(struct vsock_sock *vsk) 2042 + { 2043 + return vsk->transport == &vmci_transport; 2044 + } 2045 + 2052 2046 static int __init vmci_transport_init(void) 2053 2047 { 2048 + int features = VSOCK_TRANSPORT_F_DGRAM | VSOCK_TRANSPORT_F_H2G; 2049 + int cid; 2054 2050 int err; 2051 + 2052 + cid = vmci_get_context_id(); 2053 + 2054 + if (cid == VMCI_INVALID_ID) 2055 + return -EINVAL; 2056 + 2057 + if (cid != VMCI_HOST_CONTEXT_ID) 2058 + features |= VSOCK_TRANSPORT_F_G2H; 2055 2059 2056 2060 /* Create the datagram handle that we will use to send and receive all 2057 2061 * VSocket control messages for this context. 
··· 2091 2065 goto err_destroy_stream_handle; 2092 2066 } 2093 2067 2094 - err = vsock_core_init(&vmci_transport); 2068 + err = vsock_core_register(&vmci_transport, features); 2095 2069 if (err < 0) 2096 2070 goto err_unsubscribe; 2097 2071 ··· 2122 2096 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID; 2123 2097 } 2124 2098 2125 - vsock_core_exit(); 2099 + vsock_core_unregister(&vmci_transport); 2126 2100 } 2127 2101 module_exit(vmci_transport_exit); 2128 2102