Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * common code for virtio vsock
4 *
5 * Copyright (C) 2013-2015 Red Hat, Inc.
6 * Author: Asias He <asias@redhat.com>
7 * Stefan Hajnoczi <stefanha@redhat.com>
8 */
9#include <linux/spinlock.h>
10#include <linux/module.h>
11#include <linux/sched/signal.h>
12#include <linux/ctype.h>
13#include <linux/list.h>
14#include <linux/virtio_vsock.h>
15#include <uapi/linux/vsockmon.h>
16
17#include <net/sock.h>
18#include <net/af_vsock.h>
19
20#define CREATE_TRACE_POINTS
21#include <trace/events/vsock_virtio_transport_common.h>
22
23/* How long to wait for graceful shutdown of a connection */
24#define VSOCK_CLOSE_TIMEOUT (8 * HZ)
25
26/* Threshold for detecting small packets to copy */
27#define GOOD_COPY_LEN 128
28
29static const struct virtio_transport *
30virtio_transport_get_ops(struct vsock_sock *vsk)
31{
32 const struct vsock_transport *t = vsock_core_get_transport(vsk);
33
34 if (WARN_ON(!t))
35 return NULL;
36
37 return container_of(t, struct virtio_transport, transport);
38}
39
40static struct virtio_vsock_pkt *
41virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
42 size_t len,
43 u32 src_cid,
44 u32 src_port,
45 u32 dst_cid,
46 u32 dst_port)
47{
48 struct virtio_vsock_pkt *pkt;
49 int err;
50
51 pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
52 if (!pkt)
53 return NULL;
54
55 pkt->hdr.type = cpu_to_le16(info->type);
56 pkt->hdr.op = cpu_to_le16(info->op);
57 pkt->hdr.src_cid = cpu_to_le64(src_cid);
58 pkt->hdr.dst_cid = cpu_to_le64(dst_cid);
59 pkt->hdr.src_port = cpu_to_le32(src_port);
60 pkt->hdr.dst_port = cpu_to_le32(dst_port);
61 pkt->hdr.flags = cpu_to_le32(info->flags);
62 pkt->len = len;
63 pkt->hdr.len = cpu_to_le32(len);
64 pkt->reply = info->reply;
65 pkt->vsk = info->vsk;
66
67 if (info->msg && len > 0) {
68 pkt->buf = kmalloc(len, GFP_KERNEL);
69 if (!pkt->buf)
70 goto out_pkt;
71
72 pkt->buf_len = len;
73
74 err = memcpy_from_msg(pkt->buf, info->msg, len);
75 if (err)
76 goto out;
77
78 if (msg_data_left(info->msg) == 0 &&
79 info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
80 pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
81
82 if (info->msg->msg_flags & MSG_EOR)
83 pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
84 }
85 }
86
87 trace_virtio_transport_alloc_pkt(src_cid, src_port,
88 dst_cid, dst_port,
89 len,
90 info->type,
91 info->op,
92 info->flags);
93
94 return pkt;
95
96out:
97 kfree(pkt->buf);
98out_pkt:
99 kfree(pkt);
100 return NULL;
101}
102
103/* Packet capture */
104static struct sk_buff *virtio_transport_build_skb(void *opaque)
105{
106 struct virtio_vsock_pkt *pkt = opaque;
107 struct af_vsockmon_hdr *hdr;
108 struct sk_buff *skb;
109 size_t payload_len;
110 void *payload_buf;
111
112 /* A packet could be split to fit the RX buffer, so we can retrieve
113 * the payload length from the header and the buffer pointer taking
114 * care of the offset in the original packet.
115 */
116 payload_len = le32_to_cpu(pkt->hdr.len);
117 payload_buf = pkt->buf + pkt->off;
118
119 skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
120 GFP_ATOMIC);
121 if (!skb)
122 return NULL;
123
124 hdr = skb_put(skb, sizeof(*hdr));
125
126 /* pkt->hdr is little-endian so no need to byteswap here */
127 hdr->src_cid = pkt->hdr.src_cid;
128 hdr->src_port = pkt->hdr.src_port;
129 hdr->dst_cid = pkt->hdr.dst_cid;
130 hdr->dst_port = pkt->hdr.dst_port;
131
132 hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
133 hdr->len = cpu_to_le16(sizeof(pkt->hdr));
134 memset(hdr->reserved, 0, sizeof(hdr->reserved));
135
136 switch (le16_to_cpu(pkt->hdr.op)) {
137 case VIRTIO_VSOCK_OP_REQUEST:
138 case VIRTIO_VSOCK_OP_RESPONSE:
139 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
140 break;
141 case VIRTIO_VSOCK_OP_RST:
142 case VIRTIO_VSOCK_OP_SHUTDOWN:
143 hdr->op = cpu_to_le16(AF_VSOCK_OP_DISCONNECT);
144 break;
145 case VIRTIO_VSOCK_OP_RW:
146 hdr->op = cpu_to_le16(AF_VSOCK_OP_PAYLOAD);
147 break;
148 case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
149 case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
150 hdr->op = cpu_to_le16(AF_VSOCK_OP_CONTROL);
151 break;
152 default:
153 hdr->op = cpu_to_le16(AF_VSOCK_OP_UNKNOWN);
154 break;
155 }
156
157 skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));
158
159 if (payload_len) {
160 skb_put_data(skb, payload_buf, payload_len);
161 }
162
163 return skb;
164}
165
166void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
167{
168 if (pkt->tap_delivered)
169 return;
170
171 vsock_deliver_tap(virtio_transport_build_skb, pkt);
172 pkt->tap_delivered = true;
173}
174EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
175
176static u16 virtio_transport_get_type(struct sock *sk)
177{
178 if (sk->sk_type == SOCK_STREAM)
179 return VIRTIO_VSOCK_TYPE_STREAM;
180 else
181 return VIRTIO_VSOCK_TYPE_SEQPACKET;
182}
183
184/* This function can only be used on connecting/connected sockets,
185 * since a socket assigned to a transport is required.
186 *
187 * Do not use on listener sockets!
188 */
189static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
190 struct virtio_vsock_pkt_info *info)
191{
192 u32 src_cid, src_port, dst_cid, dst_port;
193 const struct virtio_transport *t_ops;
194 struct virtio_vsock_sock *vvs;
195 struct virtio_vsock_pkt *pkt;
196 u32 pkt_len = info->pkt_len;
197
198 info->type = virtio_transport_get_type(sk_vsock(vsk));
199
200 t_ops = virtio_transport_get_ops(vsk);
201 if (unlikely(!t_ops))
202 return -EFAULT;
203
204 src_cid = t_ops->transport.get_local_cid();
205 src_port = vsk->local_addr.svm_port;
206 if (!info->remote_cid) {
207 dst_cid = vsk->remote_addr.svm_cid;
208 dst_port = vsk->remote_addr.svm_port;
209 } else {
210 dst_cid = info->remote_cid;
211 dst_port = info->remote_port;
212 }
213
214 vvs = vsk->trans;
215
216 /* we can send less than pkt_len bytes */
217 if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
218 pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE;
219
220 /* virtio_transport_get_credit might return less than pkt_len credit */
221 pkt_len = virtio_transport_get_credit(vvs, pkt_len);
222
223 /* Do not send zero length OP_RW pkt */
224 if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
225 return pkt_len;
226
227 pkt = virtio_transport_alloc_pkt(info, pkt_len,
228 src_cid, src_port,
229 dst_cid, dst_port);
230 if (!pkt) {
231 virtio_transport_put_credit(vvs, pkt_len);
232 return -ENOMEM;
233 }
234
235 virtio_transport_inc_tx_pkt(vvs, pkt);
236
237 return t_ops->send_pkt(pkt);
238}
239
240static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
241 struct virtio_vsock_pkt *pkt)
242{
243 if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
244 return false;
245
246 vvs->rx_bytes += pkt->len;
247 return true;
248}
249
250static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
251 struct virtio_vsock_pkt *pkt)
252{
253 vvs->rx_bytes -= pkt->len;
254 vvs->fwd_cnt += pkt->len;
255}
256
257void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
258{
259 spin_lock_bh(&vvs->rx_lock);
260 vvs->last_fwd_cnt = vvs->fwd_cnt;
261 pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
262 pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
263 spin_unlock_bh(&vvs->rx_lock);
264}
265EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
266
267u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
268{
269 u32 ret;
270
271 spin_lock_bh(&vvs->tx_lock);
272 ret = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
273 if (ret > credit)
274 ret = credit;
275 vvs->tx_cnt += ret;
276 spin_unlock_bh(&vvs->tx_lock);
277
278 return ret;
279}
280EXPORT_SYMBOL_GPL(virtio_transport_get_credit);
281
282void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
283{
284 spin_lock_bh(&vvs->tx_lock);
285 vvs->tx_cnt -= credit;
286 spin_unlock_bh(&vvs->tx_lock);
287}
288EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
289
290static int virtio_transport_send_credit_update(struct vsock_sock *vsk)
291{
292 struct virtio_vsock_pkt_info info = {
293 .op = VIRTIO_VSOCK_OP_CREDIT_UPDATE,
294 .vsk = vsk,
295 };
296
297 return virtio_transport_send_pkt_info(vsk, &info);
298}
299
300static ssize_t
301virtio_transport_stream_do_peek(struct vsock_sock *vsk,
302 struct msghdr *msg,
303 size_t len)
304{
305 struct virtio_vsock_sock *vvs = vsk->trans;
306 struct virtio_vsock_pkt *pkt;
307 size_t bytes, total = 0, off;
308 int err = -EFAULT;
309
310 spin_lock_bh(&vvs->rx_lock);
311
312 list_for_each_entry(pkt, &vvs->rx_queue, list) {
313 off = pkt->off;
314
315 if (total == len)
316 break;
317
318 while (total < len && off < pkt->len) {
319 bytes = len - total;
320 if (bytes > pkt->len - off)
321 bytes = pkt->len - off;
322
323 /* sk_lock is held by caller so no one else can dequeue.
324 * Unlock rx_lock since memcpy_to_msg() may sleep.
325 */
326 spin_unlock_bh(&vvs->rx_lock);
327
328 err = memcpy_to_msg(msg, pkt->buf + off, bytes);
329 if (err)
330 goto out;
331
332 spin_lock_bh(&vvs->rx_lock);
333
334 total += bytes;
335 off += bytes;
336 }
337 }
338
339 spin_unlock_bh(&vvs->rx_lock);
340
341 return total;
342
343out:
344 if (total)
345 err = total;
346 return err;
347}
348
349static ssize_t
350virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
351 struct msghdr *msg,
352 size_t len)
353{
354 struct virtio_vsock_sock *vvs = vsk->trans;
355 struct virtio_vsock_pkt *pkt;
356 size_t bytes, total = 0;
357 u32 free_space;
358 int err = -EFAULT;
359
360 spin_lock_bh(&vvs->rx_lock);
361 while (total < len && !list_empty(&vvs->rx_queue)) {
362 pkt = list_first_entry(&vvs->rx_queue,
363 struct virtio_vsock_pkt, list);
364
365 bytes = len - total;
366 if (bytes > pkt->len - pkt->off)
367 bytes = pkt->len - pkt->off;
368
369 /* sk_lock is held by caller so no one else can dequeue.
370 * Unlock rx_lock since memcpy_to_msg() may sleep.
371 */
372 spin_unlock_bh(&vvs->rx_lock);
373
374 err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
375 if (err)
376 goto out;
377
378 spin_lock_bh(&vvs->rx_lock);
379
380 total += bytes;
381 pkt->off += bytes;
382 if (pkt->off == pkt->len) {
383 virtio_transport_dec_rx_pkt(vvs, pkt);
384 list_del(&pkt->list);
385 virtio_transport_free_pkt(pkt);
386 }
387 }
388
389 free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
390
391 spin_unlock_bh(&vvs->rx_lock);
392
393 /* To reduce the number of credit update messages,
394 * don't update credits as long as lots of space is available.
395 * Note: the limit chosen here is arbitrary. Setting the limit
396 * too high causes extra messages. Too low causes transmitter
397 * stalls. As stalls are in theory more expensive than extra
398 * messages, we set the limit to a high value. TODO: experiment
399 * with different values.
400 */
401 if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
402 virtio_transport_send_credit_update(vsk);
403
404 return total;
405
406out:
407 if (total)
408 err = total;
409 return err;
410}
411
412static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
413 struct msghdr *msg,
414 int flags)
415{
416 struct virtio_vsock_sock *vvs = vsk->trans;
417 struct virtio_vsock_pkt *pkt;
418 int dequeued_len = 0;
419 size_t user_buf_len = msg_data_left(msg);
420 bool msg_ready = false;
421
422 spin_lock_bh(&vvs->rx_lock);
423
424 if (vvs->msg_count == 0) {
425 spin_unlock_bh(&vvs->rx_lock);
426 return 0;
427 }
428
429 while (!msg_ready) {
430 pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
431
432 if (dequeued_len >= 0) {
433 size_t pkt_len;
434 size_t bytes_to_copy;
435
436 pkt_len = (size_t)le32_to_cpu(pkt->hdr.len);
437 bytes_to_copy = min(user_buf_len, pkt_len);
438
439 if (bytes_to_copy) {
440 int err;
441
442 /* sk_lock is held by caller so no one else can dequeue.
443 * Unlock rx_lock since memcpy_to_msg() may sleep.
444 */
445 spin_unlock_bh(&vvs->rx_lock);
446
447 err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
448 if (err) {
449 /* Copy of message failed. Rest of
450 * fragments will be freed without copy.
451 */
452 dequeued_len = err;
453 } else {
454 user_buf_len -= bytes_to_copy;
455 }
456
457 spin_lock_bh(&vvs->rx_lock);
458 }
459
460 if (dequeued_len >= 0)
461 dequeued_len += pkt_len;
462 }
463
464 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
465 msg_ready = true;
466 vvs->msg_count--;
467
468 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
469 msg->msg_flags |= MSG_EOR;
470 }
471
472 virtio_transport_dec_rx_pkt(vvs, pkt);
473 list_del(&pkt->list);
474 virtio_transport_free_pkt(pkt);
475 }
476
477 spin_unlock_bh(&vvs->rx_lock);
478
479 virtio_transport_send_credit_update(vsk);
480
481 return dequeued_len;
482}
483
484ssize_t
485virtio_transport_stream_dequeue(struct vsock_sock *vsk,
486 struct msghdr *msg,
487 size_t len, int flags)
488{
489 if (flags & MSG_PEEK)
490 return virtio_transport_stream_do_peek(vsk, msg, len);
491 else
492 return virtio_transport_stream_do_dequeue(vsk, msg, len);
493}
494EXPORT_SYMBOL_GPL(virtio_transport_stream_dequeue);
495
496ssize_t
497virtio_transport_seqpacket_dequeue(struct vsock_sock *vsk,
498 struct msghdr *msg,
499 int flags)
500{
501 if (flags & MSG_PEEK)
502 return -EOPNOTSUPP;
503
504 return virtio_transport_seqpacket_do_dequeue(vsk, msg, flags);
505}
506EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_dequeue);
507
508int
509virtio_transport_seqpacket_enqueue(struct vsock_sock *vsk,
510 struct msghdr *msg,
511 size_t len)
512{
513 struct virtio_vsock_sock *vvs = vsk->trans;
514
515 spin_lock_bh(&vvs->tx_lock);
516
517 if (len > vvs->peer_buf_alloc) {
518 spin_unlock_bh(&vvs->tx_lock);
519 return -EMSGSIZE;
520 }
521
522 spin_unlock_bh(&vvs->tx_lock);
523
524 return virtio_transport_stream_enqueue(vsk, msg, len);
525}
526EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_enqueue);
527
528int
529virtio_transport_dgram_dequeue(struct vsock_sock *vsk,
530 struct msghdr *msg,
531 size_t len, int flags)
532{
533 return -EOPNOTSUPP;
534}
535EXPORT_SYMBOL_GPL(virtio_transport_dgram_dequeue);
536
537s64 virtio_transport_stream_has_data(struct vsock_sock *vsk)
538{
539 struct virtio_vsock_sock *vvs = vsk->trans;
540 s64 bytes;
541
542 spin_lock_bh(&vvs->rx_lock);
543 bytes = vvs->rx_bytes;
544 spin_unlock_bh(&vvs->rx_lock);
545
546 return bytes;
547}
548EXPORT_SYMBOL_GPL(virtio_transport_stream_has_data);
549
550u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk)
551{
552 struct virtio_vsock_sock *vvs = vsk->trans;
553 u32 msg_count;
554
555 spin_lock_bh(&vvs->rx_lock);
556 msg_count = vvs->msg_count;
557 spin_unlock_bh(&vvs->rx_lock);
558
559 return msg_count;
560}
561EXPORT_SYMBOL_GPL(virtio_transport_seqpacket_has_data);
562
563static s64 virtio_transport_has_space(struct vsock_sock *vsk)
564{
565 struct virtio_vsock_sock *vvs = vsk->trans;
566 s64 bytes;
567
568 bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
569 if (bytes < 0)
570 bytes = 0;
571
572 return bytes;
573}
574
575s64 virtio_transport_stream_has_space(struct vsock_sock *vsk)
576{
577 struct virtio_vsock_sock *vvs = vsk->trans;
578 s64 bytes;
579
580 spin_lock_bh(&vvs->tx_lock);
581 bytes = virtio_transport_has_space(vsk);
582 spin_unlock_bh(&vvs->tx_lock);
583
584 return bytes;
585}
586EXPORT_SYMBOL_GPL(virtio_transport_stream_has_space);
587
588int virtio_transport_do_socket_init(struct vsock_sock *vsk,
589 struct vsock_sock *psk)
590{
591 struct virtio_vsock_sock *vvs;
592
593 vvs = kzalloc(sizeof(*vvs), GFP_KERNEL);
594 if (!vvs)
595 return -ENOMEM;
596
597 vsk->trans = vvs;
598 vvs->vsk = vsk;
599 if (psk && psk->trans) {
600 struct virtio_vsock_sock *ptrans = psk->trans;
601
602 vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
603 }
604
605 if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
606 vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;
607
608 vvs->buf_alloc = vsk->buffer_size;
609
610 spin_lock_init(&vvs->rx_lock);
611 spin_lock_init(&vvs->tx_lock);
612 INIT_LIST_HEAD(&vvs->rx_queue);
613
614 return 0;
615}
616EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
617
618/* sk_lock held by the caller */
619void virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
620{
621 struct virtio_vsock_sock *vvs = vsk->trans;
622
623 if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
624 *val = VIRTIO_VSOCK_MAX_BUF_SIZE;
625
626 vvs->buf_alloc = *val;
627
628 virtio_transport_send_credit_update(vsk);
629}
630EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
631
632int
633virtio_transport_notify_poll_in(struct vsock_sock *vsk,
634 size_t target,
635 bool *data_ready_now)
636{
637 *data_ready_now = vsock_stream_has_data(vsk) >= target;
638
639 return 0;
640}
641EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_in);
642
643int
644virtio_transport_notify_poll_out(struct vsock_sock *vsk,
645 size_t target,
646 bool *space_avail_now)
647{
648 s64 free_space;
649
650 free_space = vsock_stream_has_space(vsk);
651 if (free_space > 0)
652 *space_avail_now = true;
653 else if (free_space == 0)
654 *space_avail_now = false;
655
656 return 0;
657}
658EXPORT_SYMBOL_GPL(virtio_transport_notify_poll_out);
659
660int virtio_transport_notify_recv_init(struct vsock_sock *vsk,
661 size_t target, struct vsock_transport_recv_notify_data *data)
662{
663 return 0;
664}
665EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_init);
666
667int virtio_transport_notify_recv_pre_block(struct vsock_sock *vsk,
668 size_t target, struct vsock_transport_recv_notify_data *data)
669{
670 return 0;
671}
672EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_block);
673
674int virtio_transport_notify_recv_pre_dequeue(struct vsock_sock *vsk,
675 size_t target, struct vsock_transport_recv_notify_data *data)
676{
677 return 0;
678}
679EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_pre_dequeue);
680
681int virtio_transport_notify_recv_post_dequeue(struct vsock_sock *vsk,
682 size_t target, ssize_t copied, bool data_read,
683 struct vsock_transport_recv_notify_data *data)
684{
685 return 0;
686}
687EXPORT_SYMBOL_GPL(virtio_transport_notify_recv_post_dequeue);
688
/* Send-notification hook (init): nothing to do here, returns 0. */
int virtio_transport_notify_send_init(struct vsock_sock *vsk,
				      struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_init);
695
/* Send-notification hook (pre-block): nothing to do here, returns 0. */
int virtio_transport_notify_send_pre_block(struct vsock_sock *vsk,
					   struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_block);
702
/* Send-notification hook (pre-enqueue): nothing to do here, returns 0. */
int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
					     struct vsock_transport_send_notify_data *data)
{
	return 0;
}
EXPORT_SYMBOL_GPL(virtio_transport_notify_send_pre_enqueue);
709
710int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
711 ssize_t written, struct vsock_transport_send_notify_data *data)
712{
713 return 0;
714}
715EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);
716
717u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
718{
719 return vsk->buffer_size;
720}
721EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);
722
723bool virtio_transport_stream_is_active(struct vsock_sock *vsk)
724{
725 return true;
726}
727EXPORT_SYMBOL_GPL(virtio_transport_stream_is_active);
728
729bool virtio_transport_stream_allow(u32 cid, u32 port)
730{
731 return true;
732}
733EXPORT_SYMBOL_GPL(virtio_transport_stream_allow);
734
735int virtio_transport_dgram_bind(struct vsock_sock *vsk,
736 struct sockaddr_vm *addr)
737{
738 return -EOPNOTSUPP;
739}
740EXPORT_SYMBOL_GPL(virtio_transport_dgram_bind);
741
742bool virtio_transport_dgram_allow(u32 cid, u32 port)
743{
744 return false;
745}
746EXPORT_SYMBOL_GPL(virtio_transport_dgram_allow);
747
748int virtio_transport_connect(struct vsock_sock *vsk)
749{
750 struct virtio_vsock_pkt_info info = {
751 .op = VIRTIO_VSOCK_OP_REQUEST,
752 .vsk = vsk,
753 };
754
755 return virtio_transport_send_pkt_info(vsk, &info);
756}
757EXPORT_SYMBOL_GPL(virtio_transport_connect);
758
759int virtio_transport_shutdown(struct vsock_sock *vsk, int mode)
760{
761 struct virtio_vsock_pkt_info info = {
762 .op = VIRTIO_VSOCK_OP_SHUTDOWN,
763 .flags = (mode & RCV_SHUTDOWN ?
764 VIRTIO_VSOCK_SHUTDOWN_RCV : 0) |
765 (mode & SEND_SHUTDOWN ?
766 VIRTIO_VSOCK_SHUTDOWN_SEND : 0),
767 .vsk = vsk,
768 };
769
770 return virtio_transport_send_pkt_info(vsk, &info);
771}
772EXPORT_SYMBOL_GPL(virtio_transport_shutdown);
773
774int
775virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
776 struct sockaddr_vm *remote_addr,
777 struct msghdr *msg,
778 size_t dgram_len)
779{
780 return -EOPNOTSUPP;
781}
782EXPORT_SYMBOL_GPL(virtio_transport_dgram_enqueue);
783
784ssize_t
785virtio_transport_stream_enqueue(struct vsock_sock *vsk,
786 struct msghdr *msg,
787 size_t len)
788{
789 struct virtio_vsock_pkt_info info = {
790 .op = VIRTIO_VSOCK_OP_RW,
791 .msg = msg,
792 .pkt_len = len,
793 .vsk = vsk,
794 };
795
796 return virtio_transport_send_pkt_info(vsk, &info);
797}
798EXPORT_SYMBOL_GPL(virtio_transport_stream_enqueue);
799
800void virtio_transport_destruct(struct vsock_sock *vsk)
801{
802 struct virtio_vsock_sock *vvs = vsk->trans;
803
804 kfree(vvs);
805}
806EXPORT_SYMBOL_GPL(virtio_transport_destruct);
807
808static int virtio_transport_reset(struct vsock_sock *vsk,
809 struct virtio_vsock_pkt *pkt)
810{
811 struct virtio_vsock_pkt_info info = {
812 .op = VIRTIO_VSOCK_OP_RST,
813 .reply = !!pkt,
814 .vsk = vsk,
815 };
816
817 /* Send RST only if the original pkt is not a RST pkt */
818 if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
819 return 0;
820
821 return virtio_transport_send_pkt_info(vsk, &info);
822}
823
824/* Normally packets are associated with a socket. There may be no socket if an
825 * attempt was made to connect to a socket that does not exist.
826 */
827static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
828 struct virtio_vsock_pkt *pkt)
829{
830 struct virtio_vsock_pkt *reply;
831 struct virtio_vsock_pkt_info info = {
832 .op = VIRTIO_VSOCK_OP_RST,
833 .type = le16_to_cpu(pkt->hdr.type),
834 .reply = true,
835 };
836
837 /* Send RST only if the original pkt is not a RST pkt */
838 if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
839 return 0;
840
841 reply = virtio_transport_alloc_pkt(&info, 0,
842 le64_to_cpu(pkt->hdr.dst_cid),
843 le32_to_cpu(pkt->hdr.dst_port),
844 le64_to_cpu(pkt->hdr.src_cid),
845 le32_to_cpu(pkt->hdr.src_port));
846 if (!reply)
847 return -ENOMEM;
848
849 if (!t) {
850 virtio_transport_free_pkt(reply);
851 return -ENOTCONN;
852 }
853
854 return t->send_pkt(reply);
855}
856
857/* This function should be called with sk_lock held and SOCK_DONE set */
858static void virtio_transport_remove_sock(struct vsock_sock *vsk)
859{
860 struct virtio_vsock_sock *vvs = vsk->trans;
861 struct virtio_vsock_pkt *pkt, *tmp;
862
863 /* We don't need to take rx_lock, as the socket is closing and we are
864 * removing it.
865 */
866 list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
867 list_del(&pkt->list);
868 virtio_transport_free_pkt(pkt);
869 }
870
871 vsock_remove_sock(vsk);
872}
873
874static void virtio_transport_wait_close(struct sock *sk, long timeout)
875{
876 if (timeout) {
877 DEFINE_WAIT_FUNC(wait, woken_wake_function);
878
879 add_wait_queue(sk_sleep(sk), &wait);
880
881 do {
882 if (sk_wait_event(sk, &timeout,
883 sock_flag(sk, SOCK_DONE), &wait))
884 break;
885 } while (!signal_pending(current) && timeout);
886
887 remove_wait_queue(sk_sleep(sk), &wait);
888 }
889}
890
891static void virtio_transport_do_close(struct vsock_sock *vsk,
892 bool cancel_timeout)
893{
894 struct sock *sk = sk_vsock(vsk);
895
896 sock_set_flag(sk, SOCK_DONE);
897 vsk->peer_shutdown = SHUTDOWN_MASK;
898 if (vsock_stream_has_data(vsk) <= 0)
899 sk->sk_state = TCP_CLOSING;
900 sk->sk_state_change(sk);
901
902 if (vsk->close_work_scheduled &&
903 (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
904 vsk->close_work_scheduled = false;
905
906 virtio_transport_remove_sock(vsk);
907
908 /* Release refcnt obtained when we scheduled the timeout */
909 sock_put(sk);
910 }
911}
912
913static void virtio_transport_close_timeout(struct work_struct *work)
914{
915 struct vsock_sock *vsk =
916 container_of(work, struct vsock_sock, close_work.work);
917 struct sock *sk = sk_vsock(vsk);
918
919 sock_hold(sk);
920 lock_sock(sk);
921
922 if (!sock_flag(sk, SOCK_DONE)) {
923 (void)virtio_transport_reset(vsk, NULL);
924
925 virtio_transport_do_close(vsk, false);
926 }
927
928 vsk->close_work_scheduled = false;
929
930 release_sock(sk);
931 sock_put(sk);
932}
933
934/* User context, vsk->sk is locked */
935static bool virtio_transport_close(struct vsock_sock *vsk)
936{
937 struct sock *sk = &vsk->sk;
938
939 if (!(sk->sk_state == TCP_ESTABLISHED ||
940 sk->sk_state == TCP_CLOSING))
941 return true;
942
943 /* Already received SHUTDOWN from peer, reply with RST */
944 if ((vsk->peer_shutdown & SHUTDOWN_MASK) == SHUTDOWN_MASK) {
945 (void)virtio_transport_reset(vsk, NULL);
946 return true;
947 }
948
949 if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
950 (void)virtio_transport_shutdown(vsk, SHUTDOWN_MASK);
951
952 if (sock_flag(sk, SOCK_LINGER) && !(current->flags & PF_EXITING))
953 virtio_transport_wait_close(sk, sk->sk_lingertime);
954
955 if (sock_flag(sk, SOCK_DONE)) {
956 return true;
957 }
958
959 sock_hold(sk);
960 INIT_DELAYED_WORK(&vsk->close_work,
961 virtio_transport_close_timeout);
962 vsk->close_work_scheduled = true;
963 schedule_delayed_work(&vsk->close_work, VSOCK_CLOSE_TIMEOUT);
964 return false;
965}
966
967void virtio_transport_release(struct vsock_sock *vsk)
968{
969 struct sock *sk = &vsk->sk;
970 bool remove_sock = true;
971
972 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)
973 remove_sock = virtio_transport_close(vsk);
974
975 if (remove_sock) {
976 sock_set_flag(sk, SOCK_DONE);
977 virtio_transport_remove_sock(vsk);
978 }
979}
980EXPORT_SYMBOL_GPL(virtio_transport_release);
981
982static int
983virtio_transport_recv_connecting(struct sock *sk,
984 struct virtio_vsock_pkt *pkt)
985{
986 struct vsock_sock *vsk = vsock_sk(sk);
987 int err;
988 int skerr;
989
990 switch (le16_to_cpu(pkt->hdr.op)) {
991 case VIRTIO_VSOCK_OP_RESPONSE:
992 sk->sk_state = TCP_ESTABLISHED;
993 sk->sk_socket->state = SS_CONNECTED;
994 vsock_insert_connected(vsk);
995 sk->sk_state_change(sk);
996 break;
997 case VIRTIO_VSOCK_OP_INVALID:
998 break;
999 case VIRTIO_VSOCK_OP_RST:
1000 skerr = ECONNRESET;
1001 err = 0;
1002 goto destroy;
1003 default:
1004 skerr = EPROTO;
1005 err = -EINVAL;
1006 goto destroy;
1007 }
1008 return 0;
1009
1010destroy:
1011 virtio_transport_reset(vsk, pkt);
1012 sk->sk_state = TCP_CLOSE;
1013 sk->sk_err = skerr;
1014 sk_error_report(sk);
1015 return err;
1016}
1017
1018static void
1019virtio_transport_recv_enqueue(struct vsock_sock *vsk,
1020 struct virtio_vsock_pkt *pkt)
1021{
1022 struct virtio_vsock_sock *vvs = vsk->trans;
1023 bool can_enqueue, free_pkt = false;
1024
1025 pkt->len = le32_to_cpu(pkt->hdr.len);
1026 pkt->off = 0;
1027
1028 spin_lock_bh(&vvs->rx_lock);
1029
1030 can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
1031 if (!can_enqueue) {
1032 free_pkt = true;
1033 goto out;
1034 }
1035
1036 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
1037 vvs->msg_count++;
1038
1039 /* Try to copy small packets into the buffer of last packet queued,
1040 * to avoid wasting memory queueing the entire buffer with a small
1041 * payload.
1042 */
1043 if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
1044 struct virtio_vsock_pkt *last_pkt;
1045
1046 last_pkt = list_last_entry(&vvs->rx_queue,
1047 struct virtio_vsock_pkt, list);
1048
1049 /* If there is space in the last packet queued, we copy the
1050 * new packet in its buffer. We avoid this if the last packet
1051 * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
1052 * delimiter of SEQPACKET message, so 'pkt' is the first packet
1053 * of a new message.
1054 */
1055 if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
1056 !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
1057 memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
1058 pkt->len);
1059 last_pkt->len += pkt->len;
1060 free_pkt = true;
1061 last_pkt->hdr.flags |= pkt->hdr.flags;
1062 goto out;
1063 }
1064 }
1065
1066 list_add_tail(&pkt->list, &vvs->rx_queue);
1067
1068out:
1069 spin_unlock_bh(&vvs->rx_lock);
1070 if (free_pkt)
1071 virtio_transport_free_pkt(pkt);
1072}
1073
1074static int
1075virtio_transport_recv_connected(struct sock *sk,
1076 struct virtio_vsock_pkt *pkt)
1077{
1078 struct vsock_sock *vsk = vsock_sk(sk);
1079 int err = 0;
1080
1081 switch (le16_to_cpu(pkt->hdr.op)) {
1082 case VIRTIO_VSOCK_OP_RW:
1083 virtio_transport_recv_enqueue(vsk, pkt);
1084 vsock_data_ready(sk);
1085 return err;
1086 case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
1087 virtio_transport_send_credit_update(vsk);
1088 break;
1089 case VIRTIO_VSOCK_OP_CREDIT_UPDATE:
1090 sk->sk_write_space(sk);
1091 break;
1092 case VIRTIO_VSOCK_OP_SHUTDOWN:
1093 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
1094 vsk->peer_shutdown |= RCV_SHUTDOWN;
1095 if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
1096 vsk->peer_shutdown |= SEND_SHUTDOWN;
1097 if (vsk->peer_shutdown == SHUTDOWN_MASK &&
1098 vsock_stream_has_data(vsk) <= 0 &&
1099 !sock_flag(sk, SOCK_DONE)) {
1100 (void)virtio_transport_reset(vsk, NULL);
1101
1102 virtio_transport_do_close(vsk, true);
1103 }
1104 if (le32_to_cpu(pkt->hdr.flags))
1105 sk->sk_state_change(sk);
1106 break;
1107 case VIRTIO_VSOCK_OP_RST:
1108 virtio_transport_do_close(vsk, true);
1109 break;
1110 default:
1111 err = -EINVAL;
1112 break;
1113 }
1114
1115 virtio_transport_free_pkt(pkt);
1116 return err;
1117}
1118
1119static void
1120virtio_transport_recv_disconnecting(struct sock *sk,
1121 struct virtio_vsock_pkt *pkt)
1122{
1123 struct vsock_sock *vsk = vsock_sk(sk);
1124
1125 if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
1126 virtio_transport_do_close(vsk, true);
1127}
1128
1129static int
1130virtio_transport_send_response(struct vsock_sock *vsk,
1131 struct virtio_vsock_pkt *pkt)
1132{
1133 struct virtio_vsock_pkt_info info = {
1134 .op = VIRTIO_VSOCK_OP_RESPONSE,
1135 .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
1136 .remote_port = le32_to_cpu(pkt->hdr.src_port),
1137 .reply = true,
1138 .vsk = vsk,
1139 };
1140
1141 return virtio_transport_send_pkt_info(vsk, &info);
1142}
1143
1144static bool virtio_transport_space_update(struct sock *sk,
1145 struct virtio_vsock_pkt *pkt)
1146{
1147 struct vsock_sock *vsk = vsock_sk(sk);
1148 struct virtio_vsock_sock *vvs = vsk->trans;
1149 bool space_available;
1150
1151 /* Listener sockets are not associated with any transport, so we are
1152 * not able to take the state to see if there is space available in the
1153 * remote peer, but since they are only used to receive requests, we
1154 * can assume that there is always space available in the other peer.
1155 */
1156 if (!vvs)
1157 return true;
1158
1159 /* buf_alloc and fwd_cnt is always included in the hdr */
1160 spin_lock_bh(&vvs->tx_lock);
1161 vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
1162 vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
1163 space_available = virtio_transport_has_space(vsk);
1164 spin_unlock_bh(&vvs->tx_lock);
1165 return space_available;
1166}
1167
1168/* Handle server socket */
1169static int
1170virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
1171 struct virtio_transport *t)
1172{
1173 struct vsock_sock *vsk = vsock_sk(sk);
1174 struct vsock_sock *vchild;
1175 struct sock *child;
1176 int ret;
1177
1178 if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
1179 virtio_transport_reset_no_sock(t, pkt);
1180 return -EINVAL;
1181 }
1182
1183 if (sk_acceptq_is_full(sk)) {
1184 virtio_transport_reset_no_sock(t, pkt);
1185 return -ENOMEM;
1186 }
1187
1188 child = vsock_create_connected(sk);
1189 if (!child) {
1190 virtio_transport_reset_no_sock(t, pkt);
1191 return -ENOMEM;
1192 }
1193
1194 sk_acceptq_added(sk);
1195
1196 lock_sock_nested(child, SINGLE_DEPTH_NESTING);
1197
1198 child->sk_state = TCP_ESTABLISHED;
1199
1200 vchild = vsock_sk(child);
1201 vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
1202 le32_to_cpu(pkt->hdr.dst_port));
1203 vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
1204 le32_to_cpu(pkt->hdr.src_port));
1205
1206 ret = vsock_assign_transport(vchild, vsk);
1207 /* Transport assigned (looking at remote_addr) must be the same
1208 * where we received the request.
1209 */
1210 if (ret || vchild->transport != &t->transport) {
1211 release_sock(child);
1212 virtio_transport_reset_no_sock(t, pkt);
1213 sock_put(child);
1214 return ret;
1215 }
1216
1217 if (virtio_transport_space_update(child, pkt))
1218 child->sk_write_space(child);
1219
1220 vsock_insert_connected(vchild);
1221 vsock_enqueue_accept(sk, child);
1222 virtio_transport_send_response(vchild, pkt);
1223
1224 release_sock(child);
1225
1226 sk->sk_data_ready(sk);
1227 return 0;
1228}
1229
1230static bool virtio_transport_valid_type(u16 type)
1231{
1232 return (type == VIRTIO_VSOCK_TYPE_STREAM) ||
1233 (type == VIRTIO_VSOCK_TYPE_SEQPACKET);
1234}
1235
1236/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
1237 * lock.
1238 */
1239void virtio_transport_recv_pkt(struct virtio_transport *t,
1240 struct virtio_vsock_pkt *pkt)
1241{
1242 struct sockaddr_vm src, dst;
1243 struct vsock_sock *vsk;
1244 struct sock *sk;
1245 bool space_available;
1246
1247 vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
1248 le32_to_cpu(pkt->hdr.src_port));
1249 vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
1250 le32_to_cpu(pkt->hdr.dst_port));
1251
1252 trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
1253 dst.svm_cid, dst.svm_port,
1254 le32_to_cpu(pkt->hdr.len),
1255 le16_to_cpu(pkt->hdr.type),
1256 le16_to_cpu(pkt->hdr.op),
1257 le32_to_cpu(pkt->hdr.flags),
1258 le32_to_cpu(pkt->hdr.buf_alloc),
1259 le32_to_cpu(pkt->hdr.fwd_cnt));
1260
1261 if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
1262 (void)virtio_transport_reset_no_sock(t, pkt);
1263 goto free_pkt;
1264 }
1265
1266 /* The socket must be in connected or bound table
1267 * otherwise send reset back
1268 */
1269 sk = vsock_find_connected_socket(&src, &dst);
1270 if (!sk) {
1271 sk = vsock_find_bound_socket(&dst);
1272 if (!sk) {
1273 (void)virtio_transport_reset_no_sock(t, pkt);
1274 goto free_pkt;
1275 }
1276 }
1277
1278 if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) {
1279 (void)virtio_transport_reset_no_sock(t, pkt);
1280 sock_put(sk);
1281 goto free_pkt;
1282 }
1283
1284 vsk = vsock_sk(sk);
1285
1286 lock_sock(sk);
1287
1288 /* Check if sk has been closed before lock_sock */
1289 if (sock_flag(sk, SOCK_DONE)) {
1290 (void)virtio_transport_reset_no_sock(t, pkt);
1291 release_sock(sk);
1292 sock_put(sk);
1293 goto free_pkt;
1294 }
1295
1296 space_available = virtio_transport_space_update(sk, pkt);
1297
1298 /* Update CID in case it has changed after a transport reset event */
1299 if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
1300 vsk->local_addr.svm_cid = dst.svm_cid;
1301
1302 if (space_available)
1303 sk->sk_write_space(sk);
1304
1305 switch (sk->sk_state) {
1306 case TCP_LISTEN:
1307 virtio_transport_recv_listen(sk, pkt, t);
1308 virtio_transport_free_pkt(pkt);
1309 break;
1310 case TCP_SYN_SENT:
1311 virtio_transport_recv_connecting(sk, pkt);
1312 virtio_transport_free_pkt(pkt);
1313 break;
1314 case TCP_ESTABLISHED:
1315 virtio_transport_recv_connected(sk, pkt);
1316 break;
1317 case TCP_CLOSING:
1318 virtio_transport_recv_disconnecting(sk, pkt);
1319 virtio_transport_free_pkt(pkt);
1320 break;
1321 default:
1322 (void)virtio_transport_reset_no_sock(t, pkt);
1323 virtio_transport_free_pkt(pkt);
1324 break;
1325 }
1326
1327 release_sock(sk);
1328
1329 /* Release refcnt obtained when we fetched this socket out of the
1330 * bound or connected list.
1331 */
1332 sock_put(sk);
1333 return;
1334
1335free_pkt:
1336 virtio_transport_free_pkt(pkt);
1337}
1338EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
1339
1340void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
1341{
1342 kvfree(pkt->buf);
1343 kfree(pkt);
1344}
1345EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
1346
/* Module metadata: license, author and description. */
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");