Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vhost_net: basic polling support

This patch tries to poll for newly added tx buffers or the socket receive
queue for a while at the end of tx/rx processing. The maximum time
spent on polling was specified through a new kind of vring ioctl.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

Authored by Jason Wang; committed by Michael S. Tsirkin.
03088137 d4a60603

+94 -5
+73 -5
drivers/vhost/net.c
··· 287 287 rcu_read_unlock_bh(); 288 288 } 289 289 290 + static inline unsigned long busy_clock(void) 291 + { 292 + return local_clock() >> 10; 293 + } 294 + 295 + static bool vhost_can_busy_poll(struct vhost_dev *dev, 296 + unsigned long endtime) 297 + { 298 + return likely(!need_resched()) && 299 + likely(!time_after(busy_clock(), endtime)) && 300 + likely(!signal_pending(current)) && 301 + !vhost_has_work(dev); 302 + } 303 + 304 + static int vhost_net_tx_get_vq_desc(struct vhost_net *net, 305 + struct vhost_virtqueue *vq, 306 + struct iovec iov[], unsigned int iov_size, 307 + unsigned int *out_num, unsigned int *in_num) 308 + { 309 + unsigned long uninitialized_var(endtime); 310 + int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), 311 + out_num, in_num, NULL, NULL); 312 + 313 + if (r == vq->num && vq->busyloop_timeout) { 314 + preempt_disable(); 315 + endtime = busy_clock() + vq->busyloop_timeout; 316 + while (vhost_can_busy_poll(vq->dev, endtime) && 317 + vhost_vq_avail_empty(vq->dev, vq)) 318 + cpu_relax_lowlatency(); 319 + preempt_enable(); 320 + r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), 321 + out_num, in_num, NULL, NULL); 322 + } 323 + 324 + return r; 325 + } 326 + 290 327 /* Expects to be always run from workqueue - which acts as 291 328 * read-size critical section for our kind of RCU. */ 292 329 static void handle_tx(struct vhost_net *net) ··· 368 331 % UIO_MAXIOV == nvq->done_idx)) 369 332 break; 370 333 371 - head = vhost_get_vq_desc(vq, vq->iov, 372 - ARRAY_SIZE(vq->iov), 373 - &out, &in, 374 - NULL, NULL); 334 + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, 335 + ARRAY_SIZE(vq->iov), 336 + &out, &in); 375 337 /* On error, stop handling until the next kick. 
*/ 376 338 if (unlikely(head < 0)) 377 339 break; ··· 468 432 } 469 433 470 434 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); 435 + return len; 436 + } 437 + 438 + static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) 439 + { 440 + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; 441 + struct vhost_virtqueue *vq = &nvq->vq; 442 + unsigned long uninitialized_var(endtime); 443 + int len = peek_head_len(sk); 444 + 445 + if (!len && vq->busyloop_timeout) { 446 + /* Both tx vq and rx socket were polled here */ 447 + mutex_lock(&vq->mutex); 448 + vhost_disable_notify(&net->dev, vq); 449 + 450 + preempt_disable(); 451 + endtime = busy_clock() + vq->busyloop_timeout; 452 + 453 + while (vhost_can_busy_poll(&net->dev, endtime) && 454 + skb_queue_empty(&sk->sk_receive_queue) && 455 + vhost_vq_avail_empty(&net->dev, vq)) 456 + cpu_relax_lowlatency(); 457 + 458 + preempt_enable(); 459 + 460 + if (vhost_enable_notify(&net->dev, vq)) 461 + vhost_poll_queue(&vq->poll); 462 + mutex_unlock(&vq->mutex); 463 + 464 + len = peek_head_len(sk); 465 + } 466 + 471 467 return len; 472 468 } 473 469 ··· 621 553 vq->log : NULL; 622 554 mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); 623 555 624 - while ((sock_len = peek_head_len(sock->sk))) { 556 + while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { 625 557 sock_len += sock_hlen; 626 558 vhost_len = sock_len + vhost_hlen; 627 559 headcount = get_rx_bufs(vq, vq->heads, vhost_len,
+14
drivers/vhost/vhost.c
··· 303 303 vq->memory = NULL; 304 304 vhost_reset_is_le(vq); 305 305 vhost_disable_cross_endian(vq); 306 + vq->busyloop_timeout = 0; 306 307 } 307 308 308 309 static int vhost_worker(void *data) ··· 937 936 break; 938 937 case VHOST_GET_VRING_ENDIAN: 939 938 r = vhost_get_vring_endian(vq, idx, argp); 939 + break; 940 + case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: 941 + if (copy_from_user(&s, argp, sizeof(s))) { 942 + r = -EFAULT; 943 + break; 944 + } 945 + vq->busyloop_timeout = s.num; 946 + break; 947 + case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: 948 + s.index = idx; 949 + s.num = vq->busyloop_timeout; 950 + if (copy_to_user(argp, &s, sizeof(s))) 951 + r = -EFAULT; 940 952 break; 941 953 default: 942 954 r = -ENOIOCTLCMD;
+1
drivers/vhost/vhost.h
··· 115 115 /* Ring endianness requested by userspace for cross-endian support. */ 116 116 bool user_be; 117 117 #endif 118 + u32 busyloop_timeout; 118 119 }; 119 120 120 121 struct vhost_dev {
+6
include/uapi/linux/vhost.h
··· 126 126 #define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) 127 127 /* Set eventfd to signal an error */ 128 128 #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) 129 + /* Set busy loop timeout (in us) */ 130 + #define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \ 131 + struct vhost_vring_state) 132 + /* Get busy loop timeout (in us) */ 133 + #define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \ 134 + struct vhost_vring_state) 129 135 130 136 /* VHOST_NET specific defines */ 131 137