Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vhost_net: remove tx polling state

After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling
errors when setting backend), we in fact track the polling state through
poll->wqh, so there's no need to duplicate the work with an extra
vhost_net_polling_state. So this patch removes it and makes the code simpler.

This patch also removes all the tx starting/stopping code in the tx path according
to Michael's suggestion.

Netperf test shows almost the same result in stream test, but gets improvements
on TCP_RR tests (both zerocopy and copy), especially in low-load cases.

Tested between multiqueue kvm guest and external host with two direct
connected 82599s.

zerocopy disabled:

sessions|transaction rates|normalize|
before/after/+improvements
1 | 9510.24/11727.29/+23.3% | 693.54/887.68/+28.0% |
25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% |
50| 277634.64/291905.76/+5% | 3118.36/3230.11/+3.6% |

zerocopy enabled:

sessions|transaction rates|normalize|
before/after/+improvements
1 | 7318.33/11929.76/+63.0% | 521.86/843.30/+61.6% |
25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% |
50| 272181.02/294347.04/+8.1% | 3071.56/3257.85/+6.1% |

Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jason Wang and committed by
David S. Miller
70181d51 16e3d964

+9 -68
+6 -68
drivers/vhost/net.c
··· 64 64 VHOST_NET_VQ_MAX = 2, 65 65 }; 66 66 67 - enum vhost_net_poll_state { 68 - VHOST_NET_POLL_DISABLED = 0, 69 - VHOST_NET_POLL_STARTED = 1, 70 - VHOST_NET_POLL_STOPPED = 2, 71 - }; 72 - 73 67 struct vhost_net { 74 68 struct vhost_dev dev; 75 69 struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; 76 70 struct vhost_poll poll[VHOST_NET_VQ_MAX]; 77 - /* Tells us whether we are polling a socket for TX. 78 - * We only do this when socket buffer fills up. 79 - * Protected by tx vq lock. */ 80 - enum vhost_net_poll_state tx_poll_state; 81 71 /* Number of TX recently submitted. 82 72 * Protected by tx vq lock. */ 83 73 unsigned tx_packets; ··· 145 155 } 146 156 } 147 157 148 - /* Caller must have TX VQ lock */ 149 - static void tx_poll_stop(struct vhost_net *net) 150 - { 151 - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) 152 - return; 153 - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); 154 - net->tx_poll_state = VHOST_NET_POLL_STOPPED; 155 - } 156 - 157 - /* Caller must have TX VQ lock */ 158 - static int tx_poll_start(struct vhost_net *net, struct socket *sock) 159 - { 160 - int ret; 161 - 162 - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) 163 - return 0; 164 - ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); 165 - if (!ret) 166 - net->tx_poll_state = VHOST_NET_POLL_STARTED; 167 - return ret; 168 - } 169 - 170 158 /* In case of DMA done not in order in lower device driver for some reason. 171 159 * upend_idx is used to track end of used idx, done_idx is used to track head 172 160 * of used idx. 
Once lower device DMA done contiguously, we will signal KVM ··· 210 242 .msg_flags = MSG_DONTWAIT, 211 243 }; 212 244 size_t len, total_len = 0; 213 - int err, wmem; 245 + int err; 214 246 size_t hdr_size; 215 247 struct socket *sock; 216 248 struct vhost_ubuf_ref *uninitialized_var(ubufs); ··· 221 253 if (!sock) 222 254 return; 223 255 224 - wmem = atomic_read(&sock->sk->sk_wmem_alloc); 225 - if (wmem >= sock->sk->sk_sndbuf) { 226 - mutex_lock(&vq->mutex); 227 - tx_poll_start(net, sock); 228 - mutex_unlock(&vq->mutex); 229 - return; 230 - } 231 - 232 256 mutex_lock(&vq->mutex); 233 257 vhost_disable_notify(&net->dev, vq); 234 258 235 - if (wmem < sock->sk->sk_sndbuf / 2) 236 - tx_poll_stop(net); 237 259 hdr_size = vq->vhost_hlen; 238 260 zcopy = vq->ubufs; 239 261 ··· 243 285 if (head == vq->num) { 244 286 int num_pends; 245 287 246 - wmem = atomic_read(&sock->sk->sk_wmem_alloc); 247 - if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { 248 - tx_poll_start(net, sock); 249 - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); 250 - break; 251 - } 252 288 /* If more outstanding DMAs, queue the work. 253 289 * Handle upend_idx wrap around 254 290 */ 255 291 num_pends = likely(vq->upend_idx >= vq->done_idx) ? 
256 292 (vq->upend_idx - vq->done_idx) : 257 293 (vq->upend_idx + UIO_MAXIOV - vq->done_idx); 258 - if (unlikely(num_pends > VHOST_MAX_PEND)) { 259 - tx_poll_start(net, sock); 260 - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); 294 + if (unlikely(num_pends > VHOST_MAX_PEND)) 261 295 break; 262 - } 263 296 if (unlikely(vhost_enable_notify(&net->dev, vq))) { 264 297 vhost_disable_notify(&net->dev, vq); 265 298 continue; ··· 313 364 UIO_MAXIOV; 314 365 } 315 366 vhost_discard_vq_desc(vq, 1); 316 - if (err == -EAGAIN || err == -ENOBUFS) 317 - tx_poll_start(net, sock); 318 367 break; 319 368 } 320 369 if (err != len) ··· 575 628 576 629 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); 577 630 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); 578 - n->tx_poll_state = VHOST_NET_POLL_DISABLED; 579 631 580 632 f->private_data = n; 581 633 ··· 584 638 static void vhost_net_disable_vq(struct vhost_net *n, 585 639 struct vhost_virtqueue *vq) 586 640 { 641 + struct vhost_poll *poll = n->poll + (vq - n->vqs); 587 642 if (!vq->private_data) 588 643 return; 589 - if (vq == n->vqs + VHOST_NET_VQ_TX) { 590 - tx_poll_stop(n); 591 - n->tx_poll_state = VHOST_NET_POLL_DISABLED; 592 - } else 593 - vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); 644 + vhost_poll_stop(poll); 594 645 } 595 646 596 647 static int vhost_net_enable_vq(struct vhost_net *n, 597 648 struct vhost_virtqueue *vq) 598 649 { 650 + struct vhost_poll *poll = n->poll + (vq - n->vqs); 599 651 struct socket *sock; 600 - int ret; 601 652 602 653 sock = rcu_dereference_protected(vq->private_data, 603 654 lockdep_is_held(&vq->mutex)); 604 655 if (!sock) 605 656 return 0; 606 - if (vq == n->vqs + VHOST_NET_VQ_TX) { 607 - n->tx_poll_state = VHOST_NET_POLL_STOPPED; 608 - ret = tx_poll_start(n, sock); 609 - } else 610 - ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file); 611 657 612 - return ret; 658 + return vhost_poll_start(poll, sock->file); 613 659 } 614 660 615 661 static 
struct socket *vhost_net_stop_vq(struct vhost_net *n,
+3
drivers/vhost/vhost.c
··· 89 89 unsigned long mask; 90 90 int ret = 0; 91 91 92 + if (poll->wqh) 93 + return 0; 94 + 92 95 mask = file->f_op->poll(file, &poll->table); 93 96 if (mask) 94 97 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);