Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

um: Migrate vector drivers to NAPI

Migrate UML vector drivers from a bespoke scheduling mechanism
to NAPI.

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

Authored by Anton Ivanov; committed by Richard Weinberger.
b35507a4 39508aab

+51 -57
+49 -56
arch/um/drivers/vector_kern.c
··· 67 67 static int driver_registered; 68 68 69 69 static void vector_eth_configure(int n, struct arglist *def); 70 + static int vector_mmsg_rx(struct vector_private *vp, int budget); 70 71 71 72 /* Argument accessors to set variables (and/or set default values) 72 73 * mtu, buffer sizing, default headroom, etc ··· 78 77 #define DEFAULT_VECTOR_SIZE 64 79 78 #define TX_SMALL_PACKET 128 80 79 #define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1) 81 - #define MAX_ITERATIONS 64 82 80 83 81 static const struct { 84 82 const char string[ETH_GSTRING_LEN]; ··· 458 458 vp->estats.tx_queue_running_average = 459 459 (vp->estats.tx_queue_running_average + result) >> 1; 460 460 } 461 - netif_trans_update(qi->dev); 462 461 netif_wake_queue(qi->dev); 463 462 /* if TX is busy, break out of the send loop, 464 463 * poll write IRQ will reschedule xmit for us ··· 469 470 } 470 471 } 471 472 spin_unlock(&qi->head_lock); 472 - } else { 473 - tasklet_schedule(&vp->tx_poll); 474 473 } 475 474 return queue_depth; 476 475 } ··· 605 608 606 609 /* 607 610 * We do not use the RX queue as a proper wraparound queue for now 608 - * This is not necessary because the consumption via netif_rx() 611 + * This is not necessary because the consumption via napi_gro_receive() 609 612 * happens in-line. While we can try using the return code of 610 613 * netif_rx() for flow control there are no drivers doing this today. 611 614 * For this RX specific use we ignore the tail/head locks and ··· 893 896 skb->protocol = eth_type_trans(skb, skb->dev); 894 897 vp->dev->stats.rx_bytes += skb->len; 895 898 vp->dev->stats.rx_packets++; 896 - netif_rx(skb); 899 + napi_gro_receive(&vp->napi, skb); 897 900 } else { 898 901 dev_kfree_skb_irq(skb); 899 902 } ··· 952 955 * mmsg vector matched to an skb vector which we prepared earlier. 
953 956 */ 954 957 955 - static int vector_mmsg_rx(struct vector_private *vp) 958 + static int vector_mmsg_rx(struct vector_private *vp, int budget) 956 959 { 957 960 int packet_count, i; 958 961 struct vector_queue *qi = vp->rx_queue; ··· 968 971 prep_queue_for_rx(qi); 969 972 970 973 /* Fire the Lazy Gun - get as many packets as we can in one go. */ 974 + 975 + if (budget > qi->max_depth) 976 + budget = qi->max_depth; 971 977 972 978 packet_count = uml_vector_recvmmsg( 973 979 vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0); ··· 1021 1021 */ 1022 1022 vp->dev->stats.rx_bytes += skb->len; 1023 1023 vp->dev->stats.rx_packets++; 1024 - netif_rx(skb); 1024 + napi_gro_receive(&vp->napi, skb); 1025 1025 } else { 1026 1026 /* Overlay header too short to do anything - discard. 1027 1027 * We can actually keep this skb and reuse it, ··· 1042 1042 (vp->estats.rx_queue_running_average + packet_count) >> 1; 1043 1043 } 1044 1044 return packet_count; 1045 - } 1046 - 1047 - static void vector_rx(struct vector_private *vp) 1048 - { 1049 - int err; 1050 - int iter = 0; 1051 - 1052 - if ((vp->options & VECTOR_RX) > 0) 1053 - while (((err = vector_mmsg_rx(vp)) > 0) && (iter < MAX_ITERATIONS)) 1054 - iter++; 1055 - else 1056 - while (((err = vector_legacy_rx(vp)) > 0) && (iter < MAX_ITERATIONS)) 1057 - iter++; 1058 - if ((err != 0) && net_ratelimit()) 1059 - netdev_err(vp->dev, "vector_rx: error(%d)\n", err); 1060 - if (iter == MAX_ITERATIONS) 1061 - netdev_err(vp->dev, "vector_rx: device stuck, remote end may have closed the connection\n"); 1062 1045 } 1063 1046 1064 1047 static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev) ··· 1068 1085 netdev_sent_queue(vp->dev, skb->len); 1069 1086 queue_depth = vector_enqueue(vp->tx_queue, skb); 1070 1087 1071 - /* if the device queue is full, stop the upper layers and 1072 - * flush it. 
1073 - */ 1074 - 1075 - if (queue_depth >= vp->tx_queue->max_depth - 1) { 1076 - vp->estats.tx_kicks++; 1077 - netif_stop_queue(dev); 1078 - vector_send(vp->tx_queue); 1079 - return NETDEV_TX_OK; 1080 - } 1081 - if (netdev_xmit_more()) { 1088 + if (queue_depth < vp->tx_queue->max_depth && netdev_xmit_more()) { 1082 1089 mod_timer(&vp->tl, vp->coalesce); 1083 1090 return NETDEV_TX_OK; 1091 + } else { 1092 + queue_depth = vector_send(vp->tx_queue); 1093 + if (queue_depth > 0) 1094 + napi_schedule(&vp->napi); 1084 1095 } 1085 - if (skb->len < TX_SMALL_PACKET) { 1086 - vp->estats.tx_kicks++; 1087 - vector_send(vp->tx_queue); 1088 - } else 1089 - tasklet_schedule(&vp->tx_poll); 1096 + 1090 1097 return NETDEV_TX_OK; 1091 1098 } 1092 1099 ··· 1087 1114 1088 1115 if (!netif_running(dev)) 1089 1116 return IRQ_NONE; 1090 - vector_rx(vp); 1117 + napi_schedule(&vp->napi); 1091 1118 return IRQ_HANDLED; 1092 1119 1093 1120 } ··· 1106 1133 * tweaking the IRQ mask less costly 1107 1134 */ 1108 1135 1109 - if (vp->in_write_poll) 1110 - tasklet_schedule(&vp->tx_poll); 1136 + napi_schedule(&vp->napi); 1111 1137 return IRQ_HANDLED; 1112 1138 1113 1139 } ··· 1133 1161 um_free_irq(vp->tx_irq, dev); 1134 1162 vp->tx_irq = 0; 1135 1163 } 1136 - tasklet_kill(&vp->tx_poll); 1164 + napi_disable(&vp->napi); 1165 + netif_napi_del(&vp->napi); 1137 1166 if (vp->fds->rx_fd > 0) { 1138 1167 if (vp->bpf) 1139 1168 uml_vector_detach_bpf(vp->fds->rx_fd, vp->bpf); ··· 1166 1193 return 0; 1167 1194 } 1168 1195 1169 - /* TX tasklet */ 1170 - 1171 - static void vector_tx_poll(struct tasklet_struct *t) 1196 + static int vector_poll(struct napi_struct *napi, int budget) 1172 1197 { 1173 - struct vector_private *vp = from_tasklet(vp, t, tx_poll); 1198 + struct vector_private *vp = container_of(napi, struct vector_private, napi); 1199 + int work_done = 0; 1200 + int err; 1201 + bool tx_enqueued = false; 1174 1202 1175 - vp->estats.tx_kicks++; 1176 - vector_send(vp->tx_queue); 1203 + if ((vp->options & 
VECTOR_TX) != 0) 1204 + tx_enqueued = (vector_send(vp->tx_queue) > 0); 1205 + if ((vp->options & VECTOR_RX) > 0) 1206 + err = vector_mmsg_rx(vp, budget); 1207 + else { 1208 + err = vector_legacy_rx(vp); 1209 + if (err > 0) 1210 + err = 1; 1211 + } 1212 + if (err > 0) 1213 + work_done += err; 1214 + 1215 + if (tx_enqueued || err > 0) 1216 + napi_schedule(napi); 1217 + if (work_done < budget) 1218 + napi_complete_done(napi, work_done); 1219 + return work_done; 1177 1220 } 1221 + 1178 1222 static void vector_reset_tx(struct work_struct *work) 1179 1223 { 1180 1224 struct vector_private *vp = ··· 1255 1265 goto out_close; 1256 1266 } 1257 1267 1268 + netif_napi_add(vp->dev, &vp->napi, vector_poll, get_depth(vp->parsed)); 1269 + napi_enable(&vp->napi); 1270 + 1258 1271 /* READ IRQ */ 1259 1272 err = um_request_irq( 1260 1273 irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd, ··· 1299 1306 uml_vector_attach_bpf(vp->fds->rx_fd, vp->bpf); 1300 1307 1301 1308 netif_start_queue(dev); 1309 + vector_reset_stats(vp); 1302 1310 1303 1311 /* clear buffer - it can happen that the host side of the interface 1304 1312 * is full when we get here. In this case, new data is never queued, 1305 1313 * SIGIOs never arrive, and the net never works. 
1306 1314 */ 1307 1315 1308 - vector_rx(vp); 1316 + napi_schedule(&vp->napi); 1309 1317 1310 - vector_reset_stats(vp); 1311 1318 vdevice = find_device(vp->unit); 1312 1319 vdevice->opened = 1; 1313 1320 ··· 1536 1543 #endif 1537 1544 }; 1538 1545 1539 - 1540 1546 static void vector_timer_expire(struct timer_list *t) 1541 1547 { 1542 1548 struct vector_private *vp = from_timer(vp, t, tl); 1543 1549 1544 1550 vp->estats.tx_kicks++; 1545 - vector_send(vp->tx_queue); 1551 + napi_schedule(&vp->napi); 1546 1552 } 1553 + 1554 + 1547 1555 1548 1556 static void vector_eth_configure( 1549 1557 int n, ··· 1628 1634 }); 1629 1635 1630 1636 dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST); 1631 - tasklet_setup(&vp->tx_poll, vector_tx_poll); 1632 1637 INIT_WORK(&vp->reset_tx, vector_reset_tx); 1633 1638 1634 1639 timer_setup(&vp->tl, vector_timer_expire, 0);
+2 -1
arch/um/drivers/vector_kern.h
··· 14 14 #include <linux/ctype.h> 15 15 #include <linux/workqueue.h> 16 16 #include <linux/interrupt.h> 17 + 17 18 #include "vector_user.h" 18 19 19 20 /* Queue structure specially adapted for multiple enqueue/dequeue ··· 73 72 struct list_head list; 74 73 spinlock_t lock; 75 74 struct net_device *dev; 75 + struct napi_struct napi ____cacheline_aligned; 76 76 77 77 int unit; 78 78 ··· 117 115 118 116 spinlock_t stats_lock; 119 117 120 - struct tasklet_struct tx_poll; 121 118 bool rexmit_scheduled; 122 119 bool opened; 123 120 bool in_write_poll;