
High Performance UML Vector Network Driver

1. Provides infrastructure for vector IO using recvmmsg/sendmmsg
(a minimal host-side sketch follows this list).
1.1. Multi-message read.
1.2. Multi-message write.
1.3. Optimized queue support for multi-packet enqueue/dequeue.
1.4. BQL/DQL support.
2. Implements several transports, as well as support for direct
wiring of PWEs to the NIC. Allows direct connection of VMs to the
host, other VMs and network devices with no switch in use.
2.1. Raw socket transport: >4 times higher PPS and 10 times higher
TCP RX throughput than the existing pcap-based transport (>4Gbit).
2.2. New tap transport using socket RX and tap xmit. Similar
performance improvements (>4Gbit).
2.3. GRE transport - direct wiring to a GRE PWE.
2.4. L2TPv3 transport - direct wiring to an L2TPv3 PWE.
3. Tuning, performance and offload-related settings support via ethtool.
4. Initial BPF support - used in tap/raw to avoid software looping.
5. Scatter/gather support.
6. vnet header and checksum offload support for the raw socket transport.
7. TSO/GSO support where applicable or available.
8. Migrates all error messages to netdev_*() and rate-limits them
where needed.
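For readers unfamiliar with the multi-message socket calls the driver builds
on, here is a minimal host-side sketch of batching reads with recvmmsg().
It is not part of the patch; the names and sizes are illustrative only:

/* One syscall fills a whole vector of datagrams instead of one
 * recvmsg() call per packet - the core idea behind the vector driver.
 */
#define _GNU_SOURCE
#include <sys/socket.h>
#include <sys/uio.h>
#include <stdio.h>

#define BATCH 64            /* mirrors DEFAULT_VECTOR_SIZE in the driver */
#define PKT_SIZE 1536

static char bufs[BATCH][PKT_SIZE];
static struct iovec iov[BATCH];
static struct mmsghdr msgs[BATCH];

int read_batch(int fd)
{
    int i, n;

    for (i = 0; i < BATCH; i++) {
        iov[i].iov_base = bufs[i];
        iov[i].iov_len = PKT_SIZE;
        msgs[i].msg_hdr.msg_iov = &iov[i];
        msgs[i].msg_hdr.msg_iovlen = 1;
    }
    /* One kernel crossing for up to BATCH packets; msg_len of each
     * element holds the size of the received datagram.
     */
    n = recvmmsg(fd, msgs, BATCH, MSG_DONTWAIT, NULL);
    for (i = 0; i < n; i++)
        printf("packet %d: %u bytes\n", i, msgs[i].msg_len);
    return n;
}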

Signed-off-by: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Signed-off-by: Richard Weinberger <richard@nod.at>

Authored by Anton Ivanov, committed by Richard Weinberger
49da7e64 ff6a1798

Diffstat: +2932 -3
arch/um/Kconfig.net (+11)
···
  more than one without conflict. If you don't need UML networking,
  say N.

+ config UML_NET_VECTOR
+     bool "Vector I/O high performance network devices"
+     depends on UML_NET
+     help
+     This User-Mode Linux network driver uses multi-message send
+     and receive functions. The host running the UML guest must have
+     a linux kernel version above 3.0 and a libc version > 2.13.
+     This driver provides tap, raw, gre and l2tpv3 network transports
+     with up to 4 times higher network throughput than the UML network
+     drivers.
+
  config UML_NET_VDE
      bool "VDE transport"
      depends on UML_NET
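Once enabled, devices are configured with the new non-positional "vec"
kernel command line parameter (parsed by vector_setup() in vector_kern.c
below). A plausible invocation might look as follows; "ifname" and the
specific values are assumptions for illustration, since the host-side
option names live in vector_user.c, which is not part of this excerpt:

    ./linux mem=512M ubd0=rootfs.img \
        vec0:transport=raw,ifname=eth1,mac=52:54:00:12:34:56,depth=128,gro=1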
arch/um/drivers/Makefile (+3 -1)
···
  slip-objs := slip_kern.o slip_user.o
  slirp-objs := slirp_kern.o slirp_user.o
  daemon-objs := daemon_kern.o daemon_user.o
+ vector-objs := vector_kern.o vector_user.o vector_transports.o
  umcast-objs := umcast_kern.o umcast_user.o
  net-objs := net_kern.o net_user.o
  mconsole-objs := mconsole_kern.o mconsole_user.o
···
  obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
  obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
  obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
+ obj-$(CONFIG_UML_NET_VECTOR) += vector.o
  obj-$(CONFIG_UML_NET_VDE) += vde.o
  obj-$(CONFIG_UML_NET_MCAST) += umcast.o
  obj-$(CONFIG_UML_NET_PCAP) += pcap.o
···
  obj-$(CONFIG_UML_RANDOM) += random.o

  # pcap_user.o must be added explicitly.
- USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o
+ USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o pcap_user.o vde_user.o vector_user.o
  CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)

  include arch/um/scripts/Makefile.rules
arch/um/drivers/net_kern.c (+2 -2)
···
  #endif
  }

- static void setup_etheraddr(struct net_device *dev, char *str)
+ void uml_net_setup_etheraddr(struct net_device *dev, char *str)
  {
      unsigned char *addr = dev->dev_addr;
      char *end;
···
       */
      snprintf(dev->name, sizeof(dev->name), "eth%d", n);

-     setup_etheraddr(dev, mac);
+     uml_net_setup_etheraddr(dev, mac);

      printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
arch/um/drivers/vector_kern.c (+1630)
/*
 * Copyright (C) 2017 - Cambridge Greys Limited
 * Copyright (C) 2011 - 2014 Cisco Systems Inc
 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
 * James Leu (jleu@mindspring.net).
 * Copyright (C) 2001 by various other people who didn't put their name here.
 * Licensed under the GPL.
 */

#include <linux/version.h>
#include <linux/bootmem.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/inetdevice.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/platform_device.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <init.h>
#include <irq_kern.h>
#include <irq_user.h>
#include <net_kern.h>
#include <os.h>
#include "mconsole_kern.h"
#include "vector_user.h"
#include "vector_kern.h"

/*
 * Adapted from network devices with the following major changes:
 * All transports are static - simplifies the code significantly
 * Multiple FDs/IRQs per device
 * Vector IO optionally used for read/write, falling back to legacy
 * based on configuration and/or availability
 * Configuration is no longer positional - L2TPv3 and GRE require up to
 * 10 parameters, passing this as positional is not fit for purpose.
 * Only socket transports are supported
 */


#define DRIVER_NAME "uml-vector"
#define DRIVER_VERSION "01"
struct vector_cmd_line_arg {
    struct list_head list;
    int unit;
    char *arguments;
};

struct vector_device {
    struct list_head list;
    struct net_device *dev;
    struct platform_device pdev;
    int unit;
    int opened;
};

static LIST_HEAD(vec_cmd_line);

static DEFINE_SPINLOCK(vector_devices_lock);
static LIST_HEAD(vector_devices);

static int driver_registered;

static void vector_eth_configure(int n, struct arglist *def);

/* Argument accessors to set variables (and/or set default values)
 * mtu, buffer sizing, default headroom, etc
 */

#define DEFAULT_HEADROOM 2
#define SAFETY_MARGIN 32
#define DEFAULT_VECTOR_SIZE 64
#define TX_SMALL_PACKET 128
#define MAX_IOV_SIZE (MAX_SKB_FRAGS + 1)

static const struct {
    const char string[ETH_GSTRING_LEN];
} ethtool_stats_keys[] = {
    { "rx_queue_max" },
    { "rx_queue_running_average" },
    { "tx_queue_max" },
    { "tx_queue_running_average" },
    { "rx_encaps_errors" },
    { "tx_timeout_count" },
    { "tx_restart_queue" },
    { "tx_kicks" },
    { "tx_flow_control_xon" },
    { "tx_flow_control_xoff" },
    { "rx_csum_offload_good" },
    { "rx_csum_offload_errors"},
    { "sg_ok"},
    { "sg_linearized"},
};

#define VECTOR_NUM_STATS ARRAY_SIZE(ethtool_stats_keys)

static void vector_reset_stats(struct vector_private *vp)
{
    vp->estats.rx_queue_max = 0;
    vp->estats.rx_queue_running_average = 0;
    vp->estats.tx_queue_max = 0;
    vp->estats.tx_queue_running_average = 0;
    vp->estats.rx_encaps_errors = 0;
    vp->estats.tx_timeout_count = 0;
    vp->estats.tx_restart_queue = 0;
    vp->estats.tx_kicks = 0;
    vp->estats.tx_flow_control_xon = 0;
    vp->estats.tx_flow_control_xoff = 0;
    vp->estats.sg_ok = 0;
    vp->estats.sg_linearized = 0;
}

static int get_mtu(struct arglist *def)
{
    char *mtu = uml_vector_fetch_arg(def, "mtu");
    long result;

    if (mtu != NULL) {
        if (kstrtoul(mtu, 10, &result) == 0)
            return result;
    }
    return ETH_MAX_PACKET;
}

static int get_depth(struct arglist *def)
{
    char *mtu = uml_vector_fetch_arg(def, "depth");
    long result;

    if (mtu != NULL) {
        if (kstrtoul(mtu, 10, &result) == 0)
            return result;
    }
    return DEFAULT_VECTOR_SIZE;
}

static int get_headroom(struct arglist *def)
{
    char *mtu = uml_vector_fetch_arg(def, "headroom");
    long result;

    if (mtu != NULL) {
        if (kstrtoul(mtu, 10, &result) == 0)
            return result;
    }
    return DEFAULT_HEADROOM;
}

static int get_req_size(struct arglist *def)
{
    char *gro = uml_vector_fetch_arg(def, "gro");
    long result;

    if (gro != NULL) {
        if (kstrtoul(gro, 10, &result) == 0) {
            if (result > 0)
                return 65536;
        }
    }
    return get_mtu(def) + ETH_HEADER_OTHER +
        get_headroom(def) + SAFETY_MARGIN;
}


static int get_transport_options(struct arglist *def)
{
    char *transport = uml_vector_fetch_arg(def, "transport");
    char *vector = uml_vector_fetch_arg(def, "vec");

    int vec_rx = VECTOR_RX;
    int vec_tx = VECTOR_TX;
    long parsed;

    if (vector != NULL) {
        if (kstrtoul(vector, 10, &parsed) == 0) {
            if (parsed == 0) {
                vec_rx = 0;
                vec_tx = 0;
            }
        }
    }


    if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
        return (vec_rx | VECTOR_BPF);
    if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
        return (vec_rx | vec_tx | VECTOR_BPF);
    return (vec_rx | vec_tx);
}


/* A mini-buffer for packet drop read
 * All of our supported transports are datagram oriented and we always
 * read using recvmsg or recvmmsg. If we pass a buffer which is smaller
 * than the packet size it still counts as full packet read and will
 * clean the incoming stream to keep sigio/epoll happy
 */

#define DROP_BUFFER_SIZE 32

static char *drop_buffer;

/* Array backed queues optimized for bulk enqueue/dequeue and
 * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
 * For more details and full design rationale see
 * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
 */


/*
 * Advance the mmsg queue head by n = advance. Resets the queue to
 * maximum enqueue/dequeue-at-once capacity if possible. Called by
 * dequeuers. Caller must hold the head_lock!
 */

static int vector_advancehead(struct vector_queue *qi, int advance)
{
    int queue_depth;

    qi->head =
        (qi->head + advance)
            % qi->max_depth;


    spin_lock(&qi->tail_lock);
    qi->queue_depth -= advance;

    /* we are at 0, use this to
     * reset head and tail so we can use max size vectors
     */

    if (qi->queue_depth == 0) {
        qi->head = 0;
        qi->tail = 0;
    }
    queue_depth = qi->queue_depth;
    spin_unlock(&qi->tail_lock);
    return queue_depth;
}

/* Advance the queue tail by n = advance.
 * This is called by enqueuers which should hold the
 * head lock already
 */

static int vector_advancetail(struct vector_queue *qi, int advance)
{
    int queue_depth;

    qi->tail =
        (qi->tail + advance)
            % qi->max_depth;
    spin_lock(&qi->head_lock);
    qi->queue_depth += advance;
    queue_depth = qi->queue_depth;
    spin_unlock(&qi->head_lock);
    return queue_depth;
}

static int prep_msg(struct vector_private *vp,
    struct sk_buff *skb,
    struct iovec *iov)
{
    int iov_index = 0;
    int nr_frags, frag;
    skb_frag_t *skb_frag;

    nr_frags = skb_shinfo(skb)->nr_frags;
    if (nr_frags > MAX_IOV_SIZE) {
        if (skb_linearize(skb) != 0)
            goto drop;
    }
    if (vp->header_size > 0) {
        iov[iov_index].iov_len = vp->header_size;
        vp->form_header(iov[iov_index].iov_base, skb, vp);
        iov_index++;
    }
    iov[iov_index].iov_base = skb->data;
    if (nr_frags > 0) {
        iov[iov_index].iov_len = skb->len - skb->data_len;
        vp->estats.sg_ok++;
    } else
        iov[iov_index].iov_len = skb->len;
    iov_index++;
    for (frag = 0; frag < nr_frags; frag++) {
        skb_frag = &skb_shinfo(skb)->frags[frag];
        iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
        iov[iov_index].iov_len = skb_frag_size(skb_frag);
        iov_index++;
    }
    return iov_index;
drop:
    return -1;
}
/*
 * Generic vector enqueue with support for forming headers using transport
 * specific callback. Allows GRE, L2TPv3, RAW and other transports
 * to use a common enqueue procedure in vector mode
 */

static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
{
    struct vector_private *vp = netdev_priv(qi->dev);
    int queue_depth;
    int packet_len;
    struct mmsghdr *mmsg_vector = qi->mmsg_vector;
    int iov_count;

    spin_lock(&qi->tail_lock);
    spin_lock(&qi->head_lock);
    queue_depth = qi->queue_depth;
    spin_unlock(&qi->head_lock);

    if (skb)
        packet_len = skb->len;

    if (queue_depth < qi->max_depth) {

        *(qi->skbuff_vector + qi->tail) = skb;
        mmsg_vector += qi->tail;
        iov_count = prep_msg(
            vp,
            skb,
            mmsg_vector->msg_hdr.msg_iov
        );
        if (iov_count < 1)
            goto drop;
        mmsg_vector->msg_hdr.msg_iovlen = iov_count;
        mmsg_vector->msg_hdr.msg_name = vp->fds->remote_addr;
        mmsg_vector->msg_hdr.msg_namelen = vp->fds->remote_addr_size;
        queue_depth = vector_advancetail(qi, 1);
    } else
        goto drop;
    spin_unlock(&qi->tail_lock);
    return queue_depth;
drop:
    qi->dev->stats.tx_dropped++;
    if (skb != NULL) {
        packet_len = skb->len;
        dev_consume_skb_any(skb);
        netdev_completed_queue(qi->dev, 1, packet_len);
    }
    spin_unlock(&qi->tail_lock);
    return queue_depth;
}

static int consume_vector_skbs(struct vector_queue *qi, int count)
{
    struct sk_buff *skb;
    int skb_index;
    int bytes_compl = 0;

    for (skb_index = qi->head; skb_index < qi->head + count; skb_index++) {
        skb = *(qi->skbuff_vector + skb_index);
        /* mark as empty to ensure correct destruction if
         * needed
         */
        bytes_compl += skb->len;
        *(qi->skbuff_vector + skb_index) = NULL;
        dev_consume_skb_any(skb);
    }
    qi->dev->stats.tx_bytes += bytes_compl;
    qi->dev->stats.tx_packets += count;
    netdev_completed_queue(qi->dev, count, bytes_compl);
    return vector_advancehead(qi, count);
}

/*
 * Generic vector deque via sendmmsg with support for forming headers
 * using transport specific callback. Allows GRE, L2TPv3, RAW and
 * other transports to use a common dequeue procedure in vector mode
 */


static int vector_send(struct vector_queue *qi)
{
    struct vector_private *vp = netdev_priv(qi->dev);
    struct mmsghdr *send_from;
    int result = 0, send_len, queue_depth = qi->max_depth;

    if (spin_trylock(&qi->head_lock)) {
        if (spin_trylock(&qi->tail_lock)) {
            /* update queue_depth to current value */
            queue_depth = qi->queue_depth;
            spin_unlock(&qi->tail_lock);
            while (queue_depth > 0) {
                /* Calculate the start of the vector */
                send_len = queue_depth;
                send_from = qi->mmsg_vector;
                send_from += qi->head;
                /* Adjust vector size if wraparound */
                if (send_len + qi->head > qi->max_depth)
                    send_len = qi->max_depth - qi->head;
                /* Try to TX as many packets as possible */
                if (send_len > 0) {
                    result = uml_vector_sendmmsg(
                        vp->fds->tx_fd,
                        send_from,
                        send_len,
                        0
                    );
                    vp->in_write_poll =
                        (result != send_len);
                }
                /* For some of the sendmmsg error scenarios
                 * we may end being unsure in the TX success
                 * for all packets. It is safer to declare
                 * them all TX-ed and blame the network.
                 */
                if (result < 0) {
                    if (net_ratelimit())
                        netdev_err(vp->dev, "sendmmsg err=%i\n",
                            result);
                    result = send_len;
                }
                if (result > 0) {
                    queue_depth =
                        consume_vector_skbs(qi, result);
                    /* This is equivalent to an TX IRQ.
                     * Restart the upper layers to feed us
                     * more packets.
                     */
                    if (result > vp->estats.tx_queue_max)
                        vp->estats.tx_queue_max = result;
                    vp->estats.tx_queue_running_average =
                        (vp->estats.tx_queue_running_average + result) >> 1;
                }
                netif_trans_update(qi->dev);
                netif_wake_queue(qi->dev);
                /* if TX is busy, break out of the send loop,
                 * poll write IRQ will reschedule xmit for us
                 */
                if (result != send_len) {
                    vp->estats.tx_restart_queue++;
                    break;
                }
            }
        }
        spin_unlock(&qi->head_lock);
    } else {
        tasklet_schedule(&vp->tx_poll);
    }
    return queue_depth;
}

/* Queue destructor. Deliberately stateless so we can use
 * it in queue cleanup if initialization fails.
 */

static void destroy_queue(struct vector_queue *qi)
{
    int i;
    struct iovec *iov;
    struct vector_private *vp = netdev_priv(qi->dev);
    struct mmsghdr *mmsg_vector;

    if (qi == NULL)
        return;
    /* deallocate any skbuffs - we rely on any unused to be
     * set to NULL.
     */
    if (qi->skbuff_vector != NULL) {
        for (i = 0; i < qi->max_depth; i++) {
            if (*(qi->skbuff_vector + i) != NULL)
                dev_kfree_skb_any(*(qi->skbuff_vector + i));
        }
        kfree(qi->skbuff_vector);
    }
    /* deallocate matching IOV structures including header buffs */
    if (qi->mmsg_vector != NULL) {
        mmsg_vector = qi->mmsg_vector;
        for (i = 0; i < qi->max_depth; i++) {
            iov = mmsg_vector->msg_hdr.msg_iov;
            if (iov != NULL) {
                if ((vp->header_size > 0) &&
                    (iov->iov_base != NULL))
                    kfree(iov->iov_base);
                kfree(iov);
            }
            mmsg_vector++;
        }
        kfree(qi->mmsg_vector);
    }
    kfree(qi);
}

/*
 * Queue constructor. Create a queue with a given side.
 */
static struct vector_queue *create_queue(
    struct vector_private *vp,
    int max_size,
    int header_size,
    int num_extra_frags)
{
    struct vector_queue *result;
    int i;
    struct iovec *iov;
    struct mmsghdr *mmsg_vector;

    result = kmalloc(sizeof(struct vector_queue), GFP_KERNEL);
    if (result == NULL)
        goto out_fail;
    result->max_depth = max_size;
    result->dev = vp->dev;
    result->mmsg_vector = kmalloc(
        (sizeof(struct mmsghdr) * max_size), GFP_KERNEL);
    result->skbuff_vector = kmalloc(
        (sizeof(void *) * max_size), GFP_KERNEL);
    if (result->mmsg_vector == NULL || result->skbuff_vector == NULL)
        goto out_fail;

    mmsg_vector = result->mmsg_vector;
    for (i = 0; i < max_size; i++) {
        /* Clear all pointers - we use non-NULL as marking on
         * what to free on destruction
         */
        *(result->skbuff_vector + i) = NULL;
        mmsg_vector->msg_hdr.msg_iov = NULL;
        mmsg_vector++;
    }
    mmsg_vector = result->mmsg_vector;
    result->max_iov_frags = num_extra_frags;
    for (i = 0; i < max_size; i++) {
        if (vp->header_size > 0)
            iov = kmalloc(
                sizeof(struct iovec) * (3 + num_extra_frags),
                GFP_KERNEL
            );
        else
            iov = kmalloc(
                sizeof(struct iovec) * (2 + num_extra_frags),
                GFP_KERNEL
            );
        if (iov == NULL)
            goto out_fail;
        mmsg_vector->msg_hdr.msg_iov = iov;
        mmsg_vector->msg_hdr.msg_iovlen = 1;
        mmsg_vector->msg_hdr.msg_control = NULL;
        mmsg_vector->msg_hdr.msg_controllen = 0;
        mmsg_vector->msg_hdr.msg_flags = MSG_DONTWAIT;
        mmsg_vector->msg_hdr.msg_name = NULL;
        mmsg_vector->msg_hdr.msg_namelen = 0;
        if (vp->header_size > 0) {
            iov->iov_base = kmalloc(header_size, GFP_KERNEL);
            if (iov->iov_base == NULL)
                goto out_fail;
            iov->iov_len = header_size;
            mmsg_vector->msg_hdr.msg_iovlen = 2;
            iov++;
        }
        iov->iov_base = NULL;
        iov->iov_len = 0;
        mmsg_vector++;
    }
    spin_lock_init(&result->head_lock);
    spin_lock_init(&result->tail_lock);
    result->queue_depth = 0;
    result->head = 0;
    result->tail = 0;
    return result;
out_fail:
    destroy_queue(result);
    return NULL;
}

/*
 * We do not use the RX queue as a proper wraparound queue for now
 * This is not necessary because the consumption via netif_rx()
 * happens in-line. While we can try using the return code of
 * netif_rx() for flow control there are no drivers doing this today.
 * For this RX specific use we ignore the tail/head locks and
 * just read into a prepared queue filled with skbuffs.
 */

static struct sk_buff *prep_skb(
    struct vector_private *vp,
    struct user_msghdr *msg)
{
    int linear = vp->max_packet + vp->headroom + SAFETY_MARGIN;
    struct sk_buff *result;
    int iov_index = 0, len;
    struct iovec *iov = msg->msg_iov;
    int err, nr_frags, frag;
    skb_frag_t *skb_frag;

    if (vp->req_size <= linear)
        len = linear;
    else
        len = vp->req_size;
    result = alloc_skb_with_frags(
        linear,
        len - vp->max_packet,
        3,
        &err,
        GFP_ATOMIC
    );
    if (vp->header_size > 0)
        iov_index++;
    if (result == NULL) {
        iov[iov_index].iov_base = NULL;
        iov[iov_index].iov_len = 0;
        goto done;
    }
    skb_reserve(result, vp->headroom);
    result->dev = vp->dev;
    skb_put(result, vp->max_packet);
    result->data_len = len - vp->max_packet;
    result->len += len - vp->max_packet;
    skb_reset_mac_header(result);
    result->ip_summed = CHECKSUM_NONE;
    iov[iov_index].iov_base = result->data;
    iov[iov_index].iov_len = vp->max_packet;
    iov_index++;

    nr_frags = skb_shinfo(result)->nr_frags;
    for (frag = 0; frag < nr_frags; frag++) {
        skb_frag = &skb_shinfo(result)->frags[frag];
        iov[iov_index].iov_base = skb_frag_address_safe(skb_frag);
        if (iov[iov_index].iov_base != NULL)
            iov[iov_index].iov_len = skb_frag_size(skb_frag);
        else
            iov[iov_index].iov_len = 0;
        iov_index++;
    }
done:
    msg->msg_iovlen = iov_index;
    return result;
}


/* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/

static void prep_queue_for_rx(struct vector_queue *qi)
{
    struct vector_private *vp = netdev_priv(qi->dev);
    struct mmsghdr *mmsg_vector = qi->mmsg_vector;
    void **skbuff_vector = qi->skbuff_vector;
    int i;

    if (qi->queue_depth == 0)
        return;
    for (i = 0; i < qi->queue_depth; i++) {
        /* it is OK if allocation fails - recvmmsg with NULL data in
         * iov argument still performs an RX, just drops the packet
         * This allows us stop faffing around with a "drop buffer"
         */

        *skbuff_vector = prep_skb(vp, &mmsg_vector->msg_hdr);
        skbuff_vector++;
        mmsg_vector++;
    }
    qi->queue_depth = 0;
}

static struct vector_device *find_device(int n)
{
    struct vector_device *device;
    struct list_head *ele;

    spin_lock(&vector_devices_lock);
    list_for_each(ele, &vector_devices) {
        device = list_entry(ele, struct vector_device, list);
        if (device->unit == n)
            goto out;
    }
    device = NULL;
out:
    spin_unlock(&vector_devices_lock);
    return device;
}

static int vector_parse(char *str, int *index_out, char **str_out,
            char **error_out)
{
    int n, len, err = -EINVAL;
    char *start = str;

    len = strlen(str);

    while ((*str != ':') && (strlen(str) > 1))
        str++;
    if (*str != ':') {
        *error_out = "Expected ':' after device number";
        return err;
    }
    *str = '\0';

    err = kstrtouint(start, 0, &n);
    if (err < 0) {
        *error_out = "Bad device number";
        return err;
    }

    str++;
    if (find_device(n)) {
        *error_out = "Device already configured";
        return err;
    }

    *index_out = n;
    *str_out = str;
    return 0;
}

static int vector_config(char *str, char **error_out)
{
    int err, n;
    char *params;
    struct arglist *parsed;

    err = vector_parse(str, &n, &params, error_out);
    if (err != 0)
        return err;

    /* This string is broken up and the pieces used by the underlying
     * driver. We should copy it to make sure things do not go wrong
     * later.
     */

    params = kstrdup(params, GFP_KERNEL);
    if (str == NULL) {
        *error_out = "vector_config failed to strdup string";
        return -ENOMEM;
    }

    parsed = uml_parse_vector_ifspec(params);

    if (parsed == NULL) {
        *error_out = "vector_config failed to parse parameters";
        return -EINVAL;
    }

    vector_eth_configure(n, parsed);
    return 0;
}

static int vector_id(char **str, int *start_out, int *end_out)
{
    char *end;
    int n;

    n = simple_strtoul(*str, &end, 0);
    if ((*end != '\0') || (end == *str))
        return -1;

    *start_out = n;
    *end_out = n;
    *str = end;
    return n;
}

static int vector_remove(int n, char **error_out)
{
    struct vector_device *vec_d;
    struct net_device *dev;
    struct vector_private *vp;

    vec_d = find_device(n);
    if (vec_d == NULL)
        return -ENODEV;
    dev = vec_d->dev;
    vp = netdev_priv(dev);
    if (vp->fds != NULL)
        return -EBUSY;
    unregister_netdev(dev);
    platform_device_unregister(&vec_d->pdev);
    return 0;
}

/*
 * There is no shared per-transport initialization code, so
 * we will just initialize each interface one by one and
 * add them to a list
 */

static struct platform_driver uml_net_driver = {
    .driver = {
        .name = DRIVER_NAME,
    },
};


static void vector_device_release(struct device *dev)
{
    struct vector_device *device = dev_get_drvdata(dev);
    struct net_device *netdev = device->dev;

    list_del(&device->list);
    kfree(device);
    free_netdev(netdev);
}

/* Bog standard recv using recvmsg - not used normally unless the user
 * explicitly specifies not to use recvmmsg vector RX.
 */

static int vector_legacy_rx(struct vector_private *vp)
{
    int pkt_len;
    struct user_msghdr hdr;
    struct iovec iov[2 + MAX_IOV_SIZE]; /* header + data use case only */
    int iovpos = 0;
    struct sk_buff *skb;
    int header_check;

    hdr.msg_name = NULL;
    hdr.msg_namelen = 0;
    hdr.msg_iov = (struct iovec *) &iov;
    hdr.msg_control = NULL;
    hdr.msg_controllen = 0;
    hdr.msg_flags = 0;

    if (vp->header_size > 0) {
        iov[0].iov_base = vp->header_rxbuffer;
        iov[0].iov_len = vp->header_size;
    }

    skb = prep_skb(vp, &hdr);

    if (skb == NULL) {
        /* Read a packet into drop_buffer and don't do
         * anything with it.
         */
        iov[iovpos].iov_base = drop_buffer;
        iov[iovpos].iov_len = DROP_BUFFER_SIZE;
        hdr.msg_iovlen = 1;
        vp->dev->stats.rx_dropped++;
    }

    pkt_len = uml_vector_recvmsg(vp->fds->rx_fd, &hdr, 0);

    if (skb != NULL) {
        if (pkt_len > vp->header_size) {
            if (vp->header_size > 0) {
                header_check = vp->verify_header(
                    vp->header_rxbuffer, skb, vp);
                if (header_check < 0) {
                    dev_kfree_skb_irq(skb);
                    vp->dev->stats.rx_dropped++;
                    vp->estats.rx_encaps_errors++;
                    return 0;
                }
                if (header_check > 0) {
                    vp->estats.rx_csum_offload_good++;
                    skb->ip_summed = CHECKSUM_UNNECESSARY;
                }
            }
            pskb_trim(skb, pkt_len - vp->rx_header_size);
            skb->protocol = eth_type_trans(skb, skb->dev);
            vp->dev->stats.rx_bytes += skb->len;
            vp->dev->stats.rx_packets++;
            netif_rx(skb);
        } else {
            dev_kfree_skb_irq(skb);
        }
    }
    return pkt_len;
}

/*
 * Packet at a time TX which falls back to vector TX if the
 * underlying transport is busy.
 */



static int writev_tx(struct vector_private *vp, struct sk_buff *skb)
{
    struct iovec iov[3 + MAX_IOV_SIZE];
    int iov_count, pkt_len = 0;

    iov[0].iov_base = vp->header_txbuffer;
    iov_count = prep_msg(vp, skb, (struct iovec *) &iov);

    if (iov_count < 1)
        goto drop;
    pkt_len = uml_vector_writev(
        vp->fds->tx_fd,
        (struct iovec *) &iov,
        iov_count
    );

    netif_trans_update(vp->dev);
    netif_wake_queue(vp->dev);

    if (pkt_len > 0) {
        vp->dev->stats.tx_bytes += skb->len;
        vp->dev->stats.tx_packets++;
    } else {
        vp->dev->stats.tx_dropped++;
    }
    consume_skb(skb);
    return pkt_len;
drop:
    vp->dev->stats.tx_dropped++;
    consume_skb(skb);
    return pkt_len;
}

/*
 * Receive as many messages as we can in one call using the special
 * mmsg vector matched to an skb vector which we prepared earlier.
 */

static int vector_mmsg_rx(struct vector_private *vp)
{
    int packet_count, i;
    struct vector_queue *qi = vp->rx_queue;
    struct sk_buff *skb;
    struct mmsghdr *mmsg_vector = qi->mmsg_vector;
    void **skbuff_vector = qi->skbuff_vector;
    int header_check;

    /* Refresh the vector and make sure it is with new skbs and the
     * iovs are updated to point to them.
     */

    prep_queue_for_rx(qi);

    /* Fire the Lazy Gun - get as many packets as we can in one go. */

    packet_count = uml_vector_recvmmsg(
        vp->fds->rx_fd, qi->mmsg_vector, qi->max_depth, 0);

    if (packet_count <= 0)
        return packet_count;

    /* We treat packet processing as enqueue, buffer refresh as dequeue
     * The queue_depth tells us how many buffers have been used and how
     * many do we need to prep the next time prep_queue_for_rx() is called.
     */

    qi->queue_depth = packet_count;

    for (i = 0; i < packet_count; i++) {
        skb = (*skbuff_vector);
        if (mmsg_vector->msg_len > vp->header_size) {
            if (vp->header_size > 0) {
                header_check = vp->verify_header(
                    mmsg_vector->msg_hdr.msg_iov->iov_base,
                    skb,
                    vp
                );
                if (header_check < 0) {
                    /* Overlay header failed to verify - discard.
                     * We can actually keep this skb and reuse it,
                     * but that will make the prep logic too
                     * complex.
                     */
                    dev_kfree_skb_irq(skb);
                    vp->estats.rx_encaps_errors++;
                    continue;
                }
                if (header_check > 0) {
                    vp->estats.rx_csum_offload_good++;
                    skb->ip_summed = CHECKSUM_UNNECESSARY;
                }
            }
            pskb_trim(skb,
                mmsg_vector->msg_len - vp->rx_header_size);
            skb->protocol = eth_type_trans(skb, skb->dev);
            /*
             * We do not need to lock on updating stats here
             * The interrupt loop is non-reentrant.
             */
            vp->dev->stats.rx_bytes += skb->len;
            vp->dev->stats.rx_packets++;
            netif_rx(skb);
        } else {
            /* Overlay header too short to do anything - discard.
             * We can actually keep this skb and reuse it,
             * but that will make the prep logic too complex.
             */
            if (skb != NULL)
                dev_kfree_skb_irq(skb);
        }
        (*skbuff_vector) = NULL;
        /* Move to the next buffer element */
        mmsg_vector++;
        skbuff_vector++;
    }
    if (packet_count > 0) {
        if (vp->estats.rx_queue_max < packet_count)
            vp->estats.rx_queue_max = packet_count;
        vp->estats.rx_queue_running_average =
            (vp->estats.rx_queue_running_average + packet_count) >> 1;
    }
    return packet_count;
}

static void vector_rx(struct vector_private *vp)
{
    int err;

    if ((vp->options & VECTOR_RX) > 0)
        while ((err = vector_mmsg_rx(vp)) > 0)
            ;
    else
        while ((err = vector_legacy_rx(vp)) > 0)
            ;
    if ((err != 0) && net_ratelimit())
        netdev_err(vp->dev, "vector_rx: error(%d)\n", err);
}

static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
    struct vector_private *vp = netdev_priv(dev);
    int queue_depth = 0;

    if ((vp->options & VECTOR_TX) == 0) {
        writev_tx(vp, skb);
        return NETDEV_TX_OK;
    }

    /* We do BQL only in the vector path, no point doing it in
     * packet at a time mode as there is no device queue
     */

    netdev_sent_queue(vp->dev, skb->len);
    queue_depth = vector_enqueue(vp->tx_queue, skb);

    /* if the device queue is full, stop the upper layers and
     * flush it.
     */

    if (queue_depth >= vp->tx_queue->max_depth - 1) {
        vp->estats.tx_kicks++;
        netif_stop_queue(dev);
        vector_send(vp->tx_queue);
        return NETDEV_TX_OK;
    }
    if (skb->xmit_more) {
        mod_timer(&vp->tl, vp->coalesce);
        return NETDEV_TX_OK;
    }
    if (skb->len < TX_SMALL_PACKET) {
        vp->estats.tx_kicks++;
        vector_send(vp->tx_queue);
    } else
        tasklet_schedule(&vp->tx_poll);
    return NETDEV_TX_OK;
}

static irqreturn_t vector_rx_interrupt(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    struct vector_private *vp = netdev_priv(dev);

    if (!netif_running(dev))
        return IRQ_NONE;
    vector_rx(vp);
    return IRQ_HANDLED;

}

static irqreturn_t vector_tx_interrupt(int irq, void *dev_id)
{
    struct net_device *dev = dev_id;
    struct vector_private *vp = netdev_priv(dev);

    if (!netif_running(dev))
        return IRQ_NONE;
    /* We need to pay attention to it only if we got
     * -EAGAIN or -ENOBUFFS from sendmmsg. Otherwise
     * we ignore it. In the future, it may be worth
     * it to improve the IRQ controller a bit to make
     * tweaking the IRQ mask less costly
     */

    if (vp->in_write_poll)
        tasklet_schedule(&vp->tx_poll);
    return IRQ_HANDLED;

}

static int irq_rr;

static int vector_net_close(struct net_device *dev)
{
    struct vector_private *vp = netdev_priv(dev);
    unsigned long flags;

    netif_stop_queue(dev);
    del_timer(&vp->tl);

    if (vp->fds == NULL)
        return 0;

    /* Disable and free all IRQS */
    if (vp->rx_irq > 0) {
        um_free_irq(vp->rx_irq, dev);
        vp->rx_irq = 0;
    }
    if (vp->tx_irq > 0) {
        um_free_irq(vp->tx_irq, dev);
        vp->tx_irq = 0;
    }
    tasklet_kill(&vp->tx_poll);
    if (vp->fds->rx_fd > 0) {
        os_close_file(vp->fds->rx_fd);
        vp->fds->rx_fd = -1;
    }
    if (vp->fds->tx_fd > 0) {
        os_close_file(vp->fds->tx_fd);
        vp->fds->tx_fd = -1;
    }
    if (vp->bpf != NULL)
        kfree(vp->bpf);
    if (vp->fds->remote_addr != NULL)
        kfree(vp->fds->remote_addr);
    if (vp->transport_data != NULL)
        kfree(vp->transport_data);
    if (vp->header_rxbuffer != NULL)
        kfree(vp->header_rxbuffer);
    if (vp->header_txbuffer != NULL)
        kfree(vp->header_txbuffer);
    if (vp->rx_queue != NULL)
        destroy_queue(vp->rx_queue);
    if (vp->tx_queue != NULL)
        destroy_queue(vp->tx_queue);
    kfree(vp->fds);
    vp->fds = NULL;
    spin_lock_irqsave(&vp->lock, flags);
    vp->opened = false;
    spin_unlock_irqrestore(&vp->lock, flags);
    return 0;
}

/* TX tasklet */

static void vector_tx_poll(unsigned long data)
{
    struct vector_private *vp = (struct vector_private *)data;

    vp->estats.tx_kicks++;
    vector_send(vp->tx_queue);
}
static void vector_reset_tx(struct work_struct *work)
{
    struct vector_private *vp =
        container_of(work, struct vector_private, reset_tx);
    netdev_reset_queue(vp->dev);
    netif_start_queue(vp->dev);
    netif_wake_queue(vp->dev);
}
static int vector_net_open(struct net_device *dev)
{
    struct vector_private *vp = netdev_priv(dev);
    unsigned long flags;
    int err = -EINVAL;
    struct vector_device *vdevice;

    spin_lock_irqsave(&vp->lock, flags);
    if (vp->opened)
        return -ENXIO;
    vp->opened = true;
    spin_unlock_irqrestore(&vp->lock, flags);

    vp->fds = uml_vector_user_open(vp->unit, vp->parsed);

    if (vp->fds == NULL)
        goto out_close;

    if (build_transport_data(vp) < 0)
        goto out_close;

    if ((vp->options & VECTOR_RX) > 0) {
        vp->rx_queue = create_queue(
            vp,
            get_depth(vp->parsed),
            vp->rx_header_size,
            MAX_IOV_SIZE
        );
        vp->rx_queue->queue_depth = get_depth(vp->parsed);
    } else {
        vp->header_rxbuffer = kmalloc(
            vp->rx_header_size,
            GFP_KERNEL
        );
        if (vp->header_rxbuffer == NULL)
            goto out_close;
    }
    if ((vp->options & VECTOR_TX) > 0) {
        vp->tx_queue = create_queue(
            vp,
            get_depth(vp->parsed),
            vp->header_size,
            MAX_IOV_SIZE
        );
    } else {
        vp->header_txbuffer = kmalloc(vp->header_size, GFP_KERNEL);
        if (vp->header_txbuffer == NULL)
            goto out_close;
    }

    /* READ IRQ */
    err = um_request_irq(
        irq_rr + VECTOR_BASE_IRQ, vp->fds->rx_fd,
            IRQ_READ, vector_rx_interrupt,
            IRQF_SHARED, dev->name, dev);
    if (err != 0) {
        netdev_err(dev, "vector_open: failed to get rx irq(%d)\n", err);
        err = -ENETUNREACH;
        goto out_close;
    }
    vp->rx_irq = irq_rr + VECTOR_BASE_IRQ;
    dev->irq = irq_rr + VECTOR_BASE_IRQ;
    irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;

    /* WRITE IRQ - we need it only if we have vector TX */
    if ((vp->options & VECTOR_TX) > 0) {
        err = um_request_irq(
            irq_rr + VECTOR_BASE_IRQ, vp->fds->tx_fd,
                IRQ_WRITE, vector_tx_interrupt,
                IRQF_SHARED, dev->name, dev);
        if (err != 0) {
            netdev_err(dev,
                "vector_open: failed to get tx irq(%d)\n", err);
            err = -ENETUNREACH;
            goto out_close;
        }
        vp->tx_irq = irq_rr + VECTOR_BASE_IRQ;
        irq_rr = (irq_rr + 1) % VECTOR_IRQ_SPACE;
    }

    if ((vp->options & VECTOR_BPF) != 0)
        vp->bpf = uml_vector_default_bpf(vp->fds->rx_fd, dev->dev_addr);

    /* Write Timeout Timer */

    vp->tl.data = (unsigned long) vp;
    netif_start_queue(dev);

    /* clear buffer - it can happen that the host side of the interface
     * is full when we get here. In this case, new data is never queued,
     * SIGIOs never arrive, and the net never works.
     */

    vector_rx(vp);

    vector_reset_stats(vp);
    vdevice = find_device(vp->unit);
    vdevice->opened = 1;

    if ((vp->options & VECTOR_TX) != 0)
        add_timer(&vp->tl);
    return 0;
out_close:
    vector_net_close(dev);
    return err;
}


static void vector_net_set_multicast_list(struct net_device *dev)
{
    /* TODO: - we can do some BPF games here */
    return;
}

static void vector_net_tx_timeout(struct net_device *dev)
{
    struct vector_private *vp = netdev_priv(dev);

    vp->estats.tx_timeout_count++;
    netif_trans_update(dev);
    schedule_work(&vp->reset_tx);
}

static netdev_features_t vector_fix_features(struct net_device *dev,
    netdev_features_t features)
{
    features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
    return features;
}

static int vector_set_features(struct net_device *dev,
    netdev_features_t features)
{
    struct vector_private *vp = netdev_priv(dev);
    /* Adjust buffer sizes for GSO/GRO. Unfortunately, there is
     * no way to negotiate it on raw sockets, so we can change
     * only our side.
     */
    if (features & NETIF_F_GRO)
        /* All new frame buffers will be GRO-sized */
        vp->req_size = 65536;
    else
        /* All new frame buffers will be normal sized */
        vp->req_size = vp->max_packet + vp->headroom + SAFETY_MARGIN;
    return 0;
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void vector_net_poll_controller(struct net_device *dev)
{
    disable_irq(dev->irq);
    vector_rx_interrupt(dev->irq, dev);
    enable_irq(dev->irq);
}
#endif

static void vector_net_get_drvinfo(struct net_device *dev,
                struct ethtool_drvinfo *info)
{
    strlcpy(info->driver, DRIVER_NAME, sizeof(info->driver));
    strlcpy(info->version, DRIVER_VERSION, sizeof(info->version));
}

static void vector_get_ringparam(struct net_device *netdev,
                struct ethtool_ringparam *ring)
{
    struct vector_private *vp = netdev_priv(netdev);

    ring->rx_max_pending = vp->rx_queue->max_depth;
    ring->tx_max_pending = vp->tx_queue->max_depth;
    ring->rx_pending = vp->rx_queue->max_depth;
    ring->tx_pending = vp->tx_queue->max_depth;
}

static void vector_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
    switch (stringset) {
    case ETH_SS_TEST:
        *buf = '\0';
        break;
    case ETH_SS_STATS:
        memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
        break;
    default:
        WARN_ON(1);
        break;
    }
}

static int vector_get_sset_count(struct net_device *dev, int sset)
{
    switch (sset) {
    case ETH_SS_TEST:
        return 0;
    case ETH_SS_STATS:
        return VECTOR_NUM_STATS;
    default:
        return -EOPNOTSUPP;
    }
}

static void vector_get_ethtool_stats(struct net_device *dev,
    struct ethtool_stats *estats,
    u64 *tmp_stats)
{
    struct vector_private *vp = netdev_priv(dev);

    memcpy(tmp_stats, &vp->estats, sizeof(struct vector_estats));
}

static int vector_get_coalesce(struct net_device *netdev,
                    struct ethtool_coalesce *ec)
{
    struct vector_private *vp = netdev_priv(netdev);

    ec->tx_coalesce_usecs = (vp->coalesce * 1000000) / HZ;
    return 0;
}

static int vector_set_coalesce(struct net_device *netdev,
                    struct ethtool_coalesce *ec)
{
    struct vector_private *vp = netdev_priv(netdev);

    vp->coalesce = (ec->tx_coalesce_usecs * HZ) / 1000000;
    if (vp->coalesce == 0)
        vp->coalesce = 1;
    return 0;
}

static const struct ethtool_ops vector_net_ethtool_ops = {
    .get_drvinfo = vector_net_get_drvinfo,
    .get_link = ethtool_op_get_link,
    .get_ts_info = ethtool_op_get_ts_info,
    .get_ringparam = vector_get_ringparam,
    .get_strings = vector_get_strings,
    .get_sset_count = vector_get_sset_count,
    .get_ethtool_stats = vector_get_ethtool_stats,
    .get_coalesce = vector_get_coalesce,
    .set_coalesce = vector_set_coalesce,
};


static const struct net_device_ops vector_netdev_ops = {
    .ndo_open = vector_net_open,
    .ndo_stop = vector_net_close,
    .ndo_start_xmit = vector_net_start_xmit,
    .ndo_set_rx_mode = vector_net_set_multicast_list,
    .ndo_tx_timeout = vector_net_tx_timeout,
    .ndo_set_mac_address = eth_mac_addr,
    .ndo_validate_addr = eth_validate_addr,
    .ndo_fix_features = vector_fix_features,
    .ndo_set_features = vector_set_features,
#ifdef CONFIG_NET_POLL_CONTROLLER
    .ndo_poll_controller = vector_net_poll_controller,
#endif
};


static void vector_timer_expire(unsigned long _conn)
{
    struct vector_private *vp = (struct vector_private *)_conn;

    vp->estats.tx_kicks++;
    vector_send(vp->tx_queue);
}

static void vector_eth_configure(
        int n,
        struct arglist *def
    )
{
    struct vector_device *device;
    struct net_device *dev;
    struct vector_private *vp;
    int err;

    device = kzalloc(sizeof(*device), GFP_KERNEL);
    if (device == NULL) {
        printk(KERN_ERR "eth_configure failed to allocate struct "
                "vector_device\n");
        return;
    }
    dev = alloc_etherdev(sizeof(struct vector_private));
    if (dev == NULL) {
        printk(KERN_ERR "eth_configure: failed to allocate struct "
                "net_device for vec%d\n", n);
        goto out_free_device;
    }

    dev->mtu = get_mtu(def);

    INIT_LIST_HEAD(&device->list);
    device->unit = n;

    /* If this name ends up conflicting with an existing registered
     * netdevice, that is OK, register_netdev{,ice}() will notice this
     * and fail.
     */
    snprintf(dev->name, sizeof(dev->name), "vec%d", n);
    uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
    vp = netdev_priv(dev);

    /* sysfs register */
    if (!driver_registered) {
        platform_driver_register(&uml_net_driver);
        driver_registered = 1;
    }
    device->pdev.id = n;
    device->pdev.name = DRIVER_NAME;
    device->pdev.dev.release = vector_device_release;
    dev_set_drvdata(&device->pdev.dev, device);
    if (platform_device_register(&device->pdev))
        goto out_free_netdev;
    SET_NETDEV_DEV(dev, &device->pdev.dev);

    device->dev = dev;

    *vp = ((struct vector_private)
        {
        .list            = LIST_HEAD_INIT(vp->list),
        .dev            = dev,
        .unit            = n,
        .options        = get_transport_options(def),
        .rx_irq            = 0,
        .tx_irq            = 0,
        .parsed            = def,
        .max_packet        = get_mtu(def) + ETH_HEADER_OTHER,
        /* TODO - we need to calculate headroom so that ip header
         * is 16 byte aligned all the time
         */
        .headroom        = get_headroom(def),
        .form_header        = NULL,
        .verify_header        = NULL,
        .header_rxbuffer    = NULL,
        .header_txbuffer    = NULL,
        .header_size        = 0,
        .rx_header_size        = 0,
        .rexmit_scheduled    = false,
        .opened            = false,
        .transport_data        = NULL,
        .in_write_poll        = false,
        .coalesce        = 2,
        .req_size        = get_req_size(def)
        });

    dev->features = dev->hw_features = (NETIF_F_SG | NETIF_F_FRAGLIST);
    tasklet_init(&vp->tx_poll, vector_tx_poll, (unsigned long)vp);
    INIT_WORK(&vp->reset_tx, vector_reset_tx);

    init_timer(&vp->tl);
    spin_lock_init(&vp->lock);
    vp->tl.function = vector_timer_expire;

    /* FIXME */
    dev->netdev_ops = &vector_netdev_ops;
    dev->ethtool_ops = &vector_net_ethtool_ops;
    dev->watchdog_timeo = (HZ >> 1);
    /* primary IRQ - fixme */
    dev->irq = 0; /* we will adjust this once opened */

    rtnl_lock();
    err = register_netdevice(dev);
    rtnl_unlock();
    if (err)
        goto out_undo_user_init;

    spin_lock(&vector_devices_lock);
    list_add(&device->list, &vector_devices);
    spin_unlock(&vector_devices_lock);

    return;

out_undo_user_init:
    return;
out_free_netdev:
    free_netdev(dev);
out_free_device:
    kfree(device);
}




/*
 * Invoked late in the init
 */

static int __init vector_init(void)
{
    struct list_head *ele;
    struct vector_cmd_line_arg *def;
    struct arglist *parsed;

    list_for_each(ele, &vec_cmd_line) {
        def = list_entry(ele, struct vector_cmd_line_arg, list);
        parsed = uml_parse_vector_ifspec(def->arguments);
        if (parsed != NULL)
            vector_eth_configure(def->unit, parsed);
    }
    return 0;
}


/* Invoked at initial argument parsing, only stores
 * arguments until a proper vector_init is called
 * later
 */

static int __init vector_setup(char *str)
{
    char *error;
    int n, err;
    struct vector_cmd_line_arg *new;

    err = vector_parse(str, &n, &str, &error);
    if (err) {
        printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
                str, error);
        return 1;
    }
    new = alloc_bootmem(sizeof(*new));
    INIT_LIST_HEAD(&new->list);
    new->unit = n;
    new->arguments = str;
    list_add_tail(&new->list, &vec_cmd_line);
    return 1;
}

__setup("vec", vector_setup);
__uml_help(vector_setup,
"vec[0-9]+:<option>=<value>,<option>=<value>\n"
"    Configure a vector io network device.\n\n"
);

late_initcall(vector_init);

static struct mc_device vector_mc = {
    .list        = LIST_HEAD_INIT(vector_mc.list),
    .name        = "vec",
    .config        = vector_config,
    .get_config    = NULL,
    .id        = vector_id,
    .remove        = vector_remove,
};

#ifdef CONFIG_INET
static int vector_inetaddr_event(
    struct notifier_block *this,
    unsigned long event,
    void *ptr)
{
    return NOTIFY_DONE;
}

static struct notifier_block vector_inetaddr_notifier = {
    .notifier_call = vector_inetaddr_event,
};

static void inet_register(void)
{
    register_inetaddr_notifier(&vector_inetaddr_notifier);
}
#else
static inline void inet_register(void)
{
}
#endif

static int vector_net_init(void)
{
    mconsole_register_dev(&vector_mc);
    inet_register();
    return 0;
}

__initcall(vector_net_init);
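The head/tail arithmetic in vector_advancehead()/vector_advancetail() above
is easier to see in isolation. Here is a self-contained sketch of the same
wraparound scheme, simplified to a single thread and using hypothetical
names (the real driver's locking is omitted):

/* tail advances on enqueue, head on dequeue, both modulo max_depth;
 * when the queue drains to empty, head and tail snap back to 0 so
 * the next sendmmsg()/recvmmsg() can use one maximal contiguous span.
 */
struct toy_queue {
    int head, tail, depth, max_depth;
};

static int toy_enqueue(struct toy_queue *q, int n)
{
    q->tail = (q->tail + n) % q->max_depth;
    q->depth += n;
    return q->depth;
}

static int toy_dequeue(struct toy_queue *q, int n)
{
    q->head = (q->head + n) % q->max_depth;
    q->depth -= n;
    if (q->depth == 0)          /* reset for max-size vectors */
        q->head = q->tail = 0;
    return q->depth;
}

/* A dequeuer mirrors vector_send(): it may only consume the contiguous
 * run [head, min(head + depth, max_depth)) in one call, then wrap.
 */
static int toy_contiguous(const struct toy_queue *q)
{
    int run = q->depth;

    if (q->head + run > q->max_depth)
        run = q->max_depth - q->head;
    return run;
}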
arch/um/drivers/vector_kern.h (+129)
/*
 * Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 * Licensed under the GPL
 */

#ifndef __UM_VECTOR_KERN_H
#define __UM_VECTOR_KERN_H

#include <linux/netdevice.h>
#include <linux/platform_device.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include "vector_user.h"

/* Queue structure specially adapted for multiple enqueue/dequeue
 * in a mmsgrecv/mmsgsend context
 */

/* Dequeue method */

#define QUEUE_SENDMSG 0
#define QUEUE_SENDMMSG 1

#define VECTOR_RX 1
#define VECTOR_TX (1 << 1)
#define VECTOR_BPF (1 << 2)

#define ETH_MAX_PACKET 1500
#define ETH_HEADER_OTHER 32 /* just in case someone decides to go mad on QnQ */

struct vector_queue {
    struct mmsghdr *mmsg_vector;
    void **skbuff_vector;
    /* backlink to device which owns us */
    struct net_device *dev;
    spinlock_t head_lock;
    spinlock_t tail_lock;
    int queue_depth, head, tail, max_depth, max_iov_frags;
    short options;
};

struct vector_estats {
    uint64_t rx_queue_max;
    uint64_t rx_queue_running_average;
    uint64_t tx_queue_max;
    uint64_t tx_queue_running_average;
    uint64_t rx_encaps_errors;
    uint64_t tx_timeout_count;
    uint64_t tx_restart_queue;
    uint64_t tx_kicks;
    uint64_t tx_flow_control_xon;
    uint64_t tx_flow_control_xoff;
    uint64_t rx_csum_offload_good;
    uint64_t rx_csum_offload_errors;
    uint64_t sg_ok;
    uint64_t sg_linearized;
};

#define VERIFY_HEADER_NOK -1
#define VERIFY_HEADER_OK 0
#define VERIFY_CSUM_OK 1

struct vector_private {
    struct list_head list;
    spinlock_t lock;
    struct net_device *dev;

    int unit;

    /* Timeout timer in TX */

    struct timer_list tl;

    /* Scheduled "remove device" work */
    struct work_struct reset_tx;
    struct vector_fds *fds;

    struct vector_queue *rx_queue;
    struct vector_queue *tx_queue;

    int rx_irq;
    int tx_irq;

    struct arglist *parsed;

    void *transport_data; /* transport specific params if needed */

    int max_packet;
    int req_size; /* different from max packet - used for TSO */
    int headroom;

    int options;

    /* remote address if any - some transports will leave this as null */

    int header_size;
    int rx_header_size;
    int coalesce;

    void *header_rxbuffer;
    void *header_txbuffer;

    int (*form_header)(uint8_t *header,
        struct sk_buff *skb, struct vector_private *vp);
    int (*verify_header)(uint8_t *header,
        struct sk_buff *skb, struct vector_private *vp);

    spinlock_t stats_lock;

    struct tasklet_struct tx_poll;
    bool rexmit_scheduled;
    bool opened;
    bool in_write_poll;

    /* ethtool stats */

    struct vector_estats estats;
    void *bpf;

    char user[0];
};

extern int build_transport_data(struct vector_private *vp);

#endif
arch/um/drivers/vector_transports.c (+458)
··· 1 + /* 2 + * Copyright (C) 2017 - Cambridge Greys Limited 3 + * Copyright (C) 2011 - 2014 Cisco Systems Inc 4 + * Licensed under the GPL. 5 + */ 6 + 7 + #include <linux/etherdevice.h> 8 + #include <linux/netdevice.h> 9 + #include <linux/skbuff.h> 10 + #include <linux/slab.h> 11 + #include <asm/byteorder.h> 12 + #include <uapi/linux/ip.h> 13 + #include <uapi/linux/virtio_net.h> 14 + #include <linux/virtio_net.h> 15 + #include <linux/virtio_byteorder.h> 16 + #include <linux/netdev_features.h> 17 + #include "vector_user.h" 18 + #include "vector_kern.h" 19 + 20 + #define GOOD_LINEAR 512 21 + #define GSO_ERROR "Incoming GSO frames and GRO disabled on the interface" 22 + 23 + struct gre_minimal_header { 24 + uint16_t header; 25 + uint16_t arptype; 26 + }; 27 + 28 + 29 + struct uml_gre_data { 30 + uint32_t rx_key; 31 + uint32_t tx_key; 32 + uint32_t sequence; 33 + 34 + bool ipv6; 35 + bool has_sequence; 36 + bool pin_sequence; 37 + bool checksum; 38 + bool key; 39 + struct gre_minimal_header expected_header; 40 + 41 + uint32_t checksum_offset; 42 + uint32_t key_offset; 43 + uint32_t sequence_offset; 44 + 45 + }; 46 + 47 + struct uml_l2tpv3_data { 48 + uint64_t rx_cookie; 49 + uint64_t tx_cookie; 50 + uint64_t rx_session; 51 + uint64_t tx_session; 52 + uint32_t counter; 53 + 54 + bool udp; 55 + bool ipv6; 56 + bool has_counter; 57 + bool pin_counter; 58 + bool cookie; 59 + bool cookie_is_64; 60 + 61 + uint32_t cookie_offset; 62 + uint32_t session_offset; 63 + uint32_t counter_offset; 64 + }; 65 + 66 + static int l2tpv3_form_header(uint8_t *header, 67 + struct sk_buff *skb, struct vector_private *vp) 68 + { 69 + struct uml_l2tpv3_data *td = vp->transport_data; 70 + uint32_t *counter; 71 + 72 + if (td->udp) 73 + *(uint32_t *) header = cpu_to_be32(L2TPV3_DATA_PACKET); 74 + (*(uint32_t *) (header + td->session_offset)) = td->tx_session; 75 + 76 + if (td->cookie) { 77 + if (td->cookie_is_64) 78 + (*(uint64_t *)(header + td->cookie_offset)) = 79 + td->tx_cookie; 80 + else 81 + (*(uint32_t *)(header + td->cookie_offset)) = 82 + td->tx_cookie; 83 + } 84 + if (td->has_counter) { 85 + counter = (uint32_t *)(header + td->counter_offset); 86 + if (td->pin_counter) { 87 + *counter = 0; 88 + } else { 89 + td->counter++; 90 + *counter = cpu_to_be32(td->counter); 91 + } 92 + } 93 + return 0; 94 + } 95 + 96 + static int gre_form_header(uint8_t *header, 97 + struct sk_buff *skb, struct vector_private *vp) 98 + { 99 + struct uml_gre_data *td = vp->transport_data; 100 + uint32_t *sequence; 101 + *((uint32_t *) header) = *((uint32_t *) &td->expected_header); 102 + if (td->key) 103 + (*(uint32_t *) (header + td->key_offset)) = td->tx_key; 104 + if (td->has_sequence) { 105 + sequence = (uint32_t *)(header + td->sequence_offset); 106 + if (td->pin_sequence) 107 + *sequence = 0; 108 + else 109 + *sequence = cpu_to_be32(++td->sequence); 110 + } 111 + return 0; 112 + } 113 + 114 + static int raw_form_header(uint8_t *header, 115 + struct sk_buff *skb, struct vector_private *vp) 116 + { 117 + struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header; 118 + 119 + virtio_net_hdr_from_skb( 120 + skb, 121 + vheader, 122 + virtio_legacy_is_little_endian(), 123 + false 124 + ); 125 + 126 + return 0; 127 + } 128 + 129 + static int l2tpv3_verify_header( 130 + uint8_t *header, struct sk_buff *skb, struct vector_private *vp) 131 + { 132 + struct uml_l2tpv3_data *td = vp->transport_data; 133 + uint32_t *session; 134 + uint64_t cookie; 135 + 136 + if ((!td->udp) && (!td->ipv6)) 137 + header += sizeof(struct iphdr) /* fix for 
138 +
139 + 	/* we do not do a strict check for "data" packets as per
140 + 	 * the RFC spec because the pure IP spec does not have
141 + 	 * that anyway.
142 + 	 */
143 +
144 + 	if (td->cookie) {
145 + 		if (td->cookie_is_64)
146 + 			cookie = *(uint64_t *)(header + td->cookie_offset);
147 + 		else
148 + 			cookie = *(uint32_t *)(header + td->cookie_offset);
149 + 		if (cookie != td->rx_cookie) {
150 + 			if (net_ratelimit())
151 + 				netdev_err(vp->dev, "uml_l2tpv3: unknown cookie id\n");
152 + 			return -1;
153 + 		}
154 + 	}
155 + 	session = (uint32_t *) (header + td->session_offset);
156 + 	if (*session != td->rx_session) {
157 + 		if (net_ratelimit())
158 + 			netdev_err(vp->dev, "uml_l2tpv3: session mismatch\n");
159 + 		return -1;
160 + 	}
161 + 	return 0;
162 + }
163 +
164 + static int gre_verify_header(
165 + 	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
166 + {
167 +
168 + 	uint32_t key;
169 + 	struct uml_gre_data *td = vp->transport_data;
170 +
171 + 	if (!td->ipv6)
172 + 		header += sizeof(struct iphdr); /* fix for ipv4 raw */
173 +
174 + 	if (*((uint32_t *) header) != *((uint32_t *) &td->expected_header)) {
175 + 		if (net_ratelimit())
176 + 			netdev_err(vp->dev, "header type disagreement, expecting %0x, got %0x\n",
177 + 				*((uint32_t *) &td->expected_header),
178 + 				*((uint32_t *) header)
179 + 			);
180 + 		return -1;
181 + 	}
182 +
183 + 	if (td->key) {
184 + 		key = (*(uint32_t *)(header + td->key_offset));
185 + 		if (key != td->rx_key) {
186 + 			if (net_ratelimit())
187 + 				netdev_err(vp->dev, "unknown key id %0x, expecting %0x\n",
188 + 					key, td->rx_key);
189 + 			return -1;
190 + 		}
191 + 	}
192 + 	return 0;
193 + }
194 +
195 + static int raw_verify_header(
196 + 	uint8_t *header, struct sk_buff *skb, struct vector_private *vp)
197 + {
198 + 	struct virtio_net_hdr *vheader = (struct virtio_net_hdr *) header;
199 +
200 + 	if ((vheader->gso_type != VIRTIO_NET_HDR_GSO_NONE) &&
201 + 		(vp->req_size != 65536)) {
202 + 		if (net_ratelimit())
203 + 			netdev_err(
204 + 				vp->dev,
205 + 				GSO_ERROR
206 + 			);
207 + 	}
208 + 	if ((vheader->flags & VIRTIO_NET_HDR_F_DATA_VALID) > 0)
209 + 		return 1;
210 +
211 + 	virtio_net_hdr_to_skb(skb, vheader, virtio_legacy_is_little_endian());
212 + 	return 0;
213 + }
214 +
215 + static bool get_uint_param(
216 + 	struct arglist *def, char *param, unsigned int *result)
217 + {
218 + 	char *arg = uml_vector_fetch_arg(def, param);
219 +
220 + 	if (arg != NULL) {
221 + 		if (kstrtouint(arg, 0, result) == 0)
222 + 			return true;
223 + 	}
224 + 	return false;
225 + }
226 +
227 + static bool get_ulong_param(
228 + 	struct arglist *def, char *param, unsigned long *result)
229 + {
230 + 	char *arg = uml_vector_fetch_arg(def, param);
231 +
232 + 	if (arg != NULL) {
233 + 		if (kstrtoul(arg, 0, result) == 0)
234 + 			return true;
235 + 	}
236 +
237 + 	return false;
238 + }
239 +
240 + static int build_gre_transport_data(struct vector_private *vp)
241 + {
242 + 	struct uml_gre_data *td;
243 + 	unsigned int temp_int;
244 + 	unsigned int temp_rx;
245 + 	unsigned int temp_tx;
246 +
247 + 	vp->transport_data = kzalloc(sizeof(struct uml_gre_data), GFP_KERNEL);
248 + 	if (vp->transport_data == NULL)
249 + 		return -ENOMEM;
250 + 	td = vp->transport_data;
251 + 	td->sequence = 0;
252 +
253 + 	td->expected_header.arptype = GRE_IRB;
254 + 	td->expected_header.header = 0;
255 +
256 + 	vp->form_header = &gre_form_header;
257 + 	vp->verify_header = &gre_verify_header;
258 + 	vp->header_size = 4;
259 + 	td->key_offset = 4;
260 + 	td->sequence_offset = 4;
261 + 	td->checksum_offset = 4;
262 +
263 + 	td->ipv6 = false;
264 + 	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
265 + 		if (temp_int > 0)
266 + 			td->ipv6 = true;
267 + 	}
268 + 	td->key = false;
269 + 	if (get_uint_param(vp->parsed, "rx_key", &temp_rx)) {
270 + 		if (get_uint_param(vp->parsed, "tx_key", &temp_tx)) {
271 + 			td->key = true;
272 + 			td->expected_header.header |= GRE_MODE_KEY;
273 + 			td->rx_key = cpu_to_be32(temp_rx);
274 + 			td->tx_key = cpu_to_be32(temp_tx);
275 + 			vp->header_size += 4;
276 + 			td->sequence_offset += 4;
277 + 		} else {
278 + 			return -EINVAL;
279 + 		}
280 + 	}
281 +
282 + 	td->has_sequence = false;
283 + 	if (get_uint_param(vp->parsed, "sequence", &temp_int)) {
284 + 		if (temp_int > 0) {
285 + 			vp->header_size += 4;
286 + 			td->has_sequence = true;
287 + 			td->expected_header.header |= GRE_MODE_SEQUENCE;
288 + 			if (get_uint_param(
289 + 				vp->parsed, "pin_sequence", &temp_int)) {
290 + 				if (temp_int > 0)
291 + 					td->pin_sequence = true;
292 + 			}
293 + 		}
294 + 	}
295 + 	vp->rx_header_size = vp->header_size;
296 + 	if (!td->ipv6)
297 + 		vp->rx_header_size += sizeof(struct iphdr);
298 + 	return 0;
299 + }
300 +
301 + static int build_l2tpv3_transport_data(struct vector_private *vp)
302 + {
303 +
304 + 	struct uml_l2tpv3_data *td;
305 + 	unsigned int temp_int, temp_rxs, temp_txs;
306 + 	unsigned long temp_rx;
307 + 	unsigned long temp_tx;
308 +
309 + 	vp->transport_data = kzalloc(
310 + 		sizeof(struct uml_l2tpv3_data), GFP_KERNEL);
311 +
312 + 	if (vp->transport_data == NULL)
313 + 		return -ENOMEM;
314 +
315 + 	td = vp->transport_data;
316 +
317 + 	vp->form_header = &l2tpv3_form_header;
318 + 	vp->verify_header = &l2tpv3_verify_header;
319 + 	td->counter = 0;
320 +
321 + 	vp->header_size = 4;
322 + 	td->session_offset = 0;
323 + 	td->cookie_offset = 4;
324 + 	td->counter_offset = 4;
325 +
326 +
327 + 	td->ipv6 = false;
328 + 	if (get_uint_param(vp->parsed, "v6", &temp_int)) {
329 + 		if (temp_int > 0)
330 + 			td->ipv6 = true;
331 + 	}
332 +
333 + 	if (get_uint_param(vp->parsed, "rx_session", &temp_rxs)) {
334 + 		if (get_uint_param(vp->parsed, "tx_session", &temp_txs)) {
335 + 			td->tx_session = cpu_to_be32(temp_txs);
336 + 			td->rx_session = cpu_to_be32(temp_rxs);
337 + 		} else {
338 + 			return -EINVAL;
339 + 		}
340 + 	} else {
341 + 		return -EINVAL;
342 + 	}
343 +
344 + 	td->cookie_is_64 = false;
345 + 	if (get_uint_param(vp->parsed, "cookie64", &temp_int)) {
346 + 		if (temp_int > 0)
347 + 			td->cookie_is_64 = true;
348 + 	}
349 + 	td->cookie = false;
350 + 	if (get_ulong_param(vp->parsed, "rx_cookie", &temp_rx)) {
351 + 		if (get_ulong_param(vp->parsed, "tx_cookie", &temp_tx)) {
352 + 			td->cookie = true;
353 + 			if (td->cookie_is_64) {
354 + 				td->rx_cookie = cpu_to_be64(temp_rx);
355 + 				td->tx_cookie = cpu_to_be64(temp_tx);
356 + 				vp->header_size += 8;
357 + 				td->counter_offset += 8;
358 + 			} else {
359 + 				td->rx_cookie = cpu_to_be32(temp_rx);
360 + 				td->tx_cookie = cpu_to_be32(temp_tx);
361 + 				vp->header_size += 4;
362 + 				td->counter_offset += 4;
363 + 			}
364 + 		} else {
365 + 			return -EINVAL;
366 + 		}
367 + 	}
368 +
369 + 	td->has_counter = false;
370 + 	if (get_uint_param(vp->parsed, "counter", &temp_int)) {
371 + 		if (temp_int > 0) {
372 + 			td->has_counter = true;
373 + 			vp->header_size += 4;
374 + 			if (get_uint_param(
375 + 				vp->parsed, "pin_counter", &temp_int)) {
376 + 				if (temp_int > 0)
377 + 					td->pin_counter = true;
378 + 			}
379 + 		}
380 + 	}
381 +
382 + 	if (get_uint_param(vp->parsed, "udp", &temp_int)) {
383 + 		if (temp_int > 0) {
384 + 			td->udp = true;
385 + 			vp->header_size += 4;
386 + 			td->counter_offset += 4;
387 + 			td->session_offset += 4;
388 + 			td->cookie_offset += 4;
389 + 		}
390 + 	}
391 +
392 + 	vp->rx_header_size = vp->header_size;
393 + 	if ((!td->ipv6) && (!td->udp))
394 + 		vp->rx_header_size += sizeof(struct iphdr);
395 +
396 + 	return 0;
397 + }
398 +
399 + static int build_raw_transport_data(struct vector_private *vp)
400 + {
401 + 	if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
402 + 		if (!uml_raw_enable_vnet_headers(vp->fds->tx_fd))
403 + 			return -1;
404 + 		vp->form_header = &raw_form_header;
405 + 		vp->verify_header = &raw_verify_header;
406 + 		vp->header_size = sizeof(struct virtio_net_hdr);
407 + 		vp->rx_header_size = sizeof(struct virtio_net_hdr);
408 + 		vp->dev->hw_features |= (NETIF_F_TSO | NETIF_F_GRO);
409 + 		vp->dev->features |=
410 + 			(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
411 + 			NETIF_F_TSO | NETIF_F_GRO);
412 + 		netdev_info(
413 + 			vp->dev,
414 + 			"raw: using vnet headers for tso and tx/rx checksum"
415 + 		);
416 + 	}
417 + 	return 0;
418 + }
419 +
420 + static int build_tap_transport_data(struct vector_private *vp)
421 + {
422 + 	if (uml_raw_enable_vnet_headers(vp->fds->rx_fd)) {
423 + 		vp->form_header = &raw_form_header;
424 + 		vp->verify_header = &raw_verify_header;
425 + 		vp->header_size = sizeof(struct virtio_net_hdr);
426 + 		vp->rx_header_size = sizeof(struct virtio_net_hdr);
427 + 		vp->dev->hw_features |=
428 + 			(NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
429 + 		vp->dev->features |=
430 + 			(NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
431 + 			NETIF_F_TSO | NETIF_F_GSO | NETIF_F_GRO);
432 + 		netdev_info(
433 + 			vp->dev,
434 + 			"tap/raw: using vnet headers for tso and tx/rx checksum"
435 + 		);
436 + 	} else {
437 + 		return 0; /* do not try to enable tap too if raw failed */
438 + 	}
439 + 	if (uml_tap_enable_vnet_headers(vp->fds->tx_fd))
440 + 		return 0;
441 + 	return -1;
442 + }
443 +
444 + int build_transport_data(struct vector_private *vp)
445 + {
446 + 	char *transport = uml_vector_fetch_arg(vp->parsed, "transport");
447 +
448 + 	if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
449 + 		return build_gre_transport_data(vp);
450 + 	if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
451 + 		return build_l2tpv3_transport_data(vp);
452 + 	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
453 + 		return build_raw_transport_data(vp);
454 + 	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
455 + 		return build_tap_transport_data(vp);
456 + 	return 0;
457 + }
458 +
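A worked example of the header accounting above: for a GRE transport configured with both a key and a sequence number, vp->header_size is 4 (flags + ethertype) + 4 (key) + 4 (sequence) = 12 bytes, and on an IPv4 raw socket vp->rx_header_size adds sizeof(struct iphdr) = 20 for a total of 32, because the raw socket delivers the outer IP header on receive but not on transmit. The standalone userspace sketch below is not part of the patch; it lays out the same 12-byte header gre_form_header() would emit for that configuration, with htons/htonl standing in for the kernel's cpu_to_be16/cpu_to_be32 and arbitrary example key and sequence values.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <arpa/inet.h>

	int main(void)
	{
		uint8_t header[12];
		/* GRE_MODE_KEY | GRE_MODE_SEQUENCE, as accumulated in td->expected_header.header */
		uint16_t flags = htons((2 << 12) | (1 << 12));
		uint16_t arptype = htons(0x6558);	/* GRE_IRB: transparent Ethernet bridging */
		uint32_t key = htonl(0xabcd);		/* lands at td->key_offset == 4 */
		uint32_t sequence = htonl(1);		/* td->sequence_offset == 8 once a key is present */

		memcpy(header, &flags, 2);
		memcpy(header + 2, &arptype, 2);
		memcpy(header + 4, &key, 4);
		memcpy(header + 8, &sequence, 4);

		for (int i = 0; i < 12; i++)
			printf("%02x%c", header[i], (i % 4 == 3) ? '\n' : ' ');
		return 0;
	}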
+586
arch/um/drivers/vector_user.c
··· 1 + /*
2 + * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 + * Licensed under the GPL
4 + */
5 +
6 + #include <stdio.h>
7 + #include <unistd.h>
8 + #include <stdarg.h>
9 + #include <errno.h>
10 + #include <stddef.h>
11 + #include <string.h>
12 + #include <sys/ioctl.h>
13 + #include <net/if.h>
14 + #include <linux/if_tun.h>
15 + #include <arpa/inet.h>
16 + #include <sys/types.h>
17 + #include <sys/stat.h>
18 + #include <fcntl.h>
19 + #include <sys/uio.h>
20 + #include <sys/socket.h>
21 + #include <net/ethernet.h>
22 + #include <netinet/ip.h>
23 + #include <netinet/ether.h>
24 + #include <linux/if_ether.h>
25 + #include <linux/if_packet.h>
26 + #include <linux/filter.h>
27 + #include <sys/wait.h>
28 + #include <linux/virtio_net.h>
29 + #include <netdb.h>
30 + #include <stdlib.h>
31 + #include <os.h>
32 + #include <um_malloc.h>
33 + #include "vector_user.h"
34 +
35 + #define ID_GRE 0
36 + #define ID_L2TPV3 1
37 + #define ID_MAX 1
38 +
39 + #define TOKEN_IFNAME "ifname"
40 +
41 + #define TRANS_RAW "raw"
42 + #define TRANS_RAW_LEN strlen(TRANS_RAW)
43 +
44 + #define QDISC_FAIL "user_init_raw: could not disable qdisc on interface\n"
45 + #define VNET_HDR_FAIL "could not enable vnet headers on fd %d\n"
46 + #define TUN_GET_F_FAIL "tapraw: TUNGETFEATURES failed: %s\n"
47 + #define L2TPV3_BIND_FAIL "l2tpv3_open : could not bind socket err=%i\n"
48 + #define BPF_ATTACH_FAIL "Failed to attach filter size %d to %d, err %d\n"
49 +
50 + /* This is a very ugly, brute-force lookup, but it is done
51 + * only once at initialization, so it is not worth using
52 + * hashes or anything more intelligent.
53 + */
54 +
55 + char *uml_vector_fetch_arg(struct arglist *ifspec, char *token)
56 + {
57 + 	int i;
58 +
59 + 	for (i = 0; i < ifspec->numargs; i++) {
60 + 		if (strcmp(ifspec->tokens[i], token) == 0)
61 + 			return ifspec->values[i];
62 + 	}
63 + 	return NULL;
64 +
65 + }
66 +
67 + struct arglist *uml_parse_vector_ifspec(char *arg)
68 + {
69 + 	struct arglist *result;
70 + 	int pos, len;
71 + 	bool parsing_token = true, next_starts = true;
72 +
73 + 	if (arg == NULL)
74 + 		return NULL;
75 + 	result = uml_kmalloc(sizeof(struct arglist), UM_GFP_KERNEL);
76 + 	if (result == NULL)
77 + 		return NULL;
78 + 	result->numargs = 0;
79 + 	len = strlen(arg);
80 + 	for (pos = 0; pos < len && result->numargs < MAXVARGS; pos++) {
81 + 		if (next_starts) {
82 + 			if (parsing_token) {
83 + 				result->tokens[result->numargs] = arg + pos;
84 + 			} else {
85 + 				result->values[result->numargs] = arg + pos;
86 + 				result->numargs++;
87 + 			}
88 + 			next_starts = false;
89 + 		}
90 + 		if (*(arg + pos) == '=') {
91 + 			if (parsing_token)
92 + 				parsing_token = false;
93 + 			else
94 + 				goto cleanup;
95 + 			next_starts = true;
96 + 			(*(arg + pos)) = '\0';
97 + 		}
98 + 		if (*(arg + pos) == ',') {
99 + 			parsing_token = true;
100 + 			next_starts = true;
101 + 			(*(arg + pos)) = '\0';
102 + 		}
103 + 	}
104 + 	return result;
105 + cleanup:
106 + 	printk(UM_KERN_ERR "vector_setup - Couldn't parse '%s'\n", arg);
107 + 	kfree(result);
108 + 	return NULL;
109 + }
110 +
111 + /*
112 + * Socket/FD configuration functions. These return a structure
113 + * of rx and tx descriptors to cover cases where these are not
114 + * the same (e.g. read via raw socket and write via tap).
115 + */ 116 + 117 + #define PATH_NET_TUN "/dev/net/tun" 118 + 119 + static struct vector_fds *user_init_tap_fds(struct arglist *ifspec) 120 + { 121 + struct ifreq ifr; 122 + int fd = -1; 123 + struct sockaddr_ll sock; 124 + int err = -ENOMEM, offload; 125 + char *iface; 126 + struct vector_fds *result = NULL; 127 + 128 + iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); 129 + if (iface == NULL) { 130 + printk(UM_KERN_ERR "uml_tap: failed to parse interface spec\n"); 131 + goto tap_cleanup; 132 + } 133 + 134 + result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL); 135 + if (result == NULL) { 136 + printk(UM_KERN_ERR "uml_tap: failed to allocate file descriptors\n"); 137 + goto tap_cleanup; 138 + } 139 + result->rx_fd = -1; 140 + result->tx_fd = -1; 141 + result->remote_addr = NULL; 142 + result->remote_addr_size = 0; 143 + 144 + /* TAP */ 145 + 146 + fd = open(PATH_NET_TUN, O_RDWR); 147 + if (fd < 0) { 148 + printk(UM_KERN_ERR "uml_tap: failed to open tun device\n"); 149 + goto tap_cleanup; 150 + } 151 + result->tx_fd = fd; 152 + memset(&ifr, 0, sizeof(ifr)); 153 + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; 154 + strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); 155 + 156 + err = ioctl(fd, TUNSETIFF, (void *) &ifr); 157 + if (err != 0) { 158 + printk(UM_KERN_ERR "uml_tap: failed to select tap interface\n"); 159 + goto tap_cleanup; 160 + } 161 + 162 + offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; 163 + ioctl(fd, TUNSETOFFLOAD, offload); 164 + 165 + /* RAW */ 166 + 167 + fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 168 + if (fd == -1) { 169 + printk(UM_KERN_ERR 170 + "uml_tap: failed to create socket: %i\n", -errno); 171 + goto tap_cleanup; 172 + } 173 + result->rx_fd = fd; 174 + memset(&ifr, 0, sizeof(ifr)); 175 + strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); 176 + if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) { 177 + printk(UM_KERN_ERR 178 + "uml_tap: failed to set interface: %i\n", -errno); 179 + goto tap_cleanup; 180 + } 181 + 182 + sock.sll_family = AF_PACKET; 183 + sock.sll_protocol = htons(ETH_P_ALL); 184 + sock.sll_ifindex = ifr.ifr_ifindex; 185 + 186 + if (bind(fd, 187 + (struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) { 188 + printk(UM_KERN_ERR 189 + "user_init_tap: failed to bind raw pair, err %d\n", 190 + -errno); 191 + goto tap_cleanup; 192 + } 193 + return result; 194 + tap_cleanup: 195 + printk(UM_KERN_ERR "user_init_tap: init failed, error %d", err); 196 + if (result != NULL) { 197 + if (result->rx_fd >= 0) 198 + os_close_file(result->rx_fd); 199 + if (result->tx_fd >= 0) 200 + os_close_file(result->tx_fd); 201 + kfree(result); 202 + } 203 + return NULL; 204 + } 205 + 206 + 207 + static struct vector_fds *user_init_raw_fds(struct arglist *ifspec) 208 + { 209 + struct ifreq ifr; 210 + int rxfd = -1, txfd = -1; 211 + struct sockaddr_ll sock; 212 + int err = -ENOMEM; 213 + char *iface; 214 + struct vector_fds *result = NULL; 215 + int optval = 1; 216 + 217 + 218 + iface = uml_vector_fetch_arg(ifspec, TOKEN_IFNAME); 219 + if (iface == NULL) 220 + goto cleanup; 221 + 222 + rxfd = socket(AF_PACKET, SOCK_RAW, ETH_P_ALL); 223 + if (rxfd == -1) { 224 + err = -errno; 225 + goto cleanup; 226 + } 227 + txfd = socket(AF_PACKET, SOCK_RAW, 0); /* Turn off RX on this fd */ 228 + if (txfd == -1) { 229 + err = -errno; 230 + goto cleanup; 231 + } 232 + memset(&ifr, 0, sizeof(ifr)); 233 + strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); 234 + if (ioctl(rxfd, SIOCGIFINDEX, (void *) &ifr) < 0) { 235 + err = 
236 + 		goto cleanup;
237 + 	}
238 +
239 + 	sock.sll_family = AF_PACKET;
240 + 	sock.sll_protocol = htons(ETH_P_ALL);
241 + 	sock.sll_ifindex = ifr.ifr_ifindex;
242 +
243 + 	if (bind(rxfd,
244 + 		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
245 + 		err = -errno;
246 + 		goto cleanup;
247 + 	}
248 +
249 + 	sock.sll_family = AF_PACKET;
250 + 	sock.sll_protocol = htons(ETH_P_IP);
251 + 	sock.sll_ifindex = ifr.ifr_ifindex;
252 +
253 + 	if (bind(txfd,
254 + 		(struct sockaddr *) &sock, sizeof(struct sockaddr_ll)) < 0) {
255 + 		err = -errno;
256 + 		goto cleanup;
257 + 	}
258 +
259 + 	if (setsockopt(txfd,
260 + 		SOL_PACKET, PACKET_QDISC_BYPASS,
261 + 		&optval, sizeof(optval)) != 0) {
262 + 		printk(UM_KERN_INFO QDISC_FAIL);
263 + 	}
264 +
265 + 	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
266 + 	if (result != NULL) {
267 + 		result->rx_fd = rxfd;
268 + 		result->tx_fd = txfd;
269 + 		result->remote_addr = NULL;
270 + 		result->remote_addr_size = 0;
271 + 	}
272 + 	return result;
273 + cleanup:
274 + 	printk(UM_KERN_ERR "user_init_raw: init failed, error %d\n", err);
275 + 	if (rxfd >= 0)
276 + 		os_close_file(rxfd);
277 + 	if (txfd >= 0)
278 + 		os_close_file(txfd);
279 + 	if (result != NULL)
280 + 		kfree(result);
281 + 	return NULL;
282 + }
283 +
284 + bool uml_raw_enable_vnet_headers(int fd)
285 + {
286 + 	int optval = 1;
287 +
288 + 	if (setsockopt(fd,
289 + 		SOL_PACKET, PACKET_VNET_HDR,
290 + 		&optval, sizeof(optval)) != 0) {
291 + 		printk(UM_KERN_INFO VNET_HDR_FAIL, fd);
292 + 		return false;
293 + 	}
294 + 	return true;
295 + }
296 + bool uml_tap_enable_vnet_headers(int fd)
297 + {
298 + 	unsigned int features;
299 + 	int len = sizeof(struct virtio_net_hdr);
300 +
301 + 	if (ioctl(fd, TUNGETFEATURES, &features) == -1) {
302 + 		printk(UM_KERN_INFO TUN_GET_F_FAIL, strerror(errno));
303 + 		return false;
304 + 	}
305 + 	if ((features & IFF_VNET_HDR) == 0) {
306 + 		printk(UM_KERN_INFO "tapraw: No VNET HEADER support\n");
307 + 		return false;
308 + 	}
309 + 	ioctl(fd, TUNSETVNETHDRSZ, &len);
310 + 	return true;
311 + }
312 +
313 + static struct vector_fds *user_init_socket_fds(struct arglist *ifspec, int id)
314 + {
315 + 	int err = -ENOMEM;
316 + 	int fd = -1, gairet;
317 + 	struct addrinfo srchints;
318 + 	struct addrinfo dsthints;
319 + 	bool v6, udp;
320 + 	char *value;
321 + 	char *src, *dst, *srcport, *dstport;
322 + 	struct addrinfo *gairesult = NULL;
323 + 	struct vector_fds *result = NULL;
324 +
325 +
326 + 	value = uml_vector_fetch_arg(ifspec, "v6");
327 + 	v6 = false;
328 + 	udp = false;
329 + 	if (value != NULL) {
330 + 		if (strtol((const char *) value, NULL, 10) > 0)
331 + 			v6 = true;
332 + 	}
333 +
334 + 	value = uml_vector_fetch_arg(ifspec, "udp");
335 + 	if (value != NULL) {
336 + 		if (strtol((const char *) value, NULL, 10) > 0)
337 + 			udp = true;
338 + 	}
339 + 	src = uml_vector_fetch_arg(ifspec, "src");
340 + 	dst = uml_vector_fetch_arg(ifspec, "dst");
341 + 	srcport = uml_vector_fetch_arg(ifspec, "srcport");
342 + 	dstport = uml_vector_fetch_arg(ifspec, "dstport");
343 +
344 + 	memset(&dsthints, 0, sizeof(dsthints));
345 +
346 + 	if (v6)
347 + 		dsthints.ai_family = AF_INET6;
348 + 	else
349 + 		dsthints.ai_family = AF_INET;
350 +
351 + 	switch (id) {
352 + 	case ID_GRE:
353 + 		dsthints.ai_socktype = SOCK_RAW;
354 + 		dsthints.ai_protocol = IPPROTO_GRE;
355 + 		break;
356 + 	case ID_L2TPV3:
357 + 		if (udp) {
358 + 			dsthints.ai_socktype = SOCK_DGRAM;
359 + 			dsthints.ai_protocol = 0;
360 + 		} else {
361 + 			dsthints.ai_socktype = SOCK_RAW;
362 + 			dsthints.ai_protocol = IPPROTO_L2TP;
363 + 		}
364 + 		break;
365 + 	default:
366 + 		printk(UM_KERN_ERR "Unsupported socket type\n");
367 + 		return NULL;
368 + 	}
369 + 	memcpy(&srchints, &dsthints, sizeof(struct addrinfo));
370 +
371 + 	gairet = getaddrinfo(src, srcport, &srchints, &gairesult);
372 + 	if ((gairet != 0) || (gairesult == NULL)) {
373 + 		printk(UM_KERN_ERR
374 + 			"socket_open : could not resolve src, error = %s\n",
375 + 			gai_strerror(gairet)
376 + 		);
377 + 		return NULL;
378 + 	}
379 + 	fd = socket(gairesult->ai_family,
380 + 		gairesult->ai_socktype, gairesult->ai_protocol);
381 + 	if (fd == -1) {
382 + 		printk(UM_KERN_ERR
383 + 			"socket_open : could not open socket, error = %d\n",
384 + 			-errno
385 + 		);
386 + 		goto cleanup;
387 + 	}
388 + 	if (bind(fd,
389 + 		(struct sockaddr *) gairesult->ai_addr,
390 + 		gairesult->ai_addrlen)) {
391 + 		printk(UM_KERN_ERR L2TPV3_BIND_FAIL, errno);
392 + 		goto cleanup;
393 + 	}
394 +
395 + 	if (gairesult != NULL)
396 + 		freeaddrinfo(gairesult);
397 +
398 + 	gairesult = NULL;
399 +
400 + 	gairet = getaddrinfo(dst, dstport, &dsthints, &gairesult);
401 + 	if ((gairet != 0) || (gairesult == NULL)) {
402 + 		printk(UM_KERN_ERR
403 + 			"socket_open : could not resolve dst, error = %s\n",
404 + 			gai_strerror(gairet)
405 + 		);
406 + 		goto cleanup;
407 + 	}
408 +
409 + 	result = uml_kmalloc(sizeof(struct vector_fds), UM_GFP_KERNEL);
410 + 	if (result == NULL)
411 + 		goto cleanup;
412 + 	result->rx_fd = fd;
413 + 	result->tx_fd = fd;
414 + 	result->remote_addr = uml_kmalloc(
415 + 		gairesult->ai_addrlen, UM_GFP_KERNEL);
416 + 	if (result->remote_addr == NULL)
417 + 		goto cleanup;
418 + 	result->remote_addr_size = gairesult->ai_addrlen;
419 + 	memcpy(
420 + 		result->remote_addr,
421 + 		gairesult->ai_addr,
422 + 		gairesult->ai_addrlen
423 + 	);
424 + 	freeaddrinfo(gairesult);
425 + 	return result;
426 + cleanup:
427 + 	if (gairesult != NULL)
428 + 		freeaddrinfo(gairesult);
429 + 	printk(UM_KERN_ERR "user_init_socket: init failed, error %d\n", err);
430 + 	if (fd >= 0)
431 + 		os_close_file(fd);
432 + 	if (result != NULL) {
433 + 		if (result->remote_addr != NULL)
434 + 			kfree(result->remote_addr);
435 + 		kfree(result);
436 + 	}
437 + 	return NULL;
438 + }
439 +
440 + struct vector_fds *uml_vector_user_open(
441 + 	int unit,
442 + 	struct arglist *parsed
443 + )
444 + {
445 + 	char *transport;
446 +
447 + 	if (parsed == NULL) {
448 + 		printk(UM_KERN_ERR "no parsed config for unit %d\n", unit);
449 + 		return NULL;
450 + 	}
451 + 	transport = uml_vector_fetch_arg(parsed, "transport");
452 + 	if (transport == NULL) {
453 + 		printk(UM_KERN_ERR "missing transport for unit %d\n", unit);
454 + 		return NULL;
455 + 	}
456 + 	if (strncmp(transport, TRANS_RAW, TRANS_RAW_LEN) == 0)
457 + 		return user_init_raw_fds(parsed);
458 + 	if (strncmp(transport, TRANS_TAP, TRANS_TAP_LEN) == 0)
459 + 		return user_init_tap_fds(parsed);
460 + 	if (strncmp(transport, TRANS_GRE, TRANS_GRE_LEN) == 0)
461 + 		return user_init_socket_fds(parsed, ID_GRE);
462 + 	if (strncmp(transport, TRANS_L2TPV3, TRANS_L2TPV3_LEN) == 0)
463 + 		return user_init_socket_fds(parsed, ID_L2TPV3);
464 + 	return NULL;
465 + }
466 +
467 +
468 + int uml_vector_sendmsg(int fd, void *hdr, int flags)
469 + {
470 + 	int n;
471 +
472 + 	CATCH_EINTR(n = sendmsg(fd, (struct msghdr *) hdr, flags));
473 + 	if ((n < 0) && (errno == EAGAIN))
474 + 		return 0;
475 + 	if (n >= 0)
476 + 		return n;
477 + 	else
478 + 		return -errno;
479 + }
480 +
481 + int uml_vector_recvmsg(int fd, void *hdr, int flags)
482 + {
483 + 	int n;
484 +
485 + 	CATCH_EINTR(n = recvmsg(fd, (struct msghdr *) hdr, flags));
486 + 	if ((n < 0) && (errno == EAGAIN))
487 + 		return 0;
488 + 	if (n >= 0)
489 + 		return n;
490 + 	else
491 + 		return -errno;
492 + }
493 +
494 + int uml_vector_writev(int fd, void *hdr, int iovcount)
495 + {
496 + 	int n;
497 +
498 + 	CATCH_EINTR(n = writev(fd, (struct iovec *) hdr, iovcount));
499 + 	if ((n < 0) && (errno == EAGAIN))
500 + 		return 0;
501 + 	if (n >= 0)
502 + 		return n;
503 + 	else
504 + 		return -errno;
505 + }
506 +
507 + int uml_vector_sendmmsg(
508 + 	int fd,
509 + 	void *msgvec,
510 + 	unsigned int vlen,
511 + 	unsigned int flags)
512 + {
513 + 	int n;
514 +
515 + 	CATCH_EINTR(n = sendmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags));
516 + 	if ((n < 0) && (errno == EAGAIN))
517 + 		return 0;
518 + 	if (n >= 0)
519 + 		return n;
520 + 	else
521 + 		return -errno;
522 + }
523 +
524 + int uml_vector_recvmmsg(
525 + 	int fd,
526 + 	void *msgvec,
527 + 	unsigned int vlen,
528 + 	unsigned int flags)
529 + {
530 + 	int n;
531 +
532 + 	CATCH_EINTR(
533 + 		n = recvmmsg(fd, (struct mmsghdr *) msgvec, vlen, flags, 0));
534 + 	if ((n < 0) && (errno == EAGAIN))
535 + 		return 0;
536 + 	if (n >= 0)
537 + 		return n;
538 + 	else
539 + 		return -errno;
540 + }
541 + int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len)
542 + {
543 + 	int err = setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, bpf, bpf_len);
544 +
545 + 	if (err < 0)
546 + 		printk(UM_KERN_ERR BPF_ATTACH_FAIL, bpf_len, fd, -errno);
547 + 	return err;
548 + }
549 +
550 + #define DEFAULT_BPF_LEN 6
551 +
552 + void *uml_vector_default_bpf(int fd, void *mac)
553 + {
554 + 	struct sock_filter *bpf;
555 + 	uint32_t *mac1 = (uint32_t *)((char *)mac + 2);
556 + 	uint16_t *mac2 = (uint16_t *) mac;
557 + 	struct sock_fprog bpf_prog = {
558 + 		.len = DEFAULT_BPF_LEN,
559 + 		.filter = NULL,
560 + 	};
561 +
562 + 	bpf = uml_kmalloc(
563 + 		sizeof(struct sock_filter) * DEFAULT_BPF_LEN, UM_GFP_KERNEL);
564 + 	if (bpf != NULL) {
565 + 		bpf_prog.filter = bpf;
566 + 		/* ld [8] - A <- source MAC bytes 2-5 */
567 + 		bpf[0] = (struct sock_filter){ 0x20, 0, 0, 0x00000008 };
568 + 		/* jeq #0xMAC[2-5] jt 2 jf 5 */
569 + 		bpf[1] = (struct sock_filter){ 0x15, 0, 3, ntohl(*mac1)};
570 + 		/* ldh [6] - A <- source MAC bytes 0-1 */
571 + 		bpf[2] = (struct sock_filter){ 0x28, 0, 0, 0x00000006 };
572 + 		/* jeq #0xMAC[0-1] jt 4 jf 5 */
573 + 		bpf[3] = (struct sock_filter){ 0x15, 0, 1, ntohs(*mac2)};
574 + 		/* ret #0 - drop our own looped-back frames */
575 + 		bpf[4] = (struct sock_filter){ 0x6, 0, 0, 0x00000000 };
576 + 		/* ret #0x40000 - accept anything else */
577 + 		bpf[5] = (struct sock_filter){ 0x6, 0, 0, 0x00040000 };
578 + 		if (uml_vector_attach_bpf(
579 + 			fd, &bpf_prog, sizeof(struct sock_fprog)) < 0) {
580 + 			kfree(bpf);
581 + 			bpf = NULL;
582 + 		}
583 + 	}
584 + 	return bpf;
585 + }
586 +
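The default filter built above reads the frame's source MAC address (a 32-bit word at Ethernet offset 8 plus a 16-bit half-word at offset 6), returns 0 (drop) when it equals the device's own address, and 0x40000 (accept, generous snapshot length) otherwise. That is the "avoid software looping" item of the series: a raw or tap socket bound to the interface would otherwise read back the guest's own transmissions. The standalone sketch below is not part of the patch; it builds the identical six-instruction program with the symbolic BPF_STMT/BPF_JUMP helpers from <linux/filter.h>, with a made-up example MAC.

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>
	#include <linux/filter.h>

	int main(void)
	{
		/* made-up example address; the driver passes the real device MAC */
		const uint8_t mac[6] = { 0x02, 0x00, 0xc0, 0xa8, 0x00, 0x01 };
		uint32_t mac_lo;
		uint16_t mac_hi;
		struct sock_filter filter[6];
		struct sock_fprog prog = { .len = 6, .filter = filter };

		memcpy(&mac_lo, mac + 2, 4);	/* bytes 2-5 as they appear on the wire */
		memcpy(&mac_hi, mac, 2);	/* bytes 0-1 */

		filter[0] = (struct sock_filter) BPF_STMT(BPF_LD | BPF_W | BPF_ABS, 8);
		filter[1] = (struct sock_filter) BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ntohl(mac_lo), 0, 3);
		filter[2] = (struct sock_filter) BPF_STMT(BPF_LD | BPF_H | BPF_ABS, 6);
		filter[3] = (struct sock_filter) BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, ntohs(mac_hi), 0, 1);
		filter[4] = (struct sock_filter) BPF_STMT(BPF_RET | BPF_K, 0);		/* own frame: drop */
		filter[5] = (struct sock_filter) BPF_STMT(BPF_RET | BPF_K, 0x40000);	/* else: accept */

		for (int i = 0; i < prog.len; i++)
			printf("{ 0x%02x, %u, %u, 0x%08x },\n",
			       filter[i].code, filter[i].jt, filter[i].jf, filter[i].k);
		return 0;
	}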
+99
arch/um/drivers/vector_user.h
··· 1 + /*
2 + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
3 + * Licensed under the GPL
4 + */
5 +
6 + #ifndef __UM_VECTOR_USER_H
7 + #define __UM_VECTOR_USER_H
8 +
9 + #define MAXVARGS 20
10 +
11 + #define TOKEN_IFNAME "ifname"
12 +
13 + #define TRANS_RAW "raw"
14 + #define TRANS_RAW_LEN strlen(TRANS_RAW)
15 +
16 + #define TRANS_TAP "tap"
17 + #define TRANS_TAP_LEN strlen(TRANS_TAP)
18 +
19 +
20 + #define TRANS_GRE "gre"
21 + #define TRANS_GRE_LEN strlen(TRANS_GRE)
22 +
23 + #define TRANS_L2TPV3 "l2tpv3"
24 + #define TRANS_L2TPV3_LEN strlen(TRANS_L2TPV3)
25 +
26 + #ifndef IPPROTO_GRE
27 + #define IPPROTO_GRE 0x2F
28 + #endif
29 +
30 + #define GRE_MODE_CHECKSUM cpu_to_be16(8 << 12) /* checksum */
31 + #define GRE_MODE_RESERVED cpu_to_be16(4 << 12) /* unused */
32 + #define GRE_MODE_KEY cpu_to_be16(2 << 12) /* KEY present */
33 + #define GRE_MODE_SEQUENCE cpu_to_be16(1 << 12) /* sequence */
34 +
35 + #define GRE_IRB cpu_to_be16(0x6558)
36 +
37 + #define L2TPV3_DATA_PACKET 0x30000
38 +
39 + /* IANA-assigned IP protocol ID for L2TPv3 */
40 +
41 + #ifndef IPPROTO_L2TP
42 + #define IPPROTO_L2TP 0x73
43 + #endif
44 +
45 + struct arglist {
46 + 	int numargs;
47 + 	char *tokens[MAXVARGS];
48 + 	char *values[MAXVARGS];
49 + };
50 +
51 + /* Separating read and write FDs allows us to have different
52 + * rx and tx methods. Example - read tap via raw socket using
53 + * recvmmsg, write using legacy tap write calls
54 + */
55 +
56 + struct vector_fds {
57 + 	int rx_fd;
58 + 	int tx_fd;
59 + 	void *remote_addr;
60 + 	int remote_addr_size;
61 + };
62 +
63 + #define VECTOR_READ 1
64 + #define VECTOR_WRITE (1 << 1)
65 + #define VECTOR_HEADERS (1 << 2)
66 +
67 + extern struct arglist *uml_parse_vector_ifspec(char *arg);
68 +
69 + extern struct vector_fds *uml_vector_user_open(
70 + 	int unit,
71 + 	struct arglist *parsed
72 + );
73 +
74 + extern char *uml_vector_fetch_arg(
75 + 	struct arglist *ifspec,
76 + 	char *token
77 + );
78 +
79 + extern int uml_vector_recvmsg(int fd, void *hdr, int flags);
80 + extern int uml_vector_sendmsg(int fd, void *hdr, int flags);
81 + extern int uml_vector_writev(int fd, void *hdr, int iovcount);
82 + extern int uml_vector_sendmmsg(
83 + 	int fd, void *msgvec,
84 + 	unsigned int vlen,
85 + 	unsigned int flags
86 + );
87 + extern int uml_vector_recvmmsg(
88 + 	int fd,
89 + 	void *msgvec,
90 + 	unsigned int vlen,
91 + 	unsigned int flags
92 + );
93 + extern void *uml_vector_default_bpf(int fd, void *mac);
94 + extern int uml_vector_attach_bpf(int fd, void *bpf, int bpf_len);
95 + extern bool uml_raw_enable_vnet_headers(int fd);
96 + extern bool uml_tap_enable_vnet_headers(int fd);
97 +
98 +
99 + #endif
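For illustration, an ifspec such as "transport=tap,ifname=tap0,v6=0" (a hypothetical example string, not a documented command line from this patch) is split by uml_parse_vector_ifspec() into tokens = {"transport", "ifname", "v6"} and values = {"tap", "tap0", "0"} by writing '\0' over each '=' and ',' in place. A minimal standalone sketch of the same split, using plain libc instead of uml_kmalloc():

	#include <stdio.h>
	#include <string.h>

	#define MAXVARGS 20

	int main(void)
	{
		char arg[] = "transport=tap,ifname=tap0,v6=0";
		char *tokens[MAXVARGS], *values[MAXVARGS], *pair, *save;
		int numargs = 0;

		for (pair = strtok_r(arg, ",", &save);
		     pair != NULL && numargs < MAXVARGS;
		     pair = strtok_r(NULL, ",", &save)) {
			char *eq = strchr(pair, '=');

			if (eq == NULL)
				return 1;	/* malformed spec, as in the parser's cleanup path */
			*eq = '\0';		/* same in-place '\0' split the parser performs */
			tokens[numargs] = pair;
			values[numargs] = eq + 1;
			numargs++;
		}
		for (int i = 0; i < numargs; i++)
			printf("tokens[%d]=%s values[%d]=%s\n", i, tokens[i], i, values[i]);
		return 0;
	}

uml_vector_fetch_arg() then scans tokens[] linearly on every lookup, which is acceptable because parsing happens only once per device at initialization.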
+12
arch/um/include/asm/irq.h
··· 18 18 #define XTERM_IRQ 13
19 19 #define RANDOM_IRQ 14
20 20
21 + #ifdef CONFIG_UML_NET_VECTOR
22 +
23 + #define VECTOR_BASE_IRQ 15
24 + #define VECTOR_IRQ_SPACE 8
25 +
26 + #define LAST_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
27 +
28 + #else
29 +
21 30 #define LAST_IRQ RANDOM_IRQ
31 +
32 + #endif
33 +
22 34 #define NR_IRQS (LAST_IRQ + 1)
23 35
24 36 #endif
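The arithmetic behind the new IRQ layout: with CONFIG_UML_NET_VECTOR enabled, LAST_IRQ = VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ = 8 + 15 = 23 and NR_IRQS = LAST_IRQ + 1 = 24, so the fixed IRQs 0-14 keep their existing numbers and the block starting at VECTOR_BASE_IRQ leaves room for eight vector device IRQs. Without the option the layout is unchanged (LAST_IRQ = RANDOM_IRQ = 14, NR_IRQS = 15).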
+2
arch/um/include/shared/net_kern.h
··· 65 65 	char **mac_out, char **gate_addr);
66 66 extern void register_transport(struct transport *new);
67 67 extern unsigned short eth_protocol(struct sk_buff *skb);
68 + extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
69 +
68 70
69 71 #endif