Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus

* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-for-linus: (25 commits)
virtio: balloon driver
virtio: Use PCI revision field to indicate virtio PCI ABI version
virtio: PCI device
virtio_blk: implement naming for vda-vdz,vdaa-vdzz,vdaaa-vdzzz
virtio_blk: Dont waste major numbers
virtio_blk: provide getgeo
virtio_net: parametrize the napi_weight for virtio receive queue.
virtio: free transmit skbs when notified, not on next xmit.
virtio: flush buffers on open
virtnet: remove double ether_setup
virtio: Allow virtio to be modular and used by modules
virtio: Use the sg_phys convenience function.
virtio: Put the virtio under the virtualization menu
virtio: handle interrupts after callbacks turned off
virtio: reset function
virtio: populate network rings in the probe routine, not open
virtio: Tweak virtio_net defines
virtio: Net header needs hdr_len
virtio: remove unused id field from struct virtio_blk_outhdr
virtio: clarify NO_NOTIFY flag usage
...

+1422 -421
+155 -78
Documentation/lguest/lguest.c
···
 #include <zlib.h>
 #include <assert.h>
 #include <sched.h>
 #include "linux/lguest_launcher.h"
 #include "linux/virtio_config.h"
 #include "linux/virtio_net.h"
···
	/* The descriptor page for the devices. */
	u8 *descpage;

-	/* The tail of the last descriptor. */
-	unsigned int desc_used;
-
	/* A single linked list of devices. */
	struct device *dev;
-	/* ... And an end pointer so we can easily append new devices */
-	struct device **lastdev;
 };

 /* The list of Guest devices, based on command line arguments. */
···
 #define cpu_to_le64(v64) (v64)
 #define le16_to_cpu(v16) (v16)
 #define le32_to_cpu(v32) (v32)
-#define le64_to_cpu(v32) (v64)

 /*L:100 The Launcher code itself takes us out into userspace, that scary place
  * where pointers run wild and free! Unfortunately, like most userspace
···
	write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
 }

 /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
 static void handle_output(int fd, unsigned long addr)
 {
	struct device *i;
	struct virtqueue *vq;

-	/* Check each virtqueue. */
	for (i = devices.dev; i; i = i->next) {
		for (vq = i->vq; vq; vq = vq->next) {
-			if (vq->config.pfn == addr/getpagesize()
-			    && vq->handle_output) {
-				verbose("Output to %s\n", vq->dev->name);
-				vq->handle_output(fd, vq);
				return;
			}
		}
	}
···
  *
  * All devices need a descriptor so the Guest knows it exists, and a "struct
  * device" so the Launcher can keep track of it.  We have common helper
- * routines to allocate them.
- *
- * This routine allocates a new "struct lguest_device_desc" from descriptor
- * table just above the Guest's normal memory.  It returns a pointer to that
- * descriptor. */
 static struct lguest_device_desc *new_dev_desc(u16 type)
 {
-	struct lguest_device_desc *d;

	/* We only have one page for all the descriptors. */
-	if (devices.desc_used + sizeof(*d) > getpagesize())
		errx(1, "Too many devices");

-	/* We don't need to set config_len or status: page is 0 already. */
-	d = (void *)devices.descpage + devices.desc_used;
-	d->type = type;
-	devices.desc_used += sizeof(*d);
-
-	return d;
 }

-/* Each device descriptor is followed by some configuration information.
- * Each configuration field looks like: u8 type, u8 len, [... len bytes...].
- *
- * This routine adds a new field to an existing device's descriptor.  It only
- * works for the last device, but that's OK because that's how we use it. */
-static void add_desc_field(struct device *dev, u8 type, u8 len, const void *c)
-{
-	/* This is the last descriptor, right? */
-	assert(devices.descpage + devices.desc_used
-	       == (u8 *)(dev->desc + 1) + dev->desc->config_len);
-
-	/* We only have one page of device descriptions. */
-	if (devices.desc_used + 2 + len > getpagesize())
-		errx(1, "Too many devices");
-
-	/* Copy in the new config header: type then length. */
-	devices.descpage[devices.desc_used++] = type;
-	devices.descpage[devices.desc_used++] = len;
-	memcpy(devices.descpage + devices.desc_used, c, len);
-	devices.desc_used += len;
-
-	/* Update the device descriptor length: two byte head then data. */
-	dev->desc->config_len += 2 + len;
-}
-
-/* This routine adds a virtqueue to a device.  We specify how many descriptors
- * the virtqueue is to have. */
 static void add_virtqueue(struct device *dev, unsigned int num_descs,
			  void (*handle_output)(int fd, struct virtqueue *me))
 {
···
	/* Initialize the vring. */
	vring_init(&vq->vring, num_descs, p, getpagesize());

-	/* Add the configuration information to this device's descriptor. */
-	add_desc_field(dev, VIRTIO_CONFIG_F_VIRTQUEUE,
-		       sizeof(vq->config), &vq->config);

	/* Add to tail of list, so dev->vq is first vq, dev->vq->next is
	 * second. */
···
	 * virtqueue. */
	vq->handle_output = handle_output;

-	/* Set the "Don't Notify Me" flag if we don't have a handler */
	if (!handle_output)
		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
 }

 /* This routine does all the creation and setup of a new device, including
···
			  bool (*handle_input)(int, struct device *))
 {
	struct device *dev = malloc(sizeof(*dev));
-
-	/* Append to device list.  Prepending to a single-linked list is
-	 * easier, but the user expects the devices to be arranged on the bus
-	 * in command-line order.  The first network device on the command line
-	 * is eth0, the first block device /dev/vda, etc. */
-	*devices.lastdev = dev;
-	dev->next = NULL;
-	devices.lastdev = &dev->next;

	/* Now we populate the fields one at a time. */
	dev->fd = fd;
···
	dev->handle_input = handle_input;
	dev->name = name;
	dev->vq = NULL;
	return dev;
 }
···
	int netfd, ipfd;
	u32 ip;
	const char *br_name = NULL;
-	u8 hwaddr[6];

	/* We open the /dev/net/tun device and tell it we want a tap device.  A
	 * tap device is like a tun device, only somehow different.  To tell
···
	ip = str2ip(arg);

	/* Set up the tun device, and get the mac address for the interface. */
-	configure_device(ipfd, ifr.ifr_name, ip, hwaddr);

	/* Tell Guest what MAC address to use. */
-	add_desc_field(dev, VIRTIO_CONFIG_NET_MAC_F, sizeof(hwaddr), hwaddr);

-	/* We don't seed the socket any more; setup is done. */
	close(ipfd);

	verbose("device %u: tun net %u.%u.%u.%u\n",
···
	struct device *dev;
	struct vblk_info *vblk;
	void *stack;
-	u64 cap;
-	unsigned int val;

	/* This is the pipe the I/O thread will use to tell us I/O is done. */
	pipe(p);
···
	vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
	vblk->len = lseek64(vblk->fd, 0, SEEK_END);

	/* Tell Guest how many sectors this device has. */
-	cap = cpu_to_le64(vblk->len / 512);
-	add_desc_field(dev, VIRTIO_CONFIG_BLK_F_CAPACITY, sizeof(cap), &cap);

	/* Tell Guest not to put in too many descriptors at once: two are used
	 * for the in and out elements. */
-	val = cpu_to_le32(VIRTQUEUE_NUM - 2);
-	add_desc_field(dev, VIRTIO_CONFIG_BLK_F_SEG_MAX, sizeof(val), &val);

	/* The I/O thread writes to this end of the pipe when done. */
	vblk->done_fd = p[1];
···
	close(vblk->workpipe[0]);

	verbose("device %u: virtblock %llu sectors\n",
-		devices.device_num, cap);
 }
 /* That's the end of device setup. :*/
···
	/* First we initialize the device list.  Since console and network
	 * device receive input from a file descriptor, we keep an fdset
	 * (infds) and the maximum fd number (max_infd) with the head of the
-	 * list.  We also keep a pointer to the last device, for easy appending
-	 * to the list.  Finally, we keep the next interrupt number to hand out
-	 * (1: remember that 0 is used by the timer). */
	FD_ZERO(&devices.infds);
	devices.max_infd = -1;
-	devices.lastdev = &devices.dev;
	devices.next_irq = 1;

	cpu_id = 0;
···
 #include <zlib.h>
 #include <assert.h>
 #include <sched.h>
+#include <limits.h>
+#include <stddef.h>
 #include "linux/lguest_launcher.h"
 #include "linux/virtio_config.h"
 #include "linux/virtio_net.h"
···
	/* The descriptor page for the devices. */
	u8 *descpage;

	/* A single linked list of devices. */
	struct device *dev;
+	/* And a pointer to the last device for easy append and also for
+	 * configuration appending. */
+	struct device *lastdev;
 };

 /* The list of Guest devices, based on command line arguments. */
···
 #define cpu_to_le64(v64) (v64)
 #define le16_to_cpu(v16) (v16)
 #define le32_to_cpu(v32) (v32)
+#define le64_to_cpu(v64) (v64)
+
+/* The device virtqueue descriptors are followed by feature bitmasks. */
+static u8 *get_feature_bits(struct device *dev)
+{
+	return (u8 *)(dev->desc + 1)
+		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig);
+}

 /*L:100 The Launcher code itself takes us out into userspace, that scary place
  * where pointers run wild and free! Unfortunately, like most userspace
···
	write(waker_fd, &vq->dev->fd, sizeof(vq->dev->fd));
 }

+/* Resetting a device is fairly easy. */
+static void reset_device(struct device *dev)
+{
+	struct virtqueue *vq;
+
+	verbose("Resetting device %s\n", dev->name);
+	/* Clear the status. */
+	dev->desc->status = 0;
+
+	/* Clear any features they've acked. */
+	memset(get_feature_bits(dev) + dev->desc->feature_len, 0,
+	       dev->desc->feature_len);
+
+	/* Zero out the virtqueues. */
+	for (vq = dev->vq; vq; vq = vq->next) {
+		memset(vq->vring.desc, 0,
+		       vring_size(vq->config.num, getpagesize()));
+		vq->last_avail_idx = 0;
+	}
+}
+
 /* This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */
 static void handle_output(int fd, unsigned long addr)
 {
	struct device *i;
	struct virtqueue *vq;

+	/* Check each device and virtqueue. */
	for (i = devices.dev; i; i = i->next) {
+		/* Notifications to device descriptors reset the device. */
+		if (from_guest_phys(addr) == i->desc) {
+			reset_device(i);
+			return;
+		}
+
+		/* Notifications to virtqueues mean output has occurred. */
		for (vq = i->vq; vq; vq = vq->next) {
+			if (vq->config.pfn != addr/getpagesize())
+				continue;
+
+			/* Guest should acknowledge (and set features!) before
+			 * using the device. */
+			if (i->desc->status == 0) {
+				warnx("%s gave early output", i->name);
				return;
			}
+
+			if (strcmp(vq->dev->name, "console") != 0)
+				verbose("Output to %s\n", vq->dev->name);
+			if (vq->handle_output)
+				vq->handle_output(fd, vq);
+			return;
		}
	}
···
  *
  * All devices need a descriptor so the Guest knows it exists, and a "struct
  * device" so the Launcher can keep track of it.  We have common helper
+ * routines to allocate and manage them. */
+
+/* The layout of the device page is a "struct lguest_device_desc" followed by a
+ * number of virtqueue descriptors, then two sets of feature bits, then an
+ * array of configuration bytes.  This routine returns the configuration
+ * pointer. */
+static u8 *device_config(const struct device *dev)
+{
+	return (void *)(dev->desc + 1)
+		+ dev->desc->num_vq * sizeof(struct lguest_vqconfig)
+		+ dev->desc->feature_len * 2;
+}
+
+/* This routine allocates a new "struct lguest_device_desc" from the descriptor
+ * table page just above the Guest's normal memory.  It returns a pointer to
+ * that descriptor. */
 static struct lguest_device_desc *new_dev_desc(u16 type)
 {
+	struct lguest_device_desc d = { .type = type };
+	void *p;
+
+	/* Figure out where the next device config is, based on the last one. */
+	if (devices.lastdev)
+		p = device_config(devices.lastdev)
+			+ devices.lastdev->desc->config_len;
+	else
+		p = devices.descpage;

	/* We only have one page for all the descriptors. */
+	if (p + sizeof(d) > (void *)devices.descpage + getpagesize())
		errx(1, "Too many devices");

+	/* p might not be aligned, so we memcpy in. */
+	return memcpy(p, &d, sizeof(d));
 }

+/* Each device descriptor is followed by the description of its virtqueues.  We
+ * specify how many descriptors the virtqueue is to have. */
 static void add_virtqueue(struct device *dev, unsigned int num_descs,
			  void (*handle_output)(int fd, struct virtqueue *me))
 {
···
	/* Initialize the vring. */
	vring_init(&vq->vring, num_descs, p, getpagesize());

+	/* Append virtqueue to this device's descriptor.  We use
+	 * device_config() to get the end of the device's current virtqueues;
+	 * we check that we haven't added any config or feature information
+	 * yet, otherwise we'd be overwriting them. */
+	assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
+	memcpy(device_config(dev), &vq->config, sizeof(vq->config));
+	dev->desc->num_vq++;
+
+	verbose("Virtqueue page %#lx\n", to_guest_phys(p));

	/* Add to tail of list, so dev->vq is first vq, dev->vq->next is
	 * second. */
···
	 * virtqueue. */
	vq->handle_output = handle_output;

+	/* As an optimization, set the advisory "Don't Notify Me" flag if we
+	 * don't have a handler */
	if (!handle_output)
		vq->vring.used->flags = VRING_USED_F_NO_NOTIFY;
+}
+
+/* The first half of the feature bitmask is for us to advertise features.  The
+ * second half is for the Guest to accept features. */
+static void add_feature(struct device *dev, unsigned bit)
+{
+	u8 *features = get_feature_bits(dev);
+
+	/* We can't extend the feature bits once we've added config bytes */
+	if (dev->desc->feature_len <= bit / CHAR_BIT) {
+		assert(dev->desc->config_len == 0);
+		dev->desc->feature_len = (bit / CHAR_BIT) + 1;
+	}
+
+	features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
+}
+
+/* This routine sets the configuration fields for an existing device's
+ * descriptor.  It only works for the last device, but that's OK because that's
+ * how we use it. */
+static void set_config(struct device *dev, unsigned len, const void *conf)
+{
+	/* Check we haven't overflowed our single page. */
+	if (device_config(dev) + len > devices.descpage + getpagesize())
+		errx(1, "Too many devices");
+
+	/* Copy in the config information, and store the length. */
+	memcpy(device_config(dev), conf, len);
+	dev->desc->config_len = len;
 }

 /* This routine does all the creation and setup of a new device, including
···
			  bool (*handle_input)(int, struct device *))
 {
	struct device *dev = malloc(sizeof(*dev));

	/* Now we populate the fields one at a time. */
	dev->fd = fd;
···
	dev->handle_input = handle_input;
	dev->name = name;
	dev->vq = NULL;
+
+	/* Append to device list.  Prepending to a single-linked list is
+	 * easier, but the user expects the devices to be arranged on the bus
+	 * in command-line order.  The first network device on the command line
+	 * is eth0, the first block device /dev/vda, etc. */
+	if (devices.lastdev)
+		devices.lastdev->next = dev;
+	else
+		devices.dev = dev;
+	devices.lastdev = dev;
+
	return dev;
 }
···
	int netfd, ipfd;
	u32 ip;
	const char *br_name = NULL;
+	struct virtio_net_config conf;

	/* We open the /dev/net/tun device and tell it we want a tap device.  A
	 * tap device is like a tun device, only somehow different.  To tell
···
	ip = str2ip(arg);

	/* Set up the tun device, and get the mac address for the interface. */
+	configure_device(ipfd, ifr.ifr_name, ip, conf.mac);

	/* Tell Guest what MAC address to use. */
+	add_feature(dev, VIRTIO_NET_F_MAC);
+	set_config(dev, sizeof(conf), &conf);

+	/* We don't need the socket any more; setup is done. */
	close(ipfd);

	verbose("device %u: tun net %u.%u.%u.%u\n",
···
	struct device *dev;
	struct vblk_info *vblk;
	void *stack;
+	struct virtio_blk_config conf;

	/* This is the pipe the I/O thread will use to tell us I/O is done. */
	pipe(p);
···
	vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
	vblk->len = lseek64(vblk->fd, 0, SEEK_END);

+	/* We support barriers. */
+	add_feature(dev, VIRTIO_BLK_F_BARRIER);
+
	/* Tell Guest how many sectors this device has. */
+	conf.capacity = cpu_to_le64(vblk->len / 512);

	/* Tell Guest not to put in too many descriptors at once: two are used
	 * for the in and out elements. */
+	add_feature(dev, VIRTIO_BLK_F_SEG_MAX);
+	conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2);
+
+	set_config(dev, sizeof(conf), &conf);

	/* The I/O thread writes to this end of the pipe when done. */
	vblk->done_fd = p[1];
···
	close(vblk->workpipe[0]);

	verbose("device %u: virtblock %llu sectors\n",
+		devices.device_num, le64_to_cpu(conf.capacity));
 }
 /* That's the end of device setup. :*/
···
	/* First we initialize the device list.  Since console and network
	 * device receive input from a file descriptor, we keep an fdset
	 * (infds) and the maximum fd number (max_infd) with the head of the
+	 * list.  We also keep a pointer to the last device.  Finally, we keep
+	 * the next interrupt number to hand out (1: remember that 0 is used by
+	 * the timer). */
	FD_ZERO(&devices.infds);
	devices.max_infd = -1;
+	devices.lastdev = NULL;
	devices.next_irq = 1;

	cpu_id = 0;
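For orientation, the device page that new_dev_desc(), add_virtqueue() and set_config() build up can be sketched in plain userspace C. The struct definitions below are simplified stand-ins for the real lguest headers, so treat the exact fields as illustrative only:

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-ins for the lguest structures (illustrative, not the
 * real headers). */
struct lguest_device_desc {
	uint8_t type, num_vq, feature_len, config_len, status;
} __attribute__((packed));

struct lguest_vqconfig {
	uint16_t num;
	uint16_t irq;
	uint32_t pfn;
} __attribute__((packed));

/* Mirrors the layout walked by device_config()/get_feature_bits() above:
 * descriptor, then num_vq virtqueue configs, then two feature bitmasks of
 * feature_len bytes each, then config_len bytes of config space. */
int main(void)
{
	struct lguest_device_desc d = {
		.num_vq = 2, .feature_len = 1, .config_len = 6
	};
	unsigned vq_off   = sizeof(d);
	unsigned feat_off = vq_off + d.num_vq * sizeof(struct lguest_vqconfig);
	unsigned conf_off = feat_off + d.feature_len * 2;

	printf("vqconfigs at +%u, features at +%u, config at +%u, total %u\n",
	       vq_off, feat_off, conf_off, conf_off + d.config_len);
	return 0;
}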
+1
arch/x86/kvm/Kconfig
···
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/lguest/Kconfig

 endif # VIRTUALIZATION
···
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/lguest/Kconfig
+source drivers/virtio/Kconfig

 endif # VIRTUALIZATION
-2
drivers/Kconfig
···
 source "drivers/auxdisplay/Kconfig"

 source "drivers/uio/Kconfig"
-
-source "drivers/virtio/Kconfig"
 endmenu
···
 source "drivers/auxdisplay/Kconfig"

 source "drivers/uio/Kconfig"
 endmenu
+2 -1
drivers/block/Kconfig
···
	tristate "Virtio block driver (EXPERIMENTAL)"
	depends on EXPERIMENTAL && VIRTIO
	---help---
-	  This is the virtual block driver for lguest.  Say Y or M.

 endif # BLK_DEV
···
	tristate "Virtio block driver (EXPERIMENTAL)"
	depends on EXPERIMENTAL && VIRTIO
	---help---
+	  This is the virtual block driver for virtio.  It can be used with
+	  lguest or QEMU based VMMs (like KVM or Xen).  Say Y or M.

 endif # BLK_DEV
+65 -41
drivers/block/virtio_blk.c
···
 #include <linux/scatterlist.h>

 #define VIRTIO_MAX_SG	(3+MAX_PHYS_SEGMENTS)

-static unsigned char virtblk_index = 'a';
 struct virtio_blk
 {
	spinlock_t lock;
···
	struct virtio_blk_inhdr in_hdr;
 };

-static bool blk_done(struct virtqueue *vq)
 {
	struct virtio_blk *vblk = vq->vdev->priv;
	struct virtblk_req *vbr;
···
	/* In case queue is stopped waiting for more buffers. */
	blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(&vblk->lock, flags);
-	return true;
 }

 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
···
			      (void __user *)data);
 }

 static struct block_device_operations virtblk_fops = {
-	.ioctl = virtblk_ioctl,
-	.owner = THIS_MODULE,
 };

 static int virtblk_probe(struct virtio_device *vdev)
 {
	struct virtio_blk *vblk;
-	int err, major;
-	void *token;
-	unsigned int len;
	u64 cap;
	u32 v;

	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
···
	vblk->vdev = vdev;

	/* We expect one virtqueue, for output. */
-	vblk->vq = vdev->config->find_vq(vdev, blk_done);
	if (IS_ERR(vblk->vq)) {
		err = PTR_ERR(vblk->vq);
		goto out_free_vblk;
···
		goto out_free_vq;
	}

-	major = register_blkdev(0, "virtblk");
-	if (major < 0) {
-		err = major;
-		goto out_mempool;
-	}
-
	/* FIXME: How many partitions?  How long is a piece of string? */
-	vblk->disk = alloc_disk(1 << 4);
	if (!vblk->disk) {
		err = -ENOMEM;
-		goto out_unregister_blkdev;
	}

	vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
···
		goto out_put_disk;
	}

-	sprintf(vblk->disk->disk_name, "vd%c", virtblk_index++);
	vblk->disk->major = major;
-	vblk->disk->first_minor = 0;
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;

	/* If barriers are supported, tell block layer that queue is ordered */
-	token = vdev->config->find(vdev, VIRTIO_CONFIG_BLK_F, &len);
-	if (virtio_use_bit(vdev, token, len, VIRTIO_BLK_F_BARRIER))
		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);

-	err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_CAPACITY, &cap);
-	if (err) {
-		dev_err(&vdev->dev, "Bad/missing capacity in config\n");
-		goto out_cleanup_queue;
-	}

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
···
	}
	set_capacity(vblk->disk, cap);

-	err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SIZE_MAX, &v);
	if (!err)
		blk_queue_max_segment_size(vblk->disk->queue, v);
-	else if (err != -ENOENT) {
-		dev_err(&vdev->dev, "Bad SIZE_MAX in config\n");
-		goto out_cleanup_queue;
-	}

-	err = virtio_config_val(vdev, VIRTIO_CONFIG_BLK_F_SEG_MAX, &v);
	if (!err)
		blk_queue_max_hw_segments(vblk->disk->queue, v);
-	else if (err != -ENOENT) {
-		dev_err(&vdev->dev, "Bad SEG_MAX in config\n");
-		goto out_cleanup_queue;
-	}

	add_disk(vblk->disk);
	return 0;

-out_cleanup_queue:
-	blk_cleanup_queue(vblk->disk->queue);
 out_put_disk:
	put_disk(vblk->disk);
-out_unregister_blkdev:
-	unregister_blkdev(major, "virtblk");
 out_mempool:
	mempool_destroy(vblk->pool);
 out_free_vq:
···
	struct virtio_blk *vblk = vdev->priv;
	int major = vblk->disk->major;

	BUG_ON(!list_empty(&vblk->reqs));
	blk_cleanup_queue(vblk->disk->queue);
	put_disk(vblk->disk);
	unregister_blkdev(major, "virtblk");
	mempool_destroy(vblk->pool);
-	/* There should be nothing in the queue now, so no need to shutdown */
	vdev->config->del_vq(vblk->vq);
	kfree(vblk);
 }
···

 static int __init init(void)
 {
	return register_virtio_driver(&virtio_blk);
 }

 static void __exit fini(void)
 {
	unregister_virtio_driver(&virtio_blk);
 }
 module_init(init);
···
 #include <linux/scatterlist.h>

 #define VIRTIO_MAX_SG	(3+MAX_PHYS_SEGMENTS)
+#define PART_BITS 4

+static int major, index;
+
 struct virtio_blk
 {
	spinlock_t lock;
···
	struct virtio_blk_inhdr in_hdr;
 };

+static void blk_done(struct virtqueue *vq)
 {
	struct virtio_blk *vblk = vq->vdev->priv;
	struct virtblk_req *vbr;
···
	/* In case queue is stopped waiting for more buffers. */
	blk_start_queue(vblk->disk->queue);
	spin_unlock_irqrestore(&vblk->lock, flags);
 }

 static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
···
			      (void __user *)data);
 }

+/* We provide getgeo only to please some old bootloader/partitioning tools */
+static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
+{
+	/* some standard values, similar to sd */
+	geo->heads = 1 << 6;
+	geo->sectors = 1 << 5;
+	geo->cylinders = get_capacity(bd->bd_disk) >> 11;
+	return 0;
+}
+
 static struct block_device_operations virtblk_fops = {
+	.ioctl = virtblk_ioctl,
+	.owner = THIS_MODULE,
+	.getgeo = virtblk_getgeo,
 };
+
+static int index_to_minor(int index)
+{
+	return index << PART_BITS;
+}

 static int virtblk_probe(struct virtio_device *vdev)
 {
	struct virtio_blk *vblk;
+	int err;
	u64 cap;
	u32 v;
+
+	if (index_to_minor(index) >= 1 << MINORBITS)
+		return -ENOSPC;

	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
	if (!vblk) {
···
	vblk->vdev = vdev;

	/* We expect one virtqueue, for output. */
+	vblk->vq = vdev->config->find_vq(vdev, 0, blk_done);
	if (IS_ERR(vblk->vq)) {
		err = PTR_ERR(vblk->vq);
		goto out_free_vblk;
···
		goto out_free_vq;
	}

	/* FIXME: How many partitions?  How long is a piece of string? */
+	vblk->disk = alloc_disk(1 << PART_BITS);
	if (!vblk->disk) {
		err = -ENOMEM;
+		goto out_mempool;
	}

	vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
···
		goto out_put_disk;
	}

+	if (index < 26) {
+		sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
+	} else if (index < (26 + 1) * 26) {
+		sprintf(vblk->disk->disk_name, "vd%c%c",
+			'a' + index / 26 - 1, 'a' + index % 26);
+	} else {
+		const unsigned int m1 = (index / 26 - 1) / 26 - 1;
+		const unsigned int m2 = (index / 26 - 1) % 26;
+		const unsigned int m3 = index % 26;
+		sprintf(vblk->disk->disk_name, "vd%c%c%c",
+			'a' + m1, 'a' + m2, 'a' + m3);
+	}
+
	vblk->disk->major = major;
+	vblk->disk->first_minor = index_to_minor(index);
	vblk->disk->private_data = vblk;
	vblk->disk->fops = &virtblk_fops;
+	index++;

	/* If barriers are supported, tell block layer that queue is ordered */
+	if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER))
		blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL);

+	/* Host must always specify the capacity. */
+	__virtio_config_val(vdev, offsetof(struct virtio_blk_config, capacity),
+			    &cap);

	/* If capacity is too big, truncate with warning. */
	if ((sector_t)cap != cap) {
···
	}
	set_capacity(vblk->disk, cap);

+	/* Host can optionally specify maximum segment size and number of
+	 * segments. */
+	err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
+				offsetof(struct virtio_blk_config, size_max),
+				&v);
	if (!err)
		blk_queue_max_segment_size(vblk->disk->queue, v);

+	err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
+				offsetof(struct virtio_blk_config, seg_max),
+				&v);
	if (!err)
		blk_queue_max_hw_segments(vblk->disk->queue, v);

	add_disk(vblk->disk);
	return 0;

 out_put_disk:
	put_disk(vblk->disk);
 out_mempool:
	mempool_destroy(vblk->pool);
 out_free_vq:
···
	struct virtio_blk *vblk = vdev->priv;
	int major = vblk->disk->major;

+	/* Nothing should be pending. */
	BUG_ON(!list_empty(&vblk->reqs));
+
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+
	blk_cleanup_queue(vblk->disk->queue);
	put_disk(vblk->disk);
	unregister_blkdev(major, "virtblk");
	mempool_destroy(vblk->pool);
	vdev->config->del_vq(vblk->vq);
	kfree(vblk);
 }
···

 static int __init init(void)
 {
+	major = register_blkdev(0, "virtblk");
+	if (major < 0)
+		return major;
	return register_virtio_driver(&virtio_blk);
 }

 static void __exit fini(void)
 {
+	unregister_blkdev(major, "virtblk");
	unregister_virtio_driver(&virtio_blk);
 }
 module_init(init);
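To see what the new naming arithmetic produces at its boundaries, here is a small userspace sketch of the same logic (not part of the patch; the helper name vd_name is made up for illustration):

#include <stdio.h>

/* Mirrors the disk_name branches from virtblk_probe() above. */
static void vd_name(int index, char *buf)
{
	if (index < 26)
		sprintf(buf, "vd%c", 'a' + index % 26);
	else if (index < (26 + 1) * 26)
		sprintf(buf, "vd%c%c",
			'a' + index / 26 - 1, 'a' + index % 26);
	else
		sprintf(buf, "vd%c%c%c", 'a' + (index / 26 - 1) / 26 - 1,
			'a' + (index / 26 - 1) % 26, 'a' + index % 26);
}

int main(void)
{
	const int samples[] = { 0, 25, 26, 701, 702 };
	char name[8];
	size_t i;

	for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++) {
		vd_name(samples[i], name);
		/* Prints vda, vdz, vdaa, vdzz, vdaaa. */
		printf("index %4d -> %s\n", samples[i], name);
	}
	return 0;
}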
+2 -2
drivers/char/virtio_console.c
···
	/* Find the input queue. */
	/* FIXME: This is why we want to wean off hvc: we do nothing
	 * when input comes in. */
-	in_vq = vdev->config->find_vq(vdev, NULL);
	if (IS_ERR(in_vq)) {
		err = PTR_ERR(in_vq);
		goto free;
	}

-	out_vq = vdev->config->find_vq(vdev, NULL);
	if (IS_ERR(out_vq)) {
		err = PTR_ERR(out_vq);
		goto free_in_vq;
···
	/* Find the input queue. */
	/* FIXME: This is why we want to wean off hvc: we do nothing
	 * when input comes in. */
+	in_vq = vdev->config->find_vq(vdev, 0, NULL);
	if (IS_ERR(in_vq)) {
		err = PTR_ERR(in_vq);
		goto free;
	}

+	out_vq = vdev->config->find_vq(vdev, 1, NULL);
	if (IS_ERR(out_vq)) {
		err = PTR_ERR(out_vq);
		goto free_in_vq;
+85 -57
drivers/lguest/lguest_device.c
···
 /*D:130
  * Device configurations
  *
- * The configuration information for a device consists of a series of fields.
- * We don't really care what they are: the Launcher set them up, and the driver
- * will look at them during setup.
  *
- * For us these fields come immediately after that device's descriptor in the
- * lguest_devices page.
- *
- * Each field starts with a "type" byte, a "length" byte, then that number of
- * bytes of configuration information.  The device descriptor tells us the
- * total configuration length so we know when we've reached the last field. */

-/* type + length bytes */
-#define FHDR_LEN 2

-/* This finds the first field of a given type for a device's configuration. */
-static void *lg_find(struct virtio_device *vdev, u8 type, unsigned int *len)
 {
	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
-	int i;

-	for (i = 0; i < desc->config_len; i += FHDR_LEN + desc->config[i+1]) {
-		if (desc->config[i] == type) {
-			/* Mark it used, so Host can know we looked at it, and
-			 * also so we won't find the same one twice. */
-			desc->config[i] |= 0x80;
-			/* Remember, the second byte is the length. */
-			*len = desc->config[i+1];
-			/* We return a pointer to the field header. */
-			return desc->config + i;
-		}
-	}

-	/* Not found: return NULL for failure. */
-	return NULL;
 }

 /* Once they've found a field, getting a copy of it is easy. */
-static void lg_get(struct virtio_device *vdev, void *token,
		   void *buf, unsigned len)
 {
-	/* Check they didn't ask for more than the length of the field! */
-	BUG_ON(len > ((u8 *)token)[1]);
-	memcpy(buf, token + FHDR_LEN, len);
 }

 /* Setting the contents is also trivial. */
-static void lg_set(struct virtio_device *vdev, void *token,
		   const void *buf, unsigned len)
 {
-	BUG_ON(len > ((u8 *)token)[1]);
-	memcpy(token + FHDR_LEN, buf, len);
 }

 /* The operations to get and set the status word just access the status field
···

 static void lg_set_status(struct virtio_device *vdev, u8 status)
 {
	to_lgdev(vdev)->desc->status = status;
 }

 /*
···
  *
  * So we provide devices with a "find virtqueue and set it up" function. */
 static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
-				    bool (*callback)(struct virtqueue *vq))
 {
	struct lguest_vq_info *lvq;
	struct virtqueue *vq;
-	unsigned int len;
-	void *token;
	int err;

-	/* Look for a field of the correct type to mark a virtqueue.  Note that
-	 * if this succeeds, then the type will be changed so it won't be found
-	 * again, and future lg_find_vq() calls will find the next
-	 * virtqueue (if any). */
-	token = vdev->config->find(vdev, VIRTIO_CONFIG_F_VIRTQUEUE, &len);
-	if (!token)
		return ERR_PTR(-ENOENT);

	lvq = kmalloc(sizeof(*lvq), GFP_KERNEL);
	if (!lvq)
		return ERR_PTR(-ENOMEM);

-	/* Note: we could use a configuration space inside here, just like we
-	 * do for the device.  This would allow expansion in future, because
-	 * our configuration system is designed to be expansible.  But this is
-	 * way easier. */
-	if (len != sizeof(lvq->config)) {
-		dev_err(&vdev->dev, "Unexpected virtio config len %u\n", len);
-		err = -EIO;
-		goto free_lvq;
-	}
-	/* Make a copy of the "struct lguest_vqconfig" field.  We need a copy
-	 * because the config space might not be aligned correctly. */
-	vdev->config->get(vdev, token, &lvq->config, sizeof(lvq->config));

	/* Figure out how many pages the ring will take, and map that memory */
	lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
				DIV_ROUND_UP(vring_size(lvq->config.num,
···

 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
-	.find = lg_find,
	.get = lg_get,
	.set = lg_set,
	.get_status = lg_get_status,
	.set_status = lg_set_status,
	.find_vq = lg_find_vq,
	.del_vq = lg_del_vq,
 };
···
	struct lguest_device_desc *d;

	/* We start at the page beginning, and skip over each entry. */
-	for (i = 0; i < PAGE_SIZE; i += sizeof(*d) + d->config_len) {
		d = lguest_devices + i;

		/* Once we hit a zero, stop. */
		if (d->type == 0)
			break;

		add_lguest_device(d);
	}
 }
···
 /*D:130
  * Device configurations
  *
+ * The configuration information for a device consists of one or more
+ * virtqueues, a feature bitmask, and some configuration bytes.  The
+ * configuration bytes don't really matter to us: the Launcher sets them up, and
+ * the driver will look at them during setup.
  *
+ * A convenient routine to return the device's virtqueue config array:
+ * immediately after the descriptor. */
+static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc)
+{
+	return (void *)(desc + 1);
+}

+/* The features come immediately after the virtqueues. */
+static u8 *lg_features(const struct lguest_device_desc *desc)
+{
+	return (void *)(lg_vq(desc) + desc->num_vq);
+}

+/* The config space comes after the two feature bitmasks. */
+static u8 *lg_config(const struct lguest_device_desc *desc)
+{
+	return lg_features(desc) + desc->feature_len * 2;
+}
+
+/* The total size of the config page used by this device (incl. desc) */
+static unsigned desc_size(const struct lguest_device_desc *desc)
+{
+	return sizeof(*desc)
+		+ desc->num_vq * sizeof(struct lguest_vqconfig)
+		+ desc->feature_len * 2
+		+ desc->config_len;
+}
+
+/* This tests (and acknowledges) a feature bit. */
+static bool lg_feature(struct virtio_device *vdev, unsigned fbit)
 {
	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
+	u8 *features;

+	/* Obviously if they ask for a feature off the end of our feature
+	 * bitmap, it's not set. */
+	if (fbit / 8 > desc->feature_len)
+		return false;

+	/* The feature bitmap comes after the virtqueues. */
+	features = lg_features(desc);
+	if (!(features[fbit / 8] & (1 << (fbit % 8))))
+		return false;
+
+	/* We set the matching bit in the other half of the bitmap to tell the
+	 * Host we want to use this feature.  We don't use this yet, but we
+	 * could in future. */
+	features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8));
+	return true;
 }

 /* Once they've found a field, getting a copy of it is easy. */
+static void lg_get(struct virtio_device *vdev, unsigned int offset,
		   void *buf, unsigned len)
 {
+	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
+
+	/* Check they didn't ask for more than the length of the config! */
+	BUG_ON(offset + len > desc->config_len);
+	memcpy(buf, lg_config(desc) + offset, len);
 }

 /* Setting the contents is also trivial. */
+static void lg_set(struct virtio_device *vdev, unsigned int offset,
		   const void *buf, unsigned len)
 {
+	struct lguest_device_desc *desc = to_lgdev(vdev)->desc;
+
+	/* Check they didn't ask for more than the length of the config! */
+	BUG_ON(offset + len > desc->config_len);
+	memcpy(lg_config(desc) + offset, buf, len);
 }

 /* The operations to get and set the status word just access the status field
···

 static void lg_set_status(struct virtio_device *vdev, u8 status)
 {
+	BUG_ON(!status);
	to_lgdev(vdev)->desc->status = status;
+}
+
+/* To reset the device, we (ab)use the NOTIFY hypercall, with the descriptor
+ * address of the device.  The Host will zero the status and all the
+ * features. */
+static void lg_reset(struct virtio_device *vdev)
+{
+	unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices;
+
+	hcall(LHCALL_NOTIFY, (max_pfn<<PAGE_SHIFT) + offset, 0, 0);
 }

 /*
···
  *
  * So we provide devices with a "find virtqueue and set it up" function. */
 static struct virtqueue *lg_find_vq(struct virtio_device *vdev,
+				    unsigned index,
+				    void (*callback)(struct virtqueue *vq))
 {
+	struct lguest_device *ldev = to_lgdev(vdev);
	struct lguest_vq_info *lvq;
	struct virtqueue *vq;
	int err;

+	/* We must have this many virtqueues. */
+	if (index >= ldev->desc->num_vq)
		return ERR_PTR(-ENOENT);

	lvq = kmalloc(sizeof(*lvq), GFP_KERNEL);
	if (!lvq)
		return ERR_PTR(-ENOMEM);

+	/* Make a copy of the "struct lguest_vqconfig" entry, which sits after
+	 * the descriptor.  We need a copy because the config space might not
+	 * be aligned correctly. */
+	memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config));

+	printk("Mapping virtqueue %i addr %lx\n", index,
+	       (unsigned long)lvq->config.pfn << PAGE_SHIFT);
	/* Figure out how many pages the ring will take, and map that memory */
	lvq->pages = lguest_map((unsigned long)lvq->config.pfn << PAGE_SHIFT,
				DIV_ROUND_UP(vring_size(lvq->config.num,
···

 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
+	.feature = lg_feature,
	.get = lg_get,
	.set = lg_set,
	.get_status = lg_get_status,
	.set_status = lg_set_status,
+	.reset = lg_reset,
	.find_vq = lg_find_vq,
	.del_vq = lg_del_vq,
 };
···
	struct lguest_device_desc *d;

	/* We start at the page beginning, and skip over each entry. */
+	for (i = 0; i < PAGE_SIZE; i += desc_size(d)) {
		d = lguest_devices + i;

		/* Once we hit a zero, stop. */
		if (d->type == 0)
			break;

+		printk("Device at %i has size %u\n", i, desc_size(d));
		add_lguest_device(d);
	}
 }
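A toy illustration of the split feature bitmap that lg_feature() walks above: the first feature_len bytes carry the Host's offered bits, the following feature_len bytes collect the Guest's acknowledgements. Everything below is a userspace mock, not the lguest API:

#include <stdio.h>
#include <stdbool.h>

/* feature_len = 1: one byte of Host-offered bits, then one byte of Guest
 * acks, the two halves lg_features() exposes (mock layout). */
static unsigned char features[2] = { 0x05, 0x00 };	/* Host offers bits 0, 2 */
static const unsigned feature_len = 1;

static bool test_and_ack(unsigned fbit)
{
	if (fbit / 8 >= feature_len)
		return false;
	if (!(features[fbit / 8] & (1 << (fbit % 8))))
		return false;
	/* Ack in the second half so the Host knows we use it. */
	features[feature_len + fbit / 8] |= (1 << (fbit % 8));
	return true;
}

int main(void)
{
	printf("bit 0: %d, bit 1: %d, bit 2: %d\n",
	       test_and_ack(0), test_and_ack(1), test_and_ack(2));
	printf("ack byte now %#x\n", features[1]);	/* prints 0x5 */
	return 0;
}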
+2 -1
drivers/net/Kconfig
···
	tristate "Virtio network driver (EXPERIMENTAL)"
	depends on EXPERIMENTAL && VIRTIO
	---help---
-	  This is the virtual network driver for lguest.  Say Y or M.

 endif # NETDEVICES
···
	tristate "Virtio network driver (EXPERIMENTAL)"
	depends on EXPERIMENTAL && VIRTIO
	---help---
+	  This is the virtual network driver for virtio.  It can be used with
+	  lguest or QEMU based VMMs (like KVM or Xen).  Say Y or M.

 endif # NETDEVICES
+88 -69
drivers/net/virtio_net.c
···
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>

 /* FIXME: MTU in config. */
 #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
···
	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
 }

-static bool skb_xmit_done(struct virtqueue *rvq)
 {
-	struct virtnet_info *vi = rvq->vdev->priv;

-	/* In case we were waiting for output buffers. */
	netif_wake_queue(vi->dev);
-	return true;
 }

 static void receive_skb(struct net_device *dev, struct sk_buff *skb,
···

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		pr_debug("Needs csum!\n");
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = hdr->csum_start;
-		skb->csum_offset = hdr->csum_offset;
-		if (skb->csum_start > skb->len - 2
-		    || skb->csum_offset > skb->len - 2) {
-			if (net_ratelimit())
-				printk(KERN_WARNING "%s: csum=%u/%u len=%u\n",
-				       dev->name, skb->csum_start,
-				       skb->csum_offset, skb->len);
			goto frame_err;
-		}
	}

	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		pr_debug("GSO!\n");
-		switch (hdr->gso_type) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
-			break;
-		case VIRTIO_NET_HDR_GSO_TCPV4_ECN:
-			skb_shinfo(skb)->gso_type = SKB_GSO_TCP_ECN;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
···
			       dev->name, hdr->gso_type);
			goto frame_err;
		}

		skb_shinfo(skb)->gso_size = hdr->gso_size;
		if (skb_shinfo(skb)->gso_size == 0) {
···
	vi->rvq->vq_ops->kick(vi->rvq);
 }

-static bool skb_recv_done(struct virtqueue *rvq)
 {
	struct virtnet_info *vi = rvq->vdev->priv;
-	netif_rx_schedule(vi->dev, &vi->napi);
-	/* Suppress further interrupts. */
-	return false;
 }

 static int virtnet_poll(struct napi_struct *napi, int budget)
···
	/* Out of packets? */
	if (received < budget) {
		netif_rx_complete(vi->dev, napi);
-		if (unlikely(!vi->rvq->vq_ops->restart(vi->rvq))
		    && netif_rx_reschedule(vi->dev, napi))
			goto again;
	}
···

	pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest));

-	free_old_xmit_skbs(vi);
-
	/* Encode metadata header at front. */
	hdr = skb_vnet_hdr(skb);
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
···
	}

	if (skb_is_gso(skb)) {
		hdr->gso_size = skb_shinfo(skb)->gso_size;
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
-			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4_ECN;
-		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
···
			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
	} else {
		hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
-		hdr->gso_size = 0;
	}

	vnet_hdr_to_sg(sg, skb);
	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
	__skb_queue_head(&vi->send, skb);
	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
	if (err) {
		pr_debug("%s: virtio not prepared to send\n", dev->name);
-		skb_unlink(skb, &vi->send);
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	vi->svq->vq_ops->kick(vi->svq);
···
 {
	struct virtnet_info *vi = netdev_priv(dev);

-	try_fill_recv(vi);
-
-	/* If we didn't even get one input buffer, we're useless. */
-	if (vi->num == 0)
-		return -ENOMEM;
-
	napi_enable(&vi->napi);
	return 0;
 }

 static int virtnet_close(struct net_device *dev)
 {
	struct virtnet_info *vi = netdev_priv(dev);
-	struct sk_buff *skb;

	napi_disable(&vi->napi);

-	/* networking core has neutered skb_xmit_done/skb_recv_done, so don't
-	 * worry about races vs. get(). */
-	vi->rvq->vq_ops->shutdown(vi->rvq);
-	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
-		kfree_skb(skb);
-		vi->num--;
-	}
-	vi->svq->vq_ops->shutdown(vi->svq);
-	while ((skb = __skb_dequeue(&vi->send)) != NULL)
-		kfree_skb(skb);
-
-	BUG_ON(vi->num != 0);
	return 0;
 }

 static int virtnet_probe(struct virtio_device *vdev)
 {
	int err;
-	unsigned int len;
	struct net_device *dev;
	struct virtnet_info *vi;
-	void *token;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev(sizeof(struct virtnet_info));
···
		return -ENOMEM;

	/* Set up network device as normal. */
-	ether_setup(dev);
	dev->open = virtnet_open;
	dev->stop = virtnet_close;
	dev->hard_start_xmit = start_xmit;
···
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
-	token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_F, &len);
-	if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_NO_CSUM)) {
		/* This opens up the world of extra features. */
		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
-		if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4))
-			dev->features |= NETIF_F_TSO;
-		if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_UFO))
-			dev->features |= NETIF_F_UFO;
-		if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO4_ECN))
-			dev->features |= NETIF_F_TSO_ECN;
-		if (virtio_use_bit(vdev, token, len, VIRTIO_NET_F_TSO6))
-			dev->features |= NETIF_F_TSO6;
	}

	/* Configuration may specify what MAC to use.  Otherwise random. */
-	token = vdev->config->find(vdev, VIRTIO_CONFIG_NET_MAC_F, &len);
-	if (token) {
-		dev->addr_len = len;
-		vdev->config->get(vdev, token, dev->dev_addr, len);
	} else
		random_ether_addr(dev->dev_addr);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
-	netif_napi_add(dev, &vi->napi, virtnet_poll, 16);
	vi->dev = dev;
	vi->vdev = vdev;

	/* We expect two virtqueues, receive then send. */
-	vi->rvq = vdev->config->find_vq(vdev, skb_recv_done);
	if (IS_ERR(vi->rvq)) {
		err = PTR_ERR(vi->rvq);
		goto free;
	}

-	vi->svq = vdev->config->find_vq(vdev, skb_xmit_done);
	if (IS_ERR(vi->svq)) {
		err = PTR_ERR(vi->svq);
		goto free_recv;
···
		pr_debug("virtio_net: registering device failed\n");
		goto free_send;
	}
	pr_debug("virtnet: registered device %s\n", dev->name);
	vdev->priv = vi;
	return 0;

 free_send:
	vdev->config->del_vq(vi->svq);
 free_recv:
···
 static void virtnet_remove(struct virtio_device *vdev)
 {
	struct virtnet_info *vi = vdev->priv;

	vdev->config->del_vq(vi->svq);
	vdev->config->del_vq(vi->rvq);
···
 #include <linux/virtio_net.h>
 #include <linux/scatterlist.h>

+static int napi_weight = 128;
+module_param(napi_weight, int, 0444);
+
+static int csum = 1, gso = 1;
+module_param(csum, bool, 0444);
+module_param(gso, bool, 0444);
+
 /* FIXME: MTU in config. */
 #define MAX_PACKET_LEN (ETH_HLEN+ETH_DATA_LEN)
···
	sg_init_one(sg, skb_vnet_hdr(skb), sizeof(struct virtio_net_hdr));
 }

+static void skb_xmit_done(struct virtqueue *svq)
 {
+	struct virtnet_info *vi = svq->vdev->priv;

+	/* Suppress further interrupts. */
+	svq->vq_ops->disable_cb(svq);
+	/* We were waiting for more output buffers. */
	netif_wake_queue(vi->dev);
 }

 static void receive_skb(struct net_device *dev, struct sk_buff *skb,
···

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
		pr_debug("Needs csum!\n");
+		if (!skb_partial_csum_set(skb,hdr->csum_start,hdr->csum_offset))
			goto frame_err;
	}

	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
		pr_debug("GSO!\n");
+		switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
		case VIRTIO_NET_HDR_GSO_TCPV4:
			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
			break;
		case VIRTIO_NET_HDR_GSO_UDP:
			skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
···
			       dev->name, hdr->gso_type);
			goto frame_err;
		}
+
+		if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN)
+			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;

		skb_shinfo(skb)->gso_size = hdr->gso_size;
		if (skb_shinfo(skb)->gso_size == 0) {
···
	vi->rvq->vq_ops->kick(vi->rvq);
 }

+static void skb_recv_done(struct virtqueue *rvq)
 {
	struct virtnet_info *vi = rvq->vdev->priv;
+	/* Schedule NAPI, suppress further interrupts if successful. */
+	if (netif_rx_schedule_prep(vi->dev, &vi->napi)) {
+		rvq->vq_ops->disable_cb(rvq);
+		__netif_rx_schedule(vi->dev, &vi->napi);
+	}
 }

 static int virtnet_poll(struct napi_struct *napi, int budget)
···
	/* Out of packets? */
	if (received < budget) {
		netif_rx_complete(vi->dev, napi);
+		if (unlikely(!vi->rvq->vq_ops->enable_cb(vi->rvq))
		    && netif_rx_reschedule(vi->dev, napi))
			goto again;
	}
···

	pr_debug("%s: xmit %p %s\n", dev->name, skb, print_mac(mac, dest));

	/* Encode metadata header at front. */
	hdr = skb_vnet_hdr(skb);
	if (skb->ip_summed == CHECKSUM_PARTIAL) {
···
	}

	if (skb_is_gso(skb)) {
+		hdr->hdr_len = skb_transport_header(skb) - skb->data;
		hdr->gso_size = skb_shinfo(skb)->gso_size;
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
			hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
···
			hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
		else
			BUG();
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_ECN)
+			hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
	} else {
		hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+		hdr->gso_size = hdr->hdr_len = 0;
	}

	vnet_hdr_to_sg(sg, skb);
	num = skb_to_sgvec(skb, sg+1, 0, skb->len) + 1;
	__skb_queue_head(&vi->send, skb);
+
+again:
+	/* Free up any pending old buffers before queueing new ones. */
+	free_old_xmit_skbs(vi);
	err = vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb);
	if (err) {
		pr_debug("%s: virtio not prepared to send\n", dev->name);
		netif_stop_queue(dev);

+		/* Activate callback for using skbs: if this fails it
+		 * means some were used in the meantime. */
+		if (unlikely(!vi->svq->vq_ops->enable_cb(vi->svq))) {
+			printk("Unlikely: restart svq failed\n");
+			netif_start_queue(dev);
+			goto again;
+		}
+		__skb_unlink(skb, &vi->send);
+
		return NETDEV_TX_BUSY;
	}
	vi->svq->vq_ops->kick(vi->svq);
···
 {
	struct virtnet_info *vi = netdev_priv(dev);

	napi_enable(&vi->napi);
+
+	/* If all buffers were filled by other side before we napi_enabled, we
+	 * won't get another interrupt, so process any outstanding packets
+	 * now.  virtnet_poll wants to re-enable the queue, so we disable
+	 * here. */
+	vi->rvq->vq_ops->disable_cb(vi->rvq);
+	netif_rx_schedule(vi->dev, &vi->napi);
+
	return 0;
 }

 static int virtnet_close(struct net_device *dev)
 {
	struct virtnet_info *vi = netdev_priv(dev);

	napi_disable(&vi->napi);

	return 0;
 }

 static int virtnet_probe(struct virtio_device *vdev)
 {
	int err;
	struct net_device *dev;
	struct virtnet_info *vi;

	/* Allocate ourselves a network device with room for our info */
	dev = alloc_etherdev(sizeof(struct virtnet_info));
···
		return -ENOMEM;

	/* Set up network device as normal. */
	dev->open = virtnet_open;
	dev->stop = virtnet_close;
	dev->hard_start_xmit = start_xmit;
···
	SET_NETDEV_DEV(dev, &vdev->dev);

	/* Do we support "hardware" checksums? */
+	if (csum && vdev->config->feature(vdev, VIRTIO_NET_F_CSUM)) {
		/* This opens up the world of extra features. */
		dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST;
+		if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_GSO)) {
+			dev->features |= NETIF_F_TSO | NETIF_F_UFO
+				| NETIF_F_TSO_ECN | NETIF_F_TSO6;
+		}
	}

	/* Configuration may specify what MAC to use.  Otherwise random. */
+	if (vdev->config->feature(vdev, VIRTIO_NET_F_MAC)) {
+		vdev->config->get(vdev,
+				  offsetof(struct virtio_net_config, mac),
+				  dev->dev_addr, dev->addr_len);
	} else
		random_ether_addr(dev->dev_addr);

	/* Set up our device-specific information */
	vi = netdev_priv(dev);
+	netif_napi_add(dev, &vi->napi, virtnet_poll, napi_weight);
	vi->dev = dev;
	vi->vdev = vdev;

	/* We expect two virtqueues, receive then send. */
+	vi->rvq = vdev->config->find_vq(vdev, 0, skb_recv_done);
	if (IS_ERR(vi->rvq)) {
		err = PTR_ERR(vi->rvq);
		goto free;
	}

+	vi->svq = vdev->config->find_vq(vdev, 1, skb_xmit_done);
	if (IS_ERR(vi->svq)) {
		err = PTR_ERR(vi->svq);
		goto free_recv;
···
		pr_debug("virtio_net: registering device failed\n");
		goto free_send;
	}
+
+	/* Last of all, set up some receive buffers. */
+	try_fill_recv(vi);
+
+	/* If we didn't even get one input buffer, we're useless. */
+	if (vi->num == 0) {
+		err = -ENOMEM;
+		goto unregister;
+	}
+
	pr_debug("virtnet: registered device %s\n", dev->name);
	vdev->priv = vi;
	return 0;

+unregister:
+	unregister_netdev(dev);
 free_send:
	vdev->config->del_vq(vi->svq);
 free_recv:
···
 static void virtnet_remove(struct virtio_device *vdev)
 {
	struct virtnet_info *vi = vdev->priv;
+	struct sk_buff *skb;
+
+	/* Stop all the virtqueues. */
+	vdev->config->reset(vdev);
+
+	/* Free our skbs in send and recv queues, if any. */
+	while ((skb = __skb_dequeue(&vi->recv)) != NULL) {
+		kfree_skb(skb);
+		vi->num--;
+	}
+	while ((skb = __skb_dequeue(&vi->send)) != NULL)
+		kfree_skb(skb);
+
+	BUG_ON(vi->num != 0);

	vdev->config->del_vq(vi->svq);
	vdev->config->del_vq(vi->rvq);
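The disable_cb/enable_cb dance above (in skb_recv_done, virtnet_poll and the TX-busy path) is race-prone to reason about, so here is a toy single-threaded model of the pattern. 'pending' stands in for used buffers the host may publish at any time; none of these names are the real virtio API:

#include <stdio.h>
#include <stdbool.h>

static bool cb_enabled = true;
static int pending = 3;

static void disable_cb(void) { cb_enabled = false; }

/* Like enable_cb(): re-arm the callback, but return false if more work
 * slipped in while callbacks were off, so the caller polls again instead
 * of sleeping through it. */
static bool enable_cb(void) { cb_enabled = true; return pending == 0; }

static void poll_once(void)
{
	disable_cb();
again:
	while (pending)
		printf("consumed buffer, %d left\n", --pending);
	if (!enable_cb()) {	/* host raced us: more arrived */
		disable_cb();
		goto again;
	}
}

int main(void)
{
	poll_once();
	printf("callbacks re-enabled: %d\n", cb_enabled);
	return 0;
}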
+29 -2
drivers/virtio/Kconfig
···
 # Virtio always gets selected by whoever wants it.
 config VIRTIO
-	bool

 # Similarly the virtio ring implementation.
 config VIRTIO_RING
-	bool
	depends on VIRTIO
···
 # Virtio always gets selected by whoever wants it.
 config VIRTIO
+	tristate

 # Similarly the virtio ring implementation.
 config VIRTIO_RING
+	tristate
	depends on VIRTIO
+
+config VIRTIO_PCI
+	tristate "PCI driver for virtio devices (EXPERIMENTAL)"
+	depends on PCI && EXPERIMENTAL
+	select VIRTIO
+	select VIRTIO_RING
+	---help---
+	  This driver provides support for virtio based paravirtual device
+	  drivers over PCI.  This requires that your VMM has appropriate PCI
+	  virtio backends.  Most QEMU based VMMs should support these devices
+	  (like KVM or Xen).
+
+	  Currently, the ABI is not considered stable so there is no guarantee
+	  that this version of the driver will work with your VMM.
+
+	  If unsure, say M.
+
+config VIRTIO_BALLOON
+	tristate "Virtio balloon driver (EXPERIMENTAL)"
+	select VIRTIO
+	select VIRTIO_RING
+	---help---
+	  This driver supports increasing and decreasing the amount
+	  of memory within a KVM guest.
+
+	  If unsure, say M.
+2
drivers/virtio/Makefile
···
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
···
 obj-$(CONFIG_VIRTIO) += virtio.o
 obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
+obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
+18 -47
drivers/virtio/virtio.c
··· 102 struct virtio_driver *drv = container_of(dev->dev.driver, 103 struct virtio_driver, driver); 104 105 - dev->config->set_status(dev, dev->config->get_status(dev) 106 - & ~VIRTIO_CONFIG_S_DRIVER); 107 drv->remove(dev); 108 return 0; 109 } 110 ··· 134 dev->dev.bus = &virtio_bus; 135 sprintf(dev->dev.bus_id, "%u", dev->index); 136 137 /* Acknowledge that we've seen the device. */ 138 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); 139 ··· 156 } 157 EXPORT_SYMBOL_GPL(unregister_virtio_device); 158 159 - int __virtio_config_val(struct virtio_device *vdev, 160 - u8 type, void *val, size_t size) 161 - { 162 - void *token; 163 - unsigned int len; 164 - 165 - token = vdev->config->find(vdev, type, &len); 166 - if (!token) 167 - return -ENOENT; 168 - 169 - if (len != size) 170 - return -EIO; 171 - 172 - vdev->config->get(vdev, token, val, size); 173 - return 0; 174 - } 175 - EXPORT_SYMBOL_GPL(__virtio_config_val); 176 - 177 - int virtio_use_bit(struct virtio_device *vdev, 178 - void *token, unsigned int len, unsigned int bitnum) 179 - { 180 - unsigned long bits[16]; 181 - 182 - /* This makes it convenient to pass-through find() results. */ 183 - if (!token) 184 - return 0; 185 - 186 - /* bit not in range of this bitfield? */ 187 - if (bitnum * 8 >= len / 2) 188 - return 0; 189 - 190 - /* Giant feature bitfields are silly. */ 191 - BUG_ON(len > sizeof(bits)); 192 - vdev->config->get(vdev, token, bits, len); 193 - 194 - if (!test_bit(bitnum, bits)) 195 - return 0; 196 - 197 - /* Set acknowledge bit, and write it back. */ 198 - set_bit(bitnum + len * 8 / 2, bits); 199 - vdev->config->set(vdev, token, bits, len); 200 - return 1; 201 - } 202 - EXPORT_SYMBOL_GPL(virtio_use_bit); 203 - 204 static int virtio_init(void) 205 { 206 if (bus_register(&virtio_bus) != 0) 207 panic("virtio bus registration failed"); 208 return 0; 209 } 210 core_initcall(virtio_init);
··· 102 struct virtio_driver *drv = container_of(dev->dev.driver, 103 struct virtio_driver, driver); 104 105 drv->remove(dev); 106 + 107 + /* Driver should have reset device. */ 108 + BUG_ON(dev->config->get_status(dev)); 109 + 110 + /* Acknowledge the device's existence again. */ 111 + add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); 112 return 0; 113 } 114 ··· 130 dev->dev.bus = &virtio_bus; 131 sprintf(dev->dev.bus_id, "%u", dev->index); 132 133 + /* We always start by resetting the device, in case a previous 134 + * driver messed it up. This also tests that code path a little. */ 135 + dev->config->reset(dev); 136 + 137 /* Acknowledge that we've seen the device. */ 138 add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE); 139 ··· 148 } 149 EXPORT_SYMBOL_GPL(unregister_virtio_device); 150 151 static int virtio_init(void) 152 { 153 if (bus_register(&virtio_bus) != 0) 154 panic("virtio bus registration failed"); 155 return 0; 156 } 157 + 158 + static void __exit virtio_exit(void) 159 + { 160 + bus_unregister(&virtio_bus); 161 + } 162 core_initcall(virtio_init); 163 + module_exit(virtio_exit); 164 + 165 + MODULE_LICENSE("GPL");
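Note the contract the rewritten remove path enforces: the bus now BUG()s if the driver leaves a nonzero status byte behind, so a driver's remove must reset the device before tearing down its queues. A minimal hedged sketch of that ordering, using hypothetical foo_* names (virtballoon_remove below follows the same shape):

struct foo_priv {
        struct virtqueue *vq;
};

static void foo_remove(struct virtio_device *vdev)
{
        struct foo_priv *foo = vdev->priv;

        /* Reset first: status must read back as 0 by the time the
         * bus-level remove runs its BUG_ON(). */
        vdev->config->reset(vdev);

        /* Only now is it safe to tear down the virtqueue. */
        vdev->config->del_vq(foo->vq);
        kfree(foo);
}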
+284
drivers/virtio/virtio_balloon.c
···
··· 1 + /* Virtio balloon implementation, inspired by Dor Loar and Marcelo 2 + * Tosatti's implementations. 3 + * 4 + * Copyright 2008 Rusty Russell IBM Corporation 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License as published by 8 + * the Free Software Foundation; either version 2 of the License, or 9 + * (at your option) any later version. 10 + * 11 + * This program is distributed in the hope that it will be useful, 12 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 + * GNU General Public License for more details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write to the Free Software 18 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 + */ 20 + //#define DEBUG 21 + #include <linux/virtio.h> 22 + #include <linux/virtio_balloon.h> 23 + #include <linux/swap.h> 24 + #include <linux/kthread.h> 25 + #include <linux/freezer.h> 26 + 27 + struct virtio_balloon 28 + { 29 + struct virtio_device *vdev; 30 + struct virtqueue *inflate_vq, *deflate_vq; 31 + 32 + /* Where the ballooning thread waits for config to change. */ 33 + wait_queue_head_t config_change; 34 + 35 + /* The thread servicing the balloon. */ 36 + struct task_struct *thread; 37 + 38 + /* Waiting for host to ack the pages we released. */ 39 + struct completion acked; 40 + 41 + /* Do we have to tell Host *before* we reuse pages? */ 42 + bool tell_host_first; 43 + 44 + /* The pages we've told the Host we're not using. */ 45 + unsigned int num_pages; 46 + struct list_head pages; 47 + 48 + /* The array of pfns we tell the Host about. */ 49 + unsigned int num_pfns; 50 + u32 pfns[256]; 51 + }; 52 + 53 + static struct virtio_device_id id_table[] = { 54 + { VIRTIO_ID_BALLOON, VIRTIO_DEV_ANY_ID }, 55 + { 0 }, 56 + }; 57 + 58 + static void balloon_ack(struct virtqueue *vq) 59 + { 60 + struct virtio_balloon *vb; 61 + unsigned int len; 62 + 63 + vb = vq->vq_ops->get_buf(vq, &len); 64 + if (vb) 65 + complete(&vb->acked); 66 + } 67 + 68 + static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) 69 + { 70 + struct scatterlist sg; 71 + 72 + sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns); 73 + 74 + init_completion(&vb->acked); 75 + 76 + /* We should always be able to add one buffer to an empty queue. */ 77 + if (vq->vq_ops->add_buf(vq, &sg, 1, 0, vb) != 0) 78 + BUG(); 79 + vq->vq_ops->kick(vq); 80 + 81 + /* When host has read buffer, this completes via balloon_ack */ 82 + wait_for_completion(&vb->acked); 83 + } 84 + 85 + static void fill_balloon(struct virtio_balloon *vb, size_t num) 86 + { 87 + /* We can only do one array worth at a time. */ 88 + num = min(num, ARRAY_SIZE(vb->pfns)); 89 + 90 + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { 91 + struct page *page = alloc_page(GFP_HIGHUSER | __GFP_NORETRY); 92 + if (!page) { 93 + if (printk_ratelimit()) 94 + dev_printk(KERN_INFO, &vb->vdev->dev, 95 + "Out of puff! Can't get %zu pages\n", 96 + num); 97 + /* Sleep for at least 1/5 of a second before retry. */ 98 + msleep(200); 99 + break; 100 + } 101 + vb->pfns[vb->num_pfns] = page_to_pfn(page); 102 + totalram_pages--; 103 + vb->num_pages++; 104 + list_add(&page->lru, &vb->pages); 105 + } 106 + 107 + /* Didn't get any? Oh well. 
*/ 108 + if (vb->num_pfns == 0) 109 + return; 110 + 111 + tell_host(vb, vb->inflate_vq); 112 + } 113 + 114 + static void release_pages_by_pfn(const u32 pfns[], unsigned int num) 115 + { 116 + unsigned int i; 117 + 118 + for (i = 0; i < num; i++) { 119 + __free_page(pfn_to_page(pfns[i])); 120 + totalram_pages++; 121 + } 122 + } 123 + 124 + static void leak_balloon(struct virtio_balloon *vb, size_t num) 125 + { 126 + struct page *page; 127 + 128 + /* We can only do one array worth at a time. */ 129 + num = min(num, ARRAY_SIZE(vb->pfns)); 130 + 131 + for (vb->num_pfns = 0; vb->num_pfns < num; vb->num_pfns++) { 132 + page = list_first_entry(&vb->pages, struct page, lru); 133 + list_del(&page->lru); 134 + vb->pfns[vb->num_pfns] = page_to_pfn(page); 135 + vb->num_pages--; 136 + } 137 + 138 + if (vb->tell_host_first) { 139 + tell_host(vb, vb->deflate_vq); 140 + release_pages_by_pfn(vb->pfns, vb->num_pfns); 141 + } else { 142 + release_pages_by_pfn(vb->pfns, vb->num_pfns); 143 + tell_host(vb, vb->deflate_vq); 144 + } 145 + } 146 + 147 + static void virtballoon_changed(struct virtio_device *vdev) 148 + { 149 + struct virtio_balloon *vb = vdev->priv; 150 + 151 + wake_up(&vb->config_change); 152 + } 153 + 154 + static inline int towards_target(struct virtio_balloon *vb) 155 + { 156 + u32 v; 157 + __virtio_config_val(vb->vdev, 158 + offsetof(struct virtio_balloon_config, num_pages), 159 + &v); 160 + return v - vb->num_pages; 161 + } 162 + 163 + static void update_balloon_size(struct virtio_balloon *vb) 164 + { 165 + __le32 actual = cpu_to_le32(vb->num_pages); 166 + 167 + vb->vdev->config->set(vb->vdev, 168 + offsetof(struct virtio_balloon_config, actual), 169 + &actual, sizeof(actual)); 170 + } 171 + 172 + static int balloon(void *_vballoon) 173 + { 174 + struct virtio_balloon *vb = _vballoon; 175 + 176 + set_freezable(); 177 + while (!kthread_should_stop()) { 178 + int diff; 179 + 180 + try_to_freeze(); 181 + wait_event_interruptible(vb->config_change, 182 + (diff = towards_target(vb)) != 0 183 + || kthread_should_stop()); 184 + if (diff > 0) 185 + fill_balloon(vb, diff); 186 + else if (diff < 0) 187 + leak_balloon(vb, -diff); 188 + update_balloon_size(vb); 189 + } 190 + return 0; 191 + } 192 + 193 + static int virtballoon_probe(struct virtio_device *vdev) 194 + { 195 + struct virtio_balloon *vb; 196 + int err; 197 + 198 + vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); 199 + if (!vb) { 200 + err = -ENOMEM; 201 + goto out; 202 + } 203 + 204 + INIT_LIST_HEAD(&vb->pages); 205 + vb->num_pages = 0; 206 + init_waitqueue_head(&vb->config_change); 207 + vb->vdev = vdev; 208 + 209 + /* We expect two virtqueues. 
*/ 210 + vb->inflate_vq = vdev->config->find_vq(vdev, 0, balloon_ack); 211 + if (IS_ERR(vb->inflate_vq)) { 212 + err = PTR_ERR(vb->inflate_vq); 213 + goto out_free_vb; 214 + } 215 + 216 + vb->deflate_vq = vdev->config->find_vq(vdev, 1, balloon_ack); 217 + if (IS_ERR(vb->deflate_vq)) { 218 + err = PTR_ERR(vb->deflate_vq); 219 + goto out_del_inflate_vq; 220 + } 221 + 222 + vb->thread = kthread_run(balloon, vb, "vballoon"); 223 + if (IS_ERR(vb->thread)) { 224 + err = PTR_ERR(vb->thread); 225 + goto out_del_deflate_vq; 226 + } 227 + 228 + vb->tell_host_first 229 + = vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); 230 + 231 + return 0; 232 + 233 + out_del_deflate_vq: 234 + vdev->config->del_vq(vb->deflate_vq); 235 + out_del_inflate_vq: 236 + vdev->config->del_vq(vb->inflate_vq); 237 + out_free_vb: 238 + kfree(vb); 239 + out: 240 + return err; 241 + } 242 + 243 + static void virtballoon_remove(struct virtio_device *vdev) 244 + { 245 + struct virtio_balloon *vb = vdev->priv; 246 + 247 + kthread_stop(vb->thread); 248 + 249 + /* There might be pages left in the balloon: free them. */ 250 + while (vb->num_pages) 251 + leak_balloon(vb, vb->num_pages); 252 + 253 + /* Now we reset the device so we can clean up the queues. */ 254 + vdev->config->reset(vdev); 255 + 256 + vdev->config->del_vq(vb->deflate_vq); 257 + vdev->config->del_vq(vb->inflate_vq); 258 + kfree(vb); 259 + } 260 + 261 + static struct virtio_driver virtio_balloon = { 262 + .driver.name = KBUILD_MODNAME, 263 + .driver.owner = THIS_MODULE, 264 + .id_table = id_table, 265 + .probe = virtballoon_probe, 266 + .remove = __devexit_p(virtballoon_remove), 267 + .config_changed = virtballoon_changed, 268 + }; 269 + 270 + static int __init init(void) 271 + { 272 + return register_virtio_driver(&virtio_balloon); 273 + } 274 + 275 + static void __exit fini(void) 276 + { 277 + unregister_virtio_driver(&virtio_balloon); 278 + } 279 + module_init(init); 280 + module_exit(fini); 281 + 282 + MODULE_DEVICE_TABLE(virtio, id_table); 283 + MODULE_DESCRIPTION("Virtio balloon driver"); 284 + MODULE_LICENSE("GPL");
+446
drivers/virtio/virtio_pci.c
···
··· 1 + /* 2 + * Virtio PCI driver 3 + * 4 + * This module allows virtio devices to be used over a virtual PCI device. 5 + * This can be used with QEMU based VMMs like KVM or Xen. 6 + * 7 + * Copyright IBM Corp. 2007 8 + * 9 + * Authors: 10 + * Anthony Liguori <aliguori@us.ibm.com> 11 + * 12 + * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 + * See the COPYING file in the top-level directory. 14 + * 15 + */ 16 + 17 + #include <linux/module.h> 18 + #include <linux/list.h> 19 + #include <linux/pci.h> 20 + #include <linux/interrupt.h> 21 + #include <linux/virtio.h> 22 + #include <linux/virtio_config.h> 23 + #include <linux/virtio_ring.h> 24 + #include <linux/virtio_pci.h> 25 + #include <linux/highmem.h> 26 + #include <linux/spinlock.h> 27 + 28 + MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>"); 29 + MODULE_DESCRIPTION("virtio-pci"); 30 + MODULE_LICENSE("GPL"); 31 + MODULE_VERSION("1"); 32 + 33 + /* Our device structure */ 34 + struct virtio_pci_device 35 + { 36 + struct virtio_device vdev; 37 + struct pci_dev *pci_dev; 38 + 39 + /* the IO mapping for the PCI config space */ 40 + void *ioaddr; 41 + 42 + /* a list of queues so we can dispatch IRQs */ 43 + spinlock_t lock; 44 + struct list_head virtqueues; 45 + }; 46 + 47 + struct virtio_pci_vq_info 48 + { 49 + /* the actual virtqueue */ 50 + struct virtqueue *vq; 51 + 52 + /* the number of entries in the queue */ 53 + int num; 54 + 55 + /* the index of the queue */ 56 + int queue_index; 57 + 58 + /* the virtual address of the ring queue */ 59 + void *queue; 60 + 61 + /* the list node for the virtqueues list */ 62 + struct list_head node; 63 + }; 64 + 65 + /* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */ 66 + static struct pci_device_id virtio_pci_id_table[] = { 67 + { 0x1af4, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, 68 + { 0 }, 69 + }; 70 + 71 + MODULE_DEVICE_TABLE(pci, virtio_pci_id_table); 72 + 73 + /* A PCI device has its own struct device and so does a virtio device so 74 + * we create a place for the virtio devices to show up in sysfs. I think it 75 + * would make more sense for virtio to not insist on having its own device.
*/ 76 + static struct device virtio_pci_root = { 77 + .parent = NULL, 78 + .bus_id = "virtio-pci", 79 + }; 80 + 81 + /* Unique numbering for devices under the kvm root */ 82 + static unsigned int dev_index; 83 + 84 + /* Convert a generic virtio device to our structure */ 85 + static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) 86 + { 87 + return container_of(vdev, struct virtio_pci_device, vdev); 88 + } 89 + 90 + /* virtio config->feature() implementation */ 91 + static bool vp_feature(struct virtio_device *vdev, unsigned bit) 92 + { 93 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 94 + u32 mask; 95 + 96 + /* Since this function is supposed to have the side effect of 97 + * enabling a queried feature, we simulate that by doing a read 98 + * from the host feature bitmask and then writing to the guest 99 + * feature bitmask */ 100 + mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); 101 + if (mask & (1 << bit)) { 102 + mask |= (1 << bit); 103 + iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); 104 + } 105 + 106 + return !!(mask & (1 << bit)); 107 + } 108 + 109 + /* virtio config->get() implementation */ 110 + static void vp_get(struct virtio_device *vdev, unsigned offset, 111 + void *buf, unsigned len) 112 + { 113 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 114 + void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; 115 + u8 *ptr = buf; 116 + int i; 117 + 118 + for (i = 0; i < len; i++) 119 + ptr[i] = ioread8(ioaddr + i); 120 + } 121 + 122 + /* the config->set() implementation. it's symmetric to the config->get() 123 + * implementation */ 124 + static void vp_set(struct virtio_device *vdev, unsigned offset, 125 + const void *buf, unsigned len) 126 + { 127 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 128 + void *ioaddr = vp_dev->ioaddr + VIRTIO_PCI_CONFIG + offset; 129 + const u8 *ptr = buf; 130 + int i; 131 + 132 + for (i = 0; i < len; i++) 133 + iowrite8(ptr[i], ioaddr + i); 134 + } 135 + 136 + /* config->{get,set}_status() implementations */ 137 + static u8 vp_get_status(struct virtio_device *vdev) 138 + { 139 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 140 + return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); 141 + } 142 + 143 + static void vp_set_status(struct virtio_device *vdev, u8 status) 144 + { 145 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 146 + /* We should never be setting status to 0. */ 147 + BUG_ON(status == 0); 148 + return iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); 149 + } 150 + 151 + static void vp_reset(struct virtio_device *vdev) 152 + { 153 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 154 + /* 0 status means a reset. */ 155 + return iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); 156 + } 157 + 158 + /* the notify function used when creating a virt queue */ 159 + static void vp_notify(struct virtqueue *vq) 160 + { 161 + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 162 + struct virtio_pci_vq_info *info = vq->priv; 163 + 164 + /* we write the queue's selector into the notification register to 165 + * signal the other end */ 166 + iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY); 167 + } 168 + 169 + /* A small wrapper to also acknowledge the interrupt when it's handled. 
170 + * I really need an EIO hook for the vring so I can ack the interrupt once we 171 + * know that we'll be handling the IRQ but before we invoke the callback since 172 + * the callback may notify the host which results in the host attempting to 173 + * raise an interrupt that we would then mask once we acknowledged the 174 + * interrupt. */ 175 + static irqreturn_t vp_interrupt(int irq, void *opaque) 176 + { 177 + struct virtio_pci_device *vp_dev = opaque; 178 + struct virtio_pci_vq_info *info; 179 + irqreturn_t ret = IRQ_NONE; 180 + u8 isr; 181 + 182 + /* reading the ISR has the effect of also clearing it so it's very 183 + * important to save off the value. */ 184 + isr = ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); 185 + 186 + /* It's definitely not us if the ISR was not high */ 187 + if (!isr) 188 + return IRQ_NONE; 189 + 190 + /* Configuration change? Tell driver if it wants to know. */ 191 + if (isr & VIRTIO_PCI_ISR_CONFIG) { 192 + struct virtio_driver *drv; 193 + drv = container_of(vp_dev->vdev.dev.driver, 194 + struct virtio_driver, driver); 195 + 196 + if (drv->config_changed) 197 + drv->config_changed(&vp_dev->vdev); 198 + } 199 + 200 + spin_lock(&vp_dev->lock); 201 + list_for_each_entry(info, &vp_dev->virtqueues, node) { 202 + if (vring_interrupt(irq, info->vq) == IRQ_HANDLED) 203 + ret = IRQ_HANDLED; 204 + } 205 + spin_unlock(&vp_dev->lock); 206 + 207 + return ret; 208 + } 209 + 210 + /* the config->find_vq() implementation */ 211 + static struct virtqueue *vp_find_vq(struct virtio_device *vdev, unsigned index, 212 + void (*callback)(struct virtqueue *vq)) 213 + { 214 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 215 + struct virtio_pci_vq_info *info; 216 + struct virtqueue *vq; 217 + u16 num; 218 + int err; 219 + 220 + /* Select the queue we're interested in */ 221 + iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 222 + 223 + /* Check if queue is either not available or already active. 
*/ 224 + num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); 225 + if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) 226 + return ERR_PTR(-ENOENT); 227 + 228 + /* allocate and fill out our structure that represents an active 229 + * queue */ 230 + info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL); 231 + if (!info) 232 + return ERR_PTR(-ENOMEM); 233 + 234 + info->queue_index = index; 235 + info->num = num; 236 + 237 + info->queue = kzalloc(PAGE_ALIGN(vring_size(num, PAGE_SIZE)), GFP_KERNEL); 238 + if (info->queue == NULL) { 239 + err = -ENOMEM; 240 + goto out_info; 241 + } 242 + 243 + /* activate the queue */ 244 + iowrite32(virt_to_phys(info->queue) >> PAGE_SHIFT, 245 + vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 246 + 247 + /* create the vring */ 248 + vq = vring_new_virtqueue(info->num, vdev, info->queue, 249 + vp_notify, callback); 250 + if (!vq) { 251 + err = -ENOMEM; 252 + goto out_activate_queue; 253 + } 254 + 255 + vq->priv = info; 256 + info->vq = vq; 257 + 258 + spin_lock(&vp_dev->lock); 259 + list_add(&info->node, &vp_dev->virtqueues); 260 + spin_unlock(&vp_dev->lock); 261 + 262 + return vq; 263 + 264 + out_activate_queue: 265 + iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 266 + kfree(info->queue); 267 + out_info: 268 + kfree(info); 269 + return ERR_PTR(err); 270 + } 271 + 272 + /* the config->del_vq() implementation */ 273 + static void vp_del_vq(struct virtqueue *vq) 274 + { 275 + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 276 + struct virtio_pci_vq_info *info = vq->priv; 277 + 278 + spin_lock(&vp_dev->lock); 279 + list_del(&info->node); 280 + spin_unlock(&vp_dev->lock); 281 + 282 + vring_del_virtqueue(vq); 283 + 284 + /* Select and deactivate the queue */ 285 + iowrite16(info->queue_index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 286 + iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 287 + 288 + kfree(info->queue); 289 + kfree(info); 290 + } 291 + 292 + static struct virtio_config_ops virtio_pci_config_ops = { 293 + .feature = vp_feature, 294 + .get = vp_get, 295 + .set = vp_set, 296 + .get_status = vp_get_status, 297 + .set_status = vp_set_status, 298 + .reset = vp_reset, 299 + .find_vq = vp_find_vq, 300 + .del_vq = vp_del_vq, 301 + }; 302 + 303 + /* the PCI probing function */ 304 + static int __devinit virtio_pci_probe(struct pci_dev *pci_dev, 305 + const struct pci_device_id *id) 306 + { 307 + struct virtio_pci_device *vp_dev; 308 + int err; 309 + 310 + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest.
*/ 311 + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) 312 + return -ENODEV; 313 + 314 + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { 315 + printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", 316 + VIRTIO_PCI_ABI_VERSION, pci_dev->revision); 317 + return -ENODEV; 318 + } 319 + 320 + /* allocate our structure and fill it out */ 321 + vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL); 322 + if (vp_dev == NULL) 323 + return -ENOMEM; 324 + 325 + snprintf(vp_dev->vdev.dev.bus_id, BUS_ID_SIZE, "virtio%d", dev_index); 326 + vp_dev->vdev.index = dev_index; 327 + dev_index++; 328 + 329 + vp_dev->vdev.dev.parent = &virtio_pci_root; 330 + vp_dev->vdev.config = &virtio_pci_config_ops; 331 + vp_dev->pci_dev = pci_dev; 332 + INIT_LIST_HEAD(&vp_dev->virtqueues); 333 + spin_lock_init(&vp_dev->lock); 334 + 335 + /* enable the device */ 336 + err = pci_enable_device(pci_dev); 337 + if (err) 338 + goto out; 339 + 340 + err = pci_request_regions(pci_dev, "virtio-pci"); 341 + if (err) 342 + goto out_enable_device; 343 + 344 + vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); 345 + if (vp_dev->ioaddr == NULL) 346 + goto out_req_regions; 347 + 348 + pci_set_drvdata(pci_dev, vp_dev); 349 + 350 + /* we use the subsystem vendor/device id as the virtio vendor/device 351 + * id. this allows us to use the same PCI vendor/device id for all 352 + * virtio devices and to identify the particular virtio driver by 353 + * the subsystem ids */ 354 + vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; 355 + vp_dev->vdev.id.device = pci_dev->subsystem_device; 356 + 357 + /* register a handler for the queue with the PCI device's interrupt */ 358 + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, 359 + vp_dev->vdev.dev.bus_id, vp_dev); 360 + if (err) 361 + goto out_set_drvdata; 362 + 363 + /* finally register the virtio device */ 364 + err = register_virtio_device(&vp_dev->vdev); 365 + if (err) 366 + goto out_req_irq; 367 + 368 + return 0; 369 + 370 + out_req_irq: 371 + free_irq(pci_dev->irq, vp_dev); 372 + out_set_drvdata: 373 + pci_set_drvdata(pci_dev, NULL); 374 + pci_iounmap(pci_dev, vp_dev->ioaddr); 375 + out_req_regions: 376 + pci_release_regions(pci_dev); 377 + out_enable_device: 378 + pci_disable_device(pci_dev); 379 + out: 380 + kfree(vp_dev); 381 + return err; 382 + } 383 + 384 + static void __devexit virtio_pci_remove(struct pci_dev *pci_dev) 385 + { 386 + struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); 387 + 388 + free_irq(pci_dev->irq, vp_dev); 389 + pci_set_drvdata(pci_dev, NULL); 390 + pci_iounmap(pci_dev, vp_dev->ioaddr); 391 + pci_release_regions(pci_dev); 392 + pci_disable_device(pci_dev); 393 + kfree(vp_dev); 394 + } 395 + 396 + #ifdef CONFIG_PM 397 + static int virtio_pci_suspend(struct pci_dev *pci_dev, pm_message_t state) 398 + { 399 + pci_save_state(pci_dev); 400 + pci_set_power_state(pci_dev, PCI_D3hot); 401 + return 0; 402 + } 403 + 404 + static int virtio_pci_resume(struct pci_dev *pci_dev) 405 + { 406 + pci_restore_state(pci_dev); 407 + pci_set_power_state(pci_dev, PCI_D0); 408 + return 0; 409 + } 410 + #endif 411 + 412 + static struct pci_driver virtio_pci_driver = { 413 + .name = "virtio-pci", 414 + .id_table = virtio_pci_id_table, 415 + .probe = virtio_pci_probe, 416 + .remove = virtio_pci_remove, 417 + #ifdef CONFIG_PM 418 + .suspend = virtio_pci_suspend, 419 + .resume = virtio_pci_resume, 420 + #endif 421 + }; 422 + 423 + static int __init virtio_pci_init(void) 424 + { 425 + int err; 426 + 427 + err =
device_register(&virtio_pci_root); 428 + if (err) 429 + return err; 430 + 431 + err = pci_register_driver(&virtio_pci_driver); 432 + if (err) 433 + device_unregister(&virtio_pci_root); 434 + 435 + return err; 436 + } 437 + 438 + module_init(virtio_pci_init); 439 + 440 + static void __exit virtio_pci_exit(void) 441 + { 442 + device_unregister(&virtio_pci_root); 443 + pci_unregister_driver(&virtio_pci_driver); 444 + } 445 + 446 + module_exit(virtio_pci_exit);
+31 -20
drivers/virtio/virtio_ring.c
··· 87 if (vq->num_free < out + in) { 88 pr_debug("Can't add buf len %i - avail = %i\n", 89 out + in, vq->num_free); 90 END_USE(vq); 91 return -ENOSPC; 92 } ··· 99 head = vq->free_head; 100 for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { 101 vq->vring.desc[i].flags = VRING_DESC_F_NEXT; 102 - vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) 103 - + sg->offset; 104 vq->vring.desc[i].len = sg->length; 105 prev = i; 106 sg++; 107 } 108 for (; in; i = vq->vring.desc[i].next, in--) { 109 vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 110 - vq->vring.desc[i].addr = (page_to_pfn(sg_page(sg))<<PAGE_SHIFT) 111 - + sg->offset; 112 vq->vring.desc[i].len = sg->length; 113 prev = i; 114 sg++; ··· 171 vq->num_free++; 172 } 173 174 - /* FIXME: We need to tell other side about removal, to synchronize. */ 175 - static void vring_shutdown(struct virtqueue *_vq) 176 - { 177 - struct vring_virtqueue *vq = to_vvq(_vq); 178 - unsigned int i; 179 - 180 - for (i = 0; i < vq->vring.num; i++) 181 - detach_buf(vq, i); 182 - } 183 - 184 static inline bool more_used(const struct vring_virtqueue *vq) 185 { 186 return vq->last_used_idx != vq->vring.used->idx; ··· 210 return ret; 211 } 212 213 - static bool vring_restart(struct virtqueue *_vq) 214 { 215 struct vring_virtqueue *vq = to_vvq(_vq); 216 ··· 253 if (unlikely(vq->broken)) 254 return IRQ_HANDLED; 255 256 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 257 - if (vq->vq.callback && !vq->vq.callback(&vq->vq)) 258 - vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 259 260 return IRQ_HANDLED; 261 } 262 263 static struct virtqueue_ops vring_vq_ops = { 264 .add_buf = vring_add_buf, 265 .get_buf = vring_get_buf, 266 .kick = vring_kick, 267 - .restart = vring_restart, 268 - .shutdown = vring_shutdown, 269 }; 270 271 struct virtqueue *vring_new_virtqueue(unsigned int num, 272 struct virtio_device *vdev, 273 void *pages, 274 void (*notify)(struct virtqueue *), 275 - bool (*callback)(struct virtqueue *)) 276 { 277 struct vring_virtqueue *vq; 278 unsigned int i; ··· 319 320 return &vq->vq; 321 } 322 323 void vring_del_virtqueue(struct virtqueue *vq) 324 { 325 kfree(to_vvq(vq)); 326 } 327
··· 87 if (vq->num_free < out + in) { 88 pr_debug("Can't add buf len %i - avail = %i\n", 89 out + in, vq->num_free); 90 + /* We notify *even if* VRING_USED_F_NO_NOTIFY is set here. */ 91 + vq->notify(&vq->vq); 92 END_USE(vq); 93 return -ENOSPC; 94 } ··· 97 head = vq->free_head; 98 for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) { 99 vq->vring.desc[i].flags = VRING_DESC_F_NEXT; 100 + vq->vring.desc[i].addr = sg_phys(sg); 101 vq->vring.desc[i].len = sg->length; 102 prev = i; 103 sg++; 104 } 105 for (; in; i = vq->vring.desc[i].next, in--) { 106 vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE; 107 + vq->vring.desc[i].addr = sg_phys(sg); 108 vq->vring.desc[i].len = sg->length; 109 prev = i; 110 sg++; ··· 171 vq->num_free++; 172 } 173 174 static inline bool more_used(const struct vring_virtqueue *vq) 175 { 176 return vq->last_used_idx != vq->vring.used->idx; ··· 220 return ret; 221 } 222 223 + static void vring_disable_cb(struct virtqueue *_vq) 224 + { 225 + struct vring_virtqueue *vq = to_vvq(_vq); 226 + 227 + START_USE(vq); 228 + BUG_ON(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT); 229 + vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; 230 + END_USE(vq); 231 + } 232 + 233 + static bool vring_enable_cb(struct virtqueue *_vq) 234 { 235 struct vring_virtqueue *vq = to_vvq(_vq); 236 ··· 253 if (unlikely(vq->broken)) 254 return IRQ_HANDLED; 255 256 + /* Other side may have missed us turning off the interrupt, 257 + * but we should preserve disable semantic for virtio users. */ 258 + if (unlikely(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { 259 + pr_debug("virtqueue interrupt after disable for %p\n", vq); 260 + return IRQ_HANDLED; 261 + } 262 + 263 pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback); 264 + if (vq->vq.callback) 265 + vq->vq.callback(&vq->vq); 266 267 return IRQ_HANDLED; 268 } 269 + EXPORT_SYMBOL_GPL(vring_interrupt); 270 271 static struct virtqueue_ops vring_vq_ops = { 272 .add_buf = vring_add_buf, 273 .get_buf = vring_get_buf, 274 .kick = vring_kick, 275 + .disable_cb = vring_disable_cb, 276 + .enable_cb = vring_enable_cb, 277 }; 278 279 struct virtqueue *vring_new_virtqueue(unsigned int num, 280 struct virtio_device *vdev, 281 void *pages, 282 void (*notify)(struct virtqueue *), 283 + void (*callback)(struct virtqueue *)) 284 { 285 struct vring_virtqueue *vq; 286 unsigned int i; ··· 311 312 return &vq->vq; 313 } 314 + EXPORT_SYMBOL_GPL(vring_new_virtqueue); 315 316 void vring_del_virtqueue(struct virtqueue *vq) 317 { 318 kfree(to_vvq(vq)); 319 } 320 + EXPORT_SYMBOL_GPL(vring_del_virtqueue); 321 322 + MODULE_LICENSE("GPL");
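The old restart/shutdown pair is gone; disable_cb()/enable_cb() is the new interrupt-mitigation interface. Below is a hedged sketch of the usage pattern it enables, draining buffers from deferred work rather than in the callback. The foo_* names and foo_consume() are illustrative, and the re-kick logic assumes the documented semantic that a false return from enable_cb() leaves callbacks suppressed:

#include <linux/virtio.h>
#include <linux/workqueue.h>

struct foo_dev {
        struct virtqueue *vq;
        struct work_struct work;  /* INIT_WORK(&work, foo_work_fn) at probe */
};

static void foo_work_fn(struct work_struct *work)
{
        struct foo_dev *foo = container_of(work, struct foo_dev, work);
        unsigned int len;
        void *buf;

        /* Drain everything the host has finished with. */
        while ((buf = foo->vq->vq_ops->get_buf(foo->vq, &len)) != NULL)
                foo_consume(buf, len);  /* driver-specific handling */

        /* Re-arm callbacks; "false" means buffers raced in, so run
         * again instead of waiting for another interrupt. */
        if (!foo->vq->vq_ops->enable_cb(foo->vq))
                schedule_work(&foo->work);
}

static void foo_callback(struct virtqueue *vq)
{
        struct foo_dev *foo = vq->vdev->priv;

        /* Suppress further callbacks until the worker has drained. */
        vq->vq_ops->disable_cb(vq);
        schedule_work(&foo->work);
}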
+7 -2
include/linux/lguest_launcher.h
··· 23 struct lguest_device_desc { 24 /* The device type: console, network, disk etc. Type 0 terminates. */ 25 __u8 type; 26 - /* The number of bytes of the config array. */ 27 __u8 config_len; 28 /* A status byte, written by the Guest. */ 29 __u8 status; ··· 36 }; 37 38 /*D:135 This is how we expect the device configuration field for a virtqueue 39 - * (type VIRTIO_CONFIG_F_VIRTQUEUE) to be laid out: */ 40 struct lguest_vqconfig { 41 /* The number of entries in the virtio_ring */ 42 __u16 num;
··· 23 struct lguest_device_desc { 24 /* The device type: console, network, disk etc. Type 0 terminates. */ 25 __u8 type; 26 + /* The number of virtqueues (first in config array) */ 27 + __u8 num_vq; 28 + /* The number of bytes of feature bits. Multiply by 2: one for host 29 + * features and one for guest acknowledgements. */ 30 + __u8 feature_len; 31 + /* The number of bytes of the config array after virtqueues. */ 32 __u8 config_len; 33 /* A status byte, written by the Guest. */ 34 __u8 status; ··· 31 }; 32 33 /*D:135 This is how we expect the device configuration field for a virtqueue 34 + * to be laid out in config space. */ 35 struct lguest_vqconfig { 36 /* The number of entries in the virtio_ring */ 37 __u16 num;
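With num_vq and feature_len in the descriptor, the rest of the layout is implicit rather than tag-based: the lguest_vqconfig array follows the descriptor, then two feature_len-byte bitmaps (host features, then guest acknowledgements), then config_len bytes of device config. A sketch of the resulting pointer arithmetic; the helper names are illustrative, though the in-tree lguest bus code has equivalents:

/* Feature bitmaps sit after the descriptor and its vqconfig array. */
static u8 *lg_features(const struct lguest_device_desc *desc)
{
        return (u8 *)(desc + 1)
               + desc->num_vq * sizeof(struct lguest_vqconfig);
}

/* Device config space follows both feature bitmaps. */
static u8 *lg_config(const struct lguest_device_desc *desc)
{
        return lg_features(desc) + desc->feature_len * 2;
}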
+1
include/linux/skbuff.h
··· 1810 skb->ip_summed = CHECKSUM_NONE; 1811 } 1812 1813 #endif /* __KERNEL__ */ 1814 #endif /* _LINUX_SKBUFF_H */
··· 1810 skb->ip_summed = CHECKSUM_NONE; 1811 } 1812 1813 + bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); 1814 #endif /* __KERNEL__ */ 1815 #endif /* _LINUX_SKBUFF_H */
+9 -10
include/linux/virtio.h
··· 11 /** 12 * virtqueue - a queue to register buffers for sending or receiving. 13 * @callback: the function to call when buffers are consumed (can be NULL). 14 - * If this returns false, callbacks are suppressed until vq_ops->restart 15 - * is called. 16 * @vdev: the virtio device this queue was created for. 17 * @vq_ops: the operations for this virtqueue (see below). 18 * @priv: a pointer for the virtqueue implementation to use. 19 */ 20 struct virtqueue 21 { 22 - bool (*callback)(struct virtqueue *vq); 23 struct virtio_device *vdev; 24 struct virtqueue_ops *vq_ops; 25 void *priv; ··· 39 * vq: the struct virtqueue we're talking about. 40 * len: the length written into the buffer 41 * Returns NULL or the "data" token handed to add_buf. 42 - * @restart: restart callbacks after callback returned false. 43 * vq: the struct virtqueue we're talking about. 44 * This returns "false" (and doesn't re-enable) if there are pending 45 * buffers in the queue, to avoid a race. 46 - * @shutdown: "unadd" all buffers. 47 - * vq: the struct virtqueue we're talking about. 48 - * Remove everything from the queue. 49 * 50 * Locking rules are straightforward: the driver is responsible for 51 * locking. No two operations may be invoked simultaneously. ··· 62 63 void *(*get_buf)(struct virtqueue *vq, unsigned int *len); 64 65 - bool (*restart)(struct virtqueue *vq); 66 - 67 - void (*shutdown)(struct virtqueue *vq); 68 }; 69 70 /** ··· 93 * @probe: the function to call when a device is found. Returns a token for 94 * remove, or PTR_ERR(). 95 * @remove: the function when a device is removed. 96 */ 97 struct virtio_driver { 98 struct device_driver driver; 99 const struct virtio_device_id *id_table; 100 int (*probe)(struct virtio_device *dev); 101 void (*remove)(struct virtio_device *dev); 102 }; 103 104 int register_virtio_driver(struct virtio_driver *drv);
··· 11 /** 12 * virtqueue - a queue to register buffers for sending or receiving. 13 * @callback: the function to call when buffers are consumed (can be NULL). 14 * @vdev: the virtio device this queue was created for. 15 * @vq_ops: the operations for this virtqueue (see below). 16 * @priv: a pointer for the virtqueue implementation to use. 17 */ 18 struct virtqueue 19 { 20 + void (*callback)(struct virtqueue *vq); 21 struct virtio_device *vdev; 22 struct virtqueue_ops *vq_ops; 23 void *priv; ··· 41 * vq: the struct virtqueue we're talking about. 42 * len: the length written into the buffer 43 * Returns NULL or the "data" token handed to add_buf. 44 + * @disable_cb: disable callbacks 45 + * vq: the struct virtqueue we're talking about. 46 + * @enable_cb: restart callbacks after disable_cb. 47 * vq: the struct virtqueue we're talking about. 48 * This returns "false" (and doesn't re-enable) if there are pending 49 * buffers in the queue, to avoid a race. 50 * 51 * Locking rules are straightforward: the driver is responsible for 52 * locking. No two operations may be invoked simultaneously. ··· 65 66 void *(*get_buf)(struct virtqueue *vq, unsigned int *len); 67 68 + void (*disable_cb)(struct virtqueue *vq); 69 + bool (*enable_cb)(struct virtqueue *vq); 70 }; 71 72 /** ··· 97 * @probe: the function to call when a device is found. Returns a token for 98 * remove, or PTR_ERR(). 99 * @remove: the function when a device is removed. 100 + * @config_changed: optional function to call when the device configuration 101 + * changes; may be called in interrupt context. 102 */ 103 struct virtio_driver { 104 struct device_driver driver; 105 const struct virtio_device_id *id_table; 106 int (*probe)(struct virtio_device *dev); 107 void (*remove)(struct virtio_device *dev); 108 + void (*config_changed)(struct virtio_device *dev); 109 }; 110 111 int register_virtio_driver(struct virtio_driver *drv);
+18
include/linux/virtio_balloon.h
···
··· 1 + #ifndef _LINUX_VIRTIO_BALLOON_H 2 + #define _LINUX_VIRTIO_BALLOON_H 3 + #include <linux/virtio_config.h> 4 + 5 + /* The ID for virtio_balloon */ 6 + #define VIRTIO_ID_BALLOON 5 7 + 8 + /* The feature bitmap for virtio balloon */ 9 + #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0 /* Tell before reclaiming pages */ 10 + 11 + struct virtio_balloon_config 12 + { 13 + /* Number of pages host wants Guest to give up. */ 14 + __le32 num_pages; 15 + /* Number of pages we've actually got in balloon. */ 16 + __le32 actual; 17 + }; 18 + #endif /* _LINUX_VIRTIO_BALLOON_H */
+12 -10
include/linux/virtio_blk.h
··· 6 #define VIRTIO_ID_BLOCK 2 7 8 /* Feature bits */ 9 - #define VIRTIO_CONFIG_BLK_F 0x40 10 - #define VIRTIO_BLK_F_BARRIER 1 /* Does host support barriers? */ 11 12 - /* The capacity (in 512-byte sectors). */ 13 - #define VIRTIO_CONFIG_BLK_F_CAPACITY 0x41 14 - /* The maximum segment size. */ 15 - #define VIRTIO_CONFIG_BLK_F_SIZE_MAX 0x42 16 - /* The maximum number of segments. */ 17 - #define VIRTIO_CONFIG_BLK_F_SEG_MAX 0x43 18 19 /* These two define direction. */ 20 #define VIRTIO_BLK_T_IN 0 ··· 39 __u32 ioprio; 40 /* Sector (ie. 512 byte offset) */ 41 __u64 sector; 42 - /* Where to put reply. */ 43 - __u64 id; 44 }; 45 46 #define VIRTIO_BLK_S_OK 0
··· 6 #define VIRTIO_ID_BLOCK 2 7 8 /* Feature bits */ 9 + #define VIRTIO_BLK_F_BARRIER 0 /* Does host support barriers? */ 10 + #define VIRTIO_BLK_F_SIZE_MAX 1 /* Indicates maximum segment size */ 11 + #define VIRTIO_BLK_F_SEG_MAX 2 /* Indicates maximum # of segments */ 12 13 + struct virtio_blk_config 14 + { 15 + /* The capacity (in 512-byte sectors). */ 16 + __le64 capacity; 17 + /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ 18 + __le32 size_max; 19 + /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */ 20 + __le32 seg_max; 21 + } __attribute__((packed)); 22 23 /* These two define direction. */ 24 #define VIRTIO_BLK_T_IN 0 ··· 35 __u32 ioprio; 36 /* Sector (ie. 512 byte offset) */ 37 __u64 sector; 38 }; 39 40 #define VIRTIO_BLK_S_OK 0
+51 -53
include/linux/virtio_config.h
··· 5 * store and access that space differently. */ 6 #include <linux/types.h> 7 8 - /* Status byte for guest to report progress, and synchronize config. */ 9 /* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ 10 #define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 11 /* We have found a driver for the device. */ ··· 15 /* We've given up on this device. */ 16 #define VIRTIO_CONFIG_S_FAILED 0x80 17 18 - /* Feature byte (actually 7 bits available): */ 19 - /* Requirements/features of the virtio implementation. */ 20 - #define VIRTIO_CONFIG_F_VIRTIO 1 21 - /* Requirements/features of the virtqueue (may have more than one). */ 22 - #define VIRTIO_CONFIG_F_VIRTQUEUE 2 23 - 24 #ifdef __KERNEL__ 25 struct virtio_device; 26 27 /** 28 * virtio_config_ops - operations for configuring a virtio device 29 - * @find: search for the next configuration field of the given type. 30 * vdev: the virtio_device 31 - * type: the feature type 32 - * len: the (returned) length of the field if found. 33 - * Returns a token if found, or NULL. Never returns the same field twice 34 - * (ie. it's used up). 35 - * @get: read the value of a configuration field after find(). 36 * vdev: the virtio_device 37 - * token: the token returned from find(). 38 * buf: the buffer to write the field value into. 39 - * len: the length of the buffer (given by find()). 40 * Note that contents are conventionally little-endian. 41 - * @set: write the value of a configuration field after find(). 42 * vdev: the virtio_device 43 - * token: the token returned from find(). 44 * buf: the buffer to read the field value from. 45 - * len: the length of the buffer (given by find()). 46 * Note that contents are conventionally little-endian. 47 * @get_status: read the status byte 48 * vdev: the virtio_device ··· 43 * @set_status: write the status byte 44 * vdev: the virtio_device 45 * status: the new status byte 46 - * @find_vq: find the first VIRTIO_CONFIG_F_VIRTQUEUE and create a virtqueue. 47 * vdev: the virtio_device 48 * callback: the virtqueue callback 49 - * Returns the new virtqueue or ERR_PTR(). 50 * @del_vq: free a virtqueue found by find_vq(). 51 */ 52 struct virtio_config_ops 53 { 54 - void *(*find)(struct virtio_device *vdev, u8 type, unsigned *len); 55 - void (*get)(struct virtio_device *vdev, void *token, 56 void *buf, unsigned len); 57 - void (*set)(struct virtio_device *vdev, void *token, 58 const void *buf, unsigned len); 59 u8 (*get_status)(struct virtio_device *vdev); 60 void (*set_status)(struct virtio_device *vdev, u8 status); 61 struct virtqueue *(*find_vq)(struct virtio_device *vdev, 62 - bool (*callback)(struct virtqueue *)); 63 void (*del_vq)(struct virtqueue *vq); 64 }; 65 66 /** 67 - * virtio_config_val - get a single virtio config and mark it used. 68 - * @config: the virtio config space 69 - * @type: the type to search for. 70 - * @val: a pointer to the value to fill in. 71 * 72 - * Once used, the config type is marked with VIRTIO_CONFIG_F_USED so it can't 73 - * be found again. This version does endian conversion.
*/ 74 - #define virtio_config_val(vdev, type, v) ({ \ 75 - int _err = __virtio_config_val((vdev),(type),(v),sizeof(*(v))); \ 76 - \ 77 - BUILD_BUG_ON(sizeof(*(v)) != 1 && sizeof(*(v)) != 2 \ 78 - && sizeof(*(v)) != 4 && sizeof(*(v)) != 8); \ 79 - if (!_err) { \ 80 - switch (sizeof(*(v))) { \ 81 - case 2: le16_to_cpus((__u16 *) v); break; \ 82 - case 4: le32_to_cpus((__u32 *) v); break; \ 83 - case 8: le64_to_cpus((__u64 *) v); break; \ 84 - } \ 85 - } \ 86 _err; \ 87 }) 88 89 - int __virtio_config_val(struct virtio_device *dev, 90 - u8 type, void *val, size_t size); 91 - 92 /** 93 - * virtio_use_bit - helper to use a feature bit in a bitfield value. 94 - * @dev: the virtio device 95 - * @token: the token as returned from vdev->config->find(). 96 - * @len: the length of the field. 97 - * @bitnum: the bit to test. 98 * 99 - * If handed a NULL token, it returns false, otherwise returns bit status. 100 - * If it's one, it sets the mirroring acknowledgement bit. */ 101 - int virtio_use_bit(struct virtio_device *vdev, 102 - void *token, unsigned int len, unsigned int bitnum); 103 #endif /* __KERNEL__ */ 104 #endif /* _LINUX_VIRTIO_CONFIG_H */
··· 5 * store and access that space differently. */ 6 #include <linux/types.h> 7 8 + /* Status byte for guest to report progress, and synchronize features. */ 9 /* We have seen device and processed generic fields (VIRTIO_CONFIG_F_VIRTIO) */ 10 #define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 11 /* We have found a driver for the device. */ ··· 15 /* We've given up on this device. */ 16 #define VIRTIO_CONFIG_S_FAILED 0x80 17 18 #ifdef __KERNEL__ 19 struct virtio_device; 20 21 /** 22 * virtio_config_ops - operations for configuring a virtio device 23 + * @feature: search for a feature in this config 24 * vdev: the virtio_device 25 + * bit: the feature bit 26 + * Returns true if the feature is supported. Acknowledges the feature 27 + * so the host can see it. 28 + * @get: read the value of a configuration field 29 * vdev: the virtio_device 30 + * offset: the offset of the configuration field 31 * buf: the buffer to write the field value into. 32 + * len: the length of the buffer 33 * Note that contents are conventionally little-endian. 34 + * @set: write the value of a configuration field 35 * vdev: the virtio_device 36 + * offset: the offset of the configuration field 37 * buf: the buffer to read the field value from. 38 + * len: the length of the buffer 39 * Note that contents are conventionally little-endian. 40 * @get_status: read the status byte 41 * vdev: the virtio_device ··· 50 * @set_status: write the status byte 51 * vdev: the virtio_device 52 * status: the new status byte 53 + * @reset: reset the device 54 + * vdev: the virtio device 55 + * After this, status and feature negotiation must be done again 56 + * @find_vq: find a virtqueue and instantiate it. 57 * vdev: the virtio_device 58 + * index: the 0-based virtqueue number in case there's more than one. 59 * callback: the virtqueue callback 60 + * Returns the new virtqueue or ERR_PTR() (eg. -ENOENT). 61 * @del_vq: free a virtqueue found by find_vq(). 62 */ 63 struct virtio_config_ops 64 { 65 + bool (*feature)(struct virtio_device *vdev, unsigned bit); 66 + void (*get)(struct virtio_device *vdev, unsigned offset, 67 void *buf, unsigned len); 68 + void (*set)(struct virtio_device *vdev, unsigned offset, 69 const void *buf, unsigned len); 70 u8 (*get_status)(struct virtio_device *vdev); 71 void (*set_status)(struct virtio_device *vdev, u8 status); 72 + void (*reset)(struct virtio_device *vdev); 73 struct virtqueue *(*find_vq)(struct virtio_device *vdev, 74 + unsigned index, 75 + void (*callback)(struct virtqueue *)); 76 void (*del_vq)(struct virtqueue *vq); 77 }; 78 79 /** 80 + * virtio_config_val - look for a feature and get a single virtio config. 81 + * @vdev: the virtio device 82 + * @fbit: the feature bit 83 + * @offset: the offset of the configuration field. 84 + * @val: a pointer to the value to fill in. 85 * 86 + * The return value is -ENOENT if the feature doesn't exist. Otherwise 87 + * the value is endian-corrected and returned in v. */ 88 + #define virtio_config_val(vdev, fbit, offset, v) ({ \ 89 + int _err; \ 90 + if ((vdev)->config->feature((vdev), (fbit))) { \ 91 + __virtio_config_val((vdev), (offset), (v)); \ 92 + _err = 0; \ 93 + } else \ 94 + _err = -ENOENT; \ 95 _err; \ 96 }) 97 98 /** 99 + * __virtio_config_val - get a single virtio config without feature check. 100 + * @vdev: the virtio device 101 + * @offset: the offset of the configuration field. 102 + * @val: a pointer to the value to fill in. 103 * 104 + * The value is endian-corrected and returned in v.
*/ 105 + #define __virtio_config_val(vdev, offset, v) do { \ 106 + BUILD_BUG_ON(sizeof(*(v)) != 1 && sizeof(*(v)) != 2 \ 107 + && sizeof(*(v)) != 4 && sizeof(*(v)) != 8); \ 108 + (vdev)->config->get((vdev), (offset), (v), sizeof(*(v))); \ 109 + switch (sizeof(*(v))) { \ 110 + case 2: le16_to_cpus((__u16 *) v); break; \ 111 + case 4: le32_to_cpus((__u32 *) v); break; \ 112 + case 8: le64_to_cpus((__u64 *) v); break; \ 113 + } \ 114 + } while(0) 115 #endif /* __KERNEL__ */ 116 #endif /* _LINUX_VIRTIO_CONFIG_H */
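To tie the new accessors to a concrete layout, here is a hedged sketch reading the virtio_blk fields defined earlier: capacity has no guarding feature bit, so __virtio_config_val() reads it unconditionally, while size_max is read only once VIRTIO_BLK_F_SIZE_MAX is confirmed. The function name is hypothetical; virtio_blk's probe does essentially this:

#include <linux/virtio.h>
#include <linux/virtio_blk.h>

static void foo_read_blk_config(struct virtio_device *vdev)
{
        u64 capacity;
        u32 size_max;

        /* Always present: no feature bit guards capacity. */
        __virtio_config_val(vdev,
                            offsetof(struct virtio_blk_config, capacity),
                            &capacity);

        /* Valid only if the host offered VIRTIO_BLK_F_SIZE_MAX. */
        if (virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
                              offsetof(struct virtio_blk_config, size_max),
                              &size_max) < 0)
                size_max = 0;   /* -ENOENT: no limit advertised */
}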
+16 -16
include/linux/virtio_net.h
··· 5 /* The ID for virtio_net */ 6 #define VIRTIO_ID_NET 1 7 8 - /* The bitmap of config for virtio net */ 9 - #define VIRTIO_CONFIG_NET_F 0x40 10 - #define VIRTIO_NET_F_NO_CSUM 0 11 - #define VIRTIO_NET_F_TSO4 1 12 - #define VIRTIO_NET_F_UFO 2 13 - #define VIRTIO_NET_F_TSO4_ECN 3 14 - #define VIRTIO_NET_F_TSO6 4 15 16 - /* The config defining mac address. */ 17 - #define VIRTIO_CONFIG_NET_MAC_F 0x41 18 19 /* This is the first element of the scatter-gather list. If you don't 20 * specify GSO or CSUM features, you can simply ignore the header. */ 21 struct virtio_net_hdr 22 { 23 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset 24 - __u8 flags; 25 #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame 26 #define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO) 27 - /* FIXME: Do we need this? If they said they can handle ECN, do they care? */ 28 - #define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 // GSO frame, IPv4 TCP w/ ECN 29 #define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO) 30 #define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP 31 - __u8 gso_type; 32 - __u16 gso_size; 33 - __u16 csum_start; 34 - __u16 csum_offset; 35 }; 36 #endif /* _LINUX_VIRTIO_NET_H */
··· 5 /* The ID for virtio_net */ 6 #define VIRTIO_ID_NET 1 7 8 + /* The feature bitmap for virtio net */ 9 + #define VIRTIO_NET_F_CSUM 0 /* Can handle pkts w/ partial csum */ 10 + #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ 11 + #define VIRTIO_NET_F_GSO 6 /* Can handle pkts w/ any GSO type */ 12 13 + struct virtio_net_config 14 + { 15 + /* The config defining mac address (if VIRTIO_NET_F_MAC) */ 16 + __u8 mac[6]; 17 + } __attribute__((packed)); 18 19 /* This is the first element of the scatter-gather list. If you don't 20 * specify GSO or CSUM features, you can simply ignore the header. */ 21 struct virtio_net_hdr 22 { 23 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset 24 + __u8 flags; 25 #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame 26 #define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO) 27 #define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO) 28 #define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP 29 + #define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set 30 + __u8 gso_type; 31 + __u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */ 32 + __u16 gso_size; /* Bytes to append to gso_hdr_len per frame */ 33 + __u16 csum_start; /* Position to start checksumming from */ 34 + __u16 csum_offset; /* Offset after that to place checksum */ 35 }; 36 #endif /* _LINUX_VIRTIO_NET_H */
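One wrinkle: the 6-byte mac[] field doesn't fit virtio_config_val()'s 1/2/4/8-byte sizes, so a network driver checks VIRTIO_NET_F_MAC and calls config->get() directly, falling back to a random address otherwise. A sketch of that probe-time step (foo_get_mac is an illustrative name for what virtio_net does inline):

#include <linux/etherdevice.h>
#include <linux/virtio_net.h>

static void foo_get_mac(struct virtio_device *vdev, u8 mac[6])
{
        if (vdev->config->feature(vdev, VIRTIO_NET_F_MAC))
                vdev->config->get(vdev,
                                  offsetof(struct virtio_net_config, mac),
                                  mac, 6);
        else
                random_ether_addr(mac);  /* no host-supplied address */
}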
+57
include/linux/virtio_pci.h
···
··· 1 + /* 2 + * Virtio PCI driver 3 + * 4 + * This module allows virtio devices to be used over a virtual PCI device. 5 + * This can be used with QEMU based VMMs like KVM or Xen. 6 + * 7 + * Copyright IBM Corp. 2007 8 + * 9 + * Authors: 10 + * Anthony Liguori <aliguori@us.ibm.com> 11 + * 12 + * This work is licensed under the terms of the GNU GPL, version 2 or later. 13 + * See the COPYING file in the top-level directory. 14 + * 15 + */ 16 + 17 + #ifndef _LINUX_VIRTIO_PCI_H 18 + #define _LINUX_VIRTIO_PCI_H 19 + 20 + #include <linux/virtio_config.h> 21 + 22 + /* A 32-bit r/o bitmask of the features supported by the host */ 23 + #define VIRTIO_PCI_HOST_FEATURES 0 24 + 25 + /* A 32-bit r/w bitmask of features activated by the guest */ 26 + #define VIRTIO_PCI_GUEST_FEATURES 4 27 + 28 + /* A 32-bit r/w PFN for the currently selected queue */ 29 + #define VIRTIO_PCI_QUEUE_PFN 8 30 + 31 + /* A 16-bit r/o queue size for the currently selected queue */ 32 + #define VIRTIO_PCI_QUEUE_NUM 12 33 + 34 + /* A 16-bit r/w queue selector */ 35 + #define VIRTIO_PCI_QUEUE_SEL 14 36 + 37 + /* A 16-bit r/w queue notifier */ 38 + #define VIRTIO_PCI_QUEUE_NOTIFY 16 39 + 40 + /* An 8-bit device status register. */ 41 + #define VIRTIO_PCI_STATUS 18 42 + 43 + /* An 8-bit r/o interrupt status register. Reading the value will return the 44 + * current contents of the ISR and will also clear it. This is effectively 45 + * a read-and-acknowledge. */ 46 + #define VIRTIO_PCI_ISR 19 47 + 48 + /* The bit of the ISR which indicates a device configuration change. */ 49 + #define VIRTIO_PCI_ISR_CONFIG 0x2 50 + 51 + /* The remaining space is defined by each driver as the per-driver 52 + * configuration space */ 53 + #define VIRTIO_PCI_CONFIG 20 54 + 55 + /* Virtio ABI version, this must match exactly */ 56 + #define VIRTIO_PCI_ABI_VERSION 0 57 + #endif
+9 -5
include/linux/virtio_ring.h
··· 15 /* This marks a buffer as write-only (otherwise read-only). */ 16 #define VRING_DESC_F_WRITE 2 17 18 - /* This means don't notify other side when buffer added. */ 19 #define VRING_USED_F_NO_NOTIFY 1 20 - /* This means don't interrupt guest when buffer consumed. */ 21 #define VRING_AVAIL_F_NO_INTERRUPT 1 22 23 /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ ··· 93 * }; 94 */ 95 static inline void vring_init(struct vring *vr, unsigned int num, void *p, 96 - unsigned int pagesize) 97 { 98 vr->num = num; 99 vr->desc = p; ··· 102 & ~(pagesize - 1)); 103 } 104 105 - static inline unsigned vring_size(unsigned int num, unsigned int pagesize) 106 { 107 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) 108 + pagesize - 1) & ~(pagesize - 1)) ··· 118 struct virtio_device *vdev, 119 void *pages, 120 void (*notify)(struct virtqueue *vq), 121 - bool (*callback)(struct virtqueue *vq)); 122 void vring_del_virtqueue(struct virtqueue *vq); 123 124 irqreturn_t vring_interrupt(int irq, void *_vq);
··· 15 /* This marks a buffer as write-only (otherwise read-only). */ 16 #define VRING_DESC_F_WRITE 2 17 18 + /* The Host uses this in used->flags to advise the Guest: don't kick me when 19 + * you add a buffer. It's unreliable, so it's simply an optimization. Guest 20 + * will still kick if it's out of buffers. */ 21 #define VRING_USED_F_NO_NOTIFY 1 22 + /* The Guest uses this in avail->flags to advise the Host: don't interrupt me 23 + * when you consume a buffer. It's unreliable, so it's simply an 24 + * optimization. */ 25 #define VRING_AVAIL_F_NO_INTERRUPT 1 26 27 /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ ··· 89 * }; 90 */ 91 static inline void vring_init(struct vring *vr, unsigned int num, void *p, 92 + unsigned long pagesize) 93 { 94 vr->num = num; 95 vr->desc = p; ··· 98 & ~(pagesize - 1)); 99 } 100 101 + static inline unsigned vring_size(unsigned int num, unsigned long pagesize) 102 { 103 return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) 104 + pagesize - 1) & ~(pagesize - 1)) ··· 114 struct virtio_device *vdev, 115 void *pages, 116 void (*notify)(struct virtqueue *vq), 117 + void (*callback)(struct virtqueue *vq)); 118 void vring_del_virtqueue(struct virtqueue *vq); 119 120 irqreturn_t vring_interrupt(int irq, void *_vq);
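A transport's job is thus to allocate page-aligned memory sized by vring_size() and hand it to vring_new_virtqueue(), which lays the ring out in place. A condensed sketch of that sequence with error handling elided (essentially what vp_find_vq does above; foo_make_vq is an illustrative name):

static struct virtqueue *foo_make_vq(struct virtio_device *vdev,
                                     unsigned int num,
                                     void (*notify)(struct virtqueue *),
                                     void (*cb)(struct virtqueue *))
{
        void *pages;

        /* vring_size() covers the descriptors, the avail ring, and the
         * used ring pushed out to its own page boundary. */
        pages = kzalloc(PAGE_ALIGN(vring_size(num, PAGE_SIZE)), GFP_KERNEL);
        if (!pages)
                return NULL;

        return vring_new_virtqueue(num, vdev, pages, notify, cb);
}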
+3 -5
net/9p/trans_virtio.c
··· 199 kfree(trans); 200 } 201 202 - static bool p9_virtio_intr(struct virtqueue *q) 203 { 204 struct virtio_chan *chan = q->vdev->priv; 205 206 P9_DPRINTK(P9_DEBUG_TRANS, "9p poll_wakeup: %p\n", &chan->wq); 207 wake_up_interruptible(&chan->wq); 208 - 209 - return true; 210 } 211 212 static int p9_virtio_probe(struct virtio_device *dev) ··· 234 235 /* Find the input queue. */ 236 dev->priv = chan; 237 - chan->in_vq = dev->config->find_vq(dev, p9_virtio_intr); 238 if (IS_ERR(chan->in_vq)) { 239 err = PTR_ERR(chan->in_vq); 240 goto free; 241 } 242 243 - chan->out_vq = dev->config->find_vq(dev, NULL); 244 if (IS_ERR(chan->out_vq)) { 245 err = PTR_ERR(chan->out_vq); 246 goto free_in_vq;
··· 199 kfree(trans); 200 } 201 202 + static void p9_virtio_intr(struct virtqueue *q) 203 { 204 struct virtio_chan *chan = q->vdev->priv; 205 206 P9_DPRINTK(P9_DEBUG_TRANS, "9p poll_wakeup: %p\n", &chan->wq); 207 wake_up_interruptible(&chan->wq); 208 } 209 210 static int p9_virtio_probe(struct virtio_device *dev) ··· 236 237 /* Find the input queue. */ 238 dev->priv = chan; 239 + chan->in_vq = dev->config->find_vq(dev, 0, p9_virtio_intr); 240 if (IS_ERR(chan->in_vq)) { 241 err = PTR_ERR(chan->in_vq); 242 goto free; 243 } 244 245 + chan->out_vq = dev->config->find_vq(dev, 1, NULL); 246 if (IS_ERR(chan->out_vq)) { 247 err = PTR_ERR(chan->out_vq); 248 goto free_in_vq;
+29
net/core/skbuff.c
··· 2461 return elt; 2462 } 2463 2464 EXPORT_SYMBOL(___pskb_trim); 2465 EXPORT_SYMBOL(__kfree_skb); 2466 EXPORT_SYMBOL(kfree_skb); ··· 2525 2526 EXPORT_SYMBOL_GPL(skb_to_sgvec); 2527 EXPORT_SYMBOL_GPL(skb_cow_data);
··· 2461 return elt; 2462 } 2463 2464 + /** 2465 + * skb_partial_csum_set - set up and verify partial csum values for packet 2466 + * @skb: the skb to set 2467 + * @start: the number of bytes after skb->data to start checksumming. 2468 + * @off: the offset from start to place the checksum. 2469 + * 2470 + * For untrusted partially-checksummed packets, we need to make sure the values 2471 + * for skb->csum_start and skb->csum_offset are valid so we don't oops. 2472 + * 2473 + * This function checks and sets those values and skb->ip_summed: if this 2474 + * returns false you should drop the packet. 2475 + */ 2476 + bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) 2477 + { 2478 + if (unlikely(start > skb->len - 2) || 2479 + unlikely((int)start + off > skb->len - 2)) { 2480 + if (net_ratelimit()) 2481 + printk(KERN_WARNING 2482 + "bad partial csum: csum=%u/%u len=%u\n", 2483 + start, off, skb->len); 2484 + return false; 2485 + } 2486 + skb->ip_summed = CHECKSUM_PARTIAL; 2487 + skb->csum_start = skb_headroom(skb) + start; 2488 + skb->csum_offset = off; 2489 + return true; 2490 + } 2491 + 2492 EXPORT_SYMBOL(___pskb_trim); 2493 EXPORT_SYMBOL(__kfree_skb); 2494 EXPORT_SYMBOL(kfree_skb); ··· 2497 2498 EXPORT_SYMBOL_GPL(skb_to_sgvec); 2499 EXPORT_SYMBOL_GPL(skb_cow_data); 2500 + EXPORT_SYMBOL_GPL(skb_partial_csum_set);
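The intended caller is a driver receiving packets whose checksum offsets come from an untrusted device header; virtio_net in this merge is the first user. A hedged sketch using the virtio_net_hdr fields defined earlier (the foo_ name is illustrative):

static bool foo_rx_csum(struct sk_buff *skb,
                        const struct virtio_net_hdr *hdr)
{
        if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM))
                return true;    /* nothing to set up */

        /* A false return means bogus offsets: drop the packet. */
        return skb_partial_csum_set(skb, hdr->csum_start, hdr->csum_offset);
}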