Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

virtio: explicit advertisement of driver features

A recent proposed feature addition to the virtio block driver revealed
some flaws in the API: in particular, we assume that feature
negotiation is complete once a driver's probe function returns.

There is nothing in the API to require this, however, and even I
didn't notice when it was violated.

So instead, we require the driver to specify what features it supports
in a table, we can then move the feature negotiation into the virtio
core. The intersection of device and driver features are presented in
a new 'features' bitmap in the struct virtio_device.

Note that this highlights the difference between Linux unsigned-long
bitmaps where each unsigned long is in native endian, and a
straight-forward little-endian array of bytes.

Drivers can still remove feature bits in their probe routine if they
really have to.

API changes:
- dev->config->feature() no longer gets and acks a feature.
- drivers should advertise their features in the 'feature_table' field
- use virtio_has_feature() for extra sanity when checking feature bits

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>

+138 -49
+7 -1
drivers/block/virtio_blk.c
··· 242 242 index++; 243 243 244 244 /* If barriers are supported, tell block layer that queue is ordered */ 245 - if (vdev->config->feature(vdev, VIRTIO_BLK_F_BARRIER)) 245 + if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) 246 246 blk_queue_ordered(vblk->disk->queue, QUEUE_ORDERED_TAG, NULL); 247 247 248 248 /* Host must always specify the capacity. */ ··· 308 308 { 0 }, 309 309 }; 310 310 311 + static unsigned int features[] = { 312 + VIRTIO_BLK_F_BARRIER, VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, 313 + }; 314 + 311 315 static struct virtio_driver virtio_blk = { 316 + .feature_table = features, 317 + .feature_table_size = ARRAY_SIZE(features), 312 318 .driver.name = KBUILD_MODNAME, 313 319 .driver.owner = THIS_MODULE, 314 320 .id_table = id_table,
+25 -17
drivers/lguest/lguest_device.c
··· 85 85 + desc->config_len; 86 86 } 87 87 88 - /* This tests (and acknowleges) a feature bit. */ 89 - static bool lg_feature(struct virtio_device *vdev, unsigned fbit) 88 + /* This gets the device's feature bits. */ 89 + static u32 lg_get_features(struct virtio_device *vdev) 90 90 { 91 + unsigned int i; 92 + u32 features = 0; 91 93 struct lguest_device_desc *desc = to_lgdev(vdev)->desc; 92 - u8 *features; 94 + u8 *in_features = lg_features(desc); 93 95 94 - /* Obviously if they ask for a feature off the end of our feature 95 - * bitmap, it's not set. */ 96 - if (fbit / 8 > desc->feature_len) 97 - return false; 96 + /* We do this the slow but generic way. */ 97 + for (i = 0; i < min(desc->feature_len * 8, 32); i++) 98 + if (in_features[i / 8] & (1 << (i % 8))) 99 + features |= (1 << i); 98 100 99 - /* The feature bitmap comes after the virtqueues. */ 100 - features = lg_features(desc); 101 - if (!(features[fbit / 8] & (1 << (fbit % 8)))) 102 - return false; 101 + return features; 102 + } 103 103 104 - /* We set the matching bit in the other half of the bitmap to tell the 105 - * Host we want to use this feature. We don't use this yet, but we 106 - * could in future. */ 107 - features[desc->feature_len + fbit / 8] |= (1 << (fbit % 8)); 108 - return true; 104 + static void lg_set_features(struct virtio_device *vdev, u32 features) 105 + { 106 + unsigned int i; 107 + struct lguest_device_desc *desc = to_lgdev(vdev)->desc; 108 + /* Second half of bitmap is features we accept. */ 109 + u8 *out_features = lg_features(desc) + desc->feature_len; 110 + 111 + memset(out_features, 0, desc->feature_len); 112 + for (i = 0; i < min(desc->feature_len * 8, 32); i++) { 113 + if (features & (1 << i)) 114 + out_features[i / 8] |= (1 << (i % 8)); 115 + } 109 116 } 110 117 111 118 /* Once they've found a field, getting a copy of it is easy. */ ··· 293 286 294 287 /* The ops structure which hooks everything together. */ 295 288 static struct virtio_config_ops lguest_config_ops = { 296 - .feature = lg_feature, 289 + .get_features = lg_get_features, 290 + .set_features = lg_set_features, 297 291 .get = lg_get, 298 292 .set = lg_set, 299 293 .get_status = lg_get_status,
+15 -7
drivers/net/virtio_net.c
··· 378 378 SET_NETDEV_DEV(dev, &vdev->dev); 379 379 380 380 /* Do we support "hardware" checksums? */ 381 - if (csum && vdev->config->feature(vdev, VIRTIO_NET_F_CSUM)) { 381 + if (csum && virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) { 382 382 /* This opens up the world of extra features. */ 383 383 dev->features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; 384 - if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_GSO)) { 384 + if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) { 385 385 dev->features |= NETIF_F_TSO | NETIF_F_UFO 386 386 | NETIF_F_TSO_ECN | NETIF_F_TSO6; 387 387 } 388 388 /* Individual feature bits: what can host handle? */ 389 - if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 389 + if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4)) 390 390 dev->features |= NETIF_F_TSO; 391 - if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 391 + if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6)) 392 392 dev->features |= NETIF_F_TSO6; 393 - if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_ECN)) 393 + if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN)) 394 394 dev->features |= NETIF_F_TSO_ECN; 395 - if (gso && vdev->config->feature(vdev, VIRTIO_NET_F_HOST_UFO)) 395 + if (gso && virtio_has_feature(vdev, VIRTIO_NET_F_HOST_UFO)) 396 396 dev->features |= NETIF_F_UFO; 397 397 } 398 398 399 399 /* Configuration may specify what MAC to use. Otherwise random. */ 400 - if (vdev->config->feature(vdev, VIRTIO_NET_F_MAC)) { 400 + if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) { 401 401 vdev->config->get(vdev, 402 402 offsetof(struct virtio_net_config, mac), 403 403 dev->dev_addr, dev->addr_len); ··· 486 486 { 0 }, 487 487 }; 488 488 489 + static unsigned int features[] = { 490 + VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GSO, VIRTIO_NET_F_MAC, 491 + VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, 492 + VIRTIO_NET_F_HOST_ECN, 493 + }; 494 + 489 495 static struct virtio_driver virtio_net = { 496 + .feature_table = features, 497 + .feature_table_size = ARRAY_SIZE(features), 490 498 .driver.name = KBUILD_MODNAME, 491 499 .driver.owner = THIS_MODULE, 492 500 .id_table = id_table,
+36 -2
drivers/virtio/virtio.c
··· 80 80 dev->config->set_status(dev, dev->config->get_status(dev) | status); 81 81 } 82 82 83 + void virtio_check_driver_offered_feature(const struct virtio_device *vdev, 84 + unsigned int fbit) 85 + { 86 + unsigned int i; 87 + struct virtio_driver *drv = container_of(vdev->dev.driver, 88 + struct virtio_driver, driver); 89 + 90 + for (i = 0; i < drv->feature_table_size; i++) 91 + if (drv->feature_table[i] == fbit) 92 + return; 93 + BUG(); 94 + } 95 + EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature); 96 + 83 97 static int virtio_dev_probe(struct device *_d) 84 98 { 85 - int err; 99 + int err, i; 86 100 struct virtio_device *dev = container_of(_d,struct virtio_device,dev); 87 101 struct virtio_driver *drv = container_of(dev->dev.driver, 88 102 struct virtio_driver, driver); 103 + u32 device_features; 89 104 105 + /* We have a driver! */ 90 106 add_status(dev, VIRTIO_CONFIG_S_DRIVER); 107 + 108 + /* Figure out what features the device supports. */ 109 + device_features = dev->config->get_features(dev); 110 + 111 + /* Features supported by both device and driver into dev->features. */ 112 + memset(dev->features, 0, sizeof(dev->features)); 113 + for (i = 0; i < drv->feature_table_size; i++) { 114 + unsigned int f = drv->feature_table[i]; 115 + BUG_ON(f >= 32); 116 + if (device_features & (1 << f)) 117 + set_bit(f, dev->features); 118 + } 119 + 91 120 err = drv->probe(dev); 92 121 if (err) 93 122 add_status(dev, VIRTIO_CONFIG_S_FAILED); 94 - else 123 + else { 95 124 add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); 125 + /* They should never have set feature bits beyond 32 */ 126 + dev->config->set_features(dev, dev->features[0]); 127 + } 96 128 return err; 97 129 } 98 130 ··· 146 114 147 115 int register_virtio_driver(struct virtio_driver *driver) 148 116 { 117 + /* Catch this early. */ 118 + BUG_ON(driver->feature_table_size && !driver->feature_table); 149 119 driver->driver.bus = &virtio_bus; 150 120 driver->driver.probe = virtio_dev_probe; 151 121 driver->driver.remove = virtio_dev_remove;
+5 -1
drivers/virtio/virtio_balloon.c
··· 227 227 } 228 228 229 229 vb->tell_host_first 230 - = vdev->config->feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); 230 + = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); 231 231 232 232 return 0; 233 233 ··· 259 259 kfree(vb); 260 260 } 261 261 262 + static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST }; 263 + 262 264 static struct virtio_driver virtio_balloon = { 265 + .feature_table = features, 266 + .feature_table_size = ARRAY_SIZE(features), 263 267 .driver.name = KBUILD_MODNAME, 264 268 .driver.owner = THIS_MODULE, 265 269 .id_table = id_table,
+14 -14
drivers/virtio/virtio_pci.c
··· 87 87 return container_of(vdev, struct virtio_pci_device, vdev); 88 88 } 89 89 90 - /* virtio config->feature() implementation */ 91 - static bool vp_feature(struct virtio_device *vdev, unsigned bit) 90 + /* virtio config->get_features() implementation */ 91 + static u32 vp_get_features(struct virtio_device *vdev) 92 92 { 93 93 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 94 - u32 mask; 95 94 96 - /* Since this function is supposed to have the side effect of 97 - * enabling a queried feature, we simulate that by doing a read 98 - * from the host feature bitmask and then writing to the guest 99 - * feature bitmask */ 100 - mask = ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); 101 - if (mask & (1 << bit)) { 102 - mask |= (1 << bit); 103 - iowrite32(mask, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); 104 - } 95 + /* When someone needs more than 32 feature bits, we'll need to 96 + * steal a bit to indicate that the rest are somewhere else. */ 97 + return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); 98 + } 105 99 106 - return !!(mask & (1 << bit)); 100 + /* virtio config->set_features() implementation */ 101 + static void vp_set_features(struct virtio_device *vdev, u32 features) 102 + { 103 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 104 + 105 + iowrite32(features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); 107 106 } 108 107 109 108 /* virtio config->get() implementation */ ··· 292 293 } 293 294 294 295 static struct virtio_config_ops virtio_pci_config_ops = { 295 - .feature = vp_feature, 296 296 .get = vp_get, 297 297 .set = vp_set, 298 298 .get_status = vp_get_status, ··· 299 301 .reset = vp_reset, 300 302 .find_vq = vp_find_vq, 301 303 .del_vq = vp_del_vq, 304 + .get_features = vp_get_features, 305 + .set_features = vp_set_features, 302 306 }; 303 307 304 308 /* the PCI probing function */
+7
include/linux/virtio.h
··· 76 76 * @dev: underlying device. 77 77 * @id: the device type identification (used to match it with a driver). 78 78 * @config: the configuration ops for this device. 79 + * @features: the features supported by both driver and device. 79 80 * @priv: private pointer for the driver's use. 80 81 */ 81 82 struct virtio_device ··· 85 84 struct device dev; 86 85 struct virtio_device_id id; 87 86 struct virtio_config_ops *config; 87 + /* Note that this is a Linux set_bit-style bitmap. */ 88 + unsigned long features[1]; 88 89 void *priv; 89 90 }; 90 91 ··· 97 94 * virtio_driver - operations for a virtio I/O driver 98 95 * @driver: underlying device driver (populate name and owner). 99 96 * @id_table: the ids serviced by this driver. 97 + * @feature_table: an array of feature numbers supported by this device. 98 + * @feature_table_size: number of entries in the feature table array. 100 99 * @probe: the function to call when a device is found. Returns a token for 101 100 * remove, or PTR_ERR(). 102 101 * @remove: the function when a device is removed. ··· 108 103 struct virtio_driver { 109 104 struct device_driver driver; 110 105 const struct virtio_device_id *id_table; 106 + const unsigned int *feature_table; 107 + unsigned int feature_table_size; 111 108 int (*probe)(struct virtio_device *dev); 112 109 void (*remove)(struct virtio_device *dev); 113 110 void (*config_changed)(struct virtio_device *dev);
+29 -7
include/linux/virtio_config.h
··· 20 20 21 21 /** 22 22 * virtio_config_ops - operations for configuring a virtio device 23 - * @feature: search for a feature in this config 24 - * vdev: the virtio_device 25 - * bit: the feature bit 26 - * Returns true if the feature is supported. Acknowledges the feature 27 - * so the host can see it. 28 23 * @get: read the value of a configuration field 29 24 * vdev: the virtio_device 30 25 * offset: the offset of the configuration field ··· 45 50 * callback: the virqtueue callback 46 51 * Returns the new virtqueue or ERR_PTR() (eg. -ENOENT). 47 52 * @del_vq: free a virtqueue found by find_vq(). 53 + * @get_features: get the array of feature bits for this device. 54 + * vdev: the virtio_device 55 + * Returns the first 32 feature bits (all we currently need). 56 + * @set_features: confirm what device features we'll be using. 57 + * vdev: the virtio_device 58 + * feature: the first 32 feature bits 48 59 */ 49 60 struct virtio_config_ops 50 61 { 51 - bool (*feature)(struct virtio_device *vdev, unsigned bit); 52 62 void (*get)(struct virtio_device *vdev, unsigned offset, 53 63 void *buf, unsigned len); 54 64 void (*set)(struct virtio_device *vdev, unsigned offset, ··· 65 65 unsigned index, 66 66 void (*callback)(struct virtqueue *)); 67 67 void (*del_vq)(struct virtqueue *vq); 68 + u32 (*get_features)(struct virtio_device *vdev); 69 + void (*set_features)(struct virtio_device *vdev, u32 features); 68 70 }; 71 + 72 + /* If driver didn't advertise the feature, it will never appear. */ 73 + void virtio_check_driver_offered_feature(const struct virtio_device *vdev, 74 + unsigned int fbit); 75 + 76 + /** 77 + * virtio_has_feature - helper to determine if this device has this feature. 78 + * @vdev: the device 79 + * @fbit: the feature bit 80 + */ 81 + static inline bool virtio_has_feature(const struct virtio_device *vdev, 82 + unsigned int fbit) 83 + { 84 + /* Did you forget to fix assumptions on max features? */ 85 + if (__builtin_constant_p(fbit)) 86 + BUILD_BUG_ON(fbit >= 32); 87 + 88 + virtio_check_driver_offered_feature(vdev, fbit); 89 + return test_bit(fbit, vdev->features); 90 + } 69 91 70 92 /** 71 93 * virtio_config_val - look for a feature and get a virtio config entry. ··· 106 84 unsigned int offset, 107 85 void *buf, unsigned len) 108 86 { 109 - if (!vdev->config->feature(vdev, fbit)) 87 + if (!virtio_has_feature(vdev, fbit)) 110 88 return -ENOENT; 111 89 112 90 vdev->config->get(vdev, offset, buf, len);