Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio updates from Michael Tsirkin:
"vhost and virtio fixes and features:

- Hardening work by Jason

- vdpa driver for Alibaba ENI

- Performance tweaks for virtio blk

- virtio rng rework using an internal buffer

- mac/mtu programming for mlx5 vdpa

- Misc fixes, cleanups"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (45 commits)
vdpa/mlx5: Forward only packets with allowed MAC address
vdpa/mlx5: Support configuration of MAC
vdpa/mlx5: Fix clearing of VIRTIO_NET_F_MAC feature bit
vdpa_sim_net: Enable user to set mac address and mtu
vdpa: Enable user to set mac and mtu of vdpa device
vdpa: Use kernel coding style for structure comments
vdpa: Introduce query of device config layout
vdpa: Introduce and use vdpa device get, set config helpers
virtio-scsi: don't let virtio core to validate used buffer length
virtio-blk: don't let virtio core to validate used length
virtio-net: don't let virtio core to validate used length
virtio_ring: validate used buffer length
virtio_blk: correct types for status handling
virtio_blk: allow 0 as num_request_queues
i2c: virtio: Add support for zero-length requests
virtio-blk: fixup coccinelle warnings
virtio_ring: fix typos in vring_desc_extra
virtio-pci: harden INTX interrupts
virtio_pci: harden MSI-X interrupts
virtio_config: introduce a new .enable_cbs method
...

+1779 -295
+7
MAINTAINERS
··· 20083 20083 F: drivers/i2c/busses/i2c-virtio.c 20084 20084 F: include/uapi/linux/virtio_i2c.h 20085 20085 20086 + VIRTIO PMEM DRIVER 20087 + M: Pankaj Gupta <pankaj.gupta.linux@gmail.com> 20088 + L: virtualization@lists.linux-foundation.org 20089 + S: Maintained 20090 + F: drivers/nvdimm/virtio_pmem.c 20091 + F: drivers/nvdimm/nd_virtio.c 20092 + 20086 20093 VIRTUAL BOX GUEST DEVICE DRIVER 20087 20094 M: Hans de Goede <hdegoede@redhat.com> 20088 20095 M: Arnd Bergmann <arnd@arndb.de>
+1
drivers/block/Kconfig
··· 371 371 config VIRTIO_BLK 372 372 tristate "Virtio block driver" 373 373 depends on VIRTIO 374 + select SG_POOL 374 375 help 375 376 This is the virtual block driver for virtio. It can be used with 376 377 QEMU based VMMs (like KVM or Xen). Say Y or M.
+119 -59
drivers/block/virtio_blk.c
··· 24 24 /* The maximum number of sg elements that fit into a virtqueue */ 25 25 #define VIRTIO_BLK_MAX_SG_ELEMS 32768 26 26 27 + #ifdef CONFIG_ARCH_NO_SG_CHAIN 28 + #define VIRTIO_BLK_INLINE_SG_CNT 0 29 + #else 30 + #define VIRTIO_BLK_INLINE_SG_CNT 2 31 + #endif 32 + 33 + static unsigned int num_request_queues; 34 + module_param(num_request_queues, uint, 0644); 35 + MODULE_PARM_DESC(num_request_queues, 36 + "Limit the number of request queues to use for blk device. " 37 + "0 for no limit. " 38 + "Values > nr_cpu_ids truncated to nr_cpu_ids."); 39 + 27 40 static int major; 28 41 static DEFINE_IDA(vd_index_ida); 29 42 ··· 90 77 struct virtblk_req { 91 78 struct virtio_blk_outhdr out_hdr; 92 79 u8 status; 80 + struct sg_table sg_table; 93 81 struct scatterlist sg[]; 94 82 }; 95 83 ··· 176 162 return 0; 177 163 } 178 164 165 + static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr) 166 + { 167 + if (blk_rq_nr_phys_segments(req)) 168 + sg_free_table_chained(&vbr->sg_table, 169 + VIRTIO_BLK_INLINE_SG_CNT); 170 + } 171 + 172 + static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req, 173 + struct virtblk_req *vbr) 174 + { 175 + int err; 176 + 177 + if (!blk_rq_nr_phys_segments(req)) 178 + return 0; 179 + 180 + vbr->sg_table.sgl = vbr->sg; 181 + err = sg_alloc_table_chained(&vbr->sg_table, 182 + blk_rq_nr_phys_segments(req), 183 + vbr->sg_table.sgl, 184 + VIRTIO_BLK_INLINE_SG_CNT); 185 + if (unlikely(err)) 186 + return -ENOMEM; 187 + 188 + return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl); 189 + } 190 + 191 + static void virtblk_cleanup_cmd(struct request *req) 192 + { 193 + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) 194 + kfree(bvec_virt(&req->special_vec)); 195 + } 196 + 197 + static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, 198 + struct request *req, 199 + struct virtblk_req *vbr) 200 + { 201 + bool unmap = false; 202 + u32 type; 203 + 204 + vbr->out_hdr.sector = 0; 205 + 206 + switch (req_op(req)) { 207 + 
case REQ_OP_READ: 208 + type = VIRTIO_BLK_T_IN; 209 + vbr->out_hdr.sector = cpu_to_virtio64(vdev, 210 + blk_rq_pos(req)); 211 + break; 212 + case REQ_OP_WRITE: 213 + type = VIRTIO_BLK_T_OUT; 214 + vbr->out_hdr.sector = cpu_to_virtio64(vdev, 215 + blk_rq_pos(req)); 216 + break; 217 + case REQ_OP_FLUSH: 218 + type = VIRTIO_BLK_T_FLUSH; 219 + break; 220 + case REQ_OP_DISCARD: 221 + type = VIRTIO_BLK_T_DISCARD; 222 + break; 223 + case REQ_OP_WRITE_ZEROES: 224 + type = VIRTIO_BLK_T_WRITE_ZEROES; 225 + unmap = !(req->cmd_flags & REQ_NOUNMAP); 226 + break; 227 + case REQ_OP_DRV_IN: 228 + type = VIRTIO_BLK_T_GET_ID; 229 + break; 230 + default: 231 + WARN_ON_ONCE(1); 232 + return BLK_STS_IOERR; 233 + } 234 + 235 + vbr->out_hdr.type = cpu_to_virtio32(vdev, type); 236 + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); 237 + 238 + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 239 + if (virtblk_setup_discard_write_zeroes(req, unmap)) 240 + return BLK_STS_RESOURCE; 241 + } 242 + 243 + return 0; 244 + } 245 + 179 246 static inline void virtblk_request_done(struct request *req) 180 247 { 181 248 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 182 249 183 - if (req->rq_flags & RQF_SPECIAL_PAYLOAD) 184 - kfree(bvec_virt(&req->special_vec)); 250 + virtblk_unmap_data(req, vbr); 251 + virtblk_cleanup_cmd(req); 185 252 blk_mq_end_request(req, virtblk_result(vbr)); 186 253 } 187 254 ··· 318 223 unsigned long flags; 319 224 unsigned int num; 320 225 int qid = hctx->queue_num; 321 - int err; 322 226 bool notify = false; 323 - bool unmap = false; 324 - u32 type; 227 + blk_status_t status; 228 + int err; 325 229 326 230 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 327 231 328 - switch (req_op(req)) { 329 - case REQ_OP_READ: 330 - case REQ_OP_WRITE: 331 - type = 0; 332 - break; 333 - case REQ_OP_FLUSH: 334 - type = VIRTIO_BLK_T_FLUSH; 335 - break; 336 - case REQ_OP_DISCARD: 337 - type = VIRTIO_BLK_T_DISCARD; 338 - break; 339 - case 
REQ_OP_WRITE_ZEROES: 340 - type = VIRTIO_BLK_T_WRITE_ZEROES; 341 - unmap = !(req->cmd_flags & REQ_NOUNMAP); 342 - break; 343 - case REQ_OP_DRV_IN: 344 - type = VIRTIO_BLK_T_GET_ID; 345 - break; 346 - default: 347 - WARN_ON_ONCE(1); 348 - return BLK_STS_IOERR; 349 - } 350 - 351 - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 352 - vbr->out_hdr.sector = type ? 353 - 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); 354 - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); 232 + status = virtblk_setup_cmd(vblk->vdev, req, vbr); 233 + if (unlikely(status)) 234 + return status; 355 235 356 236 blk_mq_start_request(req); 357 237 358 - if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 359 - err = virtblk_setup_discard_write_zeroes(req, unmap); 360 - if (err) 361 - return BLK_STS_RESOURCE; 362 - } 363 - 364 - num = blk_rq_map_sg(hctx->queue, req, vbr->sg); 365 - if (num) { 366 - if (rq_data_dir(req) == WRITE) 367 - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); 368 - else 369 - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); 238 + num = virtblk_map_data(hctx, req, vbr); 239 + if (unlikely(num < 0)) { 240 + virtblk_cleanup_cmd(req); 241 + return BLK_STS_RESOURCE; 370 242 } 371 243 372 244 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 373 - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); 245 + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); 374 246 if (err) { 375 247 virtqueue_kick(vblk->vqs[qid].vq); 376 248 /* Don't stop the queue if -ENOMEM: we may have failed to ··· 346 284 if (err == -ENOSPC) 347 285 blk_mq_stop_hw_queue(hctx); 348 286 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 287 + virtblk_unmap_data(req, vbr); 288 + virtblk_cleanup_cmd(req); 349 289 switch (err) { 350 290 case -ENOSPC: 351 291 return BLK_STS_DEV_RESOURCE; ··· 561 497 &num_vqs); 562 498 if (err) 563 499 num_vqs = 1; 500 + if (!err && !num_vqs) { 501 + 
dev_err(&vdev->dev, "MQ advertised but zero queues reported\n"); 502 + return -EINVAL; 503 + } 564 504 565 - num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); 505 + num_vqs = min_t(unsigned int, 506 + min_not_zero(num_request_queues, nr_cpu_ids), 507 + num_vqs); 566 508 567 509 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 568 510 if (!vblk->vqs) ··· 694 624 u8 writeback = virtblk_get_cache_mode(vblk->vdev); 695 625 696 626 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); 697 - return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); 627 + return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]); 698 628 } 699 629 700 630 static DEVICE_ATTR_RW(cache_type); ··· 730 660 NULL, 731 661 }; 732 662 733 - static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, 734 - unsigned int hctx_idx, unsigned int numa_node) 735 - { 736 - struct virtio_blk *vblk = set->driver_data; 737 - struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); 738 - 739 - sg_init_table(vbr->sg, vblk->sg_elems); 740 - return 0; 741 - } 742 - 743 663 static int virtblk_map_queues(struct blk_mq_tag_set *set) 744 664 { 745 665 struct virtio_blk *vblk = set->driver_data; ··· 742 682 .queue_rq = virtio_queue_rq, 743 683 .commit_rqs = virtio_commit_rqs, 744 684 .complete = virtblk_request_done, 745 - .init_request = virtblk_init_request, 746 685 .map_queues = virtblk_map_queues, 747 686 }; 748 687 ··· 821 762 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 822 763 vblk->tag_set.cmd_size = 823 764 sizeof(struct virtblk_req) + 824 - sizeof(struct scatterlist) * sg_elems; 765 + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; 825 766 vblk->tag_set.driver_data = vblk; 826 767 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 827 768 ··· 1049 990 .feature_table_size = ARRAY_SIZE(features), 1050 991 .feature_table_legacy = features_legacy, 1051 992 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 993 + .suppress_used_validation = true, 1052 994 
.driver.name = KBUILD_MODNAME, 1053 995 .driver.owner = THIS_MODULE, 1054 996 .id_table = id_table,
+64 -20
drivers/char/hw_random/virtio-rng.c
··· 18 18 struct virtrng_info { 19 19 struct hwrng hwrng; 20 20 struct virtqueue *vq; 21 - struct completion have_data; 22 21 char name[25]; 23 - unsigned int data_avail; 24 22 int index; 25 - bool busy; 26 23 bool hwrng_register_done; 27 24 bool hwrng_removed; 25 + /* data transfer */ 26 + struct completion have_data; 27 + unsigned int data_avail; 28 + unsigned int data_idx; 29 + /* minimal size returned by rng_buffer_size() */ 30 + #if SMP_CACHE_BYTES < 32 31 + u8 data[32]; 32 + #else 33 + u8 data[SMP_CACHE_BYTES]; 34 + #endif 28 35 }; 29 36 30 37 static void random_recv_done(struct virtqueue *vq) ··· 42 35 if (!virtqueue_get_buf(vi->vq, &vi->data_avail)) 43 36 return; 44 37 38 + vi->data_idx = 0; 39 + 45 40 complete(&vi->have_data); 46 41 } 47 42 48 - /* The host will fill any buffer we give it with sweet, sweet randomness. */ 49 - static void register_buffer(struct virtrng_info *vi, u8 *buf, size_t size) 43 + static void request_entropy(struct virtrng_info *vi) 50 44 { 51 45 struct scatterlist sg; 52 46 53 - sg_init_one(&sg, buf, size); 47 + reinit_completion(&vi->have_data); 48 + vi->data_avail = 0; 49 + vi->data_idx = 0; 50 + 51 + sg_init_one(&sg, vi->data, sizeof(vi->data)); 54 52 55 53 /* There should always be room for one buffer. 
*/ 56 - virtqueue_add_inbuf(vi->vq, &sg, 1, buf, GFP_KERNEL); 54 + virtqueue_add_inbuf(vi->vq, &sg, 1, vi->data, GFP_KERNEL); 57 55 58 56 virtqueue_kick(vi->vq); 57 + } 58 + 59 + static unsigned int copy_data(struct virtrng_info *vi, void *buf, 60 + unsigned int size) 61 + { 62 + size = min_t(unsigned int, size, vi->data_avail); 63 + memcpy(buf, vi->data + vi->data_idx, size); 64 + vi->data_idx += size; 65 + vi->data_avail -= size; 66 + if (vi->data_avail == 0) 67 + request_entropy(vi); 68 + return size; 59 69 } 60 70 61 71 static int virtio_read(struct hwrng *rng, void *buf, size_t size, bool wait) 62 72 { 63 73 int ret; 64 74 struct virtrng_info *vi = (struct virtrng_info *)rng->priv; 75 + unsigned int chunk; 76 + size_t read; 65 77 66 78 if (vi->hwrng_removed) 67 79 return -ENODEV; 68 80 69 - if (!vi->busy) { 70 - vi->busy = true; 71 - reinit_completion(&vi->have_data); 72 - register_buffer(vi, buf, size); 81 + read = 0; 82 + 83 + /* copy available data */ 84 + if (vi->data_avail) { 85 + chunk = copy_data(vi, buf, size); 86 + size -= chunk; 87 + read += chunk; 73 88 } 74 89 75 90 if (!wait) 76 - return 0; 91 + return read; 77 92 78 - ret = wait_for_completion_killable(&vi->have_data); 79 - if (ret < 0) 80 - return ret; 93 + /* We have already copied available entropy, 94 + * so either size is 0 or data_avail is 0 95 + */ 96 + while (size != 0) { 97 + /* data_avail is 0 but a request is pending */ 98 + ret = wait_for_completion_killable(&vi->have_data); 99 + if (ret < 0) 100 + return ret; 101 + /* if vi->data_avail is 0, we have been interrupted 102 + * by a cleanup, but buffer stays in the queue 103 + */ 104 + if (vi->data_avail == 0) 105 + return read; 81 106 82 - vi->busy = false; 107 + chunk = copy_data(vi, buf + read, size); 108 + size -= chunk; 109 + read += chunk; 110 + } 83 111 84 - return vi->data_avail; 112 + return read; 85 113 } 86 114 87 115 static void virtio_cleanup(struct hwrng *rng) 88 116 { 89 117 struct virtrng_info *vi = (struct virtrng_info 
*)rng->priv; 90 118 91 - if (vi->busy) 92 - wait_for_completion(&vi->have_data); 119 + complete(&vi->have_data); 93 120 } 94 121 95 122 static int probe_common(struct virtio_device *vdev) ··· 159 118 goto err_find; 160 119 } 161 120 121 + /* we always have a pending entropy request */ 122 + request_entropy(vi); 123 + 162 124 return 0; 163 125 164 126 err_find: ··· 177 133 178 134 vi->hwrng_removed = true; 179 135 vi->data_avail = 0; 136 + vi->data_idx = 0; 180 137 complete(&vi->have_data); 181 138 vdev->config->reset(vdev); 182 - vi->busy = false; 183 139 if (vi->hwrng_register_done) 184 140 hwrng_unregister(&vi->hwrng); 185 141 vdev->config->del_vqs(vdev);
+9
drivers/char/virtio_console.c
··· 28 28 #include "../tty/hvc/hvc_console.h" 29 29 30 30 #define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) 31 + #define VIRTCONS_MAX_PORTS 0x8000 31 32 32 33 /* 33 34 * This is a global struct for storing common data for all the devices ··· 2037 2036 virtio_cread_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT, 2038 2037 struct virtio_console_config, max_nr_ports, 2039 2038 &portdev->max_nr_ports) == 0) { 2039 + if (portdev->max_nr_ports == 0 || 2040 + portdev->max_nr_ports > VIRTCONS_MAX_PORTS) { 2041 + dev_err(&vdev->dev, 2042 + "Invalidate max_nr_ports %d", 2043 + portdev->max_nr_ports); 2044 + err = -EINVAL; 2045 + goto free; 2046 + } 2040 2047 multiport = true; 2041 2048 } 2042 2049
+30 -26
drivers/i2c/busses/i2c-virtio.c
··· 63 63 int outcnt = 0, incnt = 0; 64 64 65 65 /* 66 - * We don't support 0 length messages and so filter out 67 - * 0 length transfers by using i2c_adapter_quirks. 68 - */ 69 - if (!msgs[i].len) 70 - break; 71 - 72 - /* 73 66 * Only 7-bit mode supported for this moment. For the address 74 67 * format, Please check the Virtio I2C Specification. 75 68 */ 76 69 reqs[i].out_hdr.addr = cpu_to_le16(msgs[i].addr << 1); 77 70 71 + if (msgs[i].flags & I2C_M_RD) 72 + reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_M_RD); 73 + 78 74 if (i != num - 1) 79 - reqs[i].out_hdr.flags = cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT); 75 + reqs[i].out_hdr.flags |= cpu_to_le32(VIRTIO_I2C_FLAGS_FAIL_NEXT); 80 76 81 77 sg_init_one(&out_hdr, &reqs[i].out_hdr, sizeof(reqs[i].out_hdr)); 82 78 sgs[outcnt++] = &out_hdr; 83 79 84 - reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1); 85 - if (!reqs[i].buf) 86 - break; 80 + if (msgs[i].len) { 81 + reqs[i].buf = i2c_get_dma_safe_msg_buf(&msgs[i], 1); 82 + if (!reqs[i].buf) 83 + break; 87 84 88 - sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len); 85 + sg_init_one(&msg_buf, reqs[i].buf, msgs[i].len); 89 86 90 - if (msgs[i].flags & I2C_M_RD) 91 - sgs[outcnt + incnt++] = &msg_buf; 92 - else 93 - sgs[outcnt++] = &msg_buf; 87 + if (msgs[i].flags & I2C_M_RD) 88 + sgs[outcnt + incnt++] = &msg_buf; 89 + else 90 + sgs[outcnt++] = &msg_buf; 91 + } 94 92 95 93 sg_init_one(&in_hdr, &reqs[i].in_hdr, sizeof(reqs[i].in_hdr)); 96 94 sgs[outcnt + incnt++] = &in_hdr; ··· 189 191 190 192 static u32 virtio_i2c_func(struct i2c_adapter *adap) 191 193 { 192 - return I2C_FUNC_I2C | (I2C_FUNC_SMBUS_EMUL & ~I2C_FUNC_SMBUS_QUICK); 194 + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; 193 195 } 194 196 195 197 static struct i2c_algorithm virtio_algorithm = { ··· 197 199 .functionality = virtio_i2c_func, 198 200 }; 199 201 200 - static const struct i2c_adapter_quirks virtio_i2c_quirks = { 201 - .flags = I2C_AQ_NO_ZERO_LEN, 202 - }; 203 - 204 202 static int virtio_i2c_probe(struct 
virtio_device *vdev) 205 203 { 206 204 struct virtio_i2c *vi; 207 205 int ret; 206 + 207 + if (!virtio_has_feature(vdev, VIRTIO_I2C_F_ZERO_LENGTH_REQUEST)) { 208 + dev_err(&vdev->dev, "Zero-length request feature is mandatory\n"); 209 + return -EINVAL; 210 + } 208 211 209 212 vi = devm_kzalloc(&vdev->dev, sizeof(*vi), GFP_KERNEL); 210 213 if (!vi) ··· 224 225 snprintf(vi->adap.name, sizeof(vi->adap.name), 225 226 "i2c_virtio at virtio bus %d", vdev->index); 226 227 vi->adap.algo = &virtio_algorithm; 227 - vi->adap.quirks = &virtio_i2c_quirks; 228 228 vi->adap.dev.parent = &vdev->dev; 229 229 vi->adap.dev.of_node = vdev->dev.of_node; 230 230 i2c_set_adapdata(&vi->adap, vi); ··· 268 270 } 269 271 #endif 270 272 273 + static const unsigned int features[] = { 274 + VIRTIO_I2C_F_ZERO_LENGTH_REQUEST, 275 + }; 276 + 271 277 static struct virtio_driver virtio_i2c_driver = { 272 - .id_table = id_table, 273 - .probe = virtio_i2c_probe, 274 - .remove = virtio_i2c_remove, 275 - .driver = { 278 + .feature_table = features, 279 + .feature_table_size = ARRAY_SIZE(features), 280 + .id_table = id_table, 281 + .probe = virtio_i2c_probe, 282 + .remove = virtio_i2c_remove, 283 + .driver = { 276 284 .name = "i2c_virtio", 277 285 }, 278 286 #ifdef CONFIG_PM_SLEEP
+3 -1
drivers/net/virtio_net.c
··· 408 408 * add_recvbuf_mergeable() + get_mergeable_buf_len() 409 409 */ 410 410 truesize = headroom ? PAGE_SIZE : truesize; 411 - tailroom = truesize - len - headroom - (hdr_padded_len - hdr_len); 411 + tailroom = truesize - headroom; 412 412 buf = p - headroom; 413 413 414 414 len -= hdr_len; 415 415 offset += hdr_padded_len; 416 416 p += hdr_padded_len; 417 + tailroom -= hdr_padded_len + len; 417 418 418 419 shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 419 420 ··· 3423 3422 .feature_table_size = ARRAY_SIZE(features), 3424 3423 .feature_table_legacy = features_legacy, 3425 3424 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 3425 + .suppress_used_validation = true, 3426 3426 .driver.name = KBUILD_MODNAME, 3427 3427 .driver.owner = THIS_MODULE, 3428 3428 .id_table = id_table,
+1
drivers/scsi/virtio_scsi.c
··· 978 978 static struct virtio_driver virtio_scsi_driver = { 979 979 .feature_table = features, 980 980 .feature_table_size = ARRAY_SIZE(features), 981 + .suppress_used_validation = true, 981 982 .driver.name = KBUILD_MODNAME, 982 983 .driver.owner = THIS_MODULE, 983 984 .id_table = id_table,
+8
drivers/vdpa/Kconfig
··· 78 78 help 79 79 This kernel module bridges virtio PCI device to vDPA bus. 80 80 81 + config ALIBABA_ENI_VDPA 82 + tristate "vDPA driver for Alibaba ENI" 83 + select VIRTIO_PCI_LIB_LEGACY 84 + depends on PCI_MSI && X86 85 + help 86 + VDPA driver for Alibaba ENI (Elastic Network Interface) which is built upon 87 + virtio 0.9.5 specification. 88 + 81 89 endif # VDPA
+1
drivers/vdpa/Makefile
··· 5 5 obj-$(CONFIG_IFCVF) += ifcvf/ 6 6 obj-$(CONFIG_MLX5_VDPA) += mlx5/ 7 7 obj-$(CONFIG_VP_VDPA) += virtio_pci/ 8 + obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/
+3
drivers/vdpa/alibaba/Makefile
··· 1 + # SPDX-License-Identifier: GPL-2.0 2 + obj-$(CONFIG_ALIBABA_ENI_VDPA) += eni_vdpa.o 3 +
+553
drivers/vdpa/alibaba/eni_vdpa.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * vDPA bridge driver for Alibaba ENI(Elastic Network Interface) 4 + * 5 + * Copyright (c) 2021, Alibaba Inc. All rights reserved. 6 + * Author: Wu Zongyong <wuzongyong@linux.alibaba.com> 7 + * 8 + */ 9 + 10 + #include "linux/bits.h" 11 + #include <linux/interrupt.h> 12 + #include <linux/module.h> 13 + #include <linux/pci.h> 14 + #include <linux/vdpa.h> 15 + #include <linux/virtio.h> 16 + #include <linux/virtio_config.h> 17 + #include <linux/virtio_ring.h> 18 + #include <linux/virtio_pci.h> 19 + #include <linux/virtio_pci_legacy.h> 20 + #include <uapi/linux/virtio_net.h> 21 + 22 + #define ENI_MSIX_NAME_SIZE 256 23 + 24 + #define ENI_ERR(pdev, fmt, ...) \ 25 + dev_err(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__) 26 + #define ENI_DBG(pdev, fmt, ...) \ 27 + dev_dbg(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__) 28 + #define ENI_INFO(pdev, fmt, ...) \ 29 + dev_info(&pdev->dev, "%s"fmt, "eni_vdpa: ", ##__VA_ARGS__) 30 + 31 + struct eni_vring { 32 + void __iomem *notify; 33 + char msix_name[ENI_MSIX_NAME_SIZE]; 34 + struct vdpa_callback cb; 35 + int irq; 36 + }; 37 + 38 + struct eni_vdpa { 39 + struct vdpa_device vdpa; 40 + struct virtio_pci_legacy_device ldev; 41 + struct eni_vring *vring; 42 + struct vdpa_callback config_cb; 43 + char msix_name[ENI_MSIX_NAME_SIZE]; 44 + int config_irq; 45 + int queues; 46 + int vectors; 47 + }; 48 + 49 + static struct eni_vdpa *vdpa_to_eni(struct vdpa_device *vdpa) 50 + { 51 + return container_of(vdpa, struct eni_vdpa, vdpa); 52 + } 53 + 54 + static struct virtio_pci_legacy_device *vdpa_to_ldev(struct vdpa_device *vdpa) 55 + { 56 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 57 + 58 + return &eni_vdpa->ldev; 59 + } 60 + 61 + static u64 eni_vdpa_get_features(struct vdpa_device *vdpa) 62 + { 63 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 64 + u64 features = vp_legacy_get_features(ldev); 65 + 66 + features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM); 67 + 
features |= BIT_ULL(VIRTIO_F_ORDER_PLATFORM); 68 + 69 + return features; 70 + } 71 + 72 + static int eni_vdpa_set_features(struct vdpa_device *vdpa, u64 features) 73 + { 74 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 75 + 76 + if (!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) && features) { 77 + ENI_ERR(ldev->pci_dev, 78 + "VIRTIO_NET_F_MRG_RXBUF is not negotiated\n"); 79 + return -EINVAL; 80 + } 81 + 82 + vp_legacy_set_features(ldev, (u32)features); 83 + 84 + return 0; 85 + } 86 + 87 + static u8 eni_vdpa_get_status(struct vdpa_device *vdpa) 88 + { 89 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 90 + 91 + return vp_legacy_get_status(ldev); 92 + } 93 + 94 + static int eni_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx) 95 + { 96 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 97 + int irq = eni_vdpa->vring[idx].irq; 98 + 99 + if (irq == VIRTIO_MSI_NO_VECTOR) 100 + return -EINVAL; 101 + 102 + return irq; 103 + } 104 + 105 + static void eni_vdpa_free_irq(struct eni_vdpa *eni_vdpa) 106 + { 107 + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; 108 + struct pci_dev *pdev = ldev->pci_dev; 109 + int i; 110 + 111 + for (i = 0; i < eni_vdpa->queues; i++) { 112 + if (eni_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) { 113 + vp_legacy_queue_vector(ldev, i, VIRTIO_MSI_NO_VECTOR); 114 + devm_free_irq(&pdev->dev, eni_vdpa->vring[i].irq, 115 + &eni_vdpa->vring[i]); 116 + eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR; 117 + } 118 + } 119 + 120 + if (eni_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) { 121 + vp_legacy_config_vector(ldev, VIRTIO_MSI_NO_VECTOR); 122 + devm_free_irq(&pdev->dev, eni_vdpa->config_irq, eni_vdpa); 123 + eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; 124 + } 125 + 126 + if (eni_vdpa->vectors) { 127 + pci_free_irq_vectors(pdev); 128 + eni_vdpa->vectors = 0; 129 + } 130 + } 131 + 132 + static irqreturn_t eni_vdpa_vq_handler(int irq, void *arg) 133 + { 134 + struct eni_vring *vring = arg; 135 + 136 + if 
(vring->cb.callback) 137 + return vring->cb.callback(vring->cb.private); 138 + 139 + return IRQ_HANDLED; 140 + } 141 + 142 + static irqreturn_t eni_vdpa_config_handler(int irq, void *arg) 143 + { 144 + struct eni_vdpa *eni_vdpa = arg; 145 + 146 + if (eni_vdpa->config_cb.callback) 147 + return eni_vdpa->config_cb.callback(eni_vdpa->config_cb.private); 148 + 149 + return IRQ_HANDLED; 150 + } 151 + 152 + static int eni_vdpa_request_irq(struct eni_vdpa *eni_vdpa) 153 + { 154 + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; 155 + struct pci_dev *pdev = ldev->pci_dev; 156 + int i, ret, irq; 157 + int queues = eni_vdpa->queues; 158 + int vectors = queues + 1; 159 + 160 + ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX); 161 + if (ret != vectors) { 162 + ENI_ERR(pdev, 163 + "failed to allocate irq vectors want %d but %d\n", 164 + vectors, ret); 165 + return ret; 166 + } 167 + 168 + eni_vdpa->vectors = vectors; 169 + 170 + for (i = 0; i < queues; i++) { 171 + snprintf(eni_vdpa->vring[i].msix_name, ENI_MSIX_NAME_SIZE, 172 + "eni-vdpa[%s]-%d\n", pci_name(pdev), i); 173 + irq = pci_irq_vector(pdev, i); 174 + ret = devm_request_irq(&pdev->dev, irq, 175 + eni_vdpa_vq_handler, 176 + 0, eni_vdpa->vring[i].msix_name, 177 + &eni_vdpa->vring[i]); 178 + if (ret) { 179 + ENI_ERR(pdev, "failed to request irq for vq %d\n", i); 180 + goto err; 181 + } 182 + vp_legacy_queue_vector(ldev, i, i); 183 + eni_vdpa->vring[i].irq = irq; 184 + } 185 + 186 + snprintf(eni_vdpa->msix_name, ENI_MSIX_NAME_SIZE, "eni-vdpa[%s]-config\n", 187 + pci_name(pdev)); 188 + irq = pci_irq_vector(pdev, queues); 189 + ret = devm_request_irq(&pdev->dev, irq, eni_vdpa_config_handler, 0, 190 + eni_vdpa->msix_name, eni_vdpa); 191 + if (ret) { 192 + ENI_ERR(pdev, "failed to request irq for config vq %d\n", i); 193 + goto err; 194 + } 195 + vp_legacy_config_vector(ldev, queues); 196 + eni_vdpa->config_irq = irq; 197 + 198 + return 0; 199 + err: 200 + eni_vdpa_free_irq(eni_vdpa); 201 + return 
ret; 202 + } 203 + 204 + static void eni_vdpa_set_status(struct vdpa_device *vdpa, u8 status) 205 + { 206 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 207 + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; 208 + u8 s = eni_vdpa_get_status(vdpa); 209 + 210 + if (status & VIRTIO_CONFIG_S_DRIVER_OK && 211 + !(s & VIRTIO_CONFIG_S_DRIVER_OK)) { 212 + eni_vdpa_request_irq(eni_vdpa); 213 + } 214 + 215 + vp_legacy_set_status(ldev, status); 216 + 217 + if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) && 218 + (s & VIRTIO_CONFIG_S_DRIVER_OK)) 219 + eni_vdpa_free_irq(eni_vdpa); 220 + } 221 + 222 + static int eni_vdpa_reset(struct vdpa_device *vdpa) 223 + { 224 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 225 + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; 226 + u8 s = eni_vdpa_get_status(vdpa); 227 + 228 + vp_legacy_set_status(ldev, 0); 229 + 230 + if (s & VIRTIO_CONFIG_S_DRIVER_OK) 231 + eni_vdpa_free_irq(eni_vdpa); 232 + 233 + return 0; 234 + } 235 + 236 + static u16 eni_vdpa_get_vq_num_max(struct vdpa_device *vdpa) 237 + { 238 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 239 + 240 + return vp_legacy_get_queue_size(ldev, 0); 241 + } 242 + 243 + static u16 eni_vdpa_get_vq_num_min(struct vdpa_device *vdpa) 244 + { 245 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 246 + 247 + return vp_legacy_get_queue_size(ldev, 0); 248 + } 249 + 250 + static int eni_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid, 251 + struct vdpa_vq_state *state) 252 + { 253 + return -EOPNOTSUPP; 254 + } 255 + 256 + static int eni_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid, 257 + const struct vdpa_vq_state *state) 258 + { 259 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 260 + const struct vdpa_vq_state_split *split = &state->split; 261 + 262 + /* ENI is build upon virtio-pci specfication which not support 263 + * to set state of virtqueue. 
But if the state is equal to the 264 + * device initial state by chance, we can let it go. 265 + */ 266 + if (!vp_legacy_get_queue_enable(ldev, qid) 267 + && split->avail_index == 0) 268 + return 0; 269 + 270 + return -EOPNOTSUPP; 271 + } 272 + 273 + 274 + static void eni_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid, 275 + struct vdpa_callback *cb) 276 + { 277 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 278 + 279 + eni_vdpa->vring[qid].cb = *cb; 280 + } 281 + 282 + static void eni_vdpa_set_vq_ready(struct vdpa_device *vdpa, u16 qid, 283 + bool ready) 284 + { 285 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 286 + 287 + /* ENI is a legacy virtio-pci device. This is not supported 288 + * by specification. But we can disable virtqueue by setting 289 + * address to 0. 290 + */ 291 + if (!ready) 292 + vp_legacy_set_queue_address(ldev, qid, 0); 293 + } 294 + 295 + static bool eni_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid) 296 + { 297 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 298 + 299 + return vp_legacy_get_queue_enable(ldev, qid); 300 + } 301 + 302 + static void eni_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid, 303 + u32 num) 304 + { 305 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 306 + struct pci_dev *pdev = ldev->pci_dev; 307 + u16 n = vp_legacy_get_queue_size(ldev, qid); 308 + 309 + /* ENI is a legacy virtio-pci device which not allow to change 310 + * virtqueue size. Just report a error if someone tries to 311 + * change it. 
312 + */ 313 + if (num != n) 314 + ENI_ERR(pdev, 315 + "not support to set vq %u fixed num %u to %u\n", 316 + qid, n, num); 317 + } 318 + 319 + static int eni_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid, 320 + u64 desc_area, u64 driver_area, 321 + u64 device_area) 322 + { 323 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 324 + u32 pfn = desc_area >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; 325 + 326 + vp_legacy_set_queue_address(ldev, qid, pfn); 327 + 328 + return 0; 329 + } 330 + 331 + static void eni_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid) 332 + { 333 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 334 + 335 + iowrite16(qid, eni_vdpa->vring[qid].notify); 336 + } 337 + 338 + static u32 eni_vdpa_get_device_id(struct vdpa_device *vdpa) 339 + { 340 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 341 + 342 + return ldev->id.device; 343 + } 344 + 345 + static u32 eni_vdpa_get_vendor_id(struct vdpa_device *vdpa) 346 + { 347 + struct virtio_pci_legacy_device *ldev = vdpa_to_ldev(vdpa); 348 + 349 + return ldev->id.vendor; 350 + } 351 + 352 + static u32 eni_vdpa_get_vq_align(struct vdpa_device *vdpa) 353 + { 354 + return VIRTIO_PCI_VRING_ALIGN; 355 + } 356 + 357 + static size_t eni_vdpa_get_config_size(struct vdpa_device *vdpa) 358 + { 359 + return sizeof(struct virtio_net_config); 360 + } 361 + 362 + 363 + static void eni_vdpa_get_config(struct vdpa_device *vdpa, 364 + unsigned int offset, 365 + void *buf, unsigned int len) 366 + { 367 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 368 + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; 369 + void __iomem *ioaddr = ldev->ioaddr + 370 + VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) + 371 + offset; 372 + u8 *p = buf; 373 + int i; 374 + 375 + for (i = 0; i < len; i++) 376 + *p++ = ioread8(ioaddr + i); 377 + } 378 + 379 + static void eni_vdpa_set_config(struct vdpa_device *vdpa, 380 + unsigned int offset, const void *buf, 381 + unsigned int len) 382 + { 383 + struct eni_vdpa *eni_vdpa 
= vdpa_to_eni(vdpa); 384 + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; 385 + void __iomem *ioaddr = ldev->ioaddr + 386 + VIRTIO_PCI_CONFIG_OFF(eni_vdpa->vectors) + 387 + offset; 388 + const u8 *p = buf; 389 + int i; 390 + 391 + for (i = 0; i < len; i++) 392 + iowrite8(*p++, ioaddr + i); 393 + } 394 + 395 + static void eni_vdpa_set_config_cb(struct vdpa_device *vdpa, 396 + struct vdpa_callback *cb) 397 + { 398 + struct eni_vdpa *eni_vdpa = vdpa_to_eni(vdpa); 399 + 400 + eni_vdpa->config_cb = *cb; 401 + } 402 + 403 + static const struct vdpa_config_ops eni_vdpa_ops = { 404 + .get_features = eni_vdpa_get_features, 405 + .set_features = eni_vdpa_set_features, 406 + .get_status = eni_vdpa_get_status, 407 + .set_status = eni_vdpa_set_status, 408 + .reset = eni_vdpa_reset, 409 + .get_vq_num_max = eni_vdpa_get_vq_num_max, 410 + .get_vq_num_min = eni_vdpa_get_vq_num_min, 411 + .get_vq_state = eni_vdpa_get_vq_state, 412 + .set_vq_state = eni_vdpa_set_vq_state, 413 + .set_vq_cb = eni_vdpa_set_vq_cb, 414 + .set_vq_ready = eni_vdpa_set_vq_ready, 415 + .get_vq_ready = eni_vdpa_get_vq_ready, 416 + .set_vq_num = eni_vdpa_set_vq_num, 417 + .set_vq_address = eni_vdpa_set_vq_address, 418 + .kick_vq = eni_vdpa_kick_vq, 419 + .get_device_id = eni_vdpa_get_device_id, 420 + .get_vendor_id = eni_vdpa_get_vendor_id, 421 + .get_vq_align = eni_vdpa_get_vq_align, 422 + .get_config_size = eni_vdpa_get_config_size, 423 + .get_config = eni_vdpa_get_config, 424 + .set_config = eni_vdpa_set_config, 425 + .set_config_cb = eni_vdpa_set_config_cb, 426 + .get_vq_irq = eni_vdpa_get_vq_irq, 427 + }; 428 + 429 + 430 + static u16 eni_vdpa_get_num_queues(struct eni_vdpa *eni_vdpa) 431 + { 432 + struct virtio_pci_legacy_device *ldev = &eni_vdpa->ldev; 433 + u32 features = vp_legacy_get_features(ldev); 434 + u16 num = 2; 435 + 436 + if (features & BIT_ULL(VIRTIO_NET_F_MQ)) { 437 + __virtio16 max_virtqueue_pairs; 438 + 439 + eni_vdpa_get_config(&eni_vdpa->vdpa, 440 + offsetof(struct 
virtio_net_config, max_virtqueue_pairs), 441 + &max_virtqueue_pairs, 442 + sizeof(max_virtqueue_pairs)); 443 + num = 2 * __virtio16_to_cpu(virtio_legacy_is_little_endian(), 444 + max_virtqueue_pairs); 445 + } 446 + 447 + if (features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) 448 + num += 1; 449 + 450 + return num; 451 + } 452 + 453 + static void eni_vdpa_free_irq_vectors(void *data) 454 + { 455 + pci_free_irq_vectors(data); 456 + } 457 + 458 + static int eni_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id) 459 + { 460 + struct device *dev = &pdev->dev; 461 + struct eni_vdpa *eni_vdpa; 462 + struct virtio_pci_legacy_device *ldev; 463 + int ret, i; 464 + 465 + ret = pcim_enable_device(pdev); 466 + if (ret) 467 + return ret; 468 + 469 + eni_vdpa = vdpa_alloc_device(struct eni_vdpa, vdpa, 470 + dev, &eni_vdpa_ops, NULL, false); 471 + if (IS_ERR(eni_vdpa)) { 472 + ENI_ERR(pdev, "failed to allocate vDPA structure\n"); 473 + return PTR_ERR(eni_vdpa); 474 + } 475 + 476 + ldev = &eni_vdpa->ldev; 477 + ldev->pci_dev = pdev; 478 + 479 + ret = vp_legacy_probe(ldev); 480 + if (ret) { 481 + ENI_ERR(pdev, "failed to probe legacy PCI device\n"); 482 + goto err; 483 + } 484 + 485 + pci_set_master(pdev); 486 + pci_set_drvdata(pdev, eni_vdpa); 487 + 488 + eni_vdpa->vdpa.dma_dev = &pdev->dev; 489 + eni_vdpa->queues = eni_vdpa_get_num_queues(eni_vdpa); 490 + 491 + ret = devm_add_action_or_reset(dev, eni_vdpa_free_irq_vectors, pdev); 492 + if (ret) { 493 + ENI_ERR(pdev, 494 + "failed for adding devres for freeing irq vectors\n"); 495 + goto err; 496 + } 497 + 498 + eni_vdpa->vring = devm_kcalloc(&pdev->dev, eni_vdpa->queues, 499 + sizeof(*eni_vdpa->vring), 500 + GFP_KERNEL); 501 + if (!eni_vdpa->vring) { 502 + ret = -ENOMEM; 503 + ENI_ERR(pdev, "failed to allocate virtqueues\n"); 504 + goto err; 505 + } 506 + 507 + for (i = 0; i < eni_vdpa->queues; i++) { 508 + eni_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR; 509 + eni_vdpa->vring[i].notify = ldev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; 
510 + } 511 + eni_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR; 512 + 513 + ret = vdpa_register_device(&eni_vdpa->vdpa, eni_vdpa->queues); 514 + if (ret) { 515 + ENI_ERR(pdev, "failed to register to vdpa bus\n"); 516 + goto err; 517 + } 518 + 519 + return 0; 520 + 521 + err: 522 + put_device(&eni_vdpa->vdpa.dev); 523 + return ret; 524 + } 525 + 526 + static void eni_vdpa_remove(struct pci_dev *pdev) 527 + { 528 + struct eni_vdpa *eni_vdpa = pci_get_drvdata(pdev); 529 + 530 + vdpa_unregister_device(&eni_vdpa->vdpa); 531 + vp_legacy_remove(&eni_vdpa->ldev); 532 + } 533 + 534 + static struct pci_device_id eni_pci_ids[] = { 535 + { PCI_DEVICE_SUB(PCI_VENDOR_ID_REDHAT_QUMRANET, 536 + VIRTIO_TRANS_ID_NET, 537 + PCI_SUBVENDOR_ID_REDHAT_QUMRANET, 538 + VIRTIO_ID_NET) }, 539 + { 0 }, 540 + }; 541 + 542 + static struct pci_driver eni_vdpa_driver = { 543 + .name = "alibaba-eni-vdpa", 544 + .id_table = eni_pci_ids, 545 + .probe = eni_vdpa_probe, 546 + .remove = eni_vdpa_remove, 547 + }; 548 + 549 + module_pci_driver(eni_vdpa_driver); 550 + 551 + MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>"); 552 + MODULE_DESCRIPTION("Alibaba ENI vDPA driver"); 553 + MODULE_LICENSE("GPL v2");
+2 -1
drivers/vdpa/ifcvf/ifcvf_main.c
··· 499 499 return dev_type; 500 500 } 501 501 502 - static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) 502 + static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 503 + const struct vdpa_dev_set_config *config) 503 504 { 504 505 struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev; 505 506 struct ifcvf_adapter *adapter;
+1 -1
drivers/vdpa/mlx5/core/mlx5_vdpa.h
··· 63 63 unsigned short head; 64 64 }; 65 65 66 - struct mlx5_ctrl_wq_ent { 66 + struct mlx5_vdpa_wq_ent { 67 67 struct work_struct work; 68 68 struct mlx5_vdpa_dev *mvdev; 69 69 };
+169 -33
drivers/vdpa/mlx5/net/mlx5_vnet.c
··· 6 6 #include <linux/vringh.h> 7 7 #include <uapi/linux/virtio_net.h> 8 8 #include <uapi/linux/virtio_ids.h> 9 + #include <uapi/linux/vdpa.h> 9 10 #include <linux/virtio_config.h> 10 11 #include <linux/auxiliary_bus.h> 11 12 #include <linux/mlx5/cq.h> ··· 158 157 struct mutex reslock; 159 158 struct mlx5_flow_table *rxft; 160 159 struct mlx5_fc *rx_counter; 161 - struct mlx5_flow_handle *rx_rule; 160 + struct mlx5_flow_handle *rx_rule_ucast; 161 + struct mlx5_flow_handle *rx_rule_mcast; 162 162 bool setup; 163 - u16 mtu; 164 163 u32 cur_num_vqs; 164 + struct notifier_block nb; 165 + struct vdpa_callback config_cb; 165 166 }; 166 167 167 168 static void free_resources(struct mlx5_vdpa_net *ndev); ··· 1384 1381 struct mlx5_flow_table_attr ft_attr = {}; 1385 1382 struct mlx5_flow_act flow_act = {}; 1386 1383 struct mlx5_flow_namespace *ns; 1384 + struct mlx5_flow_spec *spec; 1385 + void *headers_c; 1386 + void *headers_v; 1387 + u8 *dmac_c; 1388 + u8 *dmac_v; 1387 1389 int err; 1388 1390 1389 - /* for now, one entry, match all, forward to tir */ 1390 - ft_attr.max_fte = 1; 1391 - ft_attr.autogroup.max_num_groups = 1; 1391 + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); 1392 + if (!spec) 1393 + return -ENOMEM; 1394 + 1395 + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; 1396 + ft_attr.max_fte = 2; 1397 + ft_attr.autogroup.max_num_groups = 2; 1392 1398 1393 1399 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS); 1394 1400 if (!ns) { 1395 - mlx5_vdpa_warn(&ndev->mvdev, "get flow namespace\n"); 1396 - return -EOPNOTSUPP; 1401 + mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n"); 1402 + err = -EOPNOTSUPP; 1403 + goto err_ns; 1397 1404 } 1398 1405 1399 1406 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); 1400 - if (IS_ERR(ndev->rxft)) 1401 - return PTR_ERR(ndev->rxft); 1407 + if (IS_ERR(ndev->rxft)) { 1408 + err = PTR_ERR(ndev->rxft); 1409 + goto err_ns; 1410 + } 1402 1411 1403 1412 ndev->rx_counter = 
mlx5_fc_create(ndev->mvdev.mdev, false); 1404 1413 if (IS_ERR(ndev->rx_counter)) { ··· 1418 1403 goto err_fc; 1419 1404 } 1420 1405 1406 + headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); 1407 + dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16); 1408 + memset(dmac_c, 0xff, ETH_ALEN); 1409 + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); 1410 + dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16); 1411 + ether_addr_copy(dmac_v, ndev->config.mac); 1412 + 1421 1413 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; 1422 1414 dest[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR; 1423 1415 dest[0].tir_num = ndev->res.tirn; 1424 1416 dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; 1425 1417 dest[1].counter_id = mlx5_fc_id(ndev->rx_counter); 1426 - ndev->rx_rule = mlx5_add_flow_rules(ndev->rxft, NULL, &flow_act, dest, 2); 1427 - if (IS_ERR(ndev->rx_rule)) { 1428 - err = PTR_ERR(ndev->rx_rule); 1429 - ndev->rx_rule = NULL; 1430 - goto err_rule; 1418 + ndev->rx_rule_ucast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 2); 1419 + 1420 + if (IS_ERR(ndev->rx_rule_ucast)) { 1421 + err = PTR_ERR(ndev->rx_rule_ucast); 1422 + ndev->rx_rule_ucast = NULL; 1423 + goto err_rule_ucast; 1431 1424 } 1432 1425 1426 + memset(dmac_c, 0, ETH_ALEN); 1427 + memset(dmac_v, 0, ETH_ALEN); 1428 + dmac_c[0] = 1; 1429 + dmac_v[0] = 1; 1430 + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; 1431 + ndev->rx_rule_mcast = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dest, 1); 1432 + if (IS_ERR(ndev->rx_rule_mcast)) { 1433 + err = PTR_ERR(ndev->rx_rule_mcast); 1434 + ndev->rx_rule_mcast = NULL; 1435 + goto err_rule_mcast; 1436 + } 1437 + 1438 + kvfree(spec); 1433 1439 return 0; 1434 1440 1435 - err_rule: 1441 + err_rule_mcast: 1442 + mlx5_del_flow_rules(ndev->rx_rule_ucast); 1443 + ndev->rx_rule_ucast = NULL; 1444 + err_rule_ucast: 1436 1445 
mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); 1437 1446 err_fc: 1438 1447 mlx5_destroy_flow_table(ndev->rxft); 1448 + err_ns: 1449 + kvfree(spec); 1439 1450 return err; 1440 1451 } 1441 1452 1442 1453 static void remove_fwd_to_tir(struct mlx5_vdpa_net *ndev) 1443 1454 { 1444 - if (!ndev->rx_rule) 1455 + if (!ndev->rx_rule_ucast) 1445 1456 return; 1446 1457 1447 - mlx5_del_flow_rules(ndev->rx_rule); 1458 + mlx5_del_flow_rules(ndev->rx_rule_mcast); 1459 + ndev->rx_rule_mcast = NULL; 1460 + mlx5_del_flow_rules(ndev->rx_rule_ucast); 1461 + ndev->rx_rule_ucast = NULL; 1448 1462 mlx5_fc_destroy(ndev->mvdev.mdev, ndev->rx_counter); 1449 1463 mlx5_destroy_flow_table(ndev->rxft); 1450 - 1451 - ndev->rx_rule = NULL; 1452 1464 } 1453 1465 1454 1466 static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) ··· 1599 1557 { 1600 1558 virtio_net_ctrl_ack status = VIRTIO_NET_ERR; 1601 1559 struct virtio_net_ctrl_hdr ctrl; 1602 - struct mlx5_ctrl_wq_ent *wqent; 1560 + struct mlx5_vdpa_wq_ent *wqent; 1603 1561 struct mlx5_vdpa_dev *mvdev; 1604 1562 struct mlx5_control_vq *cvq; 1605 1563 struct mlx5_vdpa_net *ndev; 1606 1564 size_t read, write; 1607 1565 int err; 1608 1566 1609 - wqent = container_of(work, struct mlx5_ctrl_wq_ent, work); 1567 + wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 1610 1568 mvdev = wqent->mvdev; 1611 1569 ndev = to_mlx5_vdpa_ndev(mvdev); 1612 1570 cvq = &mvdev->cvq; ··· 1658 1616 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 1659 1617 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1660 1618 struct mlx5_vdpa_virtqueue *mvq; 1661 - struct mlx5_ctrl_wq_ent *wqent; 1619 + struct mlx5_vdpa_wq_ent *wqent; 1662 1620 1663 1621 if (!is_index_valid(mvdev, idx)) 1664 1622 return; ··· 1894 1852 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ); 1895 1853 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR); 1896 1854 ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ); 1855 + ndev->mvdev.mlx_features 
|= BIT_ULL(VIRTIO_NET_F_STATUS); 1897 1856 1898 1857 print_features(mvdev, ndev->mvdev.mlx_features, false); 1899 1858 return ndev->mvdev.mlx_features; ··· 1985 1942 return err; 1986 1943 1987 1944 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features; 1988 - ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu); 1989 - ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 1990 1945 update_cvq_info(mvdev); 1991 1946 return err; 1992 1947 } 1993 1948 1994 1949 static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb) 1995 1950 { 1996 - /* not implemented */ 1997 - mlx5_vdpa_warn(to_mvdev(vdev), "set config callback not supported\n"); 1951 + struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 1952 + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 1953 + 1954 + ndev->config_cb = *cb; 1998 1955 } 1999 1956 2000 1957 #define MLX5_VDPA_MAX_VQ_ENTRIES 256 ··· 2235 2192 clear_vqs_ready(ndev); 2236 2193 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2237 2194 ndev->mvdev.status = 0; 2238 - ndev->mvdev.mlx_features = 0; 2239 2195 memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs)); 2240 2196 ndev->mvdev.actual_features = 0; 2241 2197 ++mvdev->generation; ··· 2446 2404 struct mlx5_vdpa_net *ndev; 2447 2405 }; 2448 2406 2449 - static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name) 2407 + static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) 2408 + { 2409 + u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {}; 2410 + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {}; 2411 + int err; 2412 + 2413 + MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); 2414 + MLX5_SET(query_vport_state_in, in, op_mod, opmod); 2415 + MLX5_SET(query_vport_state_in, in, vport_number, vport); 2416 + if (vport) 2417 + MLX5_SET(query_vport_state_in, in, other_vport, 1); 2418 + 2419 + err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out); 2420 + if (err) 2421 + return 0; 2422 + 2423 + 
return MLX5_GET(query_vport_state_out, out, state); 2424 + } 2425 + 2426 + static bool get_link_state(struct mlx5_vdpa_dev *mvdev) 2427 + { 2428 + if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) == 2429 + VPORT_STATE_UP) 2430 + return true; 2431 + 2432 + return false; 2433 + } 2434 + 2435 + static void update_carrier(struct work_struct *work) 2436 + { 2437 + struct mlx5_vdpa_wq_ent *wqent; 2438 + struct mlx5_vdpa_dev *mvdev; 2439 + struct mlx5_vdpa_net *ndev; 2440 + 2441 + wqent = container_of(work, struct mlx5_vdpa_wq_ent, work); 2442 + mvdev = wqent->mvdev; 2443 + ndev = to_mlx5_vdpa_ndev(mvdev); 2444 + if (get_link_state(mvdev)) 2445 + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2446 + else 2447 + ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 2448 + 2449 + if (ndev->config_cb.callback) 2450 + ndev->config_cb.callback(ndev->config_cb.private); 2451 + 2452 + kfree(wqent); 2453 + } 2454 + 2455 + static int event_handler(struct notifier_block *nb, unsigned long event, void *param) 2456 + { 2457 + struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb); 2458 + struct mlx5_eqe *eqe = param; 2459 + int ret = NOTIFY_DONE; 2460 + struct mlx5_vdpa_wq_ent *wqent; 2461 + 2462 + if (event == MLX5_EVENT_TYPE_PORT_CHANGE) { 2463 + switch (eqe->sub_type) { 2464 + case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2465 + case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2466 + wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); 2467 + if (!wqent) 2468 + return NOTIFY_DONE; 2469 + 2470 + wqent->mvdev = &ndev->mvdev; 2471 + INIT_WORK(&wqent->work, update_carrier); 2472 + queue_work(ndev->mvdev.wq, &wqent->work); 2473 + ret = NOTIFY_OK; 2474 + break; 2475 + default: 2476 + return NOTIFY_DONE; 2477 + } 2478 + return ret; 2479 + } 2480 + return ret; 2481 + } 2482 + 2483 + static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, 2484 + const struct vdpa_dev_set_config *add_config) 2450 2485 { 2451 2486 
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); 2452 2487 struct virtio_net_config *config; ··· 2532 2413 struct mlx5_vdpa_net *ndev; 2533 2414 struct mlx5_core_dev *mdev; 2534 2415 u32 max_vqs; 2416 + u16 mtu; 2535 2417 int err; 2536 2418 2537 2419 if (mgtdev->ndev) ··· 2560 2440 init_mvqs(ndev); 2561 2441 mutex_init(&ndev->reslock); 2562 2442 config = &ndev->config; 2563 - err = query_mtu(mdev, &ndev->mtu); 2443 + err = query_mtu(mdev, &mtu); 2564 2444 if (err) 2565 2445 goto err_mtu; 2566 2446 2567 - err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); 2568 - if (err) 2569 - goto err_mtu; 2447 + ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu); 2448 + 2449 + if (get_link_state(mvdev)) 2450 + ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP); 2451 + else 2452 + ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP); 2453 + 2454 + if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) { 2455 + memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN); 2456 + } else { 2457 + err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac); 2458 + if (err) 2459 + goto err_mtu; 2460 + } 2570 2461 2571 2462 if (!is_zero_ether_addr(config->mac)) { 2572 2463 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev)); ··· 2604 2473 if (err) 2605 2474 goto err_mr; 2606 2475 2607 - mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq"); 2476 + mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq"); 2608 2477 if (!mvdev->wq) { 2609 2478 err = -ENOMEM; 2610 2479 goto err_res2; 2611 2480 } 2612 2481 2482 + ndev->nb.notifier_call = event_handler; 2483 + mlx5_notifier_register(mdev, &ndev->nb); 2613 2484 ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs); 2614 2485 mvdev->vdev.mdev = &mgtdev->mgtdev; 2615 2486 err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1); ··· 2642 2509 { 2643 2510 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, 
mgtdev); 2644 2511 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); 2512 + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2645 2513 2514 + mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); 2646 2515 destroy_workqueue(mvdev->wq); 2647 2516 _vdpa_unregister_device(dev); 2648 2517 mgtdev->ndev = NULL; ··· 2676 2541 mgtdev->mgtdev.ops = &mdev_ops; 2677 2542 mgtdev->mgtdev.device = mdev->device; 2678 2543 mgtdev->mgtdev.id_table = id_table; 2544 + mgtdev->mgtdev.config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); 2679 2545 mgtdev->madev = madev; 2680 2546 2681 2547 err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
+260 -1
drivers/vdpa/vdpa.c
··· 14 14 #include <uapi/linux/vdpa.h> 15 15 #include <net/genetlink.h> 16 16 #include <linux/mod_devicetable.h> 17 + #include <linux/virtio_ids.h> 17 18 18 19 static LIST_HEAD(mdev_head); 19 20 /* A global mutex that protects vdpa management device and device level operations. */ ··· 27 26 { 28 27 struct vdpa_device *vdev = dev_to_vdpa(d); 29 28 struct vdpa_driver *drv = drv_to_vdpa(vdev->dev.driver); 29 + const struct vdpa_config_ops *ops = vdev->config; 30 + u32 max_num, min_num = 1; 30 31 int ret = 0; 32 + 33 + max_num = ops->get_vq_num_max(vdev); 34 + if (ops->get_vq_num_min) 35 + min_num = ops->get_vq_num_min(vdev); 36 + if (max_num < min_num) 37 + return -EINVAL; 31 38 32 39 if (drv && drv->probe) 33 40 ret = drv->probe(vdev); ··· 67 58 ops->free(vdev); 68 59 69 60 ida_simple_remove(&vdpa_index_ida, vdev->index); 61 + mutex_destroy(&vdev->cf_mutex); 70 62 kfree(vdev); 71 63 } 72 64 ··· 129 119 if (err) 130 120 goto err_name; 131 121 122 + mutex_init(&vdev->cf_mutex); 132 123 device_initialize(&vdev->dev); 133 124 134 125 return vdev; ··· 300 289 } 301 290 EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister); 302 291 292 + /** 293 + * vdpa_get_config - Get one or more device configuration fields. 294 + * @vdev: vdpa device to operate on 295 + * @offset: starting byte offset of the field 296 + * @buf: buffer pointer to read to 297 + * @len: length of the configuration fields in bytes 298 + */ 299 + void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, 300 + void *buf, unsigned int len) 301 + { 302 + const struct vdpa_config_ops *ops = vdev->config; 303 + 304 + mutex_lock(&vdev->cf_mutex); 305 + /* 306 + * Config accesses aren't supposed to trigger before features are set. 307 + * If it does happen we assume a legacy guest. 
308 + */ 309 + if (!vdev->features_valid) 310 + vdpa_set_features(vdev, 0); 311 + ops->get_config(vdev, offset, buf, len); 312 + mutex_unlock(&vdev->cf_mutex); 313 + } 314 + EXPORT_SYMBOL_GPL(vdpa_get_config); 315 + 316 + /** 317 + * vdpa_set_config - Set one or more device configuration fields. 318 + * @vdev: vdpa device to operate on 319 + * @offset: starting byte offset of the field 320 + * @buf: buffer pointer to read from 321 + * @length: length of the configuration fields in bytes 322 + */ 323 + void vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, 324 + const void *buf, unsigned int length) 325 + { 326 + mutex_lock(&vdev->cf_mutex); 327 + vdev->config->set_config(vdev, offset, buf, length); 328 + mutex_unlock(&vdev->cf_mutex); 329 + } 330 + EXPORT_SYMBOL_GPL(vdpa_set_config); 331 + 303 332 static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev, 304 333 const char *busname, const char *devname) 305 334 { ··· 479 428 return msg->len; 480 429 } 481 430 431 + #define VDPA_DEV_NET_ATTRS_MASK ((1 << VDPA_ATTR_DEV_NET_CFG_MACADDR) | \ 432 + (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) 433 + 482 434 static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info) 483 435 { 436 + struct vdpa_dev_set_config config = {}; 437 + struct nlattr **nl_attrs = info->attrs; 484 438 struct vdpa_mgmt_dev *mdev; 439 + const u8 *macaddr; 485 440 const char *name; 486 441 int err = 0; 487 442 ··· 496 439 497 440 name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); 498 441 442 + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]) { 443 + macaddr = nla_data(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MACADDR]); 444 + memcpy(config.net.mac, macaddr, sizeof(config.net.mac)); 445 + config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR); 446 + } 447 + if (nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]) { 448 + config.net.mtu = 449 + nla_get_u16(nl_attrs[VDPA_ATTR_DEV_NET_CFG_MTU]); 450 + config.mask |= (1 << VDPA_ATTR_DEV_NET_CFG_MTU); 451 + } 452 + 453 + /* Skip checking capability if user 
didn't prefer to configure any 454 + * device networking attributes. It is likely that user might have used 455 + * a device specific method to configure such attributes or using device 456 + * default attributes. 457 + */ 458 + if ((config.mask & VDPA_DEV_NET_ATTRS_MASK) && 459 + !netlink_capable(skb, CAP_NET_ADMIN)) 460 + return -EPERM; 461 + 499 462 mutex_lock(&vdpa_dev_mutex); 500 463 mdev = vdpa_mgmtdev_get_from_attr(info->attrs); 501 464 if (IS_ERR(mdev)) { ··· 523 446 err = PTR_ERR(mdev); 524 447 goto err; 525 448 } 449 + if ((config.mask & mdev->config_attr_mask) != config.mask) { 450 + NL_SET_ERR_MSG_MOD(info->extack, 451 + "All provided attributes are not supported"); 452 + err = -EOPNOTSUPP; 453 + goto err; 454 + } 526 455 527 - err = mdev->ops->dev_add(mdev, name); 456 + err = mdev->ops->dev_add(mdev, name, &config); 528 457 err: 529 458 mutex_unlock(&vdpa_dev_mutex); 530 459 return err; ··· 575 492 int flags, struct netlink_ext_ack *extack) 576 493 { 577 494 u16 max_vq_size; 495 + u16 min_vq_size = 1; 578 496 u32 device_id; 579 497 u32 vendor_id; 580 498 void *hdr; ··· 592 508 device_id = vdev->config->get_device_id(vdev); 593 509 vendor_id = vdev->config->get_vendor_id(vdev); 594 510 max_vq_size = vdev->config->get_vq_num_max(vdev); 511 + if (vdev->config->get_vq_num_min) 512 + min_vq_size = vdev->config->get_vq_num_min(vdev); 595 513 596 514 err = -EMSGSIZE; 597 515 if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) ··· 605 519 if (nla_put_u32(msg, VDPA_ATTR_DEV_MAX_VQS, vdev->nvqs)) 606 520 goto msg_err; 607 521 if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size)) 522 + goto msg_err; 523 + if (nla_put_u16(msg, VDPA_ATTR_DEV_MIN_VQ_SIZE, min_vq_size)) 608 524 goto msg_err; 609 525 610 526 genlmsg_end(msg, hdr); ··· 700 612 return msg->len; 701 613 } 702 614 615 + static int vdpa_dev_net_mq_config_fill(struct vdpa_device *vdev, 616 + struct sk_buff *msg, u64 features, 617 + const struct virtio_net_config *config) 618 + { 619 + 
u16 val_u16; 620 + 621 + if ((features & (1ULL << VIRTIO_NET_F_MQ)) == 0) 622 + return 0; 623 + 624 + val_u16 = le16_to_cpu(config->max_virtqueue_pairs); 625 + return nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MAX_VQP, val_u16); 626 + } 627 + 628 + static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *msg) 629 + { 630 + struct virtio_net_config config = {}; 631 + u64 features; 632 + u16 val_u16; 633 + 634 + vdpa_get_config(vdev, 0, &config, sizeof(config)); 635 + 636 + if (nla_put(msg, VDPA_ATTR_DEV_NET_CFG_MACADDR, sizeof(config.mac), 637 + config.mac)) 638 + return -EMSGSIZE; 639 + 640 + val_u16 = le16_to_cpu(config.status); 641 + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_STATUS, val_u16)) 642 + return -EMSGSIZE; 643 + 644 + val_u16 = le16_to_cpu(config.mtu); 645 + if (nla_put_u16(msg, VDPA_ATTR_DEV_NET_CFG_MTU, val_u16)) 646 + return -EMSGSIZE; 647 + 648 + features = vdev->config->get_features(vdev); 649 + 650 + return vdpa_dev_net_mq_config_fill(vdev, msg, features, &config); 651 + } 652 + 653 + static int 654 + vdpa_dev_config_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq, 655 + int flags, struct netlink_ext_ack *extack) 656 + { 657 + u32 device_id; 658 + void *hdr; 659 + int err; 660 + 661 + hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, 662 + VDPA_CMD_DEV_CONFIG_GET); 663 + if (!hdr) 664 + return -EMSGSIZE; 665 + 666 + if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev))) { 667 + err = -EMSGSIZE; 668 + goto msg_err; 669 + } 670 + 671 + device_id = vdev->config->get_device_id(vdev); 672 + if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id)) { 673 + err = -EMSGSIZE; 674 + goto msg_err; 675 + } 676 + 677 + switch (device_id) { 678 + case VIRTIO_ID_NET: 679 + err = vdpa_dev_net_config_fill(vdev, msg); 680 + break; 681 + default: 682 + err = -EOPNOTSUPP; 683 + break; 684 + } 685 + if (err) 686 + goto msg_err; 687 + 688 + genlmsg_end(msg, hdr); 689 + return 0; 690 + 691 + msg_err: 692 + 
genlmsg_cancel(msg, hdr); 693 + return err; 694 + } 695 + 696 + static int vdpa_nl_cmd_dev_config_get_doit(struct sk_buff *skb, struct genl_info *info) 697 + { 698 + struct vdpa_device *vdev; 699 + struct sk_buff *msg; 700 + const char *devname; 701 + struct device *dev; 702 + int err; 703 + 704 + if (!info->attrs[VDPA_ATTR_DEV_NAME]) 705 + return -EINVAL; 706 + devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]); 707 + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 708 + if (!msg) 709 + return -ENOMEM; 710 + 711 + mutex_lock(&vdpa_dev_mutex); 712 + dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match); 713 + if (!dev) { 714 + NL_SET_ERR_MSG_MOD(info->extack, "device not found"); 715 + err = -ENODEV; 716 + goto dev_err; 717 + } 718 + vdev = container_of(dev, struct vdpa_device, dev); 719 + if (!vdev->mdev) { 720 + NL_SET_ERR_MSG_MOD(info->extack, "unmanaged vdpa device"); 721 + err = -EINVAL; 722 + goto mdev_err; 723 + } 724 + err = vdpa_dev_config_fill(vdev, msg, info->snd_portid, info->snd_seq, 725 + 0, info->extack); 726 + if (!err) 727 + err = genlmsg_reply(msg, info); 728 + 729 + mdev_err: 730 + put_device(dev); 731 + dev_err: 732 + mutex_unlock(&vdpa_dev_mutex); 733 + if (err) 734 + nlmsg_free(msg); 735 + return err; 736 + } 737 + 738 + static int vdpa_dev_config_dump(struct device *dev, void *data) 739 + { 740 + struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev); 741 + struct vdpa_dev_dump_info *info = data; 742 + int err; 743 + 744 + if (!vdev->mdev) 745 + return 0; 746 + if (info->idx < info->start_idx) { 747 + info->idx++; 748 + return 0; 749 + } 750 + err = vdpa_dev_config_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid, 751 + info->cb->nlh->nlmsg_seq, NLM_F_MULTI, 752 + info->cb->extack); 753 + if (err) 754 + return err; 755 + 756 + info->idx++; 757 + return 0; 758 + } 759 + 760 + static int 761 + vdpa_nl_cmd_dev_config_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) 762 + { 763 + struct 
vdpa_dev_dump_info info; 764 + 765 + info.msg = msg; 766 + info.cb = cb; 767 + info.start_idx = cb->args[0]; 768 + info.idx = 0; 769 + 770 + mutex_lock(&vdpa_dev_mutex); 771 + bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_config_dump); 772 + mutex_unlock(&vdpa_dev_mutex); 773 + cb->args[0] = info.idx; 774 + return msg->len; 775 + } 776 + 703 777 static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = { 704 778 [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING }, 705 779 [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING }, 706 780 [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING }, 781 + [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR, 782 + /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */ 783 + [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68), 707 784 }; 708 785 709 786 static const struct genl_ops vdpa_nl_ops[] = { ··· 895 642 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 896 643 .doit = vdpa_nl_cmd_dev_get_doit, 897 644 .dumpit = vdpa_nl_cmd_dev_get_dumpit, 645 + }, 646 + { 647 + .cmd = VDPA_CMD_DEV_CONFIG_GET, 648 + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 649 + .doit = vdpa_nl_cmd_dev_config_get_doit, 650 + .dumpit = vdpa_nl_cmd_dev_config_get_dumpit, 898 651 }, 899 652 }; 900 653
+2 -1
drivers/vdpa/vdpa_sim/vdpa_sim_blk.c
··· 248 248 .release = vdpasim_blk_mgmtdev_release, 249 249 }; 250 250 251 - static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) 251 + static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 252 + const struct vdpa_dev_set_config *config) 252 253 { 253 254 struct vdpasim_dev_attr dev_attr = {}; 254 255 struct vdpasim *simdev;
+21 -17
drivers/vdpa/vdpa_sim/vdpa_sim_net.c
··· 16 16 #include <linux/vringh.h> 17 17 #include <linux/vdpa.h> 18 18 #include <uapi/linux/virtio_net.h> 19 + #include <uapi/linux/vdpa.h> 19 20 20 21 #include "vdpa_sim.h" 21 22 ··· 29 28 (1ULL << VIRTIO_NET_F_MAC)) 30 29 31 30 #define VDPASIM_NET_VQ_NUM 2 32 - 33 - static char *macaddr; 34 - module_param(macaddr, charp, 0); 35 - MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); 36 - 37 - static u8 macaddr_buf[ETH_ALEN]; 38 31 39 32 static void vdpasim_net_work(struct work_struct *work) 40 33 { ··· 107 112 { 108 113 struct virtio_net_config *net_config = config; 109 114 110 - net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); 111 115 net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); 112 - memcpy(net_config->mac, macaddr_buf, ETH_ALEN); 116 + } 117 + 118 + static void vdpasim_net_setup_config(struct vdpasim *vdpasim, 119 + const struct vdpa_dev_set_config *config) 120 + { 121 + struct virtio_net_config *vio_config = vdpasim->config; 122 + 123 + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) 124 + memcpy(vio_config->mac, config->net.mac, ETH_ALEN); 125 + if (config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MTU)) 126 + vio_config->mtu = cpu_to_vdpasim16(vdpasim, config->net.mtu); 127 + else 128 + /* Setup default MTU to be 1500 */ 129 + vio_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); 113 130 } 114 131 115 132 static void vdpasim_net_mgmtdev_release(struct device *dev) ··· 133 126 .release = vdpasim_net_mgmtdev_release, 134 127 }; 135 128 136 - static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) 129 + static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 130 + const struct vdpa_dev_set_config *config) 137 131 { 138 132 struct vdpasim_dev_attr dev_attr = {}; 139 133 struct vdpasim *simdev; ··· 153 145 simdev = vdpasim_create(&dev_attr); 154 146 if (IS_ERR(simdev)) 155 147 return PTR_ERR(simdev); 148 + 149 + vdpasim_net_setup_config(simdev, config); 156 150 157 151 ret = 
_vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM); 158 152 if (ret) ··· 189 179 .device = &vdpasim_net_mgmtdev, 190 180 .id_table = id_table, 191 181 .ops = &vdpasim_net_mgmtdev_ops, 182 + .config_attr_mask = (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR | 183 + 1 << VDPA_ATTR_DEV_NET_CFG_MTU), 192 184 }; 193 185 194 186 static int __init vdpasim_net_init(void) 195 187 { 196 188 int ret; 197 - 198 - if (macaddr) { 199 - mac_pton(macaddr, macaddr_buf); 200 - if (!is_valid_ether_addr(macaddr_buf)) 201 - return -EADDRNOTAVAIL; 202 - } else { 203 - eth_random_addr(macaddr_buf); 204 - } 205 189 206 190 ret = device_register(&vdpasim_net_mgmtdev); 207 191 if (ret)
+2 -1
drivers/vdpa/vdpa_user/vduse_dev.c
··· 1503 1503 return 0; 1504 1504 } 1505 1505 1506 - static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name) 1506 + static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name, 1507 + const struct vdpa_dev_set_config *config) 1507 1508 { 1508 1509 struct vduse_dev *dev; 1509 1510 int ret;
+12
drivers/vdpa/virtio_pci/vp_vdpa.c
··· 76 76 return vp_modern_get_status(mdev); 77 77 } 78 78 79 + static int vp_vdpa_get_vq_irq(struct vdpa_device *vdpa, u16 idx) 80 + { 81 + struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); 82 + int irq = vp_vdpa->vring[idx].irq; 83 + 84 + if (irq == VIRTIO_MSI_NO_VECTOR) 85 + return -EINVAL; 86 + 87 + return irq; 88 + } 89 + 79 90 static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa) 80 91 { 81 92 struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev; ··· 438 427 .get_config = vp_vdpa_get_config, 439 428 .set_config = vp_vdpa_set_config, 440 429 .set_config_cb = vp_vdpa_set_config_cb, 430 + .get_vq_irq = vp_vdpa_get_vq_irq, 441 431 }; 442 432 443 433 static void vp_vdpa_free_irq_vectors(void *data)
+1 -2
drivers/vhost/vdpa.c
··· 237 237 struct vhost_vdpa_config __user *c) 238 238 { 239 239 struct vdpa_device *vdpa = v->vdpa; 240 - const struct vdpa_config_ops *ops = vdpa->config; 241 240 struct vhost_vdpa_config config; 242 241 unsigned long size = offsetof(struct vhost_vdpa_config, buf); 243 242 u8 *buf; ··· 250 251 if (IS_ERR(buf)) 251 252 return PTR_ERR(buf); 252 253 253 - ops->set_config(vdpa, config.off, buf, config.len); 254 + vdpa_set_config(vdpa, config.off, buf, config.len); 254 255 255 256 kvfree(buf); 256 257 return 0;
+10
drivers/virtio/Kconfig
··· 20 20 PCI device with possible vendor specific extensions. Any 21 21 module that selects this module must depend on PCI. 22 22 23 + config VIRTIO_PCI_LIB_LEGACY 24 + tristate 25 + help 26 + Legacy PCI device (Virtio PCI Card 0.9.x Draft and older device) 27 + implementation. 28 + This module implements the basic probe and control for devices 29 + which are based on legacy PCI device. Any module that selects this 30 + module must depend on PCI. 31 + 23 32 menuconfig VIRTIO_MENU 24 33 bool "Virtio drivers" 25 34 default y ··· 52 43 bool "Support for legacy virtio draft 0.9.X and older devices" 53 44 default y 54 45 depends on VIRTIO_PCI 46 + select VIRTIO_PCI_LIB_LEGACY 55 47 help 56 48 Virtio PCI Card 0.9.X Draft (circa 2014) and older device support. 57 49
+1
drivers/virtio/Makefile
··· 1 1 # SPDX-License-Identifier: GPL-2.0 2 2 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o 3 3 obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o 4 + obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o 4 5 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o 5 6 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o 6 7 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
+47 -11
drivers/virtio/virtio_pci_common.c
··· 24 24 "Force legacy mode for transitional virtio 1 devices"); 25 25 #endif 26 26 27 - /* wait for pending irq handlers */ 28 - void vp_synchronize_vectors(struct virtio_device *vdev) 27 + /* disable irq handlers */ 28 + void vp_disable_cbs(struct virtio_device *vdev) 29 29 { 30 30 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 31 31 int i; 32 32 33 - if (vp_dev->intx_enabled) 33 + if (vp_dev->intx_enabled) { 34 + /* 35 + * The below synchronize() guarantees that any 36 + * interrupt for this line arriving after 37 + * synchronize_irq() has completed is guaranteed to see 38 + * intx_soft_enabled == false. 39 + */ 40 + WRITE_ONCE(vp_dev->intx_soft_enabled, false); 34 41 synchronize_irq(vp_dev->pci_dev->irq); 42 + } 35 43 36 44 for (i = 0; i < vp_dev->msix_vectors; ++i) 37 - synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); 45 + disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); 46 + } 47 + 48 + /* enable irq handlers */ 49 + void vp_enable_cbs(struct virtio_device *vdev) 50 + { 51 + struct virtio_pci_device *vp_dev = to_vp_device(vdev); 52 + int i; 53 + 54 + if (vp_dev->intx_enabled) { 55 + disable_irq(vp_dev->pci_dev->irq); 56 + /* 57 + * The above disable_irq() provides TSO ordering and 58 + * as such promotes the below store to store-release. 59 + */ 60 + WRITE_ONCE(vp_dev->intx_soft_enabled, true); 61 + enable_irq(vp_dev->pci_dev->irq); 62 + return; 63 + } 64 + 65 + for (i = 0; i < vp_dev->msix_vectors; ++i) 66 + enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); 38 67 } 39 68 40 69 /* the notify function used when creating a virt queue */ ··· 112 83 { 113 84 struct virtio_pci_device *vp_dev = opaque; 114 85 u8 isr; 86 + 87 + if (!READ_ONCE(vp_dev->intx_soft_enabled)) 88 + return IRQ_NONE; 115 89 116 90 /* reading the ISR has the effect of also clearing it so it's very 117 91 * important to save off the value. 
*/ ··· 173 141 snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, 174 142 "%s-config", name); 175 143 err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), 176 - vp_config_changed, 0, vp_dev->msix_names[v], 144 + vp_config_changed, IRQF_NO_AUTOEN, 145 + vp_dev->msix_names[v], 177 146 vp_dev); 178 147 if (err) 179 148 goto error; ··· 193 160 snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, 194 161 "%s-virtqueues", name); 195 162 err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), 196 - vp_vring_interrupt, 0, vp_dev->msix_names[v], 163 + vp_vring_interrupt, IRQF_NO_AUTOEN, 164 + vp_dev->msix_names[v], 197 165 vp_dev); 198 166 if (err) 199 167 goto error; ··· 371 337 "%s-%s", 372 338 dev_name(&vp_dev->vdev.dev), names[i]); 373 339 err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), 374 - vring_interrupt, 0, 340 + vring_interrupt, IRQF_NO_AUTOEN, 375 341 vp_dev->msix_names[msix_vec], 376 342 vqs[i]); 377 343 if (err) ··· 583 549 584 550 pci_set_master(pci_dev); 585 551 552 + vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false; 553 + 586 554 rc = register_virtio_device(&vp_dev->vdev); 587 555 reg_dev = vp_dev; 588 556 if (rc) ··· 593 557 return 0; 594 558 595 559 err_register: 596 - if (vp_dev->ioaddr) 597 - virtio_pci_legacy_remove(vp_dev); 560 + if (vp_dev->is_legacy) 561 + virtio_pci_legacy_remove(vp_dev); 598 562 else 599 - virtio_pci_modern_remove(vp_dev); 563 + virtio_pci_modern_remove(vp_dev); 600 564 err_probe: 601 565 pci_disable_device(pci_dev); 602 566 err_enable_device: ··· 623 587 624 588 unregister_virtio_device(&vp_dev->vdev); 625 589 626 - if (vp_dev->ioaddr) 590 + if (vp_dev->is_legacy) 627 591 virtio_pci_legacy_remove(vp_dev); 628 592 else 629 593 virtio_pci_modern_remove(vp_dev);
+9 -7
drivers/virtio/virtio_pci_common.h
··· 25 25 #include <linux/virtio_config.h> 26 26 #include <linux/virtio_ring.h> 27 27 #include <linux/virtio_pci.h> 28 + #include <linux/virtio_pci_legacy.h> 28 29 #include <linux/virtio_pci_modern.h> 29 30 #include <linux/highmem.h> 30 31 #include <linux/spinlock.h> ··· 45 44 struct virtio_pci_device { 46 45 struct virtio_device vdev; 47 46 struct pci_dev *pci_dev; 47 + struct virtio_pci_legacy_device ldev; 48 48 struct virtio_pci_modern_device mdev; 49 49 50 - /* In legacy mode, these two point to within ->legacy. */ 50 + bool is_legacy; 51 + 51 52 /* Where to read and clear interrupt */ 52 53 u8 __iomem *isr; 53 - 54 - /* Legacy only field */ 55 - /* the IO mapping for the PCI config space */ 56 - void __iomem *ioaddr; 57 54 58 55 /* a list of queues so we can dispatch IRQs */ 59 56 spinlock_t lock; ··· 63 64 /* MSI-X support */ 64 65 int msix_enabled; 65 66 int intx_enabled; 67 + bool intx_soft_enabled; 66 68 cpumask_var_t *msix_affinity_masks; 67 69 /* Name strings for interrupts. This size should be enough, 68 70 * and I'm too lazy to allocate each name separately. */ ··· 102 102 return container_of(vdev, struct virtio_pci_device, vdev); 103 103 } 104 104 105 - /* wait for pending irq handlers */ 106 - void vp_synchronize_vectors(struct virtio_device *vdev); 105 + /* disable irq handlers */ 106 + void vp_disable_cbs(struct virtio_device *vdev); 107 + /* enable irq handlers */ 108 + void vp_enable_cbs(struct virtio_device *vdev); 107 109 /* the notify function used when creating a virt queue */ 108 110 bool vp_notify(struct virtqueue *vq); 109 111 /* the config->del_vqs() implementation */
+30 -76
drivers/virtio/virtio_pci_legacy.c
··· 14 14 * Michael S. Tsirkin <mst@redhat.com> 15 15 */ 16 16 17 + #include "linux/virtio_pci_legacy.h" 17 18 #include "virtio_pci_common.h" 18 19 19 20 /* virtio config->get_features() implementation */ ··· 24 23 25 24 /* When someone needs more than 32 feature bits, we'll need to 26 25 * steal a bit to indicate that the rest are somewhere else. */ 27 - return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES); 26 + return vp_legacy_get_features(&vp_dev->ldev); 28 27 } 29 28 30 29 /* virtio config->finalize_features() implementation */ ··· 39 38 BUG_ON((u32)vdev->features != vdev->features); 40 39 41 40 /* We only support 32 feature bits. */ 42 - iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); 41 + vp_legacy_set_features(&vp_dev->ldev, vdev->features); 43 42 44 43 return 0; 45 44 } ··· 49 48 void *buf, unsigned len) 50 49 { 51 50 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 52 - void __iomem *ioaddr = vp_dev->ioaddr + 51 + void __iomem *ioaddr = vp_dev->ldev.ioaddr + 53 52 VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + 54 53 offset; 55 54 u8 *ptr = buf; ··· 65 64 const void *buf, unsigned len) 66 65 { 67 66 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 68 - void __iomem *ioaddr = vp_dev->ioaddr + 67 + void __iomem *ioaddr = vp_dev->ldev.ioaddr + 69 68 VIRTIO_PCI_CONFIG_OFF(vp_dev->msix_enabled) + 70 69 offset; 71 70 const u8 *ptr = buf; ··· 79 78 static u8 vp_get_status(struct virtio_device *vdev) 80 79 { 81 80 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 82 - return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); 81 + return vp_legacy_get_status(&vp_dev->ldev); 83 82 } 84 83 85 84 static void vp_set_status(struct virtio_device *vdev, u8 status) ··· 87 86 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 88 87 /* We should never be setting status to 0. 
*/ 89 88 BUG_ON(status == 0); 90 - iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS); 89 + vp_legacy_set_status(&vp_dev->ldev, status); 91 90 } 92 91 93 92 static void vp_reset(struct virtio_device *vdev) 94 93 { 95 94 struct virtio_pci_device *vp_dev = to_vp_device(vdev); 96 95 /* 0 status means a reset. */ 97 - iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS); 96 + vp_legacy_set_status(&vp_dev->ldev, 0); 98 97 /* Flush out the status write, and flush in device writes, 99 98 * including MSi-X interrupts, if any. */ 100 - ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS); 101 - /* Flush pending VQ/configuration callbacks. */ 102 - vp_synchronize_vectors(vdev); 99 + vp_legacy_get_status(&vp_dev->ldev); 100 + /* Disable VQ/configuration callbacks. */ 101 + vp_disable_cbs(vdev); 103 102 } 104 103 105 104 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) 106 105 { 107 - /* Setup the vector used for configuration events */ 108 - iowrite16(vector, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 109 - /* Verify we had enough resources to assign the vector */ 110 - /* Will also flush the write out to device */ 111 - return ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 106 + return vp_legacy_config_vector(&vp_dev->ldev, vector); 112 107 } 113 108 114 109 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, ··· 120 123 int err; 121 124 u64 q_pfn; 122 125 123 - /* Select the queue we're interested in */ 124 - iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 125 - 126 126 /* Check if queue is either not available or already active. 
*/ 127 - num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM); 128 - if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN)) 127 + num = vp_legacy_get_queue_size(&vp_dev->ldev, index); 128 + if (!num || vp_legacy_get_queue_enable(&vp_dev->ldev, index)) 129 129 return ERR_PTR(-ENOENT); 130 130 131 131 info->msix_vector = msix_vec; ··· 145 151 } 146 152 147 153 /* activate the queue */ 148 - iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 154 + vp_legacy_set_queue_address(&vp_dev->ldev, index, q_pfn); 149 155 150 - vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; 156 + vq->priv = (void __force *)vp_dev->ldev.ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; 151 157 152 158 if (msix_vec != VIRTIO_MSI_NO_VECTOR) { 153 - iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 154 - msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 159 + msix_vec = vp_legacy_queue_vector(&vp_dev->ldev, index, msix_vec); 155 160 if (msix_vec == VIRTIO_MSI_NO_VECTOR) { 156 161 err = -EBUSY; 157 162 goto out_deactivate; ··· 160 167 return vq; 161 168 162 169 out_deactivate: 163 - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 170 + vp_legacy_set_queue_address(&vp_dev->ldev, index, 0); 164 171 out_del_vq: 165 172 vring_del_virtqueue(vq); 166 173 return ERR_PTR(err); ··· 171 178 struct virtqueue *vq = info->vq; 172 179 struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); 173 180 174 - iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 175 - 176 181 if (vp_dev->msix_enabled) { 177 - iowrite16(VIRTIO_MSI_NO_VECTOR, 178 - vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 182 + vp_legacy_queue_vector(&vp_dev->ldev, vq->index, 183 + VIRTIO_MSI_NO_VECTOR); 179 184 /* Flush the write out to device */ 180 - ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR); 185 + ioread8(vp_dev->ldev.ioaddr + VIRTIO_PCI_ISR); 181 186 } 182 187 183 188 /* Select and deactivate the queue */ 184 - iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 189 + 
vp_legacy_set_queue_address(&vp_dev->ldev, vq->index, 0); 185 190 186 191 vring_del_virtqueue(vq); 187 192 } 188 193 189 194 static const struct virtio_config_ops virtio_pci_config_ops = { 195 + .enable_cbs = vp_enable_cbs, 190 196 .get = vp_get, 191 197 .set = vp_set, 192 198 .get_status = vp_get_status, ··· 203 211 /* the PCI probing function */ 204 212 int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev) 205 213 { 214 + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; 206 215 struct pci_dev *pci_dev = vp_dev->pci_dev; 207 216 int rc; 208 217 209 - /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ 210 - if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) 211 - return -ENODEV; 218 + ldev->pci_dev = pci_dev; 212 219 213 - if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) { 214 - printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n", 215 - VIRTIO_PCI_ABI_VERSION, pci_dev->revision); 216 - return -ENODEV; 217 - } 218 - 219 - rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); 220 - if (rc) { 221 - rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); 222 - } else { 223 - /* 224 - * The virtio ring base address is expressed as a 32-bit PFN, 225 - * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. 226 - */ 227 - dma_set_coherent_mask(&pci_dev->dev, 228 - DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); 229 - } 230 - 231 - if (rc) 232 - dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. Trying to continue, but this might not work.\n"); 233 - 234 - rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); 220 + rc = vp_legacy_probe(ldev); 235 221 if (rc) 236 222 return rc; 237 223 238 - rc = -ENOMEM; 239 - vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0); 240 - if (!vp_dev->ioaddr) 241 - goto err_iomap; 242 - 243 - vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR; 244 - 245 - /* we use the subsystem vendor/device id as the virtio vendor/device 246 - * id. 
this allows us to use the same PCI vendor/device id for all 247 - * virtio devices and to identify the particular virtio driver by 248 - * the subsystem ids */ 249 - vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor; 250 - vp_dev->vdev.id.device = pci_dev->subsystem_device; 224 + vp_dev->isr = ldev->isr; 225 + vp_dev->vdev.id = ldev->id; 251 226 252 227 vp_dev->vdev.config = &virtio_pci_config_ops; 253 228 ··· 223 264 vp_dev->del_vq = del_vq; 224 265 225 266 return 0; 226 - 227 - err_iomap: 228 - pci_release_region(pci_dev, 0); 229 - return rc; 230 267 } 231 268 232 269 void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev) 233 270 { 234 - struct pci_dev *pci_dev = vp_dev->pci_dev; 271 + struct virtio_pci_legacy_device *ldev = &vp_dev->ldev; 235 272 236 - pci_iounmap(pci_dev, vp_dev->ioaddr); 237 - pci_release_region(pci_dev, 0); 273 + vp_legacy_remove(ldev); 238 274 }
+220
drivers/virtio/virtio_pci_legacy_dev.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-or-later 2 + 3 + #include "linux/virtio_pci.h" 4 + #include <linux/virtio_pci_legacy.h> 5 + #include <linux/module.h> 6 + #include <linux/pci.h> 7 + 8 + 9 + /* 10 + * vp_legacy_probe: probe the legacy virtio pci device, note that the 11 + * caller is required to enable PCI device before calling this function. 12 + * @ldev: the legacy virtio-pci device 13 + * 14 + * Return 0 on succeed otherwise fail 15 + */ 16 + int vp_legacy_probe(struct virtio_pci_legacy_device *ldev) 17 + { 18 + struct pci_dev *pci_dev = ldev->pci_dev; 19 + int rc; 20 + 21 + /* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */ 22 + if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f) 23 + return -ENODEV; 24 + 25 + if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) 26 + return -ENODEV; 27 + 28 + rc = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(64)); 29 + if (rc) { 30 + rc = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(32)); 31 + } else { 32 + /* 33 + * The virtio ring base address is expressed as a 32-bit PFN, 34 + * with a page size of 1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT. 35 + */ 36 + dma_set_coherent_mask(&pci_dev->dev, 37 + DMA_BIT_MASK(32 + VIRTIO_PCI_QUEUE_ADDR_SHIFT)); 38 + } 39 + 40 + if (rc) 41 + dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA. 
Trying to continue, but this might not work.\n"); 42 + 43 + rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy"); 44 + if (rc) 45 + return rc; 46 + 47 + ldev->ioaddr = pci_iomap(pci_dev, 0, 0); 48 + if (!ldev->ioaddr) 49 + goto err_iomap; 50 + 51 + ldev->isr = ldev->ioaddr + VIRTIO_PCI_ISR; 52 + 53 + ldev->id.vendor = pci_dev->subsystem_vendor; 54 + ldev->id.device = pci_dev->subsystem_device; 55 + 56 + return 0; 57 + err_iomap: 58 + pci_release_region(pci_dev, 0); 59 + return rc; 60 + } 61 + EXPORT_SYMBOL_GPL(vp_legacy_probe); 62 + 63 + /* 64 + * vp_legacy_remove: remove and clean up the legacy virtio pci device 65 + * @ldev: the legacy virtio-pci device 66 + */ 67 + void vp_legacy_remove(struct virtio_pci_legacy_device *ldev) 68 + { 69 + struct pci_dev *pci_dev = ldev->pci_dev; 70 + 71 + pci_iounmap(pci_dev, ldev->ioaddr); 72 + pci_release_region(pci_dev, 0); 73 + } 74 + EXPORT_SYMBOL_GPL(vp_legacy_remove); 75 + 76 + /* 77 + * vp_legacy_get_features - get features from device 78 + * @ldev: the legacy virtio-pci device 79 + * 80 + * Returns the features read from the device 81 + */ 82 + u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev) 83 + { 84 + 85 + return ioread32(ldev->ioaddr + VIRTIO_PCI_HOST_FEATURES); 86 + } 87 + EXPORT_SYMBOL_GPL(vp_legacy_get_features); 88 + 89 + /* 90 + * vp_legacy_get_driver_features - get driver features from device 91 + * @ldev: the legacy virtio-pci device 92 + * 93 + * Returns the driver features read from the device 94 + */ 95 + u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev) 96 + { 97 + return ioread32(ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); 98 + } 99 + EXPORT_SYMBOL_GPL(vp_legacy_get_driver_features); 100 + 101 + /* 102 + * vp_legacy_set_features - set features to device 103 + * @ldev: the legacy virtio-pci device 104 + * @features: the features set to device 105 + */ 106 + void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, 107 + u32 features) 108 + { 109 + 
iowrite32(features, ldev->ioaddr + VIRTIO_PCI_GUEST_FEATURES); 110 + } 111 + EXPORT_SYMBOL_GPL(vp_legacy_set_features); 112 + 113 + /* 114 + * vp_legacy_get_status - get the device status 115 + * @ldev: the legacy virtio-pci device 116 + * 117 + * Returns the status read from device 118 + */ 119 + u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev) 120 + { 121 + return ioread8(ldev->ioaddr + VIRTIO_PCI_STATUS); 122 + } 123 + EXPORT_SYMBOL_GPL(vp_legacy_get_status); 124 + 125 + /* 126 + * vp_legacy_set_status - set status to device 127 + * @ldev: the legacy virtio-pci device 128 + * @status: the status set to device 129 + */ 130 + void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, 131 + u8 status) 132 + { 133 + iowrite8(status, ldev->ioaddr + VIRTIO_PCI_STATUS); 134 + } 135 + EXPORT_SYMBOL_GPL(vp_legacy_set_status); 136 + 137 + /* 138 + * vp_legacy_queue_vector - set the MSIX vector for a specific virtqueue 139 + * @ldev: the legacy virtio-pci device 140 + * @index: queue index 141 + * @vector: the queue vector 142 + * 143 + * Returns the queue vector read from the device 144 + */ 145 + u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, 146 + u16 index, u16 vector) 147 + { 148 + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 149 + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 150 + /* Flush the write out to device */ 151 + return ioread16(ldev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR); 152 + } 153 + EXPORT_SYMBOL_GPL(vp_legacy_queue_vector); 154 + 155 + /* 156 + * vp_legacy_config_vector - set the vector for config interrupt 157 + * @ldev: the legacy virtio-pci device 158 + * @vector: the config vector 159 + * 160 + * Returns the config vector read from the device 161 + */ 162 + u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, 163 + u16 vector) 164 + { 165 + /* Setup the vector used for configuration events */ 166 + iowrite16(vector, ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 167 + /* Verify we 
had enough resources to assign the vector */ 168 + /* Will also flush the write out to device */ 169 + return ioread16(ldev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR); 170 + } 171 + EXPORT_SYMBOL_GPL(vp_legacy_config_vector); 172 + 173 + /* 174 + * vp_legacy_set_queue_address - set the virtqueue address 175 + * @ldev: the legacy virtio-pci device 176 + * @index: the queue index 177 + * @queue_pfn: pfn of the virtqueue 178 + */ 179 + void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, 180 + u16 index, u32 queue_pfn) 181 + { 182 + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 183 + iowrite32(queue_pfn, ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 184 + } 185 + EXPORT_SYMBOL_GPL(vp_legacy_set_queue_address); 186 + 187 + /* 188 + * vp_legacy_get_queue_enable - check whether a virtqueue is enabled 189 + * @ldev: the legacy virtio-pci device 190 + * @index: the queue index 191 + * 192 + * Returns whether the virtqueue is enabled or not 193 + */ 194 + bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, 195 + u16 index) 196 + { 197 + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 198 + return ioread32(ldev->ioaddr + VIRTIO_PCI_QUEUE_PFN); 199 + } 200 + EXPORT_SYMBOL_GPL(vp_legacy_get_queue_enable); 201 + 202 + /* 203 + * vp_legacy_get_queue_size - get size for a virtqueue 204 + * @ldev: the legacy virtio-pci device 205 + * @index: the queue index 206 + * 207 + * Returns the size of the virtqueue 208 + */ 209 + u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, 210 + u16 index) 211 + { 212 + iowrite16(index, ldev->ioaddr + VIRTIO_PCI_QUEUE_SEL); 213 + return ioread16(ldev->ioaddr + VIRTIO_PCI_QUEUE_NUM); 214 + } 215 + EXPORT_SYMBOL_GPL(vp_legacy_get_queue_size); 216 + 217 + MODULE_VERSION("0.1"); 218 + MODULE_DESCRIPTION("Legacy Virtio PCI Device"); 219 + MODULE_AUTHOR("Wu Zongyong <wuzongyong@linux.alibaba.com>"); 220 + MODULE_LICENSE("GPL");
+4 -2
drivers/virtio/virtio_pci_modern.c
··· 172 172 */ 173 173 while (vp_modern_get_status(mdev)) 174 174 msleep(1); 175 - /* Flush pending VQ/configuration callbacks. */ 176 - vp_synchronize_vectors(vdev); 175 + /* Disable VQ/configuration callbacks. */ 176 + vp_disable_cbs(vdev); 177 177 } 178 178 179 179 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) ··· 380 380 } 381 381 382 382 static const struct virtio_config_ops virtio_pci_config_nodev_ops = { 383 + .enable_cbs = vp_enable_cbs, 383 384 .get = NULL, 384 385 .set = NULL, 385 386 .generation = vp_generation, ··· 398 397 }; 399 398 400 399 static const struct virtio_config_ops virtio_pci_config_ops = { 400 + .enable_cbs = vp_enable_cbs, 401 401 .get = vp_get, 402 402 .set = vp_set, 403 403 .generation = vp_generation,
+79 -11
drivers/virtio/virtio_ring.c
··· 14 14 #include <linux/spinlock.h> 15 15 #include <xen/xen.h> 16 16 17 + static bool force_used_validation = false; 18 + module_param(force_used_validation, bool, 0444); 19 + 17 20 #ifdef DEBUG 18 21 /* For development, we want to crash whenever the ring is screwed. */ 19 22 #define BAD_RING(_vq, fmt, args...) \ ··· 82 79 }; 83 80 84 81 struct vring_desc_extra { 85 - dma_addr_t addr; /* Buffer DMA addr. */ 86 - u32 len; /* Buffer length. */ 82 + dma_addr_t addr; /* Descriptor DMA addr. */ 83 + u32 len; /* Descriptor length. */ 87 84 u16 flags; /* Descriptor flags. */ 88 85 u16 next; /* The next desc state in a list. */ 89 86 }; ··· 184 181 size_t event_size_in_bytes; 185 182 } packed; 186 183 }; 184 + 185 + /* Per-descriptor in buffer length */ 186 + u32 *buflen; 187 187 188 188 /* How to notify other side. FIXME: commonalize hcalls! */ 189 189 bool (*notify)(struct virtqueue *vq); ··· 496 490 unsigned int i, n, avail, descs_used, prev, err_idx; 497 491 int head; 498 492 bool indirect; 493 + u32 buflen = 0; 499 494 500 495 START_USE(vq); 501 496 ··· 578 571 VRING_DESC_F_NEXT | 579 572 VRING_DESC_F_WRITE, 580 573 indirect); 574 + buflen += sg->length; 581 575 } 582 576 } 583 577 /* Last one doesn't continue. */ ··· 617 609 vq->split.desc_state[head].indir_desc = desc; 618 610 else 619 611 vq->split.desc_state[head].indir_desc = ctx; 612 + 613 + /* Store in buffer length if necessary */ 614 + if (vq->buflen) 615 + vq->buflen[head] = buflen; 620 616 621 617 /* Put entry in available array (but don't update avail->idx until they 622 618 * do sync). 
*/ ··· 794 782 } 795 783 if (unlikely(!vq->split.desc_state[i].data)) { 796 784 BAD_RING(vq, "id %u is not a head!\n", i); 785 + return NULL; 786 + } 787 + if (vq->buflen && unlikely(*len > vq->buflen[i])) { 788 + BAD_RING(vq, "used len %d is larger than in buflen %u\n", 789 + *len, vq->buflen[i]); 797 790 return NULL; 798 791 } 799 792 ··· 1067 1050 } 1068 1051 1069 1052 static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1070 - struct scatterlist *sgs[], 1071 - unsigned int total_sg, 1072 - unsigned int out_sgs, 1073 - unsigned int in_sgs, 1074 - void *data, 1075 - gfp_t gfp) 1053 + struct scatterlist *sgs[], 1054 + unsigned int total_sg, 1055 + unsigned int out_sgs, 1056 + unsigned int in_sgs, 1057 + void *data, 1058 + gfp_t gfp) 1076 1059 { 1077 1060 struct vring_packed_desc *desc; 1078 1061 struct scatterlist *sg; 1079 1062 unsigned int i, n, err_idx; 1080 1063 u16 head, id; 1081 1064 dma_addr_t addr; 1065 + u32 buflen = 0; 1082 1066 1083 1067 head = vq->packed.next_avail_idx; 1084 1068 desc = alloc_indirect_packed(total_sg, gfp); 1069 + if (!desc) 1070 + return -ENOMEM; 1085 1071 1086 1072 if (unlikely(vq->vq.num_free < 1)) { 1087 1073 pr_debug("Can't add buf len 1 - avail = 0\n"); ··· 1109 1089 desc[i].addr = cpu_to_le64(addr); 1110 1090 desc[i].len = cpu_to_le32(sg->length); 1111 1091 i++; 1092 + if (n >= out_sgs) 1093 + buflen += sg->length; 1112 1094 } 1113 1095 } 1114 1096 ··· 1164 1142 vq->packed.desc_state[id].indir_desc = desc; 1165 1143 vq->packed.desc_state[id].last = id; 1166 1144 1145 + /* Store in buffer length if necessary */ 1146 + if (vq->buflen) 1147 + vq->buflen[id] = buflen; 1148 + 1167 1149 vq->num_added += 1; 1168 1150 1169 1151 pr_debug("Added buffer head %i to %p\n", head, vq); ··· 1202 1176 unsigned int i, n, c, descs_used, err_idx; 1203 1177 __le16 head_flags, flags; 1204 1178 u16 head, id, prev, curr, avail_used_flags; 1179 + int err; 1180 + u32 buflen = 0; 1205 1181 1206 1182 START_USE(vq); 1207 1183 ··· 1219 1191 
1220 1192 BUG_ON(total_sg == 0); 1221 1193 1222 - if (virtqueue_use_indirect(_vq, total_sg)) 1223 - return virtqueue_add_indirect_packed(vq, sgs, total_sg, 1224 - out_sgs, in_sgs, data, gfp); 1194 + if (virtqueue_use_indirect(_vq, total_sg)) { 1195 + err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, 1196 + in_sgs, data, gfp); 1197 + if (err != -ENOMEM) 1198 + return err; 1199 + 1200 + /* fall back on direct */ 1201 + } 1225 1202 1226 1203 head = vq->packed.next_avail_idx; 1227 1204 avail_used_flags = vq->packed.avail_used_flags; ··· 1283 1250 1 << VRING_PACKED_DESC_F_AVAIL | 1284 1251 1 << VRING_PACKED_DESC_F_USED; 1285 1252 } 1253 + if (n >= out_sgs) 1254 + buflen += sg->length; 1286 1255 } 1287 1256 } 1288 1257 ··· 1303 1268 vq->packed.desc_state[id].data = data; 1304 1269 vq->packed.desc_state[id].indir_desc = ctx; 1305 1270 vq->packed.desc_state[id].last = prev; 1271 + 1272 + /* Store in buffer length if necessary */ 1273 + if (vq->buflen) 1274 + vq->buflen[id] = buflen; 1306 1275 1307 1276 /* 1308 1277 * A driver MUST NOT make the first descriptor in the list ··· 1492 1453 } 1493 1454 if (unlikely(!vq->packed.desc_state[id].data)) { 1494 1455 BAD_RING(vq, "id %u is not a head!\n", id); 1456 + return NULL; 1457 + } 1458 + if (vq->buflen && unlikely(*len > vq->buflen[id])) { 1459 + BAD_RING(vq, "used len %d is larger than in buflen %u\n", 1460 + *len, vq->buflen[id]); 1495 1461 return NULL; 1496 1462 } 1497 1463 ··· 1704 1660 struct vring_virtqueue *vq; 1705 1661 struct vring_packed_desc *ring; 1706 1662 struct vring_packed_desc_event *driver, *device; 1663 + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); 1707 1664 dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; 1708 1665 size_t ring_size_in_bytes, event_size_in_bytes; 1709 1666 ··· 1794 1749 if (!vq->packed.desc_extra) 1795 1750 goto err_desc_extra; 1796 1751 1752 + if (!drv->suppress_used_validation || force_used_validation) { 1753 + vq->buflen = 
kmalloc_array(num, sizeof(*vq->buflen), 1754 + GFP_KERNEL); 1755 + if (!vq->buflen) 1756 + goto err_buflen; 1757 + } else { 1758 + vq->buflen = NULL; 1759 + } 1760 + 1797 1761 /* No callback? Tell other side not to bother us. */ 1798 1762 if (!callback) { 1799 1763 vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; ··· 1815 1761 spin_unlock(&vdev->vqs_list_lock); 1816 1762 return &vq->vq; 1817 1763 1764 + err_buflen: 1765 + kfree(vq->packed.desc_extra); 1818 1766 err_desc_extra: 1819 1767 kfree(vq->packed.desc_state); 1820 1768 err_desc_state: ··· 2224 2168 void (*callback)(struct virtqueue *), 2225 2169 const char *name) 2226 2170 { 2171 + struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver); 2227 2172 struct vring_virtqueue *vq; 2228 2173 2229 2174 if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) ··· 2284 2227 if (!vq->split.desc_extra) 2285 2228 goto err_extra; 2286 2229 2230 + if (!drv->suppress_used_validation || force_used_validation) { 2231 + vq->buflen = kmalloc_array(vring.num, sizeof(*vq->buflen), 2232 + GFP_KERNEL); 2233 + if (!vq->buflen) 2234 + goto err_buflen; 2235 + } else { 2236 + vq->buflen = NULL; 2237 + } 2238 + 2287 2239 /* Put everything in free lists. */ 2288 2240 vq->free_head = 0; 2289 2241 memset(vq->split.desc_state, 0, vring.num * ··· 2303 2237 spin_unlock(&vdev->vqs_list_lock); 2304 2238 return &vq->vq; 2305 2239 2240 + err_buflen: 2241 + kfree(vq->split.desc_extra); 2306 2242 err_extra: 2307 2243 kfree(vq->split.desc_state); 2308 2244 err_state:
+12 -7
drivers/virtio/virtio_vdpa.c
··· 65 65 const void *buf, unsigned len) 66 66 { 67 67 struct vdpa_device *vdpa = vd_get_vdpa(vdev); 68 - const struct vdpa_config_ops *ops = vdpa->config; 69 68 70 - ops->set_config(vdpa, offset, buf, len); 69 + vdpa_set_config(vdpa, offset, buf, len); 71 70 } 72 71 73 72 static u32 virtio_vdpa_generation(struct virtio_device *vdev) ··· 144 145 /* Assume split virtqueue, switch to packed if necessary */ 145 146 struct vdpa_vq_state state = {0}; 146 147 unsigned long flags; 147 - u32 align, num; 148 + u32 align, max_num, min_num = 1; 149 + bool may_reduce_num = true; 148 150 int err; 149 151 150 152 if (!name) ··· 163 163 if (!info) 164 164 return ERR_PTR(-ENOMEM); 165 165 166 - num = ops->get_vq_num_max(vdpa); 167 - if (num == 0) { 166 + max_num = ops->get_vq_num_max(vdpa); 167 + if (max_num == 0) { 168 168 err = -ENOENT; 169 169 goto error_new_virtqueue; 170 170 } 171 171 172 + if (ops->get_vq_num_min) 173 + min_num = ops->get_vq_num_min(vdpa); 174 + 175 + may_reduce_num = (max_num == min_num) ? false : true; 176 + 172 177 /* Create the vring */ 173 178 align = ops->get_vq_align(vdpa); 174 - vq = vring_create_virtqueue(index, num, align, vdev, 175 - true, true, ctx, 179 + vq = vring_create_virtqueue(index, max_num, align, vdev, 180 + true, may_reduce_num, ctx, 176 181 virtio_vdpa_notify, callback, name); 177 182 if (!vq) { 178 183 err = -ENOMEM;
+35 -18
include/linux/vdpa.h
··· 6 6 #include <linux/device.h> 7 7 #include <linux/interrupt.h> 8 8 #include <linux/vhost_iotlb.h> 9 + #include <linux/virtio_net.h> 10 + #include <linux/if_ether.h> 9 11 10 12 /** 11 13 * struct vdpa_calllback - vDPA callback definition. ··· 65 63 * @dev: underlying device 66 64 * @dma_dev: the actual device that is performing DMA 67 65 * @config: the configuration ops for this device. 66 + * @cf_mutex: Protects get and set access to configuration layout. 68 67 * @index: device index 69 68 * @features_valid: were features initialized? for legacy guests 70 69 * @use_va: indicate whether virtual address must be used by this device ··· 77 74 struct device dev; 78 75 struct device *dma_dev; 79 76 const struct vdpa_config_ops *config; 77 + struct mutex cf_mutex; /* Protects get/set config */ 80 78 unsigned int index; 81 79 bool features_valid; 82 80 bool use_va; ··· 93 89 struct vdpa_iova_range { 94 90 u64 first; 95 91 u64 last; 92 + }; 93 + 94 + struct vdpa_dev_set_config { 95 + struct { 96 + u8 mac[ETH_ALEN]; 97 + u16 mtu; 98 + } net; 99 + u64 mask; 96 100 }; 97 101 98 102 /** ··· 183 171 * @get_vq_num_max: Get the max size of virtqueue 184 172 * @vdev: vdpa device 185 173 * Returns u16: max size of virtqueue 174 + * @get_vq_num_min: Get the min size of virtqueue (optional) 175 + * @vdev: vdpa device 176 + * Returns u16: min size of virtqueue 186 177 * @get_device_id: Get virtio device id 187 178 * @vdev: vdpa device 188 179 * Returns u32: virtio device id ··· 272 257 struct vdpa_notification_area 273 258 (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); 274 259 /* vq irq is not expected to be changed once DRIVER_OK is set */ 275 - int (*get_vq_irq)(struct vdpa_device *vdv, u16 idx); 260 + int (*get_vq_irq)(struct vdpa_device *vdev, u16 idx); 276 261 277 262 /* Device ops */ 278 263 u32 (*get_vq_align)(struct vdpa_device *vdev); ··· 281 266 void (*set_config_cb)(struct vdpa_device *vdev, 282 267 struct vdpa_callback *cb); 283 268 u16 
(*get_vq_num_max)(struct vdpa_device *vdev); 269 + u16 (*get_vq_num_min)(struct vdpa_device *vdev); 284 270 u32 (*get_device_id)(struct vdpa_device *vdev); 285 271 u32 (*get_vendor_id)(struct vdpa_device *vdev); 286 272 u8 (*get_status)(struct vdpa_device *vdev); ··· 398 382 return ops->set_features(vdev, features); 399 383 } 400 384 401 - static inline void vdpa_get_config(struct vdpa_device *vdev, 402 - unsigned int offset, void *buf, 403 - unsigned int len) 404 - { 405 - const struct vdpa_config_ops *ops = vdev->config; 406 - 407 - /* 408 - * Config accesses aren't supposed to trigger before features are set. 409 - * If it does happen we assume a legacy guest. 410 - */ 411 - if (!vdev->features_valid) 412 - vdpa_set_features(vdev, 0); 413 - ops->get_config(vdev, offset, buf, len); 414 - } 415 - 385 + void vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, 386 + void *buf, unsigned int len); 387 + void vdpa_set_config(struct vdpa_device *dev, unsigned int offset, 388 + const void *buf, unsigned int length); 416 389 /** 417 390 * struct vdpa_mgmtdev_ops - vdpa device ops 418 391 * @dev_add: Add a vdpa device using alloc and register 419 392 * @mdev: parent device to use for device addition 420 393 * @name: name of the new vdpa device 394 + * @config: config attributes to apply to the device under creation 421 395 * Driver needs to add a new device using _vdpa_register_device() 422 396 * after fully initializing the vdpa device. Driver must return 0 423 397 * on success or appropriate error code. ··· 418 412 * _vdpa_unregister_device(). 
419 413 */ 420 414 struct vdpa_mgmtdev_ops { 421 - int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name); 415 + int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name, 416 + const struct vdpa_dev_set_config *config); 422 417 void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev); 423 418 }; 424 419 420 + /** 421 + * struct vdpa_mgmt_dev - vdpa management device 422 + * @device: Management parent device 423 + * @ops: operations supported by management device 424 + * @id_table: Pointer to device id table of supported ids 425 + * @config_attr_mask: bit mask of attributes of type enum vdpa_attr that 426 + * management device supports during dev_add callback 427 + * @list: list entry 428 + */ 425 429 struct vdpa_mgmt_dev { 426 430 struct device *device; 427 431 const struct vdpa_mgmtdev_ops *ops; 428 - const struct virtio_device_id *id_table; /* supported ids */ 432 + const struct virtio_device_id *id_table; 433 + u64 config_attr_mask; 429 434 struct list_head list; 430 435 }; 431 436
+2
include/linux/virtio.h
··· 152 152 * @feature_table_size: number of entries in the feature table array. 153 153 * @feature_table_legacy: same as feature_table but when working in legacy mode. 154 154 * @feature_table_size_legacy: number of entries in feature table legacy array. 155 + * @suppress_used_validation: set to not have core validate used length 155 156 * @probe: the function to call when a device is found. Returns 0 or -errno. 156 157 * @scan: optional function to call after successful probe; intended 157 158 * for virtio-scsi to invoke a scan. ··· 169 168 unsigned int feature_table_size; 170 169 const unsigned int *feature_table_legacy; 171 170 unsigned int feature_table_size_legacy; 171 + bool suppress_used_validation; 172 172 int (*validate)(struct virtio_device *dev); 173 173 int (*probe)(struct virtio_device *dev); 174 174 void (*scan)(struct virtio_device *dev);
+6
include/linux/virtio_config.h
··· 23 23 * any of @get/@set, @get_status/@set_status, or @get_features/ 24 24 * @finalize_features are NOT safe to be called from an atomic 25 25 * context. 26 + * @enable_cbs: enable the callbacks 27 + * vdev: the virtio_device 26 28 * @get: read the value of a configuration field 27 29 * vdev: the virtio_device 28 30 * offset: the offset of the configuration field ··· 77 75 */ 78 76 typedef void vq_callback_t(struct virtqueue *); 79 77 struct virtio_config_ops { 78 + void (*enable_cbs)(struct virtio_device *vdev); 80 79 void (*get)(struct virtio_device *vdev, unsigned offset, 81 80 void *buf, unsigned len); 82 81 void (*set)(struct virtio_device *vdev, unsigned offset, ··· 231 228 void virtio_device_ready(struct virtio_device *dev) 232 229 { 233 230 unsigned status = dev->config->get_status(dev); 231 + 232 + if (dev->config->enable_cbs) 233 + dev->config->enable_cbs(dev); 234 234 235 235 BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); 236 236 dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK);
+42
include/linux/virtio_pci_legacy.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef _LINUX_VIRTIO_PCI_LEGACY_H 3 + #define _LINUX_VIRTIO_PCI_LEGACY_H 4 + 5 + #include "linux/mod_devicetable.h" 6 + #include <linux/pci.h> 7 + #include <linux/virtio_pci.h> 8 + 9 + struct virtio_pci_legacy_device { 10 + struct pci_dev *pci_dev; 11 + 12 + /* Where to read and clear interrupt */ 13 + u8 __iomem *isr; 14 + /* The IO mapping for the PCI config space (legacy mode only) */ 15 + void __iomem *ioaddr; 16 + 17 + struct virtio_device_id id; 18 + }; 19 + 20 + u64 vp_legacy_get_features(struct virtio_pci_legacy_device *ldev); 21 + u64 vp_legacy_get_driver_features(struct virtio_pci_legacy_device *ldev); 22 + void vp_legacy_set_features(struct virtio_pci_legacy_device *ldev, 23 + u32 features); 24 + u8 vp_legacy_get_status(struct virtio_pci_legacy_device *ldev); 25 + void vp_legacy_set_status(struct virtio_pci_legacy_device *ldev, 26 + u8 status); 27 + u16 vp_legacy_queue_vector(struct virtio_pci_legacy_device *ldev, 28 + u16 idx, u16 vector); 29 + u16 vp_legacy_config_vector(struct virtio_pci_legacy_device *ldev, 30 + u16 vector); 31 + void vp_legacy_set_queue_address(struct virtio_pci_legacy_device *ldev, 32 + u16 index, u32 queue_pfn); 33 + bool vp_legacy_get_queue_enable(struct virtio_pci_legacy_device *ldev, 34 + u16 idx); 35 + void vp_legacy_set_queue_size(struct virtio_pci_legacy_device *ldev, 36 + u16 idx, u16 size); 37 + u16 vp_legacy_get_queue_size(struct virtio_pci_legacy_device *ldev, 38 + u16 idx); 39 + int vp_legacy_probe(struct virtio_pci_legacy_device *ldev); 40 + void vp_legacy_remove(struct virtio_pci_legacy_device *ldev); 41 + 42 + #endif
+7
include/uapi/linux/vdpa.h
··· 17 17 VDPA_CMD_DEV_NEW, 18 18 VDPA_CMD_DEV_DEL, 19 19 VDPA_CMD_DEV_GET, /* can dump */ 20 + VDPA_CMD_DEV_CONFIG_GET, /* can dump */ 20 21 }; 21 22 22 23 enum vdpa_attr { ··· 33 32 VDPA_ATTR_DEV_VENDOR_ID, /* u32 */ 34 33 VDPA_ATTR_DEV_MAX_VQS, /* u32 */ 35 34 VDPA_ATTR_DEV_MAX_VQ_SIZE, /* u16 */ 35 + VDPA_ATTR_DEV_MIN_VQ_SIZE, /* u16 */ 36 + 37 + VDPA_ATTR_DEV_NET_CFG_MACADDR, /* binary */ 38 + VDPA_ATTR_DEV_NET_STATUS, /* u8 */ 39 + VDPA_ATTR_DEV_NET_CFG_MAX_VQP, /* u16 */ 40 + VDPA_ATTR_DEV_NET_CFG_MTU, /* u16 */ 36 41 37 42 /* new attributes must be added above here */ 38 43 VDPA_ATTR_MAX,
+6
include/uapi/linux/virtio_i2c.h
··· 11 11 #include <linux/const.h> 12 12 #include <linux/types.h> 13 13 14 + /* Virtio I2C Feature bits */ 15 + #define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 16 + 14 17 /* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ 15 18 #define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) 19 + 20 + /* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */ 21 + #define VIRTIO_I2C_FLAGS_M_RD _BITUL(1) 16 22 17 23 /** 18 24 * struct virtio_i2c_out_hdr - the virtio I2C message OUT header