Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Intel MIC Host Driver Changes for Virtio Devices.

This patch introduces the host "Virtio over PCIe" interface for
Intel MIC. It allows creating user space backends on the host and instantiating
virtio devices for them on the Intel MIC card. It uses the existing VRINGH
infrastructure in the kernel to access virtio rings from the host. A character
device per MIC is exposed with IOCTL, mmap and poll callbacks. This allows the
user space backend to:
(a) add/remove a virtio device via a device page.
(b) map (R/O) virtio rings and device page to user space.
(c) poll for availability of data.
(d) copy a descriptor or entire descriptor chain to/from the card.
(e) modify virtio configuration.
(f) handle virtio device reset.
The buffers are copied using CPU copies in this initial patch;
host-initiated MIC DMA support is planned for future patches.
The avail and desc virtio rings are in host memory and the used ring
is in card memory, so that ring accesses which cross PCIe are writes
rather than reads, for performance.

Co-author: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Caz Yokoyama <Caz.Yokoyama@intel.com>
Signed-off-by: Dasaratharaman Chandramouli <dasaratharaman.chandramouli@intel.com>
Signed-off-by: Nikhil Rao <nikhil.rao@intel.com>
Signed-off-by: Harshavardhan R Kharche <harshavardhan.r.kharche@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Acked-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Reviewed-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Ashutosh Dixit and committed by
Greg Kroah-Hartman
f69bcbf3 aa27badd

+1517 -2
+1
drivers/misc/mic/Kconfig
··· 3 3 config INTEL_MIC_HOST 4 4 tristate "Intel MIC Host Driver" 5 5 depends on 64BIT && PCI 6 + select VHOST_RING 6 7 default N 7 8 help 8 9 This enables Host Driver support for the Intel Many Integrated
+7
drivers/misc/mic/common/mic_device.h
··· 41 41 #define MIC_DPLO_SPAD 14 42 42 #define MIC_DPHI_SPAD 15 43 43 44 + /* 45 + * These values are supposed to be in the config_change field of the 46 + * device page when the host sends a config change interrupt to the card. 47 + */ 48 + #define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1 49 + #define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2 50 + 44 51 #endif
+2
drivers/misc/mic/host/Makefile
··· 10 10 mic_host-objs += mic_intr.o 11 11 mic_host-objs += mic_boot.o 12 12 mic_host-objs += mic_debugfs.o 13 + mic_host-objs += mic_fops.o 14 + mic_host-objs += mic_virtio.o
+2 -1
drivers/misc/mic/host/mic_boot.c
··· 20 20 */ 21 21 #include <linux/delay.h> 22 22 #include <linux/firmware.h> 23 - #include <linux/interrupt.h> 24 23 25 24 #include <linux/mic_common.h> 26 25 #include "../common/mic_device.h" 27 26 #include "mic_device.h" 28 27 #include "mic_smpt.h" 28 + #include "mic_virtio.h" 29 29 30 30 /** 31 31 * mic_reset - Reset the MIC device. ··· 117 117 { 118 118 mutex_lock(&mdev->mic_mutex); 119 119 if (MIC_OFFLINE != mdev->state || force) { 120 + mic_virtio_reset_devices(mdev); 120 121 mic_bootparam_init(mdev); 121 122 mic_reset(mdev); 122 123 if (MIC_RESET_FAILED == mdev->state)
+140
drivers/misc/mic/host/mic_debugfs.c
··· 26 26 #include "../common/mic_device.h" 27 27 #include "mic_device.h" 28 28 #include "mic_smpt.h" 29 + #include "mic_virtio.h" 29 30 30 31 /* Debugfs parent dir */ 31 32 static struct dentry *mic_dbg; ··· 194 193 static int mic_dp_show(struct seq_file *s, void *pos) 195 194 { 196 195 struct mic_device *mdev = s->private; 196 + struct mic_device_desc *d; 197 + struct mic_device_ctrl *dc; 198 + struct mic_vqconfig *vqconfig; 199 + __u32 *features; 200 + __u8 *config; 197 201 struct mic_bootparam *bootparam = mdev->dp; 202 + int i, j; 198 203 199 204 seq_printf(s, "Bootparam: magic 0x%x\n", 200 205 bootparam->magic); ··· 214 207 bootparam->shutdown_status); 215 208 seq_printf(s, "Bootparam: shutdown_card %d\n", 216 209 bootparam->shutdown_card); 210 + 211 + for (i = sizeof(*bootparam); i < MIC_DP_SIZE; 212 + i += mic_total_desc_size(d)) { 213 + d = mdev->dp + i; 214 + dc = (void *)d + mic_aligned_desc_size(d); 215 + 216 + /* end of list */ 217 + if (d->type == 0) 218 + break; 219 + 220 + if (d->type == -1) 221 + continue; 222 + 223 + seq_printf(s, "Type %d ", d->type); 224 + seq_printf(s, "Num VQ %d ", d->num_vq); 225 + seq_printf(s, "Feature Len %d\n", d->feature_len); 226 + seq_printf(s, "Config Len %d ", d->config_len); 227 + seq_printf(s, "Shutdown Status %d\n", d->status); 228 + 229 + for (j = 0; j < d->num_vq; j++) { 230 + vqconfig = mic_vq_config(d) + j; 231 + seq_printf(s, "vqconfig[%d]: ", j); 232 + seq_printf(s, "address 0x%llx ", vqconfig->address); 233 + seq_printf(s, "num %d ", vqconfig->num); 234 + seq_printf(s, "used address 0x%llx\n", 235 + vqconfig->used_address); 236 + } 237 + 238 + features = (__u32 *) mic_vq_features(d); 239 + seq_printf(s, "Features: Host 0x%x ", features[0]); 240 + seq_printf(s, "Guest 0x%x\n", features[1]); 241 + 242 + config = mic_vq_configspace(d); 243 + for (j = 0; j < d->config_len; j++) 244 + seq_printf(s, "config[%d]=%d\n", j, config[j]); 245 + 246 + seq_puts(s, "Device control:\n"); 247 + seq_printf(s, "Config Change 
%d ", dc->config_change); 248 + seq_printf(s, "Vdev reset %d\n", dc->vdev_reset); 249 + seq_printf(s, "Guest Ack %d ", dc->guest_ack); 250 + seq_printf(s, "Host ack %d\n", dc->host_ack); 251 + seq_printf(s, "Used address updated %d ", 252 + dc->used_address_updated); 253 + seq_printf(s, "Vdev 0x%llx\n", dc->vdev); 254 + seq_printf(s, "c2h doorbell %d ", dc->c2h_vdev_db); 255 + seq_printf(s, "h2c doorbell %d\n", dc->h2c_vdev_db); 256 + } 217 257 218 258 return 0; 219 259 } ··· 281 227 .read = seq_read, 282 228 .llseek = seq_lseek, 283 229 .release = mic_dp_debug_release 230 + }; 231 + 232 + static int mic_vdev_info_show(struct seq_file *s, void *unused) 233 + { 234 + struct mic_device *mdev = s->private; 235 + struct list_head *pos, *tmp; 236 + struct mic_vdev *mvdev; 237 + int i, j; 238 + 239 + mutex_lock(&mdev->mic_mutex); 240 + list_for_each_safe(pos, tmp, &mdev->vdev_list) { 241 + mvdev = list_entry(pos, struct mic_vdev, list); 242 + seq_printf(s, "VDEV type %d state %s in %ld out %ld\n", 243 + mvdev->virtio_id, 244 + mic_vdevup(mvdev) ? 
"UP" : "DOWN", 245 + mvdev->in_bytes, 246 + mvdev->out_bytes); 247 + for (i = 0; i < MIC_MAX_VRINGS; i++) { 248 + struct vring_desc *desc; 249 + struct vring_avail *avail; 250 + struct vring_used *used; 251 + struct mic_vringh *mvr = &mvdev->mvr[i]; 252 + struct vringh *vrh = &mvr->vrh; 253 + int num = vrh->vring.num; 254 + if (!num) 255 + continue; 256 + desc = vrh->vring.desc; 257 + seq_printf(s, "vring i %d avail_idx %d", 258 + i, mvr->vring.info->avail_idx & (num - 1)); 259 + seq_printf(s, " vring i %d avail_idx %d\n", 260 + i, mvr->vring.info->avail_idx); 261 + seq_printf(s, "vrh i %d weak_barriers %d", 262 + i, vrh->weak_barriers); 263 + seq_printf(s, " last_avail_idx %d last_used_idx %d", 264 + vrh->last_avail_idx, vrh->last_used_idx); 265 + seq_printf(s, " completed %d\n", vrh->completed); 266 + for (j = 0; j < num; j++) { 267 + seq_printf(s, "desc[%d] addr 0x%llx len %d", 268 + j, desc->addr, desc->len); 269 + seq_printf(s, " flags 0x%x next %d\n", 270 + desc->flags, 271 + desc->next); 272 + desc++; 273 + } 274 + avail = vrh->vring.avail; 275 + seq_printf(s, "avail flags 0x%x idx %d\n", 276 + avail->flags, avail->idx & (num - 1)); 277 + seq_printf(s, "avail flags 0x%x idx %d\n", 278 + avail->flags, avail->idx); 279 + for (j = 0; j < num; j++) 280 + seq_printf(s, "avail ring[%d] %d\n", 281 + j, avail->ring[j]); 282 + used = vrh->vring.used; 283 + seq_printf(s, "used flags 0x%x idx %d\n", 284 + used->flags, used->idx & (num - 1)); 285 + seq_printf(s, "used flags 0x%x idx %d\n", 286 + used->flags, used->idx); 287 + for (j = 0; j < num; j++) 288 + seq_printf(s, "used ring[%d] id %d len %d\n", 289 + j, used->ring[j].id, used->ring[j].len); 290 + } 291 + } 292 + mutex_unlock(&mdev->mic_mutex); 293 + 294 + return 0; 295 + } 296 + 297 + static int mic_vdev_info_debug_open(struct inode *inode, struct file *file) 298 + { 299 + return single_open(file, mic_vdev_info_show, inode->i_private); 300 + } 301 + 302 + static int mic_vdev_info_debug_release(struct inode 
*inode, struct file *file) 303 + { 304 + return single_release(inode, file); 305 + } 306 + 307 + static const struct file_operations vdev_info_ops = { 308 + .owner = THIS_MODULE, 309 + .open = mic_vdev_info_debug_open, 310 + .read = seq_read, 311 + .llseek = seq_lseek, 312 + .release = mic_vdev_info_debug_release 284 313 }; 285 314 286 315 static int mic_msi_irq_info_show(struct seq_file *s, void *pos) ··· 457 320 458 321 debugfs_create_file("dp", 0444, mdev->dbg_dir, 459 322 mdev, &dp_ops); 323 + 324 + debugfs_create_file("vdev_info", 0444, mdev->dbg_dir, 325 + mdev, &vdev_info_ops); 460 326 461 327 debugfs_create_file("msi_irq_info", 0444, mdev->dbg_dir, 462 328 mdev, &msi_irq_info_ops);
+5
drivers/misc/mic/host/mic_device.h
··· 21 21 #ifndef _MIC_DEVICE_H_ 22 22 #define _MIC_DEVICE_H_ 23 23 24 + #include <linux/cdev.h> 24 25 #include <linux/idr.h> 25 26 26 27 #include "mic_intr.h" ··· 81 80 * @dp_dma_addr: virtio device page DMA address. 82 81 * @shutdown_db: shutdown doorbell. 83 82 * @shutdown_cookie: shutdown cookie. 83 + * @cdev: Character device for MIC. 84 + * @vdev_list: list of virtio devices. 84 85 */ 85 86 struct mic_device { 86 87 struct mic_mw mmio; ··· 116 113 dma_addr_t dp_dma_addr; 117 114 int shutdown_db; 118 115 struct mic_irq *shutdown_cookie; 116 + struct cdev cdev; 117 + struct list_head vdev_list; 119 118 }; 120 119 121 120 /**
+221
drivers/misc/mic/host/mic_fops.c
··· 1 + /* 2 + * Intel MIC Platform Software Stack (MPSS) 3 + * 4 + * Copyright(c) 2013 Intel Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License, version 2, as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 + * General Public License for more details. 14 + * 15 + * The full GNU General Public License is included in this distribution in 16 + * the file called "COPYING". 17 + * 18 + * Intel MIC Host driver. 19 + * 20 + */ 21 + #include <linux/poll.h> 22 + 23 + #include <linux/mic_common.h> 24 + #include "../common/mic_device.h" 25 + #include "mic_device.h" 26 + #include "mic_fops.h" 27 + #include "mic_virtio.h" 28 + 29 + int mic_open(struct inode *inode, struct file *f) 30 + { 31 + struct mic_vdev *mvdev; 32 + struct mic_device *mdev = container_of(inode->i_cdev, 33 + struct mic_device, cdev); 34 + 35 + mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL); 36 + if (!mvdev) 37 + return -ENOMEM; 38 + 39 + init_waitqueue_head(&mvdev->waitq); 40 + INIT_LIST_HEAD(&mvdev->list); 41 + mvdev->mdev = mdev; 42 + mvdev->virtio_id = -1; 43 + 44 + f->private_data = mvdev; 45 + return 0; 46 + } 47 + 48 + int mic_release(struct inode *inode, struct file *f) 49 + { 50 + struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data; 51 + 52 + if (-1 != mvdev->virtio_id) 53 + mic_virtio_del_device(mvdev); 54 + f->private_data = NULL; 55 + kfree(mvdev); 56 + return 0; 57 + } 58 + 59 + long mic_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 60 + { 61 + struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data; 62 + void __user *argp = (void __user *)arg; 63 + int ret; 64 + 65 + switch (cmd) { 66 + case MIC_VIRTIO_ADD_DEVICE: 67 + { 68 + ret = 
mic_virtio_add_device(mvdev, argp); 69 + if (ret < 0) { 70 + dev_err(mic_dev(mvdev), 71 + "%s %d errno ret %d\n", 72 + __func__, __LINE__, ret); 73 + return ret; 74 + } 75 + break; 76 + } 77 + case MIC_VIRTIO_COPY_DESC: 78 + { 79 + struct mic_copy_desc copy; 80 + 81 + ret = mic_vdev_inited(mvdev); 82 + if (ret) 83 + return ret; 84 + 85 + if (copy_from_user(&copy, argp, sizeof(copy))) 86 + return -EFAULT; 87 + 88 + dev_dbg(mic_dev(mvdev), 89 + "%s %d === iovcnt 0x%x vr_idx 0x%x update_used %d\n", 90 + __func__, __LINE__, copy.iovcnt, copy.vr_idx, 91 + copy.update_used); 92 + 93 + ret = mic_virtio_copy_desc(mvdev, &copy); 94 + if (ret < 0) { 95 + dev_err(mic_dev(mvdev), 96 + "%s %d errno ret %d\n", 97 + __func__, __LINE__, ret); 98 + return ret; 99 + } 100 + if (copy_to_user( 101 + &((struct mic_copy_desc __user *)argp)->out_len, 102 + &copy.out_len, sizeof(copy.out_len))) { 103 + dev_err(mic_dev(mvdev), "%s %d errno ret %d\n", 104 + __func__, __LINE__, -EFAULT); 105 + return -EFAULT; 106 + } 107 + break; 108 + } 109 + case MIC_VIRTIO_CONFIG_CHANGE: 110 + { 111 + ret = mic_vdev_inited(mvdev); 112 + if (ret) 113 + return ret; 114 + 115 + ret = mic_virtio_config_change(mvdev, argp); 116 + if (ret < 0) { 117 + dev_err(mic_dev(mvdev), 118 + "%s %d errno ret %d\n", 119 + __func__, __LINE__, ret); 120 + return ret; 121 + } 122 + break; 123 + } 124 + default: 125 + return -ENOIOCTLCMD; 126 + }; 127 + return 0; 128 + } 129 + 130 + /* 131 + * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and 132 + * not when previously enqueued buffers may be available. This means that 133 + * in the card->host (TX) path, when userspace is unblocked by poll it 134 + * must drain all available descriptors or it can stall. 
135 + */ 136 + unsigned int mic_poll(struct file *f, poll_table *wait) 137 + { 138 + struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data; 139 + int mask = 0; 140 + 141 + poll_wait(f, &mvdev->waitq, wait); 142 + 143 + if (mic_vdev_inited(mvdev)) 144 + mask = POLLERR; 145 + else if (mvdev->poll_wake) { 146 + mvdev->poll_wake = 0; 147 + mask = POLLIN | POLLOUT; 148 + } 149 + 150 + return mask; 151 + } 152 + 153 + static inline int 154 + mic_query_offset(struct mic_vdev *mvdev, unsigned long offset, 155 + unsigned long *size, unsigned long *pa) 156 + { 157 + struct mic_device *mdev = mvdev->mdev; 158 + unsigned long start = MIC_DP_SIZE; 159 + int i; 160 + 161 + /* 162 + * MMAP interface is as follows: 163 + * offset region 164 + * 0x0 virtio device_page 165 + * 0x1000 first vring 166 + * 0x1000 + size of 1st vring second vring 167 + * .... 168 + */ 169 + if (!offset) { 170 + *pa = virt_to_phys(mdev->dp); 171 + *size = MIC_DP_SIZE; 172 + return 0; 173 + } 174 + 175 + for (i = 0; i < mvdev->dd->num_vq; i++) { 176 + struct mic_vringh *mvr = &mvdev->mvr[i]; 177 + if (offset == start) { 178 + *pa = virt_to_phys(mvr->vring.va); 179 + *size = mvr->vring.len; 180 + return 0; 181 + } 182 + start += mvr->vring.len; 183 + } 184 + return -1; 185 + } 186 + 187 + /* 188 + * Maps the device page and virtio rings to user space for readonly access. 
189 + */ 190 + int 191 + mic_mmap(struct file *f, struct vm_area_struct *vma) 192 + { 193 + struct mic_vdev *mvdev = (struct mic_vdev *)f->private_data; 194 + unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; 195 + unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size; 196 + int i, err; 197 + 198 + err = mic_vdev_inited(mvdev); 199 + if (err) 200 + return err; 201 + 202 + if (vma->vm_flags & VM_WRITE) 203 + return -EACCES; 204 + 205 + while (size_rem) { 206 + i = mic_query_offset(mvdev, offset, &size, &pa); 207 + if (i < 0) 208 + return -EINVAL; 209 + err = remap_pfn_range(vma, vma->vm_start + offset, 210 + pa >> PAGE_SHIFT, size, vma->vm_page_prot); 211 + if (err) 212 + return err; 213 + dev_dbg(mic_dev(mvdev), 214 + "%s %d type %d size 0x%lx off 0x%lx pa 0x%lx vma 0x%lx\n", 215 + __func__, __LINE__, mvdev->virtio_id, size, offset, 216 + pa, vma->vm_start + offset); 217 + size_rem -= size; 218 + offset += size; 219 + } 220 + return 0; 221 + }
+32
drivers/misc/mic/host/mic_fops.h
··· 1 + /* 2 + * Intel MIC Platform Software Stack (MPSS) 3 + * 4 + * Copyright(c) 2013 Intel Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License, version 2, as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 + * General Public License for more details. 14 + * 15 + * The full GNU General Public License is included in this distribution in 16 + * the file called "COPYING". 17 + * 18 + * Intel MIC Host driver. 19 + * 20 + */ 21 + #ifndef _MIC_FOPS_H_ 22 + #define _MIC_FOPS_H_ 23 + 24 + int mic_open(struct inode *inode, struct file *filp); 25 + int mic_release(struct inode *inode, struct file *filp); 26 + ssize_t mic_read(struct file *filp, char __user *buf, 27 + size_t count, loff_t *pos); 28 + long mic_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 29 + int mic_mmap(struct file *f, struct vm_area_struct *vma); 30 + unsigned int mic_poll(struct file *f, poll_table *wait); 31 + 32 + #endif
+26
drivers/misc/mic/host/mic_main.c
··· 25 25 #include <linux/fs.h> 26 26 #include <linux/module.h> 27 27 #include <linux/pci.h> 28 + #include <linux/poll.h> 28 29 29 30 #include <linux/mic_common.h> 30 31 #include "../common/mic_device.h" 31 32 #include "mic_device.h" 32 33 #include "mic_x100.h" 33 34 #include "mic_smpt.h" 35 + #include "mic_fops.h" 36 + #include "mic_virtio.h" 34 37 35 38 static const char mic_driver_name[] = "mic"; 36 39 ··· 66 63 static struct class *g_mic_class; 67 64 /* Base device node number for MIC devices */ 68 65 static dev_t g_mic_devno; 66 + 67 + static const struct file_operations mic_fops = { 68 + .open = mic_open, 69 + .release = mic_release, 70 + .unlocked_ioctl = mic_ioctl, 71 + .poll = mic_poll, 72 + .mmap = mic_mmap, 73 + .owner = THIS_MODULE, 74 + }; 69 75 70 76 /* Initialize the device page */ 71 77 static int mic_dp_init(struct mic_device *mdev) ··· 205 193 mdev->irq_info.next_avail_src = 0; 206 194 INIT_WORK(&mdev->reset_trigger_work, mic_reset_trigger_work); 207 195 INIT_WORK(&mdev->shutdown_work, mic_shutdown_work); 196 + INIT_LIST_HEAD(&mdev->vdev_list); 208 197 } 209 198 210 199 /** ··· 343 330 mic_bootparam_init(mdev); 344 331 345 332 mic_create_debug_dir(mdev); 333 + cdev_init(&mdev->cdev, &mic_fops); 334 + mdev->cdev.owner = THIS_MODULE; 335 + rc = cdev_add(&mdev->cdev, MKDEV(MAJOR(g_mic_devno), mdev->id), 1); 336 + if (rc) { 337 + dev_err(&pdev->dev, "cdev_add err id %d rc %d\n", mdev->id, rc); 338 + goto cleanup_debug_dir; 339 + } 346 340 return 0; 341 + cleanup_debug_dir: 342 + mic_delete_debug_dir(mdev); 343 + mutex_lock(&mdev->mic_mutex); 344 + mic_free_irq(mdev, mdev->shutdown_cookie, mdev); 345 + mutex_unlock(&mdev->mic_mutex); 347 346 dp_uninit: 348 347 mic_dp_uninit(mdev); 349 348 sysfs_put: ··· 400 375 return; 401 376 402 377 mic_stop(mdev, false); 378 + cdev_del(&mdev->cdev); 403 379 mic_delete_debug_dir(mdev); 404 380 mutex_lock(&mdev->mic_mutex); 405 381 mic_free_irq(mdev, mdev->shutdown_cookie, mdev);
+703
drivers/misc/mic/host/mic_virtio.c
··· 1 + /* 2 + * Intel MIC Platform Software Stack (MPSS) 3 + * 4 + * Copyright(c) 2013 Intel Corporation. 5 + * 6 + * This program is free software; you can redistribute it and/or modify 7 + * it under the terms of the GNU General Public License, version 2, as 8 + * published by the Free Software Foundation. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 + * General Public License for more details. 14 + * 15 + * The full GNU General Public License is included in this distribution in 16 + * the file called "COPYING". 17 + * 18 + * Intel MIC Host driver. 19 + * 20 + */ 21 + #include <linux/pci.h> 22 + #include <linux/sched.h> 23 + #include <linux/uaccess.h> 24 + 25 + #include <linux/mic_common.h> 26 + #include "../common/mic_device.h" 27 + #include "mic_device.h" 28 + #include "mic_smpt.h" 29 + #include "mic_virtio.h" 30 + 31 + /* 32 + * Initiates the copies across the PCIe bus from card memory to 33 + * a user space buffer. 34 + */ 35 + static int mic_virtio_copy_to_user(struct mic_vdev *mvdev, 36 + void __user *ubuf, size_t len, u64 addr) 37 + { 38 + int err; 39 + void __iomem *dbuf = mvdev->mdev->aper.va + addr; 40 + /* 41 + * We are copying from IO below an should ideally use something 42 + * like copy_to_user_fromio(..) if it existed. 43 + */ 44 + if (copy_to_user(ubuf, dbuf, len)) { 45 + err = -EFAULT; 46 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 47 + __func__, __LINE__, err); 48 + goto err; 49 + } 50 + mvdev->in_bytes += len; 51 + err = 0; 52 + err: 53 + return err; 54 + } 55 + 56 + /* 57 + * Initiates copies across the PCIe bus from a user space 58 + * buffer to card memory. 
59 + */ 60 + static int mic_virtio_copy_from_user(struct mic_vdev *mvdev, 61 + void __user *ubuf, size_t len, u64 addr) 62 + { 63 + int err; 64 + void __iomem *dbuf = mvdev->mdev->aper.va + addr; 65 + /* 66 + * We are copying to IO below and should ideally use something 67 + * like copy_from_user_toio(..) if it existed. 68 + */ 69 + if (copy_from_user(dbuf, ubuf, len)) { 70 + err = -EFAULT; 71 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 72 + __func__, __LINE__, err); 73 + goto err; 74 + } 75 + mvdev->out_bytes += len; 76 + err = 0; 77 + err: 78 + return err; 79 + } 80 + 81 + #define MIC_VRINGH_READ true 82 + 83 + /* The function to call to notify the card about added buffers */ 84 + static void mic_notify(struct vringh *vrh) 85 + { 86 + struct mic_vringh *mvrh = container_of(vrh, struct mic_vringh, vrh); 87 + struct mic_vdev *mvdev = mvrh->mvdev; 88 + s8 db = mvdev->dc->h2c_vdev_db; 89 + 90 + if (db != -1) 91 + mvdev->mdev->ops->send_intr(mvdev->mdev, db); 92 + } 93 + 94 + /* Determine the total number of bytes consumed in a VRINGH KIOV */ 95 + static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov) 96 + { 97 + int i; 98 + u32 total = iov->consumed; 99 + 100 + for (i = 0; i < iov->i; i++) 101 + total += iov->iov[i].iov_len; 102 + return total; 103 + } 104 + 105 + /* 106 + * Traverse the VRINGH KIOV and issue the APIs to trigger the copies. 107 + * This API is heavily based on the vringh_iov_xfer(..) implementation 108 + * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..) 109 + * and vringh_iov_push_kern(..) directly is because there is no 110 + * way to override the VRINGH xfer(..) routines as of v3.10. 
111 + */ 112 + static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov, 113 + void __user *ubuf, size_t len, bool read, size_t *out_len) 114 + { 115 + int ret = 0; 116 + size_t partlen, tot_len = 0; 117 + 118 + while (len && iov->i < iov->used) { 119 + partlen = min(iov->iov[iov->i].iov_len, len); 120 + if (read) 121 + ret = mic_virtio_copy_to_user(mvdev, 122 + ubuf, partlen, 123 + (u64)iov->iov[iov->i].iov_base); 124 + else 125 + ret = mic_virtio_copy_from_user(mvdev, 126 + ubuf, partlen, 127 + (u64)iov->iov[iov->i].iov_base); 128 + if (ret) { 129 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 130 + __func__, __LINE__, ret); 131 + break; 132 + } 133 + len -= partlen; 134 + ubuf += partlen; 135 + tot_len += partlen; 136 + iov->consumed += partlen; 137 + iov->iov[iov->i].iov_len -= partlen; 138 + iov->iov[iov->i].iov_base += partlen; 139 + if (!iov->iov[iov->i].iov_len) { 140 + /* Fix up old iov element then increment. */ 141 + iov->iov[iov->i].iov_len = iov->consumed; 142 + iov->iov[iov->i].iov_base -= iov->consumed; 143 + 144 + iov->consumed = 0; 145 + iov->i++; 146 + } 147 + } 148 + *out_len = tot_len; 149 + return ret; 150 + } 151 + 152 + /* 153 + * Use the standard VRINGH infrastructure in the kernel to fetch new 154 + * descriptors, initiate the copies and update the used ring. 
155 + */ 156 + static int _mic_virtio_copy(struct mic_vdev *mvdev, 157 + struct mic_copy_desc *copy) 158 + { 159 + int ret = 0, iovcnt = copy->iovcnt; 160 + struct iovec iov; 161 + struct iovec __user *u_iov = copy->iov; 162 + void __user *ubuf = NULL; 163 + struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx]; 164 + struct vringh_kiov *riov = &mvr->riov; 165 + struct vringh_kiov *wiov = &mvr->wiov; 166 + struct vringh *vrh = &mvr->vrh; 167 + u16 *head = &mvr->head; 168 + struct mic_vring *vr = &mvr->vring; 169 + size_t len = 0, out_len; 170 + 171 + copy->out_len = 0; 172 + /* Fetch a new IOVEC if all previous elements have been processed */ 173 + if (riov->i == riov->used && wiov->i == wiov->used) { 174 + ret = vringh_getdesc_kern(vrh, riov, wiov, 175 + head, GFP_KERNEL); 176 + /* Check if there are available descriptors */ 177 + if (ret <= 0) 178 + return ret; 179 + } 180 + while (iovcnt) { 181 + if (!len) { 182 + /* Copy over a new iovec from user space. */ 183 + ret = copy_from_user(&iov, u_iov, sizeof(*u_iov)); 184 + if (ret) { 185 + ret = -EINVAL; 186 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 187 + __func__, __LINE__, ret); 188 + break; 189 + } 190 + len = iov.iov_len; 191 + ubuf = iov.iov_base; 192 + } 193 + /* Issue all the read descriptors first */ 194 + ret = mic_vringh_copy(mvdev, riov, ubuf, len, 195 + MIC_VRINGH_READ, &out_len); 196 + if (ret) { 197 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 198 + __func__, __LINE__, ret); 199 + break; 200 + } 201 + len -= out_len; 202 + ubuf += out_len; 203 + copy->out_len += out_len; 204 + /* Issue the write descriptors next */ 205 + ret = mic_vringh_copy(mvdev, wiov, ubuf, len, 206 + !MIC_VRINGH_READ, &out_len); 207 + if (ret) { 208 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 209 + __func__, __LINE__, ret); 210 + break; 211 + } 212 + len -= out_len; 213 + ubuf += out_len; 214 + copy->out_len += out_len; 215 + if (!len) { 216 + /* One user space iovec is now completed */ 217 + iovcnt--; 218 + u_iov++; 219 + } 220 + 
/* Exit loop if all elements in KIOVs have been processed. */ 221 + if (riov->i == riov->used && wiov->i == wiov->used) 222 + break; 223 + } 224 + /* 225 + * Update the used ring if a descriptor was available and some data was 226 + * copied in/out and the user asked for a used ring update. 227 + */ 228 + if (*head != USHRT_MAX && copy->out_len && 229 + copy->update_used) { 230 + u32 total = 0; 231 + 232 + /* Determine the total data consumed */ 233 + total += mic_vringh_iov_consumed(riov); 234 + total += mic_vringh_iov_consumed(wiov); 235 + vringh_complete_kern(vrh, *head, total); 236 + *head = USHRT_MAX; 237 + if (vringh_need_notify_kern(vrh) > 0) 238 + vringh_notify(vrh); 239 + vringh_kiov_cleanup(riov); 240 + vringh_kiov_cleanup(wiov); 241 + /* Update avail idx for user space */ 242 + vr->info->avail_idx = vrh->last_avail_idx; 243 + } 244 + return ret; 245 + } 246 + 247 + static inline int mic_verify_copy_args(struct mic_vdev *mvdev, 248 + struct mic_copy_desc *copy) 249 + { 250 + if (copy->vr_idx >= mvdev->dd->num_vq) { 251 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 252 + __func__, __LINE__, -EINVAL); 253 + return -EINVAL; 254 + } 255 + return 0; 256 + } 257 + 258 + /* Copy a specified number of virtio descriptors in a chain */ 259 + int mic_virtio_copy_desc(struct mic_vdev *mvdev, 260 + struct mic_copy_desc *copy) 261 + { 262 + int err; 263 + struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx]; 264 + 265 + err = mic_verify_copy_args(mvdev, copy); 266 + if (err) 267 + return err; 268 + 269 + mutex_lock(&mvr->vr_mutex); 270 + if (!mic_vdevup(mvdev)) { 271 + err = -ENODEV; 272 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 273 + __func__, __LINE__, err); 274 + goto err; 275 + } 276 + err = _mic_virtio_copy(mvdev, copy); 277 + if (err) { 278 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 279 + __func__, __LINE__, err); 280 + } 281 + err: 282 + mutex_unlock(&mvr->vr_mutex); 283 + return err; 284 + } 285 + 286 + static void mic_virtio_init_post(struct mic_vdev *mvdev) 
287 + { 288 + struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd); 289 + int i; 290 + 291 + for (i = 0; i < mvdev->dd->num_vq; i++) { 292 + if (!le64_to_cpu(vqconfig[i].used_address)) { 293 + dev_warn(mic_dev(mvdev), "used_address zero??\n"); 294 + continue; 295 + } 296 + mvdev->mvr[i].vrh.vring.used = 297 + mvdev->mdev->aper.va + 298 + le64_to_cpu(vqconfig[i].used_address); 299 + } 300 + 301 + mvdev->dc->used_address_updated = 0; 302 + 303 + dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n", 304 + __func__, mvdev->virtio_id); 305 + } 306 + 307 + static inline void mic_virtio_device_reset(struct mic_vdev *mvdev) 308 + { 309 + int i; 310 + 311 + dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n", 312 + __func__, mvdev->dd->status, mvdev->virtio_id); 313 + 314 + for (i = 0; i < mvdev->dd->num_vq; i++) 315 + /* 316 + * Avoid lockdep false positive. The + 1 is for the mic 317 + * mutex which is held in the reset devices code path. 318 + */ 319 + mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1); 320 + 321 + /* 0 status means "reset" */ 322 + mvdev->dd->status = 0; 323 + mvdev->dc->vdev_reset = 0; 324 + mvdev->dc->host_ack = 1; 325 + 326 + for (i = 0; i < mvdev->dd->num_vq; i++) { 327 + struct vringh *vrh = &mvdev->mvr[i].vrh; 328 + mvdev->mvr[i].vring.info->avail_idx = 0; 329 + vrh->completed = 0; 330 + vrh->last_avail_idx = 0; 331 + vrh->last_used_idx = 0; 332 + } 333 + 334 + for (i = 0; i < mvdev->dd->num_vq; i++) 335 + mutex_unlock(&mvdev->mvr[i].vr_mutex); 336 + } 337 + 338 + void mic_virtio_reset_devices(struct mic_device *mdev) 339 + { 340 + struct list_head *pos, *tmp; 341 + struct mic_vdev *mvdev; 342 + 343 + dev_dbg(mdev->sdev->parent, "%s\n", __func__); 344 + 345 + list_for_each_safe(pos, tmp, &mdev->vdev_list) { 346 + mvdev = list_entry(pos, struct mic_vdev, list); 347 + mic_virtio_device_reset(mvdev); 348 + mvdev->poll_wake = 1; 349 + wake_up(&mvdev->waitq); 350 + } 351 + } 352 + 353 + void mic_bh_handler(struct work_struct *work) 354 + 
{ 355 + struct mic_vdev *mvdev = container_of(work, struct mic_vdev, 356 + virtio_bh_work); 357 + 358 + if (mvdev->dc->used_address_updated) 359 + mic_virtio_init_post(mvdev); 360 + 361 + if (mvdev->dc->vdev_reset) 362 + mic_virtio_device_reset(mvdev); 363 + 364 + mvdev->poll_wake = 1; 365 + wake_up(&mvdev->waitq); 366 + } 367 + 368 + static irqreturn_t mic_virtio_intr_handler(int irq, void *data) 369 + { 370 + 371 + struct mic_vdev *mvdev = data; 372 + struct mic_device *mdev = mvdev->mdev; 373 + 374 + mdev->ops->ack_interrupt(mdev); 375 + schedule_work(&mvdev->virtio_bh_work); 376 + return IRQ_HANDLED; 377 + } 378 + 379 + int mic_virtio_config_change(struct mic_vdev *mvdev, 380 + void __user *argp) 381 + { 382 + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); 383 + int ret = 0, retry = 100, i; 384 + struct mic_bootparam *bootparam = mvdev->mdev->dp; 385 + s8 db = bootparam->h2c_config_db; 386 + 387 + mutex_lock(&mvdev->mdev->mic_mutex); 388 + for (i = 0; i < mvdev->dd->num_vq; i++) 389 + mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1); 390 + 391 + if (db == -1 || mvdev->dd->type == -1) { 392 + ret = -EIO; 393 + goto exit; 394 + } 395 + 396 + if (copy_from_user(mic_vq_configspace(mvdev->dd), 397 + argp, mvdev->dd->config_len)) { 398 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 399 + __func__, __LINE__, -EFAULT); 400 + ret = -EFAULT; 401 + goto exit; 402 + } 403 + mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED; 404 + mvdev->mdev->ops->send_intr(mvdev->mdev, db); 405 + 406 + for (i = retry; i--;) { 407 + ret = wait_event_timeout(wake, 408 + mvdev->dc->guest_ack, msecs_to_jiffies(100)); 409 + if (ret) 410 + break; 411 + } 412 + 413 + dev_dbg(mic_dev(mvdev), 414 + "%s %d retry: %d\n", __func__, __LINE__, retry); 415 + mvdev->dc->config_change = 0; 416 + mvdev->dc->guest_ack = 0; 417 + exit: 418 + for (i = 0; i < mvdev->dd->num_vq; i++) 419 + mutex_unlock(&mvdev->mvr[i].vr_mutex); 420 + mutex_unlock(&mvdev->mdev->mic_mutex); 421 + return ret; 422 + } 423 + 424 + 
static int mic_copy_dp_entry(struct mic_vdev *mvdev, 425 + void __user *argp, 426 + __u8 *type, 427 + struct mic_device_desc **devpage) 428 + { 429 + struct mic_device *mdev = mvdev->mdev; 430 + struct mic_device_desc dd, *dd_config, *devp; 431 + struct mic_vqconfig *vqconfig; 432 + int ret = 0, i; 433 + bool slot_found = false; 434 + 435 + if (copy_from_user(&dd, argp, sizeof(dd))) { 436 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 437 + __func__, __LINE__, -EFAULT); 438 + return -EFAULT; 439 + } 440 + 441 + if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE 442 + || dd.num_vq > MIC_MAX_VRINGS) { 443 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 444 + __func__, __LINE__, -EINVAL); 445 + return -EINVAL; 446 + } 447 + 448 + dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL); 449 + if (dd_config == NULL) { 450 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 451 + __func__, __LINE__, -ENOMEM); 452 + return -ENOMEM; 453 + } 454 + if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) { 455 + ret = -EFAULT; 456 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 457 + __func__, __LINE__, ret); 458 + goto exit; 459 + } 460 + 461 + vqconfig = mic_vq_config(dd_config); 462 + for (i = 0; i < dd.num_vq; i++) { 463 + if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) { 464 + ret = -EINVAL; 465 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 466 + __func__, __LINE__, ret); 467 + goto exit; 468 + } 469 + } 470 + 471 + /* Find the first free device page entry */ 472 + for (i = mic_aligned_size(struct mic_bootparam); 473 + i < MIC_DP_SIZE - mic_total_desc_size(dd_config); 474 + i += mic_total_desc_size(devp)) { 475 + devp = mdev->dp + i; 476 + if (devp->type == 0 || devp->type == -1) { 477 + slot_found = true; 478 + break; 479 + } 480 + } 481 + if (!slot_found) { 482 + ret = -EINVAL; 483 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 484 + __func__, __LINE__, ret); 485 + goto exit; 486 + } 487 + /* 488 + * Save off the type before doing the memcpy. 
Type will be set in the 489 + * end after completing all initialization for the new device. 490 + */ 491 + *type = dd_config->type; 492 + dd_config->type = 0; 493 + memcpy(devp, dd_config, mic_desc_size(dd_config)); 494 + 495 + *devpage = devp; 496 + exit: 497 + kfree(dd_config); 498 + return ret; 499 + } 500 + 501 + static void mic_init_device_ctrl(struct mic_vdev *mvdev, 502 + struct mic_device_desc *devpage) 503 + { 504 + struct mic_device_ctrl *dc; 505 + 506 + dc = mvdev->dc = (void *)devpage + mic_aligned_desc_size(devpage); 507 + 508 + dc->config_change = 0; 509 + dc->guest_ack = 0; 510 + dc->vdev_reset = 0; 511 + dc->host_ack = 0; 512 + dc->used_address_updated = 0; 513 + dc->c2h_vdev_db = -1; 514 + dc->h2c_vdev_db = -1; 515 + } 516 + 517 + int mic_virtio_add_device(struct mic_vdev *mvdev, 518 + void __user *argp) 519 + { 520 + struct mic_device *mdev = mvdev->mdev; 521 + struct mic_device_desc *dd; 522 + struct mic_vqconfig *vqconfig; 523 + int vr_size, i, j, ret; 524 + u8 type; 525 + s8 db; 526 + char irqname[10]; 527 + struct mic_bootparam *bootparam = mdev->dp; 528 + u16 num; 529 + 530 + mutex_lock(&mdev->mic_mutex); 531 + 532 + ret = mic_copy_dp_entry(mvdev, argp, &type, &dd); 533 + if (ret) { 534 + mutex_unlock(&mdev->mic_mutex); 535 + return ret; 536 + } 537 + 538 + mic_init_device_ctrl(mvdev, dd); 539 + 540 + mvdev->dd = dd; 541 + mvdev->virtio_id = type; 542 + vqconfig = mic_vq_config(dd); 543 + INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler); 544 + 545 + for (i = 0; i < dd->num_vq; i++) { 546 + struct mic_vringh *mvr = &mvdev->mvr[i]; 547 + struct mic_vring *vr = &mvdev->mvr[i].vring; 548 + num = le16_to_cpu(vqconfig[i].num); 549 + mutex_init(&mvr->vr_mutex); 550 + vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) + 551 + sizeof(struct _mic_vring_info)); 552 + vr->va = (void *) 553 + __get_free_pages(GFP_KERNEL | __GFP_ZERO, 554 + get_order(vr_size)); 555 + if (!vr->va) { 556 + ret = -ENOMEM; 557 + dev_err(mic_dev(mvdev), "%s %d err 
%d\n", 558 + __func__, __LINE__, ret); 559 + goto err; 560 + } 561 + vr->len = vr_size; 562 + vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN); 563 + vr->info->magic = MIC_MAGIC + mvdev->virtio_id + i; 564 + vqconfig[i].address = mic_map_single(mdev, 565 + vr->va, vr_size); 566 + if (mic_map_error(vqconfig[i].address)) { 567 + free_pages((unsigned long)vr->va, 568 + get_order(vr_size)); 569 + ret = -ENOMEM; 570 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 571 + __func__, __LINE__, ret); 572 + goto err; 573 + } 574 + vqconfig[i].address = cpu_to_le64(vqconfig[i].address); 575 + 576 + vring_init(&vr->vr, num, 577 + vr->va, MIC_VIRTIO_RING_ALIGN); 578 + ret = vringh_init_kern(&mvr->vrh, 579 + *(u32 *)mic_vq_features(mvdev->dd), num, false, 580 + vr->vr.desc, vr->vr.avail, vr->vr.used); 581 + if (ret) { 582 + dev_err(mic_dev(mvdev), "%s %d err %d\n", 583 + __func__, __LINE__, ret); 584 + goto err; 585 + } 586 + vringh_kiov_init(&mvr->riov, NULL, 0); 587 + vringh_kiov_init(&mvr->wiov, NULL, 0); 588 + mvr->head = USHRT_MAX; 589 + mvr->mvdev = mvdev; 590 + mvr->vrh.notify = mic_notify; 591 + dev_dbg(mdev->sdev->parent, 592 + "%s %d index %d va %p info %p vr_size 0x%x\n", 593 + __func__, __LINE__, i, vr->va, vr->info, vr_size); 594 + } 595 + 596 + snprintf(irqname, sizeof(irqname), 597 + "mic%dvirtio%d", mdev->id, mvdev->virtio_id); 598 + mvdev->virtio_db = mic_next_db(mdev); 599 + mvdev->virtio_cookie = mic_request_irq(mdev, mic_virtio_intr_handler, 600 + irqname, mvdev, mvdev->virtio_db, MIC_INTR_DB); 601 + if (IS_ERR(mvdev->virtio_cookie)) { 602 + ret = PTR_ERR(mvdev->virtio_cookie); 603 + dev_dbg(mdev->sdev->parent, "request irq failed\n"); 604 + goto err; 605 + } 606 + 607 + mvdev->dc->c2h_vdev_db = mvdev->virtio_db; 608 + 609 + list_add_tail(&mvdev->list, &mdev->vdev_list); 610 + /* 611 + * Order the type update with previous stores. 
This write barrier 612 + * is paired with the corresponding read barrier before the uncached 613 + * system memory read of the type, on the card while scanning the 614 + * device page. 615 + */ 616 + smp_wmb(); 617 + dd->type = type; 618 + 619 + dev_dbg(mdev->sdev->parent, "Added virtio device id %d\n", dd->type); 620 + 621 + db = bootparam->h2c_config_db; 622 + if (db != -1) 623 + mdev->ops->send_intr(mdev, db); 624 + mutex_unlock(&mdev->mic_mutex); 625 + return 0; 626 + err: 627 + vqconfig = mic_vq_config(dd); 628 + for (j = 0; j < i; j++) { 629 + struct mic_vringh *mvr = &mvdev->mvr[j]; 630 + mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address), 631 + mvr->vring.len); 632 + free_pages((unsigned long)mvr->vring.va, 633 + get_order(mvr->vring.len)); 634 + } 635 + mutex_unlock(&mdev->mic_mutex); 636 + return ret; 637 + } 638 + 639 + void mic_virtio_del_device(struct mic_vdev *mvdev) 640 + { 641 + struct list_head *pos, *tmp; 642 + struct mic_vdev *tmp_mvdev; 643 + struct mic_device *mdev = mvdev->mdev; 644 + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake); 645 + int i, ret, retry = 100; 646 + struct mic_vqconfig *vqconfig; 647 + struct mic_bootparam *bootparam = mdev->dp; 648 + s8 db; 649 + 650 + mutex_lock(&mdev->mic_mutex); 651 + db = bootparam->h2c_config_db; 652 + if (db == -1) 653 + goto skip_hot_remove; 654 + dev_dbg(mdev->sdev->parent, 655 + "Requesting hot remove id %d\n", mvdev->virtio_id); 656 + mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE; 657 + mdev->ops->send_intr(mdev, db); 658 + for (i = retry; i--;) { 659 + ret = wait_event_timeout(wake, 660 + mvdev->dc->guest_ack, msecs_to_jiffies(100)); 661 + if (ret) 662 + break; 663 + } 664 + dev_dbg(mdev->sdev->parent, 665 + "Device id %d config_change %d guest_ack %d\n", 666 + mvdev->virtio_id, mvdev->dc->config_change, 667 + mvdev->dc->guest_ack); 668 + mvdev->dc->config_change = 0; 669 + mvdev->dc->guest_ack = 0; 670 + skip_hot_remove: 671 + mic_free_irq(mdev, mvdev->virtio_cookie, mvdev); 672 + 
flush_work(&mvdev->virtio_bh_work); 673 + vqconfig = mic_vq_config(mvdev->dd); 674 + for (i = 0; i < mvdev->dd->num_vq; i++) { 675 + struct mic_vringh *mvr = &mvdev->mvr[i]; 676 + vringh_kiov_cleanup(&mvr->riov); 677 + vringh_kiov_cleanup(&mvr->wiov); 678 + mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address), 679 + mvr->vring.len); 680 + free_pages((unsigned long)mvr->vring.va, 681 + get_order(mvr->vring.len)); 682 + } 683 + 684 + list_for_each_safe(pos, tmp, &mdev->vdev_list) { 685 + tmp_mvdev = list_entry(pos, struct mic_vdev, list); 686 + if (tmp_mvdev == mvdev) { 687 + list_del(pos); 688 + dev_dbg(mdev->sdev->parent, 689 + "Removing virtio device id %d\n", 690 + mvdev->virtio_id); 691 + break; 692 + } 693 + } 694 + /* 695 + * Order the type update with previous stores. This write barrier 696 + * is paired with the corresponding read barrier before the uncached 697 + * system memory read of the type, on the card while scanning the 698 + * device page. 699 + */ 700 + smp_wmb(); 701 + mvdev->dd->type = -1; 702 + mutex_unlock(&mdev->mic_mutex); 703 + }
+138
drivers/misc/mic/host/mic_virtio.h
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2013 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel MIC Host driver.
 *
 */
#ifndef MIC_VIRTIO_H
#define MIC_VIRTIO_H

#include <linux/virtio_config.h>
#include <linux/mic_ioctl.h>

/*
 * Note on endianness.
 * 1. Host can be either BE or LE.
 * 2. Guest/card is LE. Host uses le_to_cpu to access desc/avail
 *    rings and ioreadXX/iowriteXX to access used ring.
 * 3. Device page exposed by host to guest contains LE values. Guest
 *    accesses these using ioreadXX/iowriteXX etc. This way in general we
 *    obey the virtio spec according to which guest works with native
 *    endianness and host is aware of guest endianness and does all
 *    required endianness conversion.
 * 4. Data provided from user space to guest (in ADD_DEVICE and
 *    CONFIG_CHANGE ioctl's) is not interpreted by the driver and should be
 *    in guest endianness.
 */

/**
 * struct mic_vringh - Virtio ring host information.
 *
 * @vring: The MIC vring used for setting up user space mappings.
 * @vrh: The host VRINGH used for accessing the card vrings.
 * @riov: The VRINGH read kernel IOV.
 * @wiov: The VRINGH write kernel IOV.
 * @head: The VRINGH head index address passed to vringh_getdesc_kern(..).
 * @vr_mutex: Mutex for synchronizing access to the VRING.
 * @mvdev: Back pointer to MIC virtio device for vringh_notify(..).
 */
struct mic_vringh {
	struct mic_vring vring;
	struct vringh vrh;
	struct vringh_kiov riov;
	struct vringh_kiov wiov;
	u16 head;
	struct mutex vr_mutex;
	struct mic_vdev *mvdev;
};

/**
 * struct mic_vdev - Host information for a card Virtio device.
 *
 * @virtio_id: Virtio device id.
 * @waitq: Waitqueue to allow ring3 apps to poll.
 * @mdev: Back pointer to host MIC device.
 * @poll_wake: Used for waking up threads blocked in poll.
 * @out_bytes: Debug stats for number of bytes copied from host to card.
 * @in_bytes: Debug stats for number of bytes copied from card to host.
 * @mvr: Store per VRING data structures.
 * @virtio_bh_work: Work struct used to schedule virtio bottom half handling.
 * @dd: Virtio device descriptor.
 * @dc: Virtio device control fields.
 * @list: List of Virtio devices.
 * @virtio_db: The doorbell used by the card to interrupt the host.
 * @virtio_cookie: The cookie returned while requesting interrupts.
 */
struct mic_vdev {
	int virtio_id;
	wait_queue_head_t waitq;
	struct mic_device *mdev;
	int poll_wake;
	unsigned long out_bytes;
	unsigned long in_bytes;
	struct mic_vringh mvr[MIC_MAX_VRINGS];
	struct work_struct virtio_bh_work;
	struct mic_device_desc *dd;
	struct mic_device_ctrl *dc;
	struct list_head list;
	int virtio_db;
	struct mic_irq *virtio_cookie;
};

void mic_virtio_uninit(struct mic_device *mdev);
int mic_virtio_add_device(struct mic_vdev *mvdev,
			void __user *argp);
void mic_virtio_del_device(struct mic_vdev *mvdev);
int mic_virtio_config_change(struct mic_vdev *mvdev,
			void __user *argp);
int mic_virtio_copy_desc(struct mic_vdev *mvdev,
	struct mic_copy_desc *request);
void mic_virtio_reset_devices(struct mic_device *mdev);
void mic_bh_handler(struct work_struct *work);

/* Helper API to obtain the MIC PCIe device */
static inline struct device *mic_dev(struct mic_vdev *mvdev)
{
	return mvdev->mdev->sdev->parent;
}

/*
 * Helper API to check if a virtio device is initialized.
 *
 * Returns 0 when the device has a descriptor with a valid type, -EINVAL
 * when no descriptor is attached or its type is still 0, and -ENODEV when
 * the type is -1. Both failure cases are logged with dev_err().
 */
static inline int mic_vdev_inited(struct mic_vdev *mvdev)
{
	/* Device has not been created yet */
	if (!mvdev->dd || !mvdev->dd->type) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}

	/* Device has been removed/deleted */
	if (mvdev->dd->type == -1) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -ENODEV);
		return -ENODEV;
	}

	return 0;
}

/*
 * Helper API to check if a virtio device is running.
 *
 * True when the status byte in the device descriptor is non zero. The
 * descriptor pointer is dereferenced without a NULL check.
 */
static inline bool mic_vdevup(struct mic_vdev *mvdev)
{
	return !!mvdev->dd->status;
}
#endif
+1
include/uapi/linux/Kbuild
··· 242 242 header-y += mempolicy.h 243 243 header-y += meye.h 244 244 header-y += mic_common.h 245 + header-y += mic_ioctl.h 245 246 header-y += mii.h 246 247 header-y += minix_fs.h 247 248 header-y += mman.h
+165 -1
include/uapi/linux/mic_common.h
··· 21 21 #ifndef __MIC_COMMON_H_ 22 22 #define __MIC_COMMON_H_ 23 23 24 - #include <linux/types.h> 24 + #include <linux/virtio_ring.h> 25 + 26 + #ifndef __KERNEL__ 27 + #define ALIGN(a, x) (((a) + (x) - 1) & ~((x) - 1)) 28 + #define __aligned(x) __attribute__ ((aligned(x))) 29 + #endif 30 + 31 + #define mic_aligned_size(x) ALIGN(sizeof(x), 8) 32 + 33 + /** 34 + * struct mic_device_desc: Virtio device information shared between the 35 + * virtio driver and userspace backend 36 + * 37 + * @type: Device type: console/network/disk etc. Type 0/-1 terminates. 38 + * @num_vq: Number of virtqueues. 39 + * @feature_len: Number of bytes of feature bits. Multiply by 2: one for 40 + host features and one for guest acknowledgements. 41 + * @config_len: Number of bytes of the config array after virtqueues. 42 + * @status: A status byte, written by the Guest. 43 + * @config: Start of the following variable length config. 44 + */ 45 + struct mic_device_desc { 46 + __s8 type; 47 + __u8 num_vq; 48 + __u8 feature_len; 49 + __u8 config_len; 50 + __u8 status; 51 + __u64 config[0]; 52 + } __aligned(8); 53 + 54 + /** 55 + * struct mic_device_ctrl: Per virtio device information in the device page 56 + * used internally by the host and card side drivers. 57 + * 58 + * @vdev: Used for storing MIC vdev information by the guest. 59 + * @config_change: Set to 1 by host when a config change is requested. 60 + * @vdev_reset: Set to 1 by guest to indicate virtio device has been reset. 61 + * @guest_ack: Set to 1 by guest to ack a command. 62 + * @host_ack: Set to 1 by host to ack a command. 63 + * @used_address_updated: Set to 1 by guest when the used address should be 64 + * updated. 65 + * @c2h_vdev_db: The doorbell number to be used by guest. Set by host. 66 + * @h2c_vdev_db: The doorbell number to be used by host. Set by guest. 
67 + */ 68 + struct mic_device_ctrl { 69 + __u64 vdev; 70 + __u8 config_change; 71 + __u8 vdev_reset; 72 + __u8 guest_ack; 73 + __u8 host_ack; 74 + __u8 used_address_updated; 75 + __s8 c2h_vdev_db; 76 + __s8 h2c_vdev_db; 77 + } __aligned(8); 25 78 26 79 /** 27 80 * struct mic_bootparam: Virtio device independent information in device page ··· 94 41 __u8 shutdown_status; 95 42 __u8 shutdown_card; 96 43 } __aligned(8); 44 + 45 + /** 46 + * struct mic_device_page: High level representation of the device page 47 + * 48 + * @bootparam: The bootparam structure is used for sharing information and 49 + * status updates between MIC host and card drivers. 50 + * @desc: Array of MIC virtio device descriptors. 51 + */ 52 + struct mic_device_page { 53 + struct mic_bootparam bootparam; 54 + struct mic_device_desc desc[0]; 55 + }; 56 + /** 57 + * struct mic_vqconfig: This is how we expect the device configuration field 58 + * for a virtqueue to be laid out in config space. 59 + * 60 + * @address: Guest/MIC physical address of the virtio ring 61 + * (avail and desc rings) 62 + * @used_address: Guest/MIC physical address of the used ring 63 + * @num: The number of entries in the virtio_ring 64 + */ 65 + struct mic_vqconfig { 66 + __u64 address; 67 + __u64 used_address; 68 + __u16 num; 69 + } __aligned(8); 70 + 71 + /* 72 + * The alignment to use between consumer and producer parts of vring. 73 + * This is pagesize for historical reasons. 
74 + */ 75 + #define MIC_VIRTIO_RING_ALIGN 4096 76 + 77 + #define MIC_MAX_VRINGS 4 78 + #define MIC_VRING_ENTRIES 128 79 + 80 + /* 81 + * Max vring entries (power of 2) to ensure desc and avail rings 82 + * fit in a single page 83 + */ 84 + #define MIC_MAX_VRING_ENTRIES 128 85 + 86 + /** 87 + * Max size of the desc block in bytes: includes: 88 + * - struct mic_device_desc 89 + * - struct mic_vqconfig (num_vq of these) 90 + * - host and guest features 91 + * - virtio device config space 92 + */ 93 + #define MIC_MAX_DESC_BLK_SIZE 256 94 + 95 + /** 96 + * struct _mic_vring_info - Host vring info exposed to userspace backend 97 + * for the avail index and magic for the card. 98 + * 99 + * @avail_idx: host avail idx 100 + * @magic: A magic debug cookie. 101 + */ 102 + struct _mic_vring_info { 103 + __u16 avail_idx; 104 + int magic; 105 + }; 106 + 107 + /** 108 + * struct mic_vring - Vring information. 109 + * 110 + * @vr: The virtio ring. 111 + * @info: Host vring information exposed to the userspace backend for the 112 + * avail index and magic for the card. 113 + * @va: The va for the buffer allocated for vr and info. 114 + * @len: The length of the buffer required for allocating vr and info. 
115 + */ 116 + struct mic_vring { 117 + struct vring vr; 118 + struct _mic_vring_info *info; 119 + void *va; 120 + int len; 121 + }; 122 + 123 + #define mic_aligned_desc_size(d) ALIGN(mic_desc_size(d), 8) 124 + 125 + #ifndef INTEL_MIC_CARD 126 + static inline unsigned mic_desc_size(const struct mic_device_desc *desc) 127 + { 128 + return mic_aligned_size(*desc) 129 + + desc->num_vq * mic_aligned_size(struct mic_vqconfig) 130 + + desc->feature_len * 2 131 + + desc->config_len; 132 + } 133 + 134 + static inline struct mic_vqconfig * 135 + mic_vq_config(const struct mic_device_desc *desc) 136 + { 137 + return (struct mic_vqconfig *)(desc + 1); 138 + } 139 + 140 + static inline __u8 *mic_vq_features(const struct mic_device_desc *desc) 141 + { 142 + return (__u8 *)(mic_vq_config(desc) + desc->num_vq); 143 + } 144 + 145 + static inline __u8 *mic_vq_configspace(const struct mic_device_desc *desc) 146 + { 147 + return mic_vq_features(desc) + desc->feature_len * 2; 148 + } 149 + static inline unsigned mic_total_desc_size(struct mic_device_desc *desc) 150 + { 151 + return mic_aligned_desc_size(desc) + 152 + mic_aligned_size(struct mic_device_ctrl); 153 + } 154 + #endif 97 155 98 156 /* Device page size */ 99 157 #define MIC_DP_SIZE 4096
+74
include/uapi/linux/mic_ioctl.h
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2013 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel MIC Host driver.
 *
 */
#ifndef _MIC_IOCTL_H_
#define _MIC_IOCTL_H_

/* __u8 and __u32 are used below; keep this header self-contained. */
#include <linux/types.h>

/*
 * struct mic_copy_desc - MIC virtio descriptor copy.
 *
 * @iov: An array of IOVEC structures containing user space buffers.
 * @iovcnt: Number of IOVEC structures in iov.
 * @vr_idx: The vring index.
 * @update_used: A non zero value results in used index being updated.
 * @out_len: The aggregate of the total length written to or read from
 *	the virtio device.
 *
 * struct iovec itself is not declared here; the includer must provide it.
 */
struct mic_copy_desc {
#ifdef __KERNEL__
	struct iovec __user *iov;
#else
	struct iovec *iov;
#endif
	int iovcnt;
	__u8 vr_idx;
	__u8 update_used;
	__u32 out_len;
};

/*
 * Add a new virtio device
 * The (struct mic_device_desc *) pointer points to a device page entry
 *	for the virtio device consisting of:
 *	- struct mic_device_desc
 *	- struct mic_vqconfig (num_vq of these)
 *	- host and guest features
 *	- virtio device config space
 * The total size referenced by the pointer should equal the size returned
 * by mic_desc_size() in mic_common.h
 */
#define MIC_VIRTIO_ADD_DEVICE _IOWR('s', 1, struct mic_device_desc *)

/*
 * Copy the number of entries in the iovec and update the used index
 * if requested by the user.
 */
#define MIC_VIRTIO_COPY_DESC	_IOWR('s', 2, struct mic_copy_desc *)

/*
 * Notify virtio device of a config change
 * The (__u8 *) pointer points to config space values for the device
 * as they should be written into the device page. The total size
 * referenced by the pointer should equal the config_len field of struct
 * mic_device_desc.
 */
#define MIC_VIRTIO_CONFIG_CHANGE _IOWR('s', 5, __u8 *)

#endif