Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

[SCSI] vmw_pvscsi: SCSI driver for VMware's virtual HBA.

This is a driver for VMware's paravirtualized SCSI device,
which should improve disk performance for guests running
under control of VMware hypervisors that support such devices.

Signed-off-by: Alok N Kataria <akataria@vmware.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>

authored by Alok Kataria
committed by James Bottomley
851b1642 96e65865

5 files changed, 1821 insertions(+)

MAINTAINERS (+8)
···
 S:	Maintained
 F:	drivers/net/vmxnet3/

+VMware PVSCSI driver
+M:	Alok Kataria <akataria@vmware.com>
+M:	VMware PV-Drivers <pv-drivers@vmware.com>
+L:	linux-scsi@vger.kernel.org
+S:	Maintained
+F:	drivers/scsi/vmw_pvscsi.c
+F:	drivers/scsi/vmw_pvscsi.h
+
 VOLTAGE AND CURRENT REGULATOR FRAMEWORK
 M:	Liam Girdwood <lrg@slimlogic.co.uk>
 M:	Mark Brown <broonie@opensource.wolfsonmicro.com>

drivers/scsi/Kconfig (+8)
···
 	  substantial, so users of MultiMaster Host Adapters may not
 	  wish to include it.

+config VMWARE_PVSCSI
+	tristate "VMware PVSCSI driver support"
+	depends on PCI && SCSI && X86
+	help
+	  This driver supports VMware's para virtualized SCSI HBA.
+	  To compile this driver as a module, choose M here: the
+	  module will be called vmw_pvscsi.
+
 config LIBFC
 	tristate "LibFC module"
 	select SCSI_FC_ATTRS

drivers/scsi/Makefile (+1)
···
 obj-$(CONFIG_SCSI_BNX2_ISCSI)	+= libiscsi.o bnx2i/
 obj-$(CONFIG_BE2ISCSI)		+= libiscsi.o be2iscsi/
 obj-$(CONFIG_SCSI_PMCRAID)	+= pmcraid.o
+obj-$(CONFIG_VMWARE_PVSCSI)	+= vmw_pvscsi.o

 obj-$(CONFIG_ARM)		+= arm/

drivers/scsi/vmw_pvscsi.c (+1407)
··· 1 + /* 2 + * Linux driver for VMware's para-virtualized SCSI HBA. 3 + * 4 + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms of the GNU General Public License as published by the 8 + * Free Software Foundation; version 2 of the License and no later version. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 13 + * NON INFRINGEMENT. See the GNU General Public License for more 14 + * details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write to the Free Software 18 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 + * 20 + * Maintained by: Alok N Kataria <akataria@vmware.com> 21 + * 22 + */ 23 + 24 + #include <linux/kernel.h> 25 + #include <linux/module.h> 26 + #include <linux/interrupt.h> 27 + #include <linux/workqueue.h> 28 + #include <linux/pci.h> 29 + 30 + #include <scsi/scsi.h> 31 + #include <scsi/scsi_host.h> 32 + #include <scsi/scsi_cmnd.h> 33 + #include <scsi/scsi_device.h> 34 + 35 + #include "vmw_pvscsi.h" 36 + 37 + #define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver" 38 + 39 + MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC); 40 + MODULE_AUTHOR("VMware, Inc."); 41 + MODULE_LICENSE("GPL"); 42 + MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING); 43 + 44 + #define PVSCSI_DEFAULT_NUM_PAGES_PER_RING 8 45 + #define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING 1 46 + #define PVSCSI_DEFAULT_QUEUE_DEPTH 64 47 + #define SGL_SIZE PAGE_SIZE 48 + 49 + struct pvscsi_sg_list { 50 + struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT]; 51 + }; 52 + 53 + struct pvscsi_ctx { 54 + /* 55 + * The index of the context in cmd_map serves as the context ID for a 56 + * 1-to-1 mapping completions back to requests. 
57 + */ 58 + struct scsi_cmnd *cmd; 59 + struct pvscsi_sg_list *sgl; 60 + struct list_head list; 61 + dma_addr_t dataPA; 62 + dma_addr_t sensePA; 63 + dma_addr_t sglPA; 64 + }; 65 + 66 + struct pvscsi_adapter { 67 + char *mmioBase; 68 + unsigned int irq; 69 + u8 rev; 70 + bool use_msi; 71 + bool use_msix; 72 + bool use_msg; 73 + 74 + spinlock_t hw_lock; 75 + 76 + struct workqueue_struct *workqueue; 77 + struct work_struct work; 78 + 79 + struct PVSCSIRingReqDesc *req_ring; 80 + unsigned req_pages; 81 + unsigned req_depth; 82 + dma_addr_t reqRingPA; 83 + 84 + struct PVSCSIRingCmpDesc *cmp_ring; 85 + unsigned cmp_pages; 86 + dma_addr_t cmpRingPA; 87 + 88 + struct PVSCSIRingMsgDesc *msg_ring; 89 + unsigned msg_pages; 90 + dma_addr_t msgRingPA; 91 + 92 + struct PVSCSIRingsState *rings_state; 93 + dma_addr_t ringStatePA; 94 + 95 + struct pci_dev *dev; 96 + struct Scsi_Host *host; 97 + 98 + struct list_head cmd_pool; 99 + struct pvscsi_ctx *cmd_map; 100 + }; 101 + 102 + 103 + /* Command line parameters */ 104 + static int pvscsi_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_PER_RING; 105 + static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING; 106 + static int pvscsi_cmd_per_lun = PVSCSI_DEFAULT_QUEUE_DEPTH; 107 + static bool pvscsi_disable_msi; 108 + static bool pvscsi_disable_msix; 109 + static bool pvscsi_use_msg = true; 110 + 111 + #define PVSCSI_RW (S_IRUSR | S_IWUSR) 112 + 113 + module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW); 114 + MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default=" 115 + __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING) ")"); 116 + 117 + module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW); 118 + MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default=" 119 + __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")"); 120 + 121 + module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW); 122 + MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default=" 123 + __stringify(PVSCSI_MAX_REQ_QUEUE_DEPTH) ")"); 124 + 125 + module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW); 126 + MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)"); 127 + 128 + module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW); 129 + MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)"); 130 + 131 + module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW); 132 + MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)"); 133 + 134 + static const struct pci_device_id pvscsi_pci_tbl[] = { 135 + { PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) }, 136 + { 0 } 137 + }; 138 + 139 + MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl); 140 + 141 + static struct device * 142 + pvscsi_dev(const struct pvscsi_adapter *adapter) 143 + { 144 + return &(adapter->dev->dev); 145 + } 146 + 147 + static struct pvscsi_ctx * 148 + pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd) 149 + { 150 + struct pvscsi_ctx *ctx, *end; 151 + 152 + end = &adapter->cmd_map[adapter->req_depth]; 153 + for (ctx = adapter->cmd_map; ctx < end; ctx++) 154 + if (ctx->cmd == cmd) 155 + return ctx; 156 + 157 + return NULL; 158 + } 159 + 160 + static struct pvscsi_ctx * 161 + pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd) 162 + { 163 + struct pvscsi_ctx *ctx; 164 + 165 + if (list_empty(&adapter->cmd_pool)) 166 + return NULL; 167 + 168 + ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list); 169 + 
ctx->cmd = cmd; 170 + list_del(&ctx->list); 171 + 172 + return ctx; 173 + } 174 + 175 + static void pvscsi_release_context(struct pvscsi_adapter *adapter, 176 + struct pvscsi_ctx *ctx) 177 + { 178 + ctx->cmd = NULL; 179 + list_add(&ctx->list, &adapter->cmd_pool); 180 + } 181 + 182 + /* 183 + * Map a pvscsi_ctx struct to a context ID field value; we map to a simple 184 + * non-zero integer. ctx always points to an entry in cmd_map array, hence 185 + * the return value is always >=1. 186 + */ 187 + static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter, 188 + const struct pvscsi_ctx *ctx) 189 + { 190 + return ctx - adapter->cmd_map + 1; 191 + } 192 + 193 + static struct pvscsi_ctx * 194 + pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context) 195 + { 196 + return &adapter->cmd_map[context - 1]; 197 + } 198 + 199 + static void pvscsi_reg_write(const struct pvscsi_adapter *adapter, 200 + u32 offset, u32 val) 201 + { 202 + writel(val, adapter->mmioBase + offset); 203 + } 204 + 205 + static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset) 206 + { 207 + return readl(adapter->mmioBase + offset); 208 + } 209 + 210 + static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter) 211 + { 212 + return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS); 213 + } 214 + 215 + static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter, 216 + u32 val) 217 + { 218 + pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val); 219 + } 220 + 221 + static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter) 222 + { 223 + u32 intr_bits; 224 + 225 + intr_bits = PVSCSI_INTR_CMPL_MASK; 226 + if (adapter->use_msg) 227 + intr_bits |= PVSCSI_INTR_MSG_MASK; 228 + 229 + pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits); 230 + } 231 + 232 + static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter) 233 + { 234 + pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0); 235 + } 236 + 237 + static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter, 238 + u32 cmd, const void *desc, size_t len) 239 + { 240 + const u32 *ptr = desc; 241 + size_t i; 242 + 243 + len /= sizeof(*ptr); 244 + pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd); 245 + for (i = 0; i < len; i++) 246 + pvscsi_reg_write(adapter, 247 + PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]); 248 + } 249 + 250 + static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter, 251 + const struct pvscsi_ctx *ctx) 252 + { 253 + struct PVSCSICmdDescAbortCmd cmd = { 0 }; 254 + 255 + cmd.target = ctx->cmd->device->id; 256 + cmd.context = pvscsi_map_context(adapter, ctx); 257 + 258 + pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd)); 259 + } 260 + 261 + static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter) 262 + { 263 + pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0); 264 + } 265 + 266 + static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter) 267 + { 268 + pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0); 269 + } 270 + 271 + static int scsi_is_rw(unsigned char op) 272 + { 273 + return op == READ_6 || op == WRITE_6 || 274 + op == READ_10 || op == WRITE_10 || 275 + op == READ_12 || op == WRITE_12 || 276 + op == READ_16 || op == WRITE_16; 277 + } 278 + 279 + static void pvscsi_kick_io(const struct pvscsi_adapter *adapter, 280 + unsigned char op) 281 + { 282 + if (scsi_is_rw(op)) 283 + pvscsi_kick_rw_io(adapter); 284 + else 285 + pvscsi_process_request_ring(adapter); 
286 + } 287 + 288 + static void ll_adapter_reset(const struct pvscsi_adapter *adapter) 289 + { 290 + dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter); 291 + 292 + pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0); 293 + } 294 + 295 + static void ll_bus_reset(const struct pvscsi_adapter *adapter) 296 + { 297 + dev_dbg(pvscsi_dev(adapter), "Reseting bus on %p\n", adapter); 298 + 299 + pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0); 300 + } 301 + 302 + static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target) 303 + { 304 + struct PVSCSICmdDescResetDevice cmd = { 0 }; 305 + 306 + dev_dbg(pvscsi_dev(adapter), "Reseting device: target=%u\n", target); 307 + 308 + cmd.target = target; 309 + 310 + pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE, 311 + &cmd, sizeof(cmd)); 312 + } 313 + 314 + static void pvscsi_create_sg(struct pvscsi_ctx *ctx, 315 + struct scatterlist *sg, unsigned count) 316 + { 317 + unsigned i; 318 + struct PVSCSISGElement *sge; 319 + 320 + BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT); 321 + 322 + sge = &ctx->sgl->sge[0]; 323 + for (i = 0; i < count; i++, sg++) { 324 + sge[i].addr = sg_dma_address(sg); 325 + sge[i].length = sg_dma_len(sg); 326 + sge[i].flags = 0; 327 + } 328 + } 329 + 330 + /* 331 + * Map all data buffers for a command into PCI space and 332 + * setup the scatter/gather list if needed. 333 + */ 334 + static void pvscsi_map_buffers(struct pvscsi_adapter *adapter, 335 + struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd, 336 + struct PVSCSIRingReqDesc *e) 337 + { 338 + unsigned count; 339 + unsigned bufflen = scsi_bufflen(cmd); 340 + struct scatterlist *sg; 341 + 342 + e->dataLen = bufflen; 343 + e->dataAddr = 0; 344 + if (bufflen == 0) 345 + return; 346 + 347 + sg = scsi_sglist(cmd); 348 + count = scsi_sg_count(cmd); 349 + if (count != 0) { 350 + int segs = scsi_dma_map(cmd); 351 + if (segs > 1) { 352 + pvscsi_create_sg(ctx, sg, segs); 353 + 354 + e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST; 355 + ctx->sglPA = pci_map_single(adapter->dev, ctx->sgl, 356 + SGL_SIZE, PCI_DMA_TODEVICE); 357 + e->dataAddr = ctx->sglPA; 358 + } else 359 + e->dataAddr = sg_dma_address(sg); 360 + } else { 361 + /* 362 + * In case there is no S/G list, scsi_sglist points 363 + * directly to the buffer. 
364 + */ 365 + ctx->dataPA = pci_map_single(adapter->dev, sg, bufflen, 366 + cmd->sc_data_direction); 367 + e->dataAddr = ctx->dataPA; 368 + } 369 + } 370 + 371 + static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter, 372 + struct pvscsi_ctx *ctx) 373 + { 374 + struct scsi_cmnd *cmd; 375 + unsigned bufflen; 376 + 377 + cmd = ctx->cmd; 378 + bufflen = scsi_bufflen(cmd); 379 + 380 + if (bufflen != 0) { 381 + unsigned count = scsi_sg_count(cmd); 382 + 383 + if (count != 0) { 384 + scsi_dma_unmap(cmd); 385 + if (ctx->sglPA) { 386 + pci_unmap_single(adapter->dev, ctx->sglPA, 387 + SGL_SIZE, PCI_DMA_TODEVICE); 388 + ctx->sglPA = 0; 389 + } 390 + } else 391 + pci_unmap_single(adapter->dev, ctx->dataPA, bufflen, 392 + cmd->sc_data_direction); 393 + } 394 + if (cmd->sense_buffer) 395 + pci_unmap_single(adapter->dev, ctx->sensePA, 396 + SCSI_SENSE_BUFFERSIZE, PCI_DMA_FROMDEVICE); 397 + } 398 + 399 + static int __devinit pvscsi_allocate_rings(struct pvscsi_adapter *adapter) 400 + { 401 + adapter->rings_state = pci_alloc_consistent(adapter->dev, PAGE_SIZE, 402 + &adapter->ringStatePA); 403 + if (!adapter->rings_state) 404 + return -ENOMEM; 405 + 406 + adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING, 407 + pvscsi_ring_pages); 408 + adapter->req_depth = adapter->req_pages 409 + * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; 410 + adapter->req_ring = pci_alloc_consistent(adapter->dev, 411 + adapter->req_pages * PAGE_SIZE, 412 + &adapter->reqRingPA); 413 + if (!adapter->req_ring) 414 + return -ENOMEM; 415 + 416 + adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING, 417 + pvscsi_ring_pages); 418 + adapter->cmp_ring = pci_alloc_consistent(adapter->dev, 419 + adapter->cmp_pages * PAGE_SIZE, 420 + &adapter->cmpRingPA); 421 + if (!adapter->cmp_ring) 422 + return -ENOMEM; 423 + 424 + BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE)); 425 + BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE)); 426 + BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE)); 427 + 428 + if (!adapter->use_msg) 429 + return 0; 430 + 431 + adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING, 432 + pvscsi_msg_ring_pages); 433 + adapter->msg_ring = pci_alloc_consistent(adapter->dev, 434 + adapter->msg_pages * PAGE_SIZE, 435 + &adapter->msgRingPA); 436 + if (!adapter->msg_ring) 437 + return -ENOMEM; 438 + BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE)); 439 + 440 + return 0; 441 + } 442 + 443 + static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter) 444 + { 445 + struct PVSCSICmdDescSetupRings cmd = { 0 }; 446 + dma_addr_t base; 447 + unsigned i; 448 + 449 + cmd.ringsStatePPN = adapter->ringStatePA >> PAGE_SHIFT; 450 + cmd.reqRingNumPages = adapter->req_pages; 451 + cmd.cmpRingNumPages = adapter->cmp_pages; 452 + 453 + base = adapter->reqRingPA; 454 + for (i = 0; i < adapter->req_pages; i++) { 455 + cmd.reqRingPPNs[i] = base >> PAGE_SHIFT; 456 + base += PAGE_SIZE; 457 + } 458 + 459 + base = adapter->cmpRingPA; 460 + for (i = 0; i < adapter->cmp_pages; i++) { 461 + cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT; 462 + base += PAGE_SIZE; 463 + } 464 + 465 + memset(adapter->rings_state, 0, PAGE_SIZE); 466 + memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE); 467 + memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE); 468 + 469 + pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS, 470 + &cmd, sizeof(cmd)); 471 + 472 + if (adapter->use_msg) { 473 + struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 }; 474 + 475 + cmd_msg.numPages = adapter->msg_pages; 476 + 477 + base = adapter->msgRingPA; 478 + 
for (i = 0; i < adapter->msg_pages; i++) { 479 + cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT; 480 + base += PAGE_SIZE; 481 + } 482 + memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE); 483 + 484 + pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING, 485 + &cmd_msg, sizeof(cmd_msg)); 486 + } 487 + } 488 + 489 + /* 490 + * Pull a completion descriptor off and pass the completion back 491 + * to the SCSI mid layer. 492 + */ 493 + static void pvscsi_complete_request(struct pvscsi_adapter *adapter, 494 + const struct PVSCSIRingCmpDesc *e) 495 + { 496 + struct pvscsi_ctx *ctx; 497 + struct scsi_cmnd *cmd; 498 + u32 btstat = e->hostStatus; 499 + u32 sdstat = e->scsiStatus; 500 + 501 + ctx = pvscsi_get_context(adapter, e->context); 502 + cmd = ctx->cmd; 503 + pvscsi_unmap_buffers(adapter, ctx); 504 + pvscsi_release_context(adapter, ctx); 505 + cmd->result = 0; 506 + 507 + if (sdstat != SAM_STAT_GOOD && 508 + (btstat == BTSTAT_SUCCESS || 509 + btstat == BTSTAT_LINKED_COMMAND_COMPLETED || 510 + btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) { 511 + cmd->result = (DID_OK << 16) | sdstat; 512 + if (sdstat == SAM_STAT_CHECK_CONDITION && cmd->sense_buffer) 513 + cmd->result |= (DRIVER_SENSE << 24); 514 + } else 515 + switch (btstat) { 516 + case BTSTAT_SUCCESS: 517 + case BTSTAT_LINKED_COMMAND_COMPLETED: 518 + case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG: 519 + /* If everything went fine, let's move on.. */ 520 + cmd->result = (DID_OK << 16); 521 + break; 522 + 523 + case BTSTAT_DATARUN: 524 + case BTSTAT_DATA_UNDERRUN: 525 + /* Report residual data in underruns */ 526 + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); 527 + cmd->result = (DID_ERROR << 16); 528 + break; 529 + 530 + case BTSTAT_SELTIMEO: 531 + /* Our emulation returns this for non-connected devs */ 532 + cmd->result = (DID_BAD_TARGET << 16); 533 + break; 534 + 535 + case BTSTAT_LUNMISMATCH: 536 + case BTSTAT_TAGREJECT: 537 + case BTSTAT_BADMSG: 538 + cmd->result = (DRIVER_INVALID << 24); 539 + /* fall through */ 540 + 541 + case BTSTAT_HAHARDWARE: 542 + case BTSTAT_INVPHASE: 543 + case BTSTAT_HATIMEOUT: 544 + case BTSTAT_NORESPONSE: 545 + case BTSTAT_DISCONNECT: 546 + case BTSTAT_HASOFTWARE: 547 + case BTSTAT_BUSFREE: 548 + case BTSTAT_SENSFAILED: 549 + cmd->result |= (DID_ERROR << 16); 550 + break; 551 + 552 + case BTSTAT_SENTRST: 553 + case BTSTAT_RECVRST: 554 + case BTSTAT_BUSRESET: 555 + cmd->result = (DID_RESET << 16); 556 + break; 557 + 558 + case BTSTAT_ABORTQUEUE: 559 + cmd->result = (DID_ABORT << 16); 560 + break; 561 + 562 + case BTSTAT_SCSIPARITY: 563 + cmd->result = (DID_PARITY << 16); 564 + break; 565 + 566 + default: 567 + cmd->result = (DID_ERROR << 16); 568 + scmd_printk(KERN_DEBUG, cmd, 569 + "Unknown completion status: 0x%x\n", 570 + btstat); 571 + } 572 + 573 + dev_dbg(&cmd->device->sdev_gendev, 574 + "cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n", 575 + cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat); 576 + 577 + cmd->scsi_done(cmd); 578 + } 579 + 580 + /* 581 + * barrier usage : Since the PVSCSI device is emulated, there could be cases 582 + * where we may want to serialize some accesses between the driver and the 583 + * emulation layer. We use compiler barriers instead of the more expensive 584 + * memory barriers because PVSCSI is only supported on X86 which has strong 585 + * memory access ordering. 
586 + */ 587 + static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter) 588 + { 589 + struct PVSCSIRingsState *s = adapter->rings_state; 590 + struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring; 591 + u32 cmp_entries = s->cmpNumEntriesLog2; 592 + 593 + while (s->cmpConsIdx != s->cmpProdIdx) { 594 + struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx & 595 + MASK(cmp_entries)); 596 + /* 597 + * This barrier() ensures that *e is not dereferenced while 598 + * the device emulation still writes data into the slot. 599 + * Since the device emulation advances s->cmpProdIdx only after 600 + * updating the slot we want to check it first. 601 + */ 602 + barrier(); 603 + pvscsi_complete_request(adapter, e); 604 + /* 605 + * This barrier() ensures that compiler doesn't reorder write 606 + * to s->cmpConsIdx before the read of (*e) inside 607 + * pvscsi_complete_request. Otherwise, device emulation may 608 + * overwrite *e before we had a chance to read it. 609 + */ 610 + barrier(); 611 + s->cmpConsIdx++; 612 + } 613 + } 614 + 615 + /* 616 + * Translate a Linux SCSI request into a request ring entry. 617 + */ 618 + static int pvscsi_queue_ring(struct pvscsi_adapter *adapter, 619 + struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd) 620 + { 621 + struct PVSCSIRingsState *s; 622 + struct PVSCSIRingReqDesc *e; 623 + struct scsi_device *sdev; 624 + u32 req_entries; 625 + 626 + s = adapter->rings_state; 627 + sdev = cmd->device; 628 + req_entries = s->reqNumEntriesLog2; 629 + 630 + /* 631 + * If this condition holds, we might have room on the request ring, but 632 + * we might not have room on the completion ring for the response. 633 + * However, we have already ruled out this possibility - we would not 634 + * have successfully allocated a context if it were true, since we only 635 + * have one context per request entry. Check for it anyway, since it 636 + * would be a serious bug. 
637 + */ 638 + if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) { 639 + scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: " 640 + "ring full: reqProdIdx=%d cmpConsIdx=%d\n", 641 + s->reqProdIdx, s->cmpConsIdx); 642 + return -1; 643 + } 644 + 645 + e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries)); 646 + 647 + e->bus = sdev->channel; 648 + e->target = sdev->id; 649 + memset(e->lun, 0, sizeof(e->lun)); 650 + e->lun[1] = sdev->lun; 651 + 652 + if (cmd->sense_buffer) { 653 + ctx->sensePA = pci_map_single(adapter->dev, cmd->sense_buffer, 654 + SCSI_SENSE_BUFFERSIZE, 655 + PCI_DMA_FROMDEVICE); 656 + e->senseAddr = ctx->sensePA; 657 + e->senseLen = SCSI_SENSE_BUFFERSIZE; 658 + } else { 659 + e->senseLen = 0; 660 + e->senseAddr = 0; 661 + } 662 + e->cdbLen = cmd->cmd_len; 663 + e->vcpuHint = smp_processor_id(); 664 + memcpy(e->cdb, cmd->cmnd, e->cdbLen); 665 + 666 + e->tag = SIMPLE_QUEUE_TAG; 667 + if (sdev->tagged_supported && 668 + (cmd->tag == HEAD_OF_QUEUE_TAG || 669 + cmd->tag == ORDERED_QUEUE_TAG)) 670 + e->tag = cmd->tag; 671 + 672 + if (cmd->sc_data_direction == DMA_FROM_DEVICE) 673 + e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST; 674 + else if (cmd->sc_data_direction == DMA_TO_DEVICE) 675 + e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE; 676 + else if (cmd->sc_data_direction == DMA_NONE) 677 + e->flags = PVSCSI_FLAG_CMD_DIR_NONE; 678 + else 679 + e->flags = 0; 680 + 681 + pvscsi_map_buffers(adapter, ctx, cmd, e); 682 + 683 + e->context = pvscsi_map_context(adapter, ctx); 684 + 685 + barrier(); 686 + 687 + s->reqProdIdx++; 688 + 689 + return 0; 690 + } 691 + 692 + static int pvscsi_queue(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) 693 + { 694 + struct Scsi_Host *host = cmd->device->host; 695 + struct pvscsi_adapter *adapter = shost_priv(host); 696 + struct pvscsi_ctx *ctx; 697 + unsigned long flags; 698 + 699 + spin_lock_irqsave(&adapter->hw_lock, flags); 700 + 701 + ctx = pvscsi_acquire_context(adapter, cmd); 702 + if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) { 703 + if (ctx) 704 + pvscsi_release_context(adapter, ctx); 705 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 706 + return SCSI_MLQUEUE_HOST_BUSY; 707 + } 708 + 709 + cmd->scsi_done = done; 710 + 711 + dev_dbg(&cmd->device->sdev_gendev, 712 + "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, cmd->cmnd[0]); 713 + 714 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 715 + 716 + pvscsi_kick_io(adapter, cmd->cmnd[0]); 717 + 718 + return 0; 719 + } 720 + 721 + static int pvscsi_abort(struct scsi_cmnd *cmd) 722 + { 723 + struct pvscsi_adapter *adapter = shost_priv(cmd->device->host); 724 + struct pvscsi_ctx *ctx; 725 + unsigned long flags; 726 + 727 + scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n", 728 + adapter->host->host_no, cmd); 729 + 730 + spin_lock_irqsave(&adapter->hw_lock, flags); 731 + 732 + /* 733 + * Poll the completion ring first - we might be trying to abort 734 + * a command that is waiting to be dispatched in the completion ring. 735 + */ 736 + pvscsi_process_completion_ring(adapter); 737 + 738 + /* 739 + * If there is no context for the command, it either already succeeded 740 + * or else was never properly issued. Not our problem. 
741 + */ 742 + ctx = pvscsi_find_context(adapter, cmd); 743 + if (!ctx) { 744 + scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd); 745 + goto out; 746 + } 747 + 748 + pvscsi_abort_cmd(adapter, ctx); 749 + 750 + pvscsi_process_completion_ring(adapter); 751 + 752 + out: 753 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 754 + return SUCCESS; 755 + } 756 + 757 + /* 758 + * Abort all outstanding requests. This is only safe to use if the completion 759 + * ring will never be walked again or the device has been reset, because it 760 + * destroys the 1-1 mapping between context field passed to emulation and our 761 + * request structure. 762 + */ 763 + static void pvscsi_reset_all(struct pvscsi_adapter *adapter) 764 + { 765 + unsigned i; 766 + 767 + for (i = 0; i < adapter->req_depth; i++) { 768 + struct pvscsi_ctx *ctx = &adapter->cmd_map[i]; 769 + struct scsi_cmnd *cmd = ctx->cmd; 770 + if (cmd) { 771 + scmd_printk(KERN_ERR, cmd, 772 + "Forced reset on cmd %p\n", cmd); 773 + pvscsi_unmap_buffers(adapter, ctx); 774 + pvscsi_release_context(adapter, ctx); 775 + cmd->result = (DID_RESET << 16); 776 + cmd->scsi_done(cmd); 777 + } 778 + } 779 + } 780 + 781 + static int pvscsi_host_reset(struct scsi_cmnd *cmd) 782 + { 783 + struct Scsi_Host *host = cmd->device->host; 784 + struct pvscsi_adapter *adapter = shost_priv(host); 785 + unsigned long flags; 786 + bool use_msg; 787 + 788 + scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n"); 789 + 790 + spin_lock_irqsave(&adapter->hw_lock, flags); 791 + 792 + use_msg = adapter->use_msg; 793 + 794 + if (use_msg) { 795 + adapter->use_msg = 0; 796 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 797 + 798 + /* 799 + * Now that we know that the ISR won't add more work on the 800 + * workqueue we can safely flush any outstanding work. 801 + */ 802 + flush_workqueue(adapter->workqueue); 803 + spin_lock_irqsave(&adapter->hw_lock, flags); 804 + } 805 + 806 + /* 807 + * We're going to tear down the entire ring structure and set it back 808 + * up, so stalling new requests until all completions are flushed and 809 + * the rings are back in place. 810 + */ 811 + 812 + pvscsi_process_request_ring(adapter); 813 + 814 + ll_adapter_reset(adapter); 815 + 816 + /* 817 + * Now process any completions. Note we do this AFTER adapter reset, 818 + * which is strange, but stops races where completions get posted 819 + * between processing the ring and issuing the reset. The backend will 820 + * not touch the ring memory after reset, so the immediately pre-reset 821 + * completion ring state is still valid. 822 + */ 823 + pvscsi_process_completion_ring(adapter); 824 + 825 + pvscsi_reset_all(adapter); 826 + adapter->use_msg = use_msg; 827 + pvscsi_setup_all_rings(adapter); 828 + pvscsi_unmask_intr(adapter); 829 + 830 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 831 + 832 + return SUCCESS; 833 + } 834 + 835 + static int pvscsi_bus_reset(struct scsi_cmnd *cmd) 836 + { 837 + struct Scsi_Host *host = cmd->device->host; 838 + struct pvscsi_adapter *adapter = shost_priv(host); 839 + unsigned long flags; 840 + 841 + scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n"); 842 + 843 + /* 844 + * We don't want to queue new requests for this bus after 845 + * flushing all pending requests to emulation, since new 846 + * requests could then sneak in during this bus reset phase, 847 + * so take the lock now. 
848 + */ 849 + spin_lock_irqsave(&adapter->hw_lock, flags); 850 + 851 + pvscsi_process_request_ring(adapter); 852 + ll_bus_reset(adapter); 853 + pvscsi_process_completion_ring(adapter); 854 + 855 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 856 + 857 + return SUCCESS; 858 + } 859 + 860 + static int pvscsi_device_reset(struct scsi_cmnd *cmd) 861 + { 862 + struct Scsi_Host *host = cmd->device->host; 863 + struct pvscsi_adapter *adapter = shost_priv(host); 864 + unsigned long flags; 865 + 866 + scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n", 867 + host->host_no, cmd->device->id); 868 + 869 + /* 870 + * We don't want to queue new requests for this device after flushing 871 + * all pending requests to emulation, since new requests could then 872 + * sneak in during this device reset phase, so take the lock now. 873 + */ 874 + spin_lock_irqsave(&adapter->hw_lock, flags); 875 + 876 + pvscsi_process_request_ring(adapter); 877 + ll_device_reset(adapter, cmd->device->id); 878 + pvscsi_process_completion_ring(adapter); 879 + 880 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 881 + 882 + return SUCCESS; 883 + } 884 + 885 + static struct scsi_host_template pvscsi_template; 886 + 887 + static const char *pvscsi_info(struct Scsi_Host *host) 888 + { 889 + struct pvscsi_adapter *adapter = shost_priv(host); 890 + static char buf[256]; 891 + 892 + sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: " 893 + "%u/%u/%u pages, cmd_per_lun=%u", adapter->rev, 894 + adapter->req_pages, adapter->cmp_pages, adapter->msg_pages, 895 + pvscsi_template.cmd_per_lun); 896 + 897 + return buf; 898 + } 899 + 900 + static struct scsi_host_template pvscsi_template = { 901 + .module = THIS_MODULE, 902 + .name = "VMware PVSCSI Host Adapter", 903 + .proc_name = "vmw_pvscsi", 904 + .info = pvscsi_info, 905 + .queuecommand = pvscsi_queue, 906 + .this_id = -1, 907 + .sg_tablesize = PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT, 908 + .dma_boundary = UINT_MAX, 909 + .max_sectors = 0xffff, 910 + .use_clustering = ENABLE_CLUSTERING, 911 + .eh_abort_handler = pvscsi_abort, 912 + .eh_device_reset_handler = pvscsi_device_reset, 913 + .eh_bus_reset_handler = pvscsi_bus_reset, 914 + .eh_host_reset_handler = pvscsi_host_reset, 915 + }; 916 + 917 + static void pvscsi_process_msg(const struct pvscsi_adapter *adapter, 918 + const struct PVSCSIRingMsgDesc *e) 919 + { 920 + struct PVSCSIRingsState *s = adapter->rings_state; 921 + struct Scsi_Host *host = adapter->host; 922 + struct scsi_device *sdev; 923 + 924 + printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u) \n", 925 + e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2); 926 + 927 + BUILD_BUG_ON(PVSCSI_MSG_LAST != 2); 928 + 929 + if (e->type == PVSCSI_MSG_DEV_ADDED) { 930 + struct PVSCSIMsgDescDevStatusChanged *desc; 931 + desc = (struct PVSCSIMsgDescDevStatusChanged *)e; 932 + 933 + printk(KERN_INFO 934 + "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n", 935 + desc->bus, desc->target, desc->lun[1]); 936 + 937 + if (!scsi_host_get(host)) 938 + return; 939 + 940 + sdev = scsi_device_lookup(host, desc->bus, desc->target, 941 + desc->lun[1]); 942 + if (sdev) { 943 + printk(KERN_INFO "vmw_pvscsi: device already exists\n"); 944 + scsi_device_put(sdev); 945 + } else 946 + scsi_add_device(adapter->host, desc->bus, 947 + desc->target, desc->lun[1]); 948 + 949 + scsi_host_put(host); 950 + } else if (e->type == PVSCSI_MSG_DEV_REMOVED) { 951 + struct PVSCSIMsgDescDevStatusChanged *desc; 952 + desc = (struct PVSCSIMsgDescDevStatusChanged 
*)e; 953 + 954 + printk(KERN_INFO 955 + "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n", 956 + desc->bus, desc->target, desc->lun[1]); 957 + 958 + if (!scsi_host_get(host)) 959 + return; 960 + 961 + sdev = scsi_device_lookup(host, desc->bus, desc->target, 962 + desc->lun[1]); 963 + if (sdev) { 964 + scsi_remove_device(sdev); 965 + scsi_device_put(sdev); 966 + } else 967 + printk(KERN_INFO 968 + "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n", 969 + desc->bus, desc->target, desc->lun[1]); 970 + 971 + scsi_host_put(host); 972 + } 973 + } 974 + 975 + static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter) 976 + { 977 + struct PVSCSIRingsState *s = adapter->rings_state; 978 + 979 + return s->msgProdIdx != s->msgConsIdx; 980 + } 981 + 982 + static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter) 983 + { 984 + struct PVSCSIRingsState *s = adapter->rings_state; 985 + struct PVSCSIRingMsgDesc *ring = adapter->msg_ring; 986 + u32 msg_entries = s->msgNumEntriesLog2; 987 + 988 + while (pvscsi_msg_pending(adapter)) { 989 + struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx & 990 + MASK(msg_entries)); 991 + 992 + barrier(); 993 + pvscsi_process_msg(adapter, e); 994 + barrier(); 995 + s->msgConsIdx++; 996 + } 997 + } 998 + 999 + static void pvscsi_msg_workqueue_handler(struct work_struct *data) 1000 + { 1001 + struct pvscsi_adapter *adapter; 1002 + 1003 + adapter = container_of(data, struct pvscsi_adapter, work); 1004 + 1005 + pvscsi_process_msg_ring(adapter); 1006 + } 1007 + 1008 + static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter) 1009 + { 1010 + char name[32]; 1011 + 1012 + if (!pvscsi_use_msg) 1013 + return 0; 1014 + 1015 + pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, 1016 + PVSCSI_CMD_SETUP_MSG_RING); 1017 + 1018 + if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1) 1019 + return 0; 1020 + 1021 + snprintf(name, sizeof(name), 1022 + "vmw_pvscsi_wq_%u", adapter->host->host_no); 1023 + 1024 + adapter->workqueue = create_singlethread_workqueue(name); 1025 + if (!adapter->workqueue) { 1026 + printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n"); 1027 + return 0; 1028 + } 1029 + INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler); 1030 + 1031 + return 1; 1032 + } 1033 + 1034 + static irqreturn_t pvscsi_isr(int irq, void *devp) 1035 + { 1036 + struct pvscsi_adapter *adapter = devp; 1037 + int handled; 1038 + 1039 + if (adapter->use_msi || adapter->use_msix) 1040 + handled = true; 1041 + else { 1042 + u32 val = pvscsi_read_intr_status(adapter); 1043 + handled = (val & PVSCSI_INTR_ALL_SUPPORTED) != 0; 1044 + if (handled) 1045 + pvscsi_write_intr_status(devp, val); 1046 + } 1047 + 1048 + if (handled) { 1049 + unsigned long flags; 1050 + 1051 + spin_lock_irqsave(&adapter->hw_lock, flags); 1052 + 1053 + pvscsi_process_completion_ring(adapter); 1054 + if (adapter->use_msg && pvscsi_msg_pending(adapter)) 1055 + queue_work(adapter->workqueue, &adapter->work); 1056 + 1057 + spin_unlock_irqrestore(&adapter->hw_lock, flags); 1058 + } 1059 + 1060 + return IRQ_RETVAL(handled); 1061 + } 1062 + 1063 + static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter) 1064 + { 1065 + struct pvscsi_ctx *ctx = adapter->cmd_map; 1066 + unsigned i; 1067 + 1068 + for (i = 0; i < adapter->req_depth; ++i, ++ctx) 1069 + free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE)); 1070 + } 1071 + 1072 + static int pvscsi_setup_msix(const struct pvscsi_adapter *adapter, int *irq) 1073 + { 1074 + struct msix_entry entry = { 0, 
PVSCSI_VECTOR_COMPLETION }; 1075 + int ret; 1076 + 1077 + ret = pci_enable_msix(adapter->dev, &entry, 1); 1078 + if (ret) 1079 + return ret; 1080 + 1081 + *irq = entry.vector; 1082 + 1083 + return 0; 1084 + } 1085 + 1086 + static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter) 1087 + { 1088 + if (adapter->irq) { 1089 + free_irq(adapter->irq, adapter); 1090 + adapter->irq = 0; 1091 + } 1092 + if (adapter->use_msi) { 1093 + pci_disable_msi(adapter->dev); 1094 + adapter->use_msi = 0; 1095 + } else if (adapter->use_msix) { 1096 + pci_disable_msix(adapter->dev); 1097 + adapter->use_msix = 0; 1098 + } 1099 + } 1100 + 1101 + static void pvscsi_release_resources(struct pvscsi_adapter *adapter) 1102 + { 1103 + pvscsi_shutdown_intr(adapter); 1104 + 1105 + if (adapter->workqueue) 1106 + destroy_workqueue(adapter->workqueue); 1107 + 1108 + if (adapter->mmioBase) 1109 + pci_iounmap(adapter->dev, adapter->mmioBase); 1110 + 1111 + pci_release_regions(adapter->dev); 1112 + 1113 + if (adapter->cmd_map) { 1114 + pvscsi_free_sgls(adapter); 1115 + kfree(adapter->cmd_map); 1116 + } 1117 + 1118 + if (adapter->rings_state) 1119 + pci_free_consistent(adapter->dev, PAGE_SIZE, 1120 + adapter->rings_state, adapter->ringStatePA); 1121 + 1122 + if (adapter->req_ring) 1123 + pci_free_consistent(adapter->dev, 1124 + adapter->req_pages * PAGE_SIZE, 1125 + adapter->req_ring, adapter->reqRingPA); 1126 + 1127 + if (adapter->cmp_ring) 1128 + pci_free_consistent(adapter->dev, 1129 + adapter->cmp_pages * PAGE_SIZE, 1130 + adapter->cmp_ring, adapter->cmpRingPA); 1131 + 1132 + if (adapter->msg_ring) 1133 + pci_free_consistent(adapter->dev, 1134 + adapter->msg_pages * PAGE_SIZE, 1135 + adapter->msg_ring, adapter->msgRingPA); 1136 + } 1137 + 1138 + /* 1139 + * Allocate scatter gather lists. 1140 + * 1141 + * These are statically allocated. Trying to be clever was not worth it. 1142 + * 1143 + * Dynamic allocation can fail, and we can't go deeep into the memory 1144 + * allocator, since we're a SCSI driver, and trying too hard to allocate 1145 + * memory might generate disk I/O. We also don't want to fail disk I/O 1146 + * in that case because we can't get an allocation - the I/O could be 1147 + * trying to swap out data to free memory. Since that is pathological, 1148 + * just use a statically allocated scatter list. 
1149 + * 1150 + */ 1151 + static int __devinit pvscsi_allocate_sg(struct pvscsi_adapter *adapter) 1152 + { 1153 + struct pvscsi_ctx *ctx; 1154 + int i; 1155 + 1156 + ctx = adapter->cmd_map; 1157 + BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE); 1158 + 1159 + for (i = 0; i < adapter->req_depth; ++i, ++ctx) { 1160 + ctx->sgl = (void *)__get_free_pages(GFP_KERNEL, 1161 + get_order(SGL_SIZE)); 1162 + ctx->sglPA = 0; 1163 + BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE)); 1164 + if (!ctx->sgl) { 1165 + for (; i >= 0; --i, --ctx) { 1166 + free_pages((unsigned long)ctx->sgl, 1167 + get_order(SGL_SIZE)); 1168 + ctx->sgl = NULL; 1169 + } 1170 + return -ENOMEM; 1171 + } 1172 + } 1173 + 1174 + return 0; 1175 + } 1176 + 1177 + static int __devinit pvscsi_probe(struct pci_dev *pdev, 1178 + const struct pci_device_id *id) 1179 + { 1180 + struct pvscsi_adapter *adapter; 1181 + struct Scsi_Host *host; 1182 + unsigned int i; 1183 + unsigned long flags = 0; 1184 + int error; 1185 + 1186 + error = -ENODEV; 1187 + 1188 + if (pci_enable_device(pdev)) 1189 + return error; 1190 + 1191 + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0 && 1192 + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { 1193 + printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n"); 1194 + } else if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) == 0 && 1195 + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) == 0) { 1196 + printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n"); 1197 + } else { 1198 + printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n"); 1199 + goto out_disable_device; 1200 + } 1201 + 1202 + pvscsi_template.can_queue = 1203 + min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) * 1204 + PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE; 1205 + pvscsi_template.cmd_per_lun = 1206 + min(pvscsi_template.can_queue, pvscsi_cmd_per_lun); 1207 + host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter)); 1208 + if (!host) { 1209 + printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n"); 1210 + goto out_disable_device; 1211 + } 1212 + 1213 + adapter = shost_priv(host); 1214 + memset(adapter, 0, sizeof(*adapter)); 1215 + adapter->dev = pdev; 1216 + adapter->host = host; 1217 + 1218 + spin_lock_init(&adapter->hw_lock); 1219 + 1220 + host->max_channel = 0; 1221 + host->max_id = 16; 1222 + host->max_lun = 1; 1223 + host->max_cmd_len = 16; 1224 + 1225 + adapter->rev = pdev->revision; 1226 + 1227 + if (pci_request_regions(pdev, "vmw_pvscsi")) { 1228 + printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n"); 1229 + goto out_free_host; 1230 + } 1231 + 1232 + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { 1233 + if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO)) 1234 + continue; 1235 + 1236 + if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE) 1237 + continue; 1238 + 1239 + break; 1240 + } 1241 + 1242 + if (i == DEVICE_COUNT_RESOURCE) { 1243 + printk(KERN_ERR 1244 + "vmw_pvscsi: adapter has no suitable MMIO region\n"); 1245 + goto out_release_resources; 1246 + } 1247 + 1248 + adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE); 1249 + 1250 + if (!adapter->mmioBase) { 1251 + printk(KERN_ERR 1252 + "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n", 1253 + i, PVSCSI_MEM_SPACE_SIZE); 1254 + goto out_release_resources; 1255 + } 1256 + 1257 + pci_set_master(pdev); 1258 + pci_set_drvdata(pdev, host); 1259 + 1260 + ll_adapter_reset(adapter); 1261 + 1262 + adapter->use_msg = pvscsi_setup_msg_workqueue(adapter); 1263 + 1264 + error = pvscsi_allocate_rings(adapter); 1265 + if (error) { 1266 + 
printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n"); 1267 + goto out_release_resources; 1268 + } 1269 + 1270 + /* 1271 + * From this point on we should reset the adapter if anything goes 1272 + * wrong. 1273 + */ 1274 + pvscsi_setup_all_rings(adapter); 1275 + 1276 + adapter->cmd_map = kcalloc(adapter->req_depth, 1277 + sizeof(struct pvscsi_ctx), GFP_KERNEL); 1278 + if (!adapter->cmd_map) { 1279 + printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n"); 1280 + error = -ENOMEM; 1281 + goto out_reset_adapter; 1282 + } 1283 + 1284 + INIT_LIST_HEAD(&adapter->cmd_pool); 1285 + for (i = 0; i < adapter->req_depth; i++) { 1286 + struct pvscsi_ctx *ctx = adapter->cmd_map + i; 1287 + list_add(&ctx->list, &adapter->cmd_pool); 1288 + } 1289 + 1290 + error = pvscsi_allocate_sg(adapter); 1291 + if (error) { 1292 + printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n"); 1293 + goto out_reset_adapter; 1294 + } 1295 + 1296 + if (!pvscsi_disable_msix && 1297 + pvscsi_setup_msix(adapter, &adapter->irq) == 0) { 1298 + printk(KERN_INFO "vmw_pvscsi: using MSI-X\n"); 1299 + adapter->use_msix = 1; 1300 + } else if (!pvscsi_disable_msi && pci_enable_msi(pdev) == 0) { 1301 + printk(KERN_INFO "vmw_pvscsi: using MSI\n"); 1302 + adapter->use_msi = 1; 1303 + adapter->irq = pdev->irq; 1304 + } else { 1305 + printk(KERN_INFO "vmw_pvscsi: using INTx\n"); 1306 + adapter->irq = pdev->irq; 1307 + flags = IRQF_SHARED; 1308 + } 1309 + 1310 + error = request_irq(adapter->irq, pvscsi_isr, flags, 1311 + "vmw_pvscsi", adapter); 1312 + if (error) { 1313 + printk(KERN_ERR 1314 + "vmw_pvscsi: unable to request IRQ: %d\n", error); 1315 + adapter->irq = 0; 1316 + goto out_reset_adapter; 1317 + } 1318 + 1319 + error = scsi_add_host(host, &pdev->dev); 1320 + if (error) { 1321 + printk(KERN_ERR 1322 + "vmw_pvscsi: scsi_add_host failed: %d\n", error); 1323 + goto out_reset_adapter; 1324 + } 1325 + 1326 + dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n", 1327 + adapter->rev, host->host_no); 1328 + 1329 + pvscsi_unmask_intr(adapter); 1330 + 1331 + scsi_scan_host(host); 1332 + 1333 + return 0; 1334 + 1335 + out_reset_adapter: 1336 + ll_adapter_reset(adapter); 1337 + out_release_resources: 1338 + pvscsi_release_resources(adapter); 1339 + out_free_host: 1340 + scsi_host_put(host); 1341 + out_disable_device: 1342 + pci_set_drvdata(pdev, NULL); 1343 + pci_disable_device(pdev); 1344 + 1345 + return error; 1346 + } 1347 + 1348 + static void __pvscsi_shutdown(struct pvscsi_adapter *adapter) 1349 + { 1350 + pvscsi_mask_intr(adapter); 1351 + 1352 + if (adapter->workqueue) 1353 + flush_workqueue(adapter->workqueue); 1354 + 1355 + pvscsi_shutdown_intr(adapter); 1356 + 1357 + pvscsi_process_request_ring(adapter); 1358 + pvscsi_process_completion_ring(adapter); 1359 + ll_adapter_reset(adapter); 1360 + } 1361 + 1362 + static void pvscsi_shutdown(struct pci_dev *dev) 1363 + { 1364 + struct Scsi_Host *host = pci_get_drvdata(dev); 1365 + struct pvscsi_adapter *adapter = shost_priv(host); 1366 + 1367 + __pvscsi_shutdown(adapter); 1368 + } 1369 + 1370 + static void pvscsi_remove(struct pci_dev *pdev) 1371 + { 1372 + struct Scsi_Host *host = pci_get_drvdata(pdev); 1373 + struct pvscsi_adapter *adapter = shost_priv(host); 1374 + 1375 + scsi_remove_host(host); 1376 + 1377 + __pvscsi_shutdown(adapter); 1378 + pvscsi_release_resources(adapter); 1379 + 1380 + scsi_host_put(host); 1381 + 1382 + pci_set_drvdata(pdev, NULL); 1383 + pci_disable_device(pdev); 1384 + } 1385 + 1386 + static struct pci_driver pvscsi_pci_driver = { 1387 + .name = 
"vmw_pvscsi", 1388 + .id_table = pvscsi_pci_tbl, 1389 + .probe = pvscsi_probe, 1390 + .remove = __devexit_p(pvscsi_remove), 1391 + .shutdown = pvscsi_shutdown, 1392 + }; 1393 + 1394 + static int __init pvscsi_init(void) 1395 + { 1396 + pr_info("%s - version %s\n", 1397 + PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING); 1398 + return pci_register_driver(&pvscsi_pci_driver); 1399 + } 1400 + 1401 + static void __exit pvscsi_exit(void) 1402 + { 1403 + pci_unregister_driver(&pvscsi_pci_driver); 1404 + } 1405 + 1406 + module_init(pvscsi_init); 1407 + module_exit(pvscsi_exit);

drivers/scsi/vmw_pvscsi.h (+397)
··· 1 + /* 2 + * VMware PVSCSI header file 3 + * 4 + * Copyright (C) 2008-2009, VMware, Inc. All Rights Reserved. 5 + * 6 + * This program is free software; you can redistribute it and/or modify it 7 + * under the terms of the GNU General Public License as published by the 8 + * Free Software Foundation; version 2 of the License and no later version. 9 + * 10 + * This program is distributed in the hope that it will be useful, but 11 + * WITHOUT ANY WARRANTY; without even the implied warranty of 12 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 13 + * NON INFRINGEMENT. See the GNU General Public License for more 14 + * details. 15 + * 16 + * You should have received a copy of the GNU General Public License 17 + * along with this program; if not, write to the Free Software 18 + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 + * 20 + * Maintained by: Alok N Kataria <akataria@vmware.com> 21 + * 22 + */ 23 + 24 + #ifndef _VMW_PVSCSI_H_ 25 + #define _VMW_PVSCSI_H_ 26 + 27 + #include <linux/types.h> 28 + 29 + #define PVSCSI_DRIVER_VERSION_STRING "1.0.1.0-k" 30 + 31 + #define PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT 128 32 + 33 + #define MASK(n) ((1 << (n)) - 1) /* make an n-bit mask */ 34 + 35 + #define PCI_VENDOR_ID_VMWARE 0x15AD 36 + #define PCI_DEVICE_ID_VMWARE_PVSCSI 0x07C0 37 + 38 + /* 39 + * host adapter status/error codes 40 + */ 41 + enum HostBusAdapterStatus { 42 + BTSTAT_SUCCESS = 0x00, /* CCB complete normally with no errors */ 43 + BTSTAT_LINKED_COMMAND_COMPLETED = 0x0a, 44 + BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG = 0x0b, 45 + BTSTAT_DATA_UNDERRUN = 0x0c, 46 + BTSTAT_SELTIMEO = 0x11, /* SCSI selection timeout */ 47 + BTSTAT_DATARUN = 0x12, /* data overrun/underrun */ 48 + BTSTAT_BUSFREE = 0x13, /* unexpected bus free */ 49 + BTSTAT_INVPHASE = 0x14, /* invalid bus phase or sequence requested by target */ 50 + BTSTAT_LUNMISMATCH = 0x17, /* linked CCB has different LUN from first CCB */ 51 + BTSTAT_SENSFAILED = 0x1b, /* auto request sense failed */ 52 + BTSTAT_TAGREJECT = 0x1c, /* SCSI II tagged queueing message rejected by target */ 53 + BTSTAT_BADMSG = 0x1d, /* unsupported message received by the host adapter */ 54 + BTSTAT_HAHARDWARE = 0x20, /* host adapter hardware failed */ 55 + BTSTAT_NORESPONSE = 0x21, /* target did not respond to SCSI ATN, sent a SCSI RST */ 56 + BTSTAT_SENTRST = 0x22, /* host adapter asserted a SCSI RST */ 57 + BTSTAT_RECVRST = 0x23, /* other SCSI devices asserted a SCSI RST */ 58 + BTSTAT_DISCONNECT = 0x24, /* target device reconnected improperly (w/o tag) */ 59 + BTSTAT_BUSRESET = 0x25, /* host adapter issued BUS device reset */ 60 + BTSTAT_ABORTQUEUE = 0x26, /* abort queue generated */ 61 + BTSTAT_HASOFTWARE = 0x27, /* host adapter software error */ 62 + BTSTAT_HATIMEOUT = 0x30, /* host adapter hardware timeout error */ 63 + BTSTAT_SCSIPARITY = 0x34, /* SCSI parity error detected */ 64 + }; 65 + 66 + /* 67 + * Register offsets. 68 + * 69 + * These registers are accessible both via i/o space and mm i/o. 
70 + */ 71 + 72 + enum PVSCSIRegOffset { 73 + PVSCSI_REG_OFFSET_COMMAND = 0x0, 74 + PVSCSI_REG_OFFSET_COMMAND_DATA = 0x4, 75 + PVSCSI_REG_OFFSET_COMMAND_STATUS = 0x8, 76 + PVSCSI_REG_OFFSET_LAST_STS_0 = 0x100, 77 + PVSCSI_REG_OFFSET_LAST_STS_1 = 0x104, 78 + PVSCSI_REG_OFFSET_LAST_STS_2 = 0x108, 79 + PVSCSI_REG_OFFSET_LAST_STS_3 = 0x10c, 80 + PVSCSI_REG_OFFSET_INTR_STATUS = 0x100c, 81 + PVSCSI_REG_OFFSET_INTR_MASK = 0x2010, 82 + PVSCSI_REG_OFFSET_KICK_NON_RW_IO = 0x3014, 83 + PVSCSI_REG_OFFSET_DEBUG = 0x3018, 84 + PVSCSI_REG_OFFSET_KICK_RW_IO = 0x4018, 85 + }; 86 + 87 + /* 88 + * Virtual h/w commands. 89 + */ 90 + 91 + enum PVSCSICommands { 92 + PVSCSI_CMD_FIRST = 0, /* has to be first */ 93 + 94 + PVSCSI_CMD_ADAPTER_RESET = 1, 95 + PVSCSI_CMD_ISSUE_SCSI = 2, 96 + PVSCSI_CMD_SETUP_RINGS = 3, 97 + PVSCSI_CMD_RESET_BUS = 4, 98 + PVSCSI_CMD_RESET_DEVICE = 5, 99 + PVSCSI_CMD_ABORT_CMD = 6, 100 + PVSCSI_CMD_CONFIG = 7, 101 + PVSCSI_CMD_SETUP_MSG_RING = 8, 102 + PVSCSI_CMD_DEVICE_UNPLUG = 9, 103 + 104 + PVSCSI_CMD_LAST = 10 /* has to be last */ 105 + }; 106 + 107 + /* 108 + * Command descriptor for PVSCSI_CMD_RESET_DEVICE -- 109 + */ 110 + 111 + struct PVSCSICmdDescResetDevice { 112 + u32 target; 113 + u8 lun[8]; 114 + } __packed; 115 + 116 + /* 117 + * Command descriptor for PVSCSI_CMD_ABORT_CMD -- 118 + * 119 + * - currently does not support specifying the LUN. 120 + * - _pad should be 0. 121 + */ 122 + 123 + struct PVSCSICmdDescAbortCmd { 124 + u64 context; 125 + u32 target; 126 + u32 _pad; 127 + } __packed; 128 + 129 + /* 130 + * Command descriptor for PVSCSI_CMD_SETUP_RINGS -- 131 + * 132 + * Notes: 133 + * - reqRingNumPages and cmpRingNumPages need to be power of two. 134 + * - reqRingNumPages and cmpRingNumPages need to be different from 0, 135 + * - reqRingNumPages and cmpRingNumPages need to be inferior to 136 + * PVSCSI_SETUP_RINGS_MAX_NUM_PAGES. 137 + */ 138 + 139 + #define PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 32 140 + struct PVSCSICmdDescSetupRings { 141 + u32 reqRingNumPages; 142 + u32 cmpRingNumPages; 143 + u64 ringsStatePPN; 144 + u64 reqRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES]; 145 + u64 cmpRingPPNs[PVSCSI_SETUP_RINGS_MAX_NUM_PAGES]; 146 + } __packed; 147 + 148 + /* 149 + * Command descriptor for PVSCSI_CMD_SETUP_MSG_RING -- 150 + * 151 + * Notes: 152 + * - this command was not supported in the initial revision of the h/w 153 + * interface. Before using it, you need to check that it is supported by 154 + * writing PVSCSI_CMD_SETUP_MSG_RING to the 'command' register, then 155 + * immediately after read the 'command status' register: 156 + * * a value of -1 means that the cmd is NOT supported, 157 + * * a value != -1 means that the cmd IS supported. 158 + * If it's supported the 'command status' register should return: 159 + * sizeof(PVSCSICmdDescSetupMsgRing) / sizeof(u32). 160 + * - this command should be issued _after_ the usual SETUP_RINGS so that the 161 + * RingsState page is already setup. If not, the command is a nop. 162 + * - numPages needs to be a power of two, 163 + * - numPages needs to be different from 0, 164 + * - _pad should be zero. 165 + */ 166 + 167 + #define PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES 16 168 + 169 + struct PVSCSICmdDescSetupMsgRing { 170 + u32 numPages; 171 + u32 _pad; 172 + u64 ringPPNs[PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES]; 173 + } __packed; 174 + 175 + enum PVSCSIMsgType { 176 + PVSCSI_MSG_DEV_ADDED = 0, 177 + PVSCSI_MSG_DEV_REMOVED = 1, 178 + PVSCSI_MSG_LAST = 2, 179 + }; 180 + 181 + /* 182 + * Msg descriptor. 
183 + * 184 + * sizeof(struct PVSCSIRingMsgDesc) == 128. 185 + * 186 + * - type is of type enum PVSCSIMsgType. 187 + * - the content of args depend on the type of event being delivered. 188 + */ 189 + 190 + struct PVSCSIRingMsgDesc { 191 + u32 type; 192 + u32 args[31]; 193 + } __packed; 194 + 195 + struct PVSCSIMsgDescDevStatusChanged { 196 + u32 type; /* PVSCSI_MSG_DEV _ADDED / _REMOVED */ 197 + u32 bus; 198 + u32 target; 199 + u8 lun[8]; 200 + u32 pad[27]; 201 + } __packed; 202 + 203 + /* 204 + * Rings state. 205 + * 206 + * - the fields: 207 + * . msgProdIdx, 208 + * . msgConsIdx, 209 + * . msgNumEntriesLog2, 210 + * .. are only used once the SETUP_MSG_RING cmd has been issued. 211 + * - '_pad' helps to ensure that the msg related fields are on their own 212 + * cache-line. 213 + */ 214 + 215 + struct PVSCSIRingsState { 216 + u32 reqProdIdx; 217 + u32 reqConsIdx; 218 + u32 reqNumEntriesLog2; 219 + 220 + u32 cmpProdIdx; 221 + u32 cmpConsIdx; 222 + u32 cmpNumEntriesLog2; 223 + 224 + u8 _pad[104]; 225 + 226 + u32 msgProdIdx; 227 + u32 msgConsIdx; 228 + u32 msgNumEntriesLog2; 229 + } __packed; 230 + 231 + /* 232 + * Request descriptor. 233 + * 234 + * sizeof(RingReqDesc) = 128 235 + * 236 + * - context: is a unique identifier of a command. It could normally be any 237 + * 64bit value, however we currently store it in the serialNumber variable 238 + * of struct SCSI_Command, so we have the following restrictions due to the 239 + * way this field is handled in the vmkernel storage stack: 240 + * * this value can't be 0, 241 + * * the upper 32bit need to be 0 since serialNumber is as a u32. 242 + * Currently tracked as PR 292060. 243 + * - dataLen: contains the total number of bytes that need to be transferred. 244 + * - dataAddr: 245 + * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is set: dataAddr is the PA of the first 246 + * s/g table segment, each s/g segment is entirely contained on a single 247 + * page of physical memory, 248 + * * if PVSCSI_FLAG_CMD_WITH_SG_LIST is NOT set, then dataAddr is the PA of 249 + * the buffer used for the DMA transfer, 250 + * - flags: 251 + * * PVSCSI_FLAG_CMD_WITH_SG_LIST: see dataAddr above, 252 + * * PVSCSI_FLAG_CMD_DIR_NONE: no DMA involved, 253 + * * PVSCSI_FLAG_CMD_DIR_TOHOST: transfer from device to main memory, 254 + * * PVSCSI_FLAG_CMD_DIR_TODEVICE: transfer from main memory to device, 255 + * * PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB: reserved to handle CDBs larger than 256 + * 16bytes. To be specified. 257 + * - vcpuHint: vcpuId of the processor that will be most likely waiting for the 258 + * completion of the i/o. For guest OSes that use lowest priority message 259 + * delivery mode (such as windows), we use this "hint" to deliver the 260 + * completion action to the proper vcpu. For now, we can use the vcpuId of 261 + * the processor that initiated the i/o as a likely candidate for the vcpu 262 + * that will be waiting for the completion.. 263 + * - bus should be 0: we currently only support bus 0 for now. 264 + * - unused should be zero'd. 
265 + */ 266 + 267 + #define PVSCSI_FLAG_CMD_WITH_SG_LIST (1 << 0) 268 + #define PVSCSI_FLAG_CMD_OUT_OF_BAND_CDB (1 << 1) 269 + #define PVSCSI_FLAG_CMD_DIR_NONE (1 << 2) 270 + #define PVSCSI_FLAG_CMD_DIR_TOHOST (1 << 3) 271 + #define PVSCSI_FLAG_CMD_DIR_TODEVICE (1 << 4) 272 + 273 + struct PVSCSIRingReqDesc { 274 + u64 context; 275 + u64 dataAddr; 276 + u64 dataLen; 277 + u64 senseAddr; 278 + u32 senseLen; 279 + u32 flags; 280 + u8 cdb[16]; 281 + u8 cdbLen; 282 + u8 lun[8]; 283 + u8 tag; 284 + u8 bus; 285 + u8 target; 286 + u8 vcpuHint; 287 + u8 unused[59]; 288 + } __packed; 289 + 290 + /* 291 + * Scatter-gather list management. 292 + * 293 + * As described above, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set in the 294 + * RingReqDesc.flags, then RingReqDesc.dataAddr is the PA of the first s/g 295 + * table segment. 296 + * 297 + * - each segment of the s/g table contain a succession of struct 298 + * PVSCSISGElement. 299 + * - each segment is entirely contained on a single physical page of memory. 300 + * - a "chain" s/g element has the flag PVSCSI_SGE_FLAG_CHAIN_ELEMENT set in 301 + * PVSCSISGElement.flags and in this case: 302 + * * addr is the PA of the next s/g segment, 303 + * * length is undefined, assumed to be 0. 304 + */ 305 + 306 + struct PVSCSISGElement { 307 + u64 addr; 308 + u32 length; 309 + u32 flags; 310 + } __packed; 311 + 312 + /* 313 + * Completion descriptor. 314 + * 315 + * sizeof(RingCmpDesc) = 32 316 + * 317 + * - context: identifier of the command. The same thing that was specified 318 + * under "context" as part of struct RingReqDesc at initiation time, 319 + * - dataLen: number of bytes transferred for the actual i/o operation, 320 + * - senseLen: number of bytes written into the sense buffer, 321 + * - hostStatus: adapter status, 322 + * - scsiStatus: device status, 323 + * - _pad should be zero. 324 + */ 325 + 326 + struct PVSCSIRingCmpDesc { 327 + u64 context; 328 + u64 dataLen; 329 + u32 senseLen; 330 + u16 hostStatus; 331 + u16 scsiStatus; 332 + u32 _pad[2]; 333 + } __packed; 334 + 335 + /* 336 + * Interrupt status / IRQ bits. 337 + */ 338 + 339 + #define PVSCSI_INTR_CMPL_0 (1 << 0) 340 + #define PVSCSI_INTR_CMPL_1 (1 << 1) 341 + #define PVSCSI_INTR_CMPL_MASK MASK(2) 342 + 343 + #define PVSCSI_INTR_MSG_0 (1 << 2) 344 + #define PVSCSI_INTR_MSG_1 (1 << 3) 345 + #define PVSCSI_INTR_MSG_MASK (MASK(2) << 2) 346 + 347 + #define PVSCSI_INTR_ALL_SUPPORTED MASK(4) 348 + 349 + /* 350 + * Number of MSI-X vectors supported. 351 + */ 352 + #define PVSCSI_MAX_INTRS 24 353 + 354 + /* 355 + * Enumeration of supported MSI-X vectors 356 + */ 357 + #define PVSCSI_VECTOR_COMPLETION 0 358 + 359 + /* 360 + * Misc constants for the rings. 
361 + */ 362 + 363 + #define PVSCSI_MAX_NUM_PAGES_REQ_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 364 + #define PVSCSI_MAX_NUM_PAGES_CMP_RING PVSCSI_SETUP_RINGS_MAX_NUM_PAGES 365 + #define PVSCSI_MAX_NUM_PAGES_MSG_RING PVSCSI_SETUP_MSG_RING_MAX_NUM_PAGES 366 + 367 + #define PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE \ 368 + (PAGE_SIZE / sizeof(struct PVSCSIRingReqDesc)) 369 + 370 + #define PVSCSI_MAX_REQ_QUEUE_DEPTH \ 371 + (PVSCSI_MAX_NUM_PAGES_REQ_RING * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE) 372 + 373 + #define PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES 1 374 + #define PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES 1 375 + #define PVSCSI_MEM_SPACE_MISC_NUM_PAGES 2 376 + #define PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES 2 377 + #define PVSCSI_MEM_SPACE_MSIX_NUM_PAGES 2 378 + 379 + enum PVSCSIMemSpace { 380 + PVSCSI_MEM_SPACE_COMMAND_PAGE = 0, 381 + PVSCSI_MEM_SPACE_INTR_STATUS_PAGE = 1, 382 + PVSCSI_MEM_SPACE_MISC_PAGE = 2, 383 + PVSCSI_MEM_SPACE_KICK_IO_PAGE = 4, 384 + PVSCSI_MEM_SPACE_MSIX_TABLE_PAGE = 6, 385 + PVSCSI_MEM_SPACE_MSIX_PBA_PAGE = 7, 386 + }; 387 + 388 + #define PVSCSI_MEM_SPACE_NUM_PAGES \ 389 + (PVSCSI_MEM_SPACE_COMMAND_NUM_PAGES + \ 390 + PVSCSI_MEM_SPACE_INTR_STATUS_NUM_PAGES + \ 391 + PVSCSI_MEM_SPACE_MISC_NUM_PAGES + \ 392 + PVSCSI_MEM_SPACE_KICK_IO_NUM_PAGES + \ 393 + PVSCSI_MEM_SPACE_MSIX_NUM_PAGES) 394 + 395 + #define PVSCSI_MEM_SPACE_SIZE (PVSCSI_MEM_SPACE_NUM_PAGES * PAGE_SIZE) 396 + 397 + #endif /* _VMW_PVSCSI_H_ */
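
The ring indices in struct PVSCSIRingsState are free-running 32-bit counters: the driver never wraps them by hand, it only reduces them to a slot number with MASK(NumEntriesLog2), and pvscsi_queue_ring() detects a full request ring with reqProdIdx - cmpConsIdx >= 1 << reqNumEntriesLog2. The following is a minimal standalone sketch of that indexing scheme in plain userspace C; all names are hypothetical and it only mirrors the arithmetic, not the device protocol.

/*
 * Standalone illustration of the free-running producer/consumer index
 * scheme used by the PVSCSI rings. Hypothetical demo code, not part of
 * the driver; compiles with any C compiler.
 */
#include <stdio.h>
#include <stdint.h>

#define MASK(n) ((1 << (n)) - 1)	/* same helper as in vmw_pvscsi.h */
#define DEMO_ENTRIES_LOG2 3		/* 8-entry ring for the demo */

int main(void)
{
	uint32_t prod = 0, cons = 0;	/* free-running, only masked on use */
	int ring[1 << DEMO_ENTRIES_LOG2];
	int i;

	for (i = 0; i < 20; i++) {
		/* "ring full" test, as in pvscsi_queue_ring() */
		if (prod - cons >= (uint32_t)(1 << DEMO_ENTRIES_LOG2)) {
			/* consume the oldest entry, as the completion path would */
			printf("consumed %d from slot %u\n",
			       ring[cons & MASK(DEMO_ENTRIES_LOG2)],
			       cons & MASK(DEMO_ENTRIES_LOG2));
			cons++;
		}
		ring[prod & MASK(DEMO_ENTRIES_LOG2)] = i;	/* produce */
		prod++;
	}
	return 0;
}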
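
Similarly, when PVSCSI_FLAG_CMD_WITH_SG_LIST is set, dataAddr points at a page-sized table of struct PVSCSISGElement entries, which pvscsi_create_sg() fills from the mapped Linux scatterlist. Below is a small sketch of that table layout under the same caveat: the struct mirrors vmw_pvscsi.h, but the helper name and addresses are made up, whereas the driver takes bus addresses from sg_dma_address().

/*
 * Hypothetical userspace sketch of filling a PVSCSI s/g table.
 * In the driver the addresses come from the DMA-mapped scatterlist.
 */
#include <stdio.h>
#include <stdint.h>

struct demo_sg_element {	/* corresponds to struct PVSCSISGElement */
	uint64_t addr;
	uint32_t length;
	uint32_t flags;
};

static void fill_sg_table(struct demo_sg_element *sge,
			  const uint64_t *addrs, const uint32_t *lens,
			  unsigned count)
{
	unsigned i;

	for (i = 0; i < count; i++) {
		sge[i].addr = addrs[i];		/* bus address of the fragment */
		sge[i].length = lens[i];	/* fragment length in bytes */
		sge[i].flags = 0;		/* no chain element in this sketch */
	}
}

int main(void)
{
	struct demo_sg_element table[3];
	uint64_t addrs[3] = { 0x10000, 0x24000, 0x38000 };
	uint32_t lens[3] = { 4096, 8192, 512 };
	unsigned i;

	fill_sg_table(table, addrs, lens, 3);
	for (i = 0; i < 3; i++)
		printf("sge[%u]: addr=0x%llx len=%u\n", i,
		       (unsigned long long)table[i].addr, table[i].length);
	return 0;
}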