Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at v4.1-rc7
1/* 2 * NVM Express device driver 3 * Copyright (c) 2011-2014, Intel Corporation. 4 * 5 * This program is free software; you can redistribute it and/or modify it 6 * under the terms and conditions of the GNU General Public License, 7 * version 2, as published by the Free Software Foundation. 8 * 9 * This program is distributed in the hope it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 * more details. 13 */ 14 15#include <linux/nvme.h> 16#include <linux/bitops.h> 17#include <linux/blkdev.h> 18#include <linux/blk-mq.h> 19#include <linux/cpu.h> 20#include <linux/delay.h> 21#include <linux/errno.h> 22#include <linux/fs.h> 23#include <linux/genhd.h> 24#include <linux/hdreg.h> 25#include <linux/idr.h> 26#include <linux/init.h> 27#include <linux/interrupt.h> 28#include <linux/io.h> 29#include <linux/kdev_t.h> 30#include <linux/kthread.h> 31#include <linux/kernel.h> 32#include <linux/mm.h> 33#include <linux/module.h> 34#include <linux/moduleparam.h> 35#include <linux/pci.h> 36#include <linux/poison.h> 37#include <linux/ptrace.h> 38#include <linux/sched.h> 39#include <linux/slab.h> 40#include <linux/t10-pi.h> 41#include <linux/types.h> 42#include <scsi/sg.h> 43#include <asm-generic/io-64-nonatomic-lo-hi.h> 44 45#define NVME_MINORS (1U << MINORBITS) 46#define NVME_Q_DEPTH 1024 47#define NVME_AQ_DEPTH 256 48#define SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) 49#define CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) 50#define ADMIN_TIMEOUT (admin_timeout * HZ) 51#define SHUTDOWN_TIMEOUT (shutdown_timeout * HZ) 52 53static unsigned char admin_timeout = 60; 54module_param(admin_timeout, byte, 0644); 55MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands"); 56 57unsigned char nvme_io_timeout = 30; 58module_param_named(io_timeout, nvme_io_timeout, byte, 0644); 59MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O"); 60 61static unsigned char shutdown_timeout = 5; 62module_param(shutdown_timeout, byte, 0644); 63MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown"); 64 65static int nvme_major; 66module_param(nvme_major, int, 0); 67 68static int nvme_char_major; 69module_param(nvme_char_major, int, 0); 70 71static int use_threaded_interrupts; 72module_param(use_threaded_interrupts, int, 0); 73 74static DEFINE_SPINLOCK(dev_list_lock); 75static LIST_HEAD(dev_list); 76static struct task_struct *nvme_thread; 77static struct workqueue_struct *nvme_workq; 78static wait_queue_head_t nvme_kthread_wait; 79 80static struct class *nvme_class; 81 82static void nvme_reset_failed_dev(struct work_struct *ws); 83static int nvme_process_cq(struct nvme_queue *nvmeq); 84 85struct async_cmd_info { 86 struct kthread_work work; 87 struct kthread_worker *worker; 88 struct request *req; 89 u32 result; 90 int status; 91 void *ctx; 92}; 93 94/* 95 * An NVM Express queue. Each device has at least two (one for admin 96 * commands and one for I/O commands). 
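 * (Queue 0 is always the admin queue; I/O queues use qid >= 1. Each nvme_queue
 *  pairs one submission ring (sq_cmds) with one completion ring (cqes) and a
 *  doorbell pointer (q_db).)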
97 */ 98struct nvme_queue { 99 struct device *q_dmadev; 100 struct nvme_dev *dev; 101 char irqname[24]; /* nvme4294967295-65535\0 */ 102 spinlock_t q_lock; 103 struct nvme_command *sq_cmds; 104 volatile struct nvme_completion *cqes; 105 dma_addr_t sq_dma_addr; 106 dma_addr_t cq_dma_addr; 107 u32 __iomem *q_db; 108 u16 q_depth; 109 s16 cq_vector; 110 u16 sq_head; 111 u16 sq_tail; 112 u16 cq_head; 113 u16 qid; 114 u8 cq_phase; 115 u8 cqe_seen; 116 struct async_cmd_info cmdinfo; 117 struct blk_mq_hw_ctx *hctx; 118}; 119 120/* 121 * Check we didn't inadvertently grow the command struct 122 */ 123static inline void _nvme_check_size(void) 124{ 125 BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); 126 BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); 127 BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); 128 BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); 129 BUILD_BUG_ON(sizeof(struct nvme_features) != 64); 130 BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); 131 BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); 132 BUILD_BUG_ON(sizeof(struct nvme_command) != 64); 133 BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != 4096); 134 BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); 135 BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); 136 BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); 137} 138 139typedef void (*nvme_completion_fn)(struct nvme_queue *, void *, 140 struct nvme_completion *); 141 142struct nvme_cmd_info { 143 nvme_completion_fn fn; 144 void *ctx; 145 int aborted; 146 struct nvme_queue *nvmeq; 147 struct nvme_iod iod[0]; 148}; 149 150/* 151 * Max size of iod being embedded in the request payload 152 */ 153#define NVME_INT_PAGES 2 154#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->page_size) 155#define NVME_INT_MASK 0x01 156 157/* 158 * Will slightly overestimate the number of pages needed. This is OK 159 * as it only leads to a small amount of wasted memory for the lifetime of 160 * the I/O.
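 *
 * Worked example (illustrative, assuming 4 KiB device and host page sizes):
 * a 32 KiB transfer gives nprps = DIV_ROUND_UP(32768 + 4096, 4096) = 9 PRP
 * entries, which need DIV_ROUND_UP(8 * 9, 4096 - 8) = 1 PRP-list page.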
161 */ 162static int nvme_npages(unsigned size, struct nvme_dev *dev) 163{ 164 unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size); 165 return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8); 166} 167 168static unsigned int nvme_cmd_size(struct nvme_dev *dev) 169{ 170 unsigned int ret = sizeof(struct nvme_cmd_info); 171 172 ret += sizeof(struct nvme_iod); 173 ret += sizeof(__le64 *) * nvme_npages(NVME_INT_BYTES(dev), dev); 174 ret += sizeof(struct scatterlist) * NVME_INT_PAGES; 175 176 return ret; 177} 178 179static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 180 unsigned int hctx_idx) 181{ 182 struct nvme_dev *dev = data; 183 struct nvme_queue *nvmeq = dev->queues[0]; 184 185 WARN_ON(nvmeq->hctx); 186 nvmeq->hctx = hctx; 187 hctx->driver_data = nvmeq; 188 return 0; 189} 190 191static int nvme_admin_init_request(void *data, struct request *req, 192 unsigned int hctx_idx, unsigned int rq_idx, 193 unsigned int numa_node) 194{ 195 struct nvme_dev *dev = data; 196 struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); 197 struct nvme_queue *nvmeq = dev->queues[0]; 198 199 BUG_ON(!nvmeq); 200 cmd->nvmeq = nvmeq; 201 return 0; 202} 203 204static void nvme_exit_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) 205{ 206 struct nvme_queue *nvmeq = hctx->driver_data; 207 208 nvmeq->hctx = NULL; 209} 210 211static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 212 unsigned int hctx_idx) 213{ 214 struct nvme_dev *dev = data; 215 struct nvme_queue *nvmeq = dev->queues[ 216 (hctx_idx % dev->queue_count) + 1]; 217 218 if (!nvmeq->hctx) 219 nvmeq->hctx = hctx; 220 221 /* nvmeq queues are shared between namespaces. We assume here that 222 * blk-mq map the tags so they match up with the nvme queue tags. */ 223 WARN_ON(nvmeq->hctx->tags != hctx->tags); 224 225 hctx->driver_data = nvmeq; 226 return 0; 227} 228 229static int nvme_init_request(void *data, struct request *req, 230 unsigned int hctx_idx, unsigned int rq_idx, 231 unsigned int numa_node) 232{ 233 struct nvme_dev *dev = data; 234 struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); 235 struct nvme_queue *nvmeq = dev->queues[hctx_idx + 1]; 236 237 BUG_ON(!nvmeq); 238 cmd->nvmeq = nvmeq; 239 return 0; 240} 241 242static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx, 243 nvme_completion_fn handler) 244{ 245 cmd->fn = handler; 246 cmd->ctx = ctx; 247 cmd->aborted = 0; 248 blk_mq_start_request(blk_mq_rq_from_pdu(cmd)); 249} 250 251static void *iod_get_private(struct nvme_iod *iod) 252{ 253 return (void *) (iod->private & ~0x1UL); 254} 255 256/* 257 * If bit 0 is set, the iod is embedded in the request payload. 
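 * (iod_get_private() above masks that bit off to recover the struct request
 *  pointer; iod_should_kfree() below returns false for an embedded iod so it
 *  is not freed separately.)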
258 */ 259static bool iod_should_kfree(struct nvme_iod *iod) 260{ 261 return (iod->private & NVME_INT_MASK) == 0; 262} 263 264/* Special values must be less than 0x1000 */ 265#define CMD_CTX_BASE ((void *)POISON_POINTER_DELTA) 266#define CMD_CTX_CANCELLED (0x30C + CMD_CTX_BASE) 267#define CMD_CTX_COMPLETED (0x310 + CMD_CTX_BASE) 268#define CMD_CTX_INVALID (0x314 + CMD_CTX_BASE) 269 270static void special_completion(struct nvme_queue *nvmeq, void *ctx, 271 struct nvme_completion *cqe) 272{ 273 if (ctx == CMD_CTX_CANCELLED) 274 return; 275 if (ctx == CMD_CTX_COMPLETED) { 276 dev_warn(nvmeq->q_dmadev, 277 "completed id %d twice on queue %d\n", 278 cqe->command_id, le16_to_cpup(&cqe->sq_id)); 279 return; 280 } 281 if (ctx == CMD_CTX_INVALID) { 282 dev_warn(nvmeq->q_dmadev, 283 "invalid id %d completed on queue %d\n", 284 cqe->command_id, le16_to_cpup(&cqe->sq_id)); 285 return; 286 } 287 dev_warn(nvmeq->q_dmadev, "Unknown special completion %p\n", ctx); 288} 289 290static void *cancel_cmd_info(struct nvme_cmd_info *cmd, nvme_completion_fn *fn) 291{ 292 void *ctx; 293 294 if (fn) 295 *fn = cmd->fn; 296 ctx = cmd->ctx; 297 cmd->fn = special_completion; 298 cmd->ctx = CMD_CTX_CANCELLED; 299 return ctx; 300} 301 302static void async_req_completion(struct nvme_queue *nvmeq, void *ctx, 303 struct nvme_completion *cqe) 304{ 305 u32 result = le32_to_cpup(&cqe->result); 306 u16 status = le16_to_cpup(&cqe->status) >> 1; 307 308 if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ) 309 ++nvmeq->dev->event_limit; 310 if (status == NVME_SC_SUCCESS) 311 dev_warn(nvmeq->q_dmadev, 312 "async event result %08x\n", result); 313} 314 315static void abort_completion(struct nvme_queue *nvmeq, void *ctx, 316 struct nvme_completion *cqe) 317{ 318 struct request *req = ctx; 319 320 u16 status = le16_to_cpup(&cqe->status) >> 1; 321 u32 result = le32_to_cpup(&cqe->result); 322 323 blk_mq_free_hctx_request(nvmeq->hctx, req); 324 325 dev_warn(nvmeq->q_dmadev, "Abort status:%x result:%x", status, result); 326 ++nvmeq->dev->abort_limit; 327} 328 329static void async_completion(struct nvme_queue *nvmeq, void *ctx, 330 struct nvme_completion *cqe) 331{ 332 struct async_cmd_info *cmdinfo = ctx; 333 cmdinfo->result = le32_to_cpup(&cqe->result); 334 cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; 335 queue_kthread_work(cmdinfo->worker, &cmdinfo->work); 336 blk_mq_free_hctx_request(nvmeq->hctx, cmdinfo->req); 337} 338 339static inline struct nvme_cmd_info *get_cmd_from_tag(struct nvme_queue *nvmeq, 340 unsigned int tag) 341{ 342 struct blk_mq_hw_ctx *hctx = nvmeq->hctx; 343 struct request *req = blk_mq_tag_to_rq(hctx->tags, tag); 344 345 return blk_mq_rq_to_pdu(req); 346} 347 348/* 349 * Called with local interrupts disabled and the q_lock held. May not sleep. 
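 * (In this driver it is reached via nvme_process_cq(), whose callers take
 *  q_lock before consuming completion entries.)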
350 */ 351static void *nvme_finish_cmd(struct nvme_queue *nvmeq, int tag, 352 nvme_completion_fn *fn) 353{ 354 struct nvme_cmd_info *cmd = get_cmd_from_tag(nvmeq, tag); 355 void *ctx; 356 if (tag >= nvmeq->q_depth) { 357 *fn = special_completion; 358 return CMD_CTX_INVALID; 359 } 360 if (fn) 361 *fn = cmd->fn; 362 ctx = cmd->ctx; 363 cmd->fn = special_completion; 364 cmd->ctx = CMD_CTX_COMPLETED; 365 return ctx; 366} 367 368/** 369 * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell 370 * @nvmeq: The queue to use 371 * @cmd: The command to send 372 * 373 * Safe to use from interrupt context 374 */ 375static int __nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) 376{ 377 u16 tail = nvmeq->sq_tail; 378 379 memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); 380 if (++tail == nvmeq->q_depth) 381 tail = 0; 382 writel(tail, nvmeq->q_db); 383 nvmeq->sq_tail = tail; 384 385 return 0; 386} 387 388static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) 389{ 390 unsigned long flags; 391 int ret; 392 spin_lock_irqsave(&nvmeq->q_lock, flags); 393 ret = __nvme_submit_cmd(nvmeq, cmd); 394 spin_unlock_irqrestore(&nvmeq->q_lock, flags); 395 return ret; 396} 397 398static __le64 **iod_list(struct nvme_iod *iod) 399{ 400 return ((void *)iod) + iod->offset; 401} 402 403static inline void iod_init(struct nvme_iod *iod, unsigned nbytes, 404 unsigned nseg, unsigned long private) 405{ 406 iod->private = private; 407 iod->offset = offsetof(struct nvme_iod, sg[nseg]); 408 iod->npages = -1; 409 iod->length = nbytes; 410 iod->nents = 0; 411} 412 413static struct nvme_iod * 414__nvme_alloc_iod(unsigned nseg, unsigned bytes, struct nvme_dev *dev, 415 unsigned long priv, gfp_t gfp) 416{ 417 struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) + 418 sizeof(__le64 *) * nvme_npages(bytes, dev) + 419 sizeof(struct scatterlist) * nseg, gfp); 420 421 if (iod) 422 iod_init(iod, bytes, nseg, priv); 423 424 return iod; 425} 426 427static struct nvme_iod *nvme_alloc_iod(struct request *rq, struct nvme_dev *dev, 428 gfp_t gfp) 429{ 430 unsigned size = !(rq->cmd_flags & REQ_DISCARD) ? 
blk_rq_bytes(rq) : 431 sizeof(struct nvme_dsm_range); 432 struct nvme_iod *iod; 433 434 if (rq->nr_phys_segments <= NVME_INT_PAGES && 435 size <= NVME_INT_BYTES(dev)) { 436 struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(rq); 437 438 iod = cmd->iod; 439 iod_init(iod, size, rq->nr_phys_segments, 440 (unsigned long) rq | NVME_INT_MASK); 441 return iod; 442 } 443 444 return __nvme_alloc_iod(rq->nr_phys_segments, size, dev, 445 (unsigned long) rq, gfp); 446} 447 448void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod) 449{ 450 const int last_prp = dev->page_size / 8 - 1; 451 int i; 452 __le64 **list = iod_list(iod); 453 dma_addr_t prp_dma = iod->first_dma; 454 455 if (iod->npages == 0) 456 dma_pool_free(dev->prp_small_pool, list[0], prp_dma); 457 for (i = 0; i < iod->npages; i++) { 458 __le64 *prp_list = list[i]; 459 dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]); 460 dma_pool_free(dev->prp_page_pool, prp_list, prp_dma); 461 prp_dma = next_prp_dma; 462 } 463 464 if (iod_should_kfree(iod)) 465 kfree(iod); 466} 467 468static int nvme_error_status(u16 status) 469{ 470 switch (status & 0x7ff) { 471 case NVME_SC_SUCCESS: 472 return 0; 473 case NVME_SC_CAP_EXCEEDED: 474 return -ENOSPC; 475 default: 476 return -EIO; 477 } 478} 479 480#ifdef CONFIG_BLK_DEV_INTEGRITY 481static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi) 482{ 483 if (be32_to_cpu(pi->ref_tag) == v) 484 pi->ref_tag = cpu_to_be32(p); 485} 486 487static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) 488{ 489 if (be32_to_cpu(pi->ref_tag) == p) 490 pi->ref_tag = cpu_to_be32(v); 491} 492 493/** 494 * nvme_dif_remap - remaps ref tags to bip seed and physical lba 495 * 496 * The virtual start sector is the one that was originally submitted by the 497 * block layer. Due to partitioning, MD/DM cloning, etc. the actual physical 498 * start sector may be different. Remap protection information to match the 499 * physical LBA on writes, and back to the original seed on reads. 500 * 501 * Type 0 and 3 do not have a ref tag, so no remapping required. 
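 *
 * Illustrative flow: a request whose integrity seed (virtual start sector) is V
 * but whose physical start LBA is P has each t10_pi_tuple ref tag rewritten
 * V -> P before a write is issued (nvme_dif_prep) and P -> V when a read
 * completes (nvme_dif_complete), one tuple per logical block.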
502 */ 503static void nvme_dif_remap(struct request *req, 504 void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi)) 505{ 506 struct nvme_ns *ns = req->rq_disk->private_data; 507 struct bio_integrity_payload *bip; 508 struct t10_pi_tuple *pi; 509 void *p, *pmap; 510 u32 i, nlb, ts, phys, virt; 511 512 if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3) 513 return; 514 515 bip = bio_integrity(req->bio); 516 if (!bip) 517 return; 518 519 pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; 520 521 p = pmap; 522 virt = bip_get_seed(bip); 523 phys = nvme_block_nr(ns, blk_rq_pos(req)); 524 nlb = (blk_rq_bytes(req) >> ns->lba_shift); 525 ts = ns->disk->integrity->tuple_size; 526 527 for (i = 0; i < nlb; i++, virt++, phys++) { 528 pi = (struct t10_pi_tuple *)p; 529 dif_swap(phys, virt, pi); 530 p += ts; 531 } 532 kunmap_atomic(pmap); 533} 534 535static int nvme_noop_verify(struct blk_integrity_iter *iter) 536{ 537 return 0; 538} 539 540static int nvme_noop_generate(struct blk_integrity_iter *iter) 541{ 542 return 0; 543} 544 545struct blk_integrity nvme_meta_noop = { 546 .name = "NVME_META_NOOP", 547 .generate_fn = nvme_noop_generate, 548 .verify_fn = nvme_noop_verify, 549}; 550 551static void nvme_init_integrity(struct nvme_ns *ns) 552{ 553 struct blk_integrity integrity; 554 555 switch (ns->pi_type) { 556 case NVME_NS_DPS_PI_TYPE3: 557 integrity = t10_pi_type3_crc; 558 break; 559 case NVME_NS_DPS_PI_TYPE1: 560 case NVME_NS_DPS_PI_TYPE2: 561 integrity = t10_pi_type1_crc; 562 break; 563 default: 564 integrity = nvme_meta_noop; 565 break; 566 } 567 integrity.tuple_size = ns->ms; 568 blk_integrity_register(ns->disk, &integrity); 569 blk_queue_max_integrity_segments(ns->queue, 1); 570} 571#else /* CONFIG_BLK_DEV_INTEGRITY */ 572static void nvme_dif_remap(struct request *req, 573 void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi)) 574{ 575} 576static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi) 577{ 578} 579static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) 580{ 581} 582static void nvme_init_integrity(struct nvme_ns *ns) 583{ 584} 585#endif 586 587static void req_completion(struct nvme_queue *nvmeq, void *ctx, 588 struct nvme_completion *cqe) 589{ 590 struct nvme_iod *iod = ctx; 591 struct request *req = iod_get_private(iod); 592 struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); 593 594 u16 status = le16_to_cpup(&cqe->status) >> 1; 595 596 if (unlikely(status)) { 597 if (!(status & NVME_SC_DNR || blk_noretry_request(req)) 598 && (jiffies - req->start_time) < req->timeout) { 599 unsigned long flags; 600 601 blk_mq_requeue_request(req); 602 spin_lock_irqsave(req->q->queue_lock, flags); 603 if (!blk_queue_stopped(req->q)) 604 blk_mq_kick_requeue_list(req->q); 605 spin_unlock_irqrestore(req->q->queue_lock, flags); 606 return; 607 } 608 req->errors = nvme_error_status(status); 609 } else 610 req->errors = 0; 611 612 if (cmd_rq->aborted) 613 dev_warn(&nvmeq->dev->pci_dev->dev, 614 "completing aborted command with status:%04x\n", 615 status); 616 617 if (iod->nents) { 618 dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, iod->nents, 619 rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 620 if (blk_integrity_rq(req)) { 621 if (!rq_data_dir(req)) 622 nvme_dif_remap(req, nvme_dif_complete); 623 dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->meta_sg, 1, 624 rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 625 } 626 } 627 nvme_free_iod(nvmeq->dev, iod); 628 629 blk_mq_complete_request(req); 630} 631 632/* length is in bytes. 
gfp flags indicates whether we may sleep. */ 633int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len, 634 gfp_t gfp) 635{ 636 struct dma_pool *pool; 637 int length = total_len; 638 struct scatterlist *sg = iod->sg; 639 int dma_len = sg_dma_len(sg); 640 u64 dma_addr = sg_dma_address(sg); 641 u32 page_size = dev->page_size; 642 int offset = dma_addr & (page_size - 1); 643 __le64 *prp_list; 644 __le64 **list = iod_list(iod); 645 dma_addr_t prp_dma; 646 int nprps, i; 647 648 length -= (page_size - offset); 649 if (length <= 0) 650 return total_len; 651 652 dma_len -= (page_size - offset); 653 if (dma_len) { 654 dma_addr += (page_size - offset); 655 } else { 656 sg = sg_next(sg); 657 dma_addr = sg_dma_address(sg); 658 dma_len = sg_dma_len(sg); 659 } 660 661 if (length <= page_size) { 662 iod->first_dma = dma_addr; 663 return total_len; 664 } 665 666 nprps = DIV_ROUND_UP(length, page_size); 667 if (nprps <= (256 / 8)) { 668 pool = dev->prp_small_pool; 669 iod->npages = 0; 670 } else { 671 pool = dev->prp_page_pool; 672 iod->npages = 1; 673 } 674 675 prp_list = dma_pool_alloc(pool, gfp, &prp_dma); 676 if (!prp_list) { 677 iod->first_dma = dma_addr; 678 iod->npages = -1; 679 return (total_len - length) + page_size; 680 } 681 list[0] = prp_list; 682 iod->first_dma = prp_dma; 683 i = 0; 684 for (;;) { 685 if (i == page_size >> 3) { 686 __le64 *old_prp_list = prp_list; 687 prp_list = dma_pool_alloc(pool, gfp, &prp_dma); 688 if (!prp_list) 689 return total_len - length; 690 list[iod->npages++] = prp_list; 691 prp_list[0] = old_prp_list[i - 1]; 692 old_prp_list[i - 1] = cpu_to_le64(prp_dma); 693 i = 1; 694 } 695 prp_list[i++] = cpu_to_le64(dma_addr); 696 dma_len -= page_size; 697 dma_addr += page_size; 698 length -= page_size; 699 if (length <= 0) 700 break; 701 if (dma_len > 0) 702 continue; 703 BUG_ON(dma_len < 0); 704 sg = sg_next(sg); 705 dma_addr = sg_dma_address(sg); 706 dma_len = sg_dma_len(sg); 707 } 708 709 return total_len; 710} 711 712/* 713 * We reuse the small pool to allocate the 16-byte range here as it is not 714 * worth having a special pool for these or additional cases to handle freeing 715 * the iod. 
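 * (A struct nvme_dsm_range is 16 bytes: the 4-byte cattr, 4-byte nlb and
 *  8-byte slba filled in below, so a dedicated DMA pool would be wasteful.)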
716 */ 717static void nvme_submit_discard(struct nvme_queue *nvmeq, struct nvme_ns *ns, 718 struct request *req, struct nvme_iod *iod) 719{ 720 struct nvme_dsm_range *range = 721 (struct nvme_dsm_range *)iod_list(iod)[0]; 722 struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; 723 724 range->cattr = cpu_to_le32(0); 725 range->nlb = cpu_to_le32(blk_rq_bytes(req) >> ns->lba_shift); 726 range->slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); 727 728 memset(cmnd, 0, sizeof(*cmnd)); 729 cmnd->dsm.opcode = nvme_cmd_dsm; 730 cmnd->dsm.command_id = req->tag; 731 cmnd->dsm.nsid = cpu_to_le32(ns->ns_id); 732 cmnd->dsm.prp1 = cpu_to_le64(iod->first_dma); 733 cmnd->dsm.nr = 0; 734 cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); 735 736 if (++nvmeq->sq_tail == nvmeq->q_depth) 737 nvmeq->sq_tail = 0; 738 writel(nvmeq->sq_tail, nvmeq->q_db); 739} 740 741static void nvme_submit_flush(struct nvme_queue *nvmeq, struct nvme_ns *ns, 742 int cmdid) 743{ 744 struct nvme_command *cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; 745 746 memset(cmnd, 0, sizeof(*cmnd)); 747 cmnd->common.opcode = nvme_cmd_flush; 748 cmnd->common.command_id = cmdid; 749 cmnd->common.nsid = cpu_to_le32(ns->ns_id); 750 751 if (++nvmeq->sq_tail == nvmeq->q_depth) 752 nvmeq->sq_tail = 0; 753 writel(nvmeq->sq_tail, nvmeq->q_db); 754} 755 756static int nvme_submit_iod(struct nvme_queue *nvmeq, struct nvme_iod *iod, 757 struct nvme_ns *ns) 758{ 759 struct request *req = iod_get_private(iod); 760 struct nvme_command *cmnd; 761 u16 control = 0; 762 u32 dsmgmt = 0; 763 764 if (req->cmd_flags & REQ_FUA) 765 control |= NVME_RW_FUA; 766 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD)) 767 control |= NVME_RW_LR; 768 769 if (req->cmd_flags & REQ_RAHEAD) 770 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH; 771 772 cmnd = &nvmeq->sq_cmds[nvmeq->sq_tail]; 773 memset(cmnd, 0, sizeof(*cmnd)); 774 775 cmnd->rw.opcode = (rq_data_dir(req) ? 
nvme_cmd_write : nvme_cmd_read); 776 cmnd->rw.command_id = req->tag; 777 cmnd->rw.nsid = cpu_to_le32(ns->ns_id); 778 cmnd->rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); 779 cmnd->rw.prp2 = cpu_to_le64(iod->first_dma); 780 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); 781 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); 782 783 if (blk_integrity_rq(req)) { 784 cmnd->rw.metadata = cpu_to_le64(sg_dma_address(iod->meta_sg)); 785 switch (ns->pi_type) { 786 case NVME_NS_DPS_PI_TYPE3: 787 control |= NVME_RW_PRINFO_PRCHK_GUARD; 788 break; 789 case NVME_NS_DPS_PI_TYPE1: 790 case NVME_NS_DPS_PI_TYPE2: 791 control |= NVME_RW_PRINFO_PRCHK_GUARD | 792 NVME_RW_PRINFO_PRCHK_REF; 793 cmnd->rw.reftag = cpu_to_le32( 794 nvme_block_nr(ns, blk_rq_pos(req))); 795 break; 796 } 797 } else if (ns->ms) 798 control |= NVME_RW_PRINFO_PRACT; 799 800 cmnd->rw.control = cpu_to_le16(control); 801 cmnd->rw.dsmgmt = cpu_to_le32(dsmgmt); 802 803 if (++nvmeq->sq_tail == nvmeq->q_depth) 804 nvmeq->sq_tail = 0; 805 writel(nvmeq->sq_tail, nvmeq->q_db); 806 807 return 0; 808} 809 810static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, 811 const struct blk_mq_queue_data *bd) 812{ 813 struct nvme_ns *ns = hctx->queue->queuedata; 814 struct nvme_queue *nvmeq = hctx->driver_data; 815 struct request *req = bd->rq; 816 struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); 817 struct nvme_iod *iod; 818 enum dma_data_direction dma_dir; 819 820 /* 821 * If formated with metadata, require the block layer provide a buffer 822 * unless this namespace is formated such that the metadata can be 823 * stripped/generated by the controller with PRACT=1. 824 */ 825 if (ns->ms && !blk_integrity_rq(req)) { 826 if (!(ns->pi_type && ns->ms == 8)) { 827 req->errors = -EFAULT; 828 blk_mq_complete_request(req); 829 return BLK_MQ_RQ_QUEUE_OK; 830 } 831 } 832 833 iod = nvme_alloc_iod(req, ns->dev, GFP_ATOMIC); 834 if (!iod) 835 return BLK_MQ_RQ_QUEUE_BUSY; 836 837 if (req->cmd_flags & REQ_DISCARD) { 838 void *range; 839 /* 840 * We reuse the small pool to allocate the 16-byte range here 841 * as it is not worth having a special pool for these or 842 * additional cases to handle freeing the iod. 843 */ 844 range = dma_pool_alloc(nvmeq->dev->prp_small_pool, 845 GFP_ATOMIC, 846 &iod->first_dma); 847 if (!range) 848 goto retry_cmd; 849 iod_list(iod)[0] = (__le64 *)range; 850 iod->npages = 0; 851 } else if (req->nr_phys_segments) { 852 dma_dir = rq_data_dir(req) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; 853 854 sg_init_table(iod->sg, req->nr_phys_segments); 855 iod->nents = blk_rq_map_sg(req->q, req, iod->sg); 856 if (!iod->nents) 857 goto error_cmd; 858 859 if (!dma_map_sg(nvmeq->q_dmadev, iod->sg, iod->nents, dma_dir)) 860 goto retry_cmd; 861 862 if (blk_rq_bytes(req) != 863 nvme_setup_prps(nvmeq->dev, iod, blk_rq_bytes(req), GFP_ATOMIC)) { 864 dma_unmap_sg(&nvmeq->dev->pci_dev->dev, iod->sg, 865 iod->nents, dma_dir); 866 goto retry_cmd; 867 } 868 if (blk_integrity_rq(req)) { 869 if (blk_rq_count_integrity_sg(req->q, req->bio) != 1) 870 goto error_cmd; 871 872 sg_init_table(iod->meta_sg, 1); 873 if (blk_rq_map_integrity_sg( 874 req->q, req->bio, iod->meta_sg) != 1) 875 goto error_cmd; 876 877 if (rq_data_dir(req)) 878 nvme_dif_remap(req, nvme_dif_prep); 879 880 if (!dma_map_sg(nvmeq->q_dmadev, iod->meta_sg, 1, dma_dir)) 881 goto error_cmd; 882 } 883 } 884 885 nvme_set_info(cmd, iod, req_completion); 886 spin_lock_irq(&nvmeq->q_lock); 887 if (req->cmd_flags & REQ_DISCARD) 888 nvme_submit_discard(nvmeq, ns, req, iod); 889 else if (req->cmd_flags & REQ_FLUSH) 890 nvme_submit_flush(nvmeq, ns, req->tag); 891 else 892 nvme_submit_iod(nvmeq, iod, ns); 893 894 nvme_process_cq(nvmeq); 895 spin_unlock_irq(&nvmeq->q_lock); 896 return BLK_MQ_RQ_QUEUE_OK; 897 898 error_cmd: 899 nvme_free_iod(nvmeq->dev, iod); 900 return BLK_MQ_RQ_QUEUE_ERROR; 901 retry_cmd: 902 nvme_free_iod(nvmeq->dev, iod); 903 return BLK_MQ_RQ_QUEUE_BUSY; 904} 905 906static int nvme_process_cq(struct nvme_queue *nvmeq) 907{ 908 u16 head, phase; 909 910 head = nvmeq->cq_head; 911 phase = nvmeq->cq_phase; 912 913 for (;;) { 914 void *ctx; 915 nvme_completion_fn fn; 916 struct nvme_completion cqe = nvmeq->cqes[head]; 917 if ((le16_to_cpu(cqe.status) & 1) != phase) 918 break; 919 nvmeq->sq_head = le16_to_cpu(cqe.sq_head); 920 if (++head == nvmeq->q_depth) { 921 head = 0; 922 phase = !phase; 923 } 924 ctx = nvme_finish_cmd(nvmeq, cqe.command_id, &fn); 925 fn(nvmeq, ctx, &cqe); 926 } 927 928 /* If the controller ignores the cq head doorbell and continuously 929 * writes to the queue, it is theoretically possible to wrap around 930 * the queue twice and mistakenly return IRQ_NONE. Linux only 931 * requires that 0.1% of your interrupts are handled, so this isn't 932 * a big problem. 933 */ 934 if (head == nvmeq->cq_head && phase == nvmeq->cq_phase) 935 return 0; 936 937 writel(head, nvmeq->q_db + nvmeq->dev->db_stride); 938 nvmeq->cq_head = head; 939 nvmeq->cq_phase = phase; 940 941 nvmeq->cqe_seen = 1; 942 return 1; 943} 944 945/* Admin queue isn't initialized as a request queue. If at some point this 946 * happens anyway, make sure to notify the user */ 947static int nvme_admin_queue_rq(struct blk_mq_hw_ctx *hctx, 948 const struct blk_mq_queue_data *bd) 949{ 950 WARN_ON_ONCE(1); 951 return BLK_MQ_RQ_QUEUE_ERROR; 952} 953 954static irqreturn_t nvme_irq(int irq, void *data) 955{ 956 irqreturn_t result; 957 struct nvme_queue *nvmeq = data; 958 spin_lock(&nvmeq->q_lock); 959 nvme_process_cq(nvmeq); 960 result = nvmeq->cqe_seen ? 
IRQ_HANDLED : IRQ_NONE; 961 nvmeq->cqe_seen = 0; 962 spin_unlock(&nvmeq->q_lock); 963 return result; 964} 965 966static irqreturn_t nvme_irq_check(int irq, void *data) 967{ 968 struct nvme_queue *nvmeq = data; 969 struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; 970 if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) 971 return IRQ_NONE; 972 return IRQ_WAKE_THREAD; 973} 974 975struct sync_cmd_info { 976 struct task_struct *task; 977 u32 result; 978 int status; 979}; 980 981static void sync_completion(struct nvme_queue *nvmeq, void *ctx, 982 struct nvme_completion *cqe) 983{ 984 struct sync_cmd_info *cmdinfo = ctx; 985 cmdinfo->result = le32_to_cpup(&cqe->result); 986 cmdinfo->status = le16_to_cpup(&cqe->status) >> 1; 987 wake_up_process(cmdinfo->task); 988} 989 990/* 991 * Returns 0 on success. If the result is negative, it's a Linux error code; 992 * if the result is positive, it's an NVM Express status code 993 */ 994static int nvme_submit_sync_cmd(struct request *req, struct nvme_command *cmd, 995 u32 *result, unsigned timeout) 996{ 997 struct sync_cmd_info cmdinfo; 998 struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); 999 struct nvme_queue *nvmeq = cmd_rq->nvmeq; 1000 1001 cmdinfo.task = current; 1002 cmdinfo.status = -EINTR; 1003 1004 cmd->common.command_id = req->tag; 1005 1006 nvme_set_info(cmd_rq, &cmdinfo, sync_completion); 1007 1008 set_current_state(TASK_UNINTERRUPTIBLE); 1009 nvme_submit_cmd(nvmeq, cmd); 1010 schedule(); 1011 1012 if (result) 1013 *result = cmdinfo.result; 1014 return cmdinfo.status; 1015} 1016 1017static int nvme_submit_async_admin_req(struct nvme_dev *dev) 1018{ 1019 struct nvme_queue *nvmeq = dev->queues[0]; 1020 struct nvme_command c; 1021 struct nvme_cmd_info *cmd_info; 1022 struct request *req; 1023 1024 req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, true); 1025 if (IS_ERR(req)) 1026 return PTR_ERR(req); 1027 1028 req->cmd_flags |= REQ_NO_TIMEOUT; 1029 cmd_info = blk_mq_rq_to_pdu(req); 1030 nvme_set_info(cmd_info, NULL, async_req_completion); 1031 1032 memset(&c, 0, sizeof(c)); 1033 c.common.opcode = nvme_admin_async_event; 1034 c.common.command_id = req->tag; 1035 1036 blk_mq_free_hctx_request(nvmeq->hctx, req); 1037 return __nvme_submit_cmd(nvmeq, &c); 1038} 1039 1040static int nvme_submit_admin_async_cmd(struct nvme_dev *dev, 1041 struct nvme_command *cmd, 1042 struct async_cmd_info *cmdinfo, unsigned timeout) 1043{ 1044 struct nvme_queue *nvmeq = dev->queues[0]; 1045 struct request *req; 1046 struct nvme_cmd_info *cmd_rq; 1047 1048 req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); 1049 if (IS_ERR(req)) 1050 return PTR_ERR(req); 1051 1052 req->timeout = timeout; 1053 cmd_rq = blk_mq_rq_to_pdu(req); 1054 cmdinfo->req = req; 1055 nvme_set_info(cmd_rq, cmdinfo, async_completion); 1056 cmdinfo->status = -EINTR; 1057 1058 cmd->common.command_id = req->tag; 1059 1060 return nvme_submit_cmd(nvmeq, cmd); 1061} 1062 1063static int __nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, 1064 u32 *result, unsigned timeout) 1065{ 1066 int res; 1067 struct request *req; 1068 1069 req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_KERNEL, false); 1070 if (IS_ERR(req)) 1071 return PTR_ERR(req); 1072 res = nvme_submit_sync_cmd(req, cmd, result, timeout); 1073 blk_mq_free_request(req); 1074 return res; 1075} 1076 1077int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, 1078 u32 *result) 1079{ 1080 return __nvme_submit_admin_cmd(dev, cmd, result, ADMIN_TIMEOUT); 1081} 1082 1083int 
nvme_submit_io_cmd(struct nvme_dev *dev, struct nvme_ns *ns, 1084 struct nvme_command *cmd, u32 *result) 1085{ 1086 int res; 1087 struct request *req; 1088 1089 req = blk_mq_alloc_request(ns->queue, WRITE, (GFP_KERNEL|__GFP_WAIT), 1090 false); 1091 if (IS_ERR(req)) 1092 return PTR_ERR(req); 1093 res = nvme_submit_sync_cmd(req, cmd, result, NVME_IO_TIMEOUT); 1094 blk_mq_free_request(req); 1095 return res; 1096} 1097 1098static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) 1099{ 1100 struct nvme_command c; 1101 1102 memset(&c, 0, sizeof(c)); 1103 c.delete_queue.opcode = opcode; 1104 c.delete_queue.qid = cpu_to_le16(id); 1105 1106 return nvme_submit_admin_cmd(dev, &c, NULL); 1107} 1108 1109static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, 1110 struct nvme_queue *nvmeq) 1111{ 1112 struct nvme_command c; 1113 int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; 1114 1115 memset(&c, 0, sizeof(c)); 1116 c.create_cq.opcode = nvme_admin_create_cq; 1117 c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr); 1118 c.create_cq.cqid = cpu_to_le16(qid); 1119 c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); 1120 c.create_cq.cq_flags = cpu_to_le16(flags); 1121 c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); 1122 1123 return nvme_submit_admin_cmd(dev, &c, NULL); 1124} 1125 1126static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, 1127 struct nvme_queue *nvmeq) 1128{ 1129 struct nvme_command c; 1130 int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; 1131 1132 memset(&c, 0, sizeof(c)); 1133 c.create_sq.opcode = nvme_admin_create_sq; 1134 c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr); 1135 c.create_sq.sqid = cpu_to_le16(qid); 1136 c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); 1137 c.create_sq.sq_flags = cpu_to_le16(flags); 1138 c.create_sq.cqid = cpu_to_le16(qid); 1139 1140 return nvme_submit_admin_cmd(dev, &c, NULL); 1141} 1142 1143static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) 1144{ 1145 return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid); 1146} 1147 1148static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) 1149{ 1150 return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); 1151} 1152 1153int nvme_identify(struct nvme_dev *dev, unsigned nsid, unsigned cns, 1154 dma_addr_t dma_addr) 1155{ 1156 struct nvme_command c; 1157 1158 memset(&c, 0, sizeof(c)); 1159 c.identify.opcode = nvme_admin_identify; 1160 c.identify.nsid = cpu_to_le32(nsid); 1161 c.identify.prp1 = cpu_to_le64(dma_addr); 1162 c.identify.cns = cpu_to_le32(cns); 1163 1164 return nvme_submit_admin_cmd(dev, &c, NULL); 1165} 1166 1167int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, 1168 dma_addr_t dma_addr, u32 *result) 1169{ 1170 struct nvme_command c; 1171 1172 memset(&c, 0, sizeof(c)); 1173 c.features.opcode = nvme_admin_get_features; 1174 c.features.nsid = cpu_to_le32(nsid); 1175 c.features.prp1 = cpu_to_le64(dma_addr); 1176 c.features.fid = cpu_to_le32(fid); 1177 1178 return nvme_submit_admin_cmd(dev, &c, result); 1179} 1180 1181int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, 1182 dma_addr_t dma_addr, u32 *result) 1183{ 1184 struct nvme_command c; 1185 1186 memset(&c, 0, sizeof(c)); 1187 c.features.opcode = nvme_admin_set_features; 1188 c.features.prp1 = cpu_to_le64(dma_addr); 1189 c.features.fid = cpu_to_le32(fid); 1190 c.features.dword11 = cpu_to_le32(dword11); 1191 1192 return nvme_submit_admin_cmd(dev, &c, result); 1193} 1194 1195/** 1196 * nvme_abort_req - Attempt aborting a request 1197 * 
1198 * Schedule controller reset if the command was already aborted once before and 1199 * still hasn't been returned to the driver, or if this is the admin queue. 1200 */ 1201static void nvme_abort_req(struct request *req) 1202{ 1203 struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); 1204 struct nvme_queue *nvmeq = cmd_rq->nvmeq; 1205 struct nvme_dev *dev = nvmeq->dev; 1206 struct request *abort_req; 1207 struct nvme_cmd_info *abort_cmd; 1208 struct nvme_command cmd; 1209 1210 if (!nvmeq->qid || cmd_rq->aborted) { 1211 unsigned long flags; 1212 1213 spin_lock_irqsave(&dev_list_lock, flags); 1214 if (work_busy(&dev->reset_work)) 1215 goto out; 1216 list_del_init(&dev->node); 1217 dev_warn(&dev->pci_dev->dev, 1218 "I/O %d QID %d timeout, reset controller\n", 1219 req->tag, nvmeq->qid); 1220 dev->reset_workfn = nvme_reset_failed_dev; 1221 queue_work(nvme_workq, &dev->reset_work); 1222 out: 1223 spin_unlock_irqrestore(&dev_list_lock, flags); 1224 return; 1225 } 1226 1227 if (!dev->abort_limit) 1228 return; 1229 1230 abort_req = blk_mq_alloc_request(dev->admin_q, WRITE, GFP_ATOMIC, 1231 false); 1232 if (IS_ERR(abort_req)) 1233 return; 1234 1235 abort_cmd = blk_mq_rq_to_pdu(abort_req); 1236 nvme_set_info(abort_cmd, abort_req, abort_completion); 1237 1238 memset(&cmd, 0, sizeof(cmd)); 1239 cmd.abort.opcode = nvme_admin_abort_cmd; 1240 cmd.abort.cid = req->tag; 1241 cmd.abort.sqid = cpu_to_le16(nvmeq->qid); 1242 cmd.abort.command_id = abort_req->tag; 1243 1244 --dev->abort_limit; 1245 cmd_rq->aborted = 1; 1246 1247 dev_warn(nvmeq->q_dmadev, "Aborting I/O %d QID %d\n", req->tag, 1248 nvmeq->qid); 1249 if (nvme_submit_cmd(dev->queues[0], &cmd) < 0) { 1250 dev_warn(nvmeq->q_dmadev, 1251 "Could not abort I/O %d QID %d", 1252 req->tag, nvmeq->qid); 1253 blk_mq_free_request(abort_req); 1254 } 1255} 1256 1257static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx, 1258 struct request *req, void *data, bool reserved) 1259{ 1260 struct nvme_queue *nvmeq = data; 1261 void *ctx; 1262 nvme_completion_fn fn; 1263 struct nvme_cmd_info *cmd; 1264 struct nvme_completion cqe; 1265 1266 if (!blk_mq_request_started(req)) 1267 return; 1268 1269 cmd = blk_mq_rq_to_pdu(req); 1270 1271 if (cmd->ctx == CMD_CTX_CANCELLED) 1272 return; 1273 1274 if (blk_queue_dying(req->q)) 1275 cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1276 else 1277 cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); 1278 1279 1280 dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n", 1281 req->tag, nvmeq->qid); 1282 ctx = cancel_cmd_info(cmd, &fn); 1283 fn(nvmeq, ctx, &cqe); 1284} 1285 1286static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) 1287{ 1288 struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req); 1289 struct nvme_queue *nvmeq = cmd->nvmeq; 1290 1291 dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag, 1292 nvmeq->qid); 1293 spin_lock_irq(&nvmeq->q_lock); 1294 nvme_abort_req(req); 1295 spin_unlock_irq(&nvmeq->q_lock); 1296 1297 /* 1298 * The aborted req will be completed on receiving the abort req. 1299 * We enable the timer again. If hit twice, it'll cause a device reset, 1300 * as the device then is in a faulty state. 
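 * (nvme_abort_req() sets cmd_rq->aborted on the first timeout, so a second
 *  timeout of the same request takes the controller-reset path rather than
 *  issuing another abort command.)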
1301 */ 1302 return BLK_EH_RESET_TIMER; 1303} 1304 1305static void nvme_free_queue(struct nvme_queue *nvmeq) 1306{ 1307 dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), 1308 (void *)nvmeq->cqes, nvmeq->cq_dma_addr); 1309 dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), 1310 nvmeq->sq_cmds, nvmeq->sq_dma_addr); 1311 kfree(nvmeq); 1312} 1313 1314static void nvme_free_queues(struct nvme_dev *dev, int lowest) 1315{ 1316 int i; 1317 1318 for (i = dev->queue_count - 1; i >= lowest; i--) { 1319 struct nvme_queue *nvmeq = dev->queues[i]; 1320 dev->queue_count--; 1321 dev->queues[i] = NULL; 1322 nvme_free_queue(nvmeq); 1323 } 1324} 1325 1326/** 1327 * nvme_suspend_queue - put queue into suspended state 1328 * @nvmeq - queue to suspend 1329 */ 1330static int nvme_suspend_queue(struct nvme_queue *nvmeq) 1331{ 1332 int vector; 1333 1334 spin_lock_irq(&nvmeq->q_lock); 1335 if (nvmeq->cq_vector == -1) { 1336 spin_unlock_irq(&nvmeq->q_lock); 1337 return 1; 1338 } 1339 vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; 1340 nvmeq->dev->online_queues--; 1341 nvmeq->cq_vector = -1; 1342 spin_unlock_irq(&nvmeq->q_lock); 1343 1344 if (!nvmeq->qid && nvmeq->dev->admin_q) 1345 blk_mq_freeze_queue_start(nvmeq->dev->admin_q); 1346 1347 irq_set_affinity_hint(vector, NULL); 1348 free_irq(vector, nvmeq); 1349 1350 return 0; 1351} 1352 1353static void nvme_clear_queue(struct nvme_queue *nvmeq) 1354{ 1355 struct blk_mq_hw_ctx *hctx = nvmeq->hctx; 1356 1357 spin_lock_irq(&nvmeq->q_lock); 1358 if (hctx && hctx->tags) 1359 blk_mq_tag_busy_iter(hctx, nvme_cancel_queue_ios, nvmeq); 1360 spin_unlock_irq(&nvmeq->q_lock); 1361} 1362 1363static void nvme_disable_queue(struct nvme_dev *dev, int qid) 1364{ 1365 struct nvme_queue *nvmeq = dev->queues[qid]; 1366 1367 if (!nvmeq) 1368 return; 1369 if (nvme_suspend_queue(nvmeq)) 1370 return; 1371 1372 /* Don't tell the adapter to delete the admin queue. 1373 * Don't tell a removed adapter to delete IO queues. 
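 * A surprise-removed device reads back all-ones from MMIO, hence the
 * csts != -1 check below.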
*/ 1374 if (qid && readl(&dev->bar->csts) != -1) { 1375 adapter_delete_sq(dev, qid); 1376 adapter_delete_cq(dev, qid); 1377 } 1378 1379 spin_lock_irq(&nvmeq->q_lock); 1380 nvme_process_cq(nvmeq); 1381 spin_unlock_irq(&nvmeq->q_lock); 1382} 1383 1384static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, 1385 int depth) 1386{ 1387 struct device *dmadev = &dev->pci_dev->dev; 1388 struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL); 1389 if (!nvmeq) 1390 return NULL; 1391 1392 nvmeq->cqes = dma_zalloc_coherent(dmadev, CQ_SIZE(depth), 1393 &nvmeq->cq_dma_addr, GFP_KERNEL); 1394 if (!nvmeq->cqes) 1395 goto free_nvmeq; 1396 1397 nvmeq->sq_cmds = dma_alloc_coherent(dmadev, SQ_SIZE(depth), 1398 &nvmeq->sq_dma_addr, GFP_KERNEL); 1399 if (!nvmeq->sq_cmds) 1400 goto free_cqdma; 1401 1402 nvmeq->q_dmadev = dmadev; 1403 nvmeq->dev = dev; 1404 snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d", 1405 dev->instance, qid); 1406 spin_lock_init(&nvmeq->q_lock); 1407 nvmeq->cq_head = 0; 1408 nvmeq->cq_phase = 1; 1409 nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; 1410 nvmeq->q_depth = depth; 1411 nvmeq->qid = qid; 1412 dev->queue_count++; 1413 dev->queues[qid] = nvmeq; 1414 1415 return nvmeq; 1416 1417 free_cqdma: 1418 dma_free_coherent(dmadev, CQ_SIZE(depth), (void *)nvmeq->cqes, 1419 nvmeq->cq_dma_addr); 1420 free_nvmeq: 1421 kfree(nvmeq); 1422 return NULL; 1423} 1424 1425static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, 1426 const char *name) 1427{ 1428 if (use_threaded_interrupts) 1429 return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector, 1430 nvme_irq_check, nvme_irq, IRQF_SHARED, 1431 name, nvmeq); 1432 return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq, 1433 IRQF_SHARED, name, nvmeq); 1434} 1435 1436static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) 1437{ 1438 struct nvme_dev *dev = nvmeq->dev; 1439 1440 spin_lock_irq(&nvmeq->q_lock); 1441 nvmeq->sq_tail = 0; 1442 nvmeq->cq_head = 0; 1443 nvmeq->cq_phase = 1; 1444 nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; 1445 memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); 1446 dev->online_queues++; 1447 spin_unlock_irq(&nvmeq->q_lock); 1448} 1449 1450static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) 1451{ 1452 struct nvme_dev *dev = nvmeq->dev; 1453 int result; 1454 1455 nvmeq->cq_vector = qid - 1; 1456 result = adapter_alloc_cq(dev, qid, nvmeq); 1457 if (result < 0) 1458 return result; 1459 1460 result = adapter_alloc_sq(dev, qid, nvmeq); 1461 if (result < 0) 1462 goto release_cq; 1463 1464 result = queue_request_irq(dev, nvmeq, nvmeq->irqname); 1465 if (result < 0) 1466 goto release_sq; 1467 1468 nvme_init_queue(nvmeq, qid); 1469 return result; 1470 1471 release_sq: 1472 adapter_delete_sq(dev, qid); 1473 release_cq: 1474 adapter_delete_cq(dev, qid); 1475 return result; 1476} 1477 1478static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled) 1479{ 1480 unsigned long timeout; 1481 u32 bit = enabled ? NVME_CSTS_RDY : 0; 1482 1483 timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; 1484 1485 while ((readl(&dev->bar->csts) & NVME_CSTS_RDY) != bit) { 1486 msleep(100); 1487 if (fatal_signal_pending(current)) 1488 return -EINTR; 1489 if (time_after(jiffies, timeout)) { 1490 dev_err(&dev->pci_dev->dev, 1491 "Device not ready; aborting %s\n", enabled ? 
1492 "initialisation" : "reset"); 1493 return -ENODEV; 1494 } 1495 } 1496 1497 return 0; 1498} 1499 1500/* 1501 * If the device has been passed off to us in an enabled state, just clear 1502 * the enabled bit. The spec says we should set the 'shutdown notification 1503 * bits', but doing so may cause the device to complete commands to the 1504 * admin queue ... and we don't know what memory that might be pointing at! 1505 */ 1506static int nvme_disable_ctrl(struct nvme_dev *dev, u64 cap) 1507{ 1508 dev->ctrl_config &= ~NVME_CC_SHN_MASK; 1509 dev->ctrl_config &= ~NVME_CC_ENABLE; 1510 writel(dev->ctrl_config, &dev->bar->cc); 1511 1512 return nvme_wait_ready(dev, cap, false); 1513} 1514 1515static int nvme_enable_ctrl(struct nvme_dev *dev, u64 cap) 1516{ 1517 dev->ctrl_config &= ~NVME_CC_SHN_MASK; 1518 dev->ctrl_config |= NVME_CC_ENABLE; 1519 writel(dev->ctrl_config, &dev->bar->cc); 1520 1521 return nvme_wait_ready(dev, cap, true); 1522} 1523 1524static int nvme_shutdown_ctrl(struct nvme_dev *dev) 1525{ 1526 unsigned long timeout; 1527 1528 dev->ctrl_config &= ~NVME_CC_SHN_MASK; 1529 dev->ctrl_config |= NVME_CC_SHN_NORMAL; 1530 1531 writel(dev->ctrl_config, &dev->bar->cc); 1532 1533 timeout = SHUTDOWN_TIMEOUT + jiffies; 1534 while ((readl(&dev->bar->csts) & NVME_CSTS_SHST_MASK) != 1535 NVME_CSTS_SHST_CMPLT) { 1536 msleep(100); 1537 if (fatal_signal_pending(current)) 1538 return -EINTR; 1539 if (time_after(jiffies, timeout)) { 1540 dev_err(&dev->pci_dev->dev, 1541 "Device shutdown incomplete; abort shutdown\n"); 1542 return -ENODEV; 1543 } 1544 } 1545 1546 return 0; 1547} 1548 1549static struct blk_mq_ops nvme_mq_admin_ops = { 1550 .queue_rq = nvme_admin_queue_rq, 1551 .map_queue = blk_mq_map_queue, 1552 .init_hctx = nvme_admin_init_hctx, 1553 .exit_hctx = nvme_exit_hctx, 1554 .init_request = nvme_admin_init_request, 1555 .timeout = nvme_timeout, 1556}; 1557 1558static struct blk_mq_ops nvme_mq_ops = { 1559 .queue_rq = nvme_queue_rq, 1560 .map_queue = blk_mq_map_queue, 1561 .init_hctx = nvme_init_hctx, 1562 .exit_hctx = nvme_exit_hctx, 1563 .init_request = nvme_init_request, 1564 .timeout = nvme_timeout, 1565}; 1566 1567static void nvme_dev_remove_admin(struct nvme_dev *dev) 1568{ 1569 if (dev->admin_q && !blk_queue_dying(dev->admin_q)) { 1570 blk_cleanup_queue(dev->admin_q); 1571 blk_mq_free_tag_set(&dev->admin_tagset); 1572 } 1573} 1574 1575static int nvme_alloc_admin_tags(struct nvme_dev *dev) 1576{ 1577 if (!dev->admin_q) { 1578 dev->admin_tagset.ops = &nvme_mq_admin_ops; 1579 dev->admin_tagset.nr_hw_queues = 1; 1580 dev->admin_tagset.queue_depth = NVME_AQ_DEPTH - 1; 1581 dev->admin_tagset.reserved_tags = 1; 1582 dev->admin_tagset.timeout = ADMIN_TIMEOUT; 1583 dev->admin_tagset.numa_node = dev_to_node(&dev->pci_dev->dev); 1584 dev->admin_tagset.cmd_size = nvme_cmd_size(dev); 1585 dev->admin_tagset.driver_data = dev; 1586 1587 if (blk_mq_alloc_tag_set(&dev->admin_tagset)) 1588 return -ENOMEM; 1589 1590 dev->admin_q = blk_mq_init_queue(&dev->admin_tagset); 1591 if (IS_ERR(dev->admin_q)) { 1592 blk_mq_free_tag_set(&dev->admin_tagset); 1593 return -ENOMEM; 1594 } 1595 if (!blk_get_queue(dev->admin_q)) { 1596 nvme_dev_remove_admin(dev); 1597 return -ENODEV; 1598 } 1599 } else 1600 blk_mq_unfreeze_queue(dev->admin_q); 1601 1602 return 0; 1603} 1604 1605static int nvme_configure_admin_queue(struct nvme_dev *dev) 1606{ 1607 int result; 1608 u32 aqa; 1609 u64 cap = readq(&dev->bar->cap); 1610 struct nvme_queue *nvmeq; 1611 unsigned page_shift = PAGE_SHIFT; 1612 unsigned dev_page_min = 
NVME_CAP_MPSMIN(cap) + 12; 1613 unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; 1614 1615 if (page_shift < dev_page_min) { 1616 dev_err(&dev->pci_dev->dev, 1617 "Minimum device page size (%u) too large for " 1618 "host (%u)\n", 1 << dev_page_min, 1619 1 << page_shift); 1620 return -ENODEV; 1621 } 1622 if (page_shift > dev_page_max) { 1623 dev_info(&dev->pci_dev->dev, 1624 "Device maximum page size (%u) smaller than " 1625 "host (%u); enabling work-around\n", 1626 1 << dev_page_max, 1 << page_shift); 1627 page_shift = dev_page_max; 1628 } 1629 1630 result = nvme_disable_ctrl(dev, cap); 1631 if (result < 0) 1632 return result; 1633 1634 nvmeq = dev->queues[0]; 1635 if (!nvmeq) { 1636 nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); 1637 if (!nvmeq) 1638 return -ENOMEM; 1639 } 1640 1641 aqa = nvmeq->q_depth - 1; 1642 aqa |= aqa << 16; 1643 1644 dev->page_size = 1 << page_shift; 1645 1646 dev->ctrl_config = NVME_CC_CSS_NVM; 1647 dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; 1648 dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; 1649 dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; 1650 1651 writel(aqa, &dev->bar->aqa); 1652 writeq(nvmeq->sq_dma_addr, &dev->bar->asq); 1653 writeq(nvmeq->cq_dma_addr, &dev->bar->acq); 1654 1655 result = nvme_enable_ctrl(dev, cap); 1656 if (result) 1657 goto free_nvmeq; 1658 1659 nvmeq->cq_vector = 0; 1660 result = queue_request_irq(dev, nvmeq, nvmeq->irqname); 1661 if (result) 1662 goto free_nvmeq; 1663 1664 return result; 1665 1666 free_nvmeq: 1667 nvme_free_queues(dev, 0); 1668 return result; 1669} 1670 1671struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, 1672 unsigned long addr, unsigned length) 1673{ 1674 int i, err, count, nents, offset; 1675 struct scatterlist *sg; 1676 struct page **pages; 1677 struct nvme_iod *iod; 1678 1679 if (addr & 3) 1680 return ERR_PTR(-EINVAL); 1681 if (!length || length > INT_MAX - PAGE_SIZE) 1682 return ERR_PTR(-EINVAL); 1683 1684 offset = offset_in_page(addr); 1685 count = DIV_ROUND_UP(offset + length, PAGE_SIZE); 1686 pages = kcalloc(count, sizeof(*pages), GFP_KERNEL); 1687 if (!pages) 1688 return ERR_PTR(-ENOMEM); 1689 1690 err = get_user_pages_fast(addr, count, 1, pages); 1691 if (err < count) { 1692 count = err; 1693 err = -EFAULT; 1694 goto put_pages; 1695 } 1696 1697 err = -ENOMEM; 1698 iod = __nvme_alloc_iod(count, length, dev, 0, GFP_KERNEL); 1699 if (!iod) 1700 goto put_pages; 1701 1702 sg = iod->sg; 1703 sg_init_table(sg, count); 1704 for (i = 0; i < count; i++) { 1705 sg_set_page(&sg[i], pages[i], 1706 min_t(unsigned, length, PAGE_SIZE - offset), 1707 offset); 1708 length -= (PAGE_SIZE - offset); 1709 offset = 0; 1710 } 1711 sg_mark_end(&sg[i - 1]); 1712 iod->nents = count; 1713 1714 nents = dma_map_sg(&dev->pci_dev->dev, sg, count, 1715 write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 1716 if (!nents) 1717 goto free_iod; 1718 1719 kfree(pages); 1720 return iod; 1721 1722 free_iod: 1723 kfree(iod); 1724 put_pages: 1725 for (i = 0; i < count; i++) 1726 put_page(pages[i]); 1727 kfree(pages); 1728 return ERR_PTR(err); 1729} 1730 1731void nvme_unmap_user_pages(struct nvme_dev *dev, int write, 1732 struct nvme_iod *iod) 1733{ 1734 int i; 1735 1736 dma_unmap_sg(&dev->pci_dev->dev, iod->sg, iod->nents, 1737 write ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE); 1738 1739 for (i = 0; i < iod->nents; i++) 1740 put_page(sg_page(&iod->sg[i])); 1741} 1742 1743static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) 1744{ 1745 struct nvme_dev *dev = ns->dev; 1746 struct nvme_user_io io; 1747 struct nvme_command c; 1748 unsigned length, meta_len, prp_len; 1749 int status, write; 1750 struct nvme_iod *iod; 1751 dma_addr_t meta_dma = 0; 1752 void *meta = NULL; 1753 void __user *metadata; 1754 1755 if (copy_from_user(&io, uio, sizeof(io))) 1756 return -EFAULT; 1757 length = (io.nblocks + 1) << ns->lba_shift; 1758 meta_len = (io.nblocks + 1) * ns->ms; 1759 1760 if (meta_len && ((io.metadata & 3) || !io.metadata) && !ns->ext) 1761 return -EINVAL; 1762 else if (meta_len && ns->ext) { 1763 length += meta_len; 1764 meta_len = 0; 1765 } 1766 1767 metadata = (void __user *)(unsigned long)io.metadata; 1768 1769 write = io.opcode & 1; 1770 1771 switch (io.opcode) { 1772 case nvme_cmd_write: 1773 case nvme_cmd_read: 1774 case nvme_cmd_compare: 1775 iod = nvme_map_user_pages(dev, write, io.addr, length); 1776 break; 1777 default: 1778 return -EINVAL; 1779 } 1780 1781 if (IS_ERR(iod)) 1782 return PTR_ERR(iod); 1783 1784 prp_len = nvme_setup_prps(dev, iod, length, GFP_KERNEL); 1785 if (length != prp_len) { 1786 status = -ENOMEM; 1787 goto unmap; 1788 } 1789 if (meta_len) { 1790 meta = dma_alloc_coherent(&dev->pci_dev->dev, meta_len, 1791 &meta_dma, GFP_KERNEL); 1792 1793 if (!meta) { 1794 status = -ENOMEM; 1795 goto unmap; 1796 } 1797 if (write) { 1798 if (copy_from_user(meta, metadata, meta_len)) { 1799 status = -EFAULT; 1800 goto unmap; 1801 } 1802 } 1803 } 1804 1805 memset(&c, 0, sizeof(c)); 1806 c.rw.opcode = io.opcode; 1807 c.rw.flags = io.flags; 1808 c.rw.nsid = cpu_to_le32(ns->ns_id); 1809 c.rw.slba = cpu_to_le64(io.slba); 1810 c.rw.length = cpu_to_le16(io.nblocks); 1811 c.rw.control = cpu_to_le16(io.control); 1812 c.rw.dsmgmt = cpu_to_le32(io.dsmgmt); 1813 c.rw.reftag = cpu_to_le32(io.reftag); 1814 c.rw.apptag = cpu_to_le16(io.apptag); 1815 c.rw.appmask = cpu_to_le16(io.appmask); 1816 c.rw.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); 1817 c.rw.prp2 = cpu_to_le64(iod->first_dma); 1818 c.rw.metadata = cpu_to_le64(meta_dma); 1819 status = nvme_submit_io_cmd(dev, ns, &c, NULL); 1820 unmap: 1821 nvme_unmap_user_pages(dev, write, iod); 1822 nvme_free_iod(dev, iod); 1823 if (meta) { 1824 if (status == NVME_SC_SUCCESS && !write) { 1825 if (copy_to_user(metadata, meta, meta_len)) 1826 status = -EFAULT; 1827 } 1828 dma_free_coherent(&dev->pci_dev->dev, meta_len, meta, meta_dma); 1829 } 1830 return status; 1831} 1832 1833static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, 1834 struct nvme_passthru_cmd __user *ucmd) 1835{ 1836 struct nvme_passthru_cmd cmd; 1837 struct nvme_command c; 1838 int status, length; 1839 struct nvme_iod *uninitialized_var(iod); 1840 unsigned timeout; 1841 1842 if (!capable(CAP_SYS_ADMIN)) 1843 return -EACCES; 1844 if (copy_from_user(&cmd, ucmd, sizeof(cmd))) 1845 return -EFAULT; 1846 1847 memset(&c, 0, sizeof(c)); 1848 c.common.opcode = cmd.opcode; 1849 c.common.flags = cmd.flags; 1850 c.common.nsid = cpu_to_le32(cmd.nsid); 1851 c.common.cdw2[0] = cpu_to_le32(cmd.cdw2); 1852 c.common.cdw2[1] = cpu_to_le32(cmd.cdw3); 1853 c.common.cdw10[0] = cpu_to_le32(cmd.cdw10); 1854 c.common.cdw10[1] = cpu_to_le32(cmd.cdw11); 1855 c.common.cdw10[2] = cpu_to_le32(cmd.cdw12); 1856 c.common.cdw10[3] = cpu_to_le32(cmd.cdw13); 1857 c.common.cdw10[4] = cpu_to_le32(cmd.cdw14); 1858 
c.common.cdw10[5] = cpu_to_le32(cmd.cdw15); 1859 1860 length = cmd.data_len; 1861 if (cmd.data_len) { 1862 iod = nvme_map_user_pages(dev, cmd.opcode & 1, cmd.addr, 1863 length); 1864 if (IS_ERR(iod)) 1865 return PTR_ERR(iod); 1866 length = nvme_setup_prps(dev, iod, length, GFP_KERNEL); 1867 c.common.prp1 = cpu_to_le64(sg_dma_address(iod->sg)); 1868 c.common.prp2 = cpu_to_le64(iod->first_dma); 1869 } 1870 1871 timeout = cmd.timeout_ms ? msecs_to_jiffies(cmd.timeout_ms) : 1872 ADMIN_TIMEOUT; 1873 1874 if (length != cmd.data_len) 1875 status = -ENOMEM; 1876 else if (ns) { 1877 struct request *req; 1878 1879 req = blk_mq_alloc_request(ns->queue, WRITE, 1880 (GFP_KERNEL|__GFP_WAIT), false); 1881 if (IS_ERR(req)) 1882 status = PTR_ERR(req); 1883 else { 1884 status = nvme_submit_sync_cmd(req, &c, &cmd.result, 1885 timeout); 1886 blk_mq_free_request(req); 1887 } 1888 } else 1889 status = __nvme_submit_admin_cmd(dev, &c, &cmd.result, timeout); 1890 1891 if (cmd.data_len) { 1892 nvme_unmap_user_pages(dev, cmd.opcode & 1, iod); 1893 nvme_free_iod(dev, iod); 1894 } 1895 1896 if ((status >= 0) && copy_to_user(&ucmd->result, &cmd.result, 1897 sizeof(cmd.result))) 1898 status = -EFAULT; 1899 1900 return status; 1901} 1902 1903static int nvme_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, 1904 unsigned long arg) 1905{ 1906 struct nvme_ns *ns = bdev->bd_disk->private_data; 1907 1908 switch (cmd) { 1909 case NVME_IOCTL_ID: 1910 force_successful_syscall_return(); 1911 return ns->ns_id; 1912 case NVME_IOCTL_ADMIN_CMD: 1913 return nvme_user_cmd(ns->dev, NULL, (void __user *)arg); 1914 case NVME_IOCTL_IO_CMD: 1915 return nvme_user_cmd(ns->dev, ns, (void __user *)arg); 1916 case NVME_IOCTL_SUBMIT_IO: 1917 return nvme_submit_io(ns, (void __user *)arg); 1918 case SG_GET_VERSION_NUM: 1919 return nvme_sg_get_version_num((void __user *)arg); 1920 case SG_IO: 1921 return nvme_sg_io(ns, (void __user *)arg); 1922 default: 1923 return -ENOTTY; 1924 } 1925} 1926 1927#ifdef CONFIG_COMPAT 1928static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode, 1929 unsigned int cmd, unsigned long arg) 1930{ 1931 switch (cmd) { 1932 case SG_IO: 1933 return -ENOIOCTLCMD; 1934 } 1935 return nvme_ioctl(bdev, mode, cmd, arg); 1936} 1937#else 1938#define nvme_compat_ioctl NULL 1939#endif 1940 1941static int nvme_open(struct block_device *bdev, fmode_t mode) 1942{ 1943 int ret = 0; 1944 struct nvme_ns *ns; 1945 1946 spin_lock(&dev_list_lock); 1947 ns = bdev->bd_disk->private_data; 1948 if (!ns) 1949 ret = -ENXIO; 1950 else if (!kref_get_unless_zero(&ns->dev->kref)) 1951 ret = -ENXIO; 1952 spin_unlock(&dev_list_lock); 1953 1954 return ret; 1955} 1956 1957static void nvme_free_dev(struct kref *kref); 1958 1959static void nvme_release(struct gendisk *disk, fmode_t mode) 1960{ 1961 struct nvme_ns *ns = disk->private_data; 1962 struct nvme_dev *dev = ns->dev; 1963 1964 kref_put(&dev->kref, nvme_free_dev); 1965} 1966 1967static int nvme_getgeo(struct block_device *bd, struct hd_geometry *geo) 1968{ 1969 /* some standard values */ 1970 geo->heads = 1 << 6; 1971 geo->sectors = 1 << 5; 1972 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 1973 return 0; 1974} 1975 1976static void nvme_config_discard(struct nvme_ns *ns) 1977{ 1978 u32 logical_block_size = queue_logical_block_size(ns->queue); 1979 ns->queue->limits.discard_zeroes_data = 0; 1980 ns->queue->limits.discard_alignment = logical_block_size; 1981 ns->queue->limits.discard_granularity = logical_block_size; 1982 ns->queue->limits.max_discard_sectors = 0xffffffff; 
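	/* 0xffffffff is the largest value this 32-bit limit holds; with 512-byte
	 * sectors that is roughly 2 TiB per discard request (illustrative figure). */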
1983 queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, ns->queue); 1984} 1985 1986static int nvme_revalidate_disk(struct gendisk *disk) 1987{ 1988 struct nvme_ns *ns = disk->private_data; 1989 struct nvme_dev *dev = ns->dev; 1990 struct nvme_id_ns *id; 1991 dma_addr_t dma_addr; 1992 u8 lbaf, pi_type; 1993 u16 old_ms; 1994 unsigned short bs; 1995 1996 id = dma_alloc_coherent(&dev->pci_dev->dev, 4096, &dma_addr, 1997 GFP_KERNEL); 1998 if (!id) { 1999 dev_warn(&dev->pci_dev->dev, "%s: Memory allocation failure\n", 2000 __func__); 2001 return 0; 2002 } 2003 if (nvme_identify(dev, ns->ns_id, 0, dma_addr)) { 2004 dev_warn(&dev->pci_dev->dev, 2005 "identify failed ns:%d, setting capacity to 0\n", 2006 ns->ns_id); 2007 memset(id, 0, sizeof(*id)); 2008 } 2009 2010 old_ms = ns->ms; 2011 lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; 2012 ns->lba_shift = id->lbaf[lbaf].ds; 2013 ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); 2014 ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); 2015 2016 /* 2017 * If identify namespace failed, use a default 512 byte block size so 2018 * the block layer can use it before reads/writes fail for 0 capacity. 2019 */ 2020 if (ns->lba_shift == 0) 2021 ns->lba_shift = 9; 2022 bs = 1 << ns->lba_shift; 2023 2024 /* XXX: PI implementation requires metadata equal to the t10 pi tuple size */ 2025 pi_type = ns->ms == sizeof(struct t10_pi_tuple) ? 2026 id->dps & NVME_NS_DPS_PI_MASK : 0; 2027 2028 if (blk_get_integrity(disk) && (ns->pi_type != pi_type || 2029 ns->ms != old_ms || 2030 bs != queue_logical_block_size(disk->queue) || 2031 (ns->ms && ns->ext))) 2032 blk_integrity_unregister(disk); 2033 2034 ns->pi_type = pi_type; 2035 blk_queue_logical_block_size(ns->queue, bs); 2036 2037 if (ns->ms && !blk_get_integrity(disk) && (disk->flags & GENHD_FL_UP) && 2038 !ns->ext) 2039 nvme_init_integrity(ns); 2040 2041 if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk))) 2042 set_capacity(disk, 0); 2043 else 2044 set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9)); 2045 2046 if (dev->oncs & NVME_CTRL_ONCS_DSM) 2047 nvme_config_discard(ns); 2048 2049 dma_free_coherent(&dev->pci_dev->dev, 4096, id, dma_addr); 2050 return 0; 2051} 2052 2053static const struct block_device_operations nvme_fops = { 2054 .owner = THIS_MODULE, 2055 .ioctl = nvme_ioctl, 2056 .compat_ioctl = nvme_compat_ioctl, 2057 .open = nvme_open, 2058 .release = nvme_release, 2059 .getgeo = nvme_getgeo, 2060 .revalidate_disk = nvme_revalidate_disk, 2061}; 2062 2063static int nvme_kthread(void *data) 2064{ 2065 struct nvme_dev *dev, *next; 2066 2067 while (!kthread_should_stop()) { 2068 set_current_state(TASK_INTERRUPTIBLE); 2069 spin_lock(&dev_list_lock); 2070 list_for_each_entry_safe(dev, next, &dev_list, node) { 2071 int i; 2072 if (readl(&dev->bar->csts) & NVME_CSTS_CFS) { 2073 if (work_busy(&dev->reset_work)) 2074 continue; 2075 list_del_init(&dev->node); 2076 dev_warn(&dev->pci_dev->dev, 2077 "Failed status: %x, reset controller\n", 2078 readl(&dev->bar->csts)); 2079 dev->reset_workfn = nvme_reset_failed_dev; 2080 queue_work(nvme_workq, &dev->reset_work); 2081 continue; 2082 } 2083 for (i = 0; i < dev->queue_count; i++) { 2084 struct nvme_queue *nvmeq = dev->queues[i]; 2085 if (!nvmeq) 2086 continue; 2087 spin_lock_irq(&nvmeq->q_lock); 2088 nvme_process_cq(nvmeq); 2089 2090 while ((i == 0) && (dev->event_limit > 0)) { 2091 if (nvme_submit_async_admin_req(dev)) 2092 break; 2093 dev->event_limit--; 2094 } 2095 spin_unlock_irq(&nvmeq->q_lock); 2096 } 2097 } 2098 spin_unlock(&dev_list_lock); 2099
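	/* Sleep for roughly one second before the next polling pass. */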
schedule_timeout(round_jiffies_relative(HZ)); 2100 } 2101 return 0; 2102} 2103 2104static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid) 2105{ 2106 struct nvme_ns *ns; 2107 struct gendisk *disk; 2108 int node = dev_to_node(&dev->pci_dev->dev); 2109 2110 ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node); 2111 if (!ns) 2112 return; 2113 2114 ns->queue = blk_mq_init_queue(&dev->tagset); 2115 if (IS_ERR(ns->queue)) 2116 goto out_free_ns; 2117 queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, ns->queue); 2118 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, ns->queue); 2119 queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, ns->queue); 2120 ns->dev = dev; 2121 ns->queue->queuedata = ns; 2122 2123 disk = alloc_disk_node(0, node); 2124 if (!disk) 2125 goto out_free_queue; 2126 2127 ns->ns_id = nsid; 2128 ns->disk = disk; 2129 ns->lba_shift = 9; /* default to 512 byte sectors until the disk is validated */ 2130 list_add_tail(&ns->list, &dev->namespaces); 2131 2132 blk_queue_logical_block_size(ns->queue, 1 << ns->lba_shift); 2133 if (dev->max_hw_sectors) 2134 blk_queue_max_hw_sectors(ns->queue, dev->max_hw_sectors); 2135 if (dev->stripe_size) 2136 blk_queue_chunk_sectors(ns->queue, dev->stripe_size >> 9); 2137 if (dev->vwc & NVME_CTRL_VWC_PRESENT) 2138 blk_queue_flush(ns->queue, REQ_FLUSH | REQ_FUA); 2139 2140 disk->major = nvme_major; 2141 disk->first_minor = 0; 2142 disk->fops = &nvme_fops; 2143 disk->private_data = ns; 2144 disk->queue = ns->queue; 2145 disk->driverfs_dev = dev->device; 2146 disk->flags = GENHD_FL_EXT_DEVT; 2147 sprintf(disk->disk_name, "nvme%dn%d", dev->instance, nsid); 2148 2149 /* 2150 * Initialize capacity to 0 until we establish the namespace format and 2151 * set up integrity extensions if necessary. The revalidate_disk after 2152 * add_disk allows the driver to register with integrity if the format 2153 * requires it.
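 * (nvme_init_integrity is only called once add_disk has set GENHD_FL_UP on the disk.)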
2154 */ 2155 set_capacity(disk, 0); 2156 nvme_revalidate_disk(ns->disk); 2157 add_disk(ns->disk); 2158 if (ns->ms) 2159 revalidate_disk(ns->disk); 2160 return; 2161 out_free_queue: 2162 blk_cleanup_queue(ns->queue); 2163 out_free_ns: 2164 kfree(ns); 2165} 2166 2167static void nvme_create_io_queues(struct nvme_dev *dev) 2168{ 2169 unsigned i; 2170 2171 for (i = dev->queue_count; i <= dev->max_qid; i++) 2172 if (!nvme_alloc_queue(dev, i, dev->q_depth)) 2173 break; 2174 2175 for (i = dev->online_queues; i <= dev->queue_count - 1; i++) 2176 if (nvme_create_queue(dev->queues[i], i)) 2177 break; 2178} 2179 2180static int set_queue_count(struct nvme_dev *dev, int count) 2181{ 2182 int status; 2183 u32 result; 2184 u32 q_count = (count - 1) | ((count - 1) << 16); 2185 2186 status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, q_count, 0, 2187 &result); 2188 if (status < 0) 2189 return status; 2190 if (status > 0) { 2191 dev_err(&dev->pci_dev->dev, "Could not set queue count (%d)\n", 2192 status); 2193 return 0; 2194 } 2195 return min(result & 0xffff, result >> 16) + 1; 2196} 2197 2198static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) 2199{ 2200 return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride); 2201} 2202 2203static int nvme_setup_io_queues(struct nvme_dev *dev) 2204{ 2205 struct nvme_queue *adminq = dev->queues[0]; 2206 struct pci_dev *pdev = dev->pci_dev; 2207 int result, i, vecs, nr_io_queues, size; 2208 2209 nr_io_queues = num_possible_cpus(); 2210 result = set_queue_count(dev, nr_io_queues); 2211 if (result <= 0) 2212 return result; 2213 if (result < nr_io_queues) 2214 nr_io_queues = result; 2215 2216 size = db_bar_size(dev, nr_io_queues); 2217 if (size > 8192) { 2218 iounmap(dev->bar); 2219 do { 2220 dev->bar = ioremap(pci_resource_start(pdev, 0), size); 2221 if (dev->bar) 2222 break; 2223 if (!--nr_io_queues) 2224 return -ENOMEM; 2225 size = db_bar_size(dev, nr_io_queues); 2226 } while (1); 2227 dev->dbs = ((void __iomem *)dev->bar) + 4096; 2228 adminq->q_db = dev->dbs; 2229 } 2230 2231 /* Deregister the admin queue's interrupt */ 2232 free_irq(dev->entry[0].vector, adminq); 2233 2234 /* 2235 * If we enable msix early due to not intx, disable it again before 2236 * setting up the full range we need. 2237 */ 2238 if (!pdev->irq) 2239 pci_disable_msix(pdev); 2240 2241 for (i = 0; i < nr_io_queues; i++) 2242 dev->entry[i].entry = i; 2243 vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues); 2244 if (vecs < 0) { 2245 vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32)); 2246 if (vecs < 0) { 2247 vecs = 1; 2248 } else { 2249 for (i = 0; i < vecs; i++) 2250 dev->entry[i].vector = i + pdev->irq; 2251 } 2252 } 2253 2254 /* 2255 * Should investigate if there's a performance win from allocating 2256 * more queues than interrupt vectors; it might allow the submission 2257 * path to scale better, even if the receive path is limited by the 2258 * number of interrupts. 2259 */ 2260 nr_io_queues = vecs; 2261 dev->max_qid = nr_io_queues; 2262 2263 result = queue_request_irq(dev, adminq, adminq->irqname); 2264 if (result) 2265 goto free_queues; 2266 2267 /* Free previously allocated queues that are no longer usable */ 2268 nvme_free_queues(dev, nr_io_queues + 1); 2269 nvme_create_io_queues(dev); 2270 2271 return 0; 2272 2273 free_queues: 2274 nvme_free_queues(dev, 1); 2275 return result; 2276} 2277 2278/* 2279 * Return: error value if an error occurred setting up the queues or calling 2280 * Identify Device. 
0 if these succeeded, even if adding some of the 2281 * namespaces failed. At the moment, these failures are silent. TBD which 2282 * failures should be reported. 2283 */ 2284static int nvme_dev_add(struct nvme_dev *dev) 2285{ 2286 struct pci_dev *pdev = dev->pci_dev; 2287 int res; 2288 unsigned nn, i; 2289 struct nvme_id_ctrl *ctrl; 2290 void *mem; 2291 dma_addr_t dma_addr; 2292 int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12; 2293 2294 mem = dma_alloc_coherent(&pdev->dev, 4096, &dma_addr, GFP_KERNEL); 2295 if (!mem) 2296 return -ENOMEM; 2297 2298 res = nvme_identify(dev, 0, 1, dma_addr); 2299 if (res) { 2300 dev_err(&pdev->dev, "Identify Controller failed (%d)\n", res); 2301 dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); 2302 return -EIO; 2303 } 2304 2305 ctrl = mem; 2306 nn = le32_to_cpup(&ctrl->nn); 2307 dev->oncs = le16_to_cpup(&ctrl->oncs); 2308 dev->abort_limit = ctrl->acl + 1; 2309 dev->vwc = ctrl->vwc; 2310 memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); 2311 memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); 2312 memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); 2313 if (ctrl->mdts) 2314 dev->max_hw_sectors = 1 << (ctrl->mdts + shift - 9); 2315 if ((pdev->vendor == PCI_VENDOR_ID_INTEL) && 2316 (pdev->device == 0x0953) && ctrl->vs[3]) { 2317 unsigned int max_hw_sectors; 2318 2319 dev->stripe_size = 1 << (ctrl->vs[3] + shift); 2320 max_hw_sectors = dev->stripe_size >> (shift - 9); 2321 if (dev->max_hw_sectors) { 2322 dev->max_hw_sectors = min(max_hw_sectors, 2323 dev->max_hw_sectors); 2324 } else 2325 dev->max_hw_sectors = max_hw_sectors; 2326 } 2327 dma_free_coherent(&dev->pci_dev->dev, 4096, mem, dma_addr); 2328 2329 dev->tagset.ops = &nvme_mq_ops; 2330 dev->tagset.nr_hw_queues = dev->online_queues - 1; 2331 dev->tagset.timeout = NVME_IO_TIMEOUT; 2332 dev->tagset.numa_node = dev_to_node(&dev->pci_dev->dev); 2333 dev->tagset.queue_depth = 2334 min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; 2335 dev->tagset.cmd_size = nvme_cmd_size(dev); 2336 dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; 2337 dev->tagset.driver_data = dev; 2338 2339 if (blk_mq_alloc_tag_set(&dev->tagset)) 2340 return 0; 2341 2342 for (i = 1; i <= nn; i++) 2343 nvme_alloc_ns(dev, i); 2344 2345 return 0; 2346} 2347 2348static int nvme_dev_map(struct nvme_dev *dev) 2349{ 2350 u64 cap; 2351 int bars, result = -ENOMEM; 2352 struct pci_dev *pdev = dev->pci_dev; 2353 2354 if (pci_enable_device_mem(pdev)) 2355 return result; 2356 2357 dev->entry[0].vector = pdev->irq; 2358 pci_set_master(pdev); 2359 bars = pci_select_bars(pdev, IORESOURCE_MEM); 2360 if (!bars) 2361 goto disable_pci; 2362 2363 if (pci_request_selected_regions(pdev, bars, "nvme")) 2364 goto disable_pci; 2365 2366 if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) && 2367 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) 2368 goto disable; 2369 2370 dev->bar = ioremap(pci_resource_start(pdev, 0), 8192); 2371 if (!dev->bar) 2372 goto disable; 2373 2374 if (readl(&dev->bar->csts) == -1) { 2375 result = -ENODEV; 2376 goto unmap; 2377 } 2378 2379 /* 2380 * Some devices don't advertise INTx interrupts; pre-enable a single 2381 * MSI-X vector for setup. We'll adjust this later.
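 * (nvme_setup_io_queues later disables this vector again and allocates the full range.)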
2382 */ 2383 if (!pdev->irq) { 2384 result = pci_enable_msix(pdev, dev->entry, 1); 2385 if (result < 0) 2386 goto unmap; 2387 } 2388 2389 cap = readq(&dev->bar->cap); 2390 dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); 2391 dev->db_stride = 1 << NVME_CAP_STRIDE(cap); 2392 dev->dbs = ((void __iomem *)dev->bar) + 4096; 2393 2394 return 0; 2395 2396 unmap: 2397 iounmap(dev->bar); 2398 dev->bar = NULL; 2399 disable: 2400 pci_release_regions(pdev); 2401 disable_pci: 2402 pci_disable_device(pdev); 2403 return result; 2404} 2405 2406static void nvme_dev_unmap(struct nvme_dev *dev) 2407{ 2408 if (dev->pci_dev->msi_enabled) 2409 pci_disable_msi(dev->pci_dev); 2410 else if (dev->pci_dev->msix_enabled) 2411 pci_disable_msix(dev->pci_dev); 2412 2413 if (dev->bar) { 2414 iounmap(dev->bar); 2415 dev->bar = NULL; 2416 pci_release_regions(dev->pci_dev); 2417 } 2418 2419 if (pci_is_enabled(dev->pci_dev)) 2420 pci_disable_device(dev->pci_dev); 2421} 2422 2423struct nvme_delq_ctx { 2424 struct task_struct *waiter; 2425 struct kthread_worker *worker; 2426 atomic_t refcount; 2427}; 2428 2429static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev) 2430{ 2431 dq->waiter = current; 2432 mb(); 2433 2434 for (;;) { 2435 set_current_state(TASK_KILLABLE); 2436 if (!atomic_read(&dq->refcount)) 2437 break; 2438 if (!schedule_timeout(ADMIN_TIMEOUT) || 2439 fatal_signal_pending(current)) { 2440 /* 2441 * Disable the controller first since we can't trust it 2442 * at this point, but leave the admin queue enabled 2443 * until all queue deletion requests are flushed. 2444 * FIXME: This may take a while if there are more h/w 2445 * queues than admin tags. 2446 */ 2447 set_current_state(TASK_RUNNING); 2448 nvme_disable_ctrl(dev, readq(&dev->bar->cap)); 2449 nvme_clear_queue(dev->queues[0]); 2450 flush_kthread_worker(dq->worker); 2451 nvme_disable_queue(dev, 0); 2452 return; 2453 } 2454 } 2455 set_current_state(TASK_RUNNING); 2456} 2457 2458static void nvme_put_dq(struct nvme_delq_ctx *dq) 2459{ 2460 atomic_dec(&dq->refcount); 2461 if (dq->waiter) 2462 wake_up_process(dq->waiter); 2463} 2464 2465static struct nvme_delq_ctx *nvme_get_dq(struct nvme_delq_ctx *dq) 2466{ 2467 atomic_inc(&dq->refcount); 2468 return dq; 2469} 2470 2471static void nvme_del_queue_end(struct nvme_queue *nvmeq) 2472{ 2473 struct nvme_delq_ctx *dq = nvmeq->cmdinfo.ctx; 2474 nvme_put_dq(dq); 2475} 2476 2477static int adapter_async_del_queue(struct nvme_queue *nvmeq, u8 opcode, 2478 kthread_work_func_t fn) 2479{ 2480 struct nvme_command c; 2481 2482 memset(&c, 0, sizeof(c)); 2483 c.delete_queue.opcode = opcode; 2484 c.delete_queue.qid = cpu_to_le16(nvmeq->qid); 2485 2486 init_kthread_work(&nvmeq->cmdinfo.work, fn); 2487 return nvme_submit_admin_async_cmd(nvmeq->dev, &c, &nvmeq->cmdinfo, 2488 ADMIN_TIMEOUT); 2489} 2490 2491static void nvme_del_cq_work_handler(struct kthread_work *work) 2492{ 2493 struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, 2494 cmdinfo.work); 2495 nvme_del_queue_end(nvmeq); 2496} 2497 2498static int nvme_delete_cq(struct nvme_queue *nvmeq) 2499{ 2500 return adapter_async_del_queue(nvmeq, nvme_admin_delete_cq, 2501 nvme_del_cq_work_handler); 2502} 2503 2504static void nvme_del_sq_work_handler(struct kthread_work *work) 2505{ 2506 struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, 2507 cmdinfo.work); 2508 int status = nvmeq->cmdinfo.status; 2509 2510 if (!status) 2511 status = nvme_delete_cq(nvmeq); 2512 if (status) 2513 nvme_del_queue_end(nvmeq); 2514} 2515 2516static int 
nvme_delete_sq(struct nvme_queue *nvmeq) 2517{ 2518 return adapter_async_del_queue(nvmeq, nvme_admin_delete_sq, 2519 nvme_del_sq_work_handler); 2520} 2521 2522static void nvme_del_queue_start(struct kthread_work *work) 2523{ 2524 struct nvme_queue *nvmeq = container_of(work, struct nvme_queue, 2525 cmdinfo.work); 2526 if (nvme_delete_sq(nvmeq)) 2527 nvme_del_queue_end(nvmeq); 2528} 2529 2530static void nvme_disable_io_queues(struct nvme_dev *dev) 2531{ 2532 int i; 2533 DEFINE_KTHREAD_WORKER_ONSTACK(worker); 2534 struct nvme_delq_ctx dq; 2535 struct task_struct *kworker_task = kthread_run(kthread_worker_fn, 2536 &worker, "nvme%d", dev->instance); 2537 2538 if (IS_ERR(kworker_task)) { 2539 dev_err(&dev->pci_dev->dev, 2540 "Failed to create queue del task\n"); 2541 for (i = dev->queue_count - 1; i > 0; i--) 2542 nvme_disable_queue(dev, i); 2543 return; 2544 } 2545 2546 dq.waiter = NULL; 2547 atomic_set(&dq.refcount, 0); 2548 dq.worker = &worker; 2549 for (i = dev->queue_count - 1; i > 0; i--) { 2550 struct nvme_queue *nvmeq = dev->queues[i]; 2551 2552 if (nvme_suspend_queue(nvmeq)) 2553 continue; 2554 nvmeq->cmdinfo.ctx = nvme_get_dq(&dq); 2555 nvmeq->cmdinfo.worker = dq.worker; 2556 init_kthread_work(&nvmeq->cmdinfo.work, nvme_del_queue_start); 2557 queue_kthread_work(dq.worker, &nvmeq->cmdinfo.work); 2558 } 2559 nvme_wait_dq(&dq, dev); 2560 kthread_stop(kworker_task); 2561} 2562 2563/* 2564* Remove the node from the device list and check 2565* for whether or not we need to stop the nvme_thread. 2566*/ 2567static void nvme_dev_list_remove(struct nvme_dev *dev) 2568{ 2569 struct task_struct *tmp = NULL; 2570 2571 spin_lock(&dev_list_lock); 2572 list_del_init(&dev->node); 2573 if (list_empty(&dev_list) && !IS_ERR_OR_NULL(nvme_thread)) { 2574 tmp = nvme_thread; 2575 nvme_thread = NULL; 2576 } 2577 spin_unlock(&dev_list_lock); 2578 2579 if (tmp) 2580 kthread_stop(tmp); 2581} 2582 2583static void nvme_freeze_queues(struct nvme_dev *dev) 2584{ 2585 struct nvme_ns *ns; 2586 2587 list_for_each_entry(ns, &dev->namespaces, list) { 2588 blk_mq_freeze_queue_start(ns->queue); 2589 2590 spin_lock(ns->queue->queue_lock); 2591 queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue); 2592 spin_unlock(ns->queue->queue_lock); 2593 2594 blk_mq_cancel_requeue_work(ns->queue); 2595 blk_mq_stop_hw_queues(ns->queue); 2596 } 2597} 2598 2599static void nvme_unfreeze_queues(struct nvme_dev *dev) 2600{ 2601 struct nvme_ns *ns; 2602 2603 list_for_each_entry(ns, &dev->namespaces, list) { 2604 queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue); 2605 blk_mq_unfreeze_queue(ns->queue); 2606 blk_mq_start_stopped_hw_queues(ns->queue, true); 2607 blk_mq_kick_requeue_list(ns->queue); 2608 } 2609} 2610 2611static void nvme_dev_shutdown(struct nvme_dev *dev) 2612{ 2613 int i; 2614 u32 csts = -1; 2615 2616 nvme_dev_list_remove(dev); 2617 2618 if (dev->bar) { 2619 nvme_freeze_queues(dev); 2620 csts = readl(&dev->bar->csts); 2621 } 2622 if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) { 2623 for (i = dev->queue_count - 1; i >= 0; i--) { 2624 struct nvme_queue *nvmeq = dev->queues[i]; 2625 nvme_suspend_queue(nvmeq); 2626 } 2627 } else { 2628 nvme_disable_io_queues(dev); 2629 nvme_shutdown_ctrl(dev); 2630 nvme_disable_queue(dev, 0); 2631 } 2632 nvme_dev_unmap(dev); 2633 2634 for (i = dev->queue_count - 1; i >= 0; i--) 2635 nvme_clear_queue(dev->queues[i]); 2636} 2637 2638static void nvme_dev_remove(struct nvme_dev *dev) 2639{ 2640 struct nvme_ns *ns; 2641 2642 list_for_each_entry(ns, &dev->namespaces, list) { 2643 if (ns->disk->flags & 
GENHD_FL_UP) { 2644 if (blk_get_integrity(ns->disk)) 2645 blk_integrity_unregister(ns->disk); 2646 del_gendisk(ns->disk); 2647 } 2648 if (!blk_queue_dying(ns->queue)) { 2649 blk_mq_abort_requeue_list(ns->queue); 2650 blk_cleanup_queue(ns->queue); 2651 } 2652 } 2653} 2654 2655static int nvme_setup_prp_pools(struct nvme_dev *dev) 2656{ 2657 struct device *dmadev = &dev->pci_dev->dev; 2658 dev->prp_page_pool = dma_pool_create("prp list page", dmadev, 2659 PAGE_SIZE, PAGE_SIZE, 0); 2660 if (!dev->prp_page_pool) 2661 return -ENOMEM; 2662 2663 /* Optimisation for I/Os between 4k and 128k */ 2664 dev->prp_small_pool = dma_pool_create("prp list 256", dmadev, 2665 256, 256, 0); 2666 if (!dev->prp_small_pool) { 2667 dma_pool_destroy(dev->prp_page_pool); 2668 return -ENOMEM; 2669 } 2670 return 0; 2671} 2672 2673static void nvme_release_prp_pools(struct nvme_dev *dev) 2674{ 2675 dma_pool_destroy(dev->prp_page_pool); 2676 dma_pool_destroy(dev->prp_small_pool); 2677} 2678 2679static DEFINE_IDA(nvme_instance_ida); 2680 2681static int nvme_set_instance(struct nvme_dev *dev) 2682{ 2683 int instance, error; 2684 2685 do { 2686 if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL)) 2687 return -ENODEV; 2688 2689 spin_lock(&dev_list_lock); 2690 error = ida_get_new(&nvme_instance_ida, &instance); 2691 spin_unlock(&dev_list_lock); 2692 } while (error == -EAGAIN); 2693 2694 if (error) 2695 return -ENODEV; 2696 2697 dev->instance = instance; 2698 return 0; 2699} 2700 2701static void nvme_release_instance(struct nvme_dev *dev) 2702{ 2703 spin_lock(&dev_list_lock); 2704 ida_remove(&nvme_instance_ida, dev->instance); 2705 spin_unlock(&dev_list_lock); 2706} 2707 2708static void nvme_free_namespaces(struct nvme_dev *dev) 2709{ 2710 struct nvme_ns *ns, *next; 2711 2712 list_for_each_entry_safe(ns, next, &dev->namespaces, list) { 2713 list_del(&ns->list); 2714 2715 spin_lock(&dev_list_lock); 2716 ns->disk->private_data = NULL; 2717 spin_unlock(&dev_list_lock); 2718 2719 put_disk(ns->disk); 2720 kfree(ns); 2721 } 2722} 2723 2724static void nvme_free_dev(struct kref *kref) 2725{ 2726 struct nvme_dev *dev = container_of(kref, struct nvme_dev, kref); 2727 2728 pci_dev_put(dev->pci_dev); 2729 put_device(dev->device); 2730 nvme_free_namespaces(dev); 2731 nvme_release_instance(dev); 2732 blk_mq_free_tag_set(&dev->tagset); 2733 blk_put_queue(dev->admin_q); 2734 kfree(dev->queues); 2735 kfree(dev->entry); 2736 kfree(dev); 2737} 2738 2739static int nvme_dev_open(struct inode *inode, struct file *f) 2740{ 2741 struct nvme_dev *dev; 2742 int instance = iminor(inode); 2743 int ret = -ENODEV; 2744 2745 spin_lock(&dev_list_lock); 2746 list_for_each_entry(dev, &dev_list, node) { 2747 if (dev->instance == instance) { 2748 if (!dev->admin_q) { 2749 ret = -EWOULDBLOCK; 2750 break; 2751 } 2752 if (!kref_get_unless_zero(&dev->kref)) 2753 break; 2754 f->private_data = dev; 2755 ret = 0; 2756 break; 2757 } 2758 } 2759 spin_unlock(&dev_list_lock); 2760 2761 return ret; 2762} 2763 2764static int nvme_dev_release(struct inode *inode, struct file *f) 2765{ 2766 struct nvme_dev *dev = f->private_data; 2767 kref_put(&dev->kref, nvme_free_dev); 2768 return 0; 2769} 2770 2771static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 2772{ 2773 struct nvme_dev *dev = f->private_data; 2774 struct nvme_ns *ns; 2775 2776 switch (cmd) { 2777 case NVME_IOCTL_ADMIN_CMD: 2778 return nvme_user_cmd(dev, NULL, (void __user *)arg); 2779 case NVME_IOCTL_IO_CMD: 2780 if (list_empty(&dev->namespaces)) 2781 return -ENOTTY; 2782 ns = 
list_first_entry(&dev->namespaces, struct nvme_ns, list); 2783 return nvme_user_cmd(dev, ns, (void __user *)arg); 2784 default: 2785 return -ENOTTY; 2786 } 2787} 2788 2789static const struct file_operations nvme_dev_fops = { 2790 .owner = THIS_MODULE, 2791 .open = nvme_dev_open, 2792 .release = nvme_dev_release, 2793 .unlocked_ioctl = nvme_dev_ioctl, 2794 .compat_ioctl = nvme_dev_ioctl, 2795}; 2796 2797static void nvme_set_irq_hints(struct nvme_dev *dev) 2798{ 2799 struct nvme_queue *nvmeq; 2800 int i; 2801 2802 for (i = 0; i < dev->online_queues; i++) { 2803 nvmeq = dev->queues[i]; 2804 2805 if (!nvmeq->hctx) 2806 continue; 2807 2808 irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, 2809 nvmeq->hctx->cpumask); 2810 } 2811} 2812 2813static int nvme_dev_start(struct nvme_dev *dev) 2814{ 2815 int result; 2816 bool start_thread = false; 2817 2818 result = nvme_dev_map(dev); 2819 if (result) 2820 return result; 2821 2822 result = nvme_configure_admin_queue(dev); 2823 if (result) 2824 goto unmap; 2825 2826 spin_lock(&dev_list_lock); 2827 if (list_empty(&dev_list) && IS_ERR_OR_NULL(nvme_thread)) { 2828 start_thread = true; 2829 nvme_thread = NULL; 2830 } 2831 list_add(&dev->node, &dev_list); 2832 spin_unlock(&dev_list_lock); 2833 2834 if (start_thread) { 2835 nvme_thread = kthread_run(nvme_kthread, NULL, "nvme"); 2836 wake_up_all(&nvme_kthread_wait); 2837 } else 2838 wait_event_killable(nvme_kthread_wait, nvme_thread); 2839 2840 if (IS_ERR_OR_NULL(nvme_thread)) { 2841 result = nvme_thread ? PTR_ERR(nvme_thread) : -EINTR; 2842 goto disable; 2843 } 2844 2845 nvme_init_queue(dev->queues[0], 0); 2846 result = nvme_alloc_admin_tags(dev); 2847 if (result) 2848 goto disable; 2849 2850 result = nvme_setup_io_queues(dev); 2851 if (result) 2852 goto free_tags; 2853 2854 nvme_set_irq_hints(dev); 2855 2856 dev->event_limit = 1; 2857 return result; 2858 2859 free_tags: 2860 nvme_dev_remove_admin(dev); 2861 disable: 2862 nvme_disable_queue(dev, 0); 2863 nvme_dev_list_remove(dev); 2864 unmap: 2865 nvme_dev_unmap(dev); 2866 return result; 2867} 2868 2869static int nvme_remove_dead_ctrl(void *arg) 2870{ 2871 struct nvme_dev *dev = (struct nvme_dev *)arg; 2872 struct pci_dev *pdev = dev->pci_dev; 2873 2874 if (pci_get_drvdata(pdev)) 2875 pci_stop_and_remove_bus_device_locked(pdev); 2876 kref_put(&dev->kref, nvme_free_dev); 2877 return 0; 2878} 2879 2880static void nvme_remove_disks(struct work_struct *ws) 2881{ 2882 struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); 2883 2884 nvme_free_queues(dev, 1); 2885 nvme_dev_remove(dev); 2886} 2887 2888static int nvme_dev_resume(struct nvme_dev *dev) 2889{ 2890 int ret; 2891 2892 ret = nvme_dev_start(dev); 2893 if (ret) 2894 return ret; 2895 if (dev->online_queues < 2) { 2896 spin_lock(&dev_list_lock); 2897 dev->reset_workfn = nvme_remove_disks; 2898 queue_work(nvme_workq, &dev->reset_work); 2899 spin_unlock(&dev_list_lock); 2900 } else { 2901 nvme_unfreeze_queues(dev); 2902 nvme_set_irq_hints(dev); 2903 } 2904 return 0; 2905} 2906 2907static void nvme_dev_reset(struct nvme_dev *dev) 2908{ 2909 nvme_dev_shutdown(dev); 2910 if (nvme_dev_resume(dev)) { 2911 dev_warn(&dev->pci_dev->dev, "Device failed to resume\n"); 2912 kref_get(&dev->kref); 2913 if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d", 2914 dev->instance))) { 2915 dev_err(&dev->pci_dev->dev, 2916 "Failed to start controller remove task\n"); 2917 kref_put(&dev->kref, nvme_free_dev); 2918 } 2919 } 2920} 2921 2922static void nvme_reset_failed_dev(struct work_struct *ws) 2923{ 
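	/* Default reset_workfn: performs a full controller reset (shutdown then resume) from the nvme workqueue. */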
2924 struct nvme_dev *dev = container_of(ws, struct nvme_dev, reset_work); 2925 nvme_dev_reset(dev); 2926} 2927 2928static void nvme_reset_workfn(struct work_struct *work) 2929{ 2930 struct nvme_dev *dev = container_of(work, struct nvme_dev, reset_work); 2931 dev->reset_workfn(work); 2932} 2933 2934static void nvme_async_probe(struct work_struct *work); 2935static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) 2936{ 2937 int node, result = -ENOMEM; 2938 struct nvme_dev *dev; 2939 2940 node = dev_to_node(&pdev->dev); 2941 if (node == NUMA_NO_NODE) 2942 set_dev_node(&pdev->dev, 0); 2943 2944 dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); 2945 if (!dev) 2946 return -ENOMEM; 2947 dev->entry = kzalloc_node(num_possible_cpus() * sizeof(*dev->entry), 2948 GFP_KERNEL, node); 2949 if (!dev->entry) 2950 goto free; 2951 dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *), 2952 GFP_KERNEL, node); 2953 if (!dev->queues) 2954 goto free; 2955 2956 INIT_LIST_HEAD(&dev->namespaces); 2957 dev->reset_workfn = nvme_reset_failed_dev; 2958 INIT_WORK(&dev->reset_work, nvme_reset_workfn); 2959 dev->pci_dev = pci_dev_get(pdev); 2960 pci_set_drvdata(pdev, dev); 2961 result = nvme_set_instance(dev); 2962 if (result) 2963 goto put_pci; 2964 2965 result = nvme_setup_prp_pools(dev); 2966 if (result) 2967 goto release; 2968 2969 kref_init(&dev->kref); 2970 dev->device = device_create(nvme_class, &pdev->dev, 2971 MKDEV(nvme_char_major, dev->instance), 2972 dev, "nvme%d", dev->instance); 2973 if (IS_ERR(dev->device)) { 2974 result = PTR_ERR(dev->device); 2975 goto release_pools; 2976 } 2977 get_device(dev->device); 2978 2979 INIT_LIST_HEAD(&dev->node); 2980 INIT_WORK(&dev->probe_work, nvme_async_probe); 2981 schedule_work(&dev->probe_work); 2982 return 0; 2983 2984 release_pools: 2985 nvme_release_prp_pools(dev); 2986 release: 2987 nvme_release_instance(dev); 2988 put_pci: 2989 pci_dev_put(dev->pci_dev); 2990 free: 2991 kfree(dev->queues); 2992 kfree(dev->entry); 2993 kfree(dev); 2994 return result; 2995} 2996 2997static void nvme_async_probe(struct work_struct *work) 2998{ 2999 struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work); 3000 int result; 3001 3002 result = nvme_dev_start(dev); 3003 if (result) 3004 goto reset; 3005 3006 if (dev->online_queues > 1) 3007 result = nvme_dev_add(dev); 3008 if (result) 3009 goto reset; 3010 3011 nvme_set_irq_hints(dev); 3012 return; 3013 reset: 3014 if (!work_busy(&dev->reset_work)) { 3015 dev->reset_workfn = nvme_reset_failed_dev; 3016 queue_work(nvme_workq, &dev->reset_work); 3017 } 3018} 3019 3020static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) 3021{ 3022 struct nvme_dev *dev = pci_get_drvdata(pdev); 3023 3024 if (prepare) 3025 nvme_dev_shutdown(dev); 3026 else 3027 nvme_dev_resume(dev); 3028} 3029 3030static void nvme_shutdown(struct pci_dev *pdev) 3031{ 3032 struct nvme_dev *dev = pci_get_drvdata(pdev); 3033 nvme_dev_shutdown(dev); 3034} 3035 3036static void nvme_remove(struct pci_dev *pdev) 3037{ 3038 struct nvme_dev *dev = pci_get_drvdata(pdev); 3039 3040 spin_lock(&dev_list_lock); 3041 list_del_init(&dev->node); 3042 spin_unlock(&dev_list_lock); 3043 3044 pci_set_drvdata(pdev, NULL); 3045 flush_work(&dev->probe_work); 3046 flush_work(&dev->reset_work); 3047 nvme_dev_shutdown(dev); 3048 nvme_dev_remove(dev); 3049 nvme_dev_remove_admin(dev); 3050 device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance)); 3051 nvme_free_queues(dev, 0); 3052 nvme_release_prp_pools(dev); 3053 
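	/* Drop the reference taken at probe; nvme_free_dev runs once the last holder lets go. */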
kref_put(&dev->kref, nvme_free_dev); 3054} 3055 3056/* These functions are yet to be implemented */ 3057#define nvme_error_detected NULL 3058#define nvme_dump_registers NULL 3059#define nvme_link_reset NULL 3060#define nvme_slot_reset NULL 3061#define nvme_error_resume NULL 3062 3063#ifdef CONFIG_PM_SLEEP 3064static int nvme_suspend(struct device *dev) 3065{ 3066 struct pci_dev *pdev = to_pci_dev(dev); 3067 struct nvme_dev *ndev = pci_get_drvdata(pdev); 3068 3069 nvme_dev_shutdown(ndev); 3070 return 0; 3071} 3072 3073static int nvme_resume(struct device *dev) 3074{ 3075 struct pci_dev *pdev = to_pci_dev(dev); 3076 struct nvme_dev *ndev = pci_get_drvdata(pdev); 3077 3078 if (nvme_dev_resume(ndev) && !work_busy(&ndev->reset_work)) { 3079 ndev->reset_workfn = nvme_reset_failed_dev; 3080 queue_work(nvme_workq, &ndev->reset_work); 3081 } 3082 return 0; 3083} 3084#endif 3085 3086static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume); 3087 3088static const struct pci_error_handlers nvme_err_handler = { 3089 .error_detected = nvme_error_detected, 3090 .mmio_enabled = nvme_dump_registers, 3091 .link_reset = nvme_link_reset, 3092 .slot_reset = nvme_slot_reset, 3093 .resume = nvme_error_resume, 3094 .reset_notify = nvme_reset_notify, 3095}; 3096 3097/* Move to pci_ids.h later */ 3098#define PCI_CLASS_STORAGE_EXPRESS 0x010802 3099 3100static const struct pci_device_id nvme_id_table[] = { 3101 { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, 3102 { 0, } 3103}; 3104MODULE_DEVICE_TABLE(pci, nvme_id_table); 3105 3106static struct pci_driver nvme_driver = { 3107 .name = "nvme", 3108 .id_table = nvme_id_table, 3109 .probe = nvme_probe, 3110 .remove = nvme_remove, 3111 .shutdown = nvme_shutdown, 3112 .driver = { 3113 .pm = &nvme_dev_pm_ops, 3114 }, 3115 .err_handler = &nvme_err_handler, 3116}; 3117 3118static int __init nvme_init(void) 3119{ 3120 int result; 3121 3122 init_waitqueue_head(&nvme_kthread_wait); 3123 3124 nvme_workq = create_singlethread_workqueue("nvme"); 3125 if (!nvme_workq) 3126 return -ENOMEM; 3127 3128 result = register_blkdev(nvme_major, "nvme"); 3129 if (result < 0) 3130 goto kill_workq; 3131 else if (result > 0) 3132 nvme_major = result; 3133 3134 result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme", 3135 &nvme_dev_fops); 3136 if (result < 0) 3137 goto unregister_blkdev; 3138 else if (result > 0) 3139 nvme_char_major = result; 3140 3141 nvme_class = class_create(THIS_MODULE, "nvme"); 3142 if (IS_ERR(nvme_class)) { 3143 result = PTR_ERR(nvme_class); 3144 goto unregister_chrdev; 3145 } 3146 3147 result = pci_register_driver(&nvme_driver); 3148 if (result) 3149 goto destroy_class; 3150 return 0; 3151 3152 destroy_class: 3153 class_destroy(nvme_class); 3154 unregister_chrdev: 3155 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 3156 unregister_blkdev: 3157 unregister_blkdev(nvme_major, "nvme"); 3158 kill_workq: 3159 destroy_workqueue(nvme_workq); 3160 return result; 3161} 3162 3163static void __exit nvme_exit(void) 3164{ 3165 pci_unregister_driver(&nvme_driver); 3166 unregister_blkdev(nvme_major, "nvme"); 3167 destroy_workqueue(nvme_workq); 3168 class_destroy(nvme_class); 3169 __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme"); 3170 BUG_ON(nvme_thread && !IS_ERR(nvme_thread)); 3171 _nvme_check_size(); 3172} 3173 3174MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>"); 3175MODULE_LICENSE("GPL"); 3176MODULE_VERSION("1.0"); 3177module_init(nvme_init); 3178module_exit(nvme_exit);