Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
// SPDX-License-Identifier: GPL-2.0
/*
 * Common code for the NVMe target.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/pci-p2pdma.h>
#include <linux/scatterlist.h>

#include "nvmet.h"

struct workqueue_struct *buffered_io_wq;
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
static DEFINE_IDA(cntlid_ida);

/*
 * This read/write semaphore is used to synchronize access to configuration
 * information on a target system that will result in discovery log page
 * information change for at least one host.
 * The full list of resources protected by this semaphore is:
 *
 *  - subsystems list
 *  - per-subsystem allowed hosts list
 *  - allow_any_host subsystem attribute
 *  - nvmet_genctr
 *  - the nvmet_transports array
 *
 * When updating any of those lists/structures the write lock should be
 * obtained, while readers (populating the discovery log page or checking a
 * host-subsystem link) take the read lock to allow concurrent reads.
 */
DECLARE_RWSEM(nvmet_config_sem);

u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

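/*
 * Map a POSIX errno returned by the block/file backends to an NVMe status
 * code and record where in the command the error was detected (error_loc),
 * so it can later be reported through the error log.
 */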
inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
{
	u16 status;

	switch (errno) {
	case -ENOSPC:
		req->error_loc = offsetof(struct nvme_rw_command, length);
		status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
		break;
	case -EREMOTEIO:
		req->error_loc = offsetof(struct nvme_rw_command, slba);
		status = NVME_SC_LBA_RANGE | NVME_SC_DNR;
		break;
	case -EOPNOTSUPP:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		switch (req->cmd->common.opcode) {
		case nvme_cmd_dsm:
		case nvme_cmd_write_zeroes:
			status = NVME_SC_ONCS_NOT_SUPPORTED | NVME_SC_DNR;
			break;
		default:
			status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
		}
		break;
	case -ENODATA:
		req->error_loc = offsetof(struct nvme_rw_command, nsid);
		status = NVME_SC_ACCESS_DENIED;
		break;
	case -EIO:
		/* FALLTHRU */
	default:
		req->error_loc = offsetof(struct nvme_common_command, opcode);
		status = NVME_SC_INTERNAL | NVME_SC_DNR;
	}

	return status;
}

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);

u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
		size_t len)
{
	if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
{
	if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
{
	if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
	}
	return 0;
}

static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
{
	struct nvmet_ns *ns;

	if (list_empty(&subsys->namespaces))
		return 0;

	ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
	return ns->nsid;
}

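/*
 * Pack the Asynchronous Event completion result dword: per the NVMe spec,
 * bits 2:0 carry the event type, bits 15:8 the event information and
 * bits 23:16 the associated log page identifier.
 */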
static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
{
	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
}

static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		if (!ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
	}
}

static void nvmet_async_event_work(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, async_event_work);
	struct nvmet_async_event *aen;
	struct nvmet_req *req;

	while (1) {
		mutex_lock(&ctrl->lock);
		aen = list_first_entry_or_null(&ctrl->async_events,
				struct nvmet_async_event, entry);
		if (!aen || !ctrl->nr_async_event_cmds) {
			mutex_unlock(&ctrl->lock);
			return;
		}

		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
		nvmet_set_result(req, nvmet_async_event_result(aen));

		list_del(&aen->entry);
		kfree(aen);

		mutex_unlock(&ctrl->lock);
		nvmet_req_complete(req, 0);
	}
}

void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
		u8 event_info, u8 log_page)
{
	struct nvmet_async_event *aen;

	aen = kmalloc(sizeof(*aen), GFP_KERNEL);
	if (!aen)
		return;

	aen->event_type = event_type;
	aen->event_info = event_info;
	aen->log_page = log_page;

	mutex_lock(&ctrl->lock);
	list_add_tail(&aen->entry, &ctrl->async_events);
	mutex_unlock(&ctrl->lock);

	schedule_work(&ctrl->async_event_work);
}

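/*
 * Track a namespace in the controller's Changed Namespace List log. Per the
 * NVMe spec, once more namespaces have changed than fit in the log
 * (NVME_MAX_CHANGED_NAMESPACES), the list collapses to a single 0xffffffff
 * entry telling the host to rescan everything.
 */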
static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	u32 i;

	mutex_lock(&ctrl->lock);
	if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
		goto out_unlock;

	for (i = 0; i < ctrl->nr_changed_ns; i++) {
		if (ctrl->changed_ns_list[i] == nsid)
			goto out_unlock;
	}

	if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
		ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
		ctrl->nr_changed_ns = U32_MAX;
		goto out_unlock;
	}

	ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
out_unlock:
	mutex_unlock(&ctrl->lock);
}

void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ctrl *ctrl;

	lockdep_assert_held(&subsys->lock);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_NS_CHANGED,
				NVME_LOG_CHANGED_NS);
	}
}

void nvmet_send_ana_event(struct nvmet_subsys *subsys,
		struct nvmet_port *port)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (port && ctrl->port != port)
			continue;
		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_ANA_CHANGE))
			continue;
		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
	}
	mutex_unlock(&subsys->lock);
}

void nvmet_port_send_ana_event(struct nvmet_port *port)
{
	struct nvmet_subsys_link *p;

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry)
		nvmet_send_ana_event(p->subsys, port);
	up_read(&nvmet_config_sem);
}

int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
{
	int ret = 0;

	down_write(&nvmet_config_sem);
	if (nvmet_transports[ops->type])
		ret = -EINVAL;
	else
		nvmet_transports[ops->type] = ops;
	up_write(&nvmet_config_sem);

	return ret;
}
EXPORT_SYMBOL_GPL(nvmet_register_transport);

void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
{
	down_write(&nvmet_config_sem);
	nvmet_transports[ops->type] = NULL;
	up_write(&nvmet_config_sem);
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);

int nvmet_enable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;
	int ret;

	lockdep_assert_held(&nvmet_config_sem);

	ops = nvmet_transports[port->disc_addr.trtype];
	if (!ops) {
		up_write(&nvmet_config_sem);
		request_module("nvmet-transport-%d", port->disc_addr.trtype);
		down_write(&nvmet_config_sem);
		ops = nvmet_transports[port->disc_addr.trtype];
		if (!ops) {
			pr_err("transport type %d not supported\n",
				port->disc_addr.trtype);
			return -EINVAL;
		}
	}

	if (!try_module_get(ops->owner))
		return -EINVAL;

	ret = ops->add_port(port);
	if (ret) {
		module_put(ops->owner);
		return ret;
	}

	/* If the transport didn't set inline_data_size, then disable it. */
	if (port->inline_data_size < 0)
		port->inline_data_size = 0;

	port->enabled = true;
	return 0;
}

void nvmet_disable_port(struct nvmet_port *port)
{
	const struct nvmet_fabrics_ops *ops;

	lockdep_assert_held(&nvmet_config_sem);

	port->enabled = false;

	ops = nvmet_transports[port->disc_addr.trtype];
	ops->remove_port(port);
	module_put(ops->owner);
}

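/*
 * Traffic-based keep-alive: if at least one command was seen during the last
 * keep-alive period, just reschedule the timer instead of declaring the host
 * dead; otherwise treat the expiry as a fatal controller error.
 */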
static void nvmet_keep_alive_timer(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvmet_ctrl, ka_work);
	bool cmd_seen = ctrl->cmd_seen;

	ctrl->cmd_seen = false;
	if (cmd_seen) {
		pr_debug("ctrl %d reschedule traffic based keep-alive timer\n",
			ctrl->cntlid);
		schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
		return;
	}

	pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
		ctrl->cntlid, ctrl->kato);

	nvmet_ctrl_fatal_error(ctrl);
}

static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d start keep-alive timer for %d secs\n",
		ctrl->cntlid, ctrl->kato);

	INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
	schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
{
	pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);

	cancel_delayed_work_sync(&ctrl->ka_work);
}

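/*
 * Namespace lookup: the list is walked under RCU, and nvmet_find_namespace()
 * takes a percpu reference on the namespace before returning it, so the
 * caller must drop it again with nvmet_put_namespace().
 */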
static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
		__le32 nsid)
{
	struct nvmet_ns *ns;

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
		if (ns->nsid == le32_to_cpu(nsid))
			return ns;
	}

	return NULL;
}

struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
{
	struct nvmet_ns *ns;

	rcu_read_lock();
	ns = __nvmet_find_namespace(ctrl, nsid);
	if (ns)
		percpu_ref_get(&ns->ref);
	rcu_read_unlock();

	return ns;
}

static void nvmet_destroy_namespace(struct percpu_ref *ref)
{
	struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);

	complete(&ns->disable_done);
}

void nvmet_put_namespace(struct nvmet_ns *ns)
{
	percpu_ref_put(&ns->ref);
}

static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
{
	nvmet_bdev_ns_disable(ns);
	nvmet_file_ns_disable(ns);
}

static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct pci_dev *p2p_dev;

	if (!ns->use_p2pmem)
		return 0;

	if (!ns->bdev) {
		pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
		return -EINVAL;
	}

	if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
		pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
		       ns->device_path);
		return -EINVAL;
	}

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
		if (ret < 0)
			return -EINVAL;
	} else {
		/*
		 * Right now we just check that there is p2pmem available so
		 * we can report an error to the user right away if there
		 * is not. We'll find the actual device to use once we
		 * set up the controller when the port's device is available.
		 */

		p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available for %s\n",
			       ns->device_path);
			return -EINVAL;
		}

		pci_dev_put(p2p_dev);
	}

	return 0;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
		struct nvmet_ns *ns)
{
	struct device *clients[2];
	struct pci_dev *p2p_dev;
	int ret;

	if (!ctrl->p2p_client || !ns->use_p2pmem)
		return;

	if (ns->p2p_dev) {
		ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
		if (ret < 0)
			return;

		p2p_dev = pci_dev_get(ns->p2p_dev);
	} else {
		clients[0] = ctrl->p2p_client;
		clients[1] = nvmet_ns_dev(ns);

		p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
		if (!p2p_dev) {
			pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
			       dev_name(ctrl->p2p_client), ns->device_path);
			return;
		}
	}

	ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
	if (ret < 0)
		pci_dev_put(p2p_dev);

	pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
		ns->nsid);
}

int nvmet_ns_enable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;
	int ret;

	mutex_lock(&subsys->lock);
	ret = 0;
	if (ns->enabled)
		goto out_unlock;

	ret = -EMFILE;
	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
		goto out_unlock;

	ret = nvmet_bdev_ns_enable(ns);
	if (ret == -ENOTBLK)
		ret = nvmet_file_ns_enable(ns);
	if (ret)
		goto out_unlock;

	ret = nvmet_p2pmem_ns_enable(ns);
	if (ret)
		goto out_dev_disable;

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);

	ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
				0, GFP_KERNEL);
	if (ret)
		goto out_dev_put;

	if (ns->nsid > subsys->max_nsid)
		subsys->max_nsid = ns->nsid;

	/*
	 * The namespaces list needs to be sorted to simplify the
	 * implementation of the Identify Namespace List subcommand.
	 */
	if (list_empty(&subsys->namespaces)) {
		list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
	} else {
		struct nvmet_ns *old;

		list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
			BUG_ON(ns->nsid == old->nsid);
			if (ns->nsid < old->nsid)
				break;
		}

		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}
	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
	ret = 0;
out_unlock:
	mutex_unlock(&subsys->lock);
	return ret;
out_dev_put:
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
	nvmet_ns_dev_disable(ns);
	goto out_unlock;
}

void nvmet_ns_disable(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	if (!ns->enabled)
		goto out_unlock;

	ns->enabled = false;
	list_del_rcu(&ns->dev_link);
	if (ns->nsid == subsys->max_nsid)
		subsys->max_nsid = nvmet_max_nsid(subsys);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));

	mutex_unlock(&subsys->lock);

	/*
	 * Now that we removed the namespace from the lookup list, we
	 * can kill the percpu ref and wait for any remaining references
	 * to be dropped, as well as an RCU grace period for anyone only
	 * using the namespace under rcu_read_lock(). Note that we can't
	 * use call_rcu here as we need to ensure the namespaces have
	 * been fully destroyed before unloading the module.
	 */
	percpu_ref_kill(&ns->ref);
	synchronize_rcu();
	wait_for_completion(&ns->disable_done);
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);

	subsys->nr_namespaces--;
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
out_unlock:
	mutex_unlock(&subsys->lock);
}

void nvmet_ns_free(struct nvmet_ns *ns)
{
	nvmet_ns_disable(ns);

	down_write(&nvmet_ana_sem);
	nvmet_ana_group_enabled[ns->anagrpid]--;
	up_write(&nvmet_ana_sem);

	kfree(ns->device_path);
	kfree(ns);
}

struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
	struct nvmet_ns *ns;

	ns = kzalloc(sizeof(*ns), GFP_KERNEL);
	if (!ns)
		return NULL;

	INIT_LIST_HEAD(&ns->dev_link);
	init_completion(&ns->disable_done);

	ns->nsid = nsid;
	ns->subsys = subsys;

	down_write(&nvmet_ana_sem);
	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
	nvmet_ana_group_enabled[ns->anagrpid]++;
	up_write(&nvmet_ana_sem);

	uuid_gen(&ns->uuid);
	ns->buffered_io = false;

	return ns;
}

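/*
 * Advance the submission queue head with a lock-free cmpxchg() loop so that
 * completions running on different CPUs can update it concurrently; the
 * result is reported back to the host in the completion queue entry.
 */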
static void nvmet_update_sq_head(struct nvmet_req *req)
{
	if (req->sq->size) {
		u32 old_sqhd, new_sqhd;

		do {
			old_sqhd = req->sq->sqhd;
			new_sqhd = (old_sqhd + 1) % req->sq->size;
		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
				old_sqhd);
	}
	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
}

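/*
 * Record a failed command in the controller's circular error log
 * (NVMET_ERROR_LOG_SLOTS entries) and flag the completion with the "More"
 * bit so the host knows additional detail is available via the Error
 * Information log page.
 */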
static void nvmet_set_error(struct nvmet_req *req, u16 status)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvme_error_slot *new_error_slot;
	unsigned long flags;

	req->cqe->status = cpu_to_le16(status << 1);

	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
		return;

	spin_lock_irqsave(&ctrl->error_lock, flags);
	ctrl->err_counter++;
	new_error_slot =
		&ctrl->slots[ctrl->err_counter % NVMET_ERROR_LOG_SLOTS];

	new_error_slot->error_count = cpu_to_le64(ctrl->err_counter);
	new_error_slot->sqid = cpu_to_le16(req->sq->qid);
	new_error_slot->cmdid = cpu_to_le16(req->cmd->common.command_id);
	new_error_slot->status_field = cpu_to_le16(status << 1);
	new_error_slot->param_error_location = cpu_to_le16(req->error_loc);
	new_error_slot->lba = cpu_to_le64(req->error_slba);
	new_error_slot->nsid = req->cmd->common.nsid;
	spin_unlock_irqrestore(&ctrl->error_lock, flags);

	/* set the more bit for this request */
	req->cqe->status |= cpu_to_le16(1 << 14);
}

static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	if (!req->sq->sqhd_disabled)
		nvmet_update_sq_head(req);
	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
	req->cqe->command_id = req->cmd->common.command_id;

	if (unlikely(status))
		nvmet_set_error(req, status);
	if (req->ns)
		nvmet_put_namespace(req->ns);
	req->ops->queue_response(req);
}

void nvmet_req_complete(struct nvmet_req *req, u16 status)
{
	__nvmet_req_complete(req, status);
	percpu_ref_put(&req->sq->ref);
}
EXPORT_SYMBOL_GPL(nvmet_req_complete);

void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
		u16 qid, u16 size)
{
	cq->qid = qid;
	cq->size = size;

	ctrl->cqs[qid] = cq;
}

void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
		u16 qid, u16 size)
{
	sq->sqhd = 0;
	sq->qid = qid;
	sq->size = size;

	ctrl->sqs[qid] = sq;
}

static void nvmet_confirm_sq(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->confirm_done);
}

void nvmet_sq_destroy(struct nvmet_sq *sq)
{
	/*
	 * If this is the admin queue, complete all AERs so that our
	 * queue doesn't have outstanding requests on it.
	 */
	if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
		nvmet_async_events_free(sq->ctrl);
	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
	wait_for_completion(&sq->confirm_done);
	wait_for_completion(&sq->free_done);
	percpu_ref_exit(&sq->ref);

	if (sq->ctrl) {
		nvmet_ctrl_put(sq->ctrl);
		sq->ctrl = NULL; /* allows reusing the queue later */
	}
}
EXPORT_SYMBOL_GPL(nvmet_sq_destroy);

static void nvmet_sq_free(struct percpu_ref *ref)
{
	struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);

	complete(&sq->free_done);
}

int nvmet_sq_init(struct nvmet_sq *sq)
{
	int ret;

	ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
	if (ret) {
		pr_err("percpu_ref init failed!\n");
		return ret;
	}
	init_completion(&sq->free_done);
	init_completion(&sq->confirm_done);

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_sq_init);

static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
		struct nvmet_ns *ns)
{
	enum nvme_ana_state state = port->ana_state[ns->anagrpid];

	if (unlikely(state == NVME_ANA_INACCESSIBLE))
		return NVME_SC_ANA_INACCESSIBLE;
	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
		return NVME_SC_ANA_PERSISTENT_LOSS;
	if (unlikely(state == NVME_ANA_CHANGE))
		return NVME_SC_ANA_TRANSITION;
	return 0;
}

static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
{
	if (unlikely(req->ns->readonly)) {
		switch (req->cmd->common.opcode) {
		case nvme_cmd_read:
		case nvme_cmd_flush:
			break;
		default:
			return NVME_SC_NS_WRITE_PROTECTED;
		}
	}

	return 0;
}

static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;
	u16 ret;

	ret = nvmet_check_ctrl_status(req, cmd);
	if (unlikely(ret))
		return ret;

	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
	}
	ret = nvmet_check_ana_state(req->port, req->ns);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}
	ret = nvmet_io_cmd_check_access(req);
	if (unlikely(ret)) {
		req->error_loc = offsetof(struct nvme_common_command, nsid);
		return ret;
	}

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
	else
		return nvmet_bdev_parse_io_cmd(req);
}

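/*
 * Common entry point for every fabrics request: initialise the request,
 * validate the flags and SGL descriptor type, then dispatch to the connect,
 * I/O, fabrics, discovery or admin command parser as appropriate. On failure
 * the request is completed here and false is returned.
 */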
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
		struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
{
	u8 flags = req->cmd->common.flags;
	u16 status;

	req->cq = cq;
	req->sq = sq;
	req->ops = ops;
	req->sg = NULL;
	req->sg_cnt = 0;
	req->transfer_len = 0;
	req->cqe->status = 0;
	req->cqe->sq_head = 0;
	req->ns = NULL;
	req->error_loc = NVMET_NO_ERROR_LOC;
	req->error_slba = 0;

	/* no support for fused commands yet */
	if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	/*
	 * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
	 * contains an address of a single contiguous physical buffer that is
	 * byte aligned.
	 */
	if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
		req->error_loc = offsetof(struct nvme_common_command, flags);
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (unlikely(!req->sq->ctrl))
		/* will return an error for any Non-connect command: */
		status = nvmet_parse_connect_cmd(req);
	else if (likely(req->sq->qid != 0))
		status = nvmet_parse_io_cmd(req);
	else if (req->cmd->common.opcode == nvme_fabrics_command)
		status = nvmet_parse_fabrics_cmd(req);
	else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
		status = nvmet_parse_discovery_cmd(req);
	else
		status = nvmet_parse_admin_cmd(req);

	if (status)
		goto fail;

	if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
		goto fail;
	}

	if (sq->ctrl)
		sq->ctrl->cmd_seen = true;

	return true;

fail:
	__nvmet_req_complete(req, status);
	return false;
}
EXPORT_SYMBOL_GPL(nvmet_req_init);

void nvmet_req_uninit(struct nvmet_req *req)
{
	percpu_ref_put(&req->sq->ref);
	if (req->ns)
		nvmet_put_namespace(req->ns);
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);

void nvmet_req_execute(struct nvmet_req *req)
{
	if (unlikely(req->data_len != req->transfer_len)) {
		req->error_loc = offsetof(struct nvme_common_command, dptr);
		nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
	} else
		req->execute(req);
}
EXPORT_SYMBOL_GPL(nvmet_req_execute);

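/*
 * Allocate the data SGL for a request. For I/O queues on a controller that
 * mapped a PCI peer-to-peer memory device for this namespace, the buffer is
 * carved out of p2pmem; otherwise (or if that allocation fails) regular
 * memory is used.
 */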
int nvmet_req_alloc_sgl(struct nvmet_req *req)
{
	struct pci_dev *p2p_dev = NULL;

	if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
		if (req->sq->ctrl && req->ns)
			p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
						    req->ns->nsid);

		req->p2p_dev = NULL;
		if (req->sq->qid && p2p_dev) {
			req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
						       req->transfer_len);
			if (req->sg) {
				req->p2p_dev = p2p_dev;
				return 0;
			}
		}

		/*
		 * If no P2P memory was available we fallback to using
		 * regular memory
		 */
	}

	req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
	if (!req->sg)
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);

void nvmet_req_free_sgl(struct nvmet_req *req)
{
	if (req->p2p_dev)
		pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
	else
		sgl_free(req->sg);

	req->sg = NULL;
	req->sg_cnt = 0;
}
EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);

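/*
 * Field accessors for the Controller Configuration (CC) register written by
 * the host: enable, command set selected, memory page size, arbitration
 * mechanism, shutdown notification and the I/O queue entry sizes.
 */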
static inline bool nvmet_cc_en(u32 cc)
{
	return (cc >> NVME_CC_EN_SHIFT) & 0x1;
}

static inline u8 nvmet_cc_css(u32 cc)
{
	return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_mps(u32 cc)
{
	return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_ams(u32 cc)
{
	return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
}

static inline u8 nvmet_cc_shn(u32 cc)
{
	return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
}

static inline u8 nvmet_cc_iosqes(u32 cc)
{
	return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
}

static inline u8 nvmet_cc_iocqes(u32 cc)
{
	return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
}

static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
	    nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
	    nvmet_cc_mps(ctrl->cc) != 0 ||
	    nvmet_cc_ams(ctrl->cc) != 0 ||
	    nvmet_cc_css(ctrl->cc) != 0) {
		ctrl->csts = NVME_CSTS_CFS;
		return;
	}

	ctrl->csts = NVME_CSTS_RDY;

	/*
	 * Controllers that are not yet enabled should not really enforce the
	 * keep alive timeout, but we still want to track a timeout and cleanup
	 * in case a host died before it enabled the controller.  Hence, simply
	 * reset the keep alive timer when the controller is enabled.
	 */
	mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
}

static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
{
	lockdep_assert_held(&ctrl->lock);

	/* XXX: tear down queues? */
	ctrl->csts &= ~NVME_CSTS_RDY;
	ctrl->cc = 0;
}

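/*
 * Apply a host write to the CC register: a 0->1 transition of EN starts the
 * controller, clearing EN resets it, and a shutdown notification tears the
 * controller down and latches the shutdown-complete state in CSTS.
 */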
void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
{
	u32 old;

	mutex_lock(&ctrl->lock);
	old = ctrl->cc;
	ctrl->cc = new;

	if (nvmet_cc_en(new) && !nvmet_cc_en(old))
		nvmet_start_ctrl(ctrl);
	if (!nvmet_cc_en(new) && nvmet_cc_en(old))
		nvmet_clear_ctrl(ctrl);
	if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
		nvmet_clear_ctrl(ctrl);
		ctrl->csts |= NVME_CSTS_SHST_CMPLT;
	}
	if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
		ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
	mutex_unlock(&ctrl->lock);
}

static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
{
	/* command sets supported: NVMe command set: */
	ctrl->cap = (1ULL << 37);
	/* CC.EN timeout in 500msec units: */
	ctrl->cap |= (15ULL << 24);
	/* maximum queue entries supported: */
	ctrl->cap |= NVMET_QUEUE_SIZE - 1;
}

u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
		struct nvmet_req *req, struct nvmet_ctrl **ret)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	u16 status = 0;

	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	}

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (ctrl->cntlid == cntlid) {
			if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
				pr_warn("hostnqn mismatch.\n");
				continue;
			}
			if (!kref_get_unless_zero(&ctrl->ref))
				continue;

			*ret = ctrl;
			goto out;
		}
	}

	pr_warn("could not find controller %d for subsys %s / host %s\n",
		cntlid, subsysnqn, hostnqn);
	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;

out:
	mutex_unlock(&subsys->lock);
	nvmet_subsys_put(subsys);
	return status;
}

u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
{
	if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
		pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}

	if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
		pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
		       cmd->common.opcode, req->sq->qid);
		return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
	}
	return 0;
}

bool nvmet_host_allowed(struct nvmet_subsys *subsys, const char *hostnqn)
{
	struct nvmet_host_link *p;

	lockdep_assert_held(&nvmet_config_sem);

	if (subsys->allow_any_host)
		return true;

	if (subsys->type == NVME_NQN_DISC) /* allow all access to disc subsys */
		return true;

	list_for_each_entry(p, &subsys->hosts, entry) {
		if (!strcmp(nvmet_host_name(p->host), hostnqn))
			return true;
	}

	return false;
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
		struct nvmet_req *req)
{
	struct nvmet_ns *ns;

	if (!req->p2p_client)
		return;

	ctrl->p2p_client = get_device(req->p2p_client);

	list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
		nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}

/*
 * Note: ctrl->subsys->lock should be held when calling this function
 */
static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
{
	struct radix_tree_iter iter;
	void __rcu **slot;

	radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
		pci_dev_put(radix_tree_deref_slot(slot));

	put_device(ctrl->p2p_client);
}

static void nvmet_fatal_error_handler(struct work_struct *work)
{
	struct nvmet_ctrl *ctrl =
		container_of(work, struct nvmet_ctrl, fatal_err_work);

	pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
	ctrl->ops->delete_ctrl(ctrl);
}

u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
		struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
{
	struct nvmet_subsys *subsys;
	struct nvmet_ctrl *ctrl;
	int ret;
	u16 status;

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	subsys = nvmet_find_get_subsys(req->port, subsysnqn);
	if (!subsys) {
		pr_warn("connect request for invalid subsystem %s!\n",
			subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
		goto out;
	}

	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
	down_read(&nvmet_config_sem);
	if (!nvmet_host_allowed(subsys, hostnqn)) {
		pr_info("connect by host %s for subsystem %s not allowed\n",
			hostnqn, subsysnqn);
		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
		up_read(&nvmet_config_sem);
		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
		goto out_put_subsystem;
	}
	up_read(&nvmet_config_sem);

	status = NVME_SC_INTERNAL;
	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl)
		goto out_put_subsystem;
	mutex_init(&ctrl->lock);

	nvmet_init_cap(ctrl);

	ctrl->port = req->port;

	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
	INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
	INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);

	memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
	memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);

	kref_init(&ctrl->ref);
	ctrl->subsys = subsys;
	WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);

	ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
			sizeof(__le32), GFP_KERNEL);
	if (!ctrl->changed_ns_list)
		goto out_free_ctrl;

	ctrl->cqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_cq *),
			GFP_KERNEL);
	if (!ctrl->cqs)
		goto out_free_changed_ns_list;

	ctrl->sqs = kcalloc(subsys->max_qid + 1,
			sizeof(struct nvmet_sq *),
			GFP_KERNEL);
	if (!ctrl->sqs)
		goto out_free_cqs;

	ret = ida_simple_get(&cntlid_ida,
			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
			     GFP_KERNEL);
	if (ret < 0) {
		status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
		goto out_free_sqs;
	}
	ctrl->cntlid = ret;

	ctrl->ops = req->ops;

	/*
	 * Discovery controllers may use some arbitrary high value
	 * in order to clean up stale discovery sessions
	 */
	if ((ctrl->subsys->type == NVME_NQN_DISC) && !kato)
		kato = NVMET_DISC_KATO_MS;

	/* keep-alive timeout in seconds */
	ctrl->kato = DIV_ROUND_UP(kato, 1000);

	ctrl->err_counter = 0;
	spin_lock_init(&ctrl->error_lock);

	nvmet_start_keep_alive_timer(ctrl);

	mutex_lock(&subsys->lock);
	list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
	nvmet_setup_p2p_ns_map(ctrl, req);
	mutex_unlock(&subsys->lock);

	*ctrlp = ctrl;
	return 0;

out_free_sqs:
	kfree(ctrl->sqs);
out_free_cqs:
	kfree(ctrl->cqs);
out_free_changed_ns_list:
	kfree(ctrl->changed_ns_list);
out_free_ctrl:
	kfree(ctrl);
out_put_subsystem:
	nvmet_subsys_put(subsys);
out:
	return status;
}

static void nvmet_ctrl_free(struct kref *ref)
{
	struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
	struct nvmet_subsys *subsys = ctrl->subsys;

	mutex_lock(&subsys->lock);
	nvmet_release_p2p_ns_map(ctrl);
	list_del(&ctrl->subsys_entry);
	mutex_unlock(&subsys->lock);

	nvmet_stop_keep_alive_timer(ctrl);

	flush_work(&ctrl->async_event_work);
	cancel_work_sync(&ctrl->fatal_err_work);

	ida_simple_remove(&cntlid_ida, ctrl->cntlid);

	kfree(ctrl->sqs);
	kfree(ctrl->cqs);
	kfree(ctrl->changed_ns_list);
	kfree(ctrl);

	nvmet_subsys_put(subsys);
}

void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvmet_ctrl_free);
}

void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
{
	mutex_lock(&ctrl->lock);
	if (!(ctrl->csts & NVME_CSTS_CFS)) {
		ctrl->csts |= NVME_CSTS_CFS;
		schedule_work(&ctrl->fatal_err_work);
	}
	mutex_unlock(&ctrl->lock);
}
EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);

static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn)
{
	struct nvmet_subsys_link *p;

	if (!port)
		return NULL;

	if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
		if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
			return NULL;
		return nvmet_disc_subsys;
	}

	down_read(&nvmet_config_sem);
	list_for_each_entry(p, &port->subsystems, entry) {
		if (!strncmp(p->subsys->subsysnqn, subsysnqn,
				NVMF_NQN_SIZE)) {
			if (!kref_get_unless_zero(&p->subsys->ref))
				break;
			up_read(&nvmet_config_sem);
			return p->subsys;
		}
	}
	up_read(&nvmet_config_sem);
	return NULL;
}

struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
		enum nvme_subsys_type type)
{
	struct nvmet_subsys *subsys;

	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
	if (!subsys)
		return ERR_PTR(-ENOMEM);

	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
	/* generate a random serial number as our controllers are ephemeral: */
	get_random_bytes(&subsys->serial, sizeof(subsys->serial));

	switch (type) {
	case NVME_NQN_NVME:
		subsys->max_qid = NVMET_NR_QUEUES;
		break;
	case NVME_NQN_DISC:
		subsys->max_qid = 0;
		break;
	default:
		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
		kfree(subsys);
		return ERR_PTR(-EINVAL);
	}
	subsys->type = type;
	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
			GFP_KERNEL);
	if (!subsys->subsysnqn) {
		kfree(subsys);
		return ERR_PTR(-ENOMEM);
	}

	kref_init(&subsys->ref);

	mutex_init(&subsys->lock);
	INIT_LIST_HEAD(&subsys->namespaces);
	INIT_LIST_HEAD(&subsys->ctrls);
	INIT_LIST_HEAD(&subsys->hosts);

	return subsys;
}

static void nvmet_subsys_free(struct kref *ref)
{
	struct nvmet_subsys *subsys =
		container_of(ref, struct nvmet_subsys, ref);

	WARN_ON_ONCE(!list_empty(&subsys->namespaces));

	kfree(subsys->subsysnqn);
	kfree(subsys);
}

void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
{
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		ctrl->ops->delete_ctrl(ctrl);
	mutex_unlock(&subsys->lock);
}

void nvmet_subsys_put(struct nvmet_subsys *subsys)
{
	kref_put(&subsys->ref, nvmet_subsys_free);
}

static int __init nvmet_init(void)
{
	int error;

	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;

	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq) {
		error = -ENOMEM;
		goto out;
	}

	error = nvmet_init_discovery();
	if (error)
		goto out_free_work_queue;

	error = nvmet_init_configfs();
	if (error)
		goto out_exit_discovery;
	return 0;

out_exit_discovery:
	nvmet_exit_discovery();
out_free_work_queue:
	destroy_workqueue(buffered_io_wq);
out:
	return error;
}

static void __exit nvmet_exit(void)
{
	nvmet_exit_configfs();
	nvmet_exit_discovery();
	ida_destroy(&cntlid_ida);
	destroy_workqueue(buffered_io_wq);

	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
}

module_init(nvmet_init);
module_exit(nvmet_exit);

MODULE_LICENSE("GPL v2");