/*
 * Copyright (c) 2021, Pankaj R <pankydev8@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/Arch/Delay.h>
#include <Kernel/StdLib.h>
#include <Kernel/Storage/NVMe/NVMeController.h>
#include <Kernel/Storage/NVMe/NVMeInterruptQueue.h>
#include <Kernel/Storage/NVMe/NVMePollQueue.h>
#include <Kernel/Storage/NVMe/NVMeQueue.h>

namespace Kernel {
ErrorOr<NonnullLockRefPtr<NVMeQueue>> NVMeQueue::try_create(u16 qid, Optional<u8> irq, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, Vector<NonnullRefPtr<Memory::PhysicalPage>> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, Vector<NonnullRefPtr<Memory::PhysicalPage>> sq_dma_page, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
{
    // Note: Allocate a DMA region for read/write operations. For now requests don't exceed 4096 bytes (the storage device layer takes care of that).
    RefPtr<Memory::PhysicalPage> rw_dma_page;
    auto rw_dma_region = TRY(MM.allocate_dma_buffer_page("NVMe Queue Read/Write DMA"sv, Memory::Region::Access::ReadWrite, rw_dma_page));
    // Without an IRQ to wait on, fall back to a polled queue; otherwise create an interrupt-driven queue.
    if (!irq.has_value()) {
        auto queue = TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) NVMePollQueue(move(rw_dma_region), *rw_dma_page, qid, q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))));
        return queue;
    }
    auto queue = TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) NVMeInterruptQueue(move(rw_dma_region), *rw_dma_page, qid, irq.value(), q_depth, move(cq_dma_region), cq_dma_page, move(sq_dma_region), sq_dma_page, move(db_regs))));
    return queue;
}

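// The queue owns the DMA regions backing its submission and completion rings; the spans set up in the
// constructor body simply view those regions as arrays of submission/completion entries.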
UNMAP_AFTER_INIT NVMeQueue::NVMeQueue(NonnullOwnPtr<Memory::Region> rw_dma_region, Memory::PhysicalPage const& rw_dma_page, u16 qid, u32 q_depth, OwnPtr<Memory::Region> cq_dma_region, Vector<NonnullRefPtr<Memory::PhysicalPage>> cq_dma_page, OwnPtr<Memory::Region> sq_dma_region, Vector<NonnullRefPtr<Memory::PhysicalPage>> sq_dma_page, Memory::TypedMapping<DoorbellRegister volatile> db_regs)
    : m_current_request(nullptr)
    , m_rw_dma_region(move(rw_dma_region))
    , m_qid(qid)
    , m_admin_queue(qid == 0)
    , m_qdepth(q_depth)
    , m_cq_dma_region(move(cq_dma_region))
    , m_cq_dma_page(cq_dma_page)
    , m_sq_dma_region(move(sq_dma_region))
    , m_sq_dma_page(sq_dma_page)
    , m_db_regs(move(db_regs))
    , m_rw_dma_page(rw_dma_page)
{
    m_sqe_array = { reinterpret_cast<NVMeSubmission*>(m_sq_dma_region->vaddr().as_ptr()), m_qdepth };
    m_cqe_array = { reinterpret_cast<NVMeCompletion*>(m_cq_dma_region->vaddr().as_ptr()), m_qdepth };
}

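// A completion queue entry is new if its phase tag matches the phase we currently expect. The controller
// inverts the phase tag every time it wraps around the completion queue, so the expected phase is flipped
// in update_cqe_head() whenever the head wraps back to slot 0.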
bool NVMeQueue::cqe_available()
{
    return PHASE_TAG(m_cqe_array[m_cq_head].status) == m_cq_valid_phase;
}

void NVMeQueue::update_cqe_head()
{
    // Use a temporary so the head is advanced and wrapped without ever holding an out-of-range value.
    u32 temp_cq_head = m_cq_head + 1;
    if (temp_cq_head == m_qdepth) {
        m_cq_head = 0;
        m_cq_valid_phase ^= 1;
    } else {
        m_cq_head = temp_cq_head;
    }
}

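// Drain every completion entry that is currently valid. For I/O queues the completion finishes the
// in-flight block request; once all entries are consumed, the CQ head doorbell is written so the
// controller may reuse those slots.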
u32 NVMeQueue::process_cq()
{
    u32 nr_of_processed_cqes = 0;
    while (cqe_available()) {
        u16 status;
        u16 cmdid;
        ++nr_of_processed_cqes;
        status = CQ_STATUS_FIELD(m_cqe_array[m_cq_head].status);
        cmdid = m_cqe_array[m_cq_head].command_id;
        dbgln_if(NVME_DEBUG, "NVMe: Completion with status {:x} and command identifier {}. CQ_HEAD: {}", status, cmdid, m_cq_head);
        // TODO: We don't use AsyncBlockDeviceRequest for the admin queue, as it is only applicable to a block device (NVMe namespace),
        // but admin commands precede namespace creation. Unify the requests to avoid special conditions.
        if (!m_admin_queue) {
            // As the block layer calls are now sync (we wait on each request),
            // everything operates on a single request, similar to the BMIDE driver.
            // TODO: Remove this constraint eventually.
            VERIFY(cmdid == m_prev_sq_tail);
            if (m_current_request) {
                complete_current_request(status);
            }
        }
        update_cqe_head();
    }
    if (nr_of_processed_cqes) {
        update_cq_doorbell();
    }
    return nr_of_processed_cqes;
}

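// Copy a submission entry into the SQ ring, advance the tail (wrapping at queue depth) and ring the
// SQ tail doorbell. The full memory barrier makes sure the entry is completely written to memory before
// the controller is notified via the doorbell.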
void NVMeQueue::submit_sqe(NVMeSubmission& sub)
{
    SpinlockLocker lock(m_sq_lock);
    // For now let's use sq tail as a unique command id.
    sub.cmdid = m_sq_tail;
    m_prev_sq_tail = m_sq_tail;

    memcpy(&m_sqe_array[m_sq_tail], &sub, sizeof(NVMeSubmission));
    {
        u32 temp_sq_tail = m_sq_tail + 1;
        if (temp_sq_tail == m_qdepth)
            m_sq_tail = 0;
        else
            m_sq_tail = temp_sq_tail;
    }

    dbgln_if(NVME_DEBUG, "NVMe: Submission with command identifier {}. SQ_TAIL: {}", sub.cmdid, m_sq_tail);
    full_memory_barrier();
    update_sq_doorbell();
}

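// Synchronous submission path (typically used for admin commands): submit the entry and then busy-poll
// the most recently written CQ slot (the one just behind the current head) until its command identifier
// matches the command we submitted.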
u16 NVMeQueue::submit_sync_sqe(NVMeSubmission& sub)
{
    // For now let's use sq tail as a unique command id.
    u16 cqe_cid;
    u16 cid = m_sq_tail;

    submit_sqe(sub);
    do {
        int index;
        {
            SpinlockLocker lock(m_cq_lock);
            index = m_cq_head - 1;
            if (index < 0)
                index = m_qdepth - 1;
        }
        cqe_cid = m_cqe_array[index].command_id;
        microseconds_delay(1);
    } while (cid != cqe_cid);

    auto status = CQ_STATUS_FIELD(m_cqe_array[m_cq_head].status);
    return status;
}

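// Queue a READ for `count` logical blocks starting at LBA `index`. The transfer goes through the queue's
// single read/write DMA page (see try_create()), which is what currently limits a request to 4096 bytes.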
void NVMeQueue::read(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
{
    NVMeSubmission sub {};
    SpinlockLocker m_lock(m_request_lock);
    m_current_request = request;

    sub.op = OP_NVME_READ;
    sub.rw.nsid = nsid;
    sub.rw.slba = AK::convert_between_host_and_little_endian(index);
    // Note: The NLB (number of logical blocks) field is 0-based.
    sub.rw.length = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
    sub.rw.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));

    full_memory_barrier();
    submit_sqe(sub);
}

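// Queue a WRITE for `count` logical blocks starting at LBA `index`. The caller's data is copied into the
// read/write DMA page first, since that is the page the controller will DMA from (PRP1 below).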
void NVMeQueue::write(AsyncBlockDeviceRequest& request, u16 nsid, u64 index, u32 count)
{
    NVMeSubmission sub {};
    SpinlockLocker m_lock(m_request_lock);
    m_current_request = request;

    if (auto result = m_current_request->read_from_buffer(m_current_request->buffer(), m_rw_dma_region->vaddr().as_ptr(), m_current_request->buffer_size()); result.is_error()) {
        complete_current_request(AsyncDeviceRequest::MemoryFault);
        return;
    }
    sub.op = OP_NVME_WRITE;
    sub.rw.nsid = nsid;
    sub.rw.slba = AK::convert_between_host_and_little_endian(index);
    // Note: The NLB (number of logical blocks) field is 0-based.
    sub.rw.length = AK::convert_between_host_and_little_endian((count - 1) & 0xFFFF);
    sub.rw.data_ptr.prp1 = reinterpret_cast<u64>(AK::convert_between_host_and_little_endian(m_rw_dma_page->paddr().as_ptr()));

    full_memory_barrier();
    submit_sqe(sub);
}

UNMAP_AFTER_INIT NVMeQueue::~NVMeQueue() = default;
}