/*
 * Copyright (c) 2022 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 *
 * Derived from FreeBSD original driver made by Jim Harris
 * with contributions from Alexander Motin, Wojciech Macek, and Warner Losh
 */

#define DT_DRV_COMPAT nvme_controller

#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(nvme, CONFIG_NVME_LOG_LEVEL);

#include <errno.h>

#include <zephyr/kernel.h>

#include <soc.h>
#include <zephyr/device.h>
#include <zephyr/init.h>

#include "nvme_helpers.h"
#include "nvme.h"

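/*
 * Busy-wait until CSTS.RDY matches desired_val. The polling interval starts
 * at 1 ms and grows by 3/2 on each retry, capped at 1 s; the overall deadline
 * comes from the ready timeout advertised by the controller (CAP.TO).
 */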
static int nvme_controller_wait_for_ready(const struct device *dev,
					  const int desired_val)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	int timeout = sys_clock_tick_get_32() +
		k_ms_to_ticks_ceil32(nvme_ctrlr->ready_timeout_in_ms);
	uint32_t delta_t = USEC_PER_MSEC;
	uint32_t csts;

	while (1) {
		csts = nvme_mmio_read_4(regs, csts);
		if (csts == NVME_GONE) {
			LOG_ERR("Controller is unreachable");
			return -EIO;
		}

		if (((csts >> NVME_CSTS_REG_RDY_SHIFT) &
		     NVME_CSTS_REG_RDY_MASK) == desired_val) {
			break;
		}

		if ((int64_t)timeout - sys_clock_tick_get_32() < 0) {
			LOG_ERR("Timeout error");
			return -EIO;
		}

		k_busy_wait(delta_t);
		delta_t = MIN((MSEC_PER_SEC * USEC_PER_MSEC), delta_t * 3 / 2);
	}

	return 0;
}

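/*
 * Clear CC.EN and wait for CSTS.RDY to drop to 0, first resolving any
 * in-progress EN/RDY transition so the controller is in a consistent state.
 */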
static int nvme_controller_disable(const struct device *dev)
{
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	uint32_t cc, csts;
	uint8_t enabled, ready;
	int err;

	cc = nvme_mmio_read_4(regs, cc);
	csts = nvme_mmio_read_4(regs, csts);

	ready = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK;

	enabled = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK;
	if (enabled == 0) {
		/* Wait for RDY == 0 or timeout & fail */
		if (ready == 0) {
			return 0;
		}

		return nvme_controller_wait_for_ready(dev, 0);
	}

	if (ready == 0) {
		/* EN == 1, wait for RDY == 1 or timeout & fail */
		err = nvme_controller_wait_for_ready(dev, 1);
		if (err != 0) {
			return err;
		}
	}

	cc &= ~NVME_CC_REG_EN_MASK;
	nvme_mmio_write_4(regs, cc, cc);

	return nvme_controller_wait_for_ready(dev, 0);
}

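/*
 * Program CC (CSS, AMS, SHN, queue entry sizes and MPS), set CC.EN and wait
 * for CSTS.RDY == 1. Returns early if the controller is already enabled.
 */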
static int nvme_controller_enable(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	uint8_t enabled, ready;
	uint32_t cc, csts;
	int err;

	cc = nvme_mmio_read_4(regs, cc);
	csts = nvme_mmio_read_4(regs, csts);

	ready = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK;

	enabled = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK;
	if (enabled == 1) {
		if (ready == 1) {
			LOG_DBG("Already enabled");
			return 0;
		}

		return nvme_controller_wait_for_ready(dev, 1);
	}

	/* EN == 0 already, wait for RDY == 0 or timeout & fail */
	err = nvme_controller_wait_for_ready(dev, 0);
	if (err != 0) {
		return err;
	}

	/* Initialization values for CC */
	cc = 0;
	cc |= 1 << NVME_CC_REG_EN_SHIFT;
	cc |= 0 << NVME_CC_REG_CSS_SHIFT;
	cc |= 0 << NVME_CC_REG_AMS_SHIFT;
	cc |= 0 << NVME_CC_REG_SHN_SHIFT;
	cc |= 6 << NVME_CC_REG_IOSQES_SHIFT; /* SQ entry size == 64 == 2^6 */
	cc |= 4 << NVME_CC_REG_IOCQES_SHIFT; /* CQ entry size == 16 == 2^4 */
	cc |= nvme_ctrlr->mps << NVME_CC_REG_MPS_SHIFT;

	nvme_mmio_write_4(regs, cc, cc);

	return nvme_controller_wait_for_ready(dev, 1);
}

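/*
 * Set up the admin queue pair (always qid 0) and program its submission and
 * completion queue base addresses and sizes into ASQ, ACQ and AQA.
 */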
static int nvme_controller_setup_admin_queues(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	uint32_t aqa, qsize;

	nvme_cmd_qpair_reset(nvme_ctrlr->adminq);

	/* Admin queue is always id 0 */
	if (nvme_cmd_qpair_setup(nvme_ctrlr->adminq, nvme_ctrlr, 0) != 0) {
		LOG_ERR("Admin cmd qpair setup failed");
		return -EIO;
	}

	nvme_mmio_write_8(regs, asq, nvme_ctrlr->adminq->cmd_bus_addr);
	nvme_mmio_write_8(regs, acq, nvme_ctrlr->adminq->cpl_bus_addr);

	/* acqs and asqs are 0-based. */
	qsize = CONFIG_NVME_ADMIN_ENTRIES - 1;
	aqa = 0;
	aqa = (qsize & NVME_AQA_REG_ACQS_MASK) << NVME_AQA_REG_ACQS_SHIFT;
	aqa |= (qsize & NVME_AQA_REG_ASQS_MASK) << NVME_AQA_REG_ASQS_SHIFT;

	nvme_mmio_write_4(regs, aqa, aqa);

	return 0;
}

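/*
 * Request num_io_queues I/O queues from the controller, clamp to what it
 * actually grants, then create one completion queue and one submission queue
 * per pair, using qid 1..num_io_queues.
 */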
static int nvme_controller_setup_io_queues(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	struct nvme_completion_poll_status status;
	struct nvme_cmd_qpair *io_qpair;
	int cq_allocated, sq_allocated;
	int ret, idx;

	nvme_cpl_status_poll_init(&status);

	ret = nvme_ctrlr_cmd_set_num_queues(nvme_ctrlr,
					    nvme_ctrlr->num_io_queues,
					    nvme_completion_poll_cb, &status);
	if (ret != 0) {
		return ret;
	}

	nvme_completion_poll(&status);
	if (nvme_cpl_status_is_error(&status)) {
		LOG_ERR("Could not set IO num queues to %u",
			nvme_ctrlr->num_io_queues);
		nvme_completion_print(&status.cpl);
		return -EIO;
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16-bits indicate number of submission queues allocated.
	 * Upper 16-bits indicate number of completion queues allocated.
	 */
	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
	cq_allocated = (status.cpl.cdw0 >> 16) + 1;

	/*
	 * Controller may allocate more queues than we requested,
	 * so use the minimum of the number requested and what was
	 * actually allocated.
	 */
	nvme_ctrlr->num_io_queues = MIN(nvme_ctrlr->num_io_queues,
					sq_allocated);
	nvme_ctrlr->num_io_queues = MIN(nvme_ctrlr->num_io_queues,
					cq_allocated);

	for (idx = 0; idx < nvme_ctrlr->num_io_queues; idx++) {
		io_qpair = &nvme_ctrlr->ioq[idx];
		if (nvme_cmd_qpair_setup(io_qpair, nvme_ctrlr, idx + 1) != 0) {
			LOG_ERR("IO cmd qpair %u setup failed", idx + 1);
			return -EIO;
		}

		nvme_cmd_qpair_reset(io_qpair);

		nvme_cpl_status_poll_init(&status);

		ret = nvme_ctrlr_cmd_create_io_cq(nvme_ctrlr, io_qpair,
						  nvme_completion_poll_cb,
						  &status);
		if (ret != 0) {
			return ret;
		}

		nvme_completion_poll(&status);
		if (nvme_cpl_status_is_error(&status)) {
			LOG_ERR("IO CQ creation failed");
			nvme_completion_print(&status.cpl);
			return -EIO;
		}

		nvme_cpl_status_poll_init(&status);

		ret = nvme_ctrlr_cmd_create_io_sq(nvme_ctrlr, io_qpair,
						  nvme_completion_poll_cb,
						  &status);
		if (ret != 0) {
			return ret;
		}

		nvme_completion_poll(&status);
		if (nvme_cpl_status_is_error(&status)) {
			LOG_ERR("IO SQ creation failed");
			nvme_completion_print(&status.cpl);
			return -EIO;
		}
	}

	return 0;
}

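/*
 * Read and log the controller capability registers (CAP, VS, PMRCAP) and
 * derive the driver parameters from them: doorbell stride, memory page size,
 * ready timeout and the default maximum transfer size.
 */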
static void nvme_controller_gather_info(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);

	uint32_t cap_lo, cap_hi, to, vs, pmrcap;

	nvme_ctrlr->cap_lo = cap_lo = nvme_mmio_read_4(regs, cap_lo);
	LOG_DBG("CapLo: 0x%08x: MQES %u%s%s%s%s, TO %u",
		cap_lo, NVME_CAP_LO_MQES(cap_lo),
		NVME_CAP_LO_CQR(cap_lo) ? ", CQR" : "",
		NVME_CAP_LO_AMS(cap_lo) ? ", AMS" : "",
		(NVME_CAP_LO_AMS(cap_lo) & 0x1) ? " WRRwUPC" : "",
		(NVME_CAP_LO_AMS(cap_lo) & 0x2) ? " VS" : "",
		NVME_CAP_LO_TO(cap_lo));

	nvme_ctrlr->cap_hi = cap_hi = nvme_mmio_read_4(regs, cap_hi);
	LOG_DBG("CapHi: 0x%08x: DSTRD %u%s, CSS %x%s, "
		"MPSMIN %u, MPSMAX %u%s%s", cap_hi,
		NVME_CAP_HI_DSTRD(cap_hi),
		NVME_CAP_HI_NSSRS(cap_hi) ? ", NSSRS" : "",
		NVME_CAP_HI_CSS(cap_hi),
		NVME_CAP_HI_BPS(cap_hi) ? ", BPS" : "",
		NVME_CAP_HI_MPSMIN(cap_hi),
		NVME_CAP_HI_MPSMAX(cap_hi),
		NVME_CAP_HI_PMRS(cap_hi) ? ", PMRS" : "",
		NVME_CAP_HI_CMBS(cap_hi) ? ", CMBS" : "");

	vs = nvme_mmio_read_4(regs, vs);
	LOG_DBG("Version: 0x%08x: %d.%d", vs,
		NVME_MAJOR(vs), NVME_MINOR(vs));

	if (NVME_CAP_HI_PMRS(cap_hi)) {
		pmrcap = nvme_mmio_read_4(regs, pmrcap);
		LOG_DBG("PMRCap: 0x%08x: BIR %u%s%s, PMRTU %u, "
			"PMRWBM %x, PMRTO %u%s", pmrcap,
			NVME_PMRCAP_BIR(pmrcap),
			NVME_PMRCAP_RDS(pmrcap) ? ", RDS" : "",
			NVME_PMRCAP_WDS(pmrcap) ? ", WDS" : "",
			NVME_PMRCAP_PMRTU(pmrcap),
			NVME_PMRCAP_PMRWBM(pmrcap),
			NVME_PMRCAP_PMRTO(pmrcap),
			NVME_PMRCAP_CMSS(pmrcap) ? ", CMSS" : "");
	}

	nvme_ctrlr->dstrd = NVME_CAP_HI_DSTRD(cap_hi) + 2;

	nvme_ctrlr->mps = NVME_CAP_HI_MPSMIN(cap_hi);
	nvme_ctrlr->page_size = 1 << (NVME_MPS_SHIFT + nvme_ctrlr->mps);

	LOG_DBG("MPS: %u - Page Size: %u bytes",
		nvme_ctrlr->mps, nvme_ctrlr->page_size);

	/* Get ready timeout value from controller, in units of 500ms. */
	to = NVME_CAP_LO_TO(cap_lo) + 1;
	nvme_ctrlr->ready_timeout_in_ms = to * 500;

	/* Cap transfers by the maximum addressable by
	 * page-sized PRP (4KB pages -> 2MB).
	 * ToDo: it could be less -> take the minimum.
	 */
	nvme_ctrlr->max_xfer_size = nvme_ctrlr->page_size /
		8 * nvme_ctrlr->page_size;

	LOG_DBG("Max transfer size: %u bytes", nvme_ctrlr->max_xfer_size);
}

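/*
 * Locate the controller on the PCIe bus, map its register BAR, and allocate
 * and enable the MSI-X vectors used by the queue pairs.
 */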
static int nvme_controller_pcie_configure(const struct device *dev)
{
	const struct nvme_controller_config *nvme_ctrlr_cfg = dev->config;
	struct nvme_controller *nvme_ctrlr = dev->data;
	struct pcie_bar mbar_regs;
	uint8_t n_vectors;

	if (nvme_ctrlr_cfg->pcie->bdf == PCIE_BDF_NONE) {
		LOG_ERR("Controller not found");
		return -ENODEV;
	}

	LOG_DBG("Configuring NVME controller ID %x:%x at %d:%x.%d",
		PCIE_ID_TO_VEND(nvme_ctrlr_cfg->pcie->id),
		PCIE_ID_TO_DEV(nvme_ctrlr_cfg->pcie->id),
		PCIE_BDF_TO_BUS(nvme_ctrlr_cfg->pcie->bdf),
		PCIE_BDF_TO_DEV(nvme_ctrlr_cfg->pcie->bdf),
		PCIE_BDF_TO_FUNC(nvme_ctrlr_cfg->pcie->bdf));

	if (!pcie_get_mbar(nvme_ctrlr_cfg->pcie->bdf,
			   NVME_PCIE_BAR_IDX, &mbar_regs)) {
		LOG_ERR("Could not get NVME registers");
		return -EIO;
	}

	device_map(DEVICE_MMIO_RAM_PTR(dev), mbar_regs.phys_addr,
		   mbar_regs.size, K_MEM_CACHE_NONE);

	/* Allocating vectors */
	n_vectors = pcie_msi_vectors_allocate(nvme_ctrlr_cfg->pcie->bdf,
					      CONFIG_NVME_INT_PRIORITY,
					      nvme_ctrlr->vectors,
					      NVME_PCIE_MSIX_VECTORS);
	if (n_vectors == 0) {
		LOG_ERR("Could not allocate %u MSI-X vectors",
			NVME_PCIE_MSIX_VECTORS);
		return -EIO;
	}

	/* Enabling MSI-X and the vectors */
	if (!pcie_msi_enable(nvme_ctrlr_cfg->pcie->bdf,
			     nvme_ctrlr->vectors, n_vectors, 0)) {
		LOG_ERR("Could not enable MSI-X");
		return -EIO;
	}

	return 0;
}

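/*
 * Issue an Identify Controller command, byte-swap the returned data and
 * clamp max_xfer_size to what the controller's MDTS field advertises.
 */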
static int nvme_controller_identify(struct nvme_controller *nvme_ctrlr)
{
	struct nvme_completion_poll_status status =
		NVME_CPL_STATUS_POLL_INIT(status);

	nvme_ctrlr_cmd_identify_controller(nvme_ctrlr,
					   nvme_completion_poll_cb, &status);
	nvme_completion_poll(&status);
	if (nvme_cpl_status_is_error(&status)) {
		LOG_ERR("Could not identify the controller");
		nvme_completion_print(&status.cpl);
		return -EIO;
	}

	nvme_controller_data_swapbytes(&nvme_ctrlr->cdata);

	/*
	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
	 * controller supports.
	 */
	if (nvme_ctrlr->cdata.mdts > 0) {
		nvme_ctrlr->max_xfer_size =
			MIN(nvme_ctrlr->max_xfer_size,
			    1 << (nvme_ctrlr->cdata.mdts + NVME_MPS_SHIFT +
				  NVME_CAP_HI_MPSMIN(nvme_ctrlr->cap_hi)));
	}

	return 0;
}

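/*
 * Construct up to CONFIG_NVME_MAX_NAMESPACES namespaces, starting at nsid 1,
 * bounded by the namespace count (NN) reported by Identify Controller.
 */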
static void nvme_controller_setup_namespaces(struct nvme_controller *nvme_ctrlr)
{
	uint32_t i;

	for (i = 0;
	     i < MIN(nvme_ctrlr->cdata.nn, CONFIG_NVME_MAX_NAMESPACES); i++) {
		struct nvme_namespace *ns = &nvme_ctrlr->ns[i];

		if (nvme_namespace_construct(ns, i + 1, nvme_ctrlr) != 0) {
			break;
		}

		LOG_DBG("Namespace id %u setup and running", i + 1);
	}
}

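/*
 * Device init: configure PCIe access, read the controller capabilities, then
 * reset (disable), set up the admin queues, enable, create the I/O queues,
 * identify the controller and finally bring up its namespaces.
 */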
static int nvme_controller_init(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	int ret;

	k_mutex_init(&nvme_ctrlr->lock);

	nvme_cmd_init();

	nvme_ctrlr->dev = dev;

	ret = nvme_controller_pcie_configure(dev);
	if (ret != 0) {
		return ret;
	}

	nvme_controller_gather_info(dev);

	ret = nvme_controller_disable(dev);
	if (ret != 0) {
		LOG_ERR("Controller cannot be disabled");
		return ret;
	}

	ret = nvme_controller_setup_admin_queues(dev);
	if (ret != 0) {
		return ret;
	}

	ret = nvme_controller_enable(dev);
	if (ret != 0) {
		LOG_ERR("Controller cannot be enabled");
		return ret;
	}

	ret = nvme_controller_setup_io_queues(dev);
	if (ret != 0) {
		return ret;
	}

	ret = nvme_controller_identify(nvme_ctrlr);
	if (ret != 0) {
		return ret;
	}

	nvme_controller_setup_namespaces(nvme_ctrlr);

	return 0;
}

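/*
 * Per-instance definition: declares the PCIe data for the instance,
 * statically allocates the admin and I/O queue pairs, and registers the
 * device with nvme_controller_init at POST_KERNEL.
 *
 * A matching devicetree node looks roughly like the sketch below; the node
 * name and the vendor/device IDs are illustrative placeholders:
 *
 *	nvme0: nvme0 {
 *		compatible = "nvme-controller";
 *		vendor-id = <0x8086>;
 *		device-id = <0xf1a5>;
 *	};
 */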
#define NVME_CONTROLLER_DEVICE_INIT(n)					\
	DEVICE_PCIE_INST_DECLARE(n);					\
	NVME_ADMINQ_ALLOCATE(n, CONFIG_NVME_ADMIN_ENTRIES);		\
	NVME_IOQ_ALLOCATE(n, CONFIG_NVME_IO_ENTRIES);			\
									\
	static struct nvme_controller nvme_ctrlr_data_##n = {		\
		.id = n,						\
		.num_io_queues = CONFIG_NVME_IO_QUEUES,			\
		.adminq = &admin_##n,					\
		.ioq = &io_##n,						\
	};								\
									\
	static struct nvme_controller_config nvme_ctrlr_cfg_##n =	\
	{								\
		DEVICE_PCIE_INST_INIT(n, pcie),				\
	};								\
									\
	DEVICE_DT_INST_DEFINE(n, &nvme_controller_init,		\
			      NULL, &nvme_ctrlr_data_##n,		\
			      &nvme_ctrlr_cfg_##n, POST_KERNEL,		\
			      CONFIG_KERNEL_INIT_PRIORITY_DEVICE, NULL);

DT_INST_FOREACH_STATUS_OKAY(NVME_CONTROLLER_DEVICE_INIT)