/*
 * Copyright (c) 2022 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 *
 * Derived from FreeBSD original driver made by Jim Harris
 * with contributions from Alexander Motin, Wojciech Macek, and Warner Losh
 */

#define DT_DRV_COMPAT nvme_controller

#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(nvme, CONFIG_NVME_LOG_LEVEL);

#include <errno.h>

#include <zephyr/kernel.h>

#include <soc.h>
#include <zephyr/device.h>
#include <zephyr/init.h>

#include "nvme_helpers.h"
#include "nvme.h"

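/*
 * Poll CSTS.RDY until it matches desired_val, busy-waiting between reads.
 * The wait starts at 1 ms and grows by 3/2 on each iteration, capped at 1 s.
 * A CSTS value of NVME_GONE means the controller is no longer reachable
 * (e.g. surprise removal). The overall deadline comes from
 * ready_timeout_in_ms, derived from CAP.TO in nvme_controller_gather_info().
 */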
static int nvme_controller_wait_for_ready(const struct device *dev,
					  const int desired_val)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	int timeout = sys_clock_tick_get_32() +
		k_ms_to_ticks_ceil32(nvme_ctrlr->ready_timeout_in_ms);
	uint32_t delta_t = USEC_PER_MSEC;
	uint32_t csts;

	while (1) {
		csts = nvme_mmio_read_4(regs, csts);
		if (csts == NVME_GONE) {
			LOG_ERR("Controller is unreachable");
			return -EIO;
		}

		if (((csts >> NVME_CSTS_REG_RDY_SHIFT) &
		     NVME_CSTS_REG_RDY_MASK) == desired_val) {
			break;
		}

		if ((int64_t)timeout - sys_clock_tick_get_32() < 0) {
			LOG_ERR("Timeout error");
			return -EIO;
		}

		k_busy_wait(delta_t);
		delta_t = MIN((MSEC_PER_SEC * USEC_PER_MSEC), delta_t * 3 / 2);
	}

	return 0;
}

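/*
 * Disable the controller following the sequence the NVMe spec requires:
 * if CC.EN is already 0, just make sure CSTS.RDY is (or becomes) 0; if
 * CC.EN is 1, first wait for CSTS.RDY to become 1 so the controller is in
 * a consistent state, then clear CC.EN and wait for CSTS.RDY to drop to 0.
 */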
static int nvme_controller_disable(const struct device *dev)
{
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	uint32_t cc, csts;
	uint8_t enabled, ready;
	int err;

	cc = nvme_mmio_read_4(regs, cc);
	csts = nvme_mmio_read_4(regs, csts);

	ready = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK;

	enabled = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK;
	if (enabled == 0) {
		/* Wait for RDY == 0 or timeout & fail */
		if (ready == 0) {
			return 0;
		}

		return nvme_controller_wait_for_ready(dev, 0);
	}

	if (ready == 0) {
		/* EN == 1, wait for RDY == 1 or timeout & fail */
		err = nvme_controller_wait_for_ready(dev, 1);
		if (err != 0) {
			return err;
		}
	}

	cc &= ~NVME_CC_REG_EN_MASK;
	nvme_mmio_write_4(regs, cc, cc);

	return nvme_controller_wait_for_ready(dev, 0);
}

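/*
 * Enable the controller: if CC.EN is already set, just wait for CSTS.RDY;
 * otherwise wait for RDY == 0, then program CC in one shot with EN = 1,
 * CSS = 0 (NVM command set), AMS = 0 (round-robin arbitration), SHN = 0
 * (no shutdown notification), 64-byte SQ entries, 16-byte CQ entries and
 * the memory page size selected in nvme_controller_gather_info(), and
 * finally wait for RDY == 1.
 */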
static int nvme_controller_enable(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	uint8_t enabled, ready;
	uint32_t cc, csts;
	int err;

	cc = nvme_mmio_read_4(regs, cc);
	csts = nvme_mmio_read_4(regs, csts);

	ready = (csts >> NVME_CSTS_REG_RDY_SHIFT) & NVME_CSTS_REG_RDY_MASK;

	enabled = (cc >> NVME_CC_REG_EN_SHIFT) & NVME_CC_REG_EN_MASK;
	if (enabled == 1) {
		if (ready == 1) {
			LOG_DBG("Already enabled");
			return 0;
		}

		return nvme_controller_wait_for_ready(dev, 1);
	}

	/* EN == 0 already, wait for RDY == 0 or timeout & fail */
	err = nvme_controller_wait_for_ready(dev, 0);
	if (err != 0) {
		return err;
	}

	/* Initialization values for CC */
	cc = 0;
	cc |= 1 << NVME_CC_REG_EN_SHIFT;
	cc |= 0 << NVME_CC_REG_CSS_SHIFT;
	cc |= 0 << NVME_CC_REG_AMS_SHIFT;
	cc |= 0 << NVME_CC_REG_SHN_SHIFT;
	cc |= 6 << NVME_CC_REG_IOSQES_SHIFT; /* SQ entry size == 64 == 2^6 */
	cc |= 4 << NVME_CC_REG_IOCQES_SHIFT; /* CQ entry size == 16 == 2^4 */
	cc |= nvme_ctrlr->mps << NVME_CC_REG_MPS_SHIFT;

	nvme_mmio_write_4(regs, cc, cc);

	return nvme_controller_wait_for_ready(dev, 1);
}

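/*
 * Set up the admin queue pair (always queue id 0) and advertise it to the
 * controller: write the submission/completion queue base addresses to ASQ
 * and ACQ, and the queue depths to AQA. ACQS and ASQS are 0-based, hence
 * CONFIG_NVME_ADMIN_ENTRIES - 1.
 */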
static int nvme_controller_setup_admin_queues(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);
	uint32_t aqa, qsize;

	nvme_cmd_qpair_reset(nvme_ctrlr->adminq);

	/* Admin queue is always id 0 */
	if (nvme_cmd_qpair_setup(nvme_ctrlr->adminq, nvme_ctrlr, 0) != 0) {
		LOG_ERR("Admin cmd qpair setup failed");
		return -EIO;
	}

	nvme_mmio_write_8(regs, asq, nvme_ctrlr->adminq->cmd_bus_addr);
	nvme_mmio_write_8(regs, acq, nvme_ctrlr->adminq->cpl_bus_addr);

	/* acqs and asqs are 0-based. */
	qsize = CONFIG_NVME_ADMIN_ENTRIES - 1;
	aqa = 0;
	aqa = (qsize & NVME_AQA_REG_ACQS_MASK) << NVME_AQA_REG_ACQS_SHIFT;
	aqa |= (qsize & NVME_AQA_REG_ASQS_MASK) << NVME_AQA_REG_ASQS_SHIFT;

	nvme_mmio_write_4(regs, aqa, aqa);

	return 0;
}

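/*
 * Negotiate and create the IO queue pairs. A Set Features (Number of
 * Queues) command is issued first; the controller answers in cdw0 with the
 * number of SQs/CQs it actually granted (0-based), and num_io_queues is
 * clamped to that. Each IO qpair then gets its completion queue created
 * before its submission queue, since an SQ must reference an existing CQ.
 */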
static int nvme_controller_setup_io_queues(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	struct nvme_completion_poll_status status;
	struct nvme_cmd_qpair *io_qpair;
	int cq_allocated, sq_allocated;
	int ret, idx;

	nvme_cpl_status_poll_init(&status);

	ret = nvme_ctrlr_cmd_set_num_queues(nvme_ctrlr,
					    nvme_ctrlr->num_io_queues,
					    nvme_completion_poll_cb, &status);
	if (ret != 0) {
		return ret;
	}

	nvme_completion_poll(&status);
	if (nvme_cpl_status_is_error(&status)) {
		LOG_ERR("Could not set IO num queues to %u",
			nvme_ctrlr->num_io_queues);
		nvme_completion_print(&status.cpl);
		return -EIO;
	}

	/*
	 * Data in cdw0 is 0-based.
	 * Lower 16-bits indicate number of submission queues allocated.
	 * Upper 16-bits indicate number of completion queues allocated.
	 */
	sq_allocated = (status.cpl.cdw0 & 0xFFFF) + 1;
	cq_allocated = (status.cpl.cdw0 >> 16) + 1;

	/*
	 * Controller may allocate more queues than we requested,
	 * so use the minimum of the number requested and what was
	 * actually allocated.
	 */
	nvme_ctrlr->num_io_queues = MIN(nvme_ctrlr->num_io_queues,
					sq_allocated);
	nvme_ctrlr->num_io_queues = MIN(nvme_ctrlr->num_io_queues,
					cq_allocated);

	for (idx = 0; idx < nvme_ctrlr->num_io_queues; idx++) {
		io_qpair = &nvme_ctrlr->ioq[idx];
		if (nvme_cmd_qpair_setup(io_qpair, nvme_ctrlr, idx + 1) != 0) {
			LOG_ERR("IO cmd qpair %u setup failed", idx + 1);
			return -EIO;
		}

		nvme_cmd_qpair_reset(io_qpair);

		nvme_cpl_status_poll_init(&status);

		ret = nvme_ctrlr_cmd_create_io_cq(nvme_ctrlr, io_qpair,
						  nvme_completion_poll_cb,
						  &status);
		if (ret != 0) {
			return ret;
		}

		nvme_completion_poll(&status);
		if (nvme_cpl_status_is_error(&status)) {
			LOG_ERR("IO CQ creation failed");
			nvme_completion_print(&status.cpl);
			return -EIO;
		}

		nvme_cpl_status_poll_init(&status);

		ret = nvme_ctrlr_cmd_create_io_sq(nvme_ctrlr, io_qpair,
						  nvme_completion_poll_cb,
						  &status);
		if (ret != 0) {
			return ret;
		}

		nvme_completion_poll(&status);
		if (nvme_cpl_status_is_error(&status)) {
			LOG_ERR("IO SQ creation failed");
			nvme_completion_print(&status.cpl);
			return -EIO;
		}
	}

	return 0;
}

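/*
 * Cache controller capabilities from the CAP, VS and (if present) PMRCAP
 * registers and derive the values the rest of the driver relies on:
 * - dstrd: doorbell stride exponent (CAP.DSTRD + 2, i.e. 4 << CAP.DSTRD bytes)
 * - mps/page_size: CAP.MPSMIN is used, so with MPSMIN == 0 the page size is
 *   1 << (12 + 0) == 4096 bytes
 * - ready_timeout_in_ms: CAP.TO is expressed in 500 ms units
 * - max_xfer_size: what a single page-sized PRP list can address, i.e.
 *   (page_size / 8) entries * page_size, which is 2 MiB with 4 KiB pages
 */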
static void nvme_controller_gather_info(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	mm_reg_t regs = DEVICE_MMIO_GET(dev);

	uint32_t cap_lo, cap_hi, to, vs, pmrcap;

	nvme_ctrlr->cap_lo = cap_lo = nvme_mmio_read_4(regs, cap_lo);
	LOG_DBG("CapLo: 0x%08x: MQES %u%s%s%s%s, TO %u",
		cap_lo, NVME_CAP_LO_MQES(cap_lo),
		NVME_CAP_LO_CQR(cap_lo) ? ", CQR" : "",
		NVME_CAP_LO_AMS(cap_lo) ? ", AMS" : "",
		(NVME_CAP_LO_AMS(cap_lo) & 0x1) ? " WRRwUPC" : "",
		(NVME_CAP_LO_AMS(cap_lo) & 0x2) ? " VS" : "",
		NVME_CAP_LO_TO(cap_lo));

	nvme_ctrlr->cap_hi = cap_hi = nvme_mmio_read_4(regs, cap_hi);
	LOG_DBG("CapHi: 0x%08x: DSTRD %u%s, CSS %x%s, "
		"MPSMIN %u, MPSMAX %u%s%s", cap_hi,
		NVME_CAP_HI_DSTRD(cap_hi),
		NVME_CAP_HI_NSSRS(cap_hi) ? ", NSSRS" : "",
		NVME_CAP_HI_CSS(cap_hi),
		NVME_CAP_HI_BPS(cap_hi) ? ", BPS" : "",
		NVME_CAP_HI_MPSMIN(cap_hi),
		NVME_CAP_HI_MPSMAX(cap_hi),
		NVME_CAP_HI_PMRS(cap_hi) ? ", PMRS" : "",
		NVME_CAP_HI_CMBS(cap_hi) ? ", CMBS" : "");

	vs = nvme_mmio_read_4(regs, vs);
	LOG_DBG("Version: 0x%08x: %d.%d", vs,
		NVME_MAJOR(vs), NVME_MINOR(vs));

	if (NVME_CAP_HI_PMRS(cap_hi)) {
		pmrcap = nvme_mmio_read_4(regs, pmrcap);
		LOG_DBG("PMRCap: 0x%08x: BIR %u%s%s, PMRTU %u, "
			"PMRWBM %x, PMRTO %u%s", pmrcap,
			NVME_PMRCAP_BIR(pmrcap),
			NVME_PMRCAP_RDS(pmrcap) ? ", RDS" : "",
			NVME_PMRCAP_WDS(pmrcap) ? ", WDS" : "",
			NVME_PMRCAP_PMRTU(pmrcap),
			NVME_PMRCAP_PMRWBM(pmrcap),
			NVME_PMRCAP_PMRTO(pmrcap),
			NVME_PMRCAP_CMSS(pmrcap) ? ", CMSS" : "");
	}

	nvme_ctrlr->dstrd = NVME_CAP_HI_DSTRD(cap_hi) + 2;

	nvme_ctrlr->mps = NVME_CAP_HI_MPSMIN(cap_hi);
	nvme_ctrlr->page_size = 1 << (NVME_MPS_SHIFT + nvme_ctrlr->mps);

	LOG_DBG("MPS: %u - Page Size: %u bytes",
		nvme_ctrlr->mps, nvme_ctrlr->page_size);

	/* Get ready timeout value from controller, in units of 500ms. */
	to = NVME_CAP_LO_TO(cap_lo) + 1;
	nvme_ctrlr->ready_timeout_in_ms = to * 500;

	/* Cap transfers by the maximum addressable by
	 * page-sized PRP (4KB pages -> 2MB).
	 * ToDo: it could be less -> take the minimum.
	 */
	nvme_ctrlr->max_xfer_size = nvme_ctrlr->page_size /
		8 * nvme_ctrlr->page_size;

	LOG_DBG("Max transfer size: %u bytes", nvme_ctrlr->max_xfer_size);
}

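/*
 * PCIe plumbing: bail out if the devicetree-described controller was not
 * enumerated, map the controller register BAR (NVME_PCIE_BAR_IDX) uncached,
 * then allocate and enable the MSI-X vectors the admin and IO queues use.
 */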
static int nvme_controller_pcie_configure(const struct device *dev)
{
	const struct nvme_controller_config *nvme_ctrlr_cfg = dev->config;
	struct nvme_controller *nvme_ctrlr = dev->data;
	struct pcie_bar mbar_regs;
	uint8_t n_vectors;

	if (nvme_ctrlr_cfg->pcie->bdf == PCIE_BDF_NONE) {
		LOG_ERR("Controller not found");
		return -ENODEV;
	}

	LOG_DBG("Configuring NVME controller ID %x:%x at %d:%x.%d",
		PCIE_ID_TO_VEND(nvme_ctrlr_cfg->pcie->id),
		PCIE_ID_TO_DEV(nvme_ctrlr_cfg->pcie->id),
		PCIE_BDF_TO_BUS(nvme_ctrlr_cfg->pcie->bdf),
		PCIE_BDF_TO_DEV(nvme_ctrlr_cfg->pcie->bdf),
		PCIE_BDF_TO_FUNC(nvme_ctrlr_cfg->pcie->bdf));

	if (!pcie_get_mbar(nvme_ctrlr_cfg->pcie->bdf,
			   NVME_PCIE_BAR_IDX, &mbar_regs)) {
		LOG_ERR("Could not get NVME registers");
		return -EIO;
	}

	device_map(DEVICE_MMIO_RAM_PTR(dev), mbar_regs.phys_addr,
		   mbar_regs.size, K_MEM_CACHE_NONE);

	/* Allocating vectors */
	n_vectors = pcie_msi_vectors_allocate(nvme_ctrlr_cfg->pcie->bdf,
					      CONFIG_NVME_INT_PRIORITY,
					      nvme_ctrlr->vectors,
					      NVME_PCIE_MSIX_VECTORS);
	if (n_vectors == 0) {
		LOG_ERR("Could not allocate %u MSI-X vectors",
			NVME_PCIE_MSIX_VECTORS);
		return -EIO;
	}

	/* Enabling MSI-X and the vectors */
	if (!pcie_msi_enable(nvme_ctrlr_cfg->pcie->bdf,
			     nvme_ctrlr->vectors, n_vectors, 0)) {
		LOG_ERR("Could not enable MSI-X");
		return -EIO;
	}

	return 0;
}

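/*
 * Issue IDENTIFY CONTROLLER and convert the returned data to host byte
 * order. If the controller reports MDTS (maximum data transfer size, a
 * power of two in units of the minimum memory page size), clamp
 * max_xfer_size so it never exceeds what the controller advertises.
 */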
static int nvme_controller_identify(struct nvme_controller *nvme_ctrlr)
{
	struct nvme_completion_poll_status status =
		NVME_CPL_STATUS_POLL_INIT(status);

	nvme_ctrlr_cmd_identify_controller(nvme_ctrlr,
					   nvme_completion_poll_cb, &status);
	nvme_completion_poll(&status);
	if (nvme_cpl_status_is_error(&status)) {
		LOG_ERR("Could not identify the controller");
		nvme_completion_print(&status.cpl);
		return -EIO;
	}

	nvme_controller_data_swapbytes(&nvme_ctrlr->cdata);

	/*
	 * Use MDTS to ensure our default max_xfer_size doesn't exceed what the
	 * controller supports.
	 */
	if (nvme_ctrlr->cdata.mdts > 0) {
		nvme_ctrlr->max_xfer_size =
			MIN(nvme_ctrlr->max_xfer_size,
			    1 << (nvme_ctrlr->cdata.mdts + NVME_MPS_SHIFT +
				  NVME_CAP_HI_MPSMIN(nvme_ctrlr->cap_hi)));
	}

	return 0;
}

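/*
 * Construct up to CONFIG_NVME_MAX_NAMESPACES namespaces, stopping early at
 * the first one that fails to construct. cdata.nn is the number of
 * namespaces the controller reports; namespace IDs are 1-based, so index i
 * maps to namespace ID i + 1.
 */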
static void nvme_controller_setup_namespaces(struct nvme_controller *nvme_ctrlr)
{
	uint32_t i;

	for (i = 0;
	     i < MIN(nvme_ctrlr->cdata.nn, CONFIG_NVME_MAX_NAMESPACES); i++) {
		struct nvme_namespace *ns = &nvme_ctrlr->ns[i];

		if (nvme_namespace_construct(ns, i + 1, nvme_ctrlr) != 0) {
			break;
		}

		LOG_DBG("Namespace id %u setup and running", i + 1);
	}
}

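/*
 * Driver init: bring the controller up in the order the hardware expects.
 * PCIe/MSI-X configuration and capability gathering come first, then the
 * controller is disabled so the admin queues can be programmed, re-enabled,
 * and finally the IO queues, controller identification and namespaces are
 * set up on top of a working admin queue.
 */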
static int nvme_controller_init(const struct device *dev)
{
	struct nvme_controller *nvme_ctrlr = dev->data;
	int ret;

	k_mutex_init(&nvme_ctrlr->lock);

	nvme_cmd_init();

	nvme_ctrlr->dev = dev;

	ret = nvme_controller_pcie_configure(dev);
	if (ret != 0) {
		return ret;
	}

	nvme_controller_gather_info(dev);

	ret = nvme_controller_disable(dev);
	if (ret != 0) {
		LOG_ERR("Controller cannot be disabled");
		return ret;
	}

	ret = nvme_controller_setup_admin_queues(dev);
	if (ret != 0) {
		return ret;
	}

	ret = nvme_controller_enable(dev);
	if (ret != 0) {
		LOG_ERR("Controller cannot be enabled");
		return ret;
	}

	ret = nvme_controller_setup_io_queues(dev);
	if (ret != 0) {
		return ret;
	}

	ret = nvme_controller_identify(nvme_ctrlr);
	if (ret != 0) {
		return ret;
	}

	nvme_controller_setup_namespaces(nvme_ctrlr);

	return 0;
}

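/*
 * Per-instance instantiation: declare the PCIe context, statically allocate
 * the admin and IO queue memory, tie them into the controller data and
 * PCIe-backed config structs, and register the device at POST_KERNEL init
 * level for each enabled devicetree node.
 */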
#define NVME_CONTROLLER_DEVICE_INIT(n)					\
	DEVICE_PCIE_INST_DECLARE(n);					\
	NVME_ADMINQ_ALLOCATE(n, CONFIG_NVME_ADMIN_ENTRIES);		\
	NVME_IOQ_ALLOCATE(n, CONFIG_NVME_IO_ENTRIES);			\
									\
	static struct nvme_controller nvme_ctrlr_data_##n = {		\
		.id = n,						\
		.num_io_queues = CONFIG_NVME_IO_QUEUES,			\
		.adminq = &admin_##n,					\
		.ioq = &io_##n,						\
	};								\
									\
	static struct nvme_controller_config nvme_ctrlr_cfg_##n =	\
	{								\
		DEVICE_PCIE_INST_INIT(n, pcie),				\
	};								\
									\
	DEVICE_DT_INST_DEFINE(n, &nvme_controller_init,		\
			      NULL, &nvme_ctrlr_data_##n,		\
			      &nvme_ctrlr_cfg_##n, POST_KERNEL,		\
			      CONFIG_KERNEL_INIT_PRIORITY_DEVICE, NULL);

DT_INST_FOREACH_STATUS_OKAY(NVME_CONTROLLER_DEVICE_INIT)