/*
 * Copyright (c) 2022 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 *
 * Derived from the FreeBSD original driver written by Jim Harris,
 * with contributions from Alexander Motin, Wojciech Macek, and Warner Losh
 */

#include <zephyr/logging/log.h>
LOG_MODULE_DECLARE(nvme, CONFIG_NVME_LOG_LEVEL);

#include <zephyr/kernel.h>
#include <zephyr/cache.h>
#include <zephyr/sys/byteorder.h>

#include <string.h>

#include "nvme.h"
#include "nvme_helpers.h"

static struct nvme_prp_list prp_list_pool[CONFIG_NVME_PRP_LIST_AMOUNT];
static sys_dlist_t free_prp_list;

static struct nvme_request request_pool[NVME_REQUEST_AMOUNT];
static sys_dlist_t free_request;
static sys_dlist_t pending_request;

static void request_timeout(struct k_work *work);

static K_WORK_DELAYABLE_DEFINE(request_timer, request_timeout);

#ifdef CONFIG_NVME_LOG_LEVEL_DBG
struct nvme_status_string {
	uint16_t   sc;
	const char *str;
};

static struct nvme_status_string generic_status[] = {
	{ NVME_SC_SUCCESS, "SUCCESS" },
	{ NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
	{ NVME_SC_INVALID_FIELD, "INVALID FIELD" },
	{ NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
	{ NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
	{ NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
	{ NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
	{ NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
	{ NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
	{ NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
	{ NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
	{ NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
	{ NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
	{ NVME_SC_INVALID_SGL_SEGMENT_DESCR, "INVALID SGL SEGMENT DESCRIPTOR" },
	{ NVME_SC_INVALID_NUMBER_OF_SGL_DESCR, "INVALID NUMBER OF SGL DESCRIPTORS" },
	{ NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
	{ NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
	{ NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
	{ NVME_SC_INVALID_USE_OF_CMB, "INVALID USE OF CONTROLLER MEMORY BUFFER" },
	{ NVME_SC_PRP_OFFSET_INVALID, "PRP OFFSET INVALID" },
	{ NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
	{ NVME_SC_OPERATION_DENIED, "OPERATION DENIED" },
	{ NVME_SC_SGL_OFFSET_INVALID, "SGL OFFSET INVALID" },
	{ NVME_SC_HOST_ID_INCONSISTENT_FORMAT, "HOST IDENTIFIER INCONSISTENT FORMAT" },
	{ NVME_SC_KEEP_ALIVE_TIMEOUT_EXPIRED, "KEEP ALIVE TIMEOUT EXPIRED" },
	{ NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID, "KEEP ALIVE TIMEOUT INVALID" },
	{ NVME_SC_ABORTED_DUE_TO_PREEMPT, "COMMAND ABORTED DUE TO PREEMPT AND ABORT" },
	{ NVME_SC_SANITIZE_FAILED, "SANITIZE FAILED" },
	{ NVME_SC_SANITIZE_IN_PROGRESS, "SANITIZE IN PROGRESS" },
	{ NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID, "SGL DATA BLOCK GRANULARITY INVALID" },
	{ NVME_SC_NOT_SUPPORTED_IN_CMB, "COMMAND NOT SUPPORTED FOR QUEUE IN CMB" },
	{ NVME_SC_NAMESPACE_IS_WRITE_PROTECTED, "NAMESPACE IS WRITE PROTECTED" },
	{ NVME_SC_COMMAND_INTERRUPTED, "COMMAND INTERRUPTED" },
	{ NVME_SC_TRANSIENT_TRANSPORT_ERROR, "TRANSIENT TRANSPORT ERROR" },
	{ NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
	{ NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
	{ NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
	{ NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
	{ NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
	{ 0xFFFF, "GENERIC" }
};

static struct nvme_status_string command_specific_status[] = {
	{ NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
	{ NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
	{ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
	{ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
	{ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
	{ NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
	{ NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
	{ NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
	{ NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
	{ NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
	{ NVME_SC_FIRMWARE_REQUIRES_RESET, "FIRMWARE REQUIRES RESET" },
	{ NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
	{ NVME_SC_FEATURE_NOT_SAVEABLE, "FEATURE IDENTIFIER NOT SAVEABLE" },
	{ NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
	{ NVME_SC_FEATURE_NOT_NS_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
	{ NVME_SC_FW_ACT_REQUIRES_NVMS_RESET, "FIRMWARE ACTIVATION REQUIRES NVM SUBSYSTEM RESET" },
	{ NVME_SC_FW_ACT_REQUIRES_RESET, "FIRMWARE ACTIVATION REQUIRES RESET" },
	{ NVME_SC_FW_ACT_REQUIRES_TIME, "FIRMWARE ACTIVATION REQUIRES MAXIMUM TIME VIOLATION" },
	{ NVME_SC_FW_ACT_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
	{ NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
	{ NVME_SC_NS_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
	{ NVME_SC_NS_ID_UNAVAILABLE, "NAMESPACE IDENTIFIER UNAVAILABLE" },
	{ NVME_SC_NS_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
	{ NVME_SC_NS_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
	{ NVME_SC_NS_NOT_ATTACHED, "NS NOT ATTACHED" },
	{ NVME_SC_THIN_PROV_NOT_SUPPORTED, "THIN PROVISIONING NOT SUPPORTED" },
	{ NVME_SC_CTRLR_LIST_INVALID, "CONTROLLER LIST INVALID" },
	{ NVME_SC_SELF_TEST_IN_PROGRESS, "DEVICE SELF-TEST IN PROGRESS" },
	{ NVME_SC_BOOT_PART_WRITE_PROHIB, "BOOT PARTITION WRITE PROHIBITED" },
	{ NVME_SC_INVALID_CTRLR_ID, "INVALID CONTROLLER IDENTIFIER" },
	{ NVME_SC_INVALID_SEC_CTRLR_STATE, "INVALID SECONDARY CONTROLLER STATE" },
	{ NVME_SC_INVALID_NUM_OF_CTRLR_RESRC, "INVALID NUMBER OF CONTROLLER RESOURCES" },
	{ NVME_SC_INVALID_RESOURCE_ID, "INVALID RESOURCE IDENTIFIER" },
	{ NVME_SC_SANITIZE_PROHIBITED_WPMRE,
	  "SANITIZE PROHIBITED WRITE PERSISTENT MEMORY REGION ENABLED" },
	{ NVME_SC_ANA_GROUP_ID_INVALID, "ANA GROUP IDENTIFIER INVALID" },
	{ NVME_SC_ANA_ATTACH_FAILED, "ANA ATTACH FAILED" },
	{ NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
	{ NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
	{ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
	{ 0xFFFF, "COMMAND SPECIFIC" }
};

static struct nvme_status_string media_error_status[] = {
	{ NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
	{ NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
	{ NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
	{ NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
	{ NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
	{ NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
	{ NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
	{ NVME_SC_DEALLOCATED_OR_UNWRITTEN, "DEALLOCATED OR UNWRITTEN LOGICAL BLOCK" },
	{ 0xFFFF, "MEDIA ERROR" }
};

static struct nvme_status_string path_related_status[] = {
	{ NVME_SC_INTERNAL_PATH_ERROR, "INTERNAL PATH ERROR" },
	{ NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS, "ASYMMETRIC ACCESS PERSISTENT LOSS" },
	{ NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE, "ASYMMETRIC ACCESS INACCESSIBLE" },
	{ NVME_SC_ASYMMETRIC_ACCESS_TRANSITION, "ASYMMETRIC ACCESS TRANSITION" },
	{ NVME_SC_CONTROLLER_PATHING_ERROR, "CONTROLLER PATHING ERROR" },
	{ NVME_SC_HOST_PATHING_ERROR, "HOST PATHING ERROR" },
	{ NVME_SC_COMMAND_ABORTED_BY_HOST, "COMMAND ABORTED BY HOST" },
	{ 0xFFFF, "PATH RELATED" }
};

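/*
 * Map a status code type (SCT) and status code (SC) from a completion
 * entry to a human readable string. Each table above ends with a 0xFFFF
 * sentinel whose string names the whole status code type, so unknown
 * codes still produce a meaningful message.
 */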
static const char *get_status_string(uint16_t sct, uint16_t sc)
{
	struct nvme_status_string *entry;

	switch (sct) {
	case NVME_SCT_GENERIC:
		entry = generic_status;
		break;
	case NVME_SCT_COMMAND_SPECIFIC:
		entry = command_specific_status;
		break;
	case NVME_SCT_MEDIA_ERROR:
		entry = media_error_status;
		break;
	case NVME_SCT_PATH_RELATED:
		entry = path_related_status;
		break;
	case NVME_SCT_VENDOR_SPECIFIC:
		return "VENDOR SPECIFIC";
	default:
		return "RESERVED";
	}

	while (entry->sc != 0xFFFF) {
		if (entry->sc == sc) {
			return entry->str;
		}

		entry++;
	}
	return entry->str;
}

void nvme_completion_print(const struct nvme_completion *cpl)
{
	uint8_t sct, sc, crd, m, dnr, p;

	sct = NVME_STATUS_GET_SCT(cpl->status);
	sc = NVME_STATUS_GET_SC(cpl->status);
	crd = NVME_STATUS_GET_CRD(cpl->status);
	m = NVME_STATUS_GET_M(cpl->status);
	dnr = NVME_STATUS_GET_DNR(cpl->status);
	p = NVME_STATUS_GET_P(cpl->status);

	LOG_DBG("%s (%02x/%02x) crd:%x m:%x dnr:%x p:%d "
		"sqid:%d cid:%d cdw0:%x",
		get_status_string(sct, sc), sct, sc, crd, m, dnr, p,
		cpl->sqid, cpl->cid, cpl->cdw0);
}

#endif /* CONFIG_NVME_LOG_LEVEL_DBG */

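/*
 * One-time initialization: seed the free lists with the statically
 * allocated request and PRP list pools. Must run before any request
 * is allocated.
 */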
void nvme_cmd_init(void)
{
	int idx;

	sys_dlist_init(&free_request);
	sys_dlist_init(&pending_request);
	sys_dlist_init(&free_prp_list);

	for (idx = 0; idx < NVME_REQUEST_AMOUNT; idx++) {
		sys_dlist_append(&free_request, &request_pool[idx].node);
	}

	for (idx = 0; idx < CONFIG_NVME_PRP_LIST_AMOUNT; idx++) {
		sys_dlist_append(&free_prp_list, &prp_list_pool[idx].node);
	}
}

static struct nvme_prp_list *nvme_prp_list_alloc(void)
{
	sys_dnode_t *node;

	node = sys_dlist_peek_head(&free_prp_list);
	if (!node) {
		LOG_ERR("Could not allocate PRP list");
		return NULL;
	}

	sys_dlist_remove(node);

	return CONTAINER_OF(node, struct nvme_prp_list, node);
}

static void nvme_prp_list_free(struct nvme_prp_list *prp_list)
{
	memset(prp_list, 0, sizeof(struct nvme_prp_list));
	sys_dlist_append(&free_prp_list, &prp_list->node);
}

void nvme_cmd_request_free(struct nvme_request *request)
{
	if (sys_dnode_is_linked(&request->node)) {
		sys_dlist_remove(&request->node);
	}

	if (request->prp_list != NULL) {
		nvme_prp_list_free(request->prp_list);
	}

	memset(request, 0, sizeof(struct nvme_request));
	sys_dlist_append(&free_request, &request->node);
}

struct nvme_request *nvme_cmd_request_alloc(void)
{
	sys_dnode_t *node;

	node = sys_dlist_peek_head(&free_request);
	if (!node) {
		LOG_ERR("Could not allocate request");
		return NULL;
	}

	sys_dlist_remove(node);

	return CONTAINER_OF(node, struct nvme_request, node);
}

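/*
 * Track an in-flight request for timeout purposes. pending_request is
 * kept in submission order, so the head always holds the oldest
 * deadline, and the timer only needs to be armed when it is not
 * already running.
 */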
static void nvme_cmd_register_request(struct nvme_request *request)
{
	sys_dlist_append(&pending_request, &request->node);

	request->req_start = k_uptime_get_32();

	if (!k_work_delayable_remaining_get(&request_timer)) {
		k_work_reschedule(&request_timer,
				  K_SECONDS(CONFIG_NVME_REQUEST_TIMEOUT));
	}
}

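/*
 * Delayed work handler: walk pending_request in submission order and
 * expire every request whose deadline has passed, invoking its
 * callback with a NULL completion so the caller can tell a timeout
 * apart from a real completion. Because the list is ordered, the walk
 * stops at the first request that has not expired yet, and the timer
 * is re-armed for that request's remaining time.
 */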
static void request_timeout(struct k_work *work)
{
	uint32_t current = k_uptime_get_32();
	struct nvme_request *request, *next;

	ARG_UNUSED(work);

	SYS_DLIST_FOR_EACH_CONTAINER_SAFE(&pending_request,
					  request, next, node) {
		if ((int32_t)(request->req_start +
			      CONFIG_NVME_REQUEST_TIMEOUT - current) > 0) {
			break;
		}

		LOG_WRN("Request %p CID %u timed out",
			request, request->cmd.cdw0.cid);

		/* ToDo:
		 * - check CSTS for a fatal fault
		 * - reset the controller if that is the case
		 * - or check the completion queue for a missed interrupt
		 */

		if (request->cb_fn) {
			request->cb_fn(request->cb_arg, NULL);
		}

		nvme_cmd_request_free(request);
	}

	if (request) {
		k_work_reschedule(&request_timer,
				  K_SECONDS(request->req_start +
					    CONFIG_NVME_REQUEST_TIMEOUT -
					    current));
	}
}

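/*
 * Decide whether a failed command is worth re-submitting. Only
 * transient conditions (NAMESPACE NOT READY, INTERNAL PATH ERROR, or
 * aborts requested by the host) are retried, and only when the
 * controller did not set the Do Not Retry (DNR) bit.
 */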
static bool nvme_completion_is_retry(const struct nvme_completion *cpl)
{
	uint8_t sct, sc, dnr;

	sct = NVME_STATUS_GET_SCT(cpl->status);
	sc = NVME_STATUS_GET_SC(cpl->status);
	dnr = NVME_STATUS_GET_DNR(cpl->status);

	/*
	 * TODO: spec is not clear how commands that are aborted due
	 *  to TLER will be marked.  So for now, it seems
	 *  NAMESPACE_NOT_READY is the only case where we should
	 *  look at the DNR bit. Requests failed with ABORTED_BY_REQUEST
	 *  set the DNR bit correctly since the driver controls that.
	 */
	switch (sct) {
	case NVME_SCT_GENERIC:
		switch (sc) {
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_NAMESPACE_NOT_READY:
			if (dnr) {
				return false;
			}

			return true;
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return false;
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
		return false;
	case NVME_SCT_PATH_RELATED:
		switch (sc) {
		case NVME_SC_INTERNAL_PATH_ERROR:
			if (dnr) {
				return false;
			}

			return true;
		default:
			return false;
		}
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return false;
	}
}

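/*
 * Handle one completion entry: update the per-queue retry and failure
 * statistics, re-submit the command if the failure is retriable and
 * the retry budget (CONFIG_NVME_RETRY_COUNT) is not exhausted,
 * otherwise run the caller's callback and recycle the request.
 */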
static void nvme_cmd_request_complete(struct nvme_request *request,
				      struct nvme_completion *cpl)
{
	bool error, retriable, retry;

	error = nvme_completion_is_error(cpl);
	retriable = nvme_completion_is_retry(cpl);
	retry = error && retriable &&
		request->retries < CONFIG_NVME_RETRY_COUNT;

	if (retry) {
		LOG_DBG("CMD will be retried");
		request->qpair->num_retries++;
	}

	if (error &&
	    (!retriable || (request->retries >= CONFIG_NVME_RETRY_COUNT))) {
		LOG_DBG("CMD error");
		request->qpair->num_failures++;
	}

	if (cpl->cid != request->cmd.cdw0.cid) {
		LOG_ERR("cpl cid != cmd cid");
	}

	if (retry) {
		LOG_DBG("Retrying CMD");
		/* Let's remove it from pending... */
		sys_dlist_remove(&request->node);
		/* ...and re-submit, thus re-adding to pending */
		nvme_cmd_qpair_submit_request(request->qpair, request);
		request->retries++;
	} else {
		LOG_DBG("Request %p CMD complete on %p/%p",
			request, request->cb_fn, request->cb_arg);

		if (request->cb_fn) {
			request->cb_fn(request->cb_arg, cpl);
		}

		nvme_cmd_request_free(request);
	}
}

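/*
 * Drain the completion queue. An entry is valid once its phase bit
 * matches the phase the driver expects (see nvme_cmd_qpair_reset());
 * the expected phase flips every time the head wraps around the
 * queue. The CID of each valid entry indexes straight back into
 * request_pool. Once done, the new head position is written to the
 * queue's head doorbell so the controller can reuse the entries.
 */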
static void nvme_cmd_qpair_process_completion(struct nvme_cmd_qpair *qpair)
{
	struct nvme_request *request;
	struct nvme_completion cpl;
	int done = 0;

	if (qpair->num_intr_handler_calls == 0 && qpair->phase == 0) {
		LOG_WRN("Phase wrong for first interrupt call.");
	}

	qpair->num_intr_handler_calls++;

	while (1) {
		uint16_t status;

		status = sys_le16_to_cpu(qpair->cpl[qpair->cq_head].status);
		if (NVME_STATUS_GET_P(status) != qpair->phase) {
			break;
		}

		cpl = qpair->cpl[qpair->cq_head];
		nvme_completion_swapbytes(&cpl);

		if (NVME_STATUS_GET_P(status) != NVME_STATUS_GET_P(cpl.status)) {
			LOG_WRN("Phase unexpectedly inconsistent");
		}

		if (cpl.cid < NVME_REQUEST_AMOUNT) {
			request = &request_pool[cpl.cid];
		} else {
			request = NULL;
		}

		done++;
		if (request != NULL) {
			nvme_cmd_request_complete(request, &cpl);
			qpair->sq_head = cpl.sqhd;
		} else {
			LOG_ERR("cpl (cid = %u) does not map to cmd", cpl.cid);
		}

		qpair->cq_head++;
		if (qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}
	}

	if (done != 0) {
		mm_reg_t regs = DEVICE_MMIO_GET(qpair->ctrlr->dev);

		sys_write32(qpair->cq_head, regs + qpair->cq_hdbl_off);
	}
}

static void nvme_cmd_qpair_msi_handler(const void *arg)
{
	const struct nvme_cmd_qpair *qpair = arg;

	nvme_cmd_qpair_process_completion((struct nvme_cmd_qpair *)qpair);
}

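/*
 * Set up a queue pair's bookkeeping and hook up its MSI-X vector. Per
 * the NVMe spec, submission queue y has its tail doorbell at offset
 * 0x1000 + (2y) * (4 << CAP.DSTRD) and completion queue y its head
 * doorbell at 0x1000 + (2y + 1) * (4 << CAP.DSTRD). The shifts below
 * match that layout on the assumption that ctrlr->dstrd stores
 * CAP.DSTRD + 2 (i.e. the stride expressed as a byte shift), as in
 * the FreeBSD driver this code derives from.
 */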
int nvme_cmd_qpair_setup(struct nvme_cmd_qpair *qpair,
			 struct nvme_controller *ctrlr,
			 uint32_t id)
{
	const struct nvme_controller_config *nvme_ctrlr_cfg =
		ctrlr->dev->config;

	qpair->ctrlr = ctrlr;
	qpair->id = id;
	qpair->vector = qpair->id;

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
	qpair->num_retries = 0;
	qpair->num_failures = 0;
	qpair->num_ignored = 0;

	qpair->cmd_bus_addr = (uintptr_t)qpair->cmd;
	qpair->cpl_bus_addr = (uintptr_t)qpair->cpl;

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell) +
		(qpair->id << (ctrlr->dstrd + 1));
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell) +
		(qpair->id << (ctrlr->dstrd + 1)) + (1 << ctrlr->dstrd);

	if (!pcie_msi_vector_connect(nvme_ctrlr_cfg->pcie->bdf,
				     &ctrlr->vectors[qpair->vector],
				     nvme_cmd_qpair_msi_handler, qpair, 0)) {
		LOG_ERR("Failed to connect MSI-X vector %u", qpair->id);
		return -EIO;
	}

	LOG_DBG("CMD Qpair created ID %u, %u entries - cmd/cpl addr "
		"0x%lx/0x%lx - sq/cq offsets %u/%u",
		qpair->id, qpair->num_entries, qpair->cmd_bus_addr,
		qpair->cpl_bus_addr, qpair->sq_tdbl_off, qpair->cq_hdbl_off);

	return 0;
}

void nvme_cmd_qpair_reset(struct nvme_cmd_qpair *qpair)
{
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/*
	 * The first time through the completion queue, the HW will set
	 * the phase bit on completions to 1. So set this to 1 here,
	 * indicating we're looking for a 1 to know which entries have
	 * completed. We'll toggle the bit each time the completion
	 * queue rolls over.
	 */
	qpair->phase = 1;

	memset(qpair->cmd, 0,
	       qpair->num_entries * sizeof(struct nvme_command));
	memset(qpair->cpl, 0,
	       qpair->num_entries * sizeof(struct nvme_completion));
}

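/*
 * Build a PRP list for transfers spanning more than two memory pages.
 * PRP1 points at the (possibly unaligned) start of the payload, PRP2
 * at the list itself, and each list entry holds the address of one
 * further page of the payload. Note this assumes an identity mapping
 * between virtual and bus addresses, since the values written come
 * straight from the payload pointer.
 */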
static int nvme_cmd_qpair_fill_prp_list(struct nvme_cmd_qpair *qpair,
					struct nvme_request *request,
					int n_prp)
{
	struct nvme_prp_list *prp_list;
	uintptr_t p_addr;
	int idx;

	prp_list = nvme_prp_list_alloc();
	if (prp_list == NULL) {
		return -ENOMEM;
	}

	p_addr = (uintptr_t)request->payload;
	request->cmd.dptr.prp1 =
		(uint64_t)sys_cpu_to_le64(p_addr);
	request->cmd.dptr.prp2 =
		(uint64_t)sys_cpu_to_le64((uintptr_t)&prp_list->prp);
	p_addr = NVME_PRP_NEXT_PAGE(p_addr);

	for (idx = 0; idx < n_prp; idx++) {
		prp_list->prp[idx] = (uint64_t)sys_cpu_to_le64(p_addr);
		p_addr = NVME_PRP_NEXT_PAGE(p_addr);
	}

	request->prp_list = prp_list;

	return 0;
}

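/*
 * Compute how many PRP entries a transfer needs (see the NVMe Common
 * Command Format, Data Pointer (DPTR) field). For example, with 4 KiB
 * pages, a page-aligned 9 KiB payload needs 3 PRPs (two full pages
 * plus a 1 KiB tail), while a 1 KiB payload starting 3.5 KiB into a
 * page crosses a page boundary and needs 2.
 */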
static int compute_n_prp(uintptr_t addr, uint32_t size)
{
	int n_prp;

	/* See Common Command Format, Data Pointer (DPTR) field */

	n_prp = size / CONFIG_MMU_PAGE_SIZE;
	if (n_prp == 0) {
		n_prp = 1;
	}

	if (size != CONFIG_MMU_PAGE_SIZE) {
		size = size % CONFIG_MMU_PAGE_SIZE;
	}

	if (n_prp == 1) {
		if ((addr + (uintptr_t)size) > NVME_PRP_NEXT_PAGE(addr)) {
			n_prp++;
		}
	} else if (size > 0) {
		n_prp++;
	}

	return n_prp;
}

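/*
 * Fill in the command's data pointer: transfers fitting in one or two
 * pages use PRP1/PRP2 directly, anything larger goes through a PRP
 * list. NVME_REQUEST_NULL commands carry no data and leave DPTR
 * untouched.
 */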
static int nvme_cmd_qpair_fill_dptr(struct nvme_cmd_qpair *qpair,
				    struct nvme_request *request)
{
	switch (request->type) {
	case NVME_REQUEST_NULL:
		break;
	case NVME_REQUEST_VADDR: {
		int n_prp;

		if (request->payload_size > qpair->ctrlr->max_xfer_size) {
			LOG_ERR("VADDR request's payload too big");
			return -EINVAL;
		}

		n_prp = compute_n_prp((uintptr_t)request->payload,
				      request->payload_size);
		if (n_prp <= 2) {
			request->cmd.dptr.prp1 = (uint64_t)sys_cpu_to_le64(
				(uintptr_t)request->payload);
			if (n_prp == 2) {
				request->cmd.dptr.prp2 = (uint64_t)sys_cpu_to_le64(
					NVME_PRP_NEXT_PAGE(
						(uintptr_t)request->payload));
			} else {
				request->cmd.dptr.prp2 = 0;
			}

			break;
		}

		return nvme_cmd_qpair_fill_prp_list(qpair, request, n_prp);
	}
	default:
		break;
	}

	return 0;
}

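/*
 * Queue a command on a submission queue. The command identifier (CID)
 * is simply the request's index in request_pool, which lets
 * nvme_cmd_qpair_process_completion() map a completion back to its
 * request in O(1). The command is copied into the submission ring and
 * the new tail is written to the queue's tail doorbell.
 */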
int nvme_cmd_qpair_submit_request(struct nvme_cmd_qpair *qpair,
				  struct nvme_request *request)
{
	mm_reg_t regs = DEVICE_MMIO_GET(qpair->ctrlr->dev);
	int ret;

	request->qpair = qpair;

	request->cmd.cdw0.cid = sys_cpu_to_le16((uint16_t)(request -
							   request_pool));

	ret = nvme_cmd_qpair_fill_dptr(qpair, request);
	if (ret != 0) {
		nvme_cmd_request_free(request);
		return ret;
	}

	nvme_cmd_register_request(request);

	memcpy(&qpair->cmd[qpair->sq_tail],
	       &request->cmd, sizeof(request->cmd));

	qpair->sq_tail++;
	if (qpair->sq_tail == qpair->num_entries) {
		qpair->sq_tail = 0;
	}

	sys_write32(qpair->sq_tail, regs + qpair->sq_tdbl_off);
	qpair->num_cmds++;

	LOG_DBG("Request %p %llu submitted: CID %u - sq_tail %u",
		request, qpair->num_cmds, request->cmd.cdw0.cid,
		qpair->sq_tail - 1);
	return 0;
}

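/*
 * Generic callback for synchronous commands: copy the completion into
 * the caller's nvme_completion_poll_status (or flag -ETIMEDOUT when
 * invoked with a NULL completion from the timeout path) and wake the
 * waiter through its semaphore.
 */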
void
nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl)
{
	struct nvme_completion_poll_status *status = arg;

	if (cpl != NULL) {
		memcpy(&status->cpl, cpl, sizeof(*cpl));
	} else {
		status->status = -ETIMEDOUT;
	}

	k_sem_give(&status->sem);
}