/*
 * SPDX-License-Identifier: Apache-2.0
 * Copyright (c) 2022 Intel Corp.
 */

#include <zephyr/logging/log.h>
LOG_MODULE_DECLARE(nvme, CONFIG_NVME_LOG_LEVEL);

#include <zephyr/kernel.h>
#include <zephyr/cache.h>
#include <zephyr/sys/byteorder.h>

#include <string.h>

#include "nvme.h"
#include "nvme_helpers.h"

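/*
 * Statically allocated pools: requests and PRP lists are handed out from the
 * free lists built up in nvme_cmd_init(), and every in-flight request sits on
 * pending_request in submission order until it completes or times out.
 */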
static struct nvme_prp_list prp_list_pool[CONFIG_NVME_PRP_LIST_AMOUNT];
static sys_dlist_t free_prp_list;

static struct nvme_request request_pool[NVME_REQUEST_AMOUNT];
static sys_dlist_t free_request;
static sys_dlist_t pending_request;

static void request_timeout(struct k_work *work);

static K_WORK_DELAYABLE_DEFINE(request_timer, request_timeout);

#ifdef CONFIG_NVME_LOG_LEVEL_DBG
struct nvme_status_string {
	uint16_t sc;
	const char *str;
};

static struct nvme_status_string generic_status[] = {
	{ NVME_SC_SUCCESS, "SUCCESS" },
	{ NVME_SC_INVALID_OPCODE, "INVALID OPCODE" },
	{ NVME_SC_INVALID_FIELD, "INVALID_FIELD" },
	{ NVME_SC_COMMAND_ID_CONFLICT, "COMMAND ID CONFLICT" },
	{ NVME_SC_DATA_TRANSFER_ERROR, "DATA TRANSFER ERROR" },
	{ NVME_SC_ABORTED_POWER_LOSS, "ABORTED - POWER LOSS" },
	{ NVME_SC_INTERNAL_DEVICE_ERROR, "INTERNAL DEVICE ERROR" },
	{ NVME_SC_ABORTED_BY_REQUEST, "ABORTED - BY REQUEST" },
	{ NVME_SC_ABORTED_SQ_DELETION, "ABORTED - SQ DELETION" },
	{ NVME_SC_ABORTED_FAILED_FUSED, "ABORTED - FAILED FUSED" },
	{ NVME_SC_ABORTED_MISSING_FUSED, "ABORTED - MISSING FUSED" },
	{ NVME_SC_INVALID_NAMESPACE_OR_FORMAT, "INVALID NAMESPACE OR FORMAT" },
	{ NVME_SC_COMMAND_SEQUENCE_ERROR, "COMMAND SEQUENCE ERROR" },
	{ NVME_SC_INVALID_SGL_SEGMENT_DESCR, "INVALID SGL SEGMENT DESCRIPTOR" },
	{ NVME_SC_INVALID_NUMBER_OF_SGL_DESCR, "INVALID NUMBER OF SGL DESCRIPTORS" },
	{ NVME_SC_DATA_SGL_LENGTH_INVALID, "DATA SGL LENGTH INVALID" },
	{ NVME_SC_METADATA_SGL_LENGTH_INVALID, "METADATA SGL LENGTH INVALID" },
	{ NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID, "SGL DESCRIPTOR TYPE INVALID" },
	{ NVME_SC_INVALID_USE_OF_CMB, "INVALID USE OF CONTROLLER MEMORY BUFFER" },
	{ NVME_SC_PRP_OFFSET_INVALID, "PRP OFFSET INVALID" },
	{ NVME_SC_ATOMIC_WRITE_UNIT_EXCEEDED, "ATOMIC WRITE UNIT EXCEEDED" },
	{ NVME_SC_OPERATION_DENIED, "OPERATION DENIED" },
	{ NVME_SC_SGL_OFFSET_INVALID, "SGL OFFSET INVALID" },
	{ NVME_SC_HOST_ID_INCONSISTENT_FORMAT, "HOST IDENTIFIER INCONSISTENT FORMAT" },
	{ NVME_SC_KEEP_ALIVE_TIMEOUT_EXPIRED, "KEEP ALIVE TIMEOUT EXPIRED" },
	{ NVME_SC_KEEP_ALIVE_TIMEOUT_INVALID, "KEEP ALIVE TIMEOUT INVALID" },
	{ NVME_SC_ABORTED_DUE_TO_PREEMPT, "COMMAND ABORTED DUE TO PREEMPT AND ABORT" },
	{ NVME_SC_SANITIZE_FAILED, "SANITIZE FAILED" },
	{ NVME_SC_SANITIZE_IN_PROGRESS, "SANITIZE IN PROGRESS" },
	{ NVME_SC_SGL_DATA_BLOCK_GRAN_INVALID, "SGL_DATA_BLOCK_GRANULARITY_INVALID" },
	{ NVME_SC_NOT_SUPPORTED_IN_CMB, "COMMAND NOT SUPPORTED FOR QUEUE IN CMB" },
	{ NVME_SC_NAMESPACE_IS_WRITE_PROTECTED, "NAMESPACE IS WRITE PROTECTED" },
	{ NVME_SC_COMMAND_INTERRUPTED, "COMMAND INTERRUPTED" },
	{ NVME_SC_TRANSIENT_TRANSPORT_ERROR, "TRANSIENT TRANSPORT ERROR" },
	{ NVME_SC_LBA_OUT_OF_RANGE, "LBA OUT OF RANGE" },
	{ NVME_SC_CAPACITY_EXCEEDED, "CAPACITY EXCEEDED" },
	{ NVME_SC_NAMESPACE_NOT_READY, "NAMESPACE NOT READY" },
	{ NVME_SC_RESERVATION_CONFLICT, "RESERVATION CONFLICT" },
	{ NVME_SC_FORMAT_IN_PROGRESS, "FORMAT IN PROGRESS" },
	{ 0xFFFF, "GENERIC" }
};

static struct nvme_status_string command_specific_status[] = {
	{ NVME_SC_COMPLETION_QUEUE_INVALID, "INVALID COMPLETION QUEUE" },
	{ NVME_SC_INVALID_QUEUE_IDENTIFIER, "INVALID QUEUE IDENTIFIER" },
	{ NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED, "MAX QUEUE SIZE EXCEEDED" },
	{ NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED, "ABORT CMD LIMIT EXCEEDED" },
	{ NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED, "ASYNC LIMIT EXCEEDED" },
	{ NVME_SC_INVALID_FIRMWARE_SLOT, "INVALID FIRMWARE SLOT" },
	{ NVME_SC_INVALID_FIRMWARE_IMAGE, "INVALID FIRMWARE IMAGE" },
	{ NVME_SC_INVALID_INTERRUPT_VECTOR, "INVALID INTERRUPT VECTOR" },
	{ NVME_SC_INVALID_LOG_PAGE, "INVALID LOG PAGE" },
	{ NVME_SC_INVALID_FORMAT, "INVALID FORMAT" },
	{ NVME_SC_FIRMWARE_REQUIRES_RESET, "FIRMWARE REQUIRES RESET" },
	{ NVME_SC_INVALID_QUEUE_DELETION, "INVALID QUEUE DELETION" },
	{ NVME_SC_FEATURE_NOT_SAVEABLE, "FEATURE IDENTIFIER NOT SAVEABLE" },
	{ NVME_SC_FEATURE_NOT_CHANGEABLE, "FEATURE NOT CHANGEABLE" },
	{ NVME_SC_FEATURE_NOT_NS_SPECIFIC, "FEATURE NOT NAMESPACE SPECIFIC" },
	{ NVME_SC_FW_ACT_REQUIRES_NVMS_RESET, "FIRMWARE ACTIVATION REQUIRES NVM SUBSYSTEM RESET" },
	{ NVME_SC_FW_ACT_REQUIRES_RESET, "FIRMWARE ACTIVATION REQUIRES RESET" },
	{ NVME_SC_FW_ACT_REQUIRES_TIME, "FIRMWARE ACTIVATION REQUIRES MAXIMUM TIME VIOLATION" },
	{ NVME_SC_FW_ACT_PROHIBITED, "FIRMWARE ACTIVATION PROHIBITED" },
	{ NVME_SC_OVERLAPPING_RANGE, "OVERLAPPING RANGE" },
	{ NVME_SC_NS_INSUFFICIENT_CAPACITY, "NAMESPACE INSUFFICIENT CAPACITY" },
	{ NVME_SC_NS_ID_UNAVAILABLE, "NAMESPACE IDENTIFIER UNAVAILABLE" },
	{ NVME_SC_NS_ALREADY_ATTACHED, "NAMESPACE ALREADY ATTACHED" },
	{ NVME_SC_NS_IS_PRIVATE, "NAMESPACE IS PRIVATE" },
	{ NVME_SC_NS_NOT_ATTACHED, "NS NOT ATTACHED" },
	{ NVME_SC_THIN_PROV_NOT_SUPPORTED, "THIN PROVISIONING NOT SUPPORTED" },
	{ NVME_SC_CTRLR_LIST_INVALID, "CONTROLLER LIST INVALID" },
	{ NVME_SC_SELF_TEST_IN_PROGRESS, "DEVICE SELF-TEST IN PROGRESS" },
	{ NVME_SC_BOOT_PART_WRITE_PROHIB, "BOOT PARTITION WRITE PROHIBITED" },
	{ NVME_SC_INVALID_CTRLR_ID, "INVALID CONTROLLER IDENTIFIER" },
	{ NVME_SC_INVALID_SEC_CTRLR_STATE, "INVALID SECONDARY CONTROLLER STATE" },
	{ NVME_SC_INVALID_NUM_OF_CTRLR_RESRC, "INVALID NUMBER OF CONTROLLER RESOURCES" },
	{ NVME_SC_INVALID_RESOURCE_ID, "INVALID RESOURCE IDENTIFIER" },
	{ NVME_SC_SANITIZE_PROHIBITED_WPMRE,
	  "SANITIZE PROHIBITED WRITE PERSISTENT MEMORY REGION ENABLED" },
	{ NVME_SC_ANA_GROUP_ID_INVALID, "ANA GROUP IDENTIFIER INVALID" },
	{ NVME_SC_ANA_ATTACH_FAILED, "ANA ATTACH FAILED" },
	{ NVME_SC_CONFLICTING_ATTRIBUTES, "CONFLICTING ATTRIBUTES" },
	{ NVME_SC_INVALID_PROTECTION_INFO, "INVALID PROTECTION INFO" },
	{ NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE, "WRITE TO RO PAGE" },
	{ 0xFFFF, "COMMAND SPECIFIC" }
};

static struct nvme_status_string media_error_status[] = {
	{ NVME_SC_WRITE_FAULTS, "WRITE FAULTS" },
	{ NVME_SC_UNRECOVERED_READ_ERROR, "UNRECOVERED READ ERROR" },
	{ NVME_SC_GUARD_CHECK_ERROR, "GUARD CHECK ERROR" },
	{ NVME_SC_APPLICATION_TAG_CHECK_ERROR, "APPLICATION TAG CHECK ERROR" },
	{ NVME_SC_REFERENCE_TAG_CHECK_ERROR, "REFERENCE TAG CHECK ERROR" },
	{ NVME_SC_COMPARE_FAILURE, "COMPARE FAILURE" },
	{ NVME_SC_ACCESS_DENIED, "ACCESS DENIED" },
	{ NVME_SC_DEALLOCATED_OR_UNWRITTEN, "DEALLOCATED OR UNWRITTEN LOGICAL BLOCK" },
	{ 0xFFFF, "MEDIA ERROR" }
};

static struct nvme_status_string path_related_status[] = {
	{ NVME_SC_INTERNAL_PATH_ERROR, "INTERNAL PATH ERROR" },
	{ NVME_SC_ASYMMETRIC_ACCESS_PERSISTENT_LOSS, "ASYMMETRIC ACCESS PERSISTENT LOSS" },
	{ NVME_SC_ASYMMETRIC_ACCESS_INACCESSIBLE, "ASYMMETRIC ACCESS INACCESSIBLE" },
	{ NVME_SC_ASYMMETRIC_ACCESS_TRANSITION, "ASYMMETRIC ACCESS TRANSITION" },
	{ NVME_SC_CONTROLLER_PATHING_ERROR, "CONTROLLER PATHING ERROR" },
	{ NVME_SC_HOST_PATHING_ERROR, "HOST PATHING ERROR" },
	{ NVME_SC_COMMAND_ABORTED_BY_HOST, "COMMAND ABORTED BY HOST" },
	{ 0xFFFF, "PATH RELATED" },
};

static const char *get_status_string(uint16_t sct, uint16_t sc)
{
	struct nvme_status_string *entry;

	switch (sct) {
	case NVME_SCT_GENERIC:
		entry = generic_status;
		break;
	case NVME_SCT_COMMAND_SPECIFIC:
		entry = command_specific_status;
		break;
	case NVME_SCT_MEDIA_ERROR:
		entry = media_error_status;
		break;
	case NVME_SCT_PATH_RELATED:
		entry = path_related_status;
		break;
	case NVME_SCT_VENDOR_SPECIFIC:
		return "VENDOR SPECIFIC";
	default:
		return "RESERVED";
	}

	while (entry->sc != 0xFFFF) {
		if (entry->sc == sc) {
			return entry->str;
		}

		entry++;
	}
	return entry->str;
}

void nvme_completion_print(const struct nvme_completion *cpl)
{
	uint8_t sct, sc, crd, m, dnr, p;

	sct = NVME_STATUS_GET_SCT(cpl->status);
	sc = NVME_STATUS_GET_SC(cpl->status);
	crd = NVME_STATUS_GET_CRD(cpl->status);
	m = NVME_STATUS_GET_M(cpl->status);
	dnr = NVME_STATUS_GET_DNR(cpl->status);
	p = NVME_STATUS_GET_P(cpl->status);

	LOG_DBG("%s (%02x/%02x) crd:%x m:%x dnr:%x p:%d "
		"sqid:%d cid:%d cdw0:%x\n",
		get_status_string(sct, sc), sct, sc, crd, m, dnr, p,
		cpl->sqid, cpl->cid, cpl->cdw0);
}

#endif /* CONFIG_NVME_LOG_LEVEL_DBG */

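/*
 * Seed the free request and free PRP list pools from their statically
 * allocated backing arrays and reset the pending list. Meant to run once,
 * before any command is allocated or submitted.
 */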
void nvme_cmd_init(void)
{
	int idx;

	sys_dlist_init(&free_request);
	sys_dlist_init(&pending_request);
	sys_dlist_init(&free_prp_list);

	for (idx = 0; idx < NVME_REQUEST_AMOUNT; idx++) {
		sys_dlist_append(&free_request, &request_pool[idx].node);
	}

	for (idx = 0; idx < CONFIG_NVME_PRP_LIST_AMOUNT; idx++) {
		sys_dlist_append(&free_prp_list, &prp_list_pool[idx].node);
	}
}

static struct nvme_prp_list *nvme_prp_list_alloc(void)
{
	sys_dnode_t *node;

	node = sys_dlist_peek_head(&free_prp_list);
	if (!node) {
		LOG_ERR("Could not allocate PRP list");
		return NULL;
	}

	sys_dlist_remove(node);

	return CONTAINER_OF(node, struct nvme_prp_list, node);
}

static void nvme_prp_list_free(struct nvme_prp_list *prp_list)
{
	memset(prp_list, 0, sizeof(struct nvme_prp_list));
	sys_dlist_append(&free_prp_list, &prp_list->node);
}

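/*
 * Return a request to the free pool: unlink it from the pending list if it is
 * still queued there, hand back any PRP list it borrowed and wipe it so the
 * next user starts from a clean slate.
 */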
void nvme_cmd_request_free(struct nvme_request *request)
{
	if (sys_dnode_is_linked(&request->node)) {
		sys_dlist_remove(&request->node);
	}

	if (request->prp_list != NULL) {
		nvme_prp_list_free(request->prp_list);
	}

	memset(request, 0, sizeof(struct nvme_request));
	sys_dlist_append(&free_request, &request->node);
}

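/*
 * Pop a request from the free pool. Returns NULL (after logging an error)
 * once all NVME_REQUEST_AMOUNT entries are in flight, so callers must be
 * prepared to fail or retry the operation.
 */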
struct nvme_request *nvme_cmd_request_alloc(void)
{
	sys_dnode_t *node;

	node = sys_dlist_peek_head(&free_request);
	if (!node) {
		LOG_ERR("Could not allocate request");
		return NULL;
	}

	sys_dlist_remove(node);

	return CONTAINER_OF(node, struct nvme_request, node);
}

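/*
 * Track a freshly submitted request. Appending keeps pending_request ordered
 * oldest-first, which request_timeout() relies on, and the shared timeout
 * timer is armed only if it is not already running.
 */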
static void nvme_cmd_register_request(struct nvme_request *request)
{
	sys_dlist_append(&pending_request, &request->node);

	request->req_start = k_uptime_get_32();

	if (!k_work_delayable_remaining_get(&request_timer)) {
		k_work_reschedule(&request_timer,
				  K_SECONDS(CONFIG_NVME_REQUEST_TIMEOUT));
	}
}

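/*
 * Timeout work handler: walk the pending list oldest-first, expire every
 * request whose deadline has passed (its callback runs with a NULL completion
 * and the request is freed), and stop at the first request that still has
 * time left, re-arming the timer for that one.
 */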
static void request_timeout(struct k_work *work)
{
	uint32_t current = k_uptime_get_32();
	struct nvme_request *request, *next;

	ARG_UNUSED(work);

	SYS_DLIST_FOR_EACH_CONTAINER_SAFE(&pending_request,
					  request, next, node) {
		if ((int32_t)(request->req_start +
			      CONFIG_NVME_REQUEST_TIMEOUT - current) > 0) {
			break;
		}

		LOG_WRN("Request %p CID %u timed-out",
			request, request->cmd.cdw0.cid);

		/* ToDo:
		 * - check CSTS for fatal fault
		 * - reset hw otherwise if it's the case
		 * - or check completion for missed interruption
		 */

		if (request->cb_fn) {
			request->cb_fn(request->cb_arg, NULL);
		}

		nvme_cmd_request_free(request);
	}

	if (request) {
		k_work_reschedule(&request_timer,
				  K_SECONDS(request->req_start +
					    CONFIG_NVME_REQUEST_TIMEOUT -
					    current));
	}
}

static bool nvme_completion_is_retry(const struct nvme_completion *cpl)
{
	uint8_t sct, sc, dnr;

	sct = NVME_STATUS_GET_SCT(cpl->status);
	sc = NVME_STATUS_GET_SC(cpl->status);
	dnr = NVME_STATUS_GET_DNR(cpl->status);

	/*
	 * TODO: spec is not clear how commands that are aborted due
	 * to TLER will be marked. So for now, it seems
	 * NAMESPACE_NOT_READY is the only case where we should
	 * look at the DNR bit. Requests failed with ABORTED_BY_REQUEST
	 * set the DNR bit correctly since the driver controls that.
	 */
	switch (sct) {
	case NVME_SCT_GENERIC:
		switch (sc) {
		case NVME_SC_ABORTED_BY_REQUEST:
		case NVME_SC_NAMESPACE_NOT_READY:
			if (dnr) {
				return false;
			}

			return true;
		case NVME_SC_INVALID_OPCODE:
		case NVME_SC_INVALID_FIELD:
		case NVME_SC_COMMAND_ID_CONFLICT:
		case NVME_SC_DATA_TRANSFER_ERROR:
		case NVME_SC_ABORTED_POWER_LOSS:
		case NVME_SC_INTERNAL_DEVICE_ERROR:
		case NVME_SC_ABORTED_SQ_DELETION:
		case NVME_SC_ABORTED_FAILED_FUSED:
		case NVME_SC_ABORTED_MISSING_FUSED:
		case NVME_SC_INVALID_NAMESPACE_OR_FORMAT:
		case NVME_SC_COMMAND_SEQUENCE_ERROR:
		case NVME_SC_LBA_OUT_OF_RANGE:
		case NVME_SC_CAPACITY_EXCEEDED:
		default:
			return false;
		}
	case NVME_SCT_COMMAND_SPECIFIC:
	case NVME_SCT_MEDIA_ERROR:
		return false;
	case NVME_SCT_PATH_RELATED:
		switch (sc) {
		case NVME_SC_INTERNAL_PATH_ERROR:
			if (dnr) {
				return false;
			}

			return true;
		default:
			return false;
		}
	case NVME_SCT_VENDOR_SPECIFIC:
	default:
		return false;
	}
}

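/*
 * Completion path for a single request: account retries and failures on the
 * owning qpair, resubmit the command if the error is retriable and the retry
 * budget (CONFIG_NVME_RETRY_COUNT) is not exhausted, otherwise invoke the
 * caller's callback with the completion and recycle the request.
 */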
static void nvme_cmd_request_complete(struct nvme_request *request,
				      struct nvme_completion *cpl)
{
	bool error, retriable, retry;

	error = nvme_completion_is_error(cpl);
	retriable = nvme_completion_is_retry(cpl);
	retry = error && retriable &&
		request->retries < CONFIG_NVME_RETRY_COUNT;

	if (retry) {
		LOG_DBG("CMD will be retried");
		request->qpair->num_retries++;
	}

	if (error &&
	    (!retriable || (request->retries >= CONFIG_NVME_RETRY_COUNT))) {
		LOG_DBG("CMD error");
		request->qpair->num_failures++;
	}

	if (cpl->cid != request->cmd.cdw0.cid) {
		LOG_ERR("cpl cid != cmd cid");
	}

	if (retry) {
		LOG_DBG("Retrying CMD");
		/* Let's remove it from pending... */
		sys_dlist_remove(&request->node);
		/* ...and re-submit, thus re-adding to pending */
		nvme_cmd_qpair_submit_request(request->qpair, request);
		request->retries++;
	} else {
		LOG_DBG("Request %p CMD complete on %p/%p",
			request, request->cb_fn, request->cb_arg);

		if (request->cb_fn) {
			request->cb_fn(request->cb_arg, cpl);
		}

		nvme_cmd_request_free(request);
	}
}

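/*
 * Drain the completion queue: consume entries whose phase tag matches the
 * qpair's current phase, map each one back to its request via the CID (which
 * nvme_cmd_qpair_submit_request() sets to the request's index in
 * request_pool), toggle the phase on wrap-around and finally ring the CQ head
 * doorbell once for everything consumed.
 */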
static void nvme_cmd_qpair_process_completion(struct nvme_cmd_qpair *qpair)
{
	struct nvme_request *request;
	struct nvme_completion cpl;
	int done = 0;

	if (qpair->num_intr_handler_calls == 0 && qpair->phase == 0) {
		LOG_WRN("Phase wrong for first interrupt call.");
	}

	qpair->num_intr_handler_calls++;

	while (1) {
		uint16_t status;

		status = sys_le16_to_cpu(qpair->cpl[qpair->cq_head].status);
		if (NVME_STATUS_GET_P(status) != qpair->phase) {
			break;
		}

		cpl = qpair->cpl[qpair->cq_head];
		nvme_completion_swapbytes(&cpl);

		if (NVME_STATUS_GET_P(status) != NVME_STATUS_GET_P(cpl.status)) {
			LOG_WRN("Phase unexpectedly inconsistent");
		}

		if (cpl.cid < NVME_REQUEST_AMOUNT) {
			request = &request_pool[cpl.cid];
		} else {
			request = NULL;
		}

		done++;
		if (request != NULL) {
			nvme_cmd_request_complete(request, &cpl);
			qpair->sq_head = cpl.sqhd;
		} else {
			LOG_ERR("cpl (cid = %u) does not map to cmd", cpl.cid);
		}

		qpair->cq_head++;
		if (qpair->cq_head == qpair->num_entries) {
			qpair->cq_head = 0;
			qpair->phase = !qpair->phase;
		}
	}

	if (done != 0) {
		mm_reg_t regs = DEVICE_MMIO_GET(qpair->ctrlr->dev);

		sys_write32(qpair->cq_head, regs + qpair->cq_hdbl_off);
	}
}

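/*
 * Per-qpair interrupt handler: each queue pair is connected to its own
 * MSI/MSI-X vector in nvme_cmd_qpair_setup(), with the qpair itself passed
 * back as the argument.
 */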
static void nvme_cmd_qpair_msi_handler(const void *arg)
{
	const struct nvme_cmd_qpair *qpair = arg;

	nvme_cmd_qpair_process_completion((struct nvme_cmd_qpair *)qpair);
}

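/*
 * One-time setup of a command queue pair: reset its statistics, record the
 * bus addresses of the SQ/CQ rings, pre-compute the doorbell offsets from the
 * controller's doorbell stride and hook the qpair to its MSI/MSI-X vector.
 *
 * Worked example (assuming ctrlr->dstrd stores CAP.DSTRD plus 2, so that
 * 1 << dstrd is the doorbell width in bytes, as in the FreeBSD driver this
 * code descends from): with CAP.DSTRD = 0, queue N gets its SQ tail doorbell
 * at doorbell base + N * 8 and its CQ head doorbell 4 bytes later, matching
 * the 0x1000 + (2 * qid + j) * (4 << CAP.DSTRD) layout of the NVMe spec.
 */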
int nvme_cmd_qpair_setup(struct nvme_cmd_qpair *qpair,
			 struct nvme_controller *ctrlr,
			 uint32_t id)
{
	const struct nvme_controller_config *nvme_ctrlr_cfg =
		ctrlr->dev->config;

	qpair->ctrlr = ctrlr;
	qpair->id = id;
	qpair->vector = qpair->id;

	qpair->num_cmds = 0;
	qpair->num_intr_handler_calls = 0;
	qpair->num_retries = 0;
	qpair->num_failures = 0;
	qpair->num_ignored = 0;

	qpair->cmd_bus_addr = (uintptr_t)qpair->cmd;
	qpair->cpl_bus_addr = (uintptr_t)qpair->cpl;

	qpair->sq_tdbl_off = nvme_mmio_offsetof(doorbell) +
		(qpair->id << (ctrlr->dstrd + 1));
	qpair->cq_hdbl_off = nvme_mmio_offsetof(doorbell) +
		(qpair->id << (ctrlr->dstrd + 1)) + (1 << ctrlr->dstrd);

	if (!pcie_msi_vector_connect(nvme_ctrlr_cfg->pcie->bdf,
				     &ctrlr->vectors[qpair->vector],
				     nvme_cmd_qpair_msi_handler, qpair, 0)) {
		LOG_ERR("Failed to connect MSI-X vector %u", qpair->id);
		return -EIO;
	}

	LOG_DBG("CMD Qpair created ID %u, %u entries - cmd/cpl addr "
		"0x%lx/0x%lx - sq/cq offsets %u/%u",
		qpair->id, qpair->num_entries, qpair->cmd_bus_addr,
		qpair->cpl_bus_addr, qpair->sq_tdbl_off, qpair->cq_hdbl_off);

	return 0;
}

void nvme_cmd_qpair_reset(struct nvme_cmd_qpair *qpair)
{
	qpair->sq_head = qpair->sq_tail = qpair->cq_head = 0;

	/*
	 * First time through the completion queue, HW will set the phase
	 * bit on completions to 1. So set this to 1 here, indicating
	 * we're looking for a 1 to know which entries have completed.
	 * We'll toggle the bit each time the completion queue rolls over.
	 */
	qpair->phase = 1;

	memset(qpair->cmd, 0,
	       qpair->num_entries * sizeof(struct nvme_command));
	memset(qpair->cpl, 0,
	       qpair->num_entries * sizeof(struct nvme_completion));
}

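/*
 * Build a PRP list for a transfer that spans more than two memory pages:
 * PRP1 points at the (possibly unaligned) start of the payload, PRP2 at the
 * list itself, and each list entry at one of the following pages of the
 * payload.
 */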
static int nvme_cmd_qpair_fill_prp_list(struct nvme_cmd_qpair *qpair,
					struct nvme_request *request,
					int n_prp)
{
	struct nvme_prp_list *prp_list;
	uintptr_t p_addr;
	int idx;

	prp_list = nvme_prp_list_alloc();
	if (prp_list == NULL) {
		return -ENOMEM;
	}

	p_addr = (uintptr_t)request->payload;
	request->cmd.dptr.prp1 =
		(uint64_t)sys_cpu_to_le64(p_addr);
	request->cmd.dptr.prp2 =
		(uint64_t)sys_cpu_to_le64(&prp_list->prp);
	p_addr = NVME_PRP_NEXT_PAGE(p_addr);

	for (idx = 0; idx < n_prp; idx++) {
		prp_list->prp[idx] = (uint64_t)sys_cpu_to_le64(p_addr);
		p_addr = NVME_PRP_NEXT_PAGE(p_addr);
	}

	request->prp_list = prp_list;

	return 0;
}

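/*
 * Rough estimate of how many PRP entries a payload needs, based on its size
 * and its alignment relative to the memory page size.
 *
 * Worked examples with CONFIG_MMU_PAGE_SIZE = 4096: a page-aligned 8 KiB
 * buffer needs 2 entries (PRP1/PRP2 are enough), a 512-byte buffer that
 * straddles a page boundary also needs 2, and a page-aligned 12 KiB buffer
 * needs 3, which forces a PRP list (see nvme_cmd_qpair_fill_dptr()).
 */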
static int compute_n_prp(uintptr_t addr, uint32_t size)
{
	int n_prp;

	/* See Common Command Format, Data Pointer (DPTR) field */

	n_prp = size / CONFIG_MMU_PAGE_SIZE;
	if (n_prp == 0) {
		n_prp = 1;
	}

	if (size != CONFIG_MMU_PAGE_SIZE) {
		size = size % CONFIG_MMU_PAGE_SIZE;
	}

	if (n_prp == 1) {
		if ((addr + (uintptr_t)size) > NVME_PRP_NEXT_PAGE(addr)) {
			n_prp++;
		}
	} else if (size > 0) {
		n_prp++;
	}

	return n_prp;
}

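/*
 * Fill the command's Data Pointer (DPTR): NULL requests carry no data, while
 * VADDR payloads covering at most two pages use PRP1/PRP2 directly and
 * anything larger is described through a PRP list.
 */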
static int nvme_cmd_qpair_fill_dptr(struct nvme_cmd_qpair *qpair,
				    struct nvme_request *request)
{
	switch (request->type) {
	case NVME_REQUEST_NULL:
		break;
	case NVME_REQUEST_VADDR: {
		int n_prp;

		if (request->payload_size > qpair->ctrlr->max_xfer_size) {
			LOG_ERR("VADDR request's payload too big");
			return -EINVAL;
		}

		n_prp = compute_n_prp((uintptr_t)request->payload,
				      request->payload_size);
		if (n_prp <= 2) {
			request->cmd.dptr.prp1 =
				(uint64_t)sys_cpu_to_le64(request->payload);
			if (n_prp == 2) {
				request->cmd.dptr.prp2 = (uint64_t)sys_cpu_to_le64(
					NVME_PRP_NEXT_PAGE(
						(uintptr_t)request->payload));
			} else {
				request->cmd.dptr.prp2 = 0;
			}

			break;
		}

		return nvme_cmd_qpair_fill_prp_list(qpair, request, n_prp);
	}
	default:
		break;
	}

	return 0;
}

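/*
 * Queue a command on a qpair: the CID is set to the request's index in
 * request_pool so the completion path can map it back without a lookup, the
 * command is copied into the current SQ slot, the tail is advanced (wrapping
 * at num_entries) and the SQ tail doorbell is rung.
 */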
int nvme_cmd_qpair_submit_request(struct nvme_cmd_qpair *qpair,
				  struct nvme_request *request)
{
	mm_reg_t regs = DEVICE_MMIO_GET(qpair->ctrlr->dev);
	int ret;

	request->qpair = qpair;

	request->cmd.cdw0.cid = sys_cpu_to_le16((uint16_t)(request -
							    request_pool));

	ret = nvme_cmd_qpair_fill_dptr(qpair, request);
	if (ret != 0) {
		nvme_cmd_request_free(request);
		return ret;
	}

	nvme_cmd_register_request(request);

	memcpy(&qpair->cmd[qpair->sq_tail],
	       &request->cmd, sizeof(request->cmd));

	qpair->sq_tail++;
	if (qpair->sq_tail == qpair->num_entries) {
		qpair->sq_tail = 0;
	}

	sys_write32(qpair->sq_tail, regs + qpair->sq_tdbl_off);
	qpair->num_cmds++;

	LOG_DBG("Request %p %llu submitted: CID %u - sq_tail %u",
		request, qpair->num_cmds, request->cmd.cdw0.cid,
		qpair->sq_tail - 1);
	return 0;
}

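/*
 * Completion callback used for synchronous commands: it copies the completion
 * entry (or flags -ETIMEDOUT when called with a NULL completion from the
 * timeout path) into the caller's nvme_completion_poll_status and gives its
 * semaphore.
 *
 * A minimal usage sketch (assuming the caller fills request->cmd itself; the
 * helpers used elsewhere in the driver may differ):
 *
 *	struct nvme_completion_poll_status status;
 *	struct nvme_request *request = nvme_cmd_request_alloc();
 *
 *	k_sem_init(&status.sem, 0, 1);
 *	request->cb_fn = nvme_completion_poll_cb;
 *	request->cb_arg = &status;
 *	... fill request->cmd ...
 *	nvme_cmd_qpair_submit_request(qpair, request);
 *	k_sem_take(&status.sem, K_FOREVER);
 */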
void
nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl)
{
	struct nvme_completion_poll_status *status = arg;

	if (cpl != NULL) {
		memcpy(&status->cpl, cpl, sizeof(*cpl));
	} else {
		status->status = -ETIMEDOUT;
	}

	k_sem_give(&status->sem);
}