1 /*
2  * Copyright (c) 2022 Intel Corporation
3  * SPDX-License-Identifier: Apache-2.0
4  *
5  * Derived from FreeBSD original driver made by Jim Harris
6  * with contributions from Alexander Motin and Wojciech Macek
7  */
8 
9 #ifndef ZEPHYR_DRIVERS_DISK_NVME_NHME_H_
10 #define ZEPHYR_DRIVERS_DISK_NVME_NVME_H_
11 
12 #include "nvme_helpers.h"
13 #include "nvme_cmd.h"
14 #include "nvme_namespace.h"
15 
/*
 * Controller register layout. The two reserved arrays pad the structure so
 * that the fields following them land on the byte ranges noted next to them.
 */
struct nvme_registers {
	/* Controller Capabilities, split into two 32-bit words */
	uint32_t cap_lo;
	uint32_t cap_hi;
	/* Version */
	uint32_t vs;
	/* Interrupt Mask Set */
	uint32_t intms;
	/* Interrupt Mask Clear */
	uint32_t intmc;
	/* Controller Configuration */
	uint32_t cc;
	uint32_t reserved1;
	/* Controller Status */
	uint32_t csts;
	/* NVM Subsystem Reset */
	uint32_t nssr;
	/* Admin Queue Attributes */
	uint32_t aqa;
	/* Admin Submission Queue base address */
	uint64_t asq;
	/* Admin Completion Queue base address */
	uint64_t acq;
	/* Controller Memory Buffer Location */
	uint32_t cmbloc;
	/* Controller Memory Buffer Size */
	uint32_t cmbsz;
	/* Boot Partition Information */
	uint32_t bpinfo;
	/* Boot Partition Read Select */
	uint32_t bprsel;
	/* Boot Partition Memory Buffer Location */
	uint64_t bpmbl;
	/* Controller Memory Buffer Memory Space Control */
	uint64_t cmbmsc;
	/* Controller Memory Buffer Status */
	uint32_t cmbsts;
	/* Padding: 5Ch - DFFh */
	uint8_t reserved3[3492];
	/* Persistent Memory Capabilities */
	uint32_t pmrcap;
	/* Persistent Memory Region Control */
	uint32_t pmrctl;
	/* Persistent Memory Region Status */
	uint32_t pmrsts;
	/* Persistent Memory Region Elasticity Buffer Size */
	uint32_t pmrebs;
	/* Persistent Memory Region Sustained Write Throughput */
	uint32_t pmrswtp;
	/* Persistent Memory Region Controller Memory Space Control (two words) */
	uint32_t pmrmsc_lo;
	uint32_t pmrmsc_hi;
	/* Padding: E1Ch - FFFh */
	uint8_t reserved4[484];
	/* Doorbell pair: submission queue tail, then completion queue head */
	struct {
		uint32_t sq_tdbl;
		uint32_t cq_hdbl;
	} doorbell[1];
};
50 
/* One power state descriptor; the fields below add up to 32 bytes. */
struct nvme_power_state {
	uint16_t mp;		/* Maximum Power */
	uint8_t ps_rsvd1;
	uint8_t mps_nops;	/* Max Power Scale, Non-Operational State */
	uint32_t enlat;		/* Entry Latency */
	uint32_t exlat;		/* Exit Latency */
	uint8_t rrt;		/* Relative Read Throughput */
	uint8_t rrl;		/* Relative Read Latency */
	uint8_t rwt;		/* Relative Write Throughput */
	uint8_t rwl;		/* Relative Write Latency */
	uint16_t idlp;		/* Idle Power */
	uint8_t ips;		/* Idle Power Scale */
	uint8_t ps_rsvd8;
	uint16_t actp;		/* Active Power */
	uint8_t apw_aps;	/* Active Power Workload, Active Power Scale */
	uint8_t ps_rsvd10[9];
} __packed;
92 
/* Byte lengths of the sn[], mn[] and fr[] arrays in struct nvme_controller_data */
#define NVME_SERIAL_NUMBER_LENGTH 20
#define NVME_MODEL_NUMBER_LENGTH 40
#define NVME_FIRMWARE_REVISION_LENGTH 8
96 
97 struct nvme_controller_data {
98 	/* bytes 0-255: controller capabilities and features */
99 
100 	/** pci vendor id */
101 	uint16_t		vid;
102 
103 	/** pci subsystem vendor id */
104 	uint16_t		ssvid;
105 
106 	/** serial number */
107 	uint8_t			sn[NVME_SERIAL_NUMBER_LENGTH];
108 
109 	/** model number */
110 	uint8_t			mn[NVME_MODEL_NUMBER_LENGTH];
111 
112 	/** firmware revision */
113 	uint8_t			fr[NVME_FIRMWARE_REVISION_LENGTH];
114 
115 	/** recommended arbitration burst */
116 	uint8_t			rab;
117 
118 	/** ieee oui identifier */
119 	uint8_t			ieee[3];
120 
121 	/** multi-interface capabilities */
122 	uint8_t			mic;
123 
124 	/** maximum data transfer size */
125 	uint8_t			mdts;
126 
127 	/** Controller ID */
128 	uint16_t		ctrlr_id;
129 
130 	/** Version */
131 	uint32_t		ver;
132 
133 	/** RTD3 Resume Latency */
134 	uint32_t		rtd3r;
135 
136 	/** RTD3 Enter Latency */
137 	uint32_t		rtd3e;
138 
139 	/** Optional Asynchronous Events Supported */
140 	uint32_t		oaes;	/* bitfield really */
141 
142 	/** Controller Attributes */
143 	uint32_t		ctratt;	/* bitfield really */
144 
145 	/** Read Recovery Levels Supported */
146 	uint16_t		rrls;
147 
148 	uint8_t			reserved1[9];
149 
150 	/** Controller Type */
151 	uint8_t			cntrltype;
152 
153 	/** FRU Globally Unique Identifier */
154 	uint8_t			fguid[16];
155 
156 	/** Command Retry Delay Time 1 */
157 	uint16_t		crdt1;
158 
159 	/** Command Retry Delay Time 2 */
160 	uint16_t		crdt2;
161 
162 	/** Command Retry Delay Time 3 */
163 	uint16_t		crdt3;
164 
165 	uint8_t			reserved2[122];
166 
167 	/* bytes 256-511: admin command set attributes */
168 
169 	/** optional admin command support */
170 	uint16_t		oacs;
171 
172 	/** abort command limit */
173 	uint8_t			acl;
174 
175 	/** asynchronous event request limit */
176 	uint8_t			aerl;
177 
178 	/** firmware updates */
179 	uint8_t			frmw;
180 
181 	/** log page attributes */
182 	uint8_t			lpa;
183 
184 	/** error log page entries */
185 	uint8_t			elpe;
186 
187 	/** number of power states supported */
188 	uint8_t			npss;
189 
190 	/** admin vendor specific command configuration */
191 	uint8_t			avscc;
192 
193 	/** Autonomous Power State Transition Attributes */
194 	uint8_t			apsta;
195 
196 	/** Warning Composite Temperature Threshold */
197 	uint16_t		wctemp;
198 
199 	/** Critical Composite Temperature Threshold */
200 	uint16_t		cctemp;
201 
202 	/** Maximum Time for Firmware Activation */
203 	uint16_t		mtfa;
204 
205 	/** Host Memory Buffer Preferred Size */
206 	uint32_t		hmpre;
207 
208 	/** Host Memory Buffer Minimum Size */
209 	uint32_t		hmmin;
210 
211 	/** Name space capabilities  */
212 	struct {
213 		/* if nsmgmt, report tnvmcap and unvmcap */
214 		uint8_t    tnvmcap[16];
215 		uint8_t    unvmcap[16];
216 	} __packed untncap;
217 
218 	/** Replay Protected Memory Block Support */
219 	uint32_t		rpmbs; /* Really a bitfield */
220 
221 	/** Extended Device Self-test Time */
222 	uint16_t		edstt;
223 
224 	/** Device Self-test Options */
225 	uint8_t			dsto; /* Really a bitfield */
226 
227 	/** Firmware Update Granularity */
228 	uint8_t			fwug;
229 
230 	/** Keep Alive Support */
231 	uint16_t		kas;
232 
233 	/** Host Controlled Thermal Management Attributes */
234 	uint16_t		hctma; /* Really a bitfield */
235 
236 	/** Minimum Thermal Management Temperature */
237 	uint16_t		mntmt;
238 
239 	/** Maximum Thermal Management Temperature */
240 	uint16_t		mxtmt;
241 
242 	/** Sanitize Capabilities */
243 	uint32_t		sanicap; /* Really a bitfield */
244 
245 	/** Host Memory Buffer Minimum Descriptor Entry Size */
246 	uint32_t		hmminds;
247 
248 	/** Host Memory Maximum Descriptors Entries */
249 	uint16_t		hmmaxd;
250 
251 	/** NVM Set Identifier Maximum */
252 	uint16_t		nsetidmax;
253 
254 	/** Endurance Group Identifier Maximum */
255 	uint16_t		endgidmax;
256 
257 	/** ANA Transition Time */
258 	uint8_t			anatt;
259 
260 	/** Asymmetric Namespace Access Capabilities */
261 	uint8_t			anacap;
262 
263 	/** ANA Group Identifier Maximum */
264 	uint32_t		anagrpmax;
265 
266 	/** Number of ANA Group Identifiers */
267 	uint32_t		nanagrpid;
268 
269 	/** Persistent Event Log Size */
270 	uint32_t		pels;
271 
272 	uint8_t			reserved3[156];
273 	/* bytes 512-703: nvm command set attributes */
274 
275 	/** submission queue entry size */
276 	uint8_t			sqes;
277 
278 	/** completion queue entry size */
279 	uint8_t			cqes;
280 
281 	/** Maximum Outstanding Commands */
282 	uint16_t		maxcmd;
283 
284 	/** number of namespaces */
285 	uint32_t		nn;
286 
287 	/** optional nvm command support */
288 	uint16_t		oncs;
289 
290 	/** fused operation support */
291 	uint16_t		fuses;
292 
293 	/** format nvm attributes */
294 	uint8_t			fna;
295 
296 	/** volatile write cache */
297 	uint8_t			vwc;
298 
299 	/** Atomic Write Unit Normal */
300 	uint16_t		awun;
301 
302 	/** Atomic Write Unit Power Fail */
303 	uint16_t		awupf;
304 
305 	/** NVM Vendor Specific Command Configuration */
306 	uint8_t			nvscc;
307 
308 	/** Namespace Write Protection Capabilities */
309 	uint8_t			nwpc;
310 
311 	/** Atomic Compare & Write Unit */
312 	uint16_t		acwu;
313 	uint16_t		reserved6;
314 
315 	/** SGL Support */
316 	uint32_t		sgls;
317 
318 	/** Maximum Number of Allowed Namespaces */
319 	uint32_t		mnan;
320 
321 	/* bytes 540-767: Reserved */
322 	uint8_t			reserved7[224];
323 
324 	/** NVM Subsystem NVMe Qualified Name */
325 	uint8_t			subnqn[256];
326 
327 	/* bytes 1024-1791: Reserved */
328 	uint8_t			reserved8[768];
329 
330 	/* bytes 1792-2047: NVMe over Fabrics specification */
331 	uint8_t			reserved9[256];
332 
333 	/* bytes 2048-3071: power state descriptors */
334 	struct nvme_power_state power_state[32];
335 
336 	/* bytes 3072-4095: vendor specific */
337 	uint8_t			vs[1024];
338 } __packed __aligned(4);
339 
/**
 * Convert the 16- and 32-bit fields of @p s in place by passing each one
 * through sys_le16_to_cpu() / sys_le32_to_cpu().
 *
 * The conversion is only compiled in when the target is not little-endian.
 * The check uses the compiler-predefined __BYTE_ORDER__ and
 * __ORDER_LITTLE_ENDIAN__ macros: the BSD-style _BYTE_ORDER/_LITTLE_ENDIAN
 * pair both evaluate to 0 in an #if when no libc header defines them, which
 * would silently drop the conversion on every target.
 *
 * NOTE(review): the entries of s->power_state[] contain multi-byte fields
 * that are not converted here - confirm whether any big-endian user needs
 * them.
 *
 * @param s Controller data to convert in place.
 */
static inline
void nvme_controller_data_swapbytes(struct nvme_controller_data *s)
{
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
	/* bytes 0-255: controller capabilities and features */
	s->vid = sys_le16_to_cpu(s->vid);
	s->ssvid = sys_le16_to_cpu(s->ssvid);
	s->ctrlr_id = sys_le16_to_cpu(s->ctrlr_id);
	s->ver = sys_le32_to_cpu(s->ver);
	s->rtd3r = sys_le32_to_cpu(s->rtd3r);
	s->rtd3e = sys_le32_to_cpu(s->rtd3e);
	s->oaes = sys_le32_to_cpu(s->oaes);
	s->ctratt = sys_le32_to_cpu(s->ctratt);
	s->rrls = sys_le16_to_cpu(s->rrls);
	s->crdt1 = sys_le16_to_cpu(s->crdt1);
	s->crdt2 = sys_le16_to_cpu(s->crdt2);
	s->crdt3 = sys_le16_to_cpu(s->crdt3);

	/* bytes 256-511: admin command set attributes */
	s->oacs = sys_le16_to_cpu(s->oacs);
	s->wctemp = sys_le16_to_cpu(s->wctemp);
	s->cctemp = sys_le16_to_cpu(s->cctemp);
	s->mtfa = sys_le16_to_cpu(s->mtfa);
	s->hmpre = sys_le32_to_cpu(s->hmpre);
	s->hmmin = sys_le32_to_cpu(s->hmmin);
	s->rpmbs = sys_le32_to_cpu(s->rpmbs);
	s->edstt = sys_le16_to_cpu(s->edstt);
	s->kas = sys_le16_to_cpu(s->kas);
	s->hctma = sys_le16_to_cpu(s->hctma);
	s->mntmt = sys_le16_to_cpu(s->mntmt);
	s->mxtmt = sys_le16_to_cpu(s->mxtmt);
	s->sanicap = sys_le32_to_cpu(s->sanicap);
	s->hmminds = sys_le32_to_cpu(s->hmminds);
	s->hmmaxd = sys_le16_to_cpu(s->hmmaxd);
	s->nsetidmax = sys_le16_to_cpu(s->nsetidmax);
	s->endgidmax = sys_le16_to_cpu(s->endgidmax);
	s->anagrpmax = sys_le32_to_cpu(s->anagrpmax);
	s->nanagrpid = sys_le32_to_cpu(s->nanagrpid);
	s->pels = sys_le32_to_cpu(s->pels);

	/* bytes 512 onwards: nvm command set attributes */
	s->maxcmd = sys_le16_to_cpu(s->maxcmd);
	s->nn = sys_le32_to_cpu(s->nn);
	s->oncs = sys_le16_to_cpu(s->oncs);
	s->fuses = sys_le16_to_cpu(s->fuses);
	s->awun = sys_le16_to_cpu(s->awun);
	s->awupf = sys_le16_to_cpu(s->awupf);
	s->acwu = sys_le16_to_cpu(s->acwu);
	s->sgls = sys_le32_to_cpu(s->sgls);
	s->mnan = sys_le32_to_cpu(s->mnan);
#else
	/* Nothing to convert on a little-endian target. */
	ARG_UNUSED(s);
#endif
}
389 
390 #include <zephyr/device.h>
391 #include <zephyr/drivers/pcie/pcie.h>
392 #include <zephyr/drivers/pcie/msi.h>
393 
/* Index of the PCIe BAR used by the driver */
#define NVME_PCIE_BAR_IDX	0
395 
/* Sum of the configured admin and I/O entry counts */
#define NVME_REQUEST_AMOUNT \
	(CONFIG_NVME_ADMIN_ENTRIES + CONFIG_NVME_IO_ENTRIES)
398 
/*
 * One vector for the admin queue plus one per I/O queue. The expansion is
 * parenthesized so that it stays a single operand when the macro is used
 * inside a larger expression (e.g. multiplied or subtracted).
 */
#define NVME_PCIE_MSIX_VECTORS (1 + CONFIG_NVME_IO_QUEUES)
401 
/*
 * Define a static queue pair called `name`.
 *
 * Expands to two static arrays of n_entries elements each, both aligned to
 * 0x1000 bytes (cmd_<name> for commands, cpl_<name> for completions), and a
 * static struct nvme_cmd_qpair whose num_entries, cmd and cpl members are
 * initialized from them. The expansion has no trailing semicolon; the user
 * of the macro supplies it.
 */
#define NVME_QUEUE_ALLOCATE(name, n_entries)                                  \
	static struct nvme_command cmd_##name[n_entries] __aligned(0x1000);   \
	static struct nvme_completion cpl_##name[n_entries] __aligned(0x1000);\
                                                                              \
	static struct nvme_cmd_qpair name = {                                 \
		.num_entries = n_entries,                                     \
		.cmd = cmd_##name,                                            \
		.cpl = cpl_##name,                                            \
	}
411 
/* Queue pair named admin_<n>, allocated through NVME_QUEUE_ALLOCATE() */
#define NVME_ADMINQ_ALLOCATE(n, n_entries) \
	NVME_QUEUE_ALLOCATE(admin_##n, n_entries)

/* Queue pair named io_<n>, allocated through NVME_QUEUE_ALLOCATE() */
#define NVME_IOQ_ALLOCATE(n, n_entries) \
	NVME_QUEUE_ALLOCATE(io_##n, n_entries)
416 
/* Controller configuration: holds only a pointer to the PCIe device. */
struct nvme_controller_config {
	struct pcie_dev *pcie;	/* PCIe device of this controller */
};
420 
421 struct nvme_controller {
422 	DEVICE_MMIO_RAM;
423 
424 	const struct device *dev;
425 
426 	struct k_mutex lock;
427 
428 	uint32_t id;
429 
430 	msi_vector_t vectors[NVME_PCIE_MSIX_VECTORS];
431 
432 	struct nvme_controller_data cdata;
433 
434 	uint32_t num_io_queues;
435 	struct nvme_cmd_qpair *adminq;
436 	struct nvme_cmd_qpair *ioq;
437 
438 	uint32_t ready_timeout_in_ms;
439 
440 	/** LO and HI capacity mask */
441 	uint32_t cap_lo;
442 	uint32_t cap_hi;
443 
444 	/** Page size and log2(page_size) - 12 that we're currently using */
445 	uint32_t page_size;
446 	uint32_t mps;
447 
448 	/** doorbell stride */
449 	uint32_t dstrd;
450 
451 	/** maximum i/o size in bytes */
452 	uint32_t max_xfer_size;
453 
454 	struct nvme_namespace ns[CONFIG_NVME_MAX_NAMESPACES];
455 };
456 
457 static inline
nvme_controller_has_dataset_mgmt(struct nvme_controller * ctrlr)458 bool nvme_controller_has_dataset_mgmt(struct nvme_controller *ctrlr)
459 {
460 	/* Assumes cd was byte swapped by nvme_controller_data_swapbytes() */
461 	return ((ctrlr->cdata.oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) &
462 		NVME_CTRLR_DATA_ONCS_DSM_MASK);
463 }
464 
nvme_lock(const struct device * dev)465 static inline void nvme_lock(const struct device *dev)
466 {
467 	struct nvme_controller *nvme_ctrlr = dev->data;
468 
469 	k_mutex_lock(&nvme_ctrlr->lock, K_FOREVER);
470 }
471 
nvme_unlock(const struct device * dev)472 static inline void nvme_unlock(const struct device *dev)
473 {
474 	struct nvme_controller *nvme_ctrlr = dev->data;
475 
476 	k_mutex_unlock(&nvme_ctrlr->lock);
477 }
478 
#endif /* ZEPHYR_DRIVERS_DISK_NVME_NVME_H_ */
480