1 /*
2  * Copyright (c) 2022 Intel Corporation
3  *
4  * SPDX-License-Identifier: Apache-2.0
5  */
6 
7 #ifndef ZEPHYR_DRIVERS_DISK_NVME_NHME_H_
8 #define ZEPHYR_DRIVERS_DISK_NVME_NVME_H_
9 
10 #include "nvme_helpers.h"
11 #include "nvme_cmd.h"
12 #include "nvme_namespace.h"
13 
/*
 * NVMe controller register layout.
 *
 * The two reserved arrays pad the structure so that the persistent memory
 * region registers start at offset E00h and the first doorbell pair at
 * offset 1000h.
 */
struct nvme_registers {
	/* 00h - 5Bh: controller, admin queue, CMB and boot partition registers */
	uint32_t cap_lo;		/* controller capabilities (low dword) */
	uint32_t cap_hi;		/* controller capabilities (high dword) */
	uint32_t vs;			/* version */
	uint32_t intms;			/* interrupt mask set */
	uint32_t intmc;			/* interrupt mask clear */
	uint32_t cc;			/* controller configuration */
	uint32_t reserved1;
	uint32_t csts;			/* controller status */
	uint32_t nssr;			/* NVM subsystem reset */
	uint32_t aqa;			/* admin queue attributes */
	uint64_t asq;			/* admin submission queue base address */
	uint64_t acq;			/* admin completion queue base address */
	uint32_t cmbloc;		/* controller memory buffer location */
	uint32_t cmbsz;			/* controller memory buffer size */
	uint32_t bpinfo;		/* boot partition information */
	uint32_t bprsel;		/* boot partition read select */
	uint64_t bpmbl;			/* boot partition memory buffer location */
	uint64_t cmbmsc;		/* CMB memory space control */
	uint32_t cmbsts;		/* controller memory buffer status */

	/* 5Ch - DFFh: reserved */
	uint8_t reserved3[3492];

	/* E00h - E1Bh: persistent memory region (PMR) registers */
	uint32_t pmrcap;		/* PMR capabilities */
	uint32_t pmrctl;		/* PMR control */
	uint32_t pmrsts;		/* PMR status */
	uint32_t pmrebs;		/* PMR elasticity buffer size */
	uint32_t pmrswtp;		/* PMR sustained write throughput */
	uint32_t pmrmsc_lo;		/* PMR controller memory space control (low) */
	uint32_t pmrmsc_hi;		/* PMR controller memory space control (high) */

	/* E1Ch - FFFh: reserved */
	uint8_t reserved4[484];

	/* 1000h: first doorbell pair */
	struct {
		uint32_t sq_tdbl;	/* submission queue tail doorbell */
		uint32_t cq_hdbl;	/* completion queue head doorbell */
	} doorbell[1];
};
48 
/*
 * Power state descriptor (32 bytes); struct nvme_controller_data holds an
 * array of 32 of these.
 */
struct nvme_power_state {
	uint16_t mp;		/**< Maximum Power */
	uint8_t ps_rsvd1;
	uint8_t mps_nops;	/**< Max Power Scale, Non-Operational State */
	uint32_t enlat;		/**< Entry Latency */
	uint32_t exlat;		/**< Exit Latency */
	uint8_t rrt;		/**< Relative Read Throughput */
	uint8_t rrl;		/**< Relative Read Latency */
	uint8_t rwt;		/**< Relative Write Throughput */
	uint8_t rwl;		/**< Relative Write Latency */
	uint16_t idlp;		/**< Idle Power */
	uint8_t ips;		/**< Idle Power Scale */
	uint8_t ps_rsvd8;
	uint16_t actp;		/**< Active Power */
	uint8_t apw_aps;	/**< Active Power Workload, Active Power Scale */
	uint8_t ps_rsvd10[9];
} __packed;
90 
/* Byte lengths of the sn[], mn[] and fr[] arrays of struct nvme_controller_data */
#define NVME_SERIAL_NUMBER_LENGTH 20	/* serial number */
#define NVME_MODEL_NUMBER_LENGTH 40	/* model number */
#define NVME_FIRMWARE_REVISION_LENGTH 8	/* firmware revision */
94 
95 struct nvme_controller_data {
96 	/* bytes 0-255: controller capabilities and features */
97 
98 	/** pci vendor id */
99 	uint16_t		vid;
100 
101 	/** pci subsystem vendor id */
102 	uint16_t		ssvid;
103 
104 	/** serial number */
105 	uint8_t			sn[NVME_SERIAL_NUMBER_LENGTH];
106 
107 	/** model number */
108 	uint8_t			mn[NVME_MODEL_NUMBER_LENGTH];
109 
110 	/** firmware revision */
111 	uint8_t			fr[NVME_FIRMWARE_REVISION_LENGTH];
112 
113 	/** recommended arbitration burst */
114 	uint8_t			rab;
115 
116 	/** ieee oui identifier */
117 	uint8_t			ieee[3];
118 
119 	/** multi-interface capabilities */
120 	uint8_t			mic;
121 
122 	/** maximum data transfer size */
123 	uint8_t			mdts;
124 
125 	/** Controller ID */
126 	uint16_t		ctrlr_id;
127 
128 	/** Version */
129 	uint32_t		ver;
130 
131 	/** RTD3 Resume Latency */
132 	uint32_t		rtd3r;
133 
134 	/** RTD3 Enter Latency */
135 	uint32_t		rtd3e;
136 
137 	/** Optional Asynchronous Events Supported */
138 	uint32_t		oaes;	/* bitfield really */
139 
140 	/** Controller Attributes */
141 	uint32_t		ctratt;	/* bitfield really */
142 
143 	/** Read Recovery Levels Supported */
144 	uint16_t		rrls;
145 
146 	uint8_t			reserved1[9];
147 
148 	/** Controller Type */
149 	uint8_t			cntrltype;
150 
151 	/** FRU Globally Unique Identifier */
152 	uint8_t			fguid[16];
153 
154 	/** Command Retry Delay Time 1 */
155 	uint16_t		crdt1;
156 
157 	/** Command Retry Delay Time 2 */
158 	uint16_t		crdt2;
159 
160 	/** Command Retry Delay Time 3 */
161 	uint16_t		crdt3;
162 
163 	uint8_t			reserved2[122];
164 
165 	/* bytes 256-511: admin command set attributes */
166 
167 	/** optional admin command support */
168 	uint16_t		oacs;
169 
170 	/** abort command limit */
171 	uint8_t			acl;
172 
173 	/** asynchronous event request limit */
174 	uint8_t			aerl;
175 
176 	/** firmware updates */
177 	uint8_t			frmw;
178 
179 	/** log page attributes */
180 	uint8_t			lpa;
181 
182 	/** error log page entries */
183 	uint8_t			elpe;
184 
185 	/** number of power states supported */
186 	uint8_t			npss;
187 
188 	/** admin vendor specific command configuration */
189 	uint8_t			avscc;
190 
191 	/** Autonomous Power State Transition Attributes */
192 	uint8_t			apsta;
193 
194 	/** Warning Composite Temperature Threshold */
195 	uint16_t		wctemp;
196 
197 	/** Critical Composite Temperature Threshold */
198 	uint16_t		cctemp;
199 
200 	/** Maximum Time for Firmware Activation */
201 	uint16_t		mtfa;
202 
203 	/** Host Memory Buffer Preferred Size */
204 	uint32_t		hmpre;
205 
206 	/** Host Memory Buffer Minimum Size */
207 	uint32_t		hmmin;
208 
209 	/** Name space capabilities  */
210 	struct {
211 		/* if nsmgmt, report tnvmcap and unvmcap */
212 		uint8_t    tnvmcap[16];
213 		uint8_t    unvmcap[16];
214 	} __packed untncap;
215 
216 	/** Replay Protected Memory Block Support */
217 	uint32_t		rpmbs; /* Really a bitfield */
218 
219 	/** Extended Device Self-test Time */
220 	uint16_t		edstt;
221 
222 	/** Device Self-test Options */
223 	uint8_t			dsto; /* Really a bitfield */
224 
225 	/** Firmware Update Granularity */
226 	uint8_t			fwug;
227 
228 	/** Keep Alive Support */
229 	uint16_t		kas;
230 
231 	/** Host Controlled Thermal Management Attributes */
232 	uint16_t		hctma; /* Really a bitfield */
233 
234 	/** Minimum Thermal Management Temperature */
235 	uint16_t		mntmt;
236 
237 	/** Maximum Thermal Management Temperature */
238 	uint16_t		mxtmt;
239 
240 	/** Sanitize Capabilities */
241 	uint32_t		sanicap; /* Really a bitfield */
242 
243 	/** Host Memory Buffer Minimum Descriptor Entry Size */
244 	uint32_t		hmminds;
245 
246 	/** Host Memory Maximum Descriptors Entries */
247 	uint16_t		hmmaxd;
248 
249 	/** NVM Set Identifier Maximum */
250 	uint16_t		nsetidmax;
251 
252 	/** Endurance Group Identifier Maximum */
253 	uint16_t		endgidmax;
254 
255 	/** ANA Transition Time */
256 	uint8_t			anatt;
257 
258 	/** Asymmetric Namespace Access Capabilities */
259 	uint8_t			anacap;
260 
261 	/** ANA Group Identifier Maximum */
262 	uint32_t		anagrpmax;
263 
264 	/** Number of ANA Group Identifiers */
265 	uint32_t		nanagrpid;
266 
267 	/** Persistent Event Log Size */
268 	uint32_t		pels;
269 
270 	uint8_t			reserved3[156];
271 	/* bytes 512-703: nvm command set attributes */
272 
273 	/** submission queue entry size */
274 	uint8_t			sqes;
275 
276 	/** completion queue entry size */
277 	uint8_t			cqes;
278 
279 	/** Maximum Outstanding Commands */
280 	uint16_t		maxcmd;
281 
282 	/** number of namespaces */
283 	uint32_t		nn;
284 
285 	/** optional nvm command support */
286 	uint16_t		oncs;
287 
288 	/** fused operation support */
289 	uint16_t		fuses;
290 
291 	/** format nvm attributes */
292 	uint8_t			fna;
293 
294 	/** volatile write cache */
295 	uint8_t			vwc;
296 
297 	/** Atomic Write Unit Normal */
298 	uint16_t		awun;
299 
300 	/** Atomic Write Unit Power Fail */
301 	uint16_t		awupf;
302 
303 	/** NVM Vendor Specific Command Configuration */
304 	uint8_t			nvscc;
305 
306 	/** Namespace Write Protection Capabilities */
307 	uint8_t			nwpc;
308 
309 	/** Atomic Compare & Write Unit */
310 	uint16_t		acwu;
311 	uint16_t		reserved6;
312 
313 	/** SGL Support */
314 	uint32_t		sgls;
315 
316 	/** Maximum Number of Allowed Namespaces */
317 	uint32_t		mnan;
318 
319 	/* bytes 540-767: Reserved */
320 	uint8_t			reserved7[224];
321 
322 	/** NVM Subsystem NVMe Qualified Name */
323 	uint8_t			subnqn[256];
324 
325 	/* bytes 1024-1791: Reserved */
326 	uint8_t			reserved8[768];
327 
328 	/* bytes 1792-2047: NVMe over Fabrics specification */
329 	uint8_t			reserved9[256];
330 
331 	/* bytes 2048-3071: power state descriptors */
332 	struct nvme_power_state power_state[32];
333 
334 	/* bytes 3072-4095: vendor specific */
335 	uint8_t			vs[1024];
336 } __packed __aligned(4);
337 
/**
 * @brief Convert controller data fields from little-endian to CPU byte order.
 *
 * On builds that are not little-endian, each named 16- and 32-bit top-level
 * field of @p s is converted in place with sys_le16_to_cpu() or
 * sys_le32_to_cpu().  Single-byte fields, byte arrays and reserved fields
 * are not touched.  On little-endian builds the function does nothing.
 *
 * NOTE(review): the 16/32-bit members of s->power_state[] (mp, enlat, exlat,
 * idlp, actp) are not converted here - confirm whether big-endian users
 * need them converted as well.
 *
 * @param s Controller data to convert in place.
 */
static inline
void nvme_controller_data_swapbytes(struct nvme_controller_data *s)
{
	/*
	 * Test the compiler-provided byte order macros.  The libc-private
	 * _BYTE_ORDER/_LITTLE_ENDIAN pair evaluates to "0 != 0" whenever the
	 * header defining it has not been included, which would silently skip
	 * the conversion on big-endian targets.
	 */
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
	/* bytes 0-255: controller capabilities and features */
	s->vid = sys_le16_to_cpu(s->vid);
	s->ssvid = sys_le16_to_cpu(s->ssvid);
	s->ctrlr_id = sys_le16_to_cpu(s->ctrlr_id);
	s->ver = sys_le32_to_cpu(s->ver);
	s->rtd3r = sys_le32_to_cpu(s->rtd3r);
	s->rtd3e = sys_le32_to_cpu(s->rtd3e);
	s->oaes = sys_le32_to_cpu(s->oaes);
	s->ctratt = sys_le32_to_cpu(s->ctratt);
	s->rrls = sys_le16_to_cpu(s->rrls);
	s->crdt1 = sys_le16_to_cpu(s->crdt1);
	s->crdt2 = sys_le16_to_cpu(s->crdt2);
	s->crdt3 = sys_le16_to_cpu(s->crdt3);

	/* bytes 256-511: admin command set attributes */
	s->oacs = sys_le16_to_cpu(s->oacs);
	s->wctemp = sys_le16_to_cpu(s->wctemp);
	s->cctemp = sys_le16_to_cpu(s->cctemp);
	s->mtfa = sys_le16_to_cpu(s->mtfa);
	s->hmpre = sys_le32_to_cpu(s->hmpre);
	s->hmmin = sys_le32_to_cpu(s->hmmin);
	s->rpmbs = sys_le32_to_cpu(s->rpmbs);
	s->edstt = sys_le16_to_cpu(s->edstt);
	s->kas = sys_le16_to_cpu(s->kas);
	s->hctma = sys_le16_to_cpu(s->hctma);
	s->mntmt = sys_le16_to_cpu(s->mntmt);
	s->mxtmt = sys_le16_to_cpu(s->mxtmt);
	s->sanicap = sys_le32_to_cpu(s->sanicap);
	s->hmminds = sys_le32_to_cpu(s->hmminds);
	s->hmmaxd = sys_le16_to_cpu(s->hmmaxd);
	s->nsetidmax = sys_le16_to_cpu(s->nsetidmax);
	s->endgidmax = sys_le16_to_cpu(s->endgidmax);
	s->anagrpmax = sys_le32_to_cpu(s->anagrpmax);
	s->nanagrpid = sys_le32_to_cpu(s->nanagrpid);
	s->pels = sys_le32_to_cpu(s->pels);

	/* bytes 512 onward: nvm command set attributes */
	s->maxcmd = sys_le16_to_cpu(s->maxcmd);
	s->nn = sys_le32_to_cpu(s->nn);
	s->oncs = sys_le16_to_cpu(s->oncs);
	s->fuses = sys_le16_to_cpu(s->fuses);
	s->awun = sys_le16_to_cpu(s->awun);
	s->awupf = sys_le16_to_cpu(s->awupf);
	s->acwu = sys_le16_to_cpu(s->acwu);
	s->sgls = sys_le32_to_cpu(s->sgls);
	s->mnan = sys_le32_to_cpu(s->mnan);
#else
	ARG_UNUSED(s);
#endif
}
387 
388 #include <zephyr/device.h>
389 #include <zephyr/drivers/pcie/pcie.h>
390 #include <zephyr/drivers/pcie/msi.h>
391 
/* PCIe BAR index */
#define NVME_PCIE_BAR_IDX 0

/* Sum of the configured admin and I/O entry counts */
#define NVME_REQUEST_AMOUNT \
	(CONFIG_NVME_ADMIN_ENTRIES + CONFIG_NVME_IO_ENTRIES)
396 
/*
 * admin queue + io queue(s)
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand inside larger expressions (e.g. NVME_PCIE_MSIX_VECTORS * 2).
 */
#define NVME_PCIE_MSIX_VECTORS (1 + CONFIG_NVME_IO_QUEUES)
399 
/*
 * Define the static storage of one queue pair.
 *
 * Expands to a command array cmd_<name> and a completion array cpl_<name>,
 * each holding n_entries elements and aligned to 0x1000 bytes, plus a
 * struct nvme_cmd_qpair called <name> that points at both arrays and
 * records n_entries.  The expansion has no trailing semicolon; the user
 * supplies it.
 */
#define NVME_QUEUE_ALLOCATE(name, n_entries)				\
	static struct nvme_command					\
		cmd_##name[n_entries] __aligned(0x1000);		\
	static struct nvme_completion					\
		cpl_##name[n_entries] __aligned(0x1000);		\
									\
	static struct nvme_cmd_qpair name = {				\
		.cmd = cmd_##name,					\
		.cpl = cpl_##name,					\
		.num_entries = n_entries,				\
	}

/* Queue pair named admin_<n> */
#define NVME_ADMINQ_ALLOCATE(n, n_entries) \
	NVME_QUEUE_ALLOCATE(admin_##n, n_entries)

/* Queue pair named io_<n> */
#define NVME_IOQ_ALLOCATE(n, n_entries) \
	NVME_QUEUE_ALLOCATE(io_##n, n_entries)
414 
/* Controller configuration: a single pointer to a struct pcie_dev. */
struct nvme_controller_config {
	struct pcie_dev *pcie;	/* PCIe device descriptor */
};
418 
419 struct nvme_controller {
420 	DEVICE_MMIO_RAM;
421 
422 	const struct device *dev;
423 
424 	struct k_mutex lock;
425 
426 	uint32_t id;
427 
428 	msi_vector_t vectors[NVME_PCIE_MSIX_VECTORS];
429 
430 	struct nvme_controller_data cdata;
431 
432 	uint32_t num_io_queues;
433 	struct nvme_cmd_qpair *adminq;
434 	struct nvme_cmd_qpair *ioq;
435 
436 	uint32_t ready_timeout_in_ms;
437 
438 	/** LO and HI capacity mask */
439 	uint32_t cap_lo;
440 	uint32_t cap_hi;
441 
442 	/** Page size and log2(page_size) - 12 that we're currently using */
443 	uint32_t page_size;
444 	uint32_t mps;
445 
446 	/** doorbell stride */
447 	uint32_t dstrd;
448 
449 	/** maximum i/o size in bytes */
450 	uint32_t max_xfer_size;
451 
452 	struct nvme_namespace ns[CONFIG_NVME_MAX_NAMESPACES];
453 };
454 
455 static inline
nvme_controller_has_dataset_mgmt(struct nvme_controller * ctrlr)456 bool nvme_controller_has_dataset_mgmt(struct nvme_controller *ctrlr)
457 {
458 	/* Assumes cd was byte swapped by nvme_controller_data_swapbytes() */
459 	return ((ctrlr->cdata.oncs >> NVME_CTRLR_DATA_ONCS_DSM_SHIFT) &
460 		NVME_CTRLR_DATA_ONCS_DSM_MASK);
461 }
462 
nvme_lock(const struct device * dev)463 static inline void nvme_lock(const struct device *dev)
464 {
465 	struct nvme_controller *nvme_ctrlr = dev->data;
466 
467 	k_mutex_lock(&nvme_ctrlr->lock, K_FOREVER);
468 }
469 
nvme_unlock(const struct device * dev)470 static inline void nvme_unlock(const struct device *dev)
471 {
472 	struct nvme_controller *nvme_ctrlr = dev->data;
473 
474 	k_mutex_unlock(&nvme_ctrlr->lock);
475 }
476 
#endif /* ZEPHYR_DRIVERS_DISK_NVME_NVME_H_ */
478