1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
4 */
5
6 /**
7 * DOC: Nitro Enclaves (NE) PCI device driver.
8 */
9
10 #include <linux/delay.h>
11 #include <linux/device.h>
12 #include <linux/list.h>
13 #include <linux/module.h>
14 #include <linux/mutex.h>
15 #include <linux/nitro_enclaves.h>
16 #include <linux/pci.h>
17 #include <linux/types.h>
18 #include <linux/wait.h>
19
20 #include "ne_misc_dev.h"
21 #include "ne_pci_dev.h"
22
23 /**
24 * NE_DEFAULT_TIMEOUT_MSECS - Default timeout to wait for a reply from
25 * the NE PCI device.
26 */
27 #define NE_DEFAULT_TIMEOUT_MSECS (120000) /* 120 sec */
28
29 static const struct pci_device_id ne_pci_ids[] = {
30 { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
31 { 0, }
32 };
33
34 MODULE_DEVICE_TABLE(pci, ne_pci_ids);
35
/**
 * ne_submit_request() - Submit command request to the PCI device based on the
 *			 command type.
 * @pdev:		PCI device to send the command to.
 * @cmd_type:		Command type of the request sent to the PCI device.
 * @cmd_request:	Command request payload.
 * @cmd_request_size:	Size of the command request payload.
 *
 * Context: Process context. This function is called with the ne_pci_dev mutex held.
 */
static void ne_submit_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
			      void *cmd_request, size_t cmd_request_size)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

	/*
	 * Copy the request payload into the device's MMIO send buffer first;
	 * writing the command type below acts as the doorbell, so the payload
	 * must be in place before NE_COMMAND is written.
	 */
	memcpy_toio(ne_pci_dev->iomem_base + NE_SEND_DATA, cmd_request, cmd_request_size);

	iowrite32(cmd_type, ne_pci_dev->iomem_base + NE_COMMAND);
}
55
/**
 * ne_retrieve_reply() - Retrieve reply from the PCI device.
 * @pdev:		PCI device to receive the reply from.
 * @cmd_reply:		Command reply payload.
 * @cmd_reply_size:	Size of the command reply payload.
 *
 * Context: Process context. This function is called with the ne_pci_dev mutex held.
 */
static void ne_retrieve_reply(struct pci_dev *pdev, struct ne_pci_dev_cmd_reply *cmd_reply,
			      size_t cmd_reply_size)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

	/* Copy the reply out of the device's MMIO receive buffer. */
	memcpy_fromio(cmd_reply, ne_pci_dev->iomem_base + NE_RECV_DATA, cmd_reply_size);
}
71
72 /**
73 * ne_wait_for_reply() - Wait for a reply of a PCI device command.
74 * @pdev: PCI device for which a reply is waited.
75 *
76 * Context: Process context. This function is called with the ne_pci_dev mutex held.
77 * Return:
78 * * 0 on success.
79 * * Negative return value on failure.
80 */
ne_wait_for_reply(struct pci_dev * pdev)81 static int ne_wait_for_reply(struct pci_dev *pdev)
82 {
83 struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
84 int rc = -EINVAL;
85
86 /*
87 * TODO: Update to _interruptible and handle interrupted wait event
88 * e.g. -ERESTARTSYS, incoming signals + update timeout, if needed.
89 */
90 rc = wait_event_timeout(ne_pci_dev->cmd_reply_wait_q,
91 atomic_read(&ne_pci_dev->cmd_reply_avail) != 0,
92 msecs_to_jiffies(NE_DEFAULT_TIMEOUT_MSECS));
93 if (!rc)
94 return -ETIMEDOUT;
95
96 return 0;
97 }
98
/**
 * ne_do_request() - Submit a command request to the NE PCI device and wait for
 *		     its reply.
 * @pdev:		PCI device to send the command to.
 * @cmd_type:		Command type of the request sent to the PCI device.
 * @cmd_request:	Command request payload.
 * @cmd_request_size:	Size of the command request payload, at most
 *			NE_SEND_DATA_SIZE.
 * @cmd_reply:		Command reply payload, filled in from the device.
 * @cmd_reply_size:	Size of the command reply payload, at most
 *			NE_RECV_DATA_SIZE.
 *
 * Serializes all command traffic via the ne_pci_dev mutex so the device
 * handles one request at a time.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure (validation error, timeout, or the
 *   device-reported error code from cmd_reply->rc).
 */
int ne_do_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
		  void *cmd_request, size_t cmd_request_size,
		  struct ne_pci_dev_cmd_reply *cmd_reply, size_t cmd_reply_size)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
	int rc = -EINVAL;

	/* Validate all inputs before taking the mutex. */
	if (cmd_type <= INVALID_CMD || cmd_type >= MAX_CMD) {
		dev_err_ratelimited(&pdev->dev, "Invalid cmd type=%u\n", cmd_type);

		return -EINVAL;
	}

	if (!cmd_request) {
		dev_err_ratelimited(&pdev->dev, "Null cmd request for cmd type=%u\n",
				    cmd_type);

		return -EINVAL;
	}

	if (cmd_request_size > NE_SEND_DATA_SIZE) {
		dev_err_ratelimited(&pdev->dev, "Invalid req size=%zu for cmd type=%u\n",
				    cmd_request_size, cmd_type);

		return -EINVAL;
	}

	if (!cmd_reply) {
		dev_err_ratelimited(&pdev->dev, "Null cmd reply for cmd type=%u\n",
				    cmd_type);

		return -EINVAL;
	}

	if (cmd_reply_size > NE_RECV_DATA_SIZE) {
		dev_err_ratelimited(&pdev->dev, "Invalid reply size=%zu for cmd type=%u\n",
				    cmd_reply_size, cmd_type);

		return -EINVAL;
	}

	/*
	 * Use this mutex so that the PCI device handles one command request at
	 * a time.
	 */
	mutex_lock(&ne_pci_dev->pci_dev_mutex);

	/* Clear the reply flag before submitting, so the wait below cannot
	 * be satisfied by a stale reply from a previous command.
	 */
	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);

	ne_submit_request(pdev, cmd_type, cmd_request, cmd_request_size);

	rc = ne_wait_for_reply(pdev);
	if (rc < 0) {
		dev_err_ratelimited(&pdev->dev, "Error in wait for reply for cmd type=%u [rc=%d]\n",
				    cmd_type, rc);

		goto unlock_mutex;
	}

	ne_retrieve_reply(pdev, cmd_reply, cmd_reply_size);

	/* Reply consumed; reset the flag for the next command. */
	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);

	/* Propagate the device-side error code, if any. */
	if (cmd_reply->rc < 0) {
		rc = cmd_reply->rc;

		dev_err_ratelimited(&pdev->dev, "Error in cmd process logic, cmd type=%u [rc=%d]\n",
				    cmd_type, rc);

		goto unlock_mutex;
	}

	rc = 0;

unlock_mutex:
	mutex_unlock(&ne_pci_dev->pci_dev_mutex);

	return rc;
}
178
/**
 * ne_reply_handler() - Interrupt handler for retrieving a reply matching a
 *			request sent to the PCI device for enclave lifetime
 *			management.
 * @irq:	Received interrupt for a reply sent by the PCI device.
 * @args:	PCI device private data structure.
 *
 * Context: Interrupt context.
 * Return:
 * * IRQ_HANDLED on handled interrupt.
 */
static irqreturn_t ne_reply_handler(int irq, void *args)
{
	struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;

	/* Mark the reply available, then wake the waiter in ne_wait_for_reply(). */
	atomic_set(&ne_pci_dev->cmd_reply_avail, 1);

	/* TODO: Update to _interruptible. */
	wake_up(&ne_pci_dev->cmd_reply_wait_q);

	return IRQ_HANDLED;
}
201
/**
 * ne_event_work_handler() - Work queue handler for notifying enclaves on a
 *			     state change received by the event interrupt
 *			     handler.
 * @work:	Item containing the NE PCI device for which an out-of-band event
 *		was issued.
 *
 * An out-of-band event is being issued by the Nitro Hypervisor when at least
 * one enclave is changing state without client interaction.
 *
 * Context: Work queue context.
 */
static void ne_event_work_handler(struct work_struct *work)
{
	struct ne_pci_dev_cmd_reply cmd_reply = {};
	struct ne_enclave *ne_enclave = NULL;
	struct ne_pci_dev *ne_pci_dev =
		container_of(work, struct ne_pci_dev, notify_work);
	struct pci_dev *pdev = ne_pci_dev->pdev;
	int rc = -EINVAL;
	struct slot_info_req slot_info_req = {};

	mutex_lock(&ne_pci_dev->enclaves_list_mutex);

	/*
	 * Iterate over all enclaves registered for the Nitro Enclaves
	 * PCI device and determine for which enclave(s) the out-of-band event
	 * is corresponding to.
	 *
	 * Lock order: enclaves_list_mutex -> enclave_info_mutex (per enclave).
	 */
	list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list, enclave_list_entry) {
		mutex_lock(&ne_enclave->enclave_info_mutex);

		/*
		 * Enclaves that were never started cannot receive out-of-band
		 * events.
		 */
		if (ne_enclave->state != NE_STATE_RUNNING)
			goto unlock;

		/* Query the device for the current state of this enclave's slot. */
		slot_info_req.slot_uid = ne_enclave->slot_uid;

		rc = ne_do_request(pdev, SLOT_INFO,
				   &slot_info_req, sizeof(slot_info_req),
				   &cmd_reply, sizeof(cmd_reply));
		if (rc < 0)
			dev_err(&pdev->dev, "Error in slot info [rc=%d]\n", rc);

		/*
		 * Notify enclave process that the enclave state changed.
		 *
		 * NOTE(review): cmd_reply.state is consumed even when
		 * ne_do_request() failed above, in which case it may be zero
		 * or stale from a prior iteration — verify this is intended.
		 */
		if (ne_enclave->state != cmd_reply.state) {
			ne_enclave->state = cmd_reply.state;

			ne_enclave->has_event = true;

			wake_up_interruptible(&ne_enclave->eventq);
		}

unlock:
		mutex_unlock(&ne_enclave->enclave_info_mutex);
	}

	mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
}
264
/**
 * ne_event_handler() - Interrupt handler for PCI device out-of-band events.
 *			This interrupt does not supply any data in the MMIO
 *			region. It notifies a change in the state of any of
 *			the launched enclaves.
 * @irq:	Received interrupt for an out-of-band event.
 * @args:	PCI device private data structure.
 *
 * Context: Interrupt context.
 * Return:
 * * IRQ_HANDLED on handled interrupt.
 */
static irqreturn_t ne_event_handler(int irq, void *args)
{
	struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;

	/*
	 * The actual processing (issuing SLOT_INFO commands, waking pollers)
	 * sleeps, so defer it to the event workqueue.
	 */
	queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);

	return IRQ_HANDLED;
}
285
/**
 * ne_setup_msix() - Setup MSI-X vectors for the PCI device.
 * @pdev:	PCI device to setup the MSI-X for.
 *
 * Allocates all MSI-X vectors the device advertises, then wires up the
 * command-reply IRQ (NE_VEC_REPLY), the out-of-band event workqueue, and the
 * event IRQ (NE_VEC_EVENT). On failure, unwinds in reverse order via the
 * goto chain at the bottom.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_setup_msix(struct pci_dev *pdev)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
	int nr_vecs = 0;
	int rc = -EINVAL;

	nr_vecs = pci_msix_vec_count(pdev);
	if (nr_vecs < 0) {
		rc = nr_vecs;

		dev_err(&pdev->dev, "Error in getting vec count [rc=%d]\n", rc);

		return rc;
	}

	/* Require exactly nr_vecs vectors (min == max). */
	rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in alloc MSI-X vecs [rc=%d]\n", rc);

		return rc;
	}

	/*
	 * This IRQ gets triggered every time the PCI device responds to a
	 * command request. The reply is then retrieved, reading from the MMIO
	 * space of the PCI device.
	 */
	rc = request_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_reply_handler,
			 0, "enclave_cmd", ne_pci_dev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in request irq reply [rc=%d]\n", rc);

		goto free_irq_vectors;
	}

	/* Workqueue must exist before the event IRQ can queue work on it. */
	ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
	if (!ne_pci_dev->event_wq) {
		rc = -ENOMEM;

		dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n", rc);

		goto free_reply_irq_vec;
	}

	INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);

	/*
	 * This IRQ gets triggered every time any enclave's state changes. Its
	 * handler then scans for the changes and propagates them to the user
	 * space.
	 */
	rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_event_handler,
			 0, "enclave_evt", ne_pci_dev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);

		goto destroy_wq;
	}

	return 0;

destroy_wq:
	destroy_workqueue(ne_pci_dev->event_wq);
free_reply_irq_vec:
	free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
free_irq_vectors:
	pci_free_irq_vectors(pdev);

	return rc;
}
365
366 /**
367 * ne_teardown_msix() - Teardown MSI-X vectors for the PCI device.
368 * @pdev: PCI device to teardown the MSI-X for.
369 *
370 * Context: Process context.
371 */
ne_teardown_msix(struct pci_dev * pdev)372 static void ne_teardown_msix(struct pci_dev *pdev)
373 {
374 struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
375
376 free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
377
378 flush_work(&ne_pci_dev->notify_work);
379 flush_workqueue(ne_pci_dev->event_wq);
380 destroy_workqueue(ne_pci_dev->event_wq);
381
382 free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
383
384 pci_free_irq_vectors(pdev);
385 }
386
/**
 * ne_pci_dev_enable() - Select the PCI device version and enable it.
 * @pdev:	PCI device to select version for and then enable.
 *
 * Writes the requested protocol version and the enable flag to the device's
 * MMIO registers, reading each back to confirm the device accepted it.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * -EIO if the device does not echo back the requested version or the
 *   enabled state.
 */
static int ne_pci_dev_enable(struct pci_dev *pdev)
{
	u8 dev_enable_reply = 0;
	u16 dev_version_reply = 0;
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

	/* Negotiate the protocol version: write it, then read back the echo. */
	iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);

	dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
	if (dev_version_reply != NE_VERSION_MAX) {
		dev_err(&pdev->dev, "Error in pci dev version cmd\n");

		return -EIO;
	}

	/* Enable the device and verify it reports the enabled state. */
	iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);

	dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
	if (dev_enable_reply != NE_ENABLE_ON) {
		dev_err(&pdev->dev, "Error in pci dev enable cmd\n");

		return -EIO;
	}

	return 0;
}
422
/**
 * ne_pci_dev_disable() - Disable the PCI device.
 * @pdev:	PCI device to disable.
 *
 * Writes NE_ENABLE_OFF and polls (up to NE_DEFAULT_TIMEOUT_MSECS, in 10 ms
 * steps) until the device reports the disabled state. Logs an error if the
 * device never reaches it; no error code is returned to the caller.
 *
 * Context: Process context.
 */
static void ne_pci_dev_disable(struct pci_dev *pdev)
{
	u8 dev_disable_reply = 0;
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
	const unsigned int sleep_time = 10; /* 10 ms */
	unsigned int sleep_time_count = 0;

	iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);

	/*
	 * Check for NE_ENABLE_OFF in a loop, to handle cases when the device
	 * state is not immediately set to disabled and going through a
	 * transitory state of disabling.
	 */
	while (sleep_time_count < NE_DEFAULT_TIMEOUT_MSECS) {
		dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
		if (dev_disable_reply == NE_ENABLE_OFF)
			return;

		/*
		 * NOTE(review): if a signal is pending, msleep_interruptible()
		 * returns early, so the loop may spin faster than 10 ms per
		 * iteration and time out sooner than 120 s — confirm intended.
		 */
		msleep_interruptible(sleep_time);
		sleep_time_count += sleep_time;
	}

	/* One final read after the timeout before giving up. */
	dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
	if (dev_disable_reply != NE_ENABLE_OFF)
		dev_err(&pdev->dev, "Error in pci dev disable cmd\n");
}
456
/**
 * ne_pci_probe() - Probe function for the NE PCI device.
 * @pdev:	PCI device to match with the NE PCI driver.
 * @id:		PCI device id table associated with the NE PCI driver.
 *
 * Allocates the driver state, maps BAR resources, sets up MSI-X, enables the
 * device and registers the misc device for user-space access. On failure,
 * unwinds in reverse order via the goto chain at the bottom.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct ne_pci_dev *ne_pci_dev = NULL;
	int rc = -EINVAL;

	ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
	if (!ne_pci_dev)
		return -ENOMEM;

	rc = pci_enable_device(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in pci dev enable [rc=%d]\n", rc);

		goto free_ne_pci_dev;
	}

	pci_set_master(pdev);

	rc = pci_request_regions_exclusive(pdev, "nitro_enclaves");
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in pci request regions [rc=%d]\n", rc);

		goto disable_pci_dev;
	}

	ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
	if (!ne_pci_dev->iomem_base) {
		rc = -ENOMEM;

		dev_err(&pdev->dev, "Error in pci iomap [rc=%d]\n", rc);

		goto release_pci_regions;
	}

	/* drvdata must be set before ne_setup_msix() and the dev enable/disable
	 * helpers, which all retrieve it via pci_get_drvdata().
	 */
	pci_set_drvdata(pdev, ne_pci_dev);

	rc = ne_setup_msix(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in pci dev msix setup [rc=%d]\n", rc);

		goto iounmap_pci_bar;
	}

	/* Disable first so the device starts from a known state before enabling. */
	ne_pci_dev_disable(pdev);

	rc = ne_pci_dev_enable(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in ne_pci_dev enable [rc=%d]\n", rc);

		goto teardown_msix;
	}

	/* Initialize the command/reply and enclave bookkeeping state. */
	atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
	init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
	INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
	mutex_init(&ne_pci_dev->enclaves_list_mutex);
	mutex_init(&ne_pci_dev->pci_dev_mutex);
	ne_pci_dev->pdev = pdev;

	ne_devs.ne_pci_dev = ne_pci_dev;

	/* Expose the misc device to user space last, once everything is ready. */
	rc = misc_register(ne_devs.ne_misc_dev);
	if (rc < 0) {
		dev_err(&pdev->dev, "Error in misc dev register [rc=%d]\n", rc);

		goto disable_ne_pci_dev;
	}

	return 0;

disable_ne_pci_dev:
	ne_devs.ne_pci_dev = NULL;
	ne_pci_dev_disable(pdev);
teardown_msix:
	ne_teardown_msix(pdev);
iounmap_pci_bar:
	pci_set_drvdata(pdev, NULL);
	pci_iounmap(pdev, ne_pci_dev->iomem_base);
release_pci_regions:
	pci_release_regions(pdev);
disable_pci_dev:
	pci_disable_device(pdev);
free_ne_pci_dev:
	kfree(ne_pci_dev);

	return rc;
}
554
/**
 * ne_pci_remove() - Remove function for the NE PCI device.
 * @pdev:	PCI device associated with the NE PCI driver.
 *
 * Tears down everything ne_pci_probe() set up, in reverse order: user-space
 * interface first, then device disable, IRQs, MMIO mapping, PCI resources
 * and finally the driver state allocation.
 *
 * Context: Process context.
 */
static void ne_pci_remove(struct pci_dev *pdev)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

	/* Stop user space first, before tearing down the device. */
	misc_deregister(ne_devs.ne_misc_dev);

	ne_devs.ne_pci_dev = NULL;

	ne_pci_dev_disable(pdev);

	ne_teardown_msix(pdev);

	pci_set_drvdata(pdev, NULL);

	pci_iounmap(pdev, ne_pci_dev->iomem_base);

	pci_release_regions(pdev);

	pci_disable_device(pdev);

	kfree(ne_pci_dev);
}
583
/**
 * ne_pci_shutdown() - Shutdown function for the NE PCI device.
 * @pdev:	PCI device associated with the NE PCI driver.
 *
 * Same teardown sequence as ne_pci_remove(), with an extra NULL check since
 * shutdown can be invoked even if probe never completed and drvdata was
 * never set.
 *
 * Context: Process context.
 */
static void ne_pci_shutdown(struct pci_dev *pdev)
{
	struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

	if (!ne_pci_dev)
		return;

	/* Stop user space first, before tearing down the device. */
	misc_deregister(ne_devs.ne_misc_dev);

	ne_devs.ne_pci_dev = NULL;

	ne_pci_dev_disable(pdev);

	ne_teardown_msix(pdev);

	pci_set_drvdata(pdev, NULL);

	pci_iounmap(pdev, ne_pci_dev->iomem_base);

	pci_release_regions(pdev);

	pci_disable_device(pdev);

	kfree(ne_pci_dev);
}
615
/*
 * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
 * needed.
 */
/* NE PCI device driver; registered by the misc device side (non-static). */
struct pci_driver ne_pci_driver = {
	.name		= "nitro_enclaves",
	.id_table	= ne_pci_ids,
	.probe		= ne_pci_probe,
	.remove		= ne_pci_remove,
	.shutdown	= ne_pci_shutdown,
};
628