1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2021 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 *
7 */
8
9 #define pr_fmt(fmt) "habanalabs: " fmt
10
11 #include "habanalabs.h"
12
13 #include <linux/pci.h>
14 #include <linux/aer.h>
15 #include <linux/module.h>
16
17 #define CREATE_TRACE_POINTS
18 #include <trace/events/habanalabs.h>
19
20 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
21
22 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
23
24 MODULE_AUTHOR(HL_DRIVER_AUTHOR);
25 MODULE_DESCRIPTION(HL_DRIVER_DESC);
26 MODULE_LICENSE("GPL v2");
27
/* Char-device major number, filled in by hl_init() */
static int hl_major;

/* Device class created in hl_init() and passed to hl_device_init() */
static struct class *hl_class;

/*
 * Minor number -> struct hl_device lookup table. All accesses in this file
 * are done while holding hl_devs_idr_lock.
 */
static DEFINE_MUTEX(hl_devs_idr_lock);
static DEFINE_IDR(hl_devs_idr);
32
33 #define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */
34 #define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */
35
36 static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
37 static int reset_on_lockup = 1;
38 static int memory_scrub;
39 static ulong boot_error_status_mask = ULONG_MAX;
40
41 module_param(timeout_locked, int, 0444);
42 MODULE_PARM_DESC(timeout_locked,
43 "Device lockup timeout in seconds (0 = disabled, default 30s)");
44
45 module_param(reset_on_lockup, int, 0444);
46 MODULE_PARM_DESC(reset_on_lockup,
47 "Do device reset on lockup (0 = no, 1 = yes, default yes)");
48
49 module_param(memory_scrub, int, 0444);
50 MODULE_PARM_DESC(memory_scrub,
51 "Scrub device memory in various states (0 = no, 1 = yes, default no)");
52
53 module_param(boot_error_status_mask, ulong, 0444);
54 MODULE_PARM_DESC(boot_error_status_mask,
55 "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)");
56
57 #define PCI_VENDOR_ID_HABANALABS 0x1da3
58
59 #define PCI_IDS_GOYA 0x0001
60 #define PCI_IDS_GAUDI 0x1000
61 #define PCI_IDS_GAUDI_SEC 0x1010
62
63 #define PCI_IDS_GAUDI2 0x1020
64
65 static const struct pci_device_id ids[] = {
66 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
67 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
68 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
69 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
70 { 0, }
71 };
72 MODULE_DEVICE_TABLE(pci, ids);
73
74 /*
75 * get_asic_type - translate device id to asic type
76 *
77 * @device: id of the PCI device
78 *
79 * Translate device id to asic type.
80 * In case of unidentified device, return -1
81 */
get_asic_type(u16 device)82 static enum hl_asic_type get_asic_type(u16 device)
83 {
84 enum hl_asic_type asic_type;
85
86 switch (device) {
87 case PCI_IDS_GOYA:
88 asic_type = ASIC_GOYA;
89 break;
90 case PCI_IDS_GAUDI:
91 asic_type = ASIC_GAUDI;
92 break;
93 case PCI_IDS_GAUDI_SEC:
94 asic_type = ASIC_GAUDI_SEC;
95 break;
96 case PCI_IDS_GAUDI2:
97 asic_type = ASIC_GAUDI2;
98 break;
99 default:
100 asic_type = ASIC_INVALID;
101 break;
102 }
103
104 return asic_type;
105 }
106
is_asic_secured(enum hl_asic_type asic_type)107 static bool is_asic_secured(enum hl_asic_type asic_type)
108 {
109 switch (asic_type) {
110 case ASIC_GAUDI_SEC:
111 return true;
112 default:
113 return false;
114 }
115 }
116
117 /*
118 * hl_device_open - open function for habanalabs device
119 *
120 * @inode: pointer to inode structure
121 * @filp: pointer to file structure
122 *
123 * Called when process opens an habanalabs device.
124 */
hl_device_open(struct inode * inode,struct file * filp)125 int hl_device_open(struct inode *inode, struct file *filp)
126 {
127 enum hl_device_status status;
128 struct hl_device *hdev;
129 struct hl_fpriv *hpriv;
130 int rc;
131
132 mutex_lock(&hl_devs_idr_lock);
133 hdev = idr_find(&hl_devs_idr, iminor(inode));
134 mutex_unlock(&hl_devs_idr_lock);
135
136 if (!hdev) {
137 pr_err("Couldn't find device %d:%d\n",
138 imajor(inode), iminor(inode));
139 return -ENXIO;
140 }
141
142 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
143 if (!hpriv)
144 return -ENOMEM;
145
146 hpriv->hdev = hdev;
147 filp->private_data = hpriv;
148 hpriv->filp = filp;
149
150 mutex_init(&hpriv->notifier_event.lock);
151 mutex_init(&hpriv->restore_phase_mutex);
152 mutex_init(&hpriv->ctx_lock);
153 kref_init(&hpriv->refcount);
154 nonseekable_open(inode, filp);
155
156 hl_ctx_mgr_init(&hpriv->ctx_mgr);
157 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
158
159 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
160
161 mutex_lock(&hdev->fpriv_list_lock);
162
163 if (!hl_device_operational(hdev, &status)) {
164 dev_dbg_ratelimited(hdev->dev,
165 "Can't open %s because it is %s\n",
166 dev_name(hdev->dev), hdev->status[status]);
167
168 if (status == HL_DEVICE_STATUS_IN_RESET ||
169 status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
170 rc = -EAGAIN;
171 else
172 rc = -EPERM;
173
174 goto out_err;
175 }
176
177 if (hdev->is_in_dram_scrub) {
178 dev_dbg_ratelimited(hdev->dev,
179 "Can't open %s during dram scrub\n",
180 dev_name(hdev->dev));
181 rc = -EAGAIN;
182 goto out_err;
183 }
184
185 if (hdev->compute_ctx_in_release) {
186 dev_dbg_ratelimited(hdev->dev,
187 "Can't open %s because another user is still releasing it\n",
188 dev_name(hdev->dev));
189 rc = -EAGAIN;
190 goto out_err;
191 }
192
193 if (hdev->is_compute_ctx_active) {
194 dev_dbg_ratelimited(hdev->dev,
195 "Can't open %s because another user is working on it\n",
196 dev_name(hdev->dev));
197 rc = -EBUSY;
198 goto out_err;
199 }
200
201 rc = hl_ctx_create(hdev, hpriv);
202 if (rc) {
203 dev_err(hdev->dev, "Failed to create context %d\n", rc);
204 goto out_err;
205 }
206
207 list_add(&hpriv->dev_node, &hdev->fpriv_list);
208 mutex_unlock(&hdev->fpriv_list_lock);
209
210 hdev->asic_funcs->send_device_activity(hdev, true);
211
212 hl_debugfs_add_file(hpriv);
213
214 atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
215 atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
216 hdev->captured_err_info.undef_opcode.write_enable = true;
217
218 hdev->open_counter++;
219 hdev->last_successful_open_jif = jiffies;
220 hdev->last_successful_open_ktime = ktime_get();
221
222 return 0;
223
224 out_err:
225 mutex_unlock(&hdev->fpriv_list_lock);
226 hl_mem_mgr_fini(&hpriv->mem_mgr);
227 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
228 filp->private_data = NULL;
229 mutex_destroy(&hpriv->ctx_lock);
230 mutex_destroy(&hpriv->restore_phase_mutex);
231 mutex_destroy(&hpriv->notifier_event.lock);
232 put_pid(hpriv->taskpid);
233
234 kfree(hpriv);
235
236 return rc;
237 }
238
hl_device_open_ctrl(struct inode * inode,struct file * filp)239 int hl_device_open_ctrl(struct inode *inode, struct file *filp)
240 {
241 struct hl_device *hdev;
242 struct hl_fpriv *hpriv;
243 int rc;
244
245 mutex_lock(&hl_devs_idr_lock);
246 hdev = idr_find(&hl_devs_idr, iminor(inode));
247 mutex_unlock(&hl_devs_idr_lock);
248
249 if (!hdev) {
250 pr_err("Couldn't find device %d:%d\n",
251 imajor(inode), iminor(inode));
252 return -ENXIO;
253 }
254
255 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
256 if (!hpriv)
257 return -ENOMEM;
258
259 /* Prevent other routines from reading partial hpriv data by
260 * initializing hpriv fields before inserting it to the list
261 */
262 hpriv->hdev = hdev;
263 filp->private_data = hpriv;
264 hpriv->filp = filp;
265
266 mutex_init(&hpriv->notifier_event.lock);
267 nonseekable_open(inode, filp);
268
269 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
270
271 mutex_lock(&hdev->fpriv_ctrl_list_lock);
272
273 if (!hl_device_operational(hdev, NULL)) {
274 dev_dbg_ratelimited(hdev->dev_ctrl,
275 "Can't open %s because it is disabled or in reset\n",
276 dev_name(hdev->dev_ctrl));
277 rc = -EPERM;
278 goto out_err;
279 }
280
281 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list);
282 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
283
284 return 0;
285
286 out_err:
287 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
288 filp->private_data = NULL;
289 put_pid(hpriv->taskpid);
290
291 kfree(hpriv);
292
293 return rc;
294 }
295
set_driver_behavior_per_device(struct hl_device * hdev)296 static void set_driver_behavior_per_device(struct hl_device *hdev)
297 {
298 hdev->nic_ports_mask = 0;
299 hdev->fw_components = FW_TYPE_ALL_TYPES;
300 hdev->mmu_enable = MMU_EN_ALL;
301 hdev->cpu_queues_enable = 1;
302 hdev->pldm = 0;
303 hdev->hard_reset_on_fw_events = 1;
304 hdev->bmc_enable = 1;
305 hdev->reset_on_preboot_fail = 1;
306 hdev->heartbeat = 1;
307 }
308
copy_kernel_module_params_to_device(struct hl_device * hdev)309 static void copy_kernel_module_params_to_device(struct hl_device *hdev)
310 {
311 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
312
313 hdev->major = hl_major;
314 hdev->memory_scrub = memory_scrub;
315 hdev->reset_on_lockup = reset_on_lockup;
316 hdev->boot_error_status_mask = boot_error_status_mask;
317 }
318
fixup_device_params_per_asic(struct hl_device * hdev,int timeout)319 static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
320 {
321 switch (hdev->asic_type) {
322 case ASIC_GAUDI:
323 case ASIC_GAUDI_SEC:
324 /* If user didn't request a different timeout than the default one, we have
325 * a different default timeout for Gaudi
326 */
327 if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
328 hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
329 MSEC_PER_SEC);
330
331 hdev->reset_upon_device_release = 0;
332 break;
333
334 case ASIC_GOYA:
335 hdev->reset_upon_device_release = 0;
336 break;
337
338 default:
339 hdev->reset_upon_device_release = 1;
340 break;
341 }
342 }
343
fixup_device_params(struct hl_device * hdev)344 static int fixup_device_params(struct hl_device *hdev)
345 {
346 int tmp_timeout;
347
348 tmp_timeout = timeout_locked;
349
350 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
351 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
352
353 if (tmp_timeout)
354 hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
355 else
356 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
357
358 hdev->stop_on_err = true;
359 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
360 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
361
362 /* Enable only after the initialization of the device */
363 hdev->disabled = true;
364
365 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) &&
366 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) {
367 pr_err("Preboot must be set along with other components");
368 return -EINVAL;
369 }
370
371 /* If CPU queues not enabled, no way to do heartbeat */
372 if (!hdev->cpu_queues_enable)
373 hdev->heartbeat = 0;
374
375 fixup_device_params_per_asic(hdev, tmp_timeout);
376
377 return 0;
378 }
379
380 /**
381 * create_hdev - create habanalabs device instance
382 *
383 * @dev: will hold the pointer to the new habanalabs device structure
384 * @pdev: pointer to the pci device
385 *
386 * Allocate memory for habanalabs device and initialize basic fields
387 * Identify the ASIC type
388 * Allocate ID (minor) for the device (only for real devices)
389 */
create_hdev(struct hl_device ** dev,struct pci_dev * pdev)390 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
391 {
392 int main_id, ctrl_id = 0, rc = 0;
393 struct hl_device *hdev;
394
395 *dev = NULL;
396
397 hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
398 if (!hdev)
399 return -ENOMEM;
400
401 /* Will be NULL in case of simulator device */
402 hdev->pdev = pdev;
403
404 /* Assign status description string */
405 strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
406 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
407 strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
408 strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
409 strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
410 "in device creation", HL_STR_MAX);
411 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
412 "in reset after device release", HL_STR_MAX);
413
414
415 /* First, we must find out which ASIC are we handling. This is needed
416 * to configure the behavior of the driver (kernel parameters)
417 */
418 hdev->asic_type = get_asic_type(pdev->device);
419 if (hdev->asic_type == ASIC_INVALID) {
420 dev_err(&pdev->dev, "Unsupported ASIC\n");
421 rc = -ENODEV;
422 goto free_hdev;
423 }
424
425 copy_kernel_module_params_to_device(hdev);
426
427 set_driver_behavior_per_device(hdev);
428
429 fixup_device_params(hdev);
430
431 mutex_lock(&hl_devs_idr_lock);
432
433 /* Always save 2 numbers, 1 for main device and 1 for control.
434 * They must be consecutive
435 */
436 main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
437
438 if (main_id >= 0)
439 ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
440 main_id + 2, GFP_KERNEL);
441
442 mutex_unlock(&hl_devs_idr_lock);
443
444 if ((main_id < 0) || (ctrl_id < 0)) {
445 if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
446 pr_err("too many devices in the system\n");
447
448 if (main_id >= 0) {
449 mutex_lock(&hl_devs_idr_lock);
450 idr_remove(&hl_devs_idr, main_id);
451 mutex_unlock(&hl_devs_idr_lock);
452 }
453
454 rc = -EBUSY;
455 goto free_hdev;
456 }
457
458 hdev->id = main_id;
459 hdev->id_control = ctrl_id;
460
461 *dev = hdev;
462
463 return 0;
464
465 free_hdev:
466 kfree(hdev);
467 return rc;
468 }
469
470 /*
471 * destroy_hdev - destroy habanalabs device instance
472 *
473 * @dev: pointer to the habanalabs device structure
474 *
475 */
destroy_hdev(struct hl_device * hdev)476 static void destroy_hdev(struct hl_device *hdev)
477 {
478 /* Remove device from the device list */
479 mutex_lock(&hl_devs_idr_lock);
480 idr_remove(&hl_devs_idr, hdev->id);
481 idr_remove(&hl_devs_idr, hdev->id_control);
482 mutex_unlock(&hl_devs_idr_lock);
483
484 kfree(hdev);
485 }
486
/*
 * hl_pmops_suspend - power-management suspend callback
 *
 * @dev: pointer to the generic device structure
 *
 * Return: the result of hl_device_suspend(), or 0 if no habanalabs device
 * is attached to @dev.
 */
static int hl_pmops_suspend(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to suspend PCI device\n");

	if (hdev)
		return hl_device_suspend(hdev);

	/* Nothing to suspend; report it but do not fail the operation */
	pr_err("device pointer is NULL in suspend\n");

	return 0;
}
500
/*
 * hl_pmops_resume - power-management resume callback
 *
 * @dev: pointer to the generic device structure
 *
 * Return: the result of hl_device_resume(), or 0 if no habanalabs device
 * is attached to @dev.
 */
static int hl_pmops_resume(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to resume PCI device\n");

	if (hdev)
		return hl_device_resume(hdev);

	/* Nothing to resume; report it but do not fail the operation */
	pr_err("device pointer is NULL in resume\n");

	return 0;
}
514
515 /**
516 * hl_pci_probe - probe PCI habanalabs devices
517 *
518 * @pdev: pointer to pci device
519 * @id: pointer to pci device id structure
520 *
521 * Standard PCI probe function for habanalabs device.
522 * Create a new habanalabs device and initialize it according to the
523 * device's type
524 */
hl_pci_probe(struct pci_dev * pdev,const struct pci_device_id * id)525 static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
526 {
527 struct hl_device *hdev;
528 int rc;
529
530 dev_info(&pdev->dev, HL_NAME
531 " device found [%04x:%04x] (rev %x)\n",
532 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
533
534 rc = create_hdev(&hdev, pdev);
535 if (rc)
536 return rc;
537
538 pci_set_drvdata(pdev, hdev);
539
540 pci_enable_pcie_error_reporting(pdev);
541
542 rc = hl_device_init(hdev, hl_class);
543 if (rc) {
544 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
545 rc = -ENODEV;
546 goto disable_device;
547 }
548
549 return 0;
550
551 disable_device:
552 pci_disable_pcie_error_reporting(pdev);
553 pci_set_drvdata(pdev, NULL);
554 destroy_hdev(hdev);
555
556 return rc;
557 }
558
559 /*
560 * hl_pci_remove - remove PCI habanalabs devices
561 *
562 * @pdev: pointer to pci device
563 *
564 * Standard PCI remove function for habanalabs device
565 */
hl_pci_remove(struct pci_dev * pdev)566 static void hl_pci_remove(struct pci_dev *pdev)
567 {
568 struct hl_device *hdev;
569
570 hdev = pci_get_drvdata(pdev);
571 if (!hdev)
572 return;
573
574 hl_device_fini(hdev);
575 pci_disable_pcie_error_reporting(pdev);
576 pci_set_drvdata(pdev, NULL);
577 destroy_hdev(hdev);
578 }
579
580 /**
581 * hl_pci_err_detected - a PCI bus error detected on this device
582 *
583 * @pdev: pointer to pci device
584 * @state: PCI error type
585 *
586 * Called by the PCI subsystem whenever a non-correctable
587 * PCI bus error is detected
588 */
589 static pci_ers_result_t
hl_pci_err_detected(struct pci_dev * pdev,pci_channel_state_t state)590 hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
591 {
592 struct hl_device *hdev = pci_get_drvdata(pdev);
593 enum pci_ers_result result;
594
595 switch (state) {
596 case pci_channel_io_normal:
597 return PCI_ERS_RESULT_CAN_RECOVER;
598
599 case pci_channel_io_frozen:
600 dev_warn(hdev->dev, "frozen state error detected\n");
601 result = PCI_ERS_RESULT_NEED_RESET;
602 break;
603
604 case pci_channel_io_perm_failure:
605 dev_warn(hdev->dev, "failure state error detected\n");
606 result = PCI_ERS_RESULT_DISCONNECT;
607 break;
608
609 default:
610 result = PCI_ERS_RESULT_NONE;
611 }
612
613 hdev->asic_funcs->halt_engines(hdev, true, false);
614
615 return result;
616 }
617
618 /**
619 * hl_pci_err_resume - resume after a PCI slot reset
620 *
621 * @pdev: pointer to pci device
622 *
623 */
hl_pci_err_resume(struct pci_dev * pdev)624 static void hl_pci_err_resume(struct pci_dev *pdev)
625 {
626 struct hl_device *hdev = pci_get_drvdata(pdev);
627
628 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
629 hl_device_resume(hdev);
630 }
631
632 /**
633 * hl_pci_err_slot_reset - a PCI slot reset has just happened
634 *
635 * @pdev: pointer to pci device
636 *
637 * Determine if the driver can recover from the PCI slot reset
638 */
hl_pci_err_slot_reset(struct pci_dev * pdev)639 static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
640 {
641 return PCI_ERS_RESULT_RECOVERED;
642 }
643
644 static const struct dev_pm_ops hl_pm_ops = {
645 .suspend = hl_pmops_suspend,
646 .resume = hl_pmops_resume,
647 };
648
649 static const struct pci_error_handlers hl_pci_err_handler = {
650 .error_detected = hl_pci_err_detected,
651 .slot_reset = hl_pci_err_slot_reset,
652 .resume = hl_pci_err_resume,
653 };
654
655 static struct pci_driver hl_pci_driver = {
656 .name = HL_NAME,
657 .id_table = ids,
658 .probe = hl_pci_probe,
659 .remove = hl_pci_remove,
660 .shutdown = hl_pci_remove,
661 .driver = {
662 .name = HL_NAME,
663 .pm = &hl_pm_ops,
664 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
665 },
666 .err_handler = &hl_pci_err_handler,
667 };
668
669 /*
670 * hl_init - Initialize the habanalabs kernel driver
671 */
hl_init(void)672 static int __init hl_init(void)
673 {
674 int rc;
675 dev_t dev;
676
677 pr_info("loading driver\n");
678
679 rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
680 if (rc < 0) {
681 pr_err("unable to get major\n");
682 return rc;
683 }
684
685 hl_major = MAJOR(dev);
686
687 hl_class = class_create(THIS_MODULE, HL_NAME);
688 if (IS_ERR(hl_class)) {
689 pr_err("failed to allocate class\n");
690 rc = PTR_ERR(hl_class);
691 goto remove_major;
692 }
693
694 hl_debugfs_init();
695
696 rc = pci_register_driver(&hl_pci_driver);
697 if (rc) {
698 pr_err("failed to register pci device\n");
699 goto remove_debugfs;
700 }
701
702 pr_debug("driver loaded\n");
703
704 return 0;
705
706 remove_debugfs:
707 hl_debugfs_fini();
708 class_destroy(hl_class);
709 remove_major:
710 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
711 return rc;
712 }
713
714 /*
715 * hl_exit - Release all resources of the habanalabs kernel driver
716 */
hl_exit(void)717 static void __exit hl_exit(void)
718 {
719 pci_unregister_driver(&hl_pci_driver);
720
721 /*
722 * Removing debugfs must be after all devices or simulator devices
723 * have been removed because otherwise we get a bug in the
724 * debugfs module for referencing NULL objects
725 */
726 hl_debugfs_fini();
727
728 class_destroy(hl_class);
729 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
730
731 idr_destroy(&hl_devs_idr);
732
733 pr_debug("driver removed\n");
734 }
735
/* Module entry and exit points */
module_init(hl_init);
module_exit(hl_exit);
738