1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright 2016-2021 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 *
7 */
8
9 #define pr_fmt(fmt) "habanalabs: " fmt
10
11 #include "habanalabs.h"
12 #include "../include/hw_ip/pci/pci_general.h"
13
14 #include <linux/pci.h>
15 #include <linux/module.h>
16 #include <linux/vmalloc.h>
17
18 #define CREATE_TRACE_POINTS
19 #include <trace/events/habanalabs.h>
20
21 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
22
23 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
24
25 MODULE_AUTHOR(HL_DRIVER_AUTHOR);
26 MODULE_DESCRIPTION(HL_DRIVER_DESC);
27 MODULE_LICENSE("GPL v2");
28
29 static int hl_major;
30 static struct class *hl_class;
31 static DEFINE_IDR(hl_devs_idr);
32 static DEFINE_MUTEX(hl_devs_idr_lock);
33
34 #define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */
35 #define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */
36
37 static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
38 static int reset_on_lockup = 1;
39 static int memory_scrub;
40 static ulong boot_error_status_mask = ULONG_MAX;
41
42 module_param(timeout_locked, int, 0444);
43 MODULE_PARM_DESC(timeout_locked,
44 "Device lockup timeout in seconds (0 = disabled, default 30s)");
45
46 module_param(reset_on_lockup, int, 0444);
47 MODULE_PARM_DESC(reset_on_lockup,
48 "Do device reset on lockup (0 = no, 1 = yes, default yes)");
49
50 module_param(memory_scrub, int, 0444);
51 MODULE_PARM_DESC(memory_scrub,
52 "Scrub device memory in various states (0 = no, 1 = yes, default no)");
53
54 module_param(boot_error_status_mask, ulong, 0444);
55 MODULE_PARM_DESC(boot_error_status_mask,
56 "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)");
57
58 #define PCI_IDS_GOYA 0x0001
59 #define PCI_IDS_GAUDI 0x1000
60 #define PCI_IDS_GAUDI_SEC 0x1010
61
62 #define PCI_IDS_GAUDI2 0x1020
63
64 static const struct pci_device_id ids[] = {
65 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
66 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
67 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
68 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
69 { 0, }
70 };
71 MODULE_DEVICE_TABLE(pci, ids);
72
73 /*
74 * get_asic_type - translate device id to asic type
75 *
76 * @hdev: pointer to habanalabs device structure.
77 *
78 * Translate device id and revision id to asic type.
79 * In case of unidentified device, return -1
80 */
get_asic_type(struct hl_device * hdev)81 static enum hl_asic_type get_asic_type(struct hl_device *hdev)
82 {
83 struct pci_dev *pdev = hdev->pdev;
84 enum hl_asic_type asic_type = ASIC_INVALID;
85
86 switch (pdev->device) {
87 case PCI_IDS_GOYA:
88 asic_type = ASIC_GOYA;
89 break;
90 case PCI_IDS_GAUDI:
91 asic_type = ASIC_GAUDI;
92 break;
93 case PCI_IDS_GAUDI_SEC:
94 asic_type = ASIC_GAUDI_SEC;
95 break;
96 case PCI_IDS_GAUDI2:
97 switch (pdev->revision) {
98 case REV_ID_A:
99 asic_type = ASIC_GAUDI2;
100 break;
101 case REV_ID_B:
102 asic_type = ASIC_GAUDI2B;
103 break;
104 default:
105 break;
106 }
107 break;
108 default:
109 break;
110 }
111
112 return asic_type;
113 }
114
is_asic_secured(enum hl_asic_type asic_type)115 static bool is_asic_secured(enum hl_asic_type asic_type)
116 {
117 switch (asic_type) {
118 case ASIC_GAUDI_SEC:
119 return true;
120 default:
121 return false;
122 }
123 }
124
125 /*
126 * hl_device_open - open function for habanalabs device
127 *
128 * @inode: pointer to inode structure
129 * @filp: pointer to file structure
130 *
131 * Called when process opens an habanalabs device.
132 */
hl_device_open(struct inode * inode,struct file * filp)133 int hl_device_open(struct inode *inode, struct file *filp)
134 {
135 enum hl_device_status status;
136 struct hl_device *hdev;
137 struct hl_fpriv *hpriv;
138 int rc;
139
140 mutex_lock(&hl_devs_idr_lock);
141 hdev = idr_find(&hl_devs_idr, iminor(inode));
142 mutex_unlock(&hl_devs_idr_lock);
143
144 if (!hdev) {
145 pr_err("Couldn't find device %d:%d\n",
146 imajor(inode), iminor(inode));
147 return -ENXIO;
148 }
149
150 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
151 if (!hpriv)
152 return -ENOMEM;
153
154 hpriv->hdev = hdev;
155 filp->private_data = hpriv;
156 hpriv->filp = filp;
157
158 mutex_init(&hpriv->notifier_event.lock);
159 mutex_init(&hpriv->restore_phase_mutex);
160 mutex_init(&hpriv->ctx_lock);
161 kref_init(&hpriv->refcount);
162 nonseekable_open(inode, filp);
163
164 hl_ctx_mgr_init(&hpriv->ctx_mgr);
165 hl_mem_mgr_init(hpriv->hdev->dev, &hpriv->mem_mgr);
166
167 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
168
169 mutex_lock(&hdev->fpriv_list_lock);
170
171 if (!hl_device_operational(hdev, &status)) {
172 dev_dbg_ratelimited(hdev->dev,
173 "Can't open %s because it is %s\n",
174 dev_name(hdev->dev), hdev->status[status]);
175
176 if (status == HL_DEVICE_STATUS_IN_RESET ||
177 status == HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE)
178 rc = -EAGAIN;
179 else
180 rc = -EPERM;
181
182 goto out_err;
183 }
184
185 if (hdev->is_in_dram_scrub) {
186 dev_dbg_ratelimited(hdev->dev,
187 "Can't open %s during dram scrub\n",
188 dev_name(hdev->dev));
189 rc = -EAGAIN;
190 goto out_err;
191 }
192
193 if (hdev->compute_ctx_in_release) {
194 dev_dbg_ratelimited(hdev->dev,
195 "Can't open %s because another user is still releasing it\n",
196 dev_name(hdev->dev));
197 rc = -EAGAIN;
198 goto out_err;
199 }
200
201 if (hdev->is_compute_ctx_active) {
202 dev_dbg_ratelimited(hdev->dev,
203 "Can't open %s because another user is working on it\n",
204 dev_name(hdev->dev));
205 rc = -EBUSY;
206 goto out_err;
207 }
208
209 rc = hl_ctx_create(hdev, hpriv);
210 if (rc) {
211 dev_err(hdev->dev, "Failed to create context %d\n", rc);
212 goto out_err;
213 }
214
215 list_add(&hpriv->dev_node, &hdev->fpriv_list);
216 mutex_unlock(&hdev->fpriv_list_lock);
217
218 hdev->asic_funcs->send_device_activity(hdev, true);
219
220 hl_debugfs_add_file(hpriv);
221
222 hl_enable_err_info_capture(&hdev->captured_err_info);
223
224 hdev->open_counter++;
225 hdev->last_successful_open_jif = jiffies;
226 hdev->last_successful_open_ktime = ktime_get();
227
228 return 0;
229
230 out_err:
231 mutex_unlock(&hdev->fpriv_list_lock);
232 hl_mem_mgr_fini(&hpriv->mem_mgr);
233 hl_mem_mgr_idr_destroy(&hpriv->mem_mgr);
234 hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
235 filp->private_data = NULL;
236 mutex_destroy(&hpriv->ctx_lock);
237 mutex_destroy(&hpriv->restore_phase_mutex);
238 mutex_destroy(&hpriv->notifier_event.lock);
239 put_pid(hpriv->taskpid);
240
241 kfree(hpriv);
242
243 return rc;
244 }
245
hl_device_open_ctrl(struct inode * inode,struct file * filp)246 int hl_device_open_ctrl(struct inode *inode, struct file *filp)
247 {
248 struct hl_device *hdev;
249 struct hl_fpriv *hpriv;
250 int rc;
251
252 mutex_lock(&hl_devs_idr_lock);
253 hdev = idr_find(&hl_devs_idr, iminor(inode));
254 mutex_unlock(&hl_devs_idr_lock);
255
256 if (!hdev) {
257 pr_err("Couldn't find device %d:%d\n",
258 imajor(inode), iminor(inode));
259 return -ENXIO;
260 }
261
262 hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
263 if (!hpriv)
264 return -ENOMEM;
265
266 /* Prevent other routines from reading partial hpriv data by
267 * initializing hpriv fields before inserting it to the list
268 */
269 hpriv->hdev = hdev;
270 filp->private_data = hpriv;
271 hpriv->filp = filp;
272
273 mutex_init(&hpriv->notifier_event.lock);
274 nonseekable_open(inode, filp);
275
276 hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
277
278 mutex_lock(&hdev->fpriv_ctrl_list_lock);
279
280 if (!hl_ctrl_device_operational(hdev, NULL)) {
281 dev_dbg_ratelimited(hdev->dev_ctrl,
282 "Can't open %s because it is disabled\n",
283 dev_name(hdev->dev_ctrl));
284 rc = -EPERM;
285 goto out_err;
286 }
287
288 list_add(&hpriv->dev_node, &hdev->fpriv_ctrl_list);
289 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
290
291 return 0;
292
293 out_err:
294 mutex_unlock(&hdev->fpriv_ctrl_list_lock);
295 filp->private_data = NULL;
296 put_pid(hpriv->taskpid);
297
298 kfree(hpriv);
299
300 return rc;
301 }
302
set_driver_behavior_per_device(struct hl_device * hdev)303 static void set_driver_behavior_per_device(struct hl_device *hdev)
304 {
305 hdev->nic_ports_mask = 0;
306 hdev->fw_components = FW_TYPE_ALL_TYPES;
307 hdev->cpu_queues_enable = 1;
308 hdev->pldm = 0;
309 hdev->hard_reset_on_fw_events = 1;
310 hdev->bmc_enable = 1;
311 hdev->reset_on_preboot_fail = 1;
312 hdev->heartbeat = 1;
313 }
314
copy_kernel_module_params_to_device(struct hl_device * hdev)315 static void copy_kernel_module_params_to_device(struct hl_device *hdev)
316 {
317 hdev->asic_prop.fw_security_enabled = is_asic_secured(hdev->asic_type);
318
319 hdev->major = hl_major;
320 hdev->hclass = hl_class;
321 hdev->memory_scrub = memory_scrub;
322 hdev->reset_on_lockup = reset_on_lockup;
323 hdev->boot_error_status_mask = boot_error_status_mask;
324 }
325
fixup_device_params_per_asic(struct hl_device * hdev,int timeout)326 static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
327 {
328 switch (hdev->asic_type) {
329 case ASIC_GAUDI:
330 case ASIC_GAUDI_SEC:
331 /* If user didn't request a different timeout than the default one, we have
332 * a different default timeout for Gaudi
333 */
334 if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
335 hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
336 MSEC_PER_SEC);
337
338 hdev->reset_upon_device_release = 0;
339 break;
340
341 case ASIC_GOYA:
342 hdev->reset_upon_device_release = 0;
343 break;
344
345 default:
346 hdev->reset_upon_device_release = 1;
347 break;
348 }
349 }
350
fixup_device_params(struct hl_device * hdev)351 static int fixup_device_params(struct hl_device *hdev)
352 {
353 int tmp_timeout;
354
355 tmp_timeout = timeout_locked;
356
357 hdev->fw_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
358 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
359
360 if (tmp_timeout)
361 hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
362 else
363 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
364
365 hdev->stop_on_err = true;
366 hdev->reset_info.curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
367 hdev->reset_info.prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
368
369 /* Enable only after the initialization of the device */
370 hdev->disabled = true;
371
372 if (!(hdev->fw_components & FW_TYPE_PREBOOT_CPU) &&
373 (hdev->fw_components & ~FW_TYPE_PREBOOT_CPU)) {
374 pr_err("Preboot must be set along with other components");
375 return -EINVAL;
376 }
377
378 /* If CPU queues not enabled, no way to do heartbeat */
379 if (!hdev->cpu_queues_enable)
380 hdev->heartbeat = 0;
381 fixup_device_params_per_asic(hdev, tmp_timeout);
382
383 return 0;
384 }
385
386 /**
387 * create_hdev - create habanalabs device instance
388 *
389 * @dev: will hold the pointer to the new habanalabs device structure
390 * @pdev: pointer to the pci device
391 *
392 * Allocate memory for habanalabs device and initialize basic fields
393 * Identify the ASIC type
394 * Allocate ID (minor) for the device (only for real devices)
395 */
create_hdev(struct hl_device ** dev,struct pci_dev * pdev)396 static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
397 {
398 int main_id, ctrl_id = 0, rc = 0;
399 struct hl_device *hdev;
400
401 *dev = NULL;
402
403 hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
404 if (!hdev)
405 return -ENOMEM;
406
407 /* Will be NULL in case of simulator device */
408 hdev->pdev = pdev;
409
410 /* Assign status description string */
411 strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL], "operational", HL_STR_MAX);
412 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET], "in reset", HL_STR_MAX);
413 strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION], "disabled", HL_STR_MAX);
414 strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET], "needs reset", HL_STR_MAX);
415 strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
416 "in device creation", HL_STR_MAX);
417 strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE],
418 "in reset after device release", HL_STR_MAX);
419
420
421 /* First, we must find out which ASIC are we handling. This is needed
422 * to configure the behavior of the driver (kernel parameters)
423 */
424 hdev->asic_type = get_asic_type(hdev);
425 if (hdev->asic_type == ASIC_INVALID) {
426 dev_err(&pdev->dev, "Unsupported ASIC\n");
427 rc = -ENODEV;
428 goto free_hdev;
429 }
430
431 copy_kernel_module_params_to_device(hdev);
432
433 set_driver_behavior_per_device(hdev);
434
435 fixup_device_params(hdev);
436
437 mutex_lock(&hl_devs_idr_lock);
438
439 /* Always save 2 numbers, 1 for main device and 1 for control.
440 * They must be consecutive
441 */
442 main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS, GFP_KERNEL);
443
444 if (main_id >= 0)
445 ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
446 main_id + 2, GFP_KERNEL);
447
448 mutex_unlock(&hl_devs_idr_lock);
449
450 if ((main_id < 0) || (ctrl_id < 0)) {
451 if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
452 pr_err("too many devices in the system\n");
453
454 if (main_id >= 0) {
455 mutex_lock(&hl_devs_idr_lock);
456 idr_remove(&hl_devs_idr, main_id);
457 mutex_unlock(&hl_devs_idr_lock);
458 }
459
460 rc = -EBUSY;
461 goto free_hdev;
462 }
463
464 hdev->id = main_id;
465 hdev->id_control = ctrl_id;
466
467 *dev = hdev;
468
469 return 0;
470
471 free_hdev:
472 kfree(hdev);
473 return rc;
474 }
475
476 /*
477 * destroy_hdev - destroy habanalabs device instance
478 *
479 * @dev: pointer to the habanalabs device structure
480 *
481 */
destroy_hdev(struct hl_device * hdev)482 static void destroy_hdev(struct hl_device *hdev)
483 {
484 /* Remove device from the device list */
485 mutex_lock(&hl_devs_idr_lock);
486 idr_remove(&hl_devs_idr, hdev->id);
487 idr_remove(&hl_devs_idr, hdev->id_control);
488 mutex_unlock(&hl_devs_idr_lock);
489
490 kfree(hdev);
491 }
492
/*
 * hl_pmops_suspend - power-management suspend callback
 *
 * @dev: pointer to the generic device
 *
 * Forwards to hl_device_suspend(). A missing driver-data pointer is logged
 * and reported as success (0).
 */
static int hl_pmops_suspend(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to suspend PCI device\n");

	if (hdev)
		return hl_device_suspend(hdev);

	pr_err("device pointer is NULL in suspend\n");
	return 0;
}
506
/*
 * hl_pmops_resume - power-management resume callback
 *
 * @dev: pointer to the generic device
 *
 * Forwards to hl_device_resume(). A missing driver-data pointer is logged
 * and reported as success (0).
 */
static int hl_pmops_resume(struct device *dev)
{
	struct hl_device *hdev = dev_get_drvdata(dev);

	pr_debug("Going to resume PCI device\n");

	if (hdev)
		return hl_device_resume(hdev);

	pr_err("device pointer is NULL in resume\n");
	return 0;
}
520
521 /**
522 * hl_pci_probe - probe PCI habanalabs devices
523 *
524 * @pdev: pointer to pci device
525 * @id: pointer to pci device id structure
526 *
527 * Standard PCI probe function for habanalabs device.
528 * Create a new habanalabs device and initialize it according to the
529 * device's type
530 */
hl_pci_probe(struct pci_dev * pdev,const struct pci_device_id * id)531 static int hl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
532 {
533 struct hl_device *hdev;
534 int rc;
535
536 dev_info(&pdev->dev, HL_NAME
537 " device found [%04x:%04x] (rev %x)\n",
538 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
539
540 rc = create_hdev(&hdev, pdev);
541 if (rc)
542 return rc;
543
544 pci_set_drvdata(pdev, hdev);
545
546 rc = hl_device_init(hdev);
547 if (rc) {
548 dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
549 rc = -ENODEV;
550 goto disable_device;
551 }
552
553 return 0;
554
555 disable_device:
556 pci_set_drvdata(pdev, NULL);
557 destroy_hdev(hdev);
558
559 return rc;
560 }
561
562 /*
563 * hl_pci_remove - remove PCI habanalabs devices
564 *
565 * @pdev: pointer to pci device
566 *
567 * Standard PCI remove function for habanalabs device
568 */
hl_pci_remove(struct pci_dev * pdev)569 static void hl_pci_remove(struct pci_dev *pdev)
570 {
571 struct hl_device *hdev;
572
573 hdev = pci_get_drvdata(pdev);
574 if (!hdev)
575 return;
576
577 hl_device_fini(hdev);
578 pci_set_drvdata(pdev, NULL);
579 destroy_hdev(hdev);
580 }
581
582 /**
583 * hl_pci_err_detected - a PCI bus error detected on this device
584 *
585 * @pdev: pointer to pci device
586 * @state: PCI error type
587 *
588 * Called by the PCI subsystem whenever a non-correctable
589 * PCI bus error is detected
590 */
591 static pci_ers_result_t
hl_pci_err_detected(struct pci_dev * pdev,pci_channel_state_t state)592 hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
593 {
594 struct hl_device *hdev = pci_get_drvdata(pdev);
595 enum pci_ers_result result;
596
597 switch (state) {
598 case pci_channel_io_normal:
599 dev_warn(hdev->dev, "PCI normal state error detected\n");
600 return PCI_ERS_RESULT_CAN_RECOVER;
601
602 case pci_channel_io_frozen:
603 dev_warn(hdev->dev, "PCI frozen state error detected\n");
604 result = PCI_ERS_RESULT_NEED_RESET;
605 break;
606
607 case pci_channel_io_perm_failure:
608 dev_warn(hdev->dev, "PCI failure state error detected\n");
609 result = PCI_ERS_RESULT_DISCONNECT;
610 break;
611
612 default:
613 result = PCI_ERS_RESULT_NONE;
614 }
615
616 hdev->asic_funcs->halt_engines(hdev, true, false);
617
618 return result;
619 }
620
621 /**
622 * hl_pci_err_resume - resume after a PCI slot reset
623 *
624 * @pdev: pointer to pci device
625 *
626 */
hl_pci_err_resume(struct pci_dev * pdev)627 static void hl_pci_err_resume(struct pci_dev *pdev)
628 {
629 struct hl_device *hdev = pci_get_drvdata(pdev);
630
631 dev_warn(hdev->dev, "Resuming device after PCI slot reset\n");
632 hl_device_resume(hdev);
633 }
634
635 /**
636 * hl_pci_err_slot_reset - a PCI slot reset has just happened
637 *
638 * @pdev: pointer to pci device
639 *
640 * Determine if the driver can recover from the PCI slot reset
641 */
hl_pci_err_slot_reset(struct pci_dev * pdev)642 static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
643 {
644 struct hl_device *hdev = pci_get_drvdata(pdev);
645
646 dev_warn(hdev->dev, "PCI slot reset detected\n");
647
648 return PCI_ERS_RESULT_RECOVERED;
649 }
650
651 static const struct dev_pm_ops hl_pm_ops = {
652 .suspend = hl_pmops_suspend,
653 .resume = hl_pmops_resume,
654 };
655
656 static const struct pci_error_handlers hl_pci_err_handler = {
657 .error_detected = hl_pci_err_detected,
658 .slot_reset = hl_pci_err_slot_reset,
659 .resume = hl_pci_err_resume,
660 };
661
662 static struct pci_driver hl_pci_driver = {
663 .name = HL_NAME,
664 .id_table = ids,
665 .probe = hl_pci_probe,
666 .remove = hl_pci_remove,
667 .shutdown = hl_pci_remove,
668 .driver = {
669 .name = HL_NAME,
670 .pm = &hl_pm_ops,
671 .probe_type = PROBE_PREFER_ASYNCHRONOUS,
672 },
673 .err_handler = &hl_pci_err_handler,
674 };
675
676 /*
677 * hl_init - Initialize the habanalabs kernel driver
678 */
hl_init(void)679 static int __init hl_init(void)
680 {
681 int rc;
682 dev_t dev;
683
684 pr_info("loading driver\n");
685
686 rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
687 if (rc < 0) {
688 pr_err("unable to get major\n");
689 return rc;
690 }
691
692 hl_major = MAJOR(dev);
693
694 hl_class = class_create(HL_NAME);
695 if (IS_ERR(hl_class)) {
696 pr_err("failed to allocate class\n");
697 rc = PTR_ERR(hl_class);
698 goto remove_major;
699 }
700
701 hl_debugfs_init();
702
703 rc = pci_register_driver(&hl_pci_driver);
704 if (rc) {
705 pr_err("failed to register pci device\n");
706 goto remove_debugfs;
707 }
708
709 pr_debug("driver loaded\n");
710
711 return 0;
712
713 remove_debugfs:
714 hl_debugfs_fini();
715 class_destroy(hl_class);
716 remove_major:
717 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
718 return rc;
719 }
720
721 /*
722 * hl_exit - Release all resources of the habanalabs kernel driver
723 */
hl_exit(void)724 static void __exit hl_exit(void)
725 {
726 pci_unregister_driver(&hl_pci_driver);
727
728 /*
729 * Removing debugfs must be after all devices or simulator devices
730 * have been removed because otherwise we get a bug in the
731 * debugfs module for referencing NULL objects
732 */
733 hl_debugfs_fini();
734
735 class_destroy(hl_class);
736 unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
737
738 idr_destroy(&hl_devs_idr);
739
740 pr_debug("driver removed\n");
741 }
742
743 module_init(hl_init);
744 module_exit(hl_exit);
745