/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 *
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 *
 */

#ifndef HABANALABS_H_
#define HABANALABS_H_

#include <linux/types.h>
#include <linux/ioctl.h>

/*
 * Defines that are asic-specific but constitute the ABI between the kernel
 * driver and userspace
 */
#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START	0x8000	/* 32KB */

/*
 * Queue Numbering
 *
 * The external queues (PCI DMA channels) MUST be before the internal queues
 * and each group (PCI DMA channels and internal) must be contiguous inside
 * itself, but there can be a gap between the two groups (although not
 * recommended)
 */

enum goya_queue_id {
	GOYA_QUEUE_ID_DMA_0 = 0,
	GOYA_QUEUE_ID_DMA_1 = 1,
	GOYA_QUEUE_ID_DMA_2 = 2,
	GOYA_QUEUE_ID_DMA_3 = 3,
	GOYA_QUEUE_ID_DMA_4 = 4,
	GOYA_QUEUE_ID_CPU_PQ = 5,
	GOYA_QUEUE_ID_MME = 6,	/* Internal queues start here */
	GOYA_QUEUE_ID_TPC0 = 7,
	GOYA_QUEUE_ID_TPC1 = 8,
	GOYA_QUEUE_ID_TPC2 = 9,
	GOYA_QUEUE_ID_TPC3 = 10,
	GOYA_QUEUE_ID_TPC4 = 11,
	GOYA_QUEUE_ID_TPC5 = 12,
	GOYA_QUEUE_ID_TPC6 = 13,
	GOYA_QUEUE_ID_TPC7 = 14,
	GOYA_QUEUE_ID_SIZE
};

/*
 * Engine Numbering
 *
 * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle'
 */

enum goya_engine_id {
	GOYA_ENGINE_ID_DMA_0 = 0,
	GOYA_ENGINE_ID_DMA_1,
	GOYA_ENGINE_ID_DMA_2,
	GOYA_ENGINE_ID_DMA_3,
	GOYA_ENGINE_ID_DMA_4,
	GOYA_ENGINE_ID_MME_0,
	GOYA_ENGINE_ID_TPC_0,
	GOYA_ENGINE_ID_TPC_1,
	GOYA_ENGINE_ID_TPC_2,
	GOYA_ENGINE_ID_TPC_3,
	GOYA_ENGINE_ID_TPC_4,
	GOYA_ENGINE_ID_TPC_5,
	GOYA_ENGINE_ID_TPC_6,
	GOYA_ENGINE_ID_TPC_7,
	GOYA_ENGINE_ID_SIZE
};

enum hl_device_status {
	HL_DEVICE_STATUS_OPERATIONAL,
	HL_DEVICE_STATUS_IN_RESET,
	HL_DEVICE_STATUS_MALFUNCTION
};
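/*
 * Example (illustrative sketch, not part of the ABI): each bit in the
 * "busy_engines_mask" returned by the HL_INFO_HW_IDLE opcode below
 * corresponds to one value of the engine enum above, so userspace can test
 * a single engine like this:
 *
 *	struct hl_info_hw_idle idle;	// filled by the INFO IOCTL
 *
 *	int tpc0_busy = (idle.busy_engines_mask >> GOYA_ENGINE_ID_TPC_0) & 1;
 */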
/* Opcode for management ioctl
 *
 * HL_INFO_HW_IP_INFO - Receive information about different IP blocks in the
 *                      device.
 * HL_INFO_HW_EVENTS - Receive an array describing how many times each event
 *                     occurred since the last hard reset.
 * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the
 *                      specific context. This is relevant only for devices
 *                      where the dram is managed by the kernel driver
 * HL_INFO_HW_IDLE - Retrieve information about the idle status of each
 *                   internal engine.
 * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't
 *                         require an open context.
 * HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device
 *                              over the last period specified by the user.
 *                              The period can be between 100ms and 1s, in
 *                              resolution of 100ms. The return value is a
 *                              percentage of the utilization rate.
 * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times
 *                               each event occurred since the driver was
 *                               loaded.
 */
#define HL_INFO_HW_IP_INFO		0
#define HL_INFO_HW_EVENTS		1
#define HL_INFO_DRAM_USAGE		2
#define HL_INFO_HW_IDLE			3
#define HL_INFO_DEVICE_STATUS		4
#define HL_INFO_DEVICE_UTILIZATION	6
#define HL_INFO_HW_EVENTS_AGGREGATE	7

#define HL_INFO_VERSION_MAX_LEN		128

struct hl_info_hw_ip_info {
	__u64 sram_base_address;
	__u64 dram_base_address;
	__u64 dram_size;
	__u32 sram_size;
	__u32 num_of_events;
	__u32 device_id; /* PCI Device ID */
	__u32 reserved[3];
	__u32 armcp_cpld_version;
	__u32 psoc_pci_pll_nr;
	__u32 psoc_pci_pll_nf;
	__u32 psoc_pci_pll_od;
	__u32 psoc_pci_pll_div_factor;
	__u8 tpc_enabled_mask;
	__u8 dram_enabled;
	__u8 pad[2];
	__u8 armcp_version[HL_INFO_VERSION_MAX_LEN];
};

struct hl_info_dram_usage {
	__u64 dram_free_mem;
	__u64 ctx_dram_mem;
};

struct hl_info_hw_idle {
	__u32 is_idle;
	/*
	 * Bitmask of busy engines.
	 * Bits definition is according to `enum <chip>_engine_id'.
	 */
	__u32 busy_engines_mask;
};

struct hl_info_device_status {
	__u32 status;
	__u32 pad;
};

struct hl_info_device_utilization {
	__u32 utilization;
	__u32 pad;
};

struct hl_info_args {
	/* Location of relevant struct in userspace */
	__u64 return_pointer;
	/*
	 * The size of the return value. Just like "size" in "snprintf", it
	 * limits how many bytes the kernel can write.
	 *
	 * For the hw_events array, the size should be
	 * hl_info_hw_ip_info.num_of_events * sizeof(__u32)
	 */
	__u32 return_size;

	/* HL_INFO_* */
	__u32 op;

	union {
		/* Context ID - Currently not in use */
		__u32 ctx_id;
		/* Period value for utilization rate (100ms - 1000ms, in 100ms
		 * resolution)
		 */
		__u32 period_ms;
	};

	__u32 pad;
};
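/*
 * Example (illustrative sketch, assuming "fd" is an open device file
 * descriptor and <sys/ioctl.h>, <string.h> and <stdint.h> are included):
 * querying the H/W IP information with the INFO IOCTL defined at the bottom
 * of this file:
 *
 *	struct hl_info_hw_ip_info hw_ip;
 *	struct hl_info_args args;
 *
 *	memset(&hw_ip, 0, sizeof(hw_ip));
 *	memset(&args, 0, sizeof(args));
 *	args.return_pointer = (__u64) (uintptr_t) &hw_ip;
 *	args.return_size = sizeof(hw_ip);
 *	args.op = HL_INFO_HW_IP_INFO;
 *
 *	if (ioctl(fd, HL_IOCTL_INFO, &args))
 *		; // handle errno
 */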
/* Opcode to create a new command buffer */
#define HL_CB_OP_CREATE		0
/* Opcode to destroy previously created command buffer */
#define HL_CB_OP_DESTROY	1

struct hl_cb_in {
	/* Handle of CB or 0 if we want to create one */
	__u64 cb_handle;
	/* HL_CB_OP_* */
	__u32 op;
	/* Size of CB. Maximum size is 2MB. The minimum size that will be
	 * allocated, regardless of this parameter's value, is PAGE_SIZE
	 */
	__u32 cb_size;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

struct hl_cb_out {
	/* Handle of CB */
	__u64 cb_handle;
};

union hl_cb_args {
	struct hl_cb_in in;
	struct hl_cb_out out;
};
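/*
 * Example (illustrative sketch, assuming "fd" is an open device file
 * descriptor and <sys/ioctl.h>, <sys/mman.h> and <string.h> are included):
 * creating a 4KB command buffer and mapping it into the process. The
 * returned handle is assumed here to be usable directly as the mmap offset:
 *
 *	union hl_cb_args cb;
 *	void *cb_va;
 *
 *	memset(&cb, 0, sizeof(cb));
 *	cb.in.op = HL_CB_OP_CREATE;
 *	cb.in.cb_size = 0x1000;
 *
 *	if (ioctl(fd, HL_IOCTL_CB, &cb))
 *		; // handle errno
 *
 *	cb_va = mmap(NULL, 0x1000, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
 *		     cb.out.cb_handle);
 */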
/*
 * This structure size must always be fixed to 64-bytes for backward
 * compatibility
 */
struct hl_cs_chunk {
	/*
	 * For an external queue, this represents a Handle of CB on the Host.
	 * For an internal queue, this represents an SRAM or DRAM address of
	 * the internal CB
	 */
	__u64 cb_handle;
	/* Index of queue to put the CB on */
	__u32 queue_index;
	/*
	 * Size of command buffer with valid packets
	 * Can be smaller than the actual CB size
	 */
	__u32 cb_size;
	/* HL_CS_CHUNK_FLAGS_* */
	__u32 cs_chunk_flags;
	/* Align structure to 64 bytes */
	__u32 pad[11];
};

#define HL_CS_FLAGS_FORCE_RESTORE	0x1

#define HL_CS_STATUS_SUCCESS		0

struct hl_cs_in {
	/* this holds address of array of hl_cs_chunk for restore phase */
	__u64 chunks_restore;
	/* this holds address of array of hl_cs_chunk for execution phase */
	__u64 chunks_execute;
	/* this holds address of array of hl_cs_chunk for store phase -
	 * Currently not in use
	 */
	__u64 chunks_store;
	/* Number of chunks in restore phase array */
	__u32 num_chunks_restore;
	/* Number of chunks in execution array */
	__u32 num_chunks_execute;
	/* Number of chunks in store phase array - Currently not in use */
	__u32 num_chunks_store;
	/* HL_CS_FLAGS_* */
	__u32 cs_flags;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
};

struct hl_cs_out {
	/*
	 * seq holds the sequence number of the CS to pass to wait ioctl. All
	 * values are valid except for 0 and ULLONG_MAX
	 */
	__u64 seq;
	/* HL_CS_STATUS_* */
	__u32 status;
	__u32 pad;
};

union hl_cs_args {
	struct hl_cs_in in;
	struct hl_cs_out out;
};

struct hl_wait_cs_in {
	/* Command submission sequence number */
	__u64 seq;
	/* Absolute timeout to wait in microseconds */
	__u64 timeout_us;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

#define HL_WAIT_CS_STATUS_COMPLETED	0
#define HL_WAIT_CS_STATUS_BUSY		1
#define HL_WAIT_CS_STATUS_TIMEDOUT	2
#define HL_WAIT_CS_STATUS_ABORTED	3
#define HL_WAIT_CS_STATUS_INTERRUPTED	4

struct hl_wait_cs_out {
	/* HL_WAIT_CS_STATUS_* */
	__u32 status;
	__u32 pad;
};

union hl_wait_cs_args {
	struct hl_wait_cs_in in;
	struct hl_wait_cs_out out;
};
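/*
 * Example (illustrative sketch, assuming "fd" is an open device file
 * descriptor and "cb_handle"/"cb_size" come from a previous CB creation):
 * submitting a single JOB on an external queue and waiting for it:
 *
 *	struct hl_cs_chunk chunk;
 *	union hl_cs_args cs;
 *	union hl_wait_cs_args wait;
 *
 *	memset(&chunk, 0, sizeof(chunk));
 *	chunk.cb_handle = cb_handle;
 *	chunk.queue_index = GOYA_QUEUE_ID_DMA_0;	// an external queue
 *	chunk.cb_size = cb_size;
 *
 *	memset(&cs, 0, sizeof(cs));
 *	cs.in.chunks_execute = (__u64) (uintptr_t) &chunk;
 *	cs.in.num_chunks_execute = 1;
 *
 *	if (ioctl(fd, HL_IOCTL_CS, &cs) ||
 *			cs.out.status != HL_CS_STATUS_SUCCESS)
 *		; // handle the error
 *
 *	memset(&wait, 0, sizeof(wait));
 *	wait.in.seq = cs.out.seq;
 *	wait.in.timeout_us = 1000000;	// give the CS up to one second
 *
 *	if (ioctl(fd, HL_IOCTL_WAIT_CS, &wait))
 *		; // handle errno (EINTR/ETIMEDOUT/EIO/ENODEV, see below)
 */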
/* Opcode to alloc device memory */
#define HL_MEM_OP_ALLOC		0
/* Opcode to free previously allocated device memory */
#define HL_MEM_OP_FREE		1
/* Opcode to map host memory */
#define HL_MEM_OP_MAP		2
/* Opcode to unmap previously mapped host memory */
#define HL_MEM_OP_UNMAP		3

/* Memory flags */
#define HL_MEM_CONTIGUOUS	0x1
#define HL_MEM_SHARED		0x2
#define HL_MEM_USERPTR		0x4

struct hl_mem_in {
	union {
		/* HL_MEM_OP_ALLOC - allocate device memory */
		struct {
			/* Size to alloc */
			__u64 mem_size;
		} alloc;

		/* HL_MEM_OP_FREE - free device memory */
		struct {
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} free;

		/* HL_MEM_OP_MAP - map device memory */
		struct {
			/*
			 * Requested virtual address of mapped memory.
			 * The driver will try to map the requested region to
			 * this hint address, as long as the address is valid
			 * and not already mapped. The user should check the
			 * returned address of the IOCTL to make sure it is
			 * indeed the hint address. Passing 0 here means that
			 * the driver will choose the address itself.
			 */
			__u64 hint_addr;
			/* Handle returned from HL_MEM_OP_ALLOC */
			__u64 handle;
		} map_device;

		/* HL_MEM_OP_MAP - map host memory */
		struct {
			/* Address of allocated host memory */
			__u64 host_virt_addr;
			/*
			 * Requested virtual address of mapped memory.
			 * The driver will try to map the requested region to
			 * this hint address, as long as the address is valid
			 * and not already mapped. The user should check the
			 * returned address of the IOCTL to make sure it is
			 * indeed the hint address. Passing 0 here means that
			 * the driver will choose the address itself.
			 */
			__u64 hint_addr;
			/* Size of allocated host memory */
			__u64 mem_size;
		} map_host;

		/* HL_MEM_OP_UNMAP - unmap host memory */
		struct {
			/* Virtual address returned from HL_MEM_OP_MAP */
			__u64 device_virt_addr;
		} unmap;
	};

	/* HL_MEM_OP_* */
	__u32 op;
	/* HL_MEM_* flags */
	__u32 flags;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
	__u32 pad;
};

struct hl_mem_out {
	union {
		/*
		 * Used for HL_MEM_OP_MAP as the virtual address that was
		 * assigned in the device VA space.
		 * A value of 0 means the requested operation failed.
		 */
		__u64 device_virt_addr;

		/*
		 * Used for HL_MEM_OP_ALLOC. This is the assigned
		 * handle for the allocated memory
		 */
		__u64 handle;
	};
};

union hl_mem_args {
	struct hl_mem_in in;
	struct hl_mem_out out;
};
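/*
 * Example (illustrative sketch, assuming "fd" is an open device file
 * descriptor and "buf" points to page-aligned host memory of "size" bytes):
 * mapping host memory into the device MMU with the MEMORY IOCTL defined
 * below:
 *
 *	union hl_mem_args mem;
 *	__u64 device_va;
 *
 *	memset(&mem, 0, sizeof(mem));
 *	mem.in.op = HL_MEM_OP_MAP;
 *	mem.in.flags = HL_MEM_USERPTR;
 *	mem.in.map_host.host_virt_addr = (__u64) (uintptr_t) buf;
 *	mem.in.map_host.mem_size = size;
 *	mem.in.map_host.hint_addr = 0;	// let the driver pick the device VA
 *
 *	if (ioctl(fd, HL_IOCTL_MEMORY, &mem) || !mem.out.device_virt_addr)
 *		; // handle the error
 *
 *	device_va = mem.out.device_virt_addr;
 */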
#define HL_DEBUG_MAX_AUX_VALUES		10

struct hl_debug_params_etr {
	/* Address in memory to allocate buffer */
	__u64 buffer_address;

	/* Size of buffer to allocate */
	__u64 buffer_size;

	/* Sink operation mode: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};

struct hl_debug_params_etf {
	/* Address in memory to allocate buffer */
	__u64 buffer_address;

	/* Size of buffer to allocate */
	__u64 buffer_size;

	/* Sink operation mode: SW fifo, HW fifo, Circular buffer */
	__u32 sink_mode;
	__u32 pad;
};

struct hl_debug_params_stm {
	/* Two bit masks for HW event and Stimulus Port */
	__u64 he_mask;
	__u64 sp_mask;

	/* Trace source ID */
	__u32 id;

	/* Frequency for the timestamp register */
	__u32 frequency;
};

struct hl_debug_params_bmon {
	/* Two address ranges that the user can request to filter */
	__u64 start_addr0;
	__u64 addr_mask0;

	__u64 start_addr1;
	__u64 addr_mask1;

	/* Capture window configuration */
	__u32 bw_win;
	__u32 win_capture;

	/* Trace source ID */
	__u32 id;
	__u32 pad;
};

struct hl_debug_params_spmu {
	/* Event types selection */
	__u64 event_types[HL_DEBUG_MAX_AUX_VALUES];

	/* Number of event types selection */
	__u32 event_types_num;
	__u32 pad;
};

/* Opcode for ETR component */
#define HL_DEBUG_OP_ETR		0
/* Opcode for ETF component */
#define HL_DEBUG_OP_ETF		1
/* Opcode for STM component */
#define HL_DEBUG_OP_STM		2
/* Opcode for FUNNEL component */
#define HL_DEBUG_OP_FUNNEL	3
/* Opcode for BMON component */
#define HL_DEBUG_OP_BMON	4
/* Opcode for SPMU component */
#define HL_DEBUG_OP_SPMU	5
/* Opcode for timestamp (deprecated) */
#define HL_DEBUG_OP_TIMESTAMP	6
/* Opcode for setting the device into or out of debug mode. The enable
 * variable should be 1 for enabling debug mode and 0 for disabling it
 */
#define HL_DEBUG_OP_SET_MODE	7

struct hl_debug_args {
	/*
	 * Pointer to user input structure.
	 * This field is relevant to specific opcodes.
	 */
	__u64 input_ptr;
	/* Pointer to user output structure */
	__u64 output_ptr;
	/* Size of user input structure */
	__u32 input_size;
	/* Size of user output structure */
	__u32 output_size;
	/* HL_DEBUG_OP_* */
	__u32 op;
	/*
	 * Register index in the component, taken from the debug_regs_index
	 * enum in the various ASIC header files
	 */
	__u32 reg_idx;
	/* Enable/disable */
	__u32 enable;
	/* Context ID - Currently not in use */
	__u32 ctx_id;
};
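/*
 * Example (illustrative sketch, assuming "fd" is an open device file
 * descriptor): putting the device into debug mode before configuring any of
 * the trace components, using the DEBUG IOCTL defined below:
 *
 *	struct hl_debug_args debug;
 *
 *	memset(&debug, 0, sizeof(debug));
 *	debug.op = HL_DEBUG_OP_SET_MODE;
 *	debug.enable = 1;	// 0 to leave debug mode when done
 *
 *	if (ioctl(fd, HL_IOCTL_DEBUG, &debug))
 *		; // handle errno
 */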
/*
 * Various information operations such as:
 * - H/W IP information
 * - Current dram usage
 *
 * The user calls this IOCTL with an opcode that describes the required
 * information. The user should supply a pointer to a user-allocated memory
 * chunk, which will be filled by the driver with the requested information.
 *
 * The user supplies the maximum size to copy into the user's memory, in
 * order to prevent data corruption in case of differences between the
 * definitions of structures in kernel and userspace, e.g. in case of an old
 * userspace and a new kernel driver
 */
#define HL_IOCTL_INFO	\
		_IOWR('H', 0x01, struct hl_info_args)

/*
 * Command Buffer
 * - Request a Command Buffer
 * - Destroy a Command Buffer
 *
 * The command buffers are memory blocks that reside in DMA-able address
 * space and are physically contiguous so they can be accessed by the device
 * directly. They are allocated using the coherent DMA API.
 *
 * When creating a new CB, the IOCTL returns a handle of it, and the
 * user-space process needs to use that handle to mmap the buffer so it can
 * access it.
 *
 */
#define HL_IOCTL_CB		\
		_IOWR('H', 0x02, union hl_cb_args)

/*
 * Command Submission
 *
 * To submit work to the device, the user needs to call this IOCTL with a set
 * of JOBS. That set of JOBS constitutes a CS object.
 * Each JOB will be enqueued on a specific queue, according to the user's
 * input. There can be more than one JOB per queue.
 *
 * The CS IOCTL will receive three sets of JOBS. One set is for the "restore"
 * phase, a second set is for the "execution" phase and a third set is for
 * the "store" phase. The JOBS on the "restore" phase are enqueued only after
 * context-switch (or if it is the first CS for this context). The user can
 * also order the driver to run the "restore" phase explicitly
 *
 * There are two types of queues - external and internal. External queues
 * are DMA queues which transfer data from/to the Host. All other queues are
 * internal. The driver will get completion notifications from the device
 * only on JOBS which are enqueued in the external queues.
 *
 * For jobs on external queues, the user needs to create command buffers
 * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on
 * internal queues, the user needs to prepare a "command buffer" with packets
 * on either the SRAM or DRAM, and give the device address of that buffer to
 * the CS ioctl.
 *
 * This IOCTL is asynchronous in regard to the actual execution of the CS.
 * This means it returns immediately after ALL the JOBS were enqueued on
 * their relevant queues. Therefore, the user mustn't assume the CS has been
 * completed or has even started to execute.
 *
 * Upon successful enqueue, the IOCTL returns a sequence number which the
 * user can use with the "Wait for CS" IOCTL to check whether the handle's CS
 * external JOBS have been completed. Note that if the CS has internal JOBS
 * which can execute AFTER the external JOBS have finished, the driver might
 * report that the CS has finished executing BEFORE the internal JOBS have
 * actually finished executing.
 *
 * Even though the sequence number increments per CS, the user can NOT
 * automatically assume that if CS with sequence number N finished, then CS
 * with sequence number N-1 also finished. The user can make this assumption
 * if and only if CS N and CS N-1 are exactly the same (same CBs for the same
 * queues).
 */
#define HL_IOCTL_CS			\
		_IOWR('H', 0x03, union hl_cs_args)

/*
 * Wait for Command Submission
 *
 * The user can call this IOCTL with a handle it received from the CS IOCTL
 * to wait until the handle's CS has finished executing. The user will wait
 * inside the kernel until the CS has finished or until the user-requested
 * timeout has expired.
 *
 * The return value of the IOCTL is a standard Linux error code. The possible
 * values are:
 *
 * EINTR     - Kernel waiting has been interrupted, e.g. due to an OS signal
 *             that the user process received
 * ETIMEDOUT - The CS has caused a timeout on the device
 * EIO       - The CS was aborted (usually because the device was reset)
 * ENODEV    - The device wants to do hard-reset (so the user needs to close
 *             the FD)
 *
 * The driver also returns a custom define inside the IOCTL which can be:
 *
 * HL_WAIT_CS_STATUS_COMPLETED   - The CS has been completed successfully (0)
 * HL_WAIT_CS_STATUS_BUSY        - The CS is still executing (0)
 * HL_WAIT_CS_STATUS_TIMEDOUT    - The CS has caused a timeout on the device
 *                                 (ETIMEDOUT)
 * HL_WAIT_CS_STATUS_ABORTED     - The CS was aborted, usually because the
 *                                 device was reset (EIO)
 * HL_WAIT_CS_STATUS_INTERRUPTED - Waiting for the CS was interrupted (EINTR)
 *
 */

#define HL_IOCTL_WAIT_CS		\
		_IOWR('H', 0x04, union hl_wait_cs_args)

/*
 * Memory
 * - Map host memory to device MMU
 * - Unmap host memory from device MMU
 *
 * This IOCTL allows the user to map host memory to the device MMU
 *
 * For host memory, the IOCTL doesn't allocate memory. The user is supposed
 * to allocate the memory in user-space (malloc/new). The driver pins the
 * physical pages (up to the limit allowed by the OS), assigns a virtual
 * address in the device VA space and initializes the device MMU.
 *
 * There is an option for the user to specify the requested virtual address.
 *
 */
#define HL_IOCTL_MEMORY		\
		_IOWR('H', 0x05, union hl_mem_args)

/*
 * Debug
 * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces
 *
 * This IOCTL allows the user to get debug traces from the chip.
 *
 * Before the user can send configuration requests of the various
 * debug/profile engines, it needs to set the device into debug mode.
 * This is because the debug/profile infrastructure is a shared component in
 * the device and we can't allow multiple users to access it at the same
 * time.
 *
 * Once a user sets the device into debug mode, the driver won't allow other
 * users to "work" with the device, i.e. open a FD. If multiple users have
 * the device open, the driver won't allow any user to debug the device.
 *
 * For each configuration request, the user needs to provide the register
 * index and essential data such as buffer address and size.
 *
 * Once the user has finished using the debug/profile engines, the user
 * should set the device into non-debug mode, i.e. disable debug mode.
 *
 * The driver can decide to "kick out" a user that abuses this interface.
 *
 */
#define HL_IOCTL_DEBUG		\
		_IOWR('H', 0x06, struct hl_debug_args)

#define HL_COMMAND_START	0x01
#define HL_COMMAND_END		0x07

#endif /* HABANALABS_H_ */