Lines Matching +full:cs +full:- +full:number
1 /* SPDX-License-Identifier: GPL-2.0
3 * Copyright 2016-2022 HabanaLabs, Ltd.
19 #include <linux/dma-direction.h>
28 #include <linux/io-64-nonatomic-lo-hi.h>
30 #include <linux/dma-buf.h>
38 * bits[63:59] - Encode mmap type
39 * bits[45:0] - mmap offset value
44 #define HL_MMAP_TYPE_SHIFT (59 - PAGE_SHIFT)
98 * enum hl_mmu_page_table_location - mmu page table location
99 * @MMU_DR_PGT: page-table is located on device DRAM.
100 * @MMU_HR_PGT: page-table is located on host memory.
101 * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported.
104 MMU_DR_PGT = 0, /* device-dram-resident MMU PGT */
110 * enum hl_mmu_enablement - what mmu modules to enable
135 #define IS_POWER_OF_2(n) (n != 0 && ((n & (n - 1)) == 0))
169 * - HL_DRV_RESET_HARD
173 * - HL_DRV_RESET_FROM_RESET_THR
174 * Set if the caller is the hard-reset thread
176 * - HL_DRV_RESET_HEARTBEAT
179 * - HL_DRV_RESET_TDR
182 * - HL_DRV_RESET_DEV_RELEASE
185 * - HL_DRV_RESET_BYPASS_REQ_TO_FW
189 * - HL_DRV_RESET_FW_FATAL_ERR
192 * - HL_DRV_RESET_DELAY
217 #define HL_BLOCK_GLBL_SEC_SIZE (HL_BLOCK_SIZE - HL_BLOCK_GLBL_SEC_OFFS)
228 * struct iterate_module_ctx - HW module iterator
231 * @rc: return code for optional use of iterator/iterator-caller
239 * @offset: current HW module instance offset from the 1-st HW module instance
240 * in the 1-st block
256 * struct hl_gen_wait_properties - properties for generating a wait CB
276 * struct pgt_info - MMU hop page info.
277 * @node: hash linked-list node for the pgts on host (shadow pgts for device resident MMU and
290 * the newly allocated hop as well as to track number of PTEs in it.
304 * enum hl_pci_match_mode - pci match mode per region
314 * enum hl_fw_component - F/W components to read version through registers.
326 * enum hl_fw_types - F/W types present in the system
330 * @FW_TYPE_PREBOOT_CPU: Indicates pre-loaded CPUs are present in the system
344 * enum hl_queue_type - Supported QUEUE types.
373 * struct hl_inbound_pci_region - inbound region descriptor
389 * struct hl_outbound_pci_region - outbound region descriptor
399 * enum queue_cb_alloc_flags - Indicates queue support for CBs that
410 * struct hl_hw_sob - H/W SOB info.
434 * struct hw_queue_properties - queue information.
456 * enum vm_type - virtual memory mapping request information.
466 * enum mmu_op_flags - mmu operation relevant information.
481 * enum hl_device_hw_state - H/W device state. use this to understand whether
495 * struct hl_mmu_properties - ASIC specific MMU address translation properties.
527 * struct hl_hints_range - hint addresses reserved va range.
537 * struct asic_fixed_properties - ASIC specific immutable properties.
539 * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g.
541 * @uboot_ver: F/W U-boot version.
553 * @sram_user_base_address: SRAM physical start address for user access.
590 * @max_asid: maximum number of open contexts (ASIDs).
591 * @num_of_events: number of possible internal H/W IRQs.
598 * @cb_pool_cb_cnt: number of CBs in the CB pool.
626 * @max_dec: maximum number of decoders
628 * 1- enabled, 0- isolated.
630 * 1- faulty cluster, 0- good cluster.
632 * 1- enabled, 0- isolated.
637 * @num_engine_cores: number of engine cpu cores
646 * @user_interrupt_count: number of user interrupts.
647 * @user_dec_intr_count: number of decoder interrupts exposed to user.
651 * @completion_queues_count: number of completion queues.
652 * @completion_mode: 0 - job based completion, 1 - cs based completion
653 * @mme_master_slave_mode: 0 - Each MME works independently, 1 - MME works
663 * @num_functional_hbms: number of functional HBMs in each DCORE.
671 * @supports_compute_reset: is a reset which is not a hard-reset supported by this asic.
674 * in inference ASICs, as there is no real-world
675 * use-case of doing soft-reset in training (due
678 * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
794 * struct hl_fence - software synchronization primitive
799 * masters QIDs that multi cs is waiting on
817 * struct hl_cs_compl - command submission completion object.
821 * @hw_sob: the H/W SOB used in this signal/wait CS.
823 * @cs_seq: command submission sequence number.
824 * @type: type of the CS - signal/wait.
825 * @sob_val: the SOB value that is used in this signal/wait CS.
826 * @sob_group: the SOB group that is used in this collective wait CS.
827 * @encaps_signals: indication whether it's a completion object of cs with
848 * struct hl_ts_buff - describes a timestamp buffer.
862 * struct hl_mem_mgr - describes unified memory manager for mappable memory chunks.
874 * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior
893 * struct hl_mmap_mem_buf - describes a single unified memory buffer
917 * struct hl_cb - describes a Command Buffer.
928 * @cs_cnt: holds number of CS that this CB participates in.
973 /* Host <-> CPU-CP shared memory size */
977 * struct hl_sync_stream_properties -
1004 * struct hl_encaps_signals_mgr - describes sync stream encapsulated signals
1015 * struct hl_hw_queue - describes a H/W transport queue.
1026 * @msi_vec: the IRQ number of the H/W queue.
1027 * @int_queue_len: length of internal queue (number of entries).
1050 * struct hl_cq - describes a completion queue
1072 * struct hl_user_interrupt - holds user interrupt information
1088 * struct timestamp_reg_free_node - holds the timestamp registration free objects node
1099 /* struct timestamp_reg_work_obj - holds the timestamp registration free objects job
1112 /* struct timestamp_reg_info - holds the timestamp registration related data.
1132 * struct hl_user_pending_interrupt - holds a context to a user thread
1150 * struct hl_eq - describes the event queue (single one per device)
1171 * struct hl_dec - describes a decoder sw instance.
1185 * enum hl_asic_type - supported ASIC types.
1187 * @ASIC_GOYA: Goya device (HL-1000).
1188 * @ASIC_GAUDI: Gaudi device (HL-2000).
1189 * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
1205 * enum hl_pm_mng_profile - power management profile.
1217 * enum hl_pll_frequency - PLL frequency.
1255 * struct pci_mem_region - describe memory region in a PCI bar
1273 * struct static_fw_load_mgr - static FW load manager
1276 * @kmd_msg_to_cpu_reg: register address for KMD->CPU messages
1305 * struct fw_response - FW response to LKD command
1317 * struct dynamic_fw_load_mgr - dynamic FW load manager
1335 * struct pre_fw_load_props - needed properties for pre-FW load
1353 * struct fw_image_props - properties of FW image
1365 * struct fw_load_mgr - manager FW loading process
1398 * struct engines_data - asic engines data
1410 * struct hl_asic_funcs - ASIC specific functions that can be called from
1414 * @late_init: sets up late driver/hw state (post hw_init) - Optional.
1415 * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional.
1422 * hw_fini and before CS rollback.
1427 * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
1451 * @hl_dma_unmap_sgtable: DMA unmap scatter-gather table.
1453 * @asic_dma_map_sgtable: DMA map scatter-gather table.
1461 * @handle_eqe: handle event queue entry (IRQ) from CPU-CP.
1467 * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with ASID-VA-size mask.
1468 * @mmu_prefetch_cache_range: pre-fetch specific MMU STLB cache lines with ASID-VA-size mask.
1469 * @send_heartbeat: send is-alive packet to CPU-CP and verify response.
1491 * @pre_schedule_cs: Perform pre-CS-scheduling operations.
1504 * and place them in the relevant cs jobs
1505 * @collective_wait_create_jobs: allocate collective wait cs jobs
1509 * @descramble_addr: Routine to de-scramble the address prior to
1519 * after every hard-reset of the device
1521 * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event
1524 * @init_firmware_preload_params: initialize pre FW-load parameters.
1625 int (*pre_schedule_cs)(struct hl_cs *cs);
1640 int (*collective_wait_init_cs)(struct hl_cs *cs);
1642 struct hl_ctx *ctx, struct hl_cs *cs,
1683 * enum hl_va_range_type - virtual address range type.
1696 * struct hl_va_range - virtual addresses range.
1712 * struct hl_cs_counters_atomic - command submission counters
1717 * @max_cs_in_flight_drop_cnt: dropped due to maximum CS in-flight
1730 * struct hl_dmabuf_priv - a dma-buf private object.
1731 * @dmabuf: pointer to dma-buf object.
1732 * @ctx: pointer to the dma-buf owner's context.
1733 * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported for
1736 * if phys_pg_pack is NULL (dma-buf was exported from address).
1749 * struct hl_cs_outcome - represents a single completed CS outcome
1753 * @seq: the original cs sequence
1754 * @error: error code cs completed with, if any
1765 * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes
1766 * @outcome_map: index of completed CS searchable by sequence number
1769 * @nodes_pool: a static pool of pre-allocated outcome objects
1781 * struct hl_ctx - user/kernel context.
1785 * @hr_mmu_phys_hash: if host-resident MMU is used, holds a mapping from
1786 * MMU-hop-page physical address to its host-resident
1791 * this hits 0. It is incremented on CS and CS_WAIT.
1792 * @cs_pending: array of hl fence objects representing pending CS.
1794 * command submissions for a long time after CS id wraparound.
1805 * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
1806 * to user so user could inquire about CS. It is used as
1851 * struct hl_ctx_mgr - for handling multiple contexts.
1866 * struct hl_userptr - memory mapping chunk information
1868 * @job_node: linked-list node for hanging the object on the Job's list.
1871 * @sgt: pointer to the scatter-gather table that holds the pages.
1875 * @addr: user-space virtual address of the start of the memory area.
1894 * struct hl_cs - command submission.
1896 * @ctx: the context this CS belongs to.
1897 * @job_list: list of the CS's jobs in the various queues.
1898 * @job_lock: spinlock for the CS's jobs list. Needed for free_job.
1899 * @refcount: reference counter for usage of the CS.
1900 * @fence: pointer to the fence object of this CS.
1901 * @signal_fence: pointer to the fence object of the signal CS (used by wait
1902 * CS only).
1903 * @finish_work: workqueue object to run when CS is completed by H/W.
1906 * @staged_cs_node: node in the staged cs list.
1909 * @sequence: the sequence number of this CS.
1910 * @staged_sequence: the sequence of the staged submission this CS is part of,
1912 * @timeout_jiffies: cs timeout in jiffies.
1913 * @submission_time_jiffies: submission time of the cs
1916 * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
1919 * cs with encaps signals.
1920 * @submitted: true if CS was submitted to H/W.
1921 * @completed: true if CS was completed by device.
1922 * @timedout: true if CS was timed out.
1923 * @tdr_active: true if TDR was activated for this CS (to prevent
1925 * @aborted: true if CS was aborted due to some device error.
1927 * @staged_last: true if this is the last staged CS and needs completion.
1928 * @staged_first: true if this is the first staged CS and we need to receive
1929 * timeout for this CS.
1930 * @staged_cs: true if this CS is part of a staged submission.
1933 * @encaps_signals: true if this CS has encaps reserved signals.
1972 * struct hl_cs_job - command submission job.
1973 * @cs_node: the node to hang on the CS jobs list.
1974 * @cs: the CS this job belongs to.
1979 * @userptr_list: linked-list of userptr mappings that belong to this job and
1982 * @refcount: reference counter for usage of the CS job.
1984 * @id: the id of this job inside a CS.
1991 * handle to a kernel-allocated CB object, false
2002 struct hl_cs *cs; member
2020 * struct hl_cs_parser - command submission parser properties.
2024 * @job_userptr_list: linked-list of userptr mappings that belong to the related
2026 * @cs_sequence: the sequence number of the related CS.
2028 * @ctx_id: the ID of the context the related CS belongs to.
2032 * @job_id: the id of the related job inside the related CS.
2034 * handle to a kernel-allocated CB object, false
2042 * @completion: true if we need completion for this CS.
2065 * struct hl_vm_hash_node - hash element from virtual address to virtual
2079 * struct hl_vm_hw_block_list_node - list element from user virtual address to
2085 * @mapped_size: size of the block which is mapped. May change if partial un-mappings are done.
2098 * struct hl_vm_phys_pg_pack - physical page pack.
2105 * @mapping_cnt: number of shared mappings.
2106 * @exporting_cnt: number of dma-buf exporting.
2133 * struct hl_vm_va_block - virtual range block information.
2147 * struct hl_vm - virtual memory manager for MMU.
2169 * struct hl_debug_params - Coresight debug parameters.
2187 * struct hl_notifier_event - holds the notifier data structure
2203 * struct hl_fpriv - process information stored in FD private data.
2213 * @refcount: number of related contexts.
2239 * struct hl_info_list - debugfs file ops.
2252 * struct hl_debugfs_entry - debugfs dentry wrapper.
2262 * struct hl_dbg_device_entry - ASIC specific debugfs manager.
2280 * @state_dump: data of the system states in case of a bad cs.
2326 * struct hl_hw_obj_name_entry - single hw object name, member of
2379 * struct hl_mon_state_dump - represents a state dump of a single monitor
2397 * struct hl_sync_to_engine_map_entry - sync object id to engine mapping entry
2410 * struct hl_sync_to_engine_map - maps sync object id to associated engine id
2419 * struct hl_state_dump_specs_funcs - virtual functions used by the state dump
2441 * struct hl_state_dump_specs - defines ASIC known hw objects names
2477 #define RREG32(reg) hdev->asic_funcs->rreg(hdev, (reg))
2478 #define WREG32(reg, v) hdev->asic_funcs->wreg(hdev, (reg), (v))
2480 hdev->asic_funcs->rreg(hdev, (reg)))
2517 if (hdev->pdev) \
2549 __rc ? __rc : ((cond) ? 0 : -ETIMEDOUT); \
2563 * TODO: use read from PCI bar in other places in the code (SW-91406)
2574 if (hdev->pdev) \
2583 __rc = -EINVAL; \
2585 __elem_bitmask = BIT_ULL(arr_size) - 1; \
2609 __rc ? __rc : ((__elem_bitmask == 0) ? 0 : -ETIMEDOUT); \
2632 * might need to be byte-swapped before returning value to caller.
2638 if (hdev->pdev) \
2662 (cond) ? 0 : -ETIMEDOUT; \
2669 p->address = base; \
2670 p->size = sz; \
2685 * struct hl_device_reset_work - reset workqueue task wrapper.
2699 * struct hl_mmu_hr_pgt_priv - used for holding per-device mmu host-resident
2700 * page-table internal information.
2701 * @mmu_pgt_pool: pool of page tables used by a host-resident MMU for
2703 * @mmu_asid_hop0: per-ASID array of host-resident hop0 tables.
2711 * struct hl_mmu_dr_pgt_priv - used for holding per-device mmu device-resident
2712 * page-table internal information.
2722 * struct hl_mmu_priv - used for holding per-device mmu internal information.
2723 * @dr: information on the device-resident MMU, when exists.
2724 * @hr: information on the host-resident MMU, when exists.
2732 * struct hl_mmu_per_hop_info - A structure describing one TLB HOP and its entry
2746 * struct hl_mmu_hop_info - A structure describing the TLB hops and their
2747 * hop-entries that were created in order to translate a virtual address to a
2750 * address replaces the original virtual-address when mapped
2752 * @unscrambled_paddr: The un-scrambled physical address.
2753 * @hop_info: Array holding the per-hop information used for the translation.
2754 * @used_hops: The number of hops used for the translation.
2766 * struct hl_hr_mmu_funcs - Device related host resident MMU functions.
2782 * struct hl_mmu_funcs - Device related MMU functions.
2792 * @get_tlb_info: returns the list of hops and hop-entries used that were
2813 * struct hl_prefetch_work - prefetch work structure handler
2816 * @va: virtual address to pre-fetch.
2817 * @size: pre-fetch size.
2831 * number of user contexts allowed to call wait_for_multi_cs ioctl in
2837 * struct multi_cs_completion - multi CS wait completion.
2838 * @completion: completion of any of the CS in the list
2840 * @timestamp: timestamp for the multi-CS completion
2841 * @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
2854 * struct multi_cs_data - internal data for multi CS call
2857 * @seq_arr: array of CS sequence numbers
2858 * @timeout_jiffies: timeout in jiffies for waiting for CS to complete
2859 * @timestamp: timestamp of first completed CS
2860 * @wait_status: wait for CS status
2861 * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
2863 * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
2864 * @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
2880 * struct hl_clk_throttle_timestamp - current/last clock throttling timestamp
2890 * struct hl_clk_throttle - keeps current/last clock throttling timestamps
2906 * struct user_mapped_block - describes a hw block allowed to be mmapped by user
2916 * struct cs_timeout_info - info of last CS timeout occurred.
2917 * @timestamp: CS timeout timestamp.
2918 * @write_enable: if set writing to CS parameters in the structure is enabled. otherwise - disabled,
2919 * so the first (root cause) CS timeout will not be overwritten.
2920 * @seq: CS timeout sequence number.
2929 * struct razwi_info - info about last razwi error occurred.
2932 * otherwise - disabled, so the first (root cause) razwi will not be overwritten.
2955 * struct undefined_opcode_info - info about last undefined opcode error
2959 * filled with values, it means the execution was in Lower-CP.
2962 * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array.
2964 * in Upper-CP (specific stream) and equal to 4 in case
2965 * of undefined opcode in Lower-CP.
2966 * @engine_id: engine-id that the error occurred on
2968 * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP.
2985 * struct hl_error_info - holds information collected during an error.
2986 * @cs_timeout: CS timeout error information.
2997 * struct hl_reset_info - holds current device reset information.
2999 * @compute_reset_cnt: number of compute resets since the driver was loaded.
3000 * @hard_reset_cnt: number of hard resets since the driver was loaded.
3004 * @in_compute_reset: Device is currently in reset but not in hard-reset.
3014 * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
3035 * struct hl_device - habanalabs device structure.
3045 * @work_heartbeat: delayed work for CPU-CP is-alive check.
3062 * @cs_cmplt_wq: work queue of CS completions for executing work in process
3065 * @pf_wq: work queue for MMU pre-fetch operations.
3068 * @cs_mirror_list: CS mirror list for TDR.
3071 * @event_queue: event queue for IRQ from CPU-CP.
3073 * @cpu_accessible_dma_mem: Host <-> CPU-CP shared memory CPU address.
3074 * @cpu_accessible_dma_address: Host <-> CPU-CP shared memory DMA address.
3075 * @cpu_accessible_dma_pool: Host <-> CPU-CP shared memory pool.
3078 * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue.
3106 * @aggregated_cs_counters: aggregated cs counters among all contexts
3107 * @mmu_priv: device-specific MMU data.
3108 * @mmu_func: device-related MMU functions.
3113 * @multi_cs_completion: array of multi-CS completion.
3122 * @timeout_jiffies: device CS timeout value.
3124 * value is saved so in case of hard-reset, the driver will restore
3125 * this value and update the F/W after the re-initialization
3138 * @open_counter: number of successful device open operations.
3144 * drams are binned-out
3146 * tpc engines are binned-out
3152 * indicates which decoder engines are binned-out
3154 * indicates which edma engines are binned-out
3158 * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
3164 * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
3168 * such as context-switch, context close, page free, etc.
3183 * @process_kill_trial_cnt: number of trials reset thread tried killing
3192 * @supports_wait_for_multi_cs: true if wait for multi CS is supported
3204 * @hard_reset_on_fw_events: Whether to do device hard-reset when a fatal event is received from
3351 /* Parameters for bring-up */
3365 * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
3367 * wait cs are used to wait for the reserved encaps signals.
3371 * @cs_seq: staged cs sequence which contains encapsulated signals
3375 * @count: signals number
3394 * typedef hl_ioctl_t - typedef for ioctl function in the driver
3404 * struct hl_ioctl_desc - describes an IOCTL entry of the driver.
3419 * hl_get_sg_info() - get number of pages and the DMA address from SG list.
3423 * Calculate the number of consecutive pages described by the SG list. Take the
3425 * to the number of needed pages.
3431 return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) + in hl_get_sg_info()
3432 (PAGE_SIZE - 1)) >> PAGE_SHIFT; in hl_get_sg_info()
3436 * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
3458 * hl_mem_area_crosses_range() - Checks whether address+size crossing a range.
3470 u64 end_address = address + size - 1; in hl_mem_area_crosses_range()
3507 int hl_hw_queue_schedule_cs(struct hl_cs *cs);
3510 void hl_hw_queue_update_ci(struct hl_cs *cs);
3514 #define hl_pi_2_offset(pi) ((pi) & (HL_QUEUE_LENGTH - 1))
3590 void cs_get(struct hl_cs *cs);
3591 bool cs_needs_completion(struct hl_cs *cs);
3592 bool cs_needs_timeout(struct hl_cs *cs);
3593 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
3767 struct hl_cs *cs, struct hl_cs_job *job,
3812 void hl_debugfs_add_cs(struct hl_cs *cs);
3813 void hl_debugfs_remove_cs(struct hl_cs *cs);
3858 static inline void hl_debugfs_add_cs(struct hl_cs *cs) in hl_debugfs_add_cs() argument
3862 static inline void hl_debugfs_remove_cs(struct hl_cs *cs) in hl_debugfs_remove_cs() argument