1 // SPDX-License-Identifier: GPL-2.0
4 * Copyright 2016-2019 HabanaLabs, Ltd.
18 * enum hl_cs_wait_status - cs wait status
19 * @CS_WAIT_STATUS_BUSY: cs was not completed yet
20 * @CS_WAIT_STATUS_COMPLETED: cs completed
21 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone
39 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset()
41 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); in hl_sob_reset()
43 hdev->asic_funcs->reset_sob(hdev, hw_sob); in hl_sob_reset()
45 hw_sob->need_reset = false; in hl_sob_reset()
52 struct hl_device *hdev = hw_sob->hdev; in hl_sob_reset_error()
54 dev_crit(hdev->dev, in hl_sob_reset_error()
56 hw_sob->q_idx, hw_sob->sob_id); in hl_sob_reset_error()
62 kref_put(&hw_sob->kref, hl_sob_reset); in hw_sob_put()
68 kref_put(&hw_sob->kref, hl_sob_reset_error); in hw_sob_put_err()
74 kref_get(&hw_sob->kref); in hw_sob_get()
78 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
83 * Return: 0 if given parameters are valid
89 if (sob_mask == 0) in hl_gen_sob_mask()
90 return -EINVAL; in hl_gen_sob_mask()
92 if (sob_mask == 0x1) { in hl_gen_sob_mask()
93 *mask = ~(1 << (sob_base & 0x7)); in hl_gen_sob_mask()
96 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) in hl_gen_sob_mask()
100 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) in hl_gen_sob_mask()
101 return -EINVAL; in hl_gen_sob_mask()
106 return 0; in hl_gen_sob_mask()
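/*
 * Illustrative sketch, not part of the driver: a worked example of the
 * single-SOB branch above. With sob_base = 10 the in-group offset is
 * 10 & 0x7 = 2, so the generated monitor mask is ~(1 << 2) = 0xFB, a byte
 * with only the monitored SOB's bit cleared within its 8-SOB group. The
 * helper name is hypothetical; u8/u16 come from <linux/types.h>.
 */
static inline u8 example_single_sob_mask(u16 sob_base)
{
	return (u8)~(1U << (sob_base & 0x7));	/* sob_base = 10 -> 0xFB */
}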
123 kref_put(&fence->refcount, hl_fence_release); in hl_fence_put()
130 for (i = 0; i < len; i++, fence++) in hl_fences_put()
137 kref_get(&fence->refcount); in hl_fence_get()
142 kref_init(&fence->refcount); in hl_fence_init()
143 fence->cs_sequence = sequence; in hl_fence_init()
144 fence->error = 0; in hl_fence_init()
145 fence->timestamp = ktime_set(0, 0); in hl_fence_init()
146 init_completion(&fence->completion); in hl_fence_init()
149 void cs_get(struct hl_cs *cs) in cs_get() argument
151 kref_get(&cs->refcount); in cs_get()
154 static int cs_get_unless_zero(struct hl_cs *cs) in cs_get_unless_zero() argument
156 return kref_get_unless_zero(&cs->refcount); in cs_get_unless_zero()
159 static void cs_put(struct hl_cs *cs) in cs_put() argument
161 kref_put(&cs->refcount, cs_do_release); in cs_put()
173 kref_put(&job->refcount, cs_job_do_release); in cs_job_put()
176 bool cs_needs_completion(struct hl_cs *cs) in cs_needs_completion() argument
178 /* In case this is a staged CS, only the last CS in sequence should in cs_needs_completion()
179 * get a completion, any non staged CS will always get a completion in cs_needs_completion()
181 if (cs->staged_cs && !cs->staged_last) in cs_needs_completion()
187 bool cs_needs_timeout(struct hl_cs *cs) in cs_needs_timeout() argument
189 /* In case this is a staged CS, only the first CS in sequence should in cs_needs_timeout()
190 * get a timeout, any non staged CS will always get a timeout in cs_needs_timeout()
192 if (cs->staged_cs && !cs->staged_first) in cs_needs_timeout()
204 return (job->queue_type == QUEUE_TYPE_EXT || in is_cb_patched()
205 (job->queue_type == QUEUE_TYPE_HW && in is_cb_patched()
206 job->is_kernel_allocated_cb && in is_cb_patched()
207 !hdev->mmu_enable)); in is_cb_patched()
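/*
 * Illustrative sketch, not part of the driver: the predicate above,
 * restated per queue type. External-queue jobs always run from a patched
 * CB; HW-queue jobs only when the CB was allocated by the kernel and the
 * device has no MMU; other queue types never do. The enum name is assumed
 * from the QUEUE_TYPE_* constants used above and the helper is hypothetical.
 */
static bool example_job_uses_patched_cb(enum hl_queue_type type,
					bool kernel_alloc_cb, bool mmu_enable)
{
	return type == QUEUE_TYPE_EXT ||
	       (type == QUEUE_TYPE_HW && kernel_alloc_cb && !mmu_enable);
}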
211 * cs_parser - parse the user command submission
223 struct hl_device *hdev = hpriv->hdev; in cs_parser()
227 parser.ctx_id = job->cs->ctx->asid; in cs_parser()
228 parser.cs_sequence = job->cs->sequence; in cs_parser()
229 parser.job_id = job->id; in cs_parser()
231 parser.hw_queue_id = job->hw_queue_id; in cs_parser()
232 parser.job_userptr_list = &job->userptr_list; in cs_parser()
234 parser.user_cb = job->user_cb; in cs_parser()
235 parser.user_cb_size = job->user_cb_size; in cs_parser()
236 parser.queue_type = job->queue_type; in cs_parser()
237 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; in cs_parser()
238 job->patched_cb = NULL; in cs_parser()
239 parser.completion = cs_needs_completion(job->cs); in cs_parser()
241 rc = hdev->asic_funcs->cs_parser(hdev, &parser); in cs_parser()
245 job->patched_cb = parser.patched_cb; in cs_parser()
246 job->job_cb_size = parser.patched_cb_size; in cs_parser()
247 job->contains_dma_pkt = parser.contains_dma_pkt; in cs_parser()
248 atomic_inc(&job->patched_cb->cs_cnt); in cs_parser()
254 * won't be accessed again for this CS in cs_parser()
256 atomic_dec(&job->user_cb->cs_cnt); in cs_parser()
257 hl_cb_put(job->user_cb); in cs_parser()
258 job->user_cb = NULL; in cs_parser()
260 job->job_cb_size = job->user_cb_size; in cs_parser()
268 struct hl_cs *cs = job->cs; in complete_job() local
271 hl_userptr_delete_list(hdev, &job->userptr_list); in complete_job()
277 if (job->patched_cb) { in complete_job()
278 atomic_dec(&job->patched_cb->cs_cnt); in complete_job()
279 hl_cb_put(job->patched_cb); in complete_job()
288 if (job->is_kernel_allocated_cb && in complete_job()
289 ((job->queue_type == QUEUE_TYPE_HW && hdev->mmu_enable) || in complete_job()
290 job->queue_type == QUEUE_TYPE_INT)) { in complete_job()
291 atomic_dec(&job->user_cb->cs_cnt); in complete_job()
292 hl_cb_put(job->user_cb); in complete_job()
299 spin_lock(&cs->job_lock); in complete_job()
300 list_del(&job->cs_node); in complete_job()
301 spin_unlock(&cs->job_lock); in complete_job()
305 /* We decrement reference only for a CS that gets completion in complete_job()
306 * because the reference was incremented only for this kind of CS in complete_job()
309 * In staged submission, only the last CS marked as 'staged_last' in complete_job()
311 * As for all the other CS's in the staged submission which do not get in complete_job()
312 * completion, their CS reference will be decremented by the in complete_job()
313 * 'staged_last' CS during the CS release flow. in complete_job()
314 * All relevant PQ CI counters will be incremented during the CS release in complete_job()
317 if (cs_needs_completion(cs) && in complete_job()
318 (job->queue_type == QUEUE_TYPE_EXT || in complete_job()
319 job->queue_type == QUEUE_TYPE_HW)) in complete_job()
320 cs_put(cs); in complete_job()
326 * hl_staged_cs_find_first - locate the first CS in this staged submission
331 * @note: This function must be called under 'hdev->cs_mirror_lock'
333 * Find and return a CS pointer with the given sequence
337 struct hl_cs *cs; in hl_staged_cs_find_first() local
339 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) in hl_staged_cs_find_first()
340 if (cs->staged_cs && cs->staged_first && in hl_staged_cs_find_first()
341 cs->sequence == cs_seq) in hl_staged_cs_find_first()
342 return cs; in hl_staged_cs_find_first()
348 * is_staged_cs_last_exists - returns true if the last CS in sequence exists
351 * @cs: staged submission member
354 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) in is_staged_cs_last_exists() argument
358 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, in is_staged_cs_last_exists()
361 if (last_entry->staged_last) in is_staged_cs_last_exists()
368 * staged_cs_get - get CS reference if this CS is a part of a staged CS
371 * @cs: current CS
374 * Increment CS reference for every CS in this staged submission except for
375 * the CS which get completion.
377 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_get() argument
379 /* Only the last CS in this staged submission will get a completion. in staged_cs_get()
380 * We must increment the reference for all other CS's in this in staged_cs_get()
384 if (!cs->staged_last) in staged_cs_get()
385 cs_get(cs); in staged_cs_get()
389 * staged_cs_put - put a CS in case it is part of staged submission
392 * @cs: CS to put
394 * This function decrements a CS reference (for a non completion CS)
396 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) in staged_cs_put() argument
398 /* We release all CS's in a staged submission except the last in staged_cs_put()
399 * CS which we have never incremented its reference. in staged_cs_put()
401 if (!cs_needs_completion(cs)) in staged_cs_put()
402 cs_put(cs); in staged_cs_put()
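/*
 * Illustrative sketch, not part of the driver: how the staged-submission
 * flags map to the completion/timeout handling above. For a batch of three
 * CSs only the first is armed with a TDR timeout (cs_needs_timeout()) and
 * only the last produces a completion (cs_needs_completion()); a regular,
 * non-staged CS gets both. The helper is hypothetical and only sets the
 * struct hl_cs fields used by those predicates.
 */
static void example_staged_batch(struct hl_cs *first, struct hl_cs *mid,
				 struct hl_cs *last)
{
	first->staged_cs = mid->staged_cs = last->staged_cs = true;
	first->staged_first = true;	/* only this CS gets a TDR timeout */
	last->staged_last = true;	/* only this CS gets a completion  */
	/* "mid" gets neither; its reference is dropped by the last CS */
}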
405 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) in cs_handle_tdr() argument
410 if (!cs_needs_timeout(cs)) in cs_handle_tdr()
413 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
416 * Hence, we choose the CS that reaches this function first which is in cs_handle_tdr()
417 * the CS marked as 'staged_last'. in cs_handle_tdr()
418 * In case single staged cs was submitted which has both first and last in cs_handle_tdr()
420 * removed the cs node from the list before getting here, in cs_handle_tdr()
421 * in such cases just continue with the cs to cancel its TDR work. in cs_handle_tdr()
423 if (cs->staged_cs && cs->staged_last) { in cs_handle_tdr()
424 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); in cs_handle_tdr()
426 cs = first_cs; in cs_handle_tdr()
429 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
431 /* Don't cancel TDR in case this CS was timedout because we might be in cs_handle_tdr()
434 if (cs && (cs->timedout || in cs_handle_tdr()
435 hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT)) in cs_handle_tdr()
438 if (cs && cs->tdr_active) in cs_handle_tdr()
439 cancel_delayed_work_sync(&cs->work_tdr); in cs_handle_tdr()
441 spin_lock(&hdev->cs_mirror_lock); in cs_handle_tdr()
443 /* queue TDR for next CS */ in cs_handle_tdr()
444 list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node) in cs_handle_tdr()
450 if (next_entry_found && !next->tdr_active) { in cs_handle_tdr()
451 next->tdr_active = true; in cs_handle_tdr()
452 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); in cs_handle_tdr()
455 spin_unlock(&hdev->cs_mirror_lock); in cs_handle_tdr()
459 * force_complete_multi_cs - complete all contexts that wait on multi-CS
467 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in force_complete_multi_cs()
470 mcs_compl = &hdev->multi_cs_completion[i]; in force_complete_multi_cs()
472 spin_lock(&mcs_compl->lock); in force_complete_multi_cs()
474 if (!mcs_compl->used) { in force_complete_multi_cs()
475 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
480 * multi-CS. in force_complete_multi_cs()
484 dev_err(hdev->dev, in force_complete_multi_cs()
485 "multi-CS completion context %d still waiting when calling force completion\n", in force_complete_multi_cs()
487 complete_all(&mcs_compl->completion); in force_complete_multi_cs()
488 spin_unlock(&mcs_compl->lock); in force_complete_multi_cs()
493 * complete_multi_cs - complete all waiting entities on multi-CS
496 * @cs: CS structure
498 * with the completed CS.
500 * - a completed CS worked on stream master QID 4, multi CS completion
503 * - a completed CS worked on stream master QID 4, multi CS completion
507 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) in complete_multi_cs() argument
509 struct hl_fence *fence = cs->fence; in complete_multi_cs()
512 /* in case of multi CS check for completion only for the first CS */ in complete_multi_cs()
513 if (cs->staged_cs && !cs->staged_first) in complete_multi_cs()
516 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in complete_multi_cs()
519 mcs_compl = &hdev->multi_cs_completion[i]; in complete_multi_cs()
520 if (!mcs_compl->used) in complete_multi_cs()
523 spin_lock(&mcs_compl->lock); in complete_multi_cs()
528 * 2. the completed CS has at least one overlapping stream in complete_multi_cs()
531 if (mcs_compl->used && in complete_multi_cs()
532 (fence->stream_master_qid_map & in complete_multi_cs()
533 mcs_compl->stream_master_qid_map)) { in complete_multi_cs()
534 /* extract the timestamp only of first completed CS */ in complete_multi_cs()
535 if (!mcs_compl->timestamp) in complete_multi_cs()
536 mcs_compl->timestamp = in complete_multi_cs()
537 ktime_to_ns(fence->timestamp); in complete_multi_cs()
538 complete_all(&mcs_compl->completion); in complete_multi_cs()
541 spin_unlock(&mcs_compl->lock); in complete_multi_cs()
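/*
 * Illustrative sketch, not part of the driver: the overlap test above is a
 * plain bitmap intersection. If the completed CS ran on stream master QID 4
 * its fence map has bit 4 set; a waiter registered on QIDs 3 and 4 has bits
 * 3 and 4 set, so the AND is non-zero and the waiter is completed (matching
 * the QID 4 example in the comment above). Parameter names and widths are
 * local to this sketch.
 */
static bool example_multi_cs_overlap(u32 fence_map, u32 waiter_map)
{
	return (fence_map & waiter_map) != 0;	/* e.g. 0x10 & 0x18 -> true */
}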
546 struct hl_cs *cs, in cs_release_sob_reset_handler() argument
549 /* Skip this handler if the cs wasn't submitted, to avoid putting in cs_release_sob_reset_handler()
553 if (!hl_cs_cmpl->hw_sob || !cs->submitted) in cs_release_sob_reset_handler()
556 spin_lock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
559 * we get refcount upon reservation of signals or signal/wait cs for the in cs_release_sob_reset_handler()
560 * hw_sob object, and need to put it when the first staged cs in cs_release_sob_reset_handler()
561 * (which contains the encaps signals) or cs signal/wait is completed. in cs_release_sob_reset_handler()
563 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || in cs_release_sob_reset_handler()
564 (hl_cs_cmpl->type == CS_TYPE_WAIT) || in cs_release_sob_reset_handler()
565 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || in cs_release_sob_reset_handler()
566 (!!hl_cs_cmpl->encaps_signals)) { in cs_release_sob_reset_handler()
567 dev_dbg(hdev->dev, in cs_release_sob_reset_handler()
568 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", in cs_release_sob_reset_handler()
569 hl_cs_cmpl->cs_seq, in cs_release_sob_reset_handler()
570 hl_cs_cmpl->type, in cs_release_sob_reset_handler()
571 hl_cs_cmpl->hw_sob->sob_id, in cs_release_sob_reset_handler()
572 hl_cs_cmpl->sob_val); in cs_release_sob_reset_handler()
574 hw_sob_put(hl_cs_cmpl->hw_sob); in cs_release_sob_reset_handler()
576 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) in cs_release_sob_reset_handler()
577 hdev->asic_funcs->reset_sob_group(hdev, in cs_release_sob_reset_handler()
578 hl_cs_cmpl->sob_group); in cs_release_sob_reset_handler()
581 spin_unlock(&hl_cs_cmpl->lock); in cs_release_sob_reset_handler()
586 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); in cs_do_release() local
587 struct hl_device *hdev = cs->ctx->hdev; in cs_do_release()
590 container_of(cs->fence, struct hl_cs_compl, base_fence); in cs_do_release()
592 cs->completed = true; in cs_do_release()
596 * finished, because each one of them took refcnt to CS, we still in cs_do_release()
598 * will have leaked memory and what's worse, the CS object (and in cs_do_release()
602 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_do_release()
605 if (!cs->submitted) { in cs_do_release()
607 * In case the wait for signal CS was submitted, the fence put in cs_do_release()
611 if (cs->type == CS_TYPE_WAIT || in cs_do_release()
612 cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_do_release()
613 hl_fence_put(cs->signal_fence); in cs_do_release()
619 hl_hw_queue_update_ci(cs); in cs_do_release()
621 /* remove CS from CS mirror list */ in cs_do_release()
622 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
623 list_del_init(&cs->mirror_node); in cs_do_release()
624 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
626 cs_handle_tdr(hdev, cs); in cs_do_release()
628 if (cs->staged_cs) { in cs_do_release()
629 /* the completion CS decrements reference for the entire in cs_do_release()
632 if (cs->staged_last) { in cs_do_release()
636 &cs->staged_cs_node, staged_cs_node) in cs_do_release()
640 /* A staged CS will be a member in the list only after it in cs_do_release()
644 if (cs->submitted) { in cs_do_release()
645 spin_lock(&hdev->cs_mirror_lock); in cs_do_release()
646 list_del(&cs->staged_cs_node); in cs_do_release()
647 spin_unlock(&hdev->cs_mirror_lock); in cs_do_release()
650 /* decrement refcount to handle when first staged cs in cs_do_release()
653 if (hl_cs_cmpl->encaps_signals) in cs_do_release()
654 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, in cs_do_release()
658 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_do_release()
659 && cs->encaps_signals) in cs_do_release()
660 kref_put(&cs->encaps_sig_hdl->refcount, in cs_do_release()
667 hl_debugfs_remove_cs(cs); in cs_do_release()
669 hl_ctx_put(cs->ctx); in cs_do_release()
675 if (cs->timedout) in cs_do_release()
676 cs->fence->error = -ETIMEDOUT; in cs_do_release()
677 else if (cs->aborted) in cs_do_release()
678 cs->fence->error = -EIO; in cs_do_release()
679 else if (!cs->submitted) in cs_do_release()
680 cs->fence->error = -EBUSY; in cs_do_release()
682 if (unlikely(cs->skip_reset_on_timeout)) { in cs_do_release()
683 dev_err(hdev->dev, in cs_do_release()
685 cs->sequence, in cs_do_release()
686 div_u64(jiffies - cs->submission_time_jiffies, HZ)); in cs_do_release()
689 if (cs->timestamp) in cs_do_release()
690 cs->fence->timestamp = ktime_get(); in cs_do_release()
691 complete_all(&cs->fence->completion); in cs_do_release()
692 complete_multi_cs(hdev, cs); in cs_do_release()
694 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); in cs_do_release()
696 hl_fence_put(cs->fence); in cs_do_release()
698 kfree(cs->jobs_in_queue_cnt); in cs_do_release()
699 kfree(cs); in cs_do_release()
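/*
 * Illustrative sketch, not part of the driver: how a CS outcome is reported
 * through its fence in the release path above. The helper is hypothetical;
 * the error values mirror the assignments made to cs->fence->error a few
 * lines earlier.
 */
static int example_cs_fence_error(bool timedout, bool aborted, bool submitted)
{
	if (timedout)
		return -ETIMEDOUT;	/* CS hit its TDR timeout          */
	if (aborted)
		return -EIO;		/* CS was killed, e.g. on rollback */
	if (!submitted)
		return -EBUSY;		/* CS never reached the H/W queues */
	return 0;			/* completed successfully          */
}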
706 struct hl_cs *cs = container_of(work, struct hl_cs, in cs_timedout() local
708 bool skip_reset_on_timeout = cs->skip_reset_on_timeout; in cs_timedout()
710 rc = cs_get_unless_zero(cs); in cs_timedout()
714 if ((!cs->submitted) || (cs->completed)) { in cs_timedout()
715 cs_put(cs); in cs_timedout()
719 /* Mark the CS as timed out so we won't try to cancel its TDR */ in cs_timedout()
721 cs->timedout = true; in cs_timedout()
723 hdev = cs->ctx->hdev; in cs_timedout()
725 switch (cs->type) { in cs_timedout()
727 dev_err(hdev->dev, in cs_timedout()
729 cs->sequence); in cs_timedout()
733 dev_err(hdev->dev, in cs_timedout()
735 cs->sequence); in cs_timedout()
739 dev_err(hdev->dev, in cs_timedout()
741 cs->sequence); in cs_timedout()
745 dev_err(hdev->dev, in cs_timedout()
747 cs->sequence); in cs_timedout()
753 dev_err(hdev->dev, "Error during system state dump %d\n", rc); in cs_timedout()
755 cs_put(cs); in cs_timedout()
758 if (hdev->reset_on_lockup) in cs_timedout()
761 hdev->needs_reset = true; in cs_timedout()
772 struct hl_cs *cs; in allocate_cs() local
775 cntr = &hdev->aggregated_cs_counters; in allocate_cs()
777 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); in allocate_cs()
778 if (!cs) in allocate_cs()
779 cs = kzalloc(sizeof(*cs), GFP_KERNEL); in allocate_cs()
781 if (!cs) { in allocate_cs()
782 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
783 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
784 return -ENOMEM; in allocate_cs()
790 cs->ctx = ctx; in allocate_cs()
791 cs->submitted = false; in allocate_cs()
792 cs->completed = false; in allocate_cs()
793 cs->type = cs_type; in allocate_cs()
794 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); in allocate_cs()
795 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); in allocate_cs()
796 cs->timeout_jiffies = timeout; in allocate_cs()
797 cs->skip_reset_on_timeout = in allocate_cs()
798 hdev->skip_reset_on_timeout || in allocate_cs()
800 cs->submission_time_jiffies = jiffies; in allocate_cs()
801 INIT_LIST_HEAD(&cs->job_list); in allocate_cs()
802 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); in allocate_cs()
803 kref_init(&cs->refcount); in allocate_cs()
804 spin_lock_init(&cs->job_lock); in allocate_cs()
811 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
812 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
813 rc = -ENOMEM; in allocate_cs()
817 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
818 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); in allocate_cs()
819 if (!cs->jobs_in_queue_cnt) in allocate_cs()
820 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, in allocate_cs()
821 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); in allocate_cs()
823 if (!cs->jobs_in_queue_cnt) { in allocate_cs()
824 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in allocate_cs()
825 atomic64_inc(&cntr->out_of_mem_drop_cnt); in allocate_cs()
826 rc = -ENOMEM; in allocate_cs()
830 cs_cmpl->hdev = hdev; in allocate_cs()
831 cs_cmpl->type = cs->type; in allocate_cs()
832 spin_lock_init(&cs_cmpl->lock); in allocate_cs()
833 cs->fence = &cs_cmpl->base_fence; in allocate_cs()
835 spin_lock(&ctx->cs_lock); in allocate_cs()
837 cs_cmpl->cs_seq = ctx->cs_sequence; in allocate_cs()
838 other = ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
839 (hdev->asic_prop.max_pending_cs - 1)]; in allocate_cs()
841 if (other && !completion_done(&other->completion)) { in allocate_cs()
846 * This causes a deadlock because this CS will never be in allocate_cs()
847 * completed as it depends on future CS's for completion. in allocate_cs()
849 if (other->cs_sequence == user_sequence) in allocate_cs()
850 dev_crit_ratelimited(hdev->dev, in allocate_cs()
851 "Staged CS %llu deadlock due to lack of resources", in allocate_cs()
854 dev_dbg_ratelimited(hdev->dev, in allocate_cs()
855 "Rejecting CS because of too many in-flights CS\n"); in allocate_cs()
856 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); in allocate_cs()
857 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); in allocate_cs()
858 rc = -EAGAIN; in allocate_cs()
863 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); in allocate_cs()
865 cs->sequence = cs_cmpl->cs_seq; in allocate_cs()
867 ctx->cs_pending[cs_cmpl->cs_seq & in allocate_cs()
868 (hdev->asic_prop.max_pending_cs - 1)] = in allocate_cs()
869 &cs_cmpl->base_fence; in allocate_cs()
870 ctx->cs_sequence++; in allocate_cs()
872 hl_fence_get(&cs_cmpl->base_fence); in allocate_cs()
876 spin_unlock(&ctx->cs_lock); in allocate_cs()
878 *cs_new = cs; in allocate_cs()
880 return 0; in allocate_cs()
883 spin_unlock(&ctx->cs_lock); in allocate_cs()
884 kfree(cs->jobs_in_queue_cnt); in allocate_cs()
888 kfree(cs); in allocate_cs()
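/*
 * Illustrative sketch, not part of the driver: the pending-CS slot lookup
 * above, cs_seq & (max_pending_cs - 1), is a power-of-two ring index (the
 * mask form only works when max_pending_cs is a power of two). Assuming
 * max_pending_cs = 64 for the example, sequence 130 maps to slot
 * 130 & 63 = 2, the same slot as sequences 2, 66 and 194, which is why an
 * uncompleted fence already sitting in that slot means too many CSs are in
 * flight.
 */
static inline unsigned int example_pending_slot(u64 cs_seq,
						unsigned int max_pending_cs)
{
	return cs_seq & (max_pending_cs - 1);
}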
893 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) in cs_rollback() argument
897 staged_cs_put(hdev, cs); in cs_rollback()
899 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) in cs_rollback()
906 struct hl_cs *cs, *tmp; in hl_cs_rollback_all() local
908 flush_workqueue(hdev->sob_reset_wq); in hl_cs_rollback_all()
910 /* flush all completions before iterating over the CS mirror list in in hl_cs_rollback_all()
913 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) in hl_cs_rollback_all()
914 flush_workqueue(hdev->cq_wq[i]); in hl_cs_rollback_all()
916 /* Make sure we don't have leftovers in the CS mirror list */ in hl_cs_rollback_all()
917 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { in hl_cs_rollback_all()
918 cs_get(cs); in hl_cs_rollback_all()
919 cs->aborted = true; in hl_cs_rollback_all()
920 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", in hl_cs_rollback_all()
921 cs->ctx->asid, cs->sequence); in hl_cs_rollback_all()
922 cs_rollback(hdev, cs); in hl_cs_rollback_all()
923 cs_put(cs); in hl_cs_rollback_all()
935 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in wake_pending_user_interrupt_threads()
936 list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) { in wake_pending_user_interrupt_threads()
937 pend->fence.error = -EIO; in wake_pending_user_interrupt_threads()
938 complete_all(&pend->fence.completion); in wake_pending_user_interrupt_threads()
940 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in wake_pending_user_interrupt_threads()
945 struct asic_fixed_properties *prop = &hdev->asic_prop; in hl_release_pending_user_interrupts()
949 if (!prop->user_interrupt_count) in hl_release_pending_user_interrupts()
959 for (i = 0 ; i < prop->user_interrupt_count ; i++) { in hl_release_pending_user_interrupts()
960 interrupt = &hdev->user_interrupt[i]; in hl_release_pending_user_interrupts()
964 interrupt = &hdev->common_user_interrupt; in hl_release_pending_user_interrupts()
972 struct hl_cs *cs = job->cs; in job_wq_completion() local
973 struct hl_device *hdev = cs->ctx->hdev; in job_wq_completion()
984 struct asic_fixed_properties *asic = &hdev->asic_prop; in validate_queue_index()
987 /* This must be checked here to prevent out-of-bounds access to in validate_queue_index()
990 if (chunk->queue_index >= asic->max_queues) { in validate_queue_index()
991 dev_err(hdev->dev, "Queue index %d is invalid\n", in validate_queue_index()
992 chunk->queue_index); in validate_queue_index()
993 return -EINVAL; in validate_queue_index()
996 hw_queue_prop = &asic->hw_queues_props[chunk->queue_index]; in validate_queue_index()
998 if (hw_queue_prop->type == QUEUE_TYPE_NA) { in validate_queue_index()
999 dev_err(hdev->dev, "Queue index %d is invalid\n", in validate_queue_index()
1000 chunk->queue_index); in validate_queue_index()
1001 return -EINVAL; in validate_queue_index()
1004 if (hw_queue_prop->driver_only) { in validate_queue_index()
1005 dev_err(hdev->dev, in validate_queue_index()
1007 chunk->queue_index); in validate_queue_index()
1008 return -EINVAL; in validate_queue_index()
1014 if (hw_queue_prop->type == QUEUE_TYPE_HW) { in validate_queue_index()
1015 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { in validate_queue_index()
1016 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { in validate_queue_index()
1017 dev_err(hdev->dev, in validate_queue_index()
1019 chunk->queue_index); in validate_queue_index()
1020 return -EINVAL; in validate_queue_index()
1025 if (!(hw_queue_prop->cb_alloc_flags & in validate_queue_index()
1027 dev_err(hdev->dev, in validate_queue_index()
1029 chunk->queue_index); in validate_queue_index()
1030 return -EINVAL; in validate_queue_index()
1036 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags in validate_queue_index()
1040 *queue_type = hw_queue_prop->type; in validate_queue_index()
1041 return 0; in validate_queue_index()
1051 cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT); in get_cb_from_cs_chunk()
1055 dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle); in get_cb_from_cs_chunk()
1059 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { in get_cb_from_cs_chunk()
1060 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); in get_cb_from_cs_chunk()
1064 atomic_inc(&cb->cs_cnt); in get_cb_from_cs_chunk()
1085 kref_init(&job->refcount); in hl_cs_allocate_job()
1086 job->queue_type = queue_type; in hl_cs_allocate_job()
1087 job->is_kernel_allocated_cb = is_kernel_allocated_cb; in hl_cs_allocate_job()
1090 INIT_LIST_HEAD(&job->userptr_list); in hl_cs_allocate_job()
1092 if (job->queue_type == QUEUE_TYPE_EXT) in hl_cs_allocate_job()
1093 INIT_WORK(&job->finish_work, job_wq_completion); in hl_cs_allocate_job()
1116 struct hl_device *hdev = hpriv->hdev; in hl_cs_sanity_checks()
1117 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_sanity_checks()
1123 dev_warn_ratelimited(hdev->dev, in hl_cs_sanity_checks()
1124 "Device is %s. Can't submit new CS\n", in hl_cs_sanity_checks()
1125 hdev->status[status]); in hl_cs_sanity_checks()
1126 return -EBUSY; in hl_cs_sanity_checks()
1129 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && in hl_cs_sanity_checks()
1130 !hdev->supports_staged_submission) { in hl_cs_sanity_checks()
1131 dev_err(hdev->dev, "staged submission not supported"); in hl_cs_sanity_checks()
1132 return -EPERM; in hl_cs_sanity_checks()
1135 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; in hl_cs_sanity_checks()
1138 dev_err(hdev->dev, in hl_cs_sanity_checks()
1139 "CS type flags are mutually exclusive, context %d\n", in hl_cs_sanity_checks()
1140 ctx->asid); in hl_cs_sanity_checks()
1141 return -EINVAL; in hl_cs_sanity_checks()
1145 num_chunks = args->in.num_chunks_execute; in hl_cs_sanity_checks()
1148 !hdev->supports_sync_stream)) { in hl_cs_sanity_checks()
1149 dev_err(hdev->dev, "Sync stream CS is not supported\n"); in hl_cs_sanity_checks()
1150 return -EINVAL; in hl_cs_sanity_checks()
1155 dev_err(hdev->dev, in hl_cs_sanity_checks()
1156 "Got execute CS with 0 chunks, context %d\n", in hl_cs_sanity_checks()
1157 ctx->asid); in hl_cs_sanity_checks()
1158 return -EINVAL; in hl_cs_sanity_checks()
1161 dev_err(hdev->dev, in hl_cs_sanity_checks()
1162 "Sync stream CS mandates one chunk only, context %d\n", in hl_cs_sanity_checks()
1163 ctx->asid); in hl_cs_sanity_checks()
1164 return -EINVAL; in hl_cs_sanity_checks()
1167 return 0; in hl_cs_sanity_checks()
1178 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1179 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1180 dev_err(hdev->dev, in hl_cs_copy_chunk_array()
1183 return -EINVAL; in hl_cs_copy_chunk_array()
1192 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1193 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in hl_cs_copy_chunk_array()
1194 return -ENOMEM; in hl_cs_copy_chunk_array()
1199 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1200 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in hl_cs_copy_chunk_array()
1201 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); in hl_cs_copy_chunk_array()
1203 return -EFAULT; in hl_cs_copy_chunk_array()
1206 return 0; in hl_cs_copy_chunk_array()
1209 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, in cs_staged_submission() argument
1214 return 0; in cs_staged_submission()
1216 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); in cs_staged_submission()
1217 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); in cs_staged_submission()
1219 if (cs->staged_first) { in cs_staged_submission()
1220 /* Staged CS sequence is the first CS sequence */ in cs_staged_submission()
1221 INIT_LIST_HEAD(&cs->staged_cs_node); in cs_staged_submission()
1222 cs->staged_sequence = cs->sequence; in cs_staged_submission()
1224 if (cs->encaps_signals) in cs_staged_submission()
1225 cs->encaps_sig_hdl_id = encaps_signal_handle; in cs_staged_submission()
1230 cs->staged_sequence = sequence; in cs_staged_submission()
1233 /* Increment CS reference if needed */ in cs_staged_submission()
1234 staged_cs_get(hdev, cs); in cs_staged_submission()
1236 cs->staged_cs = true; in cs_staged_submission()
1238 return 0; in cs_staged_submission()
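/*
 * Illustrative sketch, not part of the driver: the flag combinations a user
 * would pass for a three-part staged submission handled above. The first CS
 * latches its own sequence as cs->staged_sequence; the middle and last CSs
 * are submitted with that same sequence (see the "user should supply the CS
 * sequence" rule further below). The constants are composed from the
 * HL_CS_FLAGS_* values used in this file; the variable names are only an
 * example.
 */
static const u32 example_staged_first_flags =
	HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_FIRST;
static const u32 example_staged_mid_flags =
	HL_CS_FLAGS_STAGED_SUBMISSION;
static const u32 example_staged_last_flags =
	HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_LAST;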
1245 for (i = 0; i < hdev->stream_master_qid_arr_size; i++) in get_stream_master_qid_mask()
1246 if (qid == hdev->stream_master_qid_arr[i]) in get_stream_master_qid_mask()
1249 return 0; in get_stream_master_qid_mask()
1257 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_default()
1260 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_default()
1262 struct hl_cs *cs; in cs_ioctl_default() local
1265 u8 stream_master_qid_map = 0; in cs_ioctl_default()
1268 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_default()
1273 hpriv->ctx); in cs_ioctl_default()
1283 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, in cs_ioctl_default()
1284 staged_mid ? user_sequence : ULLONG_MAX, &cs, flags, in cs_ioctl_default()
1289 *cs_seq = cs->sequence; in cs_ioctl_default()
1291 hl_debugfs_add_cs(cs); in cs_ioctl_default()
1293 rc = cs_staged_submission(hdev, cs, user_sequence, flags, in cs_ioctl_default()
1299 * rather than the internal CS sequence in cs_ioctl_default()
1301 if (cs->staged_cs) in cs_ioctl_default()
1302 *cs_seq = cs->staged_sequence; in cs_ioctl_default()
1304 /* Validate ALL the CS chunks before submitting the CS */ in cs_ioctl_default()
1305 for (i = 0 ; i < num_chunks ; i++) { in cs_ioctl_default()
1313 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1314 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1319 cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk); in cs_ioctl_default()
1322 &ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1323 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1324 rc = -EINVAL; in cs_ioctl_default()
1328 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; in cs_ioctl_default()
1337 * queues of this CS in cs_ioctl_default()
1339 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1342 chunk->queue_index); in cs_ioctl_default()
1348 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_default()
1349 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_default()
1350 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_default()
1351 rc = -ENOMEM; in cs_ioctl_default()
1358 job->id = i + 1; in cs_ioctl_default()
1359 job->cs = cs; in cs_ioctl_default()
1360 job->user_cb = cb; in cs_ioctl_default()
1361 job->user_cb_size = chunk->cb_size; in cs_ioctl_default()
1362 job->hw_queue_id = chunk->queue_index; in cs_ioctl_default()
1364 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_default()
1366 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_default()
1369 * Increment CS reference. When CS reference is 0, CS is in cs_ioctl_default()
1374 if (cs_needs_completion(cs) && in cs_ioctl_default()
1375 (job->queue_type == QUEUE_TYPE_EXT || in cs_ioctl_default()
1376 job->queue_type == QUEUE_TYPE_HW)) in cs_ioctl_default()
1377 cs_get(cs); in cs_ioctl_default()
1383 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); in cs_ioctl_default()
1384 atomic64_inc(&cntr->parsing_drop_cnt); in cs_ioctl_default()
1385 dev_err(hdev->dev, in cs_ioctl_default()
1386 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", in cs_ioctl_default()
1387 cs->ctx->asid, cs->sequence, job->id, rc); in cs_ioctl_default()
1392 /* We allow a CS with any queue type combination as long as it does in cs_ioctl_default()
1395 if (int_queues_only && cs_needs_completion(cs)) { in cs_ioctl_default()
1396 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_default()
1397 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_default()
1398 dev_err(hdev->dev, in cs_ioctl_default()
1399 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", in cs_ioctl_default()
1400 cs->ctx->asid, cs->sequence); in cs_ioctl_default()
1401 rc = -EINVAL; in cs_ioctl_default()
1406 * store the (external/HW queues) streams used by the CS in the in cs_ioctl_default()
1407 * fence object for multi-CS completion in cs_ioctl_default()
1409 if (hdev->supports_wait_for_multi_cs) in cs_ioctl_default()
1410 cs->fence->stream_master_qid_map = stream_master_qid_map; in cs_ioctl_default()
1412 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_default()
1414 if (rc != -EAGAIN) in cs_ioctl_default()
1415 dev_err(hdev->dev, in cs_ioctl_default()
1416 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_default()
1417 cs->ctx->asid, cs->sequence, rc); in cs_ioctl_default()
1425 atomic_dec(&cb->cs_cnt); in cs_ioctl_default()
1428 cs_rollback(hdev, cs); in cs_ioctl_default()
1432 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_default()
1433 cs_put(cs); in cs_ioctl_default()
1443 struct hl_device *hdev = hpriv->hdev; in hl_cs_ctx_switch()
1444 struct hl_ctx *ctx = hpriv->ctx; in hl_cs_ctx_switch()
1446 int rc = 0, do_ctx_switch; in hl_cs_ctx_switch()
1451 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); in hl_cs_ctx_switch()
1453 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { in hl_cs_ctx_switch()
1454 mutex_lock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1457 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); in hl_cs_ctx_switch()
1459 dev_err_ratelimited(hdev->dev, in hl_cs_ctx_switch()
1460 "Failed to switch to context %d, rejecting CS! %d\n", in hl_cs_ctx_switch()
1461 ctx->asid, rc); in hl_cs_ctx_switch()
1464 * while we want to do context-switch (-EBUSY), in hl_cs_ctx_switch()
1465 * we need to soft-reset because QMAN is in hl_cs_ctx_switch()
1471 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) in hl_cs_ctx_switch()
1473 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1478 hdev->asic_funcs->restore_phase_topology(hdev); in hl_cs_ctx_switch()
1480 chunks = (void __user *) (uintptr_t) args->in.chunks_restore; in hl_cs_ctx_switch()
1481 num_chunks = args->in.num_chunks_restore; in hl_cs_ctx_switch()
1484 dev_dbg(hdev->dev, in hl_cs_ctx_switch()
1485 "Need to run restore phase but restore CS is empty\n"); in hl_cs_ctx_switch()
1486 rc = 0; in hl_cs_ctx_switch()
1489 cs_seq, 0, 0, hdev->timeout_jiffies); in hl_cs_ctx_switch()
1492 mutex_unlock(&hpriv->restore_phase_mutex); in hl_cs_ctx_switch()
1495 dev_err(hdev->dev, in hl_cs_ctx_switch()
1496 "Failed to submit restore CS for context %d (%d)\n", in hl_cs_ctx_switch()
1497 ctx->asid, rc); in hl_cs_ctx_switch()
1506 jiffies_to_usecs(hdev->timeout_jiffies), in hl_cs_ctx_switch()
1509 if (ret == -ERESTARTSYS) { in hl_cs_ctx_switch()
1514 dev_err(hdev->dev, in hl_cs_ctx_switch()
1515 "Restore CS for context %d failed to complete %d\n", in hl_cs_ctx_switch()
1516 ctx->asid, ret); in hl_cs_ctx_switch()
1517 rc = -ENOEXEC; in hl_cs_ctx_switch()
1522 ctx->thread_ctx_switch_wait_token = 1; in hl_cs_ctx_switch()
1524 } else if (!ctx->thread_ctx_switch_wait_token) { in hl_cs_ctx_switch()
1526 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), in hl_cs_ctx_switch()
1527 100, jiffies_to_usecs(hdev->timeout_jiffies), false); in hl_cs_ctx_switch()
1529 if (rc == -ETIMEDOUT) { in hl_cs_ctx_switch()
1530 dev_err(hdev->dev, in hl_cs_ctx_switch()
1537 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) in hl_cs_ctx_switch()
1538 hl_device_reset(hdev, 0); in hl_cs_ctx_switch()
1549 * @hw_sob: the H/W SOB used in this signal CS.
1563 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in hl_cs_signal_sob_wraparound_handler()
1568 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { in hl_cs_signal_sob_wraparound_handler()
1581 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; in hl_cs_signal_sob_wraparound_handler()
1582 other_sob = &prop->hw_sob[other_sob_offset]; in hl_cs_signal_sob_wraparound_handler()
1584 if (kref_read(&other_sob->kref) != 1) { in hl_cs_signal_sob_wraparound_handler()
1585 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1587 return -EINVAL; in hl_cs_signal_sob_wraparound_handler()
1596 prop->next_sob_val = count + 1; in hl_cs_signal_sob_wraparound_handler()
1598 prop->next_sob_val = count; in hl_cs_signal_sob_wraparound_handler()
1601 prop->curr_sob_offset = other_sob_offset; in hl_cs_signal_sob_wraparound_handler()
1606 * for the reservation or the next signal cs. in hl_cs_signal_sob_wraparound_handler()
1607 * we do it here, and for both encaps and regular signal cs in hl_cs_signal_sob_wraparound_handler()
1611 * in addition, if we have combination of cs signal and in hl_cs_signal_sob_wraparound_handler()
1613 * no more reservations and only signal cs keep coming, in hl_cs_signal_sob_wraparound_handler()
1617 if (other_sob->need_reset) in hl_cs_signal_sob_wraparound_handler()
1622 sob->need_reset = true; in hl_cs_signal_sob_wraparound_handler()
1626 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", in hl_cs_signal_sob_wraparound_handler()
1627 prop->curr_sob_offset, q_idx); in hl_cs_signal_sob_wraparound_handler()
1629 prop->next_sob_val += count; in hl_cs_signal_sob_wraparound_handler()
1632 return 0; in hl_cs_signal_sob_wraparound_handler()
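/*
 * Illustrative sketch, not part of the driver: the wraparound decision
 * above. A signal CS (or reservation) that would push next_sob_val past
 * HL_MAX_SOB_VAL switches the stream to the other reserved SOB and restarts
 * the count; otherwise next_sob_val simply advances by the number of
 * signals. The helper only expresses the predicate and is hypothetical.
 */
static bool example_sob_would_wrap(u32 next_sob_val, u32 count, u32 max_sob_val)
{
	return next_sob_val + count >= max_sob_val;
}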
1641 int rc = 0; in cs_ioctl_extract_signal_seq()
1644 *signal_seq = chunk->encaps_signal_seq; in cs_ioctl_extract_signal_seq()
1645 return 0; in cs_ioctl_extract_signal_seq()
1648 signal_seq_arr_len = chunk->num_signal_seq_arr; in cs_ioctl_extract_signal_seq()
1652 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1653 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1654 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1655 "Wait for signal CS supports only one signal CS seq\n"); in cs_ioctl_extract_signal_seq()
1656 return -EINVAL; in cs_ioctl_extract_signal_seq()
1667 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1668 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); in cs_ioctl_extract_signal_seq()
1669 return -ENOMEM; in cs_ioctl_extract_signal_seq()
1674 u64_to_user_ptr(chunk->signal_seq_arr), in cs_ioctl_extract_signal_seq()
1676 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1677 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); in cs_ioctl_extract_signal_seq()
1678 dev_err(hdev->dev, in cs_ioctl_extract_signal_seq()
1680 rc = -EFAULT; in cs_ioctl_extract_signal_seq()
1685 *signal_seq = signal_seq_arr[0]; in cs_ioctl_extract_signal_seq()
1694 struct hl_ctx *ctx, struct hl_cs *cs, in cs_ioctl_signal_wait_create_jobs() argument
1702 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait_create_jobs()
1706 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1707 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1708 dev_err(hdev->dev, "Failed to allocate a new job\n"); in cs_ioctl_signal_wait_create_jobs()
1709 return -ENOMEM; in cs_ioctl_signal_wait_create_jobs()
1712 if (cs->type == CS_TYPE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1713 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1715 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); in cs_ioctl_signal_wait_create_jobs()
1718 q_type == QUEUE_TYPE_HW && hdev->mmu_enable); in cs_ioctl_signal_wait_create_jobs()
1720 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1721 atomic64_inc(&cntr->out_of_mem_drop_cnt); in cs_ioctl_signal_wait_create_jobs()
1723 return -EFAULT; in cs_ioctl_signal_wait_create_jobs()
1726 job->id = 0; in cs_ioctl_signal_wait_create_jobs()
1727 job->cs = cs; in cs_ioctl_signal_wait_create_jobs()
1728 job->user_cb = cb; in cs_ioctl_signal_wait_create_jobs()
1729 atomic_inc(&job->user_cb->cs_cnt); in cs_ioctl_signal_wait_create_jobs()
1730 job->user_cb_size = cb_size; in cs_ioctl_signal_wait_create_jobs()
1731 job->hw_queue_id = q_idx; in cs_ioctl_signal_wait_create_jobs()
1733 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) in cs_ioctl_signal_wait_create_jobs()
1734 && cs->encaps_signals) in cs_ioctl_signal_wait_create_jobs()
1735 job->encaps_sig_wait_offset = encaps_signal_offset; in cs_ioctl_signal_wait_create_jobs()
1738 * We call hl_cb_destroy() out of two reasons - we don't need the CB in in cs_ioctl_signal_wait_create_jobs()
1742 job->patched_cb = job->user_cb; in cs_ioctl_signal_wait_create_jobs()
1743 job->job_cb_size = job->user_cb_size; in cs_ioctl_signal_wait_create_jobs()
1744 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); in cs_ioctl_signal_wait_create_jobs()
1747 cs_get(cs); in cs_ioctl_signal_wait_create_jobs()
1749 cs->jobs_in_queue_cnt[job->hw_queue_id]++; in cs_ioctl_signal_wait_create_jobs()
1751 list_add_tail(&job->cs_node, &cs->job_list); in cs_ioctl_signal_wait_create_jobs()
1755 return 0; in cs_ioctl_signal_wait_create_jobs()
1765 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_reserve_signals()
1770 int rc = 0; in cs_ioctl_reserve_signals()
1773 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", in cs_ioctl_reserve_signals()
1775 rc = -EINVAL; in cs_ioctl_reserve_signals()
1779 if (q_idx >= hdev->asic_prop.max_queues) { in cs_ioctl_reserve_signals()
1780 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_reserve_signals()
1782 rc = -EINVAL; in cs_ioctl_reserve_signals()
1786 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_reserve_signals()
1788 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_reserve_signals()
1789 dev_err(hdev->dev, in cs_ioctl_reserve_signals()
1792 rc = -EINVAL; in cs_ioctl_reserve_signals()
1796 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_reserve_signals()
1800 rc = -ENOMEM; in cs_ioctl_reserve_signals()
1804 handle->count = count; in cs_ioctl_reserve_signals()
1805 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_reserve_signals()
1807 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
1808 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); in cs_ioctl_reserve_signals()
1809 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
1811 if (hdl_id < 0) { in cs_ioctl_reserve_signals()
1812 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); in cs_ioctl_reserve_signals()
1813 rc = -EINVAL; in cs_ioctl_reserve_signals()
1817 handle->id = hdl_id; in cs_ioctl_reserve_signals()
1818 handle->q_idx = q_idx; in cs_ioctl_reserve_signals()
1819 handle->hdev = hdev; in cs_ioctl_reserve_signals()
1820 kref_init(&handle->refcount); in cs_ioctl_reserve_signals()
1822 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_reserve_signals()
1824 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_reserve_signals()
1835 dev_err(hdev->dev, "Failed to switch SOB\n"); in cs_ioctl_reserve_signals()
1836 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
1837 rc = -EINVAL; in cs_ioctl_reserve_signals()
1843 handle->hw_sob = hw_sob; in cs_ioctl_reserve_signals()
1848 handle->pre_sob_val = prop->next_sob_val - handle->count; in cs_ioctl_reserve_signals()
1850 *signals_count = prop->next_sob_val; in cs_ioctl_reserve_signals()
1851 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_reserve_signals()
1853 *sob_addr = handle->hw_sob->sob_addr; in cs_ioctl_reserve_signals()
1856 dev_dbg(hdev->dev, in cs_ioctl_reserve_signals()
1857 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", in cs_ioctl_reserve_signals()
1858 hw_sob->sob_id, handle->hw_sob->sob_addr, in cs_ioctl_reserve_signals()
1859 prop->next_sob_val - 1, q_idx, hdl_id); in cs_ioctl_reserve_signals()
1863 spin_lock(&mgr->lock); in cs_ioctl_reserve_signals()
1864 idr_remove(&mgr->handles, hdl_id); in cs_ioctl_reserve_signals()
1865 spin_unlock(&mgr->lock); in cs_ioctl_reserve_signals()
1876 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_unreserve_signals()
1880 int rc = 0; in cs_ioctl_unreserve_signals()
1882 mgr = &hpriv->ctx->sig_mgr; in cs_ioctl_unreserve_signals()
1884 spin_lock(&mgr->lock); in cs_ioctl_unreserve_signals()
1885 encaps_sig_hdl = idr_find(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
1887 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", in cs_ioctl_unreserve_signals()
1888 handle_id, encaps_sig_hdl->hw_sob->sob_addr, in cs_ioctl_unreserve_signals()
1889 encaps_sig_hdl->count); in cs_ioctl_unreserve_signals()
1891 hdev->asic_funcs->hw_queues_lock(hdev); in cs_ioctl_unreserve_signals()
1893 q_idx = encaps_sig_hdl->q_idx; in cs_ioctl_unreserve_signals()
1894 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; in cs_ioctl_unreserve_signals()
1895 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; in cs_ioctl_unreserve_signals()
1896 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); in cs_ioctl_unreserve_signals()
1900 * between the reserve-unreserve calls or SOB switch in cs_ioctl_unreserve_signals()
1903 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count in cs_ioctl_unreserve_signals()
1904 != prop->next_sob_val || in cs_ioctl_unreserve_signals()
1905 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { in cs_ioctl_unreserve_signals()
906 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n", in cs_ioctl_unreserve_signals()
1907 encaps_sig_hdl->pre_sob_val, in cs_ioctl_unreserve_signals()
1908 (prop->next_sob_val - encaps_sig_hdl->count)); in cs_ioctl_unreserve_signals()
1910 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
1911 rc = -EINVAL; in cs_ioctl_unreserve_signals()
1919 prop->next_sob_val -= encaps_sig_hdl->count; in cs_ioctl_unreserve_signals()
1921 hdev->asic_funcs->hw_queues_unlock(hdev); in cs_ioctl_unreserve_signals()
1926 idr_remove(&mgr->handles, handle_id); in cs_ioctl_unreserve_signals()
1929 rc = -EINVAL; in cs_ioctl_unreserve_signals()
1930 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); in cs_ioctl_unreserve_signals()
1933 spin_unlock(&mgr->lock); in cs_ioctl_unreserve_signals()
1949 struct hl_device *hdev = hpriv->hdev; in cs_ioctl_signal_wait()
1951 u32 q_idx, collective_engine_id = 0; in cs_ioctl_signal_wait()
1954 struct hl_ctx *ctx = hpriv->ctx; in cs_ioctl_signal_wait()
1956 struct hl_cs *cs; in cs_ioctl_signal_wait() local
1960 cntr = &hdev->aggregated_cs_counters; in cs_ioctl_signal_wait()
1969 chunk = &cs_chunk_array[0]; in cs_ioctl_signal_wait()
1971 if (chunk->queue_index >= hdev->asic_prop.max_queues) { in cs_ioctl_signal_wait()
1972 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
1973 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
1974 dev_err(hdev->dev, "Queue index %d is invalid\n", in cs_ioctl_signal_wait()
1975 chunk->queue_index); in cs_ioctl_signal_wait()
1976 rc = -EINVAL; in cs_ioctl_signal_wait()
1980 q_idx = chunk->queue_index; in cs_ioctl_signal_wait()
1981 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; in cs_ioctl_signal_wait()
1982 q_type = hw_queue_prop->type; in cs_ioctl_signal_wait()
1984 if (!hw_queue_prop->supports_sync_stream) { in cs_ioctl_signal_wait()
1985 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
1986 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
1987 dev_err(hdev->dev, in cs_ioctl_signal_wait()
1990 rc = -EINVAL; in cs_ioctl_signal_wait()
1995 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { in cs_ioctl_signal_wait()
1996 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
1997 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
1998 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2000 rc = -EINVAL; in cs_ioctl_signal_wait()
2004 if (!hdev->nic_ports_mask) { in cs_ioctl_signal_wait()
2005 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2006 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2007 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2009 rc = -EINVAL; in cs_ioctl_signal_wait()
2013 collective_engine_id = chunk->collective_engine_id; in cs_ioctl_signal_wait()
2028 /* check if cs sequence has encapsulated in cs_ioctl_signal_wait()
2034 spin_lock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2035 idp = &ctx->sig_mgr.handles; in cs_ioctl_signal_wait()
2037 if (encaps_sig_hdl->cs_seq == signal_seq) { in cs_ioctl_signal_wait()
2041 * multiple wait cs are used with offset in cs_ioctl_signal_wait()
2044 kref_get(&encaps_sig_hdl->refcount); in cs_ioctl_signal_wait()
2048 spin_unlock(&ctx->sig_mgr.lock); in cs_ioctl_signal_wait()
2051 /* treat as signal CS already finished */ in cs_ioctl_signal_wait()
2052 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", in cs_ioctl_signal_wait()
2054 rc = 0; in cs_ioctl_signal_wait()
2059 if (chunk->encaps_signal_offset > in cs_ioctl_signal_wait()
2060 encaps_sig_hdl->count) { in cs_ioctl_signal_wait()
2061 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", in cs_ioctl_signal_wait()
2062 chunk->encaps_signal_offset, in cs_ioctl_signal_wait()
2063 encaps_sig_hdl->count); in cs_ioctl_signal_wait()
2064 rc = -EINVAL; in cs_ioctl_signal_wait()
2071 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2072 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2073 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2074 "Failed to get signal CS with seq 0x%llx\n", in cs_ioctl_signal_wait()
2081 /* signal CS already finished */ in cs_ioctl_signal_wait()
2082 rc = 0; in cs_ioctl_signal_wait()
2090 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && in cs_ioctl_signal_wait()
2093 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && in cs_ioctl_signal_wait()
2095 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2096 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2097 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2098 "CS seq 0x%llx is not of a signal/encaps-signal CS\n", in cs_ioctl_signal_wait()
2101 rc = -EINVAL; in cs_ioctl_signal_wait()
2105 if (completion_done(&sig_fence->completion)) { in cs_ioctl_signal_wait()
2106 /* signal CS already finished */ in cs_ioctl_signal_wait()
2108 rc = 0; in cs_ioctl_signal_wait()
2113 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); in cs_ioctl_signal_wait()
2122 * Save the signal CS fence for later initialization right before in cs_ioctl_signal_wait()
2123 * hanging the wait CS on the queue. in cs_ioctl_signal_wait()
2124 * for encaps signals case, we save the cs sequence and handle pointer in cs_ioctl_signal_wait()
2128 cs->signal_fence = sig_fence; in cs_ioctl_signal_wait()
2133 if (cs->encaps_signals) in cs_ioctl_signal_wait()
2134 cs->encaps_sig_hdl = encaps_sig_hdl; in cs_ioctl_signal_wait()
2137 hl_debugfs_add_cs(cs); in cs_ioctl_signal_wait()
2139 *cs_seq = cs->sequence; in cs_ioctl_signal_wait()
2142 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, in cs_ioctl_signal_wait()
2143 q_idx, chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2145 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, in cs_ioctl_signal_wait()
2146 cs, q_idx, collective_engine_id, in cs_ioctl_signal_wait()
2147 chunk->encaps_signal_offset); in cs_ioctl_signal_wait()
2149 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); in cs_ioctl_signal_wait()
2150 atomic64_inc(&cntr->validation_drop_cnt); in cs_ioctl_signal_wait()
2151 rc = -EINVAL; in cs_ioctl_signal_wait()
2157 rc = hl_hw_queue_schedule_cs(cs); in cs_ioctl_signal_wait()
2159 /* In case wait cs failed here, it means the signal cs in cs_ioctl_signal_wait()
2164 rc = 0; in cs_ioctl_signal_wait()
2165 else if (rc != -EAGAIN) in cs_ioctl_signal_wait()
2166 dev_err(hdev->dev, in cs_ioctl_signal_wait()
2167 "Failed to submit CS %d.%llu to H/W queues, error %d\n", in cs_ioctl_signal_wait()
2168 ctx->asid, cs->sequence, rc); in cs_ioctl_signal_wait()
2178 cs_rollback(hdev, cs); in cs_ioctl_signal_wait()
2182 /* We finished with the CS in this function, so put the ref */ in cs_ioctl_signal_wait()
2183 cs_put(cs); in cs_ioctl_signal_wait()
2187 kref_put(&encaps_sig_hdl->refcount, in cs_ioctl_signal_wait()
2197 enum hl_cs_type cs_type = 0; in hl_cs_ioctl()
2201 signals_count = 0, sob_addr = 0, handle_id = 0; in hl_cs_ioctl()
2212 cs_type = hl_cs_get_cs_type(args->in.cs_flags & in hl_cs_ioctl()
2214 chunks = (void __user *) (uintptr_t) args->in.chunks_execute; in hl_cs_ioctl()
2215 num_chunks = args->in.num_chunks_execute; in hl_cs_ioctl()
2216 flags = args->in.cs_flags; in hl_cs_ioctl()
2218 /* In case this is a staged CS, user should supply the CS sequence */ in hl_cs_ioctl()
2221 cs_seq = args->in.seq; in hl_cs_ioctl()
2224 ? msecs_to_jiffies(args->in.timeout * 1000) in hl_cs_ioctl()
2225 : hpriv->hdev->timeout_jiffies; in hl_cs_ioctl()
2232 &cs_seq, args->in.cs_flags, timeout); in hl_cs_ioctl()
2236 args->in.encaps_signals_q_idx, in hl_cs_ioctl()
2237 args->in.encaps_signals_count, in hl_cs_ioctl()
2242 args->in.encaps_sig_handle_id); in hl_cs_ioctl()
2246 args->in.cs_flags, in hl_cs_ioctl()
2247 args->in.encaps_sig_handle_id, in hl_cs_ioctl()
2252 if (rc != -EAGAIN) { in hl_cs_ioctl()
2253 memset(args, 0, sizeof(*args)); in hl_cs_ioctl()
2256 args->out.handle_id = handle_id; in hl_cs_ioctl()
2257 args->out.sob_base_addr_offset = sob_addr; in hl_cs_ioctl()
2258 args->out.count = signals_count; in hl_cs_ioctl()
2260 args->out.seq = cs_seq; in hl_cs_ioctl()
2262 args->out.status = rc; in hl_cs_ioctl()
2272 struct hl_device *hdev = ctx->hdev; in hl_wait_for_fence()
2274 int rc = 0; in hl_wait_for_fence()
2278 if (rc == -EINVAL) in hl_wait_for_fence()
2279 dev_notice_ratelimited(hdev->dev, in hl_wait_for_fence()
2280 "Can't wait on CS %llu because current CS is at seq %llu\n", in hl_wait_for_fence()
2281 seq, ctx->cs_sequence); in hl_wait_for_fence()
2286 dev_dbg(hdev->dev, in hl_wait_for_fence()
2287 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", in hl_wait_for_fence()
2288 seq, ctx->cs_sequence); in hl_wait_for_fence()
2291 return 0; in hl_wait_for_fence()
2295 completion_rc = completion_done(&fence->completion); in hl_wait_for_fence()
2303 &fence->completion, timeout); in hl_wait_for_fence()
2306 if (completion_rc > 0) { in hl_wait_for_fence()
2309 *timestamp = ktime_to_ns(fence->timestamp); in hl_wait_for_fence()
2314 if (fence->error == -ETIMEDOUT) in hl_wait_for_fence()
2315 rc = -ETIMEDOUT; in hl_wait_for_fence()
2316 else if (fence->error == -EIO) in hl_wait_for_fence()
2317 rc = -EIO; in hl_wait_for_fence()
2323 * hl_cs_poll_fences - iterate CS fences to check for CS completion
2325 * @mcs_data: multi-CS internal data
2327 * @return 0 on success, otherwise non 0 error code
2329 * The function iterates on all CS sequence in the list and set bit in
2330 * completion_bitmap for each completed CS.
2337 struct hl_fence **fence_ptr = mcs_data->fence_arr; in hl_cs_poll_fences()
2338 struct hl_device *hdev = mcs_data->ctx->hdev; in hl_cs_poll_fences()
2339 int i, rc, arr_len = mcs_data->arr_len; in hl_cs_poll_fences()
2340 u64 *seq_arr = mcs_data->seq_arr; in hl_cs_poll_fences()
2344 memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr)); in hl_cs_poll_fences()
2347 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); in hl_cs_poll_fences()
2353 * this value is maintained, meaning no timestamp was updated in hl_cs_poll_fences()
2355 max_ktime = ktime_set(KTIME_SEC_MAX, 0); in hl_cs_poll_fences()
2358 for (i = 0; i < arr_len; i++, fence_ptr++) { in hl_cs_poll_fences()
2362 * function won't sleep as it is called with timeout 0 (i.e. in hl_cs_poll_fences()
2365 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, in hl_cs_poll_fences()
2366 &status, 0, NULL); in hl_cs_poll_fences()
2368 dev_err(hdev->dev, in hl_cs_poll_fences()
2369 "wait_for_fence error :%d for CS seq %llu\n", in hl_cs_poll_fences()
2374 mcs_data->stream_master_qid_map |= fence->stream_master_qid_map; in hl_cs_poll_fences()
2379 mcs_data->completion_bitmap |= BIT(i); in hl_cs_poll_fences()
2383 * - if even a single fence is gone we cannot extract a timestamp in hl_cs_poll_fences()
2385 * - for all completed CSs we take the earliest timestamp. in hl_cs_poll_fences()
2392 mcs_data->update_ts = false; in hl_cs_poll_fences()
2393 mcs_data->gone_cs = true; in hl_cs_poll_fences()
2394 } else if (mcs_data->update_ts && in hl_cs_poll_fences()
2395 (ktime_compare(fence->timestamp, in hl_cs_poll_fences()
2396 ktime_set(0, 0)) > 0) && in hl_cs_poll_fences()
2397 (ktime_compare(fence->timestamp, first_cs_time) < 0)) { in hl_cs_poll_fences()
2398 first_cs_time = fence->timestamp; in hl_cs_poll_fences()
2402 hl_fences_put(mcs_data->fence_arr, arr_len); in hl_cs_poll_fences()
2404 if (mcs_data->update_ts && in hl_cs_poll_fences()
2405 (ktime_compare(first_cs_time, max_ktime) != 0)) in hl_cs_poll_fences()
2406 mcs_data->timestamp = ktime_to_ns(first_cs_time); in hl_cs_poll_fences()
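The loop above reduces an array of fences to two results: a bitmap of the completed CSs and the earliest timestamp among them, with the timestamp invalidated as soon as any fence is already gone. The same reduction in isolation, a sketch over a toy fence array rather than the driver's struct hl_fence:

#include <stdbool.h>
#include <stdint.h>

struct toy_fence_state {
	bool completed;
	bool gone;		/* completed so long ago that the fence was recycled */
	int64_t timestamp_ns;	/* 0 means "no timestamp recorded" */
};

/* Returns the completion bitmap; *ts_ns is written only if a valid earliest
 * timestamp could be extracted from the completed fences. */
static uint64_t reduce_fences(const struct toy_fence_state *f, unsigned int len,
			      int64_t *ts_ns)
{
	uint64_t bitmap = 0;
	int64_t first = INT64_MAX;
	bool update_ts = true;
	unsigned int i;

	for (i = 0; i < len; i++) {
		if (!f[i].completed && !f[i].gone)
			continue;		/* still busy, no bit set */

		bitmap |= 1ULL << i;

		if (f[i].gone)
			update_ts = false;	/* cannot trust any timestamp */
		else if (update_ts && f[i].timestamp_ns > 0 &&
			 f[i].timestamp_ns < first)
			first = f[i].timestamp_ns;
	}

	if (update_ts && first != INT64_MAX)
		*ts_ns = first;

	return bitmap;
}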
2416 int rc = 0; in _hl_cs_wait_ioctl()
2419 *timestamp = 0; in _hl_cs_wait_ioctl()
2433 * hl_wait_multi_cs_completion_init - init completion structure
2453 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in hl_wait_multi_cs_completion_init()
2454 mcs_compl = &hdev->multi_cs_completion[i]; in hl_wait_multi_cs_completion_init()
2455 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2456 if (!mcs_compl->used) { in hl_wait_multi_cs_completion_init()
2457 mcs_compl->used = 1; in hl_wait_multi_cs_completion_init()
2458 mcs_compl->timestamp = 0; in hl_wait_multi_cs_completion_init()
2459 mcs_compl->stream_master_qid_map = stream_master_bitmap; in hl_wait_multi_cs_completion_init()
2460 reinit_completion(&mcs_compl->completion); in hl_wait_multi_cs_completion_init()
2461 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2464 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_init()
2468 dev_err(hdev->dev, in hl_wait_multi_cs_completion_init()
2469 "no available multi-CS completion structure\n"); in hl_wait_multi_cs_completion_init()
2470 return ERR_PTR(-ENOMEM); in hl_wait_multi_cs_completion_init()
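The lookup above is a simple slot allocator: scan a fixed array of completion structures and claim the first unused one under its per-slot lock. The same pattern in isolation, with pthread mutexes standing in for the driver's spinlocks and all names illustrative only:

#include <pthread.h>
#include <stdbool.h>

#define MAX_SLOTS 32	/* stand-in for MULTI_CS_MAX_USER_CTX */

struct toy_slot {
	pthread_mutex_t lock;
	bool used;
};

static struct toy_slot slots[MAX_SLOTS] = {
	[0 ... MAX_SLOTS - 1] = { .lock = PTHREAD_MUTEX_INITIALIZER },
};

/* Claim a free slot; returns its index, or -1 when all slots are busy */
static int claim_slot(void)
{
	int i;

	for (i = 0; i < MAX_SLOTS; i++) {
		pthread_mutex_lock(&slots[i].lock);
		if (!slots[i].used) {
			slots[i].used = true;	/* per-slot state is reset here */
			pthread_mutex_unlock(&slots[i].lock);
			return i;
		}
		pthread_mutex_unlock(&slots[i].lock);
	}

	return -1;	/* like the ERR_PTR(-ENOMEM) above */
}

/* Release must also be done under the slot lock, see the _fini helper below */
static void release_slot(int i)
{
	pthread_mutex_lock(&slots[i].lock);
	slots[i].used = false;
	pthread_mutex_unlock(&slots[i].lock);
}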
2476 * hl_wait_multi_cs_completion_fini - return completion structure and set as
2485 * free completion structure, do it under lock to be in-sync with the in hl_wait_multi_cs_completion_fini()
2488 spin_lock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
2489 mcs_compl->used = 0; in hl_wait_multi_cs_completion_fini()
2490 spin_unlock(&mcs_compl->lock); in hl_wait_multi_cs_completion_fini()
2494 * hl_wait_multi_cs_completion - wait for first CS to complete
2496 * @mcs_data: multi-CS internal data
2498 * @return 0 on success, otherwise a non-zero error code
2502 struct hl_device *hdev = mcs_data->ctx->hdev; in hl_wait_multi_cs_completion()
2507 mcs_data->stream_master_qid_map); in hl_wait_multi_cs_completion()
2512 &mcs_compl->completion, in hl_wait_multi_cs_completion()
2513 usecs_to_jiffies(mcs_data->timeout_us)); in hl_wait_multi_cs_completion()
2516 if (completion_rc > 0) in hl_wait_multi_cs_completion()
2517 mcs_data->timestamp = mcs_compl->timestamp; in hl_wait_multi_cs_completion()
2521 mcs_data->wait_status = completion_rc; in hl_wait_multi_cs_completion()
2523 return 0; in hl_wait_multi_cs_completion()
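wait_for_completion_interruptible_timeout() returns a positive remaining-jiffies count on wakeup, 0 on timeout, and -ERESTARTSYS when the task was signalled; the raw value is stored in mcs_data->wait_status and interpreted by the ioctl handler below. That three-way split, written out as a small helper for clarity (illustrative only, not driver code):

#include <errno.h>

#ifndef ERESTARTSYS
#define ERESTARTSYS 512	/* kernel-internal errno value, shown here only for the sketch */
#endif

enum mcs_outcome {
	MCS_REPOLL,		/* a CS completed: poll the fences again */
	MCS_BUSY,		/* timed out: report HL_WAIT_CS_STATUS_BUSY */
	MCS_INTERRUPTED,	/* signal arrived: the ioctl returns -EINTR */
};

static enum mcs_outcome classify_wait_status(long wait_status)
{
	if (wait_status > 0)
		return MCS_REPOLL;
	if (wait_status == -ERESTARTSYS)
		return MCS_INTERRUPTED;
	return MCS_BUSY;
}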
2527 * hl_multi_cs_completion_init - init array of multi-CS completion structures
2536 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { in hl_multi_cs_completion_init()
2537 mcs_cmpl = &hdev->multi_cs_completion[i]; in hl_multi_cs_completion_init()
2538 mcs_cmpl->used = 0; in hl_multi_cs_completion_init()
2539 spin_lock_init(&mcs_cmpl->lock); in hl_multi_cs_completion_init()
2540 init_completion(&mcs_cmpl->completion); in hl_multi_cs_completion_init()
2545 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
2548 * @data: pointer to multi-CS wait ioctl in/out args
2553 struct hl_device *hdev = hpriv->hdev; in hl_multi_cs_wait_ioctl()
2554 struct multi_cs_data mcs_data = {0}; in hl_multi_cs_wait_ioctl()
2556 struct hl_ctx *ctx = hpriv->ctx; in hl_multi_cs_wait_ioctl()
2564 if (!hdev->supports_wait_for_multi_cs) { in hl_multi_cs_wait_ioctl()
2565 dev_err(hdev->dev, "Wait for multi CS is not supported\n"); in hl_multi_cs_wait_ioctl()
2566 return -EPERM; in hl_multi_cs_wait_ioctl()
2569 seq_arr_len = args->in.seq_arr_len; in hl_multi_cs_wait_ioctl()
2572 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", in hl_multi_cs_wait_ioctl()
2574 return -EINVAL; in hl_multi_cs_wait_ioctl()
2581 return -ENOMEM; in hl_multi_cs_wait_ioctl()
2583 /* copy CS sequence array from user */ in hl_multi_cs_wait_ioctl()
2584 seq_arr = (void __user *) (uintptr_t) args->in.seq; in hl_multi_cs_wait_ioctl()
2587 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); in hl_multi_cs_wait_ioctl()
2588 rc = -EFAULT; in hl_multi_cs_wait_ioctl()
2595 rc = -ENOMEM; in hl_multi_cs_wait_ioctl()
2599 /* initialize the multi-CS internal data */ in hl_multi_cs_wait_ioctl()
2607 /* poll all CS fences, extract timestamp */ in hl_multi_cs_wait_ioctl()
2611 * skip wait for CS completion when one of the below is true: in hl_multi_cs_wait_ioctl()
2612 * - an error in the poll function in hl_multi_cs_wait_ioctl()
2613 * - one or more CS in the list completed in hl_multi_cs_wait_ioctl()
2614 * - the user called ioctl with timeout 0 in hl_multi_cs_wait_ioctl()
2616 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) in hl_multi_cs_wait_ioctl()
2619 /* wait (with timeout) for the first CS to be completed */ in hl_multi_cs_wait_ioctl()
2620 mcs_data.timeout_us = args->in.timeout_us; in hl_multi_cs_wait_ioctl()
2625 if (mcs_data.wait_status > 0) { in hl_multi_cs_wait_ioctl()
2627 * poll fences once again to update the CS map. in hl_multi_cs_wait_ioctl()
2635 * it got a completion) we expect to see at least one CS in hl_multi_cs_wait_ioctl()
2639 dev_warn_ratelimited(hdev->dev, in hl_multi_cs_wait_ioctl()
2640 "Multi-CS got completion on wait but no CS completed\n"); in hl_multi_cs_wait_ioctl()
2641 rc = -EFAULT; in hl_multi_cs_wait_ioctl()
2655 if (mcs_data.wait_status == -ERESTARTSYS) { in hl_multi_cs_wait_ioctl()
2656 dev_err_ratelimited(hdev->dev, in hl_multi_cs_wait_ioctl()
2657 "user process got signal while waiting for Multi-CS\n"); in hl_multi_cs_wait_ioctl()
2658 return -EINTR; in hl_multi_cs_wait_ioctl()
2662 memset(args, 0, sizeof(*args)); in hl_multi_cs_wait_ioctl()
2665 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_multi_cs_wait_ioctl()
2666 args->out.cs_completion_map = mcs_data.completion_bitmap; in hl_multi_cs_wait_ioctl()
2668 /* if the timestamp is not 0, it's valid */ in hl_multi_cs_wait_ioctl()
2670 args->out.timestamp_nsec = mcs_data.timestamp; in hl_multi_cs_wait_ioctl()
2671 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_multi_cs_wait_ioctl()
2674 /* update if any CS was gone */ in hl_multi_cs_wait_ioctl()
2676 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_multi_cs_wait_ioctl()
2678 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_multi_cs_wait_ioctl()
2681 return 0; in hl_multi_cs_wait_ioctl()
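The user-space counterpart of this ioctl passes an array of CS sequence numbers and gets back a bitmap of the ones that completed. A sketch, assuming the uapi wait definitions in <misc/habanalabs.h> (HL_IOCTL_WAIT_CS, union hl_wait_cs_args and the HL_WAIT_CS_FLAGS_MULTI_CS flag, none of which appear in this listing):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uapi header; install path may differ */

/* Wait up to timeout_us for any of the given CSs and report which completed */
static int wait_multi_cs(int fd, const uint64_t *seqs, uint32_t num_seqs,
			 uint64_t timeout_us)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.flags = HL_WAIT_CS_FLAGS_MULTI_CS;
	args.in.seq = (uint64_t) (uintptr_t) seqs;	/* array of sequence numbers */
	args.in.seq_arr_len = num_seqs;
	args.in.timeout_us = timeout_us;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
		return -1;			/* e.g. errno == EINTR on a signal */

	if (args.out.status == HL_WAIT_CS_STATUS_COMPLETED) {
		printf("completion map 0x%llx\n",
		       (unsigned long long) args.out.cs_completion_map);
		if (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD)
			printf("earliest completion at %lld ns\n",
			       (long long) args.out.timestamp_nsec);
	}

	return args.out.status;	/* HL_WAIT_CS_STATUS_COMPLETED or _BUSY */
}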
2686 struct hl_device *hdev = hpriv->hdev; in hl_cs_wait_ioctl()
2689 u64 seq = args->in.seq; in hl_cs_wait_ioctl()
2693 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, in hl_cs_wait_ioctl()
2696 if (rc == -ERESTARTSYS) { in hl_cs_wait_ioctl()
2697 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
2698 "user process got signal while waiting for CS handle %llu\n", in hl_cs_wait_ioctl()
2700 return -EINTR; in hl_cs_wait_ioctl()
2703 memset(args, 0, sizeof(*args)); in hl_cs_wait_ioctl()
2706 if (rc == -ETIMEDOUT) { in hl_cs_wait_ioctl()
2707 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
2708 "CS %llu has timed-out while user process is waiting for it\n", in hl_cs_wait_ioctl()
2710 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; in hl_cs_wait_ioctl()
2711 } else if (rc == -EIO) { in hl_cs_wait_ioctl()
2712 dev_err_ratelimited(hdev->dev, in hl_cs_wait_ioctl()
2713 "CS %llu has been aborted while user process is waiting for it\n", in hl_cs_wait_ioctl()
2715 args->out.status = HL_WAIT_CS_STATUS_ABORTED; in hl_cs_wait_ioctl()
2721 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; in hl_cs_wait_ioctl()
2722 args->out.timestamp_nsec = timestamp; in hl_cs_wait_ioctl()
2727 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; in hl_cs_wait_ioctl()
2730 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_cs_wait_ioctl()
2734 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_cs_wait_ioctl()
2738 return 0; in hl_cs_wait_ioctl()
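Driving the single-CS path above from user space means passing the sequence returned by the submission ioctl and interpreting out.status. A sketch, again assuming the uapi names HL_IOCTL_WAIT_CS and union hl_wait_cs_args; the status and flag constants are the ones used in the handler above:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uapi header; install path may differ */

/* Block up to timeout_us for one CS; returns the HL_WAIT_CS_STATUS_* value */
static int wait_cs(int fd, uint64_t seq, uint64_t timeout_us)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.seq = seq;
	args.in.timeout_us = timeout_us;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
		return -1;		/* errno == EINTR if a signal arrived */

	switch (args.out.status) {
	case HL_WAIT_CS_STATUS_COMPLETED:
		if (args.out.flags & HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD)
			printf("CS %llu done at %lld ns\n",
			       (unsigned long long) seq,
			       (long long) args.out.timestamp_nsec);
		if (args.out.flags & HL_WAIT_CS_STATUS_FLAG_GONE)
			printf("CS %llu completed long ago, fence recycled\n",
			       (unsigned long long) seq);
		break;
	case HL_WAIT_CS_STATUS_BUSY:		/* still running when timeout_us elapsed */
	case HL_WAIT_CS_STATUS_TIMEDOUT:	/* the CS itself timed out on the device */
	case HL_WAIT_CS_STATUS_ABORTED:		/* the CS was aborted, e.g. by a reset */
		break;
	}

	return args.out.status;
}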
2751 int rc = 0; in _hl_interrupt_wait_ioctl()
2763 return -ENOMEM; in _hl_interrupt_wait_ioctl()
2766 hl_fence_init(&pend->fence, ULONG_MAX); in _hl_interrupt_wait_ioctl()
2769 interrupt = &hdev->common_user_interrupt; in _hl_interrupt_wait_ioctl()
2771 interrupt = &hdev->user_interrupt[interrupt_offset]; in _hl_interrupt_wait_ioctl()
2776 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
2777 list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head); in _hl_interrupt_wait_ioctl()
2778 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
2784 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl()
2785 rc = -EFAULT; in _hl_interrupt_wait_ioctl()
2799 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, in _hl_interrupt_wait_ioctl()
2805 if (completion_rc > 0) { in _hl_interrupt_wait_ioctl()
2806 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
2812 reinit_completion(&pend->fence.completion); in _hl_interrupt_wait_ioctl()
2813 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
2816 dev_err(hdev->dev, "Failed to copy completion value from user\n"); in _hl_interrupt_wait_ioctl()
2817 rc = -EFAULT; in _hl_interrupt_wait_ioctl()
2828 } else if (completion_rc == -ERESTARTSYS) { in _hl_interrupt_wait_ioctl()
2829 dev_err_ratelimited(hdev->dev, in _hl_interrupt_wait_ioctl()
2831 interrupt->interrupt_id); in _hl_interrupt_wait_ioctl()
2832 rc = -EINTR; in _hl_interrupt_wait_ioctl()
2838 spin_lock_irqsave(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
2839 list_del(&pend->wait_list_node); in _hl_interrupt_wait_ioctl()
2840 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); in _hl_interrupt_wait_ioctl()
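The core condition of the interrupt wait above is "the 32-bit value the user points at has reached the target": the driver re-reads the value from user memory after every interrupt-driven wakeup and otherwise blocks on the pending node's completion. A deliberately simplified stand-alone model of that loop; a sleep-and-recheck stub replaces the completion that the MSI-X handler signals in the driver, and the ">=" comparison is an assumption based on the flow shown here:

#include <errno.h>
#include <stdint.h>
#include <unistd.h>

int wait_for_user_value(const volatile uint32_t *addr, uint32_t target,
			long timeout_us)
{
	long waited_us = 0;

	while (*addr < target) {
		if (waited_us >= timeout_us)
			return -EBUSY;	/* reported as HL_WAIT_CS_STATUS_BUSY */
		usleep(100);		/* the driver blocks on a completion instead */
		waited_us += 100;
	}

	return 0;			/* reported as HL_WAIT_CS_STATUS_COMPLETED */
}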
2851 struct hl_device *hdev = hpriv->hdev; in hl_interrupt_wait_ioctl()
2857 prop = &hdev->asic_prop; in hl_interrupt_wait_ioctl()
2859 if (!prop->user_interrupt_count) { in hl_interrupt_wait_ioctl()
2860 dev_err(hdev->dev, "no user interrupts allowed"); in hl_interrupt_wait_ioctl()
2861 return -EPERM; in hl_interrupt_wait_ioctl()
2865 FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); in hl_interrupt_wait_ioctl()
2867 first_interrupt = prop->first_available_user_msix_interrupt; in hl_interrupt_wait_ioctl()
2868 last_interrupt = prop->first_available_user_msix_interrupt + in hl_interrupt_wait_ioctl()
2869 prop->user_interrupt_count - 1; in hl_interrupt_wait_ioctl()
2873 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); in hl_interrupt_wait_ioctl()
2874 return -EINVAL; in hl_interrupt_wait_ioctl()
2880 interrupt_offset = interrupt_id - first_interrupt; in hl_interrupt_wait_ioctl()
2882 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, in hl_interrupt_wait_ioctl()
2883 args->in.interrupt_timeout_us, args->in.addr, in hl_interrupt_wait_ioctl()
2884 args->in.target, interrupt_offset, &status); in hl_interrupt_wait_ioctl()
2887 if (rc != -EINTR) in hl_interrupt_wait_ioctl()
2888 dev_err_ratelimited(hdev->dev, in hl_interrupt_wait_ioctl()
2894 memset(args, 0, sizeof(*args)); in hl_interrupt_wait_ioctl()
2898 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; in hl_interrupt_wait_ioctl()
2902 args->out.status = HL_WAIT_CS_STATUS_BUSY; in hl_interrupt_wait_ioctl()
2906 return 0; in hl_interrupt_wait_ioctl()
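From user space, the interrupt wait is requested through the same wait ioctl by encoding the interrupt ID into the flags field (the driver extracts it with FIELD_GET and HL_WAIT_CS_FLAGS_INTERRUPT_MASK above) and supplying the address and target value. A sketch; HL_WAIT_CS_FLAGS_INTERRUPT and the exact uapi layout are assumptions taken from the uapi header, not from this listing:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* uapi header; install path may differ */

/* Place 'id' into the interrupt-ID bit-field of the wait flags */
static uint32_t interrupt_flags(uint32_t id)
{
	uint32_t shift = __builtin_ctz(HL_WAIT_CS_FLAGS_INTERRUPT_MASK);

	return HL_WAIT_CS_FLAGS_INTERRUPT |
	       ((id << shift) & HL_WAIT_CS_FLAGS_INTERRUPT_MASK);
}

/* Wait until the u32 at 'addr' (device-visible memory) reaches 'target' */
static int wait_on_interrupt(int fd, uint32_t interrupt_id, uint64_t addr,
			     uint64_t target, uint32_t timeout_us)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.flags = interrupt_flags(interrupt_id);
	args.in.addr = addr;
	args.in.target = target;
	args.in.interrupt_timeout_us = timeout_us;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
		return -1;

	return args.out.status;	/* HL_WAIT_CS_STATUS_COMPLETED or _BUSY */
}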
2912 u32 flags = args->in.flags; in hl_wait_ioctl()
2918 if (!hl_device_operational(hpriv->hdev, NULL)) in hl_wait_ioctl()
2919 return -EPERM; in hl_wait_ioctl()