Lines matching refs: kfd
527 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
529 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
531 static int kfd_resume(struct kfd_dev *kfd);
536 struct kfd_dev *kfd; in kgd2kfd_probe() local
555 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); in kgd2kfd_probe()
556 if (!kfd) in kgd2kfd_probe()
563 kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kgd); in kgd2kfd_probe()
565 !kfd->pci_atomic_requested) { in kgd2kfd_probe()
569 kfree(kfd); in kgd2kfd_probe()
573 kfd->kgd = kgd; in kgd2kfd_probe()
574 kfd->device_info = device_info; in kgd2kfd_probe()
575 kfd->pdev = pdev; in kgd2kfd_probe()
576 kfd->init_complete = false; in kgd2kfd_probe()
577 kfd->kfd2kgd = f2g; in kgd2kfd_probe()
578 atomic_set(&kfd->compute_profile, 0); in kgd2kfd_probe()
580 mutex_init(&kfd->doorbell_mutex); in kgd2kfd_probe()
581 memset(&kfd->doorbell_available_index, 0, in kgd2kfd_probe()
582 sizeof(kfd->doorbell_available_index)); in kgd2kfd_probe()
584 atomic_set(&kfd->sram_ecc_flag, 0); in kgd2kfd_probe()
586 ida_init(&kfd->doorbell_ida); in kgd2kfd_probe()
588 return kfd; in kgd2kfd_probe()
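
The probe lines above (kgd2kfd_probe) allocate the per-device struct kfd_dev with kzalloc, record whether the GPU can honour PCIe atomics via amdgpu_amdkfd_have_atomics_support(), and free the structure again when a device that requires atomics cannot get them; full initialization is deferred to kgd2kfd_device_init(). A minimal userspace sketch of that allocate, query, bail-out pattern (probe_device, device_ctx and hw_supports_atomics are hypothetical stand-ins, not KFD symbols):

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct device_ctx {
        bool atomics_requested;   /* result of the capability query */
        bool init_complete;       /* set only after full init succeeds */
    };

    /* Hypothetical capability query standing in for
     * amdgpu_amdkfd_have_atomics_support(). */
    static bool hw_supports_atomics(void) { return true; }

    static struct device_ctx *probe_device(bool needs_atomics)
    {
        struct device_ctx *ctx = calloc(1, sizeof(*ctx));  /* like kzalloc() */
        if (!ctx)
            return NULL;

        ctx->atomics_requested = hw_supports_atomics();
        if (needs_atomics && !ctx->atomics_requested) {
            /* Device cannot work without PCIe atomics: undo the allocation. */
            free(ctx);
            return NULL;
        }

        ctx->init_complete = false;   /* completed later, in device_init */
        return ctx;
    }

    int main(void)
    {
        struct device_ctx *ctx = probe_device(true);

        printf("probe %s\n", ctx ? "succeeded" : "failed");
        free(ctx);
        return 0;
    }
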
591 static void kfd_cwsr_init(struct kfd_dev *kfd) in kfd_cwsr_init() argument
593 if (cwsr_enable && kfd->device_info->supports_cwsr) { in kfd_cwsr_init()
594 if (kfd->device_info->asic_family < CHIP_VEGA10) { in kfd_cwsr_init()
596 kfd->cwsr_isa = cwsr_trap_gfx8_hex; in kfd_cwsr_init()
597 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); in kfd_cwsr_init()
598 } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { in kfd_cwsr_init()
600 kfd->cwsr_isa = cwsr_trap_arcturus_hex; in kfd_cwsr_init()
601 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); in kfd_cwsr_init()
602 } else if (kfd->device_info->asic_family < CHIP_NAVI10) { in kfd_cwsr_init()
604 kfd->cwsr_isa = cwsr_trap_gfx9_hex; in kfd_cwsr_init()
605 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); in kfd_cwsr_init()
606 } else if (kfd->device_info->asic_family < CHIP_SIENNA_CICHLID) { in kfd_cwsr_init()
608 kfd->cwsr_isa = cwsr_trap_nv1x_hex; in kfd_cwsr_init()
609 kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); in kfd_cwsr_init()
612 kfd->cwsr_isa = cwsr_trap_gfx10_hex; in kfd_cwsr_init()
613 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); in kfd_cwsr_init()
616 kfd->cwsr_enabled = true; in kfd_cwsr_init()
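
kfd_cwsr_init() selects one of the compiled-in CWSR trap-handler images by testing the ASIC family in order: pre-Vega10 parts get the gfx8 image, Arcturus gets its own image, the remaining pre-Navi10 parts get gfx9, pre-Sienna-Cichlid parts get the nv1x image, and everything newer gets gfx10. A hedged sketch of that selection ladder, with invented enum values and one-byte arrays standing in for the real cwsr_trap_*_hex tables:

    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical family codes; only their relative order matters here. */
    enum asic_family { CHIP_CARRIZO = 1, CHIP_VEGA10, CHIP_RAVEN,
                       CHIP_ARCTURUS, CHIP_NAVI10, CHIP_SIENNA_CICHLID };

    /* One-byte stand-ins for the cwsr_trap_*_hex firmware tables. */
    static const unsigned char trap_gfx8[]     = { 0x00 };
    static const unsigned char trap_gfx9[]     = { 0x00 };
    static const unsigned char trap_arcturus[] = { 0x00 };
    static const unsigned char trap_nv1x[]     = { 0x00 };
    static const unsigned char trap_gfx10[]    = { 0x00 };

    static void select_cwsr_isa(enum asic_family family,
                                const unsigned char **isa, size_t *size)
    {
        if (family < CHIP_VEGA10) {                 /* gfx8 generation */
            *isa = trap_gfx8;      *size = sizeof(trap_gfx8);
        } else if (family == CHIP_ARCTURUS) {       /* must precede the gfx9 range */
            *isa = trap_arcturus;  *size = sizeof(trap_arcturus);
        } else if (family < CHIP_NAVI10) {          /* remaining gfx9 parts */
            *isa = trap_gfx9;      *size = sizeof(trap_gfx9);
        } else if (family < CHIP_SIENNA_CICHLID) {  /* gfx10.1 */
            *isa = trap_nv1x;      *size = sizeof(trap_nv1x);
        } else {                                    /* gfx10.3 and newer */
            *isa = trap_gfx10;     *size = sizeof(trap_gfx10);
        }
    }

    int main(void)
    {
        const unsigned char *isa;
        size_t size;

        select_cwsr_isa(CHIP_RAVEN, &isa, &size);
        printf("selected a %zu-byte trap image\n", size);
        return 0;
    }
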
620 static int kfd_gws_init(struct kfd_dev *kfd) in kfd_gws_init() argument
624 if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) in kfd_gws_init()
628 || (kfd->device_info->asic_family == CHIP_VEGA10 in kfd_gws_init()
629 && kfd->mec2_fw_version >= 0x81b3) in kfd_gws_init()
630 || (kfd->device_info->asic_family >= CHIP_VEGA12 in kfd_gws_init()
631 && kfd->device_info->asic_family <= CHIP_RAVEN in kfd_gws_init()
632 && kfd->mec2_fw_version >= 0x1b3) in kfd_gws_init()
633 || (kfd->device_info->asic_family == CHIP_ARCTURUS in kfd_gws_init()
634 && kfd->mec2_fw_version >= 0x30)) in kfd_gws_init()
635 ret = amdgpu_amdkfd_alloc_gws(kfd->kgd, in kfd_gws_init()
636 amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws); in kfd_gws_init()
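
kfd_gws_init() only allocates GWS when the HWS scheduler is in use and the MEC2 firmware is new enough for the given ASIC (Vega10 needs >= 0x81b3, Vega12 through Raven >= 0x1b3, Arcturus >= 0x30). The chained || condition above can also be read as a small lookup table; the sketch below expresses it that way with hypothetical numeric family codes, purely as an illustration of the gating logic:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical family codes and the minimum MEC2 firmware each range
     * needs for GWS, restating the chained || test as a table. */
    struct gws_fw_req { int family_min; int family_max; uint32_t min_mec2_fw; };

    static const struct gws_fw_req gws_reqs[] = {
        { 2, 2, 0x81b3 },   /* "Vega10"            */
        { 3, 4, 0x1b3  },   /* "Vega12" .. "Raven" */
        { 5, 5, 0x30   },   /* "Arcturus"          */
    };

    static bool gws_supported(int family, uint32_t mec2_fw)
    {
        for (size_t i = 0; i < sizeof(gws_reqs) / sizeof(gws_reqs[0]); i++)
            if (family >= gws_reqs[i].family_min &&
                family <= gws_reqs[i].family_max &&
                mec2_fw >= gws_reqs[i].min_mec2_fw)
                return true;
        return false;
    }

    int main(void)
    {
        printf("GWS on \"Arcturus\" fw 0x31: %s\n",
               gws_supported(5, 0x31) ? "yes" : "no");
        return 0;
    }
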
646 bool kgd2kfd_device_init(struct kfd_dev *kfd, in kgd2kfd_device_init() argument
652 kfd->ddev = ddev; in kgd2kfd_device_init()
653 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, in kgd2kfd_device_init()
655 kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, in kgd2kfd_device_init()
657 kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd, in kgd2kfd_device_init()
659 kfd->shared_resources = *gpu_resources; in kgd2kfd_device_init()
661 kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; in kgd2kfd_device_init()
662 kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; in kgd2kfd_device_init()
663 kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd in kgd2kfd_device_init()
664 - kfd->vm_info.first_vmid_kfd + 1; in kgd2kfd_device_init()
668 || (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) { in kgd2kfd_device_init()
671 hws_max_conc_proc, kfd->vm_info.vmid_num_kfd, in kgd2kfd_device_init()
672 kfd->vm_info.vmid_num_kfd); in kgd2kfd_device_init()
673 kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd; in kgd2kfd_device_init()
675 kfd->max_proc_per_quantum = hws_max_conc_proc; in kgd2kfd_device_init()
679 kfd->device_info->mqd_size_aligned; in kgd2kfd_device_init()
696 kfd->kgd, size, &kfd->gtt_mem, in kgd2kfd_device_init()
697 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, in kgd2kfd_device_init()
706 if (kfd_gtt_sa_init(kfd, size, 512) != 0) { in kgd2kfd_device_init()
711 if (kfd_doorbell_init(kfd)) { in kgd2kfd_device_init()
717 kfd->hive_id = amdgpu_amdkfd_get_hive_id(kfd->kgd); in kgd2kfd_device_init()
719 kfd->unique_id = amdgpu_amdkfd_get_unique_id(kfd->kgd); in kgd2kfd_device_init()
721 kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); in kgd2kfd_device_init()
723 if (kfd_interrupt_init(kfd)) { in kgd2kfd_device_init()
728 kfd->dqm = device_queue_manager_init(kfd); in kgd2kfd_device_init()
729 if (!kfd->dqm) { in kgd2kfd_device_init()
737 if (kfd_gws_init(kfd)) { in kgd2kfd_device_init()
739 amdgpu_amdkfd_get_num_gws(kfd->kgd)); in kgd2kfd_device_init()
744 kfd_double_confirm_iommu_support(kfd); in kgd2kfd_device_init()
746 if (kfd_iommu_device_init(kfd)) { in kgd2kfd_device_init()
751 kfd_cwsr_init(kfd); in kgd2kfd_device_init()
753 if (kfd_resume(kfd)) in kgd2kfd_device_init()
756 kfd->dbgmgr = NULL; in kgd2kfd_device_init()
758 if (kfd_topology_add_device(kfd)) { in kgd2kfd_device_init()
763 kfd_smi_init(kfd); in kgd2kfd_device_init()
765 kfd->init_complete = true; in kgd2kfd_device_init()
766 dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor, in kgd2kfd_device_init()
767 kfd->pdev->device); in kgd2kfd_device_init()
770 kfd->dqm->sched_policy); in kgd2kfd_device_init()
778 device_queue_manager_uninit(kfd->dqm); in kgd2kfd_device_init()
780 kfd_interrupt_exit(kfd); in kgd2kfd_device_init()
782 kfd_doorbell_fini(kfd); in kgd2kfd_device_init()
784 kfd_gtt_sa_fini(kfd); in kgd2kfd_device_init()
786 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); in kgd2kfd_device_init()
788 if (kfd->gws) in kgd2kfd_device_init()
789 amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); in kgd2kfd_device_init()
792 kfd->pdev->vendor, kfd->pdev->device); in kgd2kfd_device_init()
794 return kfd->init_complete; in kgd2kfd_device_init()
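
kgd2kfd_device_init() is a long sequence of setup steps (GTT sub-allocator, doorbells, interrupts, device queue manager, GWS, IOMMU, CWSR, resume, topology), and the later fragments (device_queue_manager_uninit, kfd_interrupt_exit, kfd_doorbell_fini, kfd_gtt_sa_fini, the GTT and GWS frees) are the error labels that unwind those steps in reverse order when a later step fails. A compact, self-contained illustration of that goto-based unwind idiom, with hypothetical step_*_init/step_*_fini functions rather than the real KFD calls:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical setup steps standing in for kfd_gtt_sa_init(),
     * kfd_doorbell_init(), kfd_interrupt_init(), device_queue_manager_init(). */
    static bool step_a_init(void) { return true; }
    static void step_a_fini(void) { puts("undo step a"); }
    static bool step_b_init(void) { return true; }
    static void step_b_fini(void) { puts("undo step b"); }
    static bool step_c_init(void) { return false; }   /* simulate a late failure */

    static bool device_init(void)
    {
        if (!step_a_init())
            goto out;
        if (!step_b_init())
            goto undo_a;
        if (!step_c_init())
            goto undo_b;
        return true;                 /* everything up: init_complete */

    undo_b:                          /* unwind in reverse order of setup */
        step_b_fini();
    undo_a:
        step_a_fini();
    out:
        return false;
    }

    int main(void)
    {
        printf("device_init: %s\n", device_init() ? "ok" : "failed");
        return 0;
    }
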
797 void kgd2kfd_device_exit(struct kfd_dev *kfd) in kgd2kfd_device_exit() argument
799 if (kfd->init_complete) { in kgd2kfd_device_exit()
800 kgd2kfd_suspend(kfd, false); in kgd2kfd_device_exit()
801 device_queue_manager_uninit(kfd->dqm); in kgd2kfd_device_exit()
802 kfd_interrupt_exit(kfd); in kgd2kfd_device_exit()
803 kfd_topology_remove_device(kfd); in kgd2kfd_device_exit()
804 kfd_doorbell_fini(kfd); in kgd2kfd_device_exit()
805 ida_destroy(&kfd->doorbell_ida); in kgd2kfd_device_exit()
806 kfd_gtt_sa_fini(kfd); in kgd2kfd_device_exit()
807 amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); in kgd2kfd_device_exit()
808 if (kfd->gws) in kgd2kfd_device_exit()
809 amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); in kgd2kfd_device_exit()
812 kfree(kfd); in kgd2kfd_device_exit()
815 int kgd2kfd_pre_reset(struct kfd_dev *kfd) in kgd2kfd_pre_reset() argument
817 if (!kfd->init_complete) in kgd2kfd_pre_reset()
820 kfd_smi_event_update_gpu_reset(kfd, false); in kgd2kfd_pre_reset()
822 kfd->dqm->ops.pre_reset(kfd->dqm); in kgd2kfd_pre_reset()
824 kgd2kfd_suspend(kfd, false); in kgd2kfd_pre_reset()
826 kfd_signal_reset_event(kfd); in kgd2kfd_pre_reset()
836 int kgd2kfd_post_reset(struct kfd_dev *kfd) in kgd2kfd_post_reset() argument
840 if (!kfd->init_complete) in kgd2kfd_post_reset()
843 ret = kfd_resume(kfd); in kgd2kfd_post_reset()
848 atomic_set(&kfd->sram_ecc_flag, 0); in kgd2kfd_post_reset()
850 kfd_smi_event_update_gpu_reset(kfd, true); in kgd2kfd_post_reset()
860 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) in kgd2kfd_suspend() argument
862 if (!kfd->init_complete) in kgd2kfd_suspend()
872 kfd->dqm->ops.stop(kfd->dqm); in kgd2kfd_suspend()
873 kfd_iommu_suspend(kfd); in kgd2kfd_suspend()
876 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) in kgd2kfd_resume() argument
880 if (!kfd->init_complete) in kgd2kfd_resume()
883 ret = kfd_resume(kfd); in kgd2kfd_resume()
898 static int kfd_resume(struct kfd_dev *kfd) in kfd_resume() argument
902 err = kfd_iommu_resume(kfd); in kfd_resume()
906 kfd->pdev->vendor, kfd->pdev->device); in kfd_resume()
910 err = kfd->dqm->ops.start(kfd->dqm); in kfd_resume()
914 kfd->pdev->vendor, kfd->pdev->device); in kfd_resume()
921 kfd_iommu_suspend(kfd); in kfd_resume()
941 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) in kgd2kfd_interrupt() argument
947 if (!kfd->init_complete) in kgd2kfd_interrupt()
950 if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { in kgd2kfd_interrupt()
955 spin_lock_irqsave(&kfd->interrupt_lock, flags); in kgd2kfd_interrupt()
957 if (kfd->interrupts_active in kgd2kfd_interrupt()
958 && interrupt_is_wanted(kfd, ih_ring_entry, in kgd2kfd_interrupt()
960 && enqueue_ih_ring_entry(kfd, in kgd2kfd_interrupt()
962 kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work); in kgd2kfd_interrupt()
964 spin_unlock_irqrestore(&kfd->interrupt_lock, flags); in kgd2kfd_interrupt()
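
kgd2kfd_interrupt() is the top half: under kfd->interrupt_lock it checks that interrupts are active and that this entry is wanted, copies the IH ring entry into a software ring with enqueue_ih_ring_entry(), and defers the real processing to kfd->ih_wq via kfd_queue_work(). A toy userspace sketch of that copy-under-lock-then-defer shape, using a pthread mutex in place of the spinlock and a print in place of the workqueue (all names below are invented):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define ENTRY_SIZE   8            /* hypothetical ih_ring_entry_size */
    #define RING_ENTRIES 16

    static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned char ring[RING_ENTRIES][ENTRY_SIZE];
    static unsigned int ring_head;
    static bool interrupts_active = true;

    static bool entry_is_wanted(const void *entry) { (void)entry; return true; }
    static void queue_bottom_half_work(void) { puts("bottom half scheduled"); }

    /* Top half: copy the entry into a software ring under the lock and wake
     * the bottom half; heavy processing happens later, outside the ISR. */
    static void isr(const void *ih_ring_entry)
    {
        pthread_mutex_lock(&irq_lock);      /* spin_lock_irqsave() in the kernel */
        if (interrupts_active && entry_is_wanted(ih_ring_entry)) {
            memcpy(ring[ring_head++ % RING_ENTRIES], ih_ring_entry, ENTRY_SIZE);
            queue_bottom_half_work();
        }
        pthread_mutex_unlock(&irq_lock);
    }

    int main(void)
    {
        unsigned char entry[ENTRY_SIZE] = { 0x42 };

        isr(entry);
        return 0;
    }
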
1056 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, in kfd_gtt_sa_init() argument
1068 kfd->gtt_sa_chunk_size = chunk_size; in kfd_gtt_sa_init()
1069 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; in kfd_gtt_sa_init()
1071 num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) / in kfd_gtt_sa_init()
1074 kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL); in kfd_gtt_sa_init()
1076 if (!kfd->gtt_sa_bitmap) in kfd_gtt_sa_init()
1080 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); in kfd_gtt_sa_init()
1082 mutex_init(&kfd->gtt_sa_lock); in kfd_gtt_sa_init()
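
kfd_gtt_sa_init() sets up a simple sub-allocator over the GTT buffer: it remembers the chunk size, divides the buffer into buf_size / chunk_size chunks, and allocates a zeroed bitmap with one bit per chunk, rounded up to whole longs. A minimal userspace model of that sizing and bitmap setup (gtt_sa_init and struct gtt_sa are illustrative names only):

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define BITS_PER_LONG_SA (sizeof(unsigned long) * CHAR_BIT)

    /* One bit per fixed-size chunk; the bitmap is rounded up to whole longs. */
    struct gtt_sa {
        unsigned int chunk_size;
        unsigned int num_chunks;
        unsigned long *bitmap;
    };

    static int gtt_sa_init(struct gtt_sa *sa, unsigned int buf_size,
                           unsigned int chunk_size)
    {
        unsigned int num_longs;

        sa->chunk_size = chunk_size;
        sa->num_chunks = buf_size / chunk_size;
        num_longs = (sa->num_chunks + BITS_PER_LONG_SA - 1) / BITS_PER_LONG_SA;

        sa->bitmap = calloc(num_longs, sizeof(long));  /* zeroed: all chunks free */
        return sa->bitmap ? 0 : -1;
    }

    int main(void)
    {
        struct gtt_sa sa;

        if (gtt_sa_init(&sa, 512 * 1024, 512) == 0)
            printf("%u chunks of %u bytes\n", sa.num_chunks, sa.chunk_size);
        free(sa.bitmap);
        return 0;
    }
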
1088 static void kfd_gtt_sa_fini(struct kfd_dev *kfd) in kfd_gtt_sa_fini() argument
1090 mutex_destroy(&kfd->gtt_sa_lock); in kfd_gtt_sa_fini()
1091 kfree(kfd->gtt_sa_bitmap); in kfd_gtt_sa_fini()
1108 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, in kfd_gtt_sa_allocate() argument
1116 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) in kfd_gtt_sa_allocate()
1127 mutex_lock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1131 found = find_next_zero_bit(kfd->gtt_sa_bitmap, in kfd_gtt_sa_allocate()
1132 kfd->gtt_sa_num_of_chunks, in kfd_gtt_sa_allocate()
1138 if (found == kfd->gtt_sa_num_of_chunks) in kfd_gtt_sa_allocate()
1145 kfd->gtt_start_gpu_addr, in kfd_gtt_sa_allocate()
1147 kfd->gtt_sa_chunk_size); in kfd_gtt_sa_allocate()
1149 kfd->gtt_start_cpu_ptr, in kfd_gtt_sa_allocate()
1151 kfd->gtt_sa_chunk_size); in kfd_gtt_sa_allocate()
1157 if (size <= kfd->gtt_sa_chunk_size) { in kfd_gtt_sa_allocate()
1159 set_bit(found, kfd->gtt_sa_bitmap); in kfd_gtt_sa_allocate()
1164 cur_size = size - kfd->gtt_sa_chunk_size; in kfd_gtt_sa_allocate()
1167 find_next_zero_bit(kfd->gtt_sa_bitmap, in kfd_gtt_sa_allocate()
1168 kfd->gtt_sa_num_of_chunks, ++found); in kfd_gtt_sa_allocate()
1182 if (found == kfd->gtt_sa_num_of_chunks) in kfd_gtt_sa_allocate()
1186 if (cur_size <= kfd->gtt_sa_chunk_size) in kfd_gtt_sa_allocate()
1189 cur_size -= kfd->gtt_sa_chunk_size; in kfd_gtt_sa_allocate()
1200 set_bit(found, kfd->gtt_sa_bitmap); in kfd_gtt_sa_allocate()
1203 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1208 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1213 int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj) in kfd_gtt_sa_free() argument
1224 mutex_lock(&kfd->gtt_sa_lock); in kfd_gtt_sa_free()
1230 clear_bit(bit, kfd->gtt_sa_bitmap); in kfd_gtt_sa_free()
1232 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_free()
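
kfd_gtt_sa_allocate() walks the bitmap with find_next_zero_bit() looking for enough contiguous free chunks to cover the requested size, marks them with set_bit(), and derives the GPU and CPU addresses from the starting chunk index and the chunk size; kfd_gtt_sa_free() clears the same bits again under gtt_sa_lock. The sketch below shows only the contiguous-run search and release, over a plain bool array instead of a real bitmap:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define NUM_CHUNKS 16
    static bool used[NUM_CHUNKS];     /* stands in for the chunk bitmap */

    /* Find the first run of 'count' contiguous free chunks, mark it used and
     * return its starting index, or -1 if no such run exists. */
    static int sa_alloc(unsigned int count)
    {
        for (unsigned int start = 0; start + count <= NUM_CHUNKS; start++) {
            unsigned int n = 0;

            while (n < count && !used[start + n])
                n++;
            if (n == count) {                     /* big enough free run */
                memset(&used[start], true, count);
                return (int)start;
            }
            start += n;                 /* skip past the used chunk we hit */
        }
        return -1;
    }

    static void sa_free(int start, unsigned int count)
    {
        memset(&used[start], false, count);       /* clear_bit() equivalent */
    }

    int main(void)
    {
        int a = sa_alloc(3);
        int b = sa_alloc(2);

        printf("a=%d b=%d\n", a, b);
        sa_free(a, 3);
        printf("a 3-chunk run is free again at %d\n", sa_alloc(3));
        return 0;
    }
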
1238 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) in kgd2kfd_set_sram_ecc_flag() argument
1240 if (kfd) in kgd2kfd_set_sram_ecc_flag()
1241 atomic_inc(&kfd->sram_ecc_flag); in kgd2kfd_set_sram_ecc_flag()
1244 void kfd_inc_compute_active(struct kfd_dev *kfd) in kfd_inc_compute_active() argument
1246 if (atomic_inc_return(&kfd->compute_profile) == 1) in kfd_inc_compute_active()
1247 amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); in kfd_inc_compute_active()
1250 void kfd_dec_compute_active(struct kfd_dev *kfd) in kfd_dec_compute_active() argument
1252 int count = atomic_dec_return(&kfd->compute_profile); in kfd_dec_compute_active()
1255 amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); in kfd_dec_compute_active()
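
kfd_inc_compute_active() and kfd_dec_compute_active() keep an atomic count of compute-profile users and only touch the hardware on the edge transitions: the increment that takes the count to 1 leaves the idle profile, and the decrement that brings it back to 0 re-enters it. A self-contained C11 sketch of that refcount-style toggle (set_compute_idle is a hypothetical stand-in for amdgpu_amdkfd_set_compute_idle):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int compute_profile;

    static void set_compute_idle(int idle)     /* stand-in for the amdgpu call */
    {
        printf("compute idle = %d\n", idle);
    }

    static void inc_compute_active(void)
    {
        if (atomic_fetch_add(&compute_profile, 1) + 1 == 1)
            set_compute_idle(0);               /* first user: leave idle profile */
    }

    static void dec_compute_active(void)
    {
        int count = atomic_fetch_sub(&compute_profile, 1) - 1;

        if (count == 0)
            set_compute_idle(1);               /* last user gone: back to idle */
        else if (count < 0)
            fprintf(stderr, "unbalanced dec_compute_active\n");
    }

    int main(void)
    {
        inc_compute_active();    /* switches to the compute profile */
        inc_compute_active();    /* no-op: already active */
        dec_compute_active();    /* no-op: still one user */
        dec_compute_active();    /* switches back to idle */
        return 0;
    }
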
1259 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) in kgd2kfd_smi_event_throttle() argument
1261 if (kfd) in kgd2kfd_smi_event_throttle()
1262 kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask); in kgd2kfd_smi_event_throttle()