/* Copyright (c) 2021 Intel Corporation
 * SPDX-License-Identifier: Apache-2.0
 */
#ifndef __INTEL_ADSP_CPU_INIT_H
#define __INTEL_ADSP_CPU_INIT_H

#include
#include
#include
#include
#include

#define MEMCTL_VALUE (MEMCTL_INV_EN | MEMCTL_ICWU_MASK | MEMCTL_DCWA_MASK | \
		      MEMCTL_DCWU_MASK | MEMCTL_L0IBUF_EN)

#define ATOMCTL_BY_RCW BIT(0) /* RCW Transaction for Bypass Memory */
#define ATOMCTL_WT_RCW BIT(2) /* RCW Transaction for Writethrough Cacheable Memory */
#define ATOMCTL_WB_RCW BIT(4) /* RCW Transaction for Writeback Cacheable Memory */
#define ATOMCTL_VALUE (ATOMCTL_BY_RCW | ATOMCTL_WT_RCW | ATOMCTL_WB_RCW)

#ifdef CONFIG_INTEL_ADSP_MEMORY_IS_MIRRORED
/* Utility to generate an unrolled and optimal[1] code sequence to set
 * the RPO TLB registers (contra the HAL cacheattr macros, which
 * generate larger code and can't be called from C), based on the
 * KERNEL_COHERENCE configuration in use.  Selects RPO attribute "2"
 * for regions (including MMIO registers in region zero) which want to
 * bypass L1, "4" for the cached region which wants writeback, and
 * "15" (invalid) elsewhere.
 *
 * Note that on cores that have the "translation" option set, we need
 * to put an identity mapping in the high bits.  Also per spec
 * changing the current code region (by definition cached) requires
 * that WITLB be followed by an ISYNC and that both instructions live
 * in the same cache line (two 3-byte instructions fit in an 8-byte
 * aligned region, so that's guaranteed not to cross a cache line
 * boundary).
 *
 * [1] With the sole exception of gcc's infuriating insistence on
 * emitting a precomputed literal for addr + addrincr instead of
 * computing it with a single ADD instruction from values it already
 * has in registers.  Explicitly assigning the variables to registers
 * via an attribute works, but then emits needless MOV instructions
 * instead.  I tell myself it's just 32 bytes of .text, but...  Sigh.
 */
#define _REGION_ATTR(r)						\
	((r) == 0 ? 2 :						\
	 ((r) == CONFIG_INTEL_ADSP_CACHED_REGION ? 4 :		\
	  ((r) == CONFIG_INTEL_ADSP_UNCACHED_REGION ? 2 : 15)))

#define _SET_ONE_TLB(region) do {				\
	uint32_t attr = _REGION_ATTR(region);			\
	if (XCHAL_HAVE_XLT_CACHEATTR) {				\
		attr |= addr; /* RPO with translation */	\
	}							\
	if (region != CONFIG_INTEL_ADSP_CACHED_REGION) {	\
		__asm__ volatile("wdtlb %0, %1; witlb %0, %1"	\
				 :: "r"(attr), "r"(addr));	\
	} else {						\
		__asm__ volatile("wdtlb %0, %1"			\
				 :: "r"(attr), "r"(addr));	\
		__asm__ volatile("j 1f; .align 8; 1:");		\
		__asm__ volatile("witlb %0, %1; isync"		\
				 :: "r"(attr), "r"(addr));	\
	}							\
	addr += addrincr;					\
} while (0)

/**
 * @brief Setup RPO TLB registers.
 */
#define SET_RPO_TLB()							\
	do {								\
		register uint32_t addr = 0, addrincr = 0x20000000;	\
		FOR_EACH(_SET_ONE_TLB, (;), 0, 1, 2, 3, 4, 5, 6, 7);	\
	} while (0)

#endif /* CONFIG_INTEL_ADSP_MEMORY_IS_MIRRORED */
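
/* Illustrative expansion (a sketch only; the region indices below are
 * hypothetical Kconfig values, not anything defined by this header).
 * Assuming CONFIG_INTEL_ADSP_CACHED_REGION=4 and
 * CONFIG_INTEL_ADSP_UNCACHED_REGION=5, SET_RPO_TLB() walks the eight
 * 512MB (0x20000000) regions and programs:
 *
 *   region 0 @ 0x00000000: attr  2  (bypass; MMIO and registers)
 *   region 4 @ 0x80000000: attr  4  (writeback-cached mapping)
 *   region 5 @ 0xA0000000: attr  2  (uncached/bypass mapping)
 *   remaining regions:     attr 15  (invalid)
 *
 * On cores with the translation option, each attribute word also
 * carries the region base address in its high bits (the "attr |= addr"
 * above), i.e. an identity mapping.
 */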

/* Low-level CPU initialization.  Call this immediately after entering
 * C code to initialize the cache, protection and synchronization
 * features.
 */
static ALWAYS_INLINE void cpu_early_init(void)
{
	uint32_t reg;

#ifdef CONFIG_ADSP_NEED_POWER_ON_CACHE
	/* First, we need to power the cache SRAM banks on!  Write a bit
	 * for each cache way in the bottom half of the L1CCFG register
	 * and poll the top half for them to turn on.
	 */
	uint32_t dmask = BIT(ADSP_CxL1CCAP_DCMWC) - 1;
	uint32_t imask = BIT(ADSP_CxL1CCAP_ICMWC) - 1;
	uint32_t waymask = (imask << 8) | dmask;

	ADSP_CxL1CCFG_REG = waymask;
	while (((ADSP_CxL1CCFG_REG >> 16) & waymask) != waymask) {
	}

	/* Prefetcher also power gates, same interface */
	ADSP_CxL1PCFG_REG = 1;
	while ((ADSP_CxL1PCFG_REG & 0x10000) == 0) {
	}
#endif

	/* Now set up the Xtensa CPU to enable the cache logic.  The
	 * details of the fields are somewhat complicated, but per the
	 * ISA ref: "Turning on caches at power-up usually consists of
	 * writing a constant with bits[31:8] all 1's to MEMCTL.".
	 * Also set bit 0 to enable the LOOP extension instruction
	 * fetch buffer.
	 */
#if XCHAL_USE_MEMCTL
	reg = MEMCTL_VALUE;
	XTENSA_WSR("MEMCTL", reg);
	__asm__ volatile("rsync");
#endif

#if XCHAL_HAVE_THREADPTR
	reg = 0;
	XTENSA_WUR("THREADPTR", reg);
#endif

	/* Likewise enable prefetching.  Sadly these values are not
	 * architecturally defined by Xtensa (they're just documented
	 * as priority hints), so this constant is just copied from
	 * SOF for now.  If we care about prefetch priority tuning
	 * we're supposed to ask Cadence I guess.
	 */
	reg = ADSP_L1_CACHE_PREFCTL_VALUE;
	XTENSA_WSR("PREFCTL", reg);
	__asm__ volatile("rsync");

	/* Finally we need to enable the cache in the Region
	 * Protection Option "TLB" entries.  The hardware defaults
	 * have this set to RW/uncached everywhere.
	 *
	 * If we have the MMU enabled, we don't need to do this right
	 * now.  Let's use the default configuration and properly
	 * configure the MMU when running from RAM.
	 */
#if defined(CONFIG_INTEL_ADSP_MEMORY_IS_MIRRORED) && !defined(CONFIG_MMU)
	SET_RPO_TLB();
#endif /* CONFIG_INTEL_ADSP_MEMORY_IS_MIRRORED && !CONFIG_MMU */

	/* Initialize ATOMCTL: Hardware defaults for S32C1I use
	 * "internal" operations, meaning they are atomic only WRT the
	 * local CPU!  We need external transactions on the shared
	 * bus.
	 */
	reg = ATOMCTL_VALUE;
	XTENSA_WSR("ATOMCTL", reg);

	/* Initialize interrupts to "disabled" */
	reg = 0;
	XTENSA_WSR("INTENABLE", reg);

	/* Finally VECBASE.  Note that on core 0 startup, we're still
	 * running in IMR and the vectors at this address won't be
	 * copied into HP-SRAM until later.  That's OK, as interrupts
	 * are still disabled at this stage and will remain so
	 * consistently until Zephyr switches into the main thread.
	 */
	reg = VECBASE_RESET_PADDR_SRAM;
	XTENSA_WSR("VECBASE", reg);
}

#endif /* __INTEL_ADSP_CPU_INIT_H */
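
/* Usage sketch (hypothetical caller; nothing below is defined by this
 * header): per the comment above, SoC boot code calls cpu_early_init()
 * as close to C entry as possible, before the cache or any cacheable
 * data is relied upon, roughly:
 *
 *     void boot_entry_c(void)          // hypothetical early entry point
 *     {
 *             cpu_early_init();        // caches, RPO TLB, ATOMCTL, VECBASE
 *             // ... continue with vector copy / kernel startup ...
 *     }
 */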