#!/usr/bin/env python3
# Copyright(c) 2022 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import struct
import logging
import asyncio
import time
import subprocess
import ctypes
import mmap
import argparse
import pty

start_output = True

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("cavs-fw")

PAGESZ = 4096
HUGEPAGESZ = 2 * 1024 * 1024
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp."

# SRAM windows. Base and stride varies depending on ADSP version
#
# Window 0 is the FW_STATUS area, and 4k after that the IPC "outbox"
# Window 1 is the IPC "inbox" (host-writable memory, just 384 bytes currently)
# Window 2 is used for debug slots (Zephyr shell is one user)
# Window 3 is winstream-formatted log output

WINDOW_BASE = 0x80000
WINDOW_STRIDE = 0x20000

WINDOW_BASE_ACE = 0x180000
WINDOW_STRIDE_ACE = 0x8000

DEBUG_SLOT_SIZE = 4096
DEBUG_SLOT_SHELL = 0
SHELL_RX_SIZE = 256
SHELL_MAX_VALID_SLOT_SIZE = 16777216

# pylint: disable=duplicate-code

# ADSPCS bits
CRST = 0
CSTALL = 8
SPA = 16
CPA = 24


class HDAStream:
    """Host-side handle for one HDA stream, backed by two buffers of buf_len.

    Construction maps the stream's register blocks relative to the
    module-level ``hdamem`` base address and immediately resets the
    stream.  ``config()`` must be called before ``write()`` because it is
    what allocates ``self.mem`` and records ``self.buf_len``.
    """

    def __init__(self, stream_id: int):
        self.stream_id = stream_id
        self.base = hdamem + 0x0080 + (stream_id * 0x20)
        log.info(f"Mapping registers for hda stream {self.stream_id} at base {self.base:x}")

        # Controller-wide registers, plus this stream's SPIB slot.
        self.hda = Regs(hdamem)
        self.hda.GCAP = 0x0000
        self.hda.GCTL = 0x0008
        self.hda.DPLBASE = 0x0070
        self.hda.DPUBASE = 0x0074
        self.hda.SPBFCH = 0x0700
        self.hda.SPBFCTL = 0x0704
        self.hda.PPCH = 0x0800
        self.hda.PPCTL = 0x0804
        self.hda.PPSTS = 0x0808
        self.hda.SPIB = 0x0708 + stream_id*0x08
        self.hda.freeze()

        # Per-stream descriptor registers.
        self.regs = Regs(self.base)
        self.regs.CTL = 0x00
        self.regs.STS = 0x03
        self.regs.LPIB = 0x04
        self.regs.CBL = 0x08
        self.regs.LVI = 0x0c
        self.regs.FIFOW = 0x0e
        self.regs.FIFOS = 0x10
        self.regs.FMT = 0x12
        self.regs.FIFOL = 0x14
        self.regs.BDPL = 0x18
        self.regs.BDPU = 0x1c
        self.regs.freeze()

        self.dbg0 = Regs(hdamem + 0x0084 + (0x20*stream_id))
        self.dbg0.DPIB = 0x00
        self.dbg0.EFIFOS = 0x10
        self.dbg0.freeze()

        self.reset()

    def __del__(self):
        self.reset()

    def config(self, buf_len: int):
        """Allocate the DMA buffers and program the stream descriptor."""
        log.info(f"Configuring stream {self.stream_id}")
        self.buf_len = buf_len
        log.info("Allocating huge page and setting up buffers")
        (self.mem, self.hugef, self.buf_list_addr,
         self.pos_buf_addr, self.n_bufs) = self.setup_buf(buf_len)

        log.info("Setting buffer list, length, and stream id and traffic priority bit")
        self.regs.CTL = ((self.stream_id & 0xFF) << 20) | (1 << 18) # must be set to something other than 0?
        self.regs.BDPU = (self.buf_list_addr >> 32) & 0xffffffff
        self.regs.BDPL = self.buf_list_addr & 0xffffffff
        self.regs.CBL = buf_len
        self.regs.LVI = self.n_bufs - 1
        self.mem.seek(0)
        self.debug()
        log.info(f"Configured stream {self.stream_id}")

    def write(self, data):
        """Copy up to buf_len bytes of data into both buffers and advance SPIB."""
        bufl = min(len(data), self.buf_len)
        log.info(f"Writing data to stream {self.stream_id}, len {bufl}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")
        self.mem[0:bufl] = data[0:bufl]
        self.mem[bufl:bufl+bufl] = data[0:bufl]
        self.hda.SPBFCTL |= (1 << self.stream_id)
        self.hda.SPIB += bufl
        log.info(f"Wrote data to stream {self.stream_id}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")

    def start(self):
        """Set the START bit (bit 1) in the stream's CTL register."""
        log.info(f"Starting stream {self.stream_id}, CTL {self.regs.CTL:x}")
        self.regs.CTL |= 2
        log.info(f"Started stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def stop(self):
        """Clear START, wait briefly, then set the enter-reset bit (bit 0)."""
        log.info(f"Stopping stream {self.stream_id}, CTL {self.regs.CTL:x}")
        # Clear only the START bit.  The previous "&= 2" did the opposite:
        # it kept START and wiped every other field of the register.
        self.regs.CTL &= ~2
        time.sleep(0.1)
        self.regs.CTL |= 1
        log.info(f"Stopped stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def setup_buf(self, buf_len: int):
        """Map a huge page and lay out two buffers followed by their BDL.

        Returns (mem, hugef, bdl_phys_addr, position_buf_phys_addr, 2).
        """
        (mem, phys_addr, hugef) = map_phys_mem(self.stream_id)

        log.info(f"Mapped 2M huge page at 0x{phys_addr:x} for buf size ({buf_len})")

        # create two buffers in the page of buf_len and mark them
        # in a buffer descriptor list for the hardware to use
        buf0_len = buf_len
        buf1_len = buf_len
        bdl_off = buf0_len + buf1_len
        # bdl is 2 (64bits, 16 bytes) per entry, we have two
        mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                                phys_addr,
                                                buf0_len,
                                                phys_addr + buf0_len,
                                                buf1_len)
        dpib_off = bdl_off+32

        # ensure buffer is initialized, sanity
        for i in range(0, buf_len*2):
            mem[i] = 0

        log.info("Filled the buffer descriptor list (BDL) for DMA.")
        return (mem, hugef, phys_addr + bdl_off, phys_addr+dpib_off, 2)

    def debug(self):
        """Dump this stream's registers at DEBUG level."""
        # Read PPCTL through this stream's own register view rather than the
        # module-level `hda` global, so the method is self-contained.
        log.debug("HDA %d: PPROC %d, CTL 0x%x, LPIB 0x%x, BDPU 0x%x, BDPL 0x%x, CBL 0x%x, LVI 0x%x",
                  self.stream_id, (self.hda.PPCTL >> self.stream_id) & 1, self.regs.CTL,
                  self.regs.LPIB, self.regs.BDPU,
                  self.regs.BDPL, self.regs.CBL, self.regs.LVI)
        log.debug("    FIFOW %d, FIFOS %d, FMT %x, FIFOL %d, DPIB %d, EFIFOS %d",
                  self.regs.FIFOW & 0x7, self.regs.FIFOS, self.regs.FMT, self.regs.FIFOL,
                  self.dbg0.DPIB, self.dbg0.EFIFOS)
        log.debug("    status: FIFORDY %d, DESE %d, FIFOE %d, BCIS %d",
                  (self.regs.STS >> 5) & 1, (self.regs.STS >> 4) & 1,
                  (self.regs.STS >> 3) & 1, (self.regs.STS >> 2) & 1)

    def reset(self):
        """Stop DMA, cycle the stream through reset and re-enable PROCEN."""
        # Turn DMA off and reset the stream.  Clearing START first is a
        # noop per the spec, but absolutely required for stability.
        # Apparently the reset doesn't stop the stream, and the next load
        # starts before it's ready and kills the load (and often the DSP).
        # The sleep too is required, on at least one board (a fast
        # chromebook) putting the two writes next each other also hangs
        # the DSP!
        log.info(f"Resetting stream {self.stream_id}")
        self.debug()
        self.regs.CTL &= ~2 # clear START
        time.sleep(0.1)
        # set enter reset bit
        self.regs.CTL = 1
        while (self.regs.CTL & 1) == 0: pass
        # clear enter reset bit to exit reset
        self.regs.CTL = 0
        while (self.regs.CTL & 1) == 1: pass

        log.info(f"Disable SPIB and set position 0 of stream {self.stream_id}")
        self.hda.SPBFCTL = 0
        self.hda.SPIB = 0

        #log.info("Setting dma position buffer and enable it")
        #self.hda.DPUBASE = self.pos_buf_addr >> 32 & 0xffffffff
        #self.hda.DPLBASE = self.pos_buf_addr & 0xfffffff0 | 1

        log.info(f"Enabling dsp capture (PROCEN) of stream {self.stream_id}")
        self.hda.PPCTL |= (1 << self.stream_id)

        self.debug()
        log.info(f"Reset stream {self.stream_id}")


def adsp_is_ace():
    """Return a truthy value when map_regs() detected an ACE device."""
    return ace15 or ace20 or ace30


def adsp_mem_window_config():
    """Return (base, stride) of the SRAM windows for the detected platform."""
    if adsp_is_ace():
        base = WINDOW_BASE_ACE
        stride = WINDOW_STRIDE_ACE
    else:
        base = WINDOW_BASE
        stride = WINDOW_STRIDE

    return (base, stride)


def map_regs(log_only):
    """Locate the audio PCI device, map its BARs and build register views.

    Sets the module globals cavs25/ace15/ace20/ace30, hdamem, bar4_mmap
    and bar4_mem.  Unless log_only is set, an already-bound kernel driver
    is unloaded.  Returns (hda, sd, dsp, hda_ostream_id).
    """
    try:
        p = runx("grep -iEl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    except subprocess.CalledProcessError:
        # if no device found, also try 40300 class no-DSP devices
        p = runx("grep -iEl 'PCI_CLASS=40300' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Platform/quirk detection.  ID lists cribbed from the SOF kernel driver
    global cavs25, ace15, ace20, ace30
    with open(f"{pcidir}/device") as devf:
        did = int(devf.read().rstrip(), 16)
    cavs25 = did in [ 0x43c8, 0x4b55, 0x4b58, 0x51c8, 0x51ca, 0x51cb, 0x51ce, 0x51cf, 0x54c8,
                      0x7ad0, 0xa0c8 ]
    ace15 = did in [ 0x7728, 0x7f50, 0x7e28 ]
    ace20 = did in [ 0xa828 ]
    ace30 = did in [ 0xe428 ]

    # Check sysfs for a loaded driver and remove it
    # NOTE(review): the nesting of this branch was reconstructed from a
    # whitespace-collapsed source - confirm against the reference file.
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        found_msg = f"Existing driver \"{mod}\" found"
        if log_only:
            log.info(found_msg)
        else:
            log.warning(found_msg + ", unloading module")
            runx(f"rmmod -f {mod}")
            # Disengage runtime power management so the kernel doesn't put it to sleep
            log.info(f"Forcing {pcidir}/power/control to always 'on'")
            with open(f"{pcidir}/power/control", "w") as ctrl:
                ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    global hdamem
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # Find the ID of the first output stream
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()

    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    global bar4_mmap
    global bar4_mem
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    if adsp_is_ace():
        dsp.HFDSSCS = 0x1000
        dsp.HFPWRCTL = 0x1d18 if ace15 or ace20 else 0x1d20
        dsp.HFPWRSTS = 0x1d1c if ace15 or ace20 else 0x1d24
        dsp.DSP2CXCTL_PRIMARY = 0x178d04
        dsp.HFIPCXTDR = 0x73200
        dsp.HFIPCXTDA = 0x73204
        dsp.HFIPCXIDR = 0x73210
        dsp.HFIPCXIDA = 0x73214
        dsp.HFIPCXCTL = 0x73228
        dsp.HFIPCXTDDY = 0x73300
        dsp.HFIPCXIDDY = 0x73380
        dsp.ROM_STATUS = 0x163200 if ace15 else 0x160200
        dsp.SRAM_FW_STATUS = WINDOW_BASE_ACE
    else:
        dsp.ADSPCS = 0x00004
        dsp.HIPCTDR = 0x000c0
        dsp.HIPCTDA = 0x000c4
        dsp.HIPCTDD = 0x000c8
        dsp.HIPCIDR = 0x000d0
        dsp.HIPCIDA = 0x000d4
        dsp.HIPCIDD = 0x000d8
        dsp.ROM_STATUS = WINDOW_BASE # Start of first SRAM window
        dsp.SRAM_FW_STATUS = WINDOW_BASE
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id)


def setup_dma_mem(fw_bytes):
    """Copy the firmware into a huge page and build its two-entry BDL.

    Returns (physical address of the BDL, number of buffers).
    """
    (mem, phys_addr, _) = map_phys_mem(hda_ostream_id)
    mem[0:len(fw_bytes)] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware"
             % (phys_addr, len(fw_bytes)))

    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region.  Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and put
    # the 4-entry BDL list into the final 128 bytes of the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)


global_mmaps = [] # protect mmap mappings from garbage collection!


# Maps 2M of contiguous memory using a single page from hugetlbfs,
# then locates its physical address for use as a DMA buffer.
def map_phys_mem(stream_id):
    """Return (mmap object, physical address, backing file) for one huge page."""
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    hugef_name = HUGEPAGE_FILE + str(stream_id)
    hugef = open(hugef_name, "w+")
    hugef.truncate(HUGEPAGESZ)
    mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    log.info("type of mem is %s", str(type(mem)))
    global_mmaps.append(mem)
    os.unlink(hugef_name)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0 # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr >> 12
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)
        pent = pagemap.read(8)
    paddr = (struct.unpack("Q", pent)[0] & ((1 << 55) - 1)) * PAGESZ
    return (mem, paddr, hugef)


# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Return (in-process address, mmap object) for PCI BAR barnum."""
    f = open(pcidir + "/resource" + str(barnum), "r+")
    mm = mmap.mmap(f.fileno(), os.fstat(f.fileno()).st_size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes."
             % (barnum, os.fstat(f.fileno()).st_size))
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)


# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named registers as
# fields, call freeze(), then the field acts as a direct alias for the register!
class Regs:
    """Named uint32 register aliases relative to a base address.

    Before freeze(), assigning ``regs.NAME = offset`` defines a register;
    afterwards (or for an already-defined name) assignment writes the
    register and attribute access reads it.
    """

    def __init__(self, base_addr):
        # Go through vars() so these do not trigger __setattr__ below.
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        if not self.frozen and name not in self.ptrs:
            addr = self.base_addr + val
            self.ptrs[name] = ctypes.c_uint32.from_address(addr)
        else:
            self.ptrs[name].value = val

    def __getattr__(self, name):
        return self.ptrs[name].value


def runx(cmd):
    """Run cmd through the shell and return its stripped, decoded stdout."""
    return subprocess.check_output(cmd, shell=True).decode().rstrip()


def mask(bit):
    """Return the ADSPCS bit mask for field `bit` on the detected platform.

    Raises:
        RuntimeError: when the device is not one of the cAVS 2.5 IDs, the
            only platform a mask is defined for here.  Previously this fell
            through and returned None, which surfaced later as a TypeError
            at the first register update.
    """
    if cavs25:
        return 0b1 << bit
    raise RuntimeError("ADSPCS core masks are only defined for cAVS 2.5 devices")


def load_firmware(fw_file):
    """Reset the DSP and DMA the firmware image to it (non-ACE path)."""
    try:
        with open(fw_file, "rb") as fwf:
            fw_bytes = fwf.read()
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info(f"Stalling and Resetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS |= mask(CSTALL)
    dsp.ADSPCS |= mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) == 0: pass

    log.info(f"Powering down DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(SPA)
    while dsp.ADSPCS & mask(CPA): pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL = 1 << 20 # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP.  Only start up core 0, reset is managed by DSP.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = mask(SPA)
    while (dsp.ADSPCS & mask(CPA)) == 0: pass

    log.info(f"Unresetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CRST)
    while (dsp.ADSPCS & 1) != 0: pass

    log.info(f"Running DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CSTALL)

    # Wait for the ROM to boot and signal it's ready.  This not so short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info(f"Wait for ROM startup, ADSPCS = 0x{dsp.ADSPCS:x}")
    time.sleep(1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5: pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HIPIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info(f"cAVS firmware load complete")


def load_firmware_ace(fw_file):
    """Power-cycle the DSP subsystem and DMA the firmware image (ACE path)."""
    try:
        with open(fw_file, "rb") as fwf:
            fw_bytes = fwf.read()
        # Resize fw_bytes for MTL
        if len(fw_bytes) < 512 * 1024:
            fw_bytes += b'\x00' * (512 * 1024 - len(fw_bytes))
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'$AE1':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info("Turning of DSP subsystem")
    dsp.HFDSSCS &= ~(1 << 16) # clear SPA bit
    time.sleep(0.002)
    # wait for CPA bit clear
    while dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power off")
        time.sleep(0.1)

    log.info("Turning on DSP subsystem")
    dsp.HFDSSCS |= (1 << 16) # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power on")
        time.sleep(0.1)

    log.info("Turning on Domain0")
    dsp.HFPWRCTL |= 0x1 # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFPWRSTS & 0x1:
        log.info("Waiting for DSP domain0 power on")
        time.sleep(0.1)

    log.info("Turning off Primary Core")
    dsp.DSP2CXCTL_PRIMARY &= ~(0x1) # clear SPA
    time.sleep(0.002) # wait for CPA settlement
    while dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power off")
        time.sleep(0.1)

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL |= (1 << 20) # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HFIPCXIDR = 0x{ipcval:x}")
    dsp.HFIPCXIDR = ipcval

    log.info("Turning on Primary Core")
    dsp.DSP2CXCTL_PRIMARY |= 0x1 # set SPA
    time.sleep(0.002) # wait for CPA settlement
    while not dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power on")
        time.sleep(0.1)

    log.info("Waiting for IPC acceptance")
    while dsp.HFIPCXIDR & (1 << 31):
        log.info("Waiting for IPC busy bit clear")
        time.sleep(0.1)

    log.info("ACK IPC")
    dsp.HFIPCXIDA |= (1 << 31)

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.ROM_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info(f"ACE firmware load complete")


def fw_is_alive(dsp):
    """Return True when the low 28 bits of ROM_STATUS equal 5."""
    return dsp.ROM_STATUS & ((1 << 28) - 1) == 5 # "FW_ENTERED"


def wait_fw_entered(dsp, timeout_s):
    """Poll fw_is_alive() at 100 Hz until it succeeds or timeout_s elapses.

    timeout_s=None waits forever.  The outcome is only logged; nothing is
    returned or raised on timeout.
    """
    log.info("Waiting %s for firmware handoff, ROM_STATUS = 0x%x",
             "forever" if timeout_s is None else f"{timeout_s} seconds",
             dsp.ROM_STATUS)
    hertz = 100
    attempts = None if timeout_s is None else timeout_s * hertz
    while True:
        alive = fw_is_alive(dsp)
        if alive:
            break
        if attempts is not None:
            attempts -= 1
            if attempts < 0:
                break
        time.sleep(1 / hertz)

    if not alive:
        log.warning("Load failed?  ROM_STATUS = 0x%x", dsp.ROM_STATUS)
    else:
        log.info("FW alive, ROM_STATUS = 0x%x", dsp.ROM_STATUS)


def winstream_offset():
    """Return the BAR4 offset of SRAM window 3 (the log winstream)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride * 3


# This SHOULD be just "mem[start:start+length]", but slicing an mmap
# array seems to be unreliable on one of my machines (python 3.6.9 on
# Ubuntu 18.04).  Read out bytes individually.
def win_read(base, start, length):
    """Return `length` bytes of BAR4 starting at base + start."""
    try:
        return b''.join(bar4_mmap[base + x].to_bytes(1, 'little')
                        for x in range(start, start + length))
    except IndexError as ie:
        # A FW in a bad state may cause winstream garbage
        log.error("IndexError in bar4_mmap[%d + %d]", base, start)
        log.error("bar4_mmap.size()=%d", bar4_mmap.size())
        raise ie


def winstream_reg_hdr(base):
    """Return a Regs view of the 16-byte winstream header at BAR4 + base."""
    hdr = Regs(bar4_mem + base)
    hdr.WLEN  = 0x00
    hdr.START = 0x04
    hdr.END   = 0x08
    hdr.SEQ   = 0x0c
    hdr.freeze()
    return hdr


def win_hdr(hdr):
    """Read the header fields once and return (wlen, start, end, seq)."""
    return ( hdr.WLEN, hdr.START, hdr.END, hdr.SEQ )


# Python implementation of the same algorithm in sys_winstream_read(),
# see there for details.
def winstream_read(base, last_seq):
    """Return (seq, text) with the bytes written since last_seq.

    The text is empty when there is nothing new, the window is
    uninitialised, or the reader has fallen too far behind.  The read is
    retried until the header is unchanged across the copy.
    """
    while True:
        hdr = winstream_reg_hdr(base)
        (wlen, start, end, seq) = win_hdr(hdr)
        if wlen > SHELL_MAX_VALID_SLOT_SIZE:
            log.debug("DSP powered off at winstream_read")
            return (seq, "")
        if wlen == 0:
            return (seq, "")
        if last_seq == 0:
            last_seq = seq if args.no_history else (seq - ((end - start) % wlen))
        if seq == last_seq or start == end:
            return (seq, "")
        behind = seq - last_seq
        if behind > ((end - start) % wlen):
            return (seq, "")
        copy = (end - behind) % wlen
        suffix = min(behind, wlen - copy)
        result = win_read(base, 16 + copy, suffix)
        if suffix < behind:
            result += win_read(base, 16, behind - suffix)
        (wlen, start1, end, seq1) = win_hdr(hdr)
        if start1 == start and seq1 == seq:
            # Best effort attempt at decoding, replacing unusable characters
            # Found to be useful when it really goes wrong
            return (seq, result.decode("utf-8", "replace"))


def idx_mod(wlen, idx):
    """Wrap idx into [0, wlen), assuming idx < 2 * wlen."""
    if idx >= wlen:
        return idx - wlen
    return idx


def idx_sub(wlen, a, b):
    """Return (a - b) modulo wlen for ring indices a and b."""
    return idx_mod(wlen, a + (wlen - b))


# Python implementation of the same algorithm in sys_winstream_write(),
# see there for details.
def winstream_write(base, msg):
    """Append msg (bytes) to the winstream ring at BAR4 offset `base`.

    The ring can hold at most wlen - 1 bytes because START == END is
    reserved to mean "empty".  A longer message resets the ring and only
    its trailing wlen - 1 bytes are stored; SEQ still advances by the
    full message length so readers can tell bytes were dropped.
    """
    hdr = winstream_reg_hdr(base)
    (wlen, start, end, seq) = win_hdr(hdr)
    if wlen > SHELL_MAX_VALID_SLOT_SIZE:
        log.debug("DSP powered off at winstream_write")
        return
    if wlen == 0:
        return
    lenmsg = len(msg)
    lenmsg0 = lenmsg
    # Overflow: if we have to truncate then just reset the buffer.  The
    # threshold is wlen - 1 (not wlen + 1): writing wlen bytes would leave
    # END == START, which readers interpret as an empty ring.
    if lenmsg > wlen - 1:
        start = end
        lenmsg = wlen - 1
    lenmsg = min(lenmsg, wlen)
    # Make room by advancing START before any data is written.
    if seq != 0:
        avail = (wlen - 1) - idx_sub(wlen, end, start)
        if lenmsg > avail:
            hdr.START = idx_mod(wlen, start + (lenmsg - avail))
    # Had to truncate?  Keep the tail of the message: skip the first
    # `drop` bytes and keep everything after them (exactly lenmsg bytes).
    if lenmsg < lenmsg0:
        hdr.START = end
        drop = lenmsg0 - lenmsg
        msg = msg[drop:]
    suffix = min(lenmsg, wlen - end)
    # Data starts 16 bytes after `base`, right behind the header.
    for c in range(0, suffix):
        bar4_mmap[base + 16 + end + c] = msg[c]
    if lenmsg > suffix:
        # Wrap around to the beginning of the ring.
        for c in range(0, lenmsg - suffix):
            bar4_mmap[base + 16 + c] = msg[suffix + c]
    hdr.END = idx_mod(wlen, end + lenmsg)
    hdr.SEQ += lenmsg0


def debug_offset():
    """Return the BAR4 offset of SRAM window 2 (debug slots)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride * 2


def debug_slot_offset(num):
    """Return the BAR4 offset of debug slot `num`; slots follow one descriptor page."""
    return debug_offset() + DEBUG_SLOT_SIZE * (1 + num)


def debug_slot_offset_by_type(the_type, timeout_s=0.2):
    """Scan the slot descriptors for type_id == the_type.

    Polls at 100 Hz for up to timeout_s seconds.  Returns the matching
    slot's BAR4 offset, or None when no descriptor matched in time.
    """
    ADSP_DW_SLOT_COUNT=15
    hertz = 100
    attempts = timeout_s * hertz
    while attempts > 0:
        # Each descriptor is three little-endian uint32 words.
        data = win_read(debug_offset(), 0, ADSP_DW_SLOT_COUNT * 3 * 4)
        for i in range(ADSP_DW_SLOT_COUNT):
            start_index = i * (3 * 4)
            end_index = (i + 1) * (3 * 4)
            desc = data[start_index:end_index]
            resource_id, type_id, vma = struct.unpack('<III', desc)
            if type_id == the_type:
                log.info("found desc %u resource_id 0x%08x type_id 0x%08x vma 0x%08x",
                         i, resource_id, type_id, vma)
                return debug_slot_offset(i)
        log.debug("not found, %u attempts left", attempts)
        attempts -= 1
        time.sleep(1 / hertz)
    return None


def shell_base_offset():
    """Return the BAR4 offset of the debug slot used by the Zephyr shell."""
    return debug_offset() + DEBUG_SLOT_SIZE * (1 + DEBUG_SLOT_SHELL)


def read_from_shell_memwindow_winstream(last_seq):
    """Forward new shell output to the PTY and return the updated sequence."""
    offset = shell_base_offset() + SHELL_RX_SIZE
    (last_seq, output) = winstream_read(offset, last_seq)
    if output:
        os.write(shell_client_port, output.encode("utf-8"))
    return last_seq


def write_to_shell_memwindow_winstream():
    """Read one byte from the PTY and write it to the shell's input winstream."""
    msg = os.read(shell_client_port, 1)
    if len(msg) > 0:
        winstream_write(shell_base_offset(), msg)


def create_shell_pty():
    """Open a PTY for the shell and register its reader on the event loop."""
    global shell_client_port
    (shell_client_port, user_port) = pty.openpty()
    name = os.ttyname(user_port)
    log.info(f"shell PTY at: {name}")
    asyncio.get_event_loop().add_reader(shell_client_port, write_to_shell_memwindow_winstream)


async def ipc_delay_done():
    """Signal IPC DONE to the DSP after a 100 ms delay."""
    await asyncio.sleep(0.1)
    if adsp_is_ace():
        dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
    else:
        dsp.HIPCTDA = 1<<31


def inbox_offset():
    """Return the BAR4 offset of SRAM window 1 (IPC inbox)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride


def outbox_offset():
    """Return the BAR4 offset of the IPC outbox, 4k into window 0."""
    ( base, _ ) = adsp_mem_window_config()
    return base + 4096


ipc_timestamp = 0


# Super-simple command language, driven by the test code on the DSP
def ipc_command(data, ext_data):
    """Execute IPC command `data` with argument `ext_data`, then acknowledge.

    Commands 6-12 operate on entries of the module-level hda_streams
    dict, keyed by the stream id in the low byte of ext_data.
    """
    global ipc_timestamp
    send_msg = False
    done = True
    log.debug ("ipc data %d, ext_data %x", data, ext_data)
    if data == 0: # noop, with synchronous DONE
        pass
    elif data == 1: # async command: signal DONE after a delay (on 1.8+)
        done = False
        asyncio.ensure_future(ipc_delay_done())
    elif data == 2: # echo back ext_data as a message command
        send_msg = True
    elif data == 3: # set ADSPCS
        dsp.ADSPCS = ext_data
    elif data == 4: # echo back microseconds since last timestamp command
        t = round(time.time() * 1e6)
        ext_data = t - ipc_timestamp
        ipc_timestamp = t
        send_msg = True
    elif data == 5: # copy word at outbox[ext_data >> 16] to inbox[ext_data & 0xffff]
        src = outbox_offset() + 4 * (ext_data >> 16)
        dst = inbox_offset() + 4 * (ext_data & 0xffff)
        for i in range(4):
            bar4_mmap[dst + i] = bar4_mmap[src + i]
    elif data == 6: # HDA RESET (init if not exists)
        stream_id = ext_data & 0xff
        if stream_id in hda_streams:
            hda_streams[stream_id].reset()
        else:
            hda_str = HDAStream(stream_id)
            hda_streams[stream_id] = hda_str
    elif data == 7: # HDA CONFIG
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        hda_str.config(buf_len)
    elif data == 8: # HDA START
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].start()
        hda_streams[stream_id].mem.seek(0)

    elif data == 9: # HDA STOP
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].stop()
    elif data == 10: # HDA VALIDATE
        stream_id = ext_data & 0xFF
        hda_str = hda_streams[stream_id]
        hda_str.debug()
        is_ramp_data = True
        hda_str.mem.seek(0)
        for (i, val) in enumerate(hda_str.mem.read(256)):
            if i != val:
                is_ramp_data = False
            # log.info("stream[%d][%d]: %d", stream_id, i, val) # debug helper
        log.info("Is ramp data? " + str(is_ramp_data))
        ext_data = int(is_ramp_data)
        log.info(f"Ext data to send back on ramp status {ext_data}")
        send_msg = True
    elif data == 11: # HDA HOST OUT SEND
        stream_id = ext_data & 0xff
        buf = bytearray(256)
        for i in range(0, 256):
            buf[i] = i
        hda_streams[stream_id].write(buf)
    elif data == 12: # HDA PRINT
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        # check for wrap here
        pos = hda_str.mem.tell()
        read_lens = [buf_len, 0]
        if pos + buf_len >= hda_str.buf_len*2:
            read_lens[0] = hda_str.buf_len*2 - pos
            read_lens[1] = buf_len - read_lens[0]
        # validate the read lens
        assert (read_lens[0] + pos) <= (hda_str.buf_len*2)
        assert read_lens[0] % 128 == 0
        assert read_lens[1] % 128 == 0
        buf_data0 = hda_str.mem.read(read_lens[0])
        hda_msg0 = buf_data0.decode("utf-8", "replace")
        sys.stdout.write(hda_msg0)
        if read_lens[1] != 0:
            hda_str.mem.seek(0)
            buf_data1 = hda_str.mem.read(read_lens[1])
            hda_msg1 = buf_data1.decode("utf-8", "replace")
            sys.stdout.write(hda_msg1)
        pos = hda_str.mem.tell()
        sys.stdout.flush()
    else:
        log.warning(f"cavstool: Unrecognized IPC command 0x{data:x} ext 0x{ext_data:x}")
        # NOTE(review): the nesting of this branch (in particular where the
        # early return sits) was reconstructed from a whitespace-collapsed
        # source - confirm against the reference file.
        if not fw_is_alive(dsp):
            if args.log_only:
                log.info("DSP power seems off")
                wait_fw_entered(dsp, timeout_s=None)
            else:
                log.warning("DSP power seems off?!")
                time.sleep(2) # potential spam reduction

            return

    if adsp_is_ace():
        dsp.HFIPCXTDR = 1<<31 # Ack local interrupt
        if done:
            dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
        if send_msg:
            log.debug("ipc: sending msg 0x%08x" % ext_data)
            dsp.HFIPCXIDDY = ext_data
            dsp.HFIPCXIDR = (1<<31) | ext_data
    else:
        dsp.HIPCTDR = 1<<31 # Ack local interrupt
        if done:
            dsp.HIPCTDA = 1<<31 # Signal done
        if send_msg:
            dsp.HIPCIDD = ext_data
            dsp.HIPCIDR = (1<<31) | ext_data

def handle_ipc():
    """Acknowledge pending DONE interrupts and dispatch one incoming command."""
    if adsp_is_ace():
        if dsp.HFIPCXIDA & 0x80000000:
            log.debug("ipc: Ack DSP reply with IDA_DONE")
            dsp.HFIPCXIDA = 1<<31 # must ACK any DONE interrupts that arrive!
        if dsp.HFIPCXTDR & 0x80000000:
            ipc_command(dsp.HFIPCXTDR & ~0x80000000, dsp.HFIPCXTDDY)
        return

    if dsp.HIPCIDA & 0x80000000:
        dsp.HIPCIDA = 1<<31 # must ACK any DONE interrupts that arrive!
    if dsp.HIPCTDR & 0x80000000:
        ipc_command(dsp.HIPCTDR & ~0x80000000, dsp.HIPCTDD)


async def main():
    """Map the device, optionally load firmware, then pump logs and IPC.

    Relies on the module-level `args` set by args_parse().  Exits with
    status 1 when the device cannot be mapped or no firmware file was
    given outside log-only mode.
    """
    #TODO this bit me, remove the globals, write a little FirmwareLoader class or something to contain.
    global hda, sd, dsp, hda_ostream_id, hda_streams

    try:
        (hda, sd, dsp, hda_ostream_id) = map_regs(args.log_only)
    except Exception as e:
        log.error("Could not map device in sysfs; run as root?")
        log.error(e)
        sys.exit(1)

    log.info(f"Detected a supported cAVS/ACE hardware version")

    if args.log_only:
        wait_fw_entered(dsp, timeout_s=None)
    else:
        if not args.fw_file:
            log.error("Firmware file argument missing")
            sys.exit(1)

        if adsp_is_ace():
            load_firmware_ace(args.fw_file)
        else:
            load_firmware(args.fw_file)
        time.sleep(0.1)

    if not args.quiet:
        sys.stdout.write("--\n")

    if args.shell_pty:
        create_shell_pty()

    hda_streams = dict()

    last_seq = 0
    last_seq_shell = 0
    # Poll roughly every 30 ms: shell output first, then the log window,
    # then any pending IPC from the DSP.
    while start_output is True:
        await asyncio.sleep(0.03)
        if args.shell_pty:
            last_seq_shell = read_from_shell_memwindow_winstream(last_seq_shell)
        (last_seq, output) = winstream_read(winstream_offset(), last_seq)
        if output:
            sys.stdout.write(output)
            sys.stdout.flush()
        if not args.log_only:
            handle_ipc()


def args_parse(argv=None):
    """Parse the command line into the module-level `args` and return it.

    Args:
        argv: optional list of argument strings; None (the default) makes
            argparse read sys.argv[1:], which is the original behaviour.
    """
    global args
    ap = argparse.ArgumentParser(description="DSP loader/logger tool", allow_abbrev=False)
    ap.add_argument("-q", "--quiet", action="store_true",
                    help="No loader output, just DSP logging")
    ap.add_argument("-v", "--verbose", action="store_true",
                    help="More loader output, DEBUG logging level")
    ap.add_argument("-l", "--log-only", action="store_true",
                    help="Don't load firmware, just show log output")
    ap.add_argument("-p", "--shell-pty", action="store_true",
                    help="Create a Zephyr shell pty if enabled in firmware")
    ap.add_argument("-n", "--no-history", action="store_true",
                    help="No current log buffer at start, just new output")
    ap.add_argument("fw_file", nargs="?", help="Firmware file")

    args = ap.parse_args(argv)

    if args.quiet:
        log.setLevel(logging.WARN)
    elif args.verbose:
        log.setLevel(logging.DEBUG)

    return args


if __name__ == "__main__":
    args_parse()
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        start_output = False