#!/usr/bin/env python3
# Copyright(c) 2022 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import struct
import logging
import asyncio
import time
import subprocess
import ctypes
import mmap
import argparse
import pty

start_output = True

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("cavs-fw")

PAGESZ = 4096
HUGEPAGESZ = 2 * 1024 * 1024
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp."

# SRAM windows.  Base and stride vary depending on ADSP version
#
# Window 0 is the FW_STATUS area, and 4k after that the IPC "outbox"
# Window 1 is the IPC "inbox" (host-writable memory, just 384 bytes currently)
# Window 2 is used for debug slots (Zephyr shell is one user)
# Window 3 is winstream-formatted log output

WINDOW_BASE = 0x80000
WINDOW_STRIDE = 0x20000

WINDOW_BASE_ACE = 0x180000
WINDOW_STRIDE_ACE = 0x8000

DEBUG_SLOT_SIZE = 4096
DEBUG_SLOT_SHELL = 0
SHELL_RX_SIZE = 256
SHELL_MAX_VALID_SLOT_SIZE = 16777216

# pylint: disable=duplicate-code

# ADSPCS bits
CRST = 0
CSTALL = 8
SPA = 16
CPA = 24


class HDAStream:
    """Host-side handle for one HDA stream: its registers and DMA buffers.

    Construction maps the global HDA registers, the per-stream descriptor
    registers and two debug registers relative to the module-level `hdamem`
    base address, then resets the stream.  `config()` allocates a huge page
    holding two buffers of `buf_len` bytes plus the buffer descriptor list.
    """

    def __init__(self, stream_id: int):
        self.stream_id = stream_id
        self.base = hdamem + 0x0080 + (stream_id * 0x20)
        log.info(f"Mapping registers for hda stream {self.stream_id} at base {self.base:x}")

        self.hda = Regs(hdamem)
        self.hda.GCAP = 0x0000
        self.hda.GCTL = 0x0008
        self.hda.DPLBASE = 0x0070
        self.hda.DPUBASE = 0x0074
        self.hda.SPBFCH = 0x0700
        self.hda.SPBFCTL = 0x0704
        self.hda.PPCH = 0x0800
        self.hda.PPCTL = 0x0804
        self.hda.PPSTS = 0x0808
        self.hda.SPIB = 0x0708 + stream_id*0x08
        self.hda.freeze()

        self.regs = Regs(self.base)
        self.regs.CTL = 0x00
        self.regs.STS = 0x03
        self.regs.LPIB = 0x04
        self.regs.CBL = 0x08
        self.regs.LVI = 0x0c
        self.regs.FIFOW = 0x0e
        self.regs.FIFOS = 0x10
        self.regs.FMT = 0x12
        self.regs.FIFOL = 0x14
        self.regs.BDPL = 0x18
        self.regs.BDPU = 0x1c
        self.regs.freeze()

        self.dbg0 = Regs(hdamem + 0x0084 + (0x20*stream_id))
        self.dbg0.DPIB = 0x00
        self.dbg0.EFIFOS = 0x10
        self.dbg0.freeze()

        self.reset()

    def __del__(self):
        self.reset()

    def config(self, buf_len: int):
        """Allocate the DMA buffers and program the stream descriptor.

        Stores the mapping, backing file, BDL address, position-buffer
        address and buffer count returned by `setup_buf()` on the instance.
        """
        log.info(f"Configuring stream {self.stream_id}")
        self.buf_len = buf_len
        log.info("Allocating huge page and setting up buffers")
        (self.mem, self.hugef, self.buf_list_addr,
         self.pos_buf_addr, self.n_bufs) = self.setup_buf(buf_len)

        log.info("Setting buffer list, length, and stream id and traffic priority bit")
        self.regs.CTL = ((self.stream_id & 0xFF) << 20) | (1 << 18) # must be set to something other than 0?
        self.regs.BDPU = (self.buf_list_addr >> 32) & 0xffffffff
        self.regs.BDPL = self.buf_list_addr & 0xffffffff
        self.regs.CBL = buf_len
        self.regs.LVI = self.n_bufs - 1
        self.mem.seek(0)
        self.debug()
        log.info(f"Configured stream {self.stream_id}")

    def write(self, data):
        """Copy up to `buf_len` bytes of `data` into the mapping (twice,
        back to back), enable SPIB for this stream and advance SPIB."""
        bufl = min(len(data), self.buf_len)
        log.info(f"Writing data to stream {self.stream_id}, len {bufl}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")
        self.mem[0:bufl] = data[0:bufl]
        self.mem[bufl:bufl+bufl] = data[0:bufl]
        self.hda.SPBFCTL |= (1 << self.stream_id)
        self.hda.SPIB += bufl
        log.info(f"Wrote data to stream {self.stream_id}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")

    def start(self):
        """Set the START bit (bit 1) of the stream's CTL register."""
        log.info(f"Starting stream {self.stream_id}, CTL {self.regs.CTL:x}")
        self.regs.CTL |= 2
        log.info(f"Started stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def stop(self):
        """Clear START, wait 100 ms, then set the reset bit (bit 0) of CTL.

        Same clear-START / sleep / reset ordering that `reset()` and the
        firmware loaders use to shut a stream down.
        """
        log.info(f"Stopping stream {self.stream_id}, CTL {self.regs.CTL:x}")
        # Clear only the START bit.  (A plain "&= 2" would keep START set
        # and wipe every other bit, i.e. the opposite of stopping.)
        self.regs.CTL &= ~2
        time.sleep(0.1)
        self.regs.CTL |= 1
        log.info(f"Stopped stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def setup_buf(self, buf_len: int):
        """Map a huge page and lay out two buffers followed by their BDL.

        Returns `(mem, hugef, bdl_phys_addr, pos_buf_phys_addr, n_bufs)`.
        """
        (mem, phys_addr, hugef) = map_phys_mem(self.stream_id)

        log.info(f"Mapped 2M huge page at 0x{phys_addr:x} for buf size ({buf_len})")

        # create two buffers in the page of buf_len and mark them
        # in a buffer descriptor list for the hardware to use
        buf0_len = buf_len
        buf1_len = buf_len
        bdl_off = buf0_len + buf1_len
        # Each BDL entry is two 64-bit words (16 bytes); we have two entries
        mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                                phys_addr,
                                                buf0_len,
                                                phys_addr + buf0_len,
                                                buf1_len)
        dpib_off = bdl_off+32

        # ensure buffer is initialized, sanity
        mem[0:buf_len*2] = bytes(buf_len*2)

        log.info("Filled the buffer descriptor list (BDL) for DMA.")
        return (mem, hugef, phys_addr + bdl_off, phys_addr+dpib_off, 2)

    def debug(self):
        """Log the stream's register state at DEBUG level."""
        log.debug("HDA %d: PPROC %d, CTL 0x%x, LPIB 0x%x, BDPU 0x%x, BDPL 0x%x, CBL 0x%x, LVI 0x%x",
                  self.stream_id, (self.hda.PPCTL >> self.stream_id) & 1, self.regs.CTL, self.regs.LPIB, self.regs.BDPU,
                  self.regs.BDPL, self.regs.CBL, self.regs.LVI)
        log.debug("    FIFOW %d, FIFOS %d, FMT %x, FIFOL %d, DPIB %d, EFIFOS %d",
                  self.regs.FIFOW & 0x7, self.regs.FIFOS, self.regs.FMT, self.regs.FIFOL, self.dbg0.DPIB, self.dbg0.EFIFOS)
        log.debug("    status: FIFORDY %d, DESE %d, FIFOE %d, BCIS %d",
                  (self.regs.STS >> 5) & 1, (self.regs.STS >> 4) & 1, (self.regs.STS >> 3) & 1, (self.regs.STS >> 2) & 1)

    def reset(self):
        """Stop DMA, cycle the stream through reset, clear SPIB and
        enable PROCEN for this stream."""
        # Turn DMA off and reset the stream.  Clearing START first is a
        # noop per the spec, but absolutely required for stability.
        # Apparently the reset doesn't stop the stream, and the next load
        # starts before it's ready and kills the load (and often the DSP).
        # The sleep too is required, on at least one board (a fast
        # chromebook) putting the two writes next each other also hangs
        # the DSP!
        log.info(f"Resetting stream {self.stream_id}")
        self.debug()
        self.regs.CTL &= ~2 # clear START
        time.sleep(0.1)
        # set enter reset bit
        self.regs.CTL = 1
        while (self.regs.CTL & 1) == 0: pass
        # clear enter reset bit to exit reset
        self.regs.CTL = 0
        while (self.regs.CTL & 1) == 1: pass

        log.info(f"Disable SPIB and set position 0 of stream {self.stream_id}")
        self.hda.SPBFCTL = 0
        self.hda.SPIB = 0

        #log.info("Setting dma position buffer and enable it")
        #self.hda.DPUBASE = self.pos_buf_addr >> 32 & 0xffffffff
        #self.hda.DPLBASE = self.pos_buf_addr & 0xfffffff0 | 1

        log.info(f"Enabling dsp capture (PROCEN) of stream {self.stream_id}")
        self.hda.PPCTL |= (1 << self.stream_id)

        self.debug()
        log.info(f"Reset stream {self.stream_id}")


def adsp_is_ace():
    """Return a truthy value when map_regs() detected an ACE device."""
    return ace15 or ace20 or ace30


def adsp_mem_window_config():
    """Return the (base, stride) of the SRAM windows for this platform."""
    if adsp_is_ace():
        base = WINDOW_BASE_ACE
        stride = WINDOW_STRIDE_ACE
    else:
        base = WINDOW_BASE
        stride = WINDOW_STRIDE

    return (base, stride)


def map_regs(log_only):
    """Locate the audio PCI device, prepare it and map its register blocks.

    Sets the module globals `cavs25`, `ace15`, `ace20`, `ace30`, `hdamem`,
    `bar4_mmap` and `bar4_mem`.  Unless `log_only` is set, an already
    loaded kernel driver is removed with `rmmod -f`.

    Returns `(hda, sd, dsp, hda_ostream_id)`: the HDA global registers,
    the stream descriptor of the first output stream, the DSP registers
    and the index of that output stream.
    """
    p = runx("grep -iEl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Platform/quirk detection.  ID lists cribbed from the SOF kernel driver
    global cavs25, ace15, ace20, ace30
    with open(f"{pcidir}/device") as devf:
        did = int(devf.read().rstrip(), 16)
    cavs25 = did in [ 0x43c8, 0x4b55, 0x4b58, 0x51c8, 0x51ca, 0x51cb, 0x51ce, 0x51cf, 0x54c8,
                      0x7ad0, 0xa0c8 ]
    ace15 = did in [ 0x7728, 0x7f50, 0x7e28 ]
    ace20 = did in [ 0xa828 ]
    ace30 = did in [ 0xe428 ]

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        found_msg = f"Existing driver \"{mod}\" found"
        if log_only:
            log.info(found_msg)
        else:
            log.warning(found_msg + ", unloading module")
            runx(f"rmmod -f {mod}")
            # Disengage runtime power management so the kernel doesn't put it to sleep
            log.info(f"Forcing {pcidir}/power/control to always 'on'")
            with open(f"{pcidir}/power/control", "w") as ctrl:
                ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    global hdamem
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # Find the ID of the first output stream
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()

    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    global bar4_mmap
    global bar4_mem
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    if adsp_is_ace():
        dsp.HFDSSCS = 0x1000
        dsp.HFPWRCTL = 0x1d18 if ace15 or ace20 else 0x1d20
        dsp.HFPWRSTS = 0x1d1c if ace15 or ace20 else 0x1d24
        dsp.DSP2CXCTL_PRIMARY = 0x178d04
        dsp.HFIPCXTDR = 0x73200
        dsp.HFIPCXTDA = 0x73204
        dsp.HFIPCXIDR = 0x73210
        dsp.HFIPCXIDA = 0x73214
        dsp.HFIPCXCTL = 0x73228
        dsp.HFIPCXTDDY = 0x73300
        dsp.HFIPCXIDDY = 0x73380
        dsp.ROM_STATUS = 0x163200 if ace15 else 0x160200
        dsp.SRAM_FW_STATUS = WINDOW_BASE_ACE
    else:
        dsp.ADSPCS = 0x00004
        dsp.HIPCTDR = 0x000c0
        dsp.HIPCTDA = 0x000c4
        dsp.HIPCTDD = 0x000c8
        dsp.HIPCIDR = 0x000d0
        dsp.HIPCIDA = 0x000d4
        dsp.HIPCIDD = 0x000d8
        dsp.ROM_STATUS = WINDOW_BASE # Start of first SRAM window
        dsp.SRAM_FW_STATUS = WINDOW_BASE
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id)


def setup_dma_mem(fw_bytes):
    """Copy the firmware image into a huge page and build its BDL.

    Returns `(bdl_phys_addr, n_bufs)`.
    """
    (mem, phys_addr, _) = map_phys_mem(hda_ostream_id)
    mem[0:len(fw_bytes)] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware"
             % (phys_addr, len(fw_bytes)))

    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region. Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and put
    # the two-entry BDL (four 64-bit words) into the final 128 bytes of the
    # page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)


global_mmaps = [] # protect mmap mappings from garbage collection!


# Maps 2M of contiguous memory using a single page from hugetlbfs,
# then locates its physical address for use as a DMA buffer.
def map_phys_mem(stream_id):
    """Return `(mem, phys_addr, hugef)` for a freshly mapped 2M huge page.

    `mem` is the mmap object, `phys_addr` the physical address read from
    /proc/self/pagemap and `hugef` the (already unlinked) backing file.
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    hugef_name = HUGEPAGE_FILE + str(stream_id)
    hugef = open(hugef_name, "w+")
    hugef.truncate(HUGEPAGESZ)
    mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    log.info("type of mem is %s", str(type(mem)))
    global_mmaps.append(mem)
    os.unlink(hugef_name)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0 # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr >> 12
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)
        pent = pagemap.read(8)
    paddr = (struct.unpack("Q", pent)[0] & ((1 << 55) - 1)) * PAGESZ
    return (mem, paddr, hugef)


# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Map `resource<barnum>` of the device; return `(address, mmap)`."""
    # The file object is intentionally left open: the mapping (kept alive
    # through global_mmaps) is used for the lifetime of the process.
    f = open(pcidir + "/resource" + str(barnum), "r+")
    mm = mmap.mmap(f.fileno(), os.fstat(f.fileno()).st_size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes."
             % (barnum, os.fstat(f.fileno()).st_size))
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)


# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named registers as
# fields, call freeze(), then the field acts as a direct alias for the register!
class Regs:
    """Named 32-bit registers at fixed offsets from a base address.

    Until `freeze()` is called, assigning to a new attribute *defines* a
    register at `base_addr + value`.  Any other assignment writes the value
    to the register, and attribute reads return the register's current
    value.  Unknown register names raise `AttributeError`.
    """

    def __init__(self, base_addr):
        # Go through vars() so that our own __setattr__ is bypassed.
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        if not self.frozen and name not in self.ptrs:
            addr = self.base_addr + val
            self.ptrs[name] = ctypes.c_uint32.from_address(addr)
        elif name in self.ptrs:
            self.ptrs[name].value = val
        else:
            raise AttributeError(f"no register named {name!r}")

    def __getattr__(self, name):
        # Only reached when normal lookup fails, i.e. for register names.
        # Look in the instance dict directly so that a missing "ptrs"
        # cannot recurse back into this method.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(f"no register named {name!r}") from None


def runx(cmd):
    """Run `cmd` through the shell and return its stripped stdout."""
    return subprocess.check_output(cmd, shell=True).decode().rstrip()


def mask(bit):
    """Return the ADSPCS core mask positioned at `bit`.

    Only cAVS 2.5 (a single-bit mask) is known here; any other platform
    raises `RuntimeError` instead of silently returning `None`.
    """
    if cavs25:
        return 0b1 << bit
    raise RuntimeError("mask(): no ADSPCS core mask known for this platform")


def load_firmware(fw_file):
    """Load `fw_file` into a cAVS DSP over the first HDA output stream."""
    try:
        with open(fw_file, "rb") as fwf:
            fw_bytes = fwf.read()
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info(f"Stalling and Resetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS |= mask(CSTALL)
    dsp.ADSPCS |= mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) == 0: pass

    log.info(f"Powering down DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(SPA)
    while dsp.ADSPCS & mask(CPA): pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL = 1 << 20 # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP.  Only start up core 0, reset is managed by DSP.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = mask(SPA)
    while (dsp.ADSPCS & mask(CPA)) == 0: pass

    log.info(f"Unresetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CRST)
    while (dsp.ADSPCS & 1) != 0: pass

    log.info(f"Running DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CSTALL)

    # Wait for the ROM to boot and signal it's ready.  This not so short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info(f"Wait for ROM startup, ADSPCS = 0x{dsp.ADSPCS:x}")
    time.sleep(1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5: pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HIPIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("cAVS firmware load complete")


def load_firmware_ace(fw_file):
    """Load `fw_file` into an ACE DSP over the first HDA output stream."""
    try:
        with open(fw_file, "rb") as fwf:
            fw_bytes = fwf.read()
        # Resize fw_bytes for MTL
        if len(fw_bytes) < 512 * 1024:
            fw_bytes += b'\x00' * (512 * 1024 - len(fw_bytes))
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'$AE1':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info("Turning of DSP subsystem")
    dsp.HFDSSCS &= ~(1 << 16) # clear SPA bit
    time.sleep(0.002)
    # wait for CPA bit clear
    while dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power off")
        time.sleep(0.1)

    log.info("Turning on DSP subsystem")
    dsp.HFDSSCS |= (1 << 16) # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power on")
        time.sleep(0.1)

    log.info("Turning on Domain0")
    dsp.HFPWRCTL |= 0x1 # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFPWRSTS & 0x1:
        log.info("Waiting for DSP domain0 power on")
        time.sleep(0.1)

    log.info("Turning off Primary Core")
    dsp.DSP2CXCTL_PRIMARY &= ~(0x1) # clear SPA
    time.sleep(0.002) # wait for CPA settlement
    while dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power off")
        time.sleep(0.1)

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL |= (1 << 20) # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HFIPCXIDR = 0x{ipcval:x}")
    dsp.HFIPCXIDR = ipcval

    log.info("Turning on Primary Core")
    dsp.DSP2CXCTL_PRIMARY |= 0x1 # set SPA
    time.sleep(0.002) # wait for CPA settlement
    while not dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power on")
        time.sleep(0.1)

    log.info("Waiting for IPC acceptance")
    while dsp.HFIPCXIDR & (1 << 31):
        log.info("Waiting for IPC busy bit clear")
        time.sleep(0.1)

    log.info("ACK IPC")
    dsp.HFIPCXIDA |= (1 << 31)

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.ROM_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("ACE firmware load complete")


def fw_is_alive(dsp):
    """Return True when the low 28 bits of ROM_STATUS equal 5."""
    return dsp.ROM_STATUS & ((1 << 28) - 1) == 5 # "FW_ENTERED"


def wait_fw_entered(dsp, timeout_s):
    """Poll `fw_is_alive()` at 100 Hz until it succeeds or time runs out.

    `timeout_s=None` waits forever.  The outcome is only logged; nothing
    is returned or raised.
    """
    log.info("Waiting %s for firmware handoff, ROM_STATUS = 0x%x",
             "forever" if timeout_s is None else f"{timeout_s} seconds",
             dsp.ROM_STATUS)
    hertz = 100
    attempts = None if timeout_s is None else timeout_s * hertz
    while True:
        alive = fw_is_alive(dsp)
        if alive:
            break
        if attempts is not None:
            attempts -= 1
            if attempts < 0:
                break
        time.sleep(1 / hertz)

    if not alive:
        log.warning("Load failed?  ROM_STATUS = 0x%x", dsp.ROM_STATUS)
    else:
        log.info("FW alive, ROM_STATUS = 0x%x", dsp.ROM_STATUS)


def winstream_offset():
    """Return the BAR4 offset of window 3 (the winstream log output)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride * 3

# This SHOULD be just "mem[start:start+length]", but slicing an mmap
# array seems to be unreliable on one of my machines (python 3.6.9 on
# Ubuntu 18.04).  Read out bytes individually.
def win_read(base, start, length):
    """Return `length` bytes of BAR4 starting at `base + start`.

    Bytes are fetched one index at a time from the `bar4_mmap` mapping.
    An IndexError is logged together with the mapping size, then re-raised.
    """
    first = base + start
    try:
        return bytes(bar4_mmap[pos] for pos in range(first, first + length))
    except IndexError:
        # A FW in a bad state may cause winstream garbage
        log.error("IndexError in bar4_mmap[%d + %d]", base, start)
        log.error("bar4_mmap.size()=%d", bar4_mmap.size())
        raise


def winstream_reg_hdr(base):
    """Build a frozen Regs view of the four-word winstream header at `base`."""
    header = Regs(bar4_mem + base)
    for offset, reg in ((0x00, "WLEN"), (0x04, "START"), (0x08, "END"), (0x0c, "SEQ")):
        setattr(header, reg, offset)
    header.freeze()
    return header


def win_hdr(hdr):
    """Read the header once and return `(WLEN, START, END, SEQ)`."""
    return tuple(getattr(hdr, reg) for reg in ("WLEN", "START", "END", "SEQ"))


# Python implementation of the same algorithm in sys_winstream_read(),
# see there for details.
def winstream_read(base, last_seq):
    """Return `(seq, text)` with the output produced since `last_seq`.

    `text` is empty when nothing new is available, when the header looks
    invalid, or when the reader has fallen further behind than the buffer
    holds.  The read is retried until the header is unchanged across it.
    """
    nothing = ""
    while True:
        hdr = winstream_reg_hdr(base)
        wlen, start, end, seq = win_hdr(hdr)
        if wlen > SHELL_MAX_VALID_SLOT_SIZE:
            log.debug("DSP powered off at winstream_read")
            return (seq, nothing)
        if wlen == 0:
            return (seq, nothing)

        buffered = (end - start) % wlen
        if last_seq == 0:
            if args.no_history:
                last_seq = seq
            else:
                last_seq = seq - buffered
        if start == end or last_seq == seq:
            return (seq, nothing)

        behind = seq - last_seq
        if behind > buffered:
            return (seq, nothing)

        # The unread bytes may wrap around the end of the ring.
        first = (end - behind) % wlen
        tail_len = min(behind, wlen - first)
        data = win_read(base, 16 + first, tail_len)
        if tail_len < behind:
            data += win_read(base, 16, behind - tail_len)

        # Only trust the data if the writer did not move under us.
        _, start_after, _, seq_after = win_hdr(hdr)
        if (start_after, seq_after) == (start, seq):
            # Best effort attempt at decoding, replacing unusable characters
            # Found to be useful when it really goes wrong
            return (seq, data.decode("utf-8", "replace"))


def idx_mod(wlen, idx):
    """Wrap `idx` once around a ring of length `wlen`."""
    return idx - wlen if idx >= wlen else idx


def idx_sub(wlen, a, b):
    """Return `a - b` as a ring index for a ring of length `wlen`."""
    total = a + (wlen - b)
    if total >= wlen:
        return total - wlen
    return total

# Python implementation of the same algorithm in sys_winstream_write(),
# see there for details.
def winstream_write(base, msg):
    """Append `msg` (bytes) to the winstream ring whose header is at `base`.

    At most `wlen - 1` bytes fit in the ring, because `start == end` means
    "empty".  A longer message resets the ring and only its last
    `wlen - 1` bytes are stored; SEQ still advances by the full length.
    Nothing is written when the header reports a zero or implausibly
    large length.
    """
    hdr = winstream_reg_hdr(base)
    (wlen, start, end, seq) = win_hdr(hdr)
    if wlen > SHELL_MAX_VALID_SLOT_SIZE:
        log.debug("DSP powered off at winstream_write")
        return
    if wlen == 0:
        return
    lenmsg = len(msg)
    lenmsg0 = lenmsg
    # Overflow: if we're truncating then just reset the buffer.
    if lenmsg > wlen - 1:
        start = end
        lenmsg = wlen - 1
    lenmsg = min(lenmsg, wlen)
    # Make room in the buffer by advancing start first
    if seq != 0:
        avail = (wlen - 1) - idx_sub(wlen, end, start)
        if lenmsg > avail:
            hdr.START = idx_mod(wlen, start + (lenmsg - avail))
    # Had to truncate?  Keep the tail of the message.
    if lenmsg < lenmsg0:
        hdr.START = end
        msg = msg[lenmsg0 - lenmsg:]
    # Copy in up to two pieces: up to the end of the ring, then from 0.
    suffix = min(lenmsg, wlen - end)
    for c in range(0, suffix):
        bar4_mmap[base + 16 + end + c] = msg[c]
    if lenmsg > suffix:
        for c in range(0, lenmsg - suffix):
            bar4_mmap[base + 16 + c] = msg[suffix + c]
    hdr.END = idx_mod(wlen, end + lenmsg)
    hdr.SEQ += lenmsg0


def debug_offset():
    """Return the BAR4 offset of window 2 (the debug slots)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride * 2


def debug_slot_offset(num):
    """Return the BAR4 offset of debug slot `num`; slots start one
    DEBUG_SLOT_SIZE after the window base."""
    return debug_offset() + DEBUG_SLOT_SIZE * (1 + num)


def debug_slot_offset_by_type(the_type, timeout_s=0.2):
    """Find the debug slot whose descriptor has type `the_type`.

    Polls the 15 twelve-byte descriptors at the start of the debug window
    at 100 Hz for `timeout_s` seconds.  Returns the slot offset, or None
    if no matching descriptor shows up in time.
    """
    ADSP_DW_SLOT_COUNT=15
    hertz = 100
    attempts = timeout_s * hertz
    while attempts > 0:
        data = win_read(debug_offset(), 0, ADSP_DW_SLOT_COUNT * 3 * 4)
        for i in range(ADSP_DW_SLOT_COUNT):
            start_index = i * (3 * 4)
            end_index = (i + 1) * (3 * 4)
            desc = data[start_index:end_index]
            resource_id, type_id, vma = struct.unpack('<III', desc)
            if type_id == the_type:
                log.info("found desc %u resource_id 0x%08x type_id 0x%08x vma 0x%08x",
                         i, resource_id, type_id, vma)
                return debug_slot_offset(i)
        log.debug("not found, %u attempts left", attempts)
        attempts -= 1
        time.sleep(1 / hertz)
    return None


def shell_base_offset():
    """Return the BAR4 offset of the debug slot used by the shell."""
    return debug_offset() + DEBUG_SLOT_SIZE * (1 + DEBUG_SLOT_SHELL)


def read_from_shell_memwindow_winstream(last_seq):
    """Forward new shell output from the DSP to the PTY; return the new
    sequence number."""
    offset = shell_base_offset() + SHELL_RX_SIZE
    (last_seq, output) = winstream_read(offset, last_seq)
    if output:
        os.write(shell_client_port, output.encode("utf-8"))
    return last_seq


def write_to_shell_memwindow_winstream():
    """Read one byte from the PTY and write it to the shell winstream."""
    msg = os.read(shell_client_port, 1)
    if len(msg) > 0:
        winstream_write(shell_base_offset(), msg)


def create_shell_pty():
    """Open a PTY pair, log the user-side device name and register the
    host side with the event loop."""
    global shell_client_port
    (shell_client_port, user_port) = pty.openpty()
    name = os.ttyname(user_port)
    log.info(f"shell PTY at: {name}")
    asyncio.get_event_loop().add_reader(shell_client_port, write_to_shell_memwindow_winstream)


async def ipc_delay_done():
    """Signal IPC DONE to the DSP after a 100 ms delay."""
    await asyncio.sleep(0.1)
    if adsp_is_ace():
        dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
    else:
        dsp.HIPCTDA = 1<<31


def inbox_offset():
    """Return the BAR4 offset of window 1 (the IPC inbox)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride


def outbox_offset():
    """Return the BAR4 offset of the IPC outbox, 4k into window 0."""
    ( base, _ ) = adsp_mem_window_config()
    return base + 4096


ipc_timestamp = 0


# Super-simple command language, driven by the test code on the DSP
def ipc_command(data, ext_data):
    """Execute one IPC command from the DSP, then ack it.

    `data` selects the command and `ext_data` is its argument.  After the
    command runs, the local interrupt is acked, DONE is signalled unless
    the command defers it, and a reply message carrying `ext_data` is sent
    when the command asks for one.  An unrecognized command is logged; if
    the firmware is no longer alive it is not acked at all.
    """
    send_msg = False
    done = True
    log.debug ("ipc data %d, ext_data %x", data, ext_data)
    if data == 0: # noop, with synchronous DONE
        pass
    elif data == 1: # async command: signal DONE after a delay (on 1.8+)
        done = False
        asyncio.ensure_future(ipc_delay_done())
    elif data == 2: # echo back ext_data as a message command
        send_msg = True
    elif data == 3: # set ADSPCS
        dsp.ADSPCS = ext_data
    elif data == 4: # echo back microseconds since last timestamp command
        global ipc_timestamp
        t = round(time.time() * 1e6)
        ext_data = t - ipc_timestamp
        ipc_timestamp = t
        send_msg = True
    elif data == 5: # copy word at outbox[ext_data >> 16] to inbox[ext_data & 0xffff]
        src = outbox_offset() + 4 * (ext_data >> 16)
        dst = inbox_offset() + 4 * (ext_data & 0xffff)
        for i in range(4):
            bar4_mmap[dst + i] = bar4_mmap[src + i]
    elif data == 6: # HDA RESET (init if not exists)
        stream_id = ext_data & 0xff
        if stream_id in hda_streams:
            hda_streams[stream_id].reset()
        else:
            hda_str = HDAStream(stream_id)
            hda_streams[stream_id] = hda_str
    elif data == 7: # HDA CONFIG
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        hda_str.config(buf_len)
    elif data == 8: # HDA START
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].start()
        hda_streams[stream_id].mem.seek(0)

    elif data == 9: # HDA STOP
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].stop()
    elif data == 10: # HDA VALIDATE
        stream_id = ext_data & 0xFF
        hda_str = hda_streams[stream_id]
        hda_str.debug()
        is_ramp_data = True
        hda_str.mem.seek(0)
        for (i, val) in enumerate(hda_str.mem.read(256)):
            if i != val:
                is_ramp_data = False
            # log.info("stream[%d][%d]: %d", stream_id, i, val) # debug helper
        log.info("Is ramp data? " + str(is_ramp_data))
        ext_data = int(is_ramp_data)
        log.info(f"Ext data to send back on ramp status {ext_data}")
        send_msg = True
    elif data == 11: # HDA HOST OUT SEND
        stream_id = ext_data & 0xff
        buf = bytearray(range(256)) # ramp 0..255, checked by HDA VALIDATE
        hda_streams[stream_id].write(buf)
    elif data == 12: # HDA PRINT
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        # check for wrap here
        pos = hda_str.mem.tell()
        read_lens = [buf_len, 0]
        if pos + buf_len >= hda_str.buf_len*2:
            read_lens[0] = hda_str.buf_len*2 - pos
            read_lens[1] = buf_len - read_lens[0]
        # validate the read lens
        assert (read_lens[0] + pos) <= (hda_str.buf_len*2)
        assert read_lens[0] % 128 == 0
        assert read_lens[1] % 128 == 0
        buf_data0 = hda_str.mem.read(read_lens[0])
        hda_msg0 = buf_data0.decode("utf-8", "replace")
        sys.stdout.write(hda_msg0)
        if read_lens[1] != 0:
            hda_str.mem.seek(0)
            buf_data1 = hda_str.mem.read(read_lens[1])
            hda_msg1 = buf_data1.decode("utf-8", "replace")
            sys.stdout.write(hda_msg1)
        sys.stdout.flush()
    else:
        log.warning(f"cavstool: Unrecognized IPC command 0x{data:x} ext 0x{ext_data:x}")
        if not fw_is_alive(dsp):
            if args.log_only:
                log.info("DSP power seems off")
                wait_fw_entered(dsp, timeout_s=None)
            else:
                log.warning("DSP power seems off?!")
                time.sleep(2) # potential spam reduction

            return

    if adsp_is_ace():
        dsp.HFIPCXTDR = 1<<31 # Ack local interrupt
        if done:
            dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
        if send_msg:
            log.debug("ipc: sending msg 0x%08x", ext_data)
            dsp.HFIPCXIDDY = ext_data
            dsp.HFIPCXIDR = (1<<31) | ext_data
    else:
        dsp.HIPCTDR = 1<<31 # Ack local interrupt
        if done:
            dsp.HIPCTDA = 1<<31 # Signal done
        if send_msg:
            dsp.HIPCIDD = ext_data
            dsp.HIPCIDR = (1<<31) | ext_data

def handle_ipc():
    """Service pending IPC traffic from the DSP.

    Acks a DONE left in the initiator-ack register and, when the target
    doorbell has its busy bit set, passes the command word (busy bit
    stripped) and its extension word to `ipc_command()`.
    """
    busy = 0x80000000

    if not adsp_is_ace():
        if dsp.HIPCIDA & busy:
            dsp.HIPCIDA = 1<<31 # must ACK any DONE interrupts that arrive!
        if dsp.HIPCTDR & busy:
            ipc_command(dsp.HIPCTDR & ~busy, dsp.HIPCTDD)
        return

    if dsp.HFIPCXIDA & busy:
        log.debug("ipc: Ack DSP reply with IDA_DONE")
        dsp.HFIPCXIDA = 1<<31 # must ACK any DONE interrupts that arrive!
    if dsp.HFIPCXTDR & busy:
        ipc_command(dsp.HFIPCXTDR & ~busy, dsp.HFIPCXTDDY)


async def main():
    """Map the device, optionally load firmware, then pump logs and IPC.

    Runs until the module-level `start_output` flag stops being True.
    """
    #TODO this bit me, remove the globals, write a little FirmwareLoader class or something to contain.
    global hda, sd, dsp, hda_ostream_id, hda_streams

    try:
        mapped = map_regs(args.log_only)
        (hda, sd, dsp, hda_ostream_id) = mapped
    except Exception as e:
        log.error("Could not map device in sysfs; run as root?")
        log.error(e)
        sys.exit(1)

    log.info("Detected a supported cAVS/ACE hardware version")

    if args.log_only:
        wait_fw_entered(dsp, timeout_s=None)
    else:
        if not args.fw_file:
            log.error("Firmware file argument missing")
            sys.exit(1)

        loader = load_firmware_ace if adsp_is_ace() else load_firmware
        loader(args.fw_file)
        time.sleep(0.1)

    if not args.quiet:
        sys.stdout.write("--\n")

    if args.shell_pty:
        create_shell_pty()

    hda_streams = {}

    log_seq = 0
    shell_seq = 0
    while start_output is True:
        await asyncio.sleep(0.03)
        if args.shell_pty:
            shell_seq = read_from_shell_memwindow_winstream(shell_seq)
        (log_seq, text) = winstream_read(winstream_offset(), log_seq)
        if text:
            sys.stdout.write(text)
            sys.stdout.flush()
        if not args.log_only:
            handle_ipc()


def args_parse():
    """Parse the command line into the module-level `args` and set the
    log level for --quiet / --verbose."""
    global args
    parser = argparse.ArgumentParser(description="DSP loader/logger tool", allow_abbrev=False)

    switches = (
        ("-q", "--quiet", "No loader output, just DSP logging"),
        ("-v", "--verbose", "More loader output, DEBUG logging level"),
        ("-l", "--log-only", "Don't load firmware, just show log output"),
        ("-p", "--shell-pty", "Create a Zephyr shell pty if enabled in firmware"),
        ("-n", "--no-history", "No current log buffer at start, just new output"),
    )
    for short_opt, long_opt, help_text in switches:
        parser.add_argument(short_opt, long_opt, action="store_true", help=help_text)
    parser.add_argument("fw_file", nargs="?", help="Firmware file")

    args = parser.parse_args()

    # --quiet wins over --verbose when both are given.
    if args.quiet:
        log.setLevel(logging.WARN)
    elif args.verbose:
        log.setLevel(logging.DEBUG)


if __name__ == "__main__":
    args_parse()
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        start_output = False