#!/usr/bin/env python3
# Copyright(c) 2022 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import struct
import logging
import asyncio
import time
import subprocess
import ctypes
import mmap
import argparse
import pty

# Cleared by the KeyboardInterrupt handler at the bottom of the file;
# main() polls it as its loop condition.
start_output = True

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("cavs-fw")

PAGESZ = 4096
HUGEPAGESZ = 2 * 1024 * 1024
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp."

# SRAM windows.  Base and stride vary depending on ADSP version
#
# Window 0 is the FW_STATUS area, and 4k after that the IPC "outbox"
# Window 1 is the IPC "inbox" (host-writable memory, just 384 bytes currently)
# Window 2 is used for debug slots (Zephyr shell is one user)
# Window 3 is winstream-formatted log output

WINDOW_BASE = 0x80000
WINDOW_STRIDE = 0x20000

WINDOW_BASE_ACE = 0x180000
WINDOW_STRIDE_ACE = 0x8000

DEBUG_SLOT_SIZE = 4096
DEBUG_SLOT_SHELL = 0
SHELL_RX_SIZE = 256
SHELL_MAX_VALID_SLOT_SIZE = 16777216

# pylint: disable=duplicate-code

# ADSPCS bits
CRST = 0
CSTALL = 8
SPA = 16
CPA = 24


class HDAStream:
    """Host-side handle for one HDA stream descriptor.

    Maps the stream's registers relative to the global ``hdamem`` base
    (set by map_regs()) and resets the stream on construction.  config()
    backs the stream with two equally sized buffers in one huge page.
    """

    def __init__(self, stream_id: int):
        self.stream_id = stream_id
        self.base = hdamem + 0x0080 + (stream_id * 0x20)
        log.info(f"Mapping registers for hda stream {self.stream_id} at base {self.base:x}")

        # Controller-wide registers, plus this stream's SPIB slot
        self.hda = Regs(hdamem)
        self.hda.GCAP = 0x0000
        self.hda.GCTL = 0x0008
        self.hda.DPLBASE = 0x0070
        self.hda.DPUBASE = 0x0074
        self.hda.SPBFCH = 0x0700
        self.hda.SPBFCTL = 0x0704
        self.hda.PPCH = 0x0800
        self.hda.PPCTL = 0x0804
        self.hda.PPSTS = 0x0808
        self.hda.SPIB = 0x0708 + stream_id*0x08
        self.hda.freeze()

        # Per-stream descriptor registers
        self.regs = Regs(self.base)
        self.regs.CTL = 0x00
        self.regs.STS = 0x03
        self.regs.LPIB = 0x04
        self.regs.CBL = 0x08
        self.regs.LVI = 0x0c
        self.regs.FIFOW = 0x0e
        self.regs.FIFOS = 0x10
        self.regs.FMT = 0x12
        self.regs.FIFOL = 0x14
        self.regs.BDPL = 0x18
        self.regs.BDPU = 0x1c
        self.regs.freeze()

        self.dbg0 = Regs(hdamem + 0x0084 + (0x20*stream_id))
        self.dbg0.DPIB = 0x00
        self.dbg0.EFIFOS = 0x10
        self.dbg0.freeze()

        self.reset()

    def __del__(self):
        self.reset()

    def config(self, buf_len: int):
        """Allocate the DMA buffers and program the stream descriptor.

        buf_len is the size in bytes of each of the two buffers; it is
        also written to CBL.
        """
        log.info(f"Configuring stream {self.stream_id}")
        self.buf_len = buf_len
        log.info("Allocating huge page and setting up buffers")
        (self.mem, self.hugef, self.buf_list_addr,
         self.pos_buf_addr, self.n_bufs) = self.setup_buf(buf_len)

        log.info("Setting buffer list, length, and stream id and traffic priority bit")
        self.regs.CTL = ((self.stream_id & 0xFF) << 20) | (1 << 18) # must be set to something other than 0?
        self.regs.BDPU = (self.buf_list_addr >> 32) & 0xffffffff
        self.regs.BDPL = self.buf_list_addr & 0xffffffff
        self.regs.CBL = buf_len
        self.regs.LVI = self.n_bufs - 1
        self.mem.seek(0)
        self.debug()
        log.info(f"Configured stream {self.stream_id}")

    def write(self, data):
        """Copy up to buf_len bytes of data into both buffers and advance SPIB."""
        bufl = min(len(data), self.buf_len)
        log.info(f"Writing data to stream {self.stream_id}, len {bufl}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")
        self.mem[0:bufl] = data[0:bufl]
        self.mem[bufl:bufl+bufl] = data[0:bufl]
        self.hda.SPBFCTL |= (1 << self.stream_id)
        self.hda.SPIB += bufl
        log.info(f"Wrote data to stream {self.stream_id}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")

    def start(self):
        """Set the START bit (bit 1) in the stream's CTL register."""
        log.info(f"Starting stream {self.stream_id}, CTL {self.regs.CTL:x}")
        self.regs.CTL |= 2
        log.info(f"Started stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def stop(self):
        """Clear START, wait 100ms, then set the reset bit (bit 0) in CTL."""
        log.info(f"Stopping stream {self.stream_id}, CTL {self.regs.CTL:x}")
        # Clear only the START bit.  (This used to be "&= 2", which kept
        # START and wiped every other bit, the opposite of what reset()
        # and the firmware loaders do.)
        self.regs.CTL &= ~2
        time.sleep(0.1)
        self.regs.CTL |= 1
        log.info(f"Stopped stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def setup_buf(self, buf_len: int):
        """Map a huge page and lay out two buffers followed by their BDL.

        Returns (mem, hugef, bdl_phys_addr, pos_buf_phys_addr, n_bufs).
        """
        (mem, phys_addr, hugef) = map_phys_mem(self.stream_id)

        log.info(f"Mapped 2M huge page at 0x{phys_addr:x} for buf size ({buf_len})")

        # create two buffers in the page of buf_len and mark them
        # in a buffer descriptor list for the hardware to use
        buf0_len = buf_len
        buf1_len = buf_len
        bdl_off = buf0_len + buf1_len
        # Each BDL entry is two 64-bit words (16 bytes); we have two entries
        mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                                phys_addr,
                                                buf0_len,
                                                phys_addr + buf0_len,
                                                buf1_len)
        dpib_off = bdl_off+32

        # ensure buffer is initialized, sanity
        mem[0:bdl_off] = bytes(bdl_off)

        log.info("Filled the buffer descriptor list (BDL) for DMA.")
        return (mem, hugef, phys_addr + bdl_off, phys_addr+dpib_off, 2)

    def debug(self):
        """Dump the stream's register state at DEBUG log level."""
        # Use this object's own register view rather than the module
        # global "hda", which only exists once main() has run.
        log.debug("HDA %d: PPROC %d, CTL 0x%x, LPIB 0x%x, BDPU 0x%x, BDPL 0x%x, CBL 0x%x, LVI 0x%x",
                  self.stream_id, (self.hda.PPCTL >> self.stream_id) & 1, self.regs.CTL, self.regs.LPIB, self.regs.BDPU,
                  self.regs.BDPL, self.regs.CBL, self.regs.LVI)
        log.debug("    FIFOW %d, FIFOS %d, FMT %x, FIFOL %d, DPIB %d, EFIFOS %d",
                  self.regs.FIFOW & 0x7, self.regs.FIFOS, self.regs.FMT, self.regs.FIFOL, self.dbg0.DPIB, self.dbg0.EFIFOS)
        log.debug("    status: FIFORDY %d, DESE %d, FIFOE %d, BCIS %d",
                  (self.regs.STS >> 5) & 1, (self.regs.STS >> 4) & 1, (self.regs.STS >> 3) & 1, (self.regs.STS >> 2) & 1)

    def reset(self):
        """Stop and reset the stream, disable SPIB and enable PROCEN for it."""
        # Turn DMA off and reset the stream.  Clearing START first is a
        # noop per the spec, but absolutely required for stability.
        # Apparently the reset doesn't stop the stream, and the next load
        # starts before it's ready and kills the load (and often the DSP).
        # The sleep too is required, on at least one board (a fast
        # chromebook) putting the two writes next each other also hangs
        # the DSP!
        log.info(f"Resetting stream {self.stream_id}")
        self.debug()
        self.regs.CTL &= ~2 # clear START
        time.sleep(0.1)
        # set enter reset bit
        self.regs.CTL = 1
        while (self.regs.CTL & 1) == 0: pass
        # clear enter reset bit to exit reset
        self.regs.CTL = 0
        while (self.regs.CTL & 1) == 1: pass

        log.info(f"Disable SPIB and set position 0 of stream {self.stream_id}")
        self.hda.SPBFCTL = 0
        self.hda.SPIB = 0

        #log.info("Setting dma position buffer and enable it")
        #self.hda.DPUBASE = self.pos_buf_addr >> 32 & 0xffffffff
        #self.hda.DPLBASE = self.pos_buf_addr & 0xfffffff0 | 1

        log.info(f"Enabling dsp capture (PROCEN) of stream {self.stream_id}")
        self.hda.PPCTL |= (1 << self.stream_id)

        self.debug()
        log.info(f"Reset stream {self.stream_id}")


def adsp_is_cavs():
    """Return True if map_regs() detected any cAVS generation (1.5/1.8/2.5)."""
    # The third term used to repeat cavs15, so cAVS 2.5 was never matched.
    return cavs15 or cavs18 or cavs25

def adsp_is_ace():
    """Return True if map_regs() detected any ACE generation."""
    return ace15 or ace20 or ace30

def adsp_mem_window_config():
    """Return (base, stride) of the SRAM windows for the detected platform."""
    if adsp_is_ace():
        return (WINDOW_BASE_ACE, WINDOW_STRIDE_ACE)
    return (WINDOW_BASE, WINDOW_STRIDE)

def map_regs():
    """Locate the audio PCI device and map its register blocks.

    Sets the platform flags (cavs15 ... ace30) and the hdamem, bar4_mem
    and bar4_mmap globals.  Unless --log-only was given, an existing
    kernel driver is unloaded first.

    Returns (hda, sd, dsp, hda_ostream_id): Regs views of the HDA
    controller, the first output stream descriptor and the DSP block,
    plus the index of that stream.
    """
    global cavs15, cavs18, cavs25, ace15, ace20, ace30
    global hdamem
    global bar4_mmap
    global bar4_mem

    p = runx("grep -iEl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Platform/quirk detection.  ID lists cribbed from the SOF kernel driver
    with open(f"{pcidir}/device") as devf:
        did = int(devf.read().rstrip(), 16)
    cavs15 = did in [ 0x5a98, 0x1a98, 0x3198 ]
    cavs18 = did in [ 0x9dc8, 0xa348, 0x02c8, 0x06c8, 0xa3f0 ]
    cavs25 = did in [ 0xa0c8, 0x43c8, 0x4b55, 0x4b58, 0x7ad0, 0x51c8 ]
    ace15 = did in [ 0x7e28 ]
    ace20 = did in [ 0xa828 ]
    ace30 = did in [ 0xe428 ]

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        found_msg = f"Existing driver \"{mod}\" found"
        if args.log_only:
            log.info(found_msg)
        else:
            log.warning(found_msg + ", unloading module")
            runx(f"rmmod -f {mod}")
            # Disengage runtime power management so the kernel doesn't put it to sleep
            log.info(f"Forcing {pcidir}/power/control to always 'on'")
            with open(f"{pcidir}/power/control", "w") as ctrl:
                ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # Find the ID of the first output stream
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()

    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    if adsp_is_ace():
        dsp.HFDSSCS = 0x1000
        dsp.HFPWRCTL = 0x1d18 if ace20 else 0x1d20
        dsp.HFPWRSTS = 0x1d1c if ace20 else 0x1d24
        dsp.DSP2CXCTL_PRIMARY = 0x178d04
        dsp.HFIPCXTDR = 0x73200
        dsp.HFIPCXTDA = 0x73204
        dsp.HFIPCXIDR = 0x73210
        dsp.HFIPCXIDA = 0x73214
        dsp.HFIPCXCTL = 0x73228
        dsp.HFIPCXTDDY = 0x73300
        dsp.HFIPCXIDDY = 0x73380
        dsp.ROM_STATUS = 0x163200 if ace15 else 0x160200
        dsp.SRAM_FW_STATUS = WINDOW_BASE_ACE
    else:
        dsp.ADSPCS = 0x00004
        dsp.HIPCTDR = 0x00040 if cavs15 else 0x000c0
        dsp.HIPCTDA = 0x000c4 # 1.8+ only
        dsp.HIPCTDD = 0x00044 if cavs15 else 0x000c8
        dsp.HIPCIDR = 0x00048 if cavs15 else 0x000d0
        dsp.HIPCIDA = 0x000d4 # 1.8+ only
        dsp.HIPCIDD = 0x0004c if cavs15 else 0x000d8
        dsp.ROM_STATUS = WINDOW_BASE # Start of first SRAM window
        dsp.SRAM_FW_STATUS = WINDOW_BASE
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id)

def setup_dma_mem(fw_bytes):
    """Copy the firmware image into a huge page and build its BDL.

    Returns (bdl_phys_addr, num_bufs).
    """
    (mem, phys_addr, _) = map_phys_mem(hda_ostream_id)
    mem[0:len(fw_bytes)] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware"
             % (phys_addr, len(fw_bytes)))

    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region. Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and put
    # the BDL (two entries, four 64-bit words) into the final 128 bytes of
    # the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)

global_mmaps = [] # protect mmap mappings from garbage collection!

# Maps 2M of contiguous memory using a single page from hugetlbfs,
# then locates its physical address for use as a DMA buffer.
def map_phys_mem(stream_id):
    """Return (mem, paddr, hugef) for a freshly mapped 2M huge page.

    mem is the mmap object, paddr the physical address computed from
    /proc/self/pagemap, and hugef the (already unlinked) backing file.
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    hugef_name = HUGEPAGE_FILE + str(stream_id)
    hugef = open(hugef_name, "w+")
    hugef.truncate(HUGEPAGESZ)
    mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    log.info("type of mem is %s", str(type(mem)))
    global_mmaps.append(mem)
    os.unlink(hugef_name)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0 # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr >> 12
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)
        pent = pagemap.read(8)
    paddr = (struct.unpack("Q", pent)[0] & ((1 << 55) - 1)) * PAGESZ
    return (mem, paddr, hugef)

# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Map resource<barnum> of the device; return (address, mmap object)."""
    # The mapping stays valid after the file object is closed; the mmap
    # itself is kept alive through global_mmaps.
    with open(pcidir + "/resource" + str(barnum), "r+") as f:
        size = os.fstat(f.fileno()).st_size
        mm = mmap.mmap(f.fileno(), size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes."
             % (barnum, size))
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)

# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named registers as
# fields, call freeze(), then the field acts as a direct alias for the register!
class Regs:
    """Named 32-bit register aliases relative to a base address.

    Before freeze(), assigning to a new attribute defines a register at
    base_addr + value.  Assigning to an already defined name, or to any
    name after freeze(), writes the register; reading an attribute reads
    it.  Unknown names raise AttributeError.
    """

    def __init__(self, base_addr):
        # Go through vars() so these bypass our own __setattr__
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        if not self.frozen and name not in self.ptrs:
            addr = self.base_addr + val
            self.ptrs[name] = ctypes.c_uint32.from_address(addr)
            return
        try:
            reg = self.ptrs[name]
        except KeyError:
            raise AttributeError(f"no register named {name!r}") from None
        reg.value = val

    def __getattr__(self, name):
        # Only reached for names not found normally, i.e. registers.
        # Going through vars() avoids recursing if "ptrs" is not set yet.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(f"no register named {name!r}") from None

def runx(cmd):
    """Run cmd through the shell and return its stdout, right-stripped."""
    return subprocess.check_output(cmd, shell=True).decode().rstrip()

def mask(bit):
    """Return the per-core ADSPCS bit mask starting at bit.

    One core on cAVS 2.5, four on 1.8, two on 1.5.  Implicitly returns
    None when none of those platforms was detected.
    """
    if cavs25:
        return 0b1 << bit
    if cavs18:
        return 0b1111 << bit
    if cavs15:
        return 0b11 << bit

def _read_fw_file(fw_file):
    """Return the contents of fw_file, or log the error and exit(1)."""
    try:
        with open(fw_file, "rb") as f:
            return f.read()
    except OSError as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

def load_firmware(fw_file):
    """Load and boot a firmware image on cAVS hardware.

    Uses the hda, sd, dsp and hda_ostream_id globals set up by main().
    """
    fw_bytes = _read_fw_file(fw_file)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info(f"Stalling and Resetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS |= mask(CSTALL)
    dsp.ADSPCS |= mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) == 0: pass

    log.info(f"Powering down DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(SPA)
    while dsp.ADSPCS & mask(CPA): pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL = 1 << 20 # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP.  Host needs to provide power to all cores on 1.5
    # (which also starts them) and 1.8 (merely gates power, DSP also
    # has to set PWRCTL). On 2.5 where the DSP has full control,
    # and only core 0 is set.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = mask(SPA)
    while (dsp.ADSPCS & mask(CPA)) == 0: pass

    log.info(f"Unresetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CRST)
    while (dsp.ADSPCS & 1) != 0: pass

    log.info(f"Running DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CSTALL)

    # Wait for the ROM to boot and signal it's ready.  This not so short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info(f"Wait for ROM startup, ADSPCS = 0x{dsp.ADSPCS:x}")
    time.sleep(1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5: pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).  But with 1.5 it's the HDA index,
    # and depends on the number of input streams on the device.
    stream_idx = hda_ostream_id if cavs15 else 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HIPIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered()

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("cAVS firmware load complete")

def load_firmware_ace(fw_file):
    """Load and boot a firmware image on ACE hardware.

    Uses the hda, sd, dsp and hda_ostream_id globals set up by main().
    """
    fw_bytes = _read_fw_file(fw_file)
    # Resize fw_bytes for MTL
    if len(fw_bytes) < 512 * 1024:
        fw_bytes += b'\x00' * (512 * 1024 - len(fw_bytes))

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'$AE1':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info("Turning of DSP subsystem")
    dsp.HFDSSCS &= ~(1 << 16) # clear SPA bit
    time.sleep(0.002)
    # wait for CPA bit clear
    while dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power off")
        time.sleep(0.1)

    log.info("Turning on DSP subsystem")
    dsp.HFDSSCS |= (1 << 16) # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power on")
        time.sleep(0.1)

    log.info("Turning on Domain0")
    dsp.HFPWRCTL |= 0x1 # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFPWRSTS & 0x1:
        log.info("Waiting for DSP domain0 power on")
        time.sleep(0.1)

    log.info("Turning off Primary Core")
    dsp.DSP2CXCTL_PRIMARY &= ~(0x1) # clear SPA
    time.sleep(0.002) # wait for CPA settlement
    while dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power off")
        time.sleep(0.1)

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL |= (1 << 20) # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Send the DSP an IPC message to tell the device how to boot.
    # Unlike the cAVS 1.5 path in load_firmware(), the dma_id sent
    # here is always 0.
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HFIPCXIDR = 0x{ipcval:x}")
    dsp.HFIPCXIDR = ipcval

    log.info("Turning on Primary Core")
    dsp.DSP2CXCTL_PRIMARY |= 0x1 # set SPA
    time.sleep(0.002) # wait for CPA settlement
    while not dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power on")
        time.sleep(0.1)

    log.info("Waiting for IPC acceptance")
    while dsp.HFIPCXIDR & (1 << 31):
        log.info("Waiting for IPC busy bit clear")
        time.sleep(0.1)

    log.info("ACK IPC")
    dsp.HFIPCXIDA |= (1 << 31)

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.ROM_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered()

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("ACE firmware load complete")

def fw_is_alive():
    """Return True when the low 28 bits of ROM_STATUS read FW_ENTERED (5)."""
    return dsp.ROM_STATUS & ((1 << 28) - 1) == 5 # "FW_ENTERED"

def wait_fw_entered(timeout_s=2):
    """Poll fw_is_alive() at 100Hz until it succeeds or the timeout expires.

    timeout_s=None waits forever.  The outcome is only logged; nothing
    is returned or raised.
    """
    log.info("Waiting %s for firmware handoff, ROM_STATUS = 0x%x",
             "forever" if timeout_s is None else f"{timeout_s} seconds",
             dsp.ROM_STATUS)
    hertz = 100
    attempts = None if timeout_s is None else timeout_s * hertz
    while True:
        alive = fw_is_alive()
        if alive:
            break
        if attempts is not None:
            attempts -= 1
            if attempts < 0:
                break
        time.sleep(1 / hertz)

    if not alive:
        log.warning("Load failed?  ROM_STATUS = 0x%x", dsp.ROM_STATUS)
    else:
        log.info("FW alive, ROM_STATUS = 0x%x", dsp.ROM_STATUS)

def winstream_offset():
    """Return the BAR4 offset of window 3 (winstream log output)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride * 3

# This SHOULD be just "mem[start:start+length]", but slicing an mmap
# array seems to be unreliable on one of my machines (python 3.6.9 on
# Ubuntu 18.04).  Read out bytes individually.
def win_read(base, start, length):
    """Return length bytes of BAR4 starting at offset base + start."""
    try:
        return bytes(bar4_mmap[base + x]
                     for x in range(start, start + length))
    except IndexError:
        # A FW in a bad state may cause winstream garbage
        log.error("IndexError in bar4_mmap[%d + %d]", base, start)
        log.error("bar4_mmap.size()=%d", bar4_mmap.size())
        raise

def winstream_reg_hdr(base):
    """Return a Regs view of the 16-byte winstream header at BAR4 + base."""
    hdr = Regs(bar4_mem + base)
    hdr.WLEN  = 0x00
    hdr.START = 0x04
    hdr.END   = 0x08
    hdr.SEQ   = 0x0c
    hdr.freeze()
    return hdr

def win_hdr(hdr):
    """Read the header once and return (WLEN, START, END, SEQ)."""
    return ( hdr.WLEN, hdr.START, hdr.END, hdr.SEQ )

# Python implementation of the same algorithm in sys_winstream_read(),
# see there for details.
def winstream_read(base, last_seq):
    """Read new text from the winstream buffer at BAR4 offset base.

    last_seq is the sequence number returned by the previous call, or 0
    on the first call.  In that case either the whole current buffer
    content is returned, or nothing if --no-history was given.

    Returns (seq, text).  text is "" when the header looks invalid, the
    buffer is empty, nothing new arrived, or the reader fell further
    behind than the buffer holds.
    """
    # The header view is just a set of fixed pointers, so build it once;
    # every win_hdr() call still re-reads the live values.
    hdr = winstream_reg_hdr(base)
    while True:
        (wlen, start, end, seq) = win_hdr(hdr)
        if wlen > SHELL_MAX_VALID_SLOT_SIZE:
            log.debug("DSP powered off at winstream_read")
            return (seq, "")
        if wlen == 0:
            return (seq, "")
        if last_seq == 0:
            last_seq = seq if args.no_history else (seq - ((end - start) % wlen))
        if seq == last_seq or start == end:
            return (seq, "")
        behind = seq - last_seq
        if behind > ((end - start) % wlen):
            return (seq, "")
        copy = (end - behind) % wlen
        suffix = min(behind, wlen - copy)
        # Data begins right after the 16-byte header
        result = win_read(base, 16 + copy, suffix)
        if suffix < behind:
            # The wanted span wraps; the remainder sits at the buffer start
            result += win_read(base, 16, behind - suffix)
        # Re-read the header: if the writer moved START or SEQ while we
        # were copying, the data may be torn, so loop and try again.
        (wlen, start1, end, seq1) = win_hdr(hdr)
        if start1 == start and seq1 == seq:
            # Best effort attempt at decoding, replacing unusable characters
            # Found to be useful when it really goes wrong
            return (seq, result.decode("utf-8", "replace"))

def idx_mod(wlen, idx):
    """Wrap idx into [0, wlen), assuming idx < 2 * wlen."""
    if idx >= wlen:
        return idx - wlen
    return idx

def idx_sub(wlen, a, b):
    """Return (a - b) modulo wlen for indices a, b in [0, wlen)."""
    return idx_mod(wlen, a + (wlen - b))

# Python implementation of the same algorithm in sys_winstream_write(),
# see there for details.
def winstream_write(base, msg):
    """Append msg (bytes) to the winstream buffer at BAR4 offset base.

    Does nothing when the header looks invalid (WLEN is 0 or larger than
    SHELL_MAX_VALID_SLOT_SIZE).  The buffer holds at most WLEN - 1 bytes;
    a longer msg resets the buffer and only its tail is stored.
    """
    hdr = winstream_reg_hdr(base)
    (wlen, start, end, seq) = win_hdr(hdr)
    if wlen > SHELL_MAX_VALID_SLOT_SIZE:
        log.debug("DSP powered off at winstream_write")
        return
    if wlen == 0:
        return
    lenmsg = len(msg)
    lenmsg0 = lenmsg
    # Overflow: if we're truncating then just reset the buffer.  (Max
    # bytes buffered is wlen - 1 because start == end means "empty".)
    if lenmsg > wlen - 1:
        start = end
        lenmsg = wlen - 1
    lenmsg = min(lenmsg, wlen)
    # Make room in the buffer by advancing START first
    if seq != 0:
        avail = (wlen - 1) - idx_sub(wlen, end, start)
        if lenmsg > avail:
            hdr.START = idx_mod(wlen, start + (lenmsg - avail))
    # Had to truncate?  Keep the last lenmsg bytes of the message.
    if lenmsg < lenmsg0:
        hdr.START = end
        drop = lenmsg0 - lenmsg
        msg = msg[drop:]
    suffix = min(lenmsg, wlen - end)
    for c in range(0, suffix):
        bar4_mmap[base + 16 + end + c] = msg[c]
    if lenmsg > suffix:
        for c in range(0, lenmsg - suffix):
            bar4_mmap[base + 16 + c] = msg[suffix + c]
    hdr.END = idx_mod(wlen, end + lenmsg)
    hdr.SEQ += lenmsg0

def debug_offset():
    """Return the BAR4 offset of window 2 (debug slots)."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride * 2

def shell_base_offset():
    """Return the BAR4 offset of the shell's debug slot."""
    return debug_offset() + DEBUG_SLOT_SIZE * (1 + DEBUG_SLOT_SHELL)

def read_from_shell_memwindow_winstream(last_seq):
    """Forward new shell output to the pty; return the new sequence number."""
    offset = shell_base_offset() + SHELL_RX_SIZE
    (last_seq, output) = winstream_read(offset, last_seq)
    if output:
        os.write(shell_client_port, output.encode("utf-8"))
    return last_seq

def write_to_shell_memwindow_winstream():
    """Event-loop reader callback: pass one byte from the pty to the shell."""
    msg = os.read(shell_client_port, 1)
    if len(msg) > 0:
        winstream_write(shell_base_offset(), msg)

def create_shell_pty():
    """Open a pty for the Zephyr shell and register it with the event loop."""
    global shell_client_port
    (shell_client_port, user_port) = pty.openpty()
    name = os.ttyname(user_port)
    log.info(f"shell PTY at: {name}")
    asyncio.get_event_loop().add_reader(shell_client_port, write_to_shell_memwindow_winstream)

async def ipc_delay_done():
    """Signal IPC DONE to the DSP after a 100ms delay."""
    await asyncio.sleep(0.1)
    if adsp_is_ace():
        dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
    else:
        dsp.HIPCTDA = 1<<31

def inbox_offset():
    """Return the BAR4 offset of window 1, the IPC inbox."""
    ( base, stride ) = adsp_mem_window_config()
    return base + stride

def outbox_offset():
    """Return the BAR4 offset of the IPC outbox, 4k into window 0."""
    ( base, _ ) = adsp_mem_window_config()
    return base + 4096

ipc_timestamp = 0

# Super-simple command language, driven by the test code on the DSP
def ipc_command(data, ext_data):
    """Execute one IPC command from the DSP and acknowledge it.

    data selects the command (0-12, see the branches below) and ext_data
    is its argument.  Unless the command is unknown and the firmware is
    not alive, the interrupt is acked, DONE is signalled (immediately or
    via ipc_delay_done()) and, for echo-style commands, ext_data is sent
    back as a message.
    """
    send_msg = False
    done = True
    log.debug ("ipc data %d, ext_data %x", data, ext_data)
    if data == 0: # noop, with synchronous DONE
        pass
    elif data == 1: # async command: signal DONE after a delay (on 1.8+)
        if not cavs15:
            done = False
            asyncio.ensure_future(ipc_delay_done())
    elif data == 2: # echo back ext_data as a message command
        send_msg = True
    elif data == 3: # set ADSPCS
        dsp.ADSPCS = ext_data
    elif data == 4: # echo back microseconds since last timestamp command
        global ipc_timestamp
        t = round(time.time() * 1e6)
        ext_data = t - ipc_timestamp
        ipc_timestamp = t
        send_msg = True
    elif data == 5: # copy word at outbox[ext_data >> 16] to inbox[ext_data & 0xffff]
        src = outbox_offset() + 4 * (ext_data >> 16)
        dst = inbox_offset() + 4 * (ext_data & 0xffff)
        for i in range(4):
            bar4_mmap[dst + i] = bar4_mmap[src + i]
    elif data == 6: # HDA RESET (init if not exists)
        stream_id = ext_data & 0xff
        if stream_id in hda_streams:
            hda_streams[stream_id].reset()
        else:
            hda_str = HDAStream(stream_id)
            hda_streams[stream_id] = hda_str
    elif data == 7: # HDA CONFIG
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        hda_str.config(buf_len)
    elif data == 8: # HDA START
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].start()
        hda_streams[stream_id].mem.seek(0)

    elif data == 9: # HDA STOP
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].stop()
    elif data == 10: # HDA VALIDATE
        stream_id = ext_data & 0xFF
        hda_str = hda_streams[stream_id]
        hda_str.debug()
        is_ramp_data = True
        hda_str.mem.seek(0)
        for (i, val) in enumerate(hda_str.mem.read(256)):
            if i != val:
                is_ramp_data = False
            # log.info("stream[%d][%d]: %d", stream_id, i, val) # debug helper
        log.info("Is ramp data? " + str(is_ramp_data))
        ext_data = int(is_ramp_data)
        log.info(f"Ext data to send back on ramp status {ext_data}")
        send_msg = True
    elif data == 11: # HDA HOST OUT SEND
        stream_id = ext_data & 0xff
        buf = bytearray(256)
        for i in range(0, 256):
            buf[i] = i
        hda_streams[stream_id].write(buf)
    elif data == 12: # HDA PRINT
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        # check for wrap here
        pos = hda_str.mem.tell()
        read_lens = [buf_len, 0]
        if pos + buf_len >= hda_str.buf_len*2:
            read_lens[0] = hda_str.buf_len*2 - pos
            read_lens[1] = buf_len - read_lens[0]
        # validate the read lens
        assert (read_lens[0] + pos) <= (hda_str.buf_len*2)
        assert read_lens[0] % 128 == 0
        assert read_lens[1] % 128 == 0
        buf_data0 = hda_str.mem.read(read_lens[0])
        hda_msg0 = buf_data0.decode("utf-8", "replace")
        sys.stdout.write(hda_msg0)
        if read_lens[1] != 0:
            # Wrapped: the rest of the message is at the start of the buffer
            hda_str.mem.seek(0)
            buf_data1 = hda_str.mem.read(read_lens[1])
            hda_msg1 = buf_data1.decode("utf-8", "replace")
            sys.stdout.write(hda_msg1)
        sys.stdout.flush()
    else:
        log.warning(f"cavstool: Unrecognized IPC command 0x{data:x} ext 0x{ext_data:x}")
        if not fw_is_alive():
            if args.log_only:
                log.info("DSP power seems off")
                wait_fw_entered(timeout_s=None)
            else:
                log.warning("DSP power seems off?!")
                time.sleep(2) # potential spam reduction

            # NOTE(review): the nesting of this return was reconstructed
            # from a flattened source; confirm that it belongs inside the
            # "not alive" branch.
            return

    if adsp_is_ace():
        dsp.HFIPCXTDR = 1<<31 # Ack local interrupt, also signals DONE on v1.5
        if done:
            dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
        if send_msg:
            log.debug("ipc: sending msg 0x%08x", ext_data)
            dsp.HFIPCXIDDY = ext_data
            dsp.HFIPCXIDR = (1<<31) | ext_data
    else:
        dsp.HIPCTDR = 1<<31 # Ack local interrupt, also signals DONE on v1.5
        if cavs18:
            time.sleep(0.01) # Needed on 1.8, or the command below won't send!
        if done and not cavs15:
            dsp.HIPCTDA = 1<<31 # Signal done
        if send_msg:
            dsp.HIPCIDD = ext_data
            dsp.HIPCIDR = (1<<31) | ext_data

def handle_ipc():
    """Poll the IPC registers once: ack DONE replies, run pending commands."""
    if adsp_is_ace():
        if dsp.HFIPCXIDA & 0x80000000:
            log.debug("ipc: Ack DSP reply with IDA_DONE")
            dsp.HFIPCXIDA = 1<<31 # must ACK any DONE interrupts that arrive!
        if dsp.HFIPCXTDR & 0x80000000:
            ipc_command(dsp.HFIPCXTDR & ~0x80000000, dsp.HFIPCXTDDY)
        return

    if dsp.HIPCIDA & 0x80000000:
        dsp.HIPCIDA = 1<<31 # must ACK any DONE interrupts that arrive!
    if dsp.HIPCTDR & 0x80000000:
        ipc_command(dsp.HIPCTDR & ~0x80000000, dsp.HIPCTDD)

async def main():
    """Map the device, optionally load firmware, then pump logs and IPC.

    The loop runs every 30ms until start_output is cleared.
    """
    #TODO this bit me, remove the globals, write a little FirmwareLoader class or something to contain.
    global hda, sd, dsp, hda_ostream_id, hda_streams

    try:
        (hda, sd, dsp, hda_ostream_id) = map_regs()
    except Exception as e:
        log.error("Could not map device in sysfs; run as root?")
        log.error(e)
        sys.exit(1)

    log.info(f"Detected cAVS {'1.5' if cavs15 else '1.8+'} hardware")

    if args.log_only:
        wait_fw_entered(timeout_s=None)
    else:
        if not args.fw_file:
            log.error("Firmware file argument missing")
            sys.exit(1)

        if adsp_is_ace():
            load_firmware_ace(args.fw_file)
        else:
            load_firmware(args.fw_file)
        time.sleep(0.1)

    if not args.quiet:
        sys.stdout.write("--\n")

    if args.shell_pty:
        create_shell_pty()

    hda_streams = dict()

    last_seq = 0
    last_seq_shell = 0
    while start_output is True:
        await asyncio.sleep(0.03)
        if args.shell_pty:
            last_seq_shell = read_from_shell_memwindow_winstream(last_seq_shell)
        (last_seq, output) = winstream_read(winstream_offset(), last_seq)
        if output:
            sys.stdout.write(output)
            sys.stdout.flush()
        if not args.log_only:
            handle_ipc()


ap = argparse.ArgumentParser(description="DSP loader/logger tool", allow_abbrev=False)
ap.add_argument("-q", "--quiet", action="store_true",
                help="No loader output, just DSP logging")
ap.add_argument("-v", "--verbose", action="store_true",
                help="More loader output, DEBUG logging level")
ap.add_argument("-l", "--log-only", action="store_true",
                help="Don't load firmware, just show log output")
ap.add_argument("-p", "--shell-pty", action="store_true",
                help="Create a Zephyr shell pty if enabled in firmware")
ap.add_argument("-n", "--no-history", action="store_true",
                help="No current log buffer at start, just new output")
ap.add_argument("fw_file", nargs="?", help="Firmware file")

# Importing this module must not consume (or choke on) the importer's
# command line, so "args" starts out as the parser defaults and the real
# argv is parsed only when run as a script.
args = ap.parse_args([])

if __name__ == "__main__":
    args = ap.parse_args()

    if args.quiet:
        log.setLevel(logging.WARN)
    elif args.verbose:
        log.setLevel(logging.DEBUG)

    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        start_output = False