1#!/usr/bin/env python3
2# Copyright(c) 2022 Intel Corporation. All rights reserved.
3# SPDX-License-Identifier: Apache-2.0
4import os
5import sys
6import struct
7import logging
8import asyncio
9import time
10import subprocess
11import ctypes
12import mmap
13import argparse
14import pty
15
# NOTE(review): not referenced in the part of the file visible here — confirm
# it is still used before relying on it.
start_output = True

# All diagnostics from this tool go through the "cavs-fw" logger.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("cavs-fw")

PAGESZ = 4096                 # page size used when decoding /proc/self/pagemap entries
HUGEPAGESZ = 2 * 1024 * 1024  # size of the hugetlbfs page used as a DMA buffer
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp."  # prefix; the stream id is appended

# SRAM windows. Base and stride vary depending on the ADSP version.
#
# Window 0 is the FW_STATUS area, and 4k after that the IPC "outbox"
# Window 1 is the IPC "inbox" (host-writable memory, just 384 bytes currently)
# Window 2 is used for debug slots (Zephyr shell is one user)
# Window 3 is winstream-formatted log output

# Window layout for non-ACE (cAVS) parts.
WINDOW_BASE = 0x80000
WINDOW_STRIDE = 0x20000

# Window layout for ACE parts.
WINDOW_BASE_ACE = 0x180000
WINDOW_STRIDE_ACE = 0x8000

DEBUG_SLOT_SIZE = 4096   # size of one debug slot in window 2
DEBUG_SLOT_SHELL = 0     # index of the debug slot used by the shell
SHELL_RX_SIZE = 256      # offset of the shell output stream inside the shell slot
# Any winstream length above this is treated as "DSP powered off".
SHELL_MAX_VALID_SLOT_SIZE = 16777216

# pylint: disable=duplicate-code

# ADSPCS bits (bit positions; mask() turns them into register masks)
CRST   = 0
CSTALL = 8
SPA    = 16
CPA    = 24
50
class HDAStream:
    """Host-side handle for one HDA stream descriptor.

    Constructing an instance maps the global HDA registers and the stream
    descriptor registers for ``stream_id`` and then resets the stream.
    ``config()`` allocates a huge page holding two ``buf_len`` byte buffers
    followed by a two-entry buffer descriptor list (BDL) and programs the
    stream with them; ``write()``, ``start()``, ``stop()`` and ``reset()``
    drive the stream afterwards.

    Every register is exposed through ``Regs``, i.e. as a 32-bit word at
    the given offset, regardless of the register's architectural width.
    """

    def __init__(self, stream_id: int):
        """Map the registers of stream ``stream_id`` and reset the stream."""
        self.stream_id = stream_id
        self.base = hdamem + 0x0080 + (stream_id * 0x20)
        log.info(f"Mapping registers for hda stream {self.stream_id} at base {self.base:x}")

        # Global HDA registers (offsets relative to the start of BAR0).
        self.hda = Regs(hdamem)
        self.hda.GCAP    = 0x0000
        self.hda.GCTL    = 0x0008
        self.hda.DPLBASE = 0x0070
        self.hda.DPUBASE = 0x0074
        self.hda.SPBFCH  = 0x0700
        self.hda.SPBFCTL = 0x0704
        self.hda.PPCH    = 0x0800
        self.hda.PPCTL   = 0x0804
        self.hda.PPSTS   = 0x0808
        self.hda.SPIB = 0x0708 + stream_id*0x08
        self.hda.freeze()

        # Per-stream descriptor registers.
        self.regs = Regs(self.base)
        self.regs.CTL  = 0x00
        self.regs.STS  = 0x03
        self.regs.LPIB = 0x04
        self.regs.CBL  = 0x08
        self.regs.LVI  = 0x0c
        self.regs.FIFOW = 0x0e
        self.regs.FIFOS = 0x10
        self.regs.FMT = 0x12
        self.regs.FIFOL= 0x14
        self.regs.BDPL = 0x18
        self.regs.BDPU = 0x1c
        self.regs.freeze()

        # Extra per-stream registers that are only read by debug().
        self.dbg0 = Regs(hdamem + 0x0084 + (0x20*stream_id))
        self.dbg0.DPIB = 0x00
        self.dbg0.EFIFOS = 0x10
        self.dbg0.freeze()

        self.reset()

    def __del__(self):
        # Only touch the hardware if __init__ got far enough to map every
        # register block; otherwise reset() would fail on missing attributes.
        if "dbg0" in vars(self):
            self.reset()

    def config(self, buf_len: int):
        """Allocate the DMA buffers and program the stream to use them.

        ``buf_len`` is the size in bytes of each of the two buffers.
        """
        log.info(f"Configuring stream {self.stream_id}")
        self.buf_len = buf_len
        log.info("Allocating huge page and setting up buffers")
        self.mem, self.hugef, self.buf_list_addr, self.pos_buf_addr, self.n_bufs = self.setup_buf(buf_len)

        log.info("Setting buffer list, length, and stream id and traffic priority bit")
        self.regs.CTL = ((self.stream_id & 0xFF) << 20) | (1 << 18) # must be set to something other than 0?
        self.regs.BDPU = (self.buf_list_addr >> 32) & 0xffffffff
        self.regs.BDPL = self.buf_list_addr & 0xffffffff
        self.regs.CBL = buf_len
        self.regs.LVI = self.n_bufs - 1
        self.mem.seek(0)
        self.debug()
        log.info(f"Configured stream {self.stream_id}")

    def write(self, data):
        """Copy ``data`` into both buffers and advance SPIB.

        At most ``buf_len`` bytes are used; the same bytes are placed in the
        first and in the second buffer.
        """
        bufl = min(len(data), self.buf_len)
        log.info(f"Writing data to stream {self.stream_id}, len {bufl}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")
        self.mem[0:bufl] = data[0:bufl]
        self.mem[bufl:bufl+bufl] = data[0:bufl]
        self.hda.SPBFCTL |= (1 << self.stream_id)
        self.hda.SPIB += bufl
        log.info(f"Wrote data to stream {self.stream_id}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")

    def start(self):
        """Set the START bit (bit 1) in the stream's CTL register."""
        log.info(f"Starting stream {self.stream_id}, CTL {self.regs.CTL:x}")
        self.regs.CTL |= 2
        log.info(f"Started stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def stop(self):
        """Clear START, wait 100 ms, then set the reset bit (bit 0)."""
        log.info(f"Stopping stream {self.stream_id}, CTL {self.regs.CTL:x}")
        # Clear only the START bit.  (This used to be "&= 2", which kept
        # START set and wiped every other CTL field instead.)
        self.regs.CTL &= ~2
        time.sleep(0.1)
        self.regs.CTL |= 1
        log.info(f"Stopped stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def setup_buf(self, buf_len: int):
        """Map a huge page and lay out two buffers plus their BDL in it.

        Returns ``(mem, hugef, bdl_phys_addr, pos_buf_phys_addr, n_bufs)``
        where ``mem``/``hugef`` are the mapping and its backing file as
        returned by ``map_phys_mem()`` and ``n_bufs`` is always 2.
        """
        (mem, phys_addr, hugef) = map_phys_mem(self.stream_id)

        log.info(f"Mapped 2M huge page at 0x{phys_addr:x} for buf size ({buf_len})")

        # create two buffers in the page of buf_len and mark them
        # in a buffer descriptor list for the hardware to use
        buf0_len = buf_len
        buf1_len = buf_len
        bdl_off = buf0_len + buf1_len
        # each BDL entry is two 64-bit words (16 bytes); we have two entries
        mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                                phys_addr,
                                                buf0_len,
                                                phys_addr + buf0_len,
                                                buf1_len)
        dpib_off = bdl_off+32

        # ensure both data buffers start out zeroed
        mem[0:bdl_off] = bytes(bdl_off)

        log.info("Filled the buffer descriptor list (BDL) for DMA.")
        return (mem, hugef, phys_addr + bdl_off, phys_addr+dpib_off, 2)

    def debug(self):
        """Log the stream's register state at DEBUG level."""
        log.debug("HDA %d: PPROC %d, CTL 0x%x, LPIB 0x%x, BDPU 0x%x, BDPL 0x%x, CBL 0x%x, LVI 0x%x",
                 self.stream_id, (self.hda.PPCTL >> self.stream_id) & 1, self.regs.CTL, self.regs.LPIB, self.regs.BDPU,
                 self.regs.BDPL, self.regs.CBL, self.regs.LVI)
        log.debug("    FIFOW %d, FIFOS %d, FMT %x, FIFOL %d, DPIB %d, EFIFOS %d",
                 self.regs.FIFOW & 0x7, self.regs.FIFOS, self.regs.FMT, self.regs.FIFOL, self.dbg0.DPIB, self.dbg0.EFIFOS)
        log.debug("    status: FIFORDY %d, DESE %d, FIFOE %d, BCIS %d",
                 (self.regs.STS >> 5) & 1, (self.regs.STS >> 4) & 1, (self.regs.STS >> 3) & 1, (self.regs.STS >> 2) & 1)

    def reset(self):
        """Stop DMA, cycle the stream through reset and re-enable PROCEN."""
        # Turn DMA off and reset the stream.  Clearing START first is a
        # noop per the spec, but absolutely required for stability.
        # Apparently the reset doesn't stop the stream, and the next load
        # starts before it's ready and kills the load (and often the DSP).
        # The sleep too is required, on at least one board (a fast
        # chromebook) putting the two writes next each other also hangs
        # the DSP!
        log.info(f"Resetting stream {self.stream_id}")
        self.debug()
        self.regs.CTL &= ~2 # clear START
        time.sleep(0.1)
        # set enter reset bit
        self.regs.CTL = 1
        while (self.regs.CTL & 1) == 0: pass
        # clear enter reset bit to exit reset
        self.regs.CTL = 0
        while (self.regs.CTL & 1) == 1: pass

        log.info(f"Disable SPIB and set position 0 of stream {self.stream_id}")
        self.hda.SPBFCTL = 0
        self.hda.SPIB = 0

        #log.info("Setting dma position buffer and enable it")
        #self.hda.DPUBASE = self.pos_buf_addr >> 32 & 0xffffffff
        #self.hda.DPLBASE = self.pos_buf_addr & 0xfffffff0 | 1

        log.info(f"Enabling dsp capture (PROCEN) of stream {self.stream_id}")
        self.hda.PPCTL |= (1 << self.stream_id)

        self.debug()
        log.info(f"Reset stream {self.stream_id}")
199
def adsp_is_ace():
    """Tell whether the detected device is one of the ACE generations.

    Reads the module-level ``ace15``/``ace20``/``ace30`` flags that
    ``map_regs()`` sets from the PCI device id.
    """
    if ace15:
        return ace15
    if ace20:
        return ace20
    return ace30
202
def adsp_mem_window_config():
    """Return ``(base, stride)`` of the SRAM windows for the detected ADSP."""
    if not adsp_is_ace():
        return (WINDOW_BASE, WINDOW_STRIDE)
    return (WINDOW_BASE_ACE, WINDOW_STRIDE_ACE)
212
def map_regs(log_only):
    """Locate the audio PCI device and map its register blocks.

    Side effects: sets the module globals ``cavs25``, ``ace15``, ``ace20``,
    ``ace30``, ``hdamem``, ``bar4_mem`` and ``bar4_mmap``; rewrites the PCI
    command register; and, unless ``log_only`` is true, unloads any kernel
    driver bound to the device and forces its runtime power control to "on".

    Returns ``(hda, sd, dsp, hda_ostream_id)``: the global HDA registers,
    the stream descriptor of the first output stream, the DSP registers in
    BAR4, and the id of that output stream.

    Raises ``subprocess.CalledProcessError`` when no matching device is found.
    """
    try:
        p = runx("grep -iEl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    except subprocess.CalledProcessError:
        # if no device found, also try 40300 class no-DSP devices
        p = runx("grep -iEl 'PCI_CLASS=40300' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Platform/quirk detection.  ID lists cribbed from the SOF kernel driver
    global cavs25, ace15, ace20, ace30
    with open(f"{pcidir}/device") as devf:
        did = int(devf.read().rstrip(), 16)
    cavs25 = did in [ 0x43c8, 0x4b55, 0x4b58, 0x51c8, 0x51ca, 0x51cb, 0x51ce, 0x51cf, 0x54c8,
                      0x7ad0, 0xa0c8 ]
    ace15 = did in [ 0x7728, 0x7f50, 0x7e28 ]
    ace20 = did in [ 0xa828 ]
    ace30 = did in [ 0xe428 ]

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        found_msg = f"Existing driver \"{mod}\" found"
        if log_only:
            log.info(found_msg)
        else:
            log.warning(found_msg + ", unloading module")
            runx(f"rmmod -f {mod}")
            # Disengage runtime power management so the kernel doesn't put it to sleep
            log.info(f"Forcing {pcidir}/power/control to always 'on'")
            with open(f"{pcidir}/power/control", "w") as ctrl:
                ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    global hdamem
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP    = 0x0000
    hda.GCTL    = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL   = 0x0804

    # Find the ID of the first output stream
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()


    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL  = 0x00
    sd.CBL  = 0x08
    sd.LVI  = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    global bar4_mmap
    global bar4_mem
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    if adsp_is_ace():
        dsp.HFDSSCS        = 0x1000
        dsp.HFPWRCTL       = 0x1d18 if ace15 or ace20 else 0x1d20
        dsp.HFPWRSTS       = 0x1d1c if ace15 or ace20 else 0x1d24
        dsp.DSP2CXCTL_PRIMARY = 0x178d04
        dsp.HFIPCXTDR      = 0x73200
        dsp.HFIPCXTDA      = 0x73204
        dsp.HFIPCXIDR      = 0x73210
        dsp.HFIPCXIDA      = 0x73214
        dsp.HFIPCXCTL      = 0x73228
        dsp.HFIPCXTDDY     = 0x73300
        dsp.HFIPCXIDDY     = 0x73380
        dsp.ROM_STATUS     = 0x163200 if ace15 else 0x160200
        dsp.SRAM_FW_STATUS = WINDOW_BASE_ACE
    else:
        dsp.ADSPCS         = 0x00004
        dsp.HIPCTDR        = 0x000c0
        dsp.HIPCTDA        = 0x000c4
        dsp.HIPCTDD        = 0x000c8
        dsp.HIPCIDR        = 0x000d0
        dsp.HIPCIDA        = 0x000d4
        dsp.HIPCIDD        = 0x000d8
        dsp.ROM_STATUS     = WINDOW_BASE # Start of first SRAM window
        dsp.SRAM_FW_STATUS = WINDOW_BASE
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id)
307
def setup_dma_mem(fw_bytes):
    """Copy the firmware image into a DMA huge page and build its BDL.

    Returns ``(bdl_phys_addr, n_entries)`` with ``n_entries`` always 2.

    Raises ``ValueError`` if the image does not fit in front of the buffer
    descriptor list, which occupies the final 128 bytes of the page.
    """
    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region. Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and put
    # the 2-entry BDL list into the final 128 bytes of the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len

    # Refuse images that would collide with (or run past) the BDL instead
    # of silently corrupting them.
    if len(fw_bytes) > bdl_off:
        raise ValueError(
            f"firmware image is {len(fw_bytes)} bytes, "
            f"but the DMA buffer holds at most {bdl_off} bytes")

    (mem, phys_addr, _) = map_phys_mem(hda_ostream_id)
    mem[0:len(fw_bytes)] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware",
             phys_addr, len(fw_bytes))

    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)
327
global_mmaps = [] # protect mmap mappings from garbage collection!

def map_phys_mem(stream_id):
    """Map one 2M hugetlbfs page and look up its physical address.

    The page is backed by a file named ``HUGEPAGE_FILE + str(stream_id)``,
    which is unlinked again once the mapping exists.

    Returns ``(mem, paddr, hugef)``: the mmap object, the physical address
    of the page, and the still-open backing file object.

    Raises ``RuntimeError`` if the kernel reports no page frame number for
    the mapping.
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    hugef_name = HUGEPAGE_FILE + str(stream_id)
    hugef = open(hugef_name, "w+")
    try:
        hugef.truncate(HUGEPAGESZ)
        mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    except Exception:
        # Don't leak the descriptor when the page cannot be mapped.
        hugef.close()
        raise
    log.info("type of mem is %s", str(type(mem)))
    global_mmaps.append(mem)
    os.unlink(hugef_name)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0 # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr // PAGESZ
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)  # one 8-byte entry per virtual page
        pent = pagemap.read(8)
    pfn = struct.unpack("Q", pent)[0] & ((1 << 55) - 1)
    if pfn == 0:
        # A zero frame number would make the caller program physical
        # address 0 as a DMA target.
        raise RuntimeError("pagemap returned no physical frame number for "
                           "the DMA page (insufficient privileges?)")
    return (mem, pfn * PAGESZ, hugef)
364
def bar_map(pcidir, barnum):
    """Map PCI BAR ``barnum`` of the device at sysfs directory ``pcidir``.

    Returns ``(address, mm)``: the in-process address of the mapping and
    the mmap object itself.  The mapping is also recorded in
    ``global_mmaps`` so that it is never garbage collected.
    """
    # mmap keeps its own duplicate of the descriptor, so the file object
    # can be closed as soon as the mapping exists.
    with open(pcidir + "/resource" + str(barnum), "r+") as f:
        size = os.fstat(f.fileno()).st_size
        mm = mmap.mmap(f.fileno(), size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes.", barnum, size)
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)
373
# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named registers as
# fields, call freeze(), then the field acts as a direct alias for the register!
class Regs:
    """Named 32-bit registers at fixed offsets from a base address.

    Until ``freeze()`` is called, assigning to a new attribute defines a
    register: the assigned value is its byte offset from ``base_addr``.
    After that (or for a name that is already defined) assignment writes
    the register and attribute access reads it.

    Unknown register names raise ``AttributeError``, so ``hasattr()`` and
    ``getattr()`` with a default work as usual.
    """

    def __init__(self, base_addr):
        # Go through vars() so these assignments bypass __setattr__.
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """Stop accepting new register definitions."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        ptrs = vars(self)["ptrs"]
        if name in ptrs:
            ptrs[name].value = val
        elif not vars(self)["frozen"]:
            addr = vars(self)["base_addr"] + val
            ptrs[name] = ctypes.c_uint32.from_address(addr)
        else:
            raise AttributeError(f"no register named {name!r}")

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(f"no register named {name!r}") from None
392
def runx(cmd):
    """Run ``cmd`` through the shell and return its stdout as text.

    Trailing whitespace is stripped.  A non-zero exit status raises
    ``subprocess.CalledProcessError``.
    """
    raw_output = subprocess.check_output(cmd, shell=True)
    text = raw_output.decode()
    return text.rstrip()
395
def mask(bit):
    """Return the ADSPCS mask for the field that starts at bit ``bit``.

    Only cAVS 2.5 parts are handled; there the mask is the single bit.

    Raises ``RuntimeError`` on any other platform, where this used to
    return ``None`` and fail later inside a register operation.
    """
    if cavs25:
        return 0b1 << bit
    raise RuntimeError("ADSPCS core mask is only known for cAVS 2.5 devices")
399
def load_firmware(fw_file):
    """Load the firmware image ``fw_file`` into a cAVS DSP and start it.

    Uses the module-level ``hda``, ``sd``, ``dsp`` and ``hda_ostream_id``.
    Exits the process with status 1 if the file cannot be read or is too
    short to contain a header.  Waits without timeout for the firmware to
    report that it has been entered.
    """
    try:
        with open(fw_file, "rb") as fw:
            fw_bytes = fw.read()
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    # The header check below needs 8 bytes; reject anything shorter
    # rather than failing inside struct.unpack().
    if len(fw_bytes) < 8:
        log.error(f"Firmware file `{fw_file}' is too short ({len(fw_bytes)} bytes)")
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info(f"Stalling and Resetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS |= mask(CSTALL)
    dsp.ADSPCS |= mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) == 0: pass

    log.info(f"Powering down DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(SPA)
    while dsp.ADSPCS & mask(CPA): pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Cycle the stream through reset before programming it.
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL = 1 << 20 # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP. Only start up core 0, reset is managed by DSP.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = mask(SPA)
    while (dsp.ADSPCS & mask(CPA)) == 0: pass

    log.info(f"Unresetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) != 0: pass

    log.info(f"Running DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CSTALL)

    # Wait for the ROM to boot and signal it's ready.  This not so short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info(f"Wait for ROM startup, ADSPCS = 0x{dsp.ADSPCS:x}")
    time.sleep(1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5: pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HIPIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("cAVS firmware load complete")
499
def load_firmware_ace(fw_file):
    """Load the firmware image ``fw_file`` into an ACE DSP and start it.

    Uses the module-level ``hda``, ``sd``, ``dsp`` and ``hda_ostream_id``.
    Exits the process with status 1 if the file cannot be read.  Waits
    without timeout for the firmware to report that it has been entered.
    """
    try:
        with open(fw_file, "rb") as fw:
            fw_bytes = fw.read()
        # Resize fw_bytes for MTL
        # NOTE(review): the padding is applied before the manifest is
        # trimmed below, so a trimmed image can end up shorter than
        # 512 KiB -- confirm this is intended.
        if len(fw_bytes) < 512 * 1024:
            fw_bytes += b'\x00' * (512 * 1024 - len(fw_bytes))
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'$AE1':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info("Turning of DSP subsystem")
    dsp.HFDSSCS &= ~(1 << 16) # clear SPA bit
    time.sleep(0.002)
    # wait for CPA bit clear
    while dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power off")
        time.sleep(0.1)

    log.info("Turning on DSP subsystem")
    dsp.HFDSSCS |= (1 << 16) # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power on")
        time.sleep(0.1)

    log.info("Turning on Domain0")
    dsp.HFPWRCTL |= 0x1 # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFPWRSTS & 0x1:
        log.info("Waiting for DSP domain0 power on")
        time.sleep(0.1)

    log.info("Turning off Primary Core")
    dsp.DSP2CXCTL_PRIMARY &= ~(0x1) # clear SPA
    time.sleep(0.002) # wait for CPA settlement
    while dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power off")
        time.sleep(0.1)

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Cycle the stream through reset before programming it.
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL |= (1 << 20) # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)


    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HFIPCXIDR = 0x{ipcval:x}")
    dsp.HFIPCXIDR = ipcval

    log.info("Turning on Primary Core")
    dsp.DSP2CXCTL_PRIMARY |= 0x1 # set SPA
    time.sleep(0.002) # wait for CPA settlement
    while not dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power on")
        time.sleep(0.1)

    log.info("Waiting for IPC acceptance")
    while dsp.HFIPCXIDR & (1 << 31):
        log.info("Waiting for IPC busy bit clear")
        time.sleep(0.1)

    log.info("ACK IPC")
    dsp.HFIPCXIDA |= (1 << 31)

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.ROM_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info("ACE firmware load complete")
620
def fw_is_alive(dsp):
    """Return True when the low 28 bits of ``dsp.ROM_STATUS`` equal 5."""
    fw_entered = 5  # "FW_ENTERED"
    state = dsp.ROM_STATUS & 0x0FFFFFFF
    return state == fw_entered
623
def wait_fw_entered(dsp, timeout_s):
    """Poll until the firmware reports it has been entered, then log the result.

    ``timeout_s`` is the time budget in seconds, polled 100 times per
    second; ``None`` means poll forever.  Nothing is returned: the outcome
    is only logged.
    """
    if timeout_s is None:
        budget_text = "forever"
    else:
        budget_text = f"{timeout_s} seconds"
    log.info("Waiting %s for firmware handoff, ROM_STATUS = 0x%x",
             budget_text, dsp.ROM_STATUS)

    poll_hz = 100
    polls_left = None if timeout_s is None else timeout_s * poll_hz

    alive = fw_is_alive(dsp)
    while not alive:
        if polls_left is not None:
            polls_left -= 1
            if polls_left < 0:
                break
        time.sleep(1 / poll_hz)
        alive = fw_is_alive(dsp)

    if alive:
        log.info("FW alive, ROM_STATUS = 0x%x", dsp.ROM_STATUS)
    else:
        log.warning("Load failed?  ROM_STATUS = 0x%x", dsp.ROM_STATUS)
644
def winstream_offset():
    """Return the BAR4 offset of SRAM window 3 (the winstream log window)."""
    window_base, window_stride = adsp_mem_window_config()
    return window_base + 3 * window_stride
648
# This SHOULD be just "mem[start:start+length]", but slicing an mmap
# array seems to be unreliable on one of my machines (python 3.6.9 on
# Ubuntu 18.04).  Read out bytes individually.
def win_read(base, start, length):
    """Return ``length`` bytes of BAR4 starting at offset ``base + start``.

    An ``IndexError`` from the mapping is logged and re-raised.
    """
    first = base + start
    try:
        return bytes(bar4_mmap[pos] for pos in range(first, first + length))
    except IndexError:
        # A FW in a bad state may cause winstream garbage
        log.error("IndexError in bar4_mmap[%d + %d]", base, start)
        log.error("bar4_mmap.size()=%d", bar4_mmap.size())
        raise
661
def winstream_reg_hdr(base):
    """Return a frozen ``Regs`` view of the winstream header at BAR4 offset ``base``.

    The header is four 32-bit words: WLEN, START, END and SEQ.
    """
    header = Regs(bar4_mem + base)
    for field, offset in (("WLEN", 0x00), ("START", 0x04),
                          ("END", 0x08), ("SEQ", 0x0c)):
        setattr(header, field, offset)
    header.freeze()
    return header
670
def win_hdr(hdr):
    """Read the header fields once and return ``(WLEN, START, END, SEQ)``."""
    return tuple(getattr(hdr, field) for field in ("WLEN", "START", "END", "SEQ"))
673
# Python implementation of the same algorithm in sys_winstream_read(),
# see there for details.
def winstream_read(base, last_seq):
    """Read new text from the winstream at BAR4 offset ``base``.

    ``last_seq`` is the sequence number returned by the previous call, or 0
    on the first call.  Returns ``(seq, text)``; ``text`` is empty when
    there is nothing new, the stream is unusable, or the reader has fallen
    further behind than the stream still buffers.
    """
    while True:
        hdr = winstream_reg_hdr(base)
        wlen, start, end, seq = win_hdr(hdr)
        if wlen > SHELL_MAX_VALID_SLOT_SIZE:
            log.debug("DSP powered off at winstream_read")
            return (seq, "")
        if wlen == 0:
            return (seq, "")

        buffered = (end - start) % wlen
        if last_seq == 0:
            # First read: either skip what is already buffered or replay it.
            last_seq = seq if args.no_history else seq - buffered
        if seq == last_seq or start == end:
            return (seq, "")

        behind = seq - last_seq
        if behind > buffered:
            return (seq, "")

        copy = (end - behind) % wlen
        suffix = min(behind, wlen - copy)
        pieces = [win_read(base, 16 + copy, suffix)]
        if suffix < behind:
            # The data wraps around the end of the ring.
            pieces.append(win_read(base, 16, behind - suffix))

        # Accept the copy only if the writer left the header alone meanwhile;
        # otherwise go around and read again.
        _, start1, _, seq1 = win_hdr(hdr)
        if start1 == start and seq1 == seq:
            # Best effort attempt at decoding, replacing unusable characters
            # Found to be useful when it really goes wrong
            return (seq, b"".join(pieces).decode("utf-8", "replace"))
702
def idx_mod(wlen, idx):
    """Wrap ``idx`` once around a ring of ``wlen`` entries."""
    return idx - wlen if idx >= wlen else idx
707
def idx_sub(wlen, a, b):
    """Return ``a - b`` wrapped into a ring of ``wlen`` entries."""
    shifted = a + (wlen - b)
    return idx_mod(wlen, shifted)
710
# Python implementation of the same algorithm in sys_winstream_write(),
# see there for details.
def winstream_write(base, msg):
    """Append the bytes ``msg`` to the winstream at BAR4 offset ``base``.

    Nothing is written when the stream length is 0 or implausibly large.
    A message longer than the stream can hold (``wlen - 1`` bytes, since
    start == end means "empty") resets the stream and keeps only the tail
    of the message; SEQ still advances by the full message length.
    """
    hdr = winstream_reg_hdr(base)
    (wlen, start, end, seq) = win_hdr(hdr)
    if wlen > SHELL_MAX_VALID_SLOT_SIZE:
        log.debug("DSP powered off at winstream_write")
        return
    if wlen == 0:
        return
    lenmsg0 = len(msg)
    lenmsg = lenmsg0
    # Overflow: if we're truncating then just reset the buffer.
    if lenmsg > wlen - 1:
        start = end
        lenmsg = wlen - 1
    # Make room in the buffer by advancing START first.
    if seq != 0:
        avail = (wlen - 1) - idx_sub(wlen, end, start)
        if lenmsg > avail:
            hdr.START = idx_mod(wlen, start + (lenmsg - avail))
    # Had to truncate?  Keep the last lenmsg bytes of the message.
    if lenmsg < lenmsg0:
        hdr.START = end
        msg = msg[lenmsg0 - lenmsg:]
    # Copy byte by byte: first up to the end of the ring, then the
    # wrapped-around remainder at its beginning.
    suffix = min(lenmsg, wlen - end)
    for c in range(0, suffix):
        bar4_mmap[base + 16 + end + c] = msg[c]
    if lenmsg > suffix:
        for c in range(0, lenmsg - suffix):
            bar4_mmap[base + 16 + c] = msg[suffix + c]
    hdr.END = idx_mod(wlen, end + lenmsg)
    hdr.SEQ += lenmsg0
743
def debug_offset():
    """Return the BAR4 offset of SRAM window 2 (the debug window)."""
    window_base, window_stride = adsp_mem_window_config()
    return window_base + 2 * window_stride
747
def debug_slot_offset(num):
    """Return the BAR4 offset of debug slot ``num``.

    Slots follow one another after the first ``DEBUG_SLOT_SIZE`` bytes of
    the debug window.
    """
    slot_position = 1 + num
    return debug_offset() + DEBUG_SLOT_SIZE * slot_position
750
def debug_slot_offset_by_type(the_type, timeout_s=0.2):
    """Find the debug slot whose descriptor has type ``the_type``.

    The start of the debug window is read as 15 descriptors of three
    little-endian 32-bit words (resource id, type id, vma).  The table is
    polled 100 times per second for about ``timeout_s`` seconds.

    Returns the BAR4 offset of the first matching slot, or ``None`` if no
    descriptor matched in time.
    """
    slot_count = 15
    desc_size = 3 * 4
    poll_hz = 100
    attempts = timeout_s * poll_hz
    while attempts > 0:
        table = win_read(debug_offset(), 0, slot_count * desc_size)
        for slot in range(slot_count):
            resource_id, type_id, vma = struct.unpack_from('<III', table,
                                                           slot * desc_size)
            if type_id != the_type:
                continue
            log.info("found desc %u resource_id 0x%08x type_id 0x%08x vma 0x%08x",
                     slot, resource_id, type_id, vma)
            return debug_slot_offset(slot)
        log.debug("not found, %u attempts left", attempts)
        attempts -= 1
        time.sleep(1 / poll_hz)
    return None
770
def shell_base_offset():
    """Return the BAR4 offset of the debug slot used by the shell."""
    return debug_slot_offset(DEBUG_SLOT_SHELL)
773
def read_from_shell_memwindow_winstream(last_seq):
    """Forward new shell output from the DSP to the shell PTY.

    Reads the winstream located ``SHELL_RX_SIZE`` bytes into the shell slot
    and returns the sequence number to pass to the next call.
    """
    stream_offset = shell_base_offset() + SHELL_RX_SIZE
    seq, text = winstream_read(stream_offset, last_seq)
    if text:
        os.write(shell_client_port, text.encode("utf-8"))
    return seq
780
def write_to_shell_memwindow_winstream():
    """Move one byte of shell input from the PTY into the DSP's shell winstream."""
    data = os.read(shell_client_port, 1)
    if not data:
        return
    winstream_write(shell_base_offset(), data)
785
def create_shell_pty():
    """Open a PTY for the shell and feed its input to the DSP.

    The first descriptor returned by ``pty.openpty()`` is stored in the
    global ``shell_client_port`` and registered as a reader on the asyncio
    event loop; the device name of the second one is logged so a terminal
    program can attach to it.
    """
    global shell_client_port
    shell_client_port, user_port = pty.openpty()
    log.info(f"shell PTY at: {os.ttyname(user_port)}")
    loop = asyncio.get_event_loop()
    loop.add_reader(shell_client_port, write_to_shell_memwindow_winstream)
792
async def ipc_delay_done():
    """Signal IPC "done" to the DSP after a 100 ms delay."""
    await asyncio.sleep(0.1)
    if not adsp_is_ace():
        dsp.HIPCTDA = 1<<31
        return
    dsp.HFIPCXTDA &= ~(1<<31) # Signal done
799
def inbox_offset():
    """Return the BAR4 offset of SRAM window 1 (the IPC inbox)."""
    window_base, window_stride = adsp_mem_window_config()
    return window_base + window_stride
803
def outbox_offset():
    """Return the BAR4 offset of the IPC outbox, 4096 bytes into window 0."""
    window_base = adsp_mem_window_config()[0]
    return window_base + 4096
807
# Host time in microseconds at the previous timestamp command (IPC command 4);
# 0 until the first such command arrives.
ipc_timestamp = 0
809
# Super-simple command language, driven by the test code on the DSP
def ipc_command(data, ext_data):
    """Run one host-side test command requested by the DSP over IPC.

    ``data`` selects the command and ``ext_data`` carries its argument:

       0  noop, with synchronous DONE
       1  async command, DONE is signalled later by ipc_delay_done()
       2  echo ``ext_data`` back as a message
       3  write ``ext_data`` to ADSPCS
       4  reply with the microseconds elapsed since the previous command 4
       5  copy one 32-bit word from the outbox to the inbox
       6  HDA reset (the stream is created if it does not exist yet)
       7  HDA config          8  HDA start         9  HDA stop
      10  HDA validate (reply 1 if the first 256 bytes are a 0..255 ramp)
      11  HDA host out send (write a 0..255 ramp)
      12  HDA print (dump stream bytes to stdout as text)

    After the command ran, the incoming message is acknowledged, DONE is
    signalled unless the command deferred it, and ``ext_data`` is sent back
    as a new message when the command produced a reply.  An unrecognized
    command is logged; if the firmware then does not look alive the
    function returns without acknowledging anything.
    """
    global ipc_timestamp

    reply = False        # send ext_data back to the DSP afterwards
    signal_done = True   # signal DONE synchronously
    log.debug("ipc data %d, ext_data %x", data, ext_data)

    if data == 0:
        # noop, with synchronous DONE
        pass
    elif data == 1:
        # async command: signal DONE after a delay (on 1.8+)
        signal_done = False
        asyncio.ensure_future(ipc_delay_done())
    elif data == 2:
        # echo back ext_data as a message command
        reply = True
    elif data == 3:
        # set ADSPCS
        dsp.ADSPCS = ext_data
    elif data == 4:
        # echo back microseconds since last timestamp command
        now_us = round(time.time() * 1e6)
        ext_data = now_us - ipc_timestamp
        ipc_timestamp = now_us
        reply = True
    elif data == 5:
        # copy word at outbox[ext_data >> 16] to inbox[ext_data & 0xffff]
        src = outbox_offset() + 4 * (ext_data >> 16)
        dst = inbox_offset() + 4 * (ext_data & 0xffff)
        for byte in range(4):
            bar4_mmap[dst + byte] = bar4_mmap[src + byte]
    elif data == 6:
        # HDA RESET (init if not exists)
        stream_id = ext_data & 0xff
        if stream_id not in hda_streams:
            hda_streams[stream_id] = HDAStream(stream_id)
        else:
            hda_streams[stream_id].reset()
    elif data == 7:
        # HDA CONFIG
        stream_id = ext_data & 0xFF
        buf_len = (ext_data >> 8) & 0xFFFF
        hda_streams[stream_id].config(buf_len)
    elif data == 8:
        # HDA START
        stream = hda_streams[ext_data & 0xFF]
        stream.start()
        stream.mem.seek(0)
    elif data == 9:
        # HDA STOP
        hda_streams[ext_data & 0xFF].stop()
    elif data == 10:
        # HDA VALIDATE
        stream = hda_streams[ext_data & 0xFF]
        stream.debug()
        stream.mem.seek(0)
        received = stream.mem.read(256)
        # ramp data means byte number i holds the value i
        is_ramp_data = all(idx == val for (idx, val) in enumerate(received))
        log.info(f"Is ramp data? {is_ramp_data}")
        ext_data = int(is_ramp_data)
        log.info(f"Ext data to send back on ramp status {ext_data}")
        reply = True
    elif data == 11:
        # HDA HOST OUT SEND
        stream_id = ext_data & 0xff
        hda_streams[stream_id].write(bytearray(range(256)))
    elif data == 12:
        # HDA PRINT
        stream_id = ext_data & 0xFF
        buf_len = (ext_data >> 8) & 0xFFFF
        stream = hda_streams[stream_id]
        ring_len = stream.buf_len * 2
        pos = stream.mem.tell()
        # check for wrap here: split into a read up to the end of the
        # buffer and a second read from its start
        if pos + buf_len >= ring_len:
            first_len = ring_len - pos
            second_len = buf_len - first_len
        else:
            first_len, second_len = buf_len, 0
        # validate the read lens
        assert (first_len + pos) <= ring_len
        assert first_len % 128 == 0
        assert second_len % 128 == 0
        sys.stdout.write(stream.mem.read(first_len).decode("utf-8", "replace"))
        if second_len != 0:
            stream.mem.seek(0)
            sys.stdout.write(stream.mem.read(second_len).decode("utf-8", "replace"))
        pos = stream.mem.tell()
        sys.stdout.flush()
    else:
        log.warning(f"cavstool: Unrecognized IPC command 0x{data:x} ext 0x{ext_data:x}")
        if not fw_is_alive(dsp):
            if not args.log_only:
                log.warning("DSP power seems off?!")
                time.sleep(2)  # potential spam reduction
            else:
                log.info("DSP power seems off")
                wait_fw_entered(dsp, timeout_s=None)
            return

    bit31 = 1 << 31
    if not adsp_is_ace():
        dsp.HIPCTDR = bit31  # Ack local interrupt
        if signal_done:
            dsp.HIPCTDA = bit31  # Signal done
        if reply:
            dsp.HIPCIDD = ext_data
            dsp.HIPCIDR = bit31 | ext_data
        return

    dsp.HFIPCXTDR = bit31  # Ack local interrupt
    if signal_done:
        dsp.HFIPCXTDA = ~bit31 & dsp.HFIPCXTDA  # Signal done
    if reply:
        log.debug("ipc: sending msg 0x%08x" % ext_data)
        dsp.HFIPCXIDDY = ext_data
        dsp.HFIPCXIDR = bit31 | ext_data
926
def handle_ipc():
    """Poll the host IPC registers once and service whatever is pending.

    A pending DONE indication (bit 31 of the IDA register) is acknowledged
    by writing that bit back.  If bit 31 of the TDR register is set, the
    remaining TDR bits and the companion data register are handed to
    ipc_command().  ACE hardware uses the HFIPCX* registers, everything
    else the HIPC* ones.
    """
    bit31 = 0x80000000
    if adsp_is_ace():
        if dsp.HFIPCXIDA & bit31:
            log.debug("ipc: Ack DSP reply with IDA_DONE")
            dsp.HFIPCXIDA = bit31  # must ACK any DONE interrupts that arrive!
        if dsp.HFIPCXTDR & bit31:
            ipc_command(dsp.HFIPCXTDR & ~bit31, dsp.HFIPCXTDDY)
    else:
        if dsp.HIPCIDA & bit31:
            dsp.HIPCIDA = bit31  # must ACK any DONE interrupts that arrive!
        if dsp.HIPCTDR & bit31:
            ipc_command(dsp.HIPCTDR & ~bit31, dsp.HIPCTDD)
940
async def main():
    """Map the device, optionally load firmware, then stream DSP output.

    Unless ``--log-only`` was given, a firmware file is required and is
    loaded with the loader matching the detected hardware.  With
    ``--log-only`` the function just waits for running firmware.
    Afterwards it polls every 30 ms while ``start_output`` is True:
    shell output goes to the PTY (when requested), log output goes to
    stdout, and IPC commands are serviced unless in log-only mode.

    Exits the process with status 1 if the device cannot be mapped or the
    firmware file argument is missing.
    """
    #TODO this bit me, remove the globals, write a little FirmwareLoader class or something to contain.
    global hda, sd, dsp, hda_ostream_id, hda_streams

    try:
        hda, sd, dsp, hda_ostream_id = map_regs(args.log_only)
    except Exception as err:
        log.error("Could not map device in sysfs; run as root?")
        log.error(err)
        sys.exit(1)

    log.info("Detected a supported cAVS/ACE hardware version")

    if args.log_only:
        wait_fw_entered(dsp, timeout_s=None)
    else:
        if not args.fw_file:
            log.error("Firmware file argument missing")
            sys.exit(1)

        loader = load_firmware_ace if adsp_is_ace() else load_firmware
        loader(args.fw_file)
        time.sleep(0.1)

        if not args.quiet:
            sys.stdout.write("--\n")

    if args.shell_pty:
        create_shell_pty()

    hda_streams = {}

    log_seq = 0
    shell_seq = 0
    while start_output is True:
        await asyncio.sleep(0.03)
        if args.shell_pty:
            shell_seq = read_from_shell_memwindow_winstream(shell_seq)
        log_seq, text = winstream_read(winstream_offset(), log_seq)
        if text:
            sys.stdout.write(text)
            sys.stdout.flush()
        if not args.log_only:
            handle_ipc()
987
def args_parse():
    """Parse the command line into the global ``args`` and set the log level.

    ``--quiet`` lowers the logger to WARN; otherwise ``--verbose`` raises
    it to DEBUG.  Option abbreviations are not accepted.
    """
    global args
    parser = argparse.ArgumentParser(description="DSP loader/logger tool", allow_abbrev=False)

    # (short flag, long flag, help text); every one is a plain boolean switch
    switches = (
        ("-q", "--quiet", "No loader output, just DSP logging"),
        ("-v", "--verbose", "More loader output, DEBUG logging level"),
        ("-l", "--log-only", "Don't load firmware, just show log output"),
        ("-p", "--shell-pty", "Create a Zephyr shell pty if enabled in firmware"),
        ("-n", "--no-history", "No current log buffer at start, just new output"),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action="store_true", help=help_text)
    parser.add_argument("fw_file", nargs="?", help="Firmware file")

    args = parser.parse_args()

    # quiet takes precedence over verbose when both are given
    if args.quiet:
        log.setLevel(logging.WARN)
    elif args.verbose:
        log.setLevel(logging.DEBUG)
1009
if __name__ == "__main__":
    # Parse the command line first: main() reads the module-level `args`.
    args_parse()
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C: swallow the interrupt and clear the flag that main()'s
        # polling loop tests.
        start_output = False
1016