1#!/usr/bin/env python3
2# Copyright(c) 2022 Intel Corporation. All rights reserved.
3# SPDX-License-Identifier: Apache-2.0
4import os
5import sys
6import struct
7import logging
8import asyncio
9import time
10import subprocess
11import ctypes
12import mmap
13import argparse
14import pty
15
# NOTE(review): not referenced in the visible part of this file; presumably
# consumed by the output/logging code further down -- confirm.
start_output = True

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("cavs-fw")

# PAGESZ scales the page frame number read from /proc/self/pagemap into a
# physical address; HUGEPAGESZ is the size of each hugetlbfs DMA mapping.
PAGESZ = 4096
HUGEPAGESZ = 2 * 1024 * 1024
# Name prefix of the hugetlbfs backing file; the stream id is appended.
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp."

# SRAM windows. Base and stride vary depending on ADSP version
#
# Window 0 is the FW_STATUS area, and 4k after that the IPC "outbox"
# Window 1 is the IPC "inbox" (host-writable memory, just 384 bytes currently)
# Window 2 is used for debug slots (Zephyr shell is one user)
# Window 3 is winstream-formatted log output

# Window layout used when the device is not an ACE platform
WINDOW_BASE = 0x80000
WINDOW_STRIDE = 0x20000

# Window layout used on ACE platforms (see adsp_mem_window_config())
WINDOW_BASE_ACE = 0x180000
WINDOW_STRIDE_ACE = 0x8000

# Debug window (window 2) layout: slots of DEBUG_SLOT_SIZE bytes, placed
# after one leading DEBUG_SLOT_SIZE region that holds the slot descriptors.
DEBUG_SLOT_SIZE = 4096
# Index of the slot used by the shell
DEBUG_SLOT_SHELL = 0
# Offset, inside the shell slot, of the winstream the host reads shell
# output from; the winstream the host writes to sits at the slot start.
SHELL_RX_SIZE = 256
# A winstream length above this is treated as garbage read from a powered
# off DSP rather than as a real header.
SHELL_MAX_VALID_SLOT_SIZE = 16777216

# pylint: disable=duplicate-code

# ADSPCS bits
CRST   = 0
CSTALL = 8
SPA    = 16
CPA    = 24
50
class HDAStream:
    """Host-side driver for a single HD Audio stream.

    Creates an HDA stream backed by two buffers of ``buf_len`` bytes each
    (see config()).  All register access goes through ``Regs`` views onto
    the HDA BAR whose address is held in the module global ``hdamem``.

    Attributes set by __init__:
        stream_id: index of the stream this object drives.
        base:      address of the stream descriptor registers.
        hda:       controller-wide registers (plus this stream's SPIB).
        regs:      the stream descriptor registers.
        dbg0:      extra per-stream registers used only by debug().

    Attributes set by config():
        buf_len, mem, hugef, buf_list_addr, pos_buf_addr, n_bufs.
    """

    def __init__(self, stream_id: int):
        """Map the registers of stream ``stream_id`` and reset the stream."""
        self.stream_id = stream_id
        self.base = hdamem + 0x0080 + (stream_id * 0x20)
        log.info(f"Mapping registers for hda stream {self.stream_id} at base {self.base:x}")

        # Controller-wide registers, offsets relative to the HDA BAR
        self.hda = Regs(hdamem)
        self.hda.GCAP    = 0x0000
        self.hda.GCTL    = 0x0008
        self.hda.DPLBASE = 0x0070
        self.hda.DPUBASE = 0x0074
        self.hda.SPBFCH  = 0x0700
        self.hda.SPBFCTL = 0x0704
        self.hda.PPCH    = 0x0800
        self.hda.PPCTL   = 0x0804
        self.hda.PPSTS   = 0x0808
        self.hda.SPIB = 0x0708 + stream_id*0x08
        self.hda.freeze()

        # Stream descriptor registers, offsets relative to self.base
        self.regs = Regs(self.base)
        self.regs.CTL  = 0x00
        self.regs.STS  = 0x03
        self.regs.LPIB = 0x04
        self.regs.CBL  = 0x08
        self.regs.LVI  = 0x0c
        self.regs.FIFOW = 0x0e
        self.regs.FIFOS = 0x10
        self.regs.FMT = 0x12
        self.regs.FIFOL= 0x14
        self.regs.BDPL = 0x18
        self.regs.BDPU = 0x1c
        self.regs.freeze()

        # Registers that are only dumped by debug()
        self.dbg0 = Regs(hdamem + 0x0084 + (0x20*stream_id))
        self.dbg0.DPIB = 0x00
        self.dbg0.EFIFOS = 0x10
        self.dbg0.freeze()

        self.reset()

    def __del__(self):
        """Reset the stream when the object is collected."""
        self.reset()

    def config(self, buf_len: int):
        """Allocate the DMA buffers and program the stream descriptor.

        Args:
            buf_len: size in bytes of each of the two DMA buffers.
        """
        log.info(f"Configuring stream {self.stream_id}")
        self.buf_len = buf_len
        log.info("Allocating huge page and setting up buffers")
        self.mem, self.hugef, self.buf_list_addr, self.pos_buf_addr, self.n_bufs = self.setup_buf(buf_len)

        log.info("Setting buffer list, length, and stream id and traffic priority bit")
        self.regs.CTL = ((self.stream_id & 0xFF) << 20) | (1 << 18) # must be set to something other than 0?
        self.regs.BDPU = (self.buf_list_addr >> 32) & 0xffffffff
        self.regs.BDPL = self.buf_list_addr & 0xffffffff
        self.regs.CBL = buf_len
        self.regs.LVI = self.n_bufs - 1
        self.mem.seek(0)
        self.debug()
        log.info(f"Configured stream {self.stream_id}")

    def write(self, data):
        """Copy ``data`` into both DMA buffers and advance SPIB.

        At most ``buf_len`` bytes of ``data`` are used; the same bytes are
        stored in the first and in the second buffer.
        """
        bufl = min(len(data), self.buf_len)
        log.info(f"Writing data to stream {self.stream_id}, len {bufl}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")
        self.mem[0:bufl] = data[0:bufl]
        self.mem[bufl:bufl+bufl] = data[0:bufl]
        self.hda.SPBFCTL |= (1 << self.stream_id)
        self.hda.SPIB += bufl
        log.info(f"Wrote data to stream {self.stream_id}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")

    def start(self):
        """Set the START bit (bit 1) of the stream's CTL register."""
        log.info(f"Starting stream {self.stream_id}, CTL {self.regs.CTL:x}")
        self.regs.CTL |= 2
        log.info(f"Started stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def stop(self):
        """Clear START, wait, then set the stream reset bit.

        Same sequence (and same mandatory delay) as the one described in
        reset(): START has to be cleared before the reset bit is written.
        """
        log.info(f"Stopping stream {self.stream_id}, CTL {self.regs.CTL:x}")
        # Clear only START (bit 1).  Masking with "& 2" would do the
        # opposite: keep START and wipe every other field of CTL.
        self.regs.CTL &= ~2
        time.sleep(0.1)
        self.regs.CTL |= 1
        log.info(f"Stopped stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def setup_buf(self, buf_len: int):
        """Map a huge page and lay out two buffers plus their descriptor list.

        Layout inside the page: buffer 0, buffer 1 (``buf_len`` bytes each),
        then the 32-byte buffer descriptor list, then the position buffer.

        Returns:
            (mem, hugef, bdl_phys_addr, pos_buf_phys_addr, number_of_buffers)
        """
        (mem, phys_addr, hugef) = map_phys_mem(self.stream_id)

        log.info(f"Mapped 2M huge page at 0x{phys_addr:x} for buf size ({buf_len})")

        # create two buffers in the page of buf_len and mark them
        # in a buffer descriptor list for the hardware to use
        buf0_len = buf_len
        buf1_len = buf_len
        bdl_off = buf0_len + buf1_len
        # bdl is 2 (64bits, 16 bytes) per entry, we have two
        mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                                phys_addr,
                                                buf0_len,
                                                phys_addr + buf0_len,
                                                buf1_len)
        dpib_off = bdl_off+32

        # ensure buffer is initialized, sanity
        mem[0:bdl_off] = bytes(bdl_off)

        log.info("Filled the buffer descriptor list (BDL) for DMA.")
        return (mem, hugef, phys_addr + bdl_off, phys_addr+dpib_off, 2)

    def debug(self):
        """Dump the stream's registers at debug log level."""
        log.debug("HDA %d: PPROC %d, CTL 0x%x, LPIB 0x%x, BDPU 0x%x, BDPL 0x%x, CBL 0x%x, LVI 0x%x",
                 self.stream_id, (self.hda.PPCTL >> self.stream_id) & 1, self.regs.CTL, self.regs.LPIB, self.regs.BDPU,
                 self.regs.BDPL, self.regs.CBL, self.regs.LVI)
        log.debug("    FIFOW %d, FIFOS %d, FMT %x, FIFOL %d, DPIB %d, EFIFOS %d",
                 self.regs.FIFOW & 0x7, self.regs.FIFOS, self.regs.FMT, self.regs.FIFOL, self.dbg0.DPIB, self.dbg0.EFIFOS)
        log.debug("    status: FIFORDY %d, DESE %d, FIFOE %d, BCIS %d",
                 (self.regs.STS >> 5) & 1, (self.regs.STS >> 4) & 1, (self.regs.STS >> 3) & 1, (self.regs.STS >> 2) & 1)

    def reset(self):
        """Stop and reset the stream, disable SPIB and enable PROCEN for it."""
        # Turn DMA off and reset the stream.  Clearing START first is a
        # noop per the spec, but absolutely required for stability.
        # Apparently the reset doesn't stop the stream, and the next load
        # starts before it's ready and kills the load (and often the DSP).
        # The sleep too is required, on at least one board (a fast
        # chromebook) putting the two writes next each other also hangs
        # the DSP!
        log.info(f"Resetting stream {self.stream_id}")
        self.debug()
        self.regs.CTL &= ~2 # clear START
        time.sleep(0.1)
        # set enter reset bit
        self.regs.CTL = 1
        while (self.regs.CTL & 1) == 0: pass
        # clear enter reset bit to exit reset
        self.regs.CTL = 0
        while (self.regs.CTL & 1) == 1: pass

        log.info(f"Disable SPIB and set position 0 of stream {self.stream_id}")
        self.hda.SPBFCTL = 0
        self.hda.SPIB = 0

        #log.info("Setting dma position buffer and enable it")
        #self.hda.DPUBASE = self.pos_buf_addr >> 32 & 0xffffffff
        #self.hda.DPLBASE = self.pos_buf_addr & 0xfffffff0 | 1

        log.info(f"Enabling dsp capture (PROCEN) of stream {self.stream_id}")
        self.hda.PPCTL |= (1 << self.stream_id)

        self.debug()
        log.info(f"Reset stream {self.stream_id}")
199
def adsp_is_ace():
    """Report whether the detected device is one of the ACE platforms.

    Reads the module-level ace15 / ace20 / ace30 flags (assigned by
    map_regs()) and returns the first one that is set, else ace30.
    """
    if ace15:
        return ace15
    if ace20:
        return ace20
    return ace30
202
def adsp_mem_window_config():
    """Return the ``(base, stride)`` of the SRAM windows for this platform."""
    if not adsp_is_ace():
        return (WINDOW_BASE, WINDOW_STRIDE)
    return (WINDOW_BASE_ACE, WINDOW_STRIDE_ACE)
212
def map_regs(log_only):
    """Locate the audio PCI device and map its register blocks.

    Side effects: sets the module globals ``cavs25``, ``ace15``, ``ace20``,
    ``ace30``, ``hdamem``, ``bar4_mem`` and ``bar4_mmap``; writes the PCI
    command register; and, unless ``log_only`` is true, unloads any kernel
    driver bound to the device and forces its runtime power control to "on".

    Args:
        log_only: when true an existing driver is only reported, not removed.

    Returns:
        ``(hda, sd, dsp, hda_ostream_id)``: Regs views of the HD Audio
        controller, of the first output stream descriptor and of the DSP
        registers in BAR4, plus the index of that output stream.
    """
    global cavs25, ace15, ace20, ace30
    global hdamem
    global bar4_mmap
    global bar4_mem

    p = runx("grep -iEl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Platform/quirk detection.  ID lists cribbed from the SOF kernel driver
    with open(f"{pcidir}/device") as devf:
        did = int(devf.read().rstrip(), 16)
    cavs25 = did in [ 0x43c8, 0x4b55, 0x4b58, 0x51c8, 0x51ca, 0x51cb, 0x51ce, 0x51cf, 0x54c8,
                      0x7ad0, 0xa0c8 ]
    ace15 = did in [ 0x7728, 0x7f50, 0x7e28 ]
    ace20 = did in [ 0xa828 ]
    ace30 = did in [ 0xe428 ]

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        found_msg = f"Existing driver \"{mod}\" found"
        if log_only:
            log.info(found_msg)
        else:
            log.warning(found_msg + ", unloading module")
            runx(f"rmmod -f {mod}")
            # Disengage runtime power management so the kernel doesn't put it to sleep
            log.info(f"Forcing {pcidir}/power/control to always 'on'")
            with open(f"{pcidir}/power/control", "w") as ctrl:
                ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP    = 0x0000
    hda.GCTL    = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL   = 0x0804

    # Find the ID of the first output stream
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()


    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL  = 0x00
    sd.CBL  = 0x08
    sd.LVI  = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    if adsp_is_ace():
        dsp.HFDSSCS        = 0x1000
        dsp.HFPWRCTL       = 0x1d18 if ace15 or ace20 else 0x1d20
        dsp.HFPWRSTS       = 0x1d1c if ace15 or ace20 else 0x1d24
        dsp.DSP2CXCTL_PRIMARY = 0x178d04
        dsp.HFIPCXTDR      = 0x73200
        dsp.HFIPCXTDA      = 0x73204
        dsp.HFIPCXIDR      = 0x73210
        dsp.HFIPCXIDA      = 0x73214
        dsp.HFIPCXCTL      = 0x73228
        dsp.HFIPCXTDDY     = 0x73300
        dsp.HFIPCXIDDY     = 0x73380
        dsp.ROM_STATUS     = 0x163200 if ace15 else 0x160200
        dsp.SRAM_FW_STATUS = WINDOW_BASE_ACE
    else:
        dsp.ADSPCS         = 0x00004
        dsp.HIPCTDR        = 0x000c0
        dsp.HIPCTDA        = 0x000c4
        dsp.HIPCTDD        = 0x000c8
        dsp.HIPCIDR        = 0x000d0
        dsp.HIPCIDA        = 0x000d4
        dsp.HIPCIDD        = 0x000d8
        dsp.ROM_STATUS     = WINDOW_BASE # Start of first SRAM window
        dsp.SRAM_FW_STATUS = WINDOW_BASE
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id)
303
def setup_dma_mem(fw_bytes):
    """Stage the firmware image in a huge page and describe it to the DMA.

    The image is copied to the start of a freshly mapped page (mapped for
    the module-level ``hda_ostream_id``) and a two-entry buffer descriptor
    list is written into the last 128 bytes of that page.

    Returns:
        ``(bdl_physical_address, number_of_buffers)``
    """
    fw_len = len(fw_bytes)
    page, page_paddr, _ = map_phys_mem(hda_ostream_id)
    page[:fw_len] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware",
             page_paddr, fw_len)

    # HDA wants at least two buffers.  The region is contiguous, so the
    # split is arbitrary: one big buffer, then a vestigial 128-byte one
    # (minimum size and alignment), then the descriptor list occupying the
    # final 128 bytes of the page.
    tail_len = 128
    main_len = HUGEPAGESZ - 2 * tail_len
    bdl_off = main_len + tail_len
    bdl = struct.pack("<QQQQ",
                      page_paddr, main_len,
                      page_paddr + main_len, tail_len)
    page[bdl_off:bdl_off + len(bdl)] = bdl
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (page_paddr + bdl_off, 2)
323
# map_phys_mem() and bar_map() append every mmap object they create to this
# list, so each mapping stays referenced for the lifetime of the process.
global_mmaps = [] # protect mmap mappings from garbage collection!
325
326# Maps 2M of contiguous memory using a single page from hugetlbfs,
327# then locates its physical address for use as a DMA buffer.
def map_phys_mem(stream_id):
    """Map one 2M huge page and find its physical address.

    Args:
        stream_id: appended to HUGEPAGE_FILE to name the backing file.

    Returns:
        ``(mem, paddr, hugef)``: the mmap object, the physical address of
        its first byte, and the (already unlinked) backing file object.

    Raises:
        RuntimeError: the kernel reported page frame number 0, i.e. the
            physical address is unknown and must not be handed to DMA.
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    hugef_name = HUGEPAGE_FILE + str(stream_id)
    hugef = open(hugef_name, "w+")
    try:
        hugef.truncate(HUGEPAGESZ)
        mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    except Exception:
        # Nothing was mapped: don't keep the descriptor around
        hugef.close()
        raise
    finally:
        # The name is only needed to create the mapping; never leave the
        # temporary file behind, whether or not the mapping succeeded.
        os.unlink(hugef_name)
    log.info("type of mem is %s", str(type(mem)))
    global_mmaps.append(mem)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0 # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr // PAGESZ
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8) # one 8-byte entry per virtual page
        pent = pagemap.read(8)
    pfn = struct.unpack("Q", pent)[0] & ((1 << 55) - 1)
    if pfn == 0:
        # The kernel zeroes the PFN for callers lacking the privilege to see
        # it; programming the DMA engine with address 0 would be wrong.
        raise RuntimeError("pagemap returned no page frame number for the "
                           "DMA buffer (insufficient privileges?)")
    return (mem, pfn * PAGESZ, hugef)
360
361# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Map PCI BAR ``barnum`` of the device whose sysfs directory is ``pcidir``.

    Returns:
        ``(address, mm)``: the in-process address of the first mapped byte
        and the mmap object itself (also recorded in ``global_mmaps``).
    """
    # mmap keeps its own duplicate of the descriptor, so the file object
    # can be closed as soon as the mapping exists.
    with open(pcidir + "/resource" + str(barnum), "r+") as f:
        size = os.fstat(f.fileno()).st_size
        mm = mmap.mmap(f.fileno(), size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes.", barnum, size)
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)
369
370# Syntactic sugar to make register block definition & use look nice.
371# Instantiate from a base address, assign offsets to (uint32) named registers as
372# fields, call freeze(), then the field acts as a direct alias for the register!
class Regs:
    """Named 32-bit registers at fixed offsets from a base address.

    Until freeze() is called, assigning to a new attribute *defines* a
    register: the assigned value is the byte offset from ``base_addr``.
    Assigning to an already defined name, or any assignment after freeze(),
    *writes* the value to the register.  Reading an attribute reads the
    register.

    Accessing a name that was never defined raises AttributeError, so the
    object behaves normally with hasattr() and getattr(obj, name, default).
    """

    def __init__(self, base_addr):
        # Go through vars() because __setattr__ is overridden below
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """End the definition phase; later assignments are register writes."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        if not self.frozen and name not in self.ptrs:
            addr = self.base_addr + val
            self.ptrs[name] = ctypes.c_uint32.from_address(addr)
            return
        try:
            reg = self.ptrs[name]
        except KeyError:
            raise AttributeError(f"unknown register {name!r}") from None
        reg.value = val

    def __getattr__(self, name):
        # Only reached when normal lookup fails.  Fetch the table through
        # vars() so a half-built instance cannot recurse into here forever.
        ptrs = vars(self).get("ptrs", {})
        try:
            reg = ptrs[name]
        except KeyError:
            raise AttributeError(f"unknown register {name!r}") from None
        return reg.value
388
def runx(cmd):
    """Run ``cmd`` through the shell and return its decoded stdout.

    Trailing whitespace is stripped.  A non-zero exit status raises
    subprocess.CalledProcessError.
    """
    completed = subprocess.run(cmd, shell=True, check=True,
                               stdout=subprocess.PIPE)
    text = completed.stdout.decode()
    return text.rstrip()
391
def mask(bit):
    """Return the ADSPCS mask with bit ``bit`` set.

    Only the cAVS 2.5 layout is known here.  On any other platform the
    function used to fall through and return None, which then failed with
    an obscure TypeError inside the caller's bitwise arithmetic; it now
    fails immediately with a clear message.

    Raises:
        RuntimeError: the detected platform is not cAVS 2.5.
    """
    if cavs25:
        return 0b1 << bit
    raise RuntimeError("mask() is only defined for cAVS 2.5 platforms")
395
def load_firmware(fw_file):
    """Load the firmware image ``fw_file`` into the DSP over HDA DMA.

    Uses the ADSPCS / HIPC* register set, i.e. the non-ACE register layout
    defined by map_regs().  Relies on the module-level ``hda``, ``sd``,
    ``dsp`` and ``hda_ostream_id`` (presumably the values returned by
    map_regs() -- they are assigned outside this function).

    Exits the process with status 1 if the file cannot be read.  All
    hardware waits are unbounded busy loops.
    """
    try:
        fw_bytes = open(fw_file, "rb").read()
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    # An image starting with "XMan" carries an extended manifest whose
    # length is the following 32-bit word; drop it before the transfer.
    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    # Clear GCTL bit 0 and wait for it to read back clear, then set it and
    # wait for it to read back set.
    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info(f"Stalling and Resetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS |= mask(CSTALL)
    dsp.ADSPCS |= mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) == 0: pass

    log.info(f"Powering down DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(SPA)
    while dsp.ADSPCS & mask(CPA): pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Pulse the stream reset bit (CTL bit 0), waiting for each edge
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL = 1 << 20 # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP. Only start up core 0, reset is managed by DSP.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = mask(SPA)
    while (dsp.ADSPCS & mask(CPA)) == 0: pass

    log.info(f"Unresetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CRST)
    while (dsp.ADSPCS & 1) != 0: pass

    log.info(f"Running DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CSTALL)

    # Wait for the ROM to boot and signal it's ready.  This not so short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info(f"Wait for ROM startup, ADSPCS = 0x{dsp.ADSPCS:x}")
    time.sleep(1)
    # Spin until the top byte of the firmware status word reads 5
    while (dsp.SRAM_FW_STATUS >> 24) != 5: pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HIPIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2 # START flag

    # No timeout: block until the firmware reports it has been entered
    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info(f"cAVS firmware load complete")
495
def load_firmware_ace(fw_file):
    """Load the firmware image ``fw_file`` into an ACE DSP over HDA DMA.

    Uses the HF* / DSP2CXCTL_PRIMARY register set, i.e. the ACE register
    layout defined by map_regs().  Relies on the module-level ``hda``,
    ``sd``, ``dsp`` and ``hda_ostream_id`` (presumably the values returned
    by map_regs() -- they are assigned outside this function).

    Exits the process with status 1 if the file cannot be read.  None of
    the hardware waits has a timeout.
    """
    try:
        fw_bytes = open(fw_file, "rb").read()
        # Resize fw_bytes for MTL
        # (images shorter than 512 KiB are zero-padded up to 512 KiB)
        if len(fw_bytes) < 512 * 1024:
            fw_bytes += b'\x00' * (512 * 1024 - len(fw_bytes))
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    # An image starting with "$AE1" carries an extended manifest whose
    # length is the following 32-bit word; drop it before the transfer.
    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'$AE1':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    # Clear GCTL bit 0 and wait for it to read back clear, then set it and
    # wait for it to read back set.
    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info("Turning of DSP subsystem")
    dsp.HFDSSCS &= ~(1 << 16) # clear SPA bit
    time.sleep(0.002)
    # wait for CPA bit clear
    while dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power off")
        time.sleep(0.1)

    log.info("Turning on DSP subsystem")
    dsp.HFDSSCS |= (1 << 16) # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power on")
        time.sleep(0.1)

    log.info("Turning on Domain0")
    dsp.HFPWRCTL |= 0x1 # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFPWRSTS & 0x1:
        log.info("Waiting for DSP domain0 power on")
        time.sleep(0.1)

    log.info("Turning off Primary Core")
    dsp.DSP2CXCTL_PRIMARY &= ~(0x1) # clear SPA
    time.sleep(0.002) # wait for CPA settlement
    while dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power off")
        time.sleep(0.1)

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Pulse the stream reset bit (CTL bit 0), waiting for each edge
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL |= (1 << 20) # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)


    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HFIPCXIDR = 0x{ipcval:x}")
    dsp.HFIPCXIDR = ipcval

    # The IPC word is written while the primary core is still off; the
    # core is powered on only afterwards.
    log.info("Turning on Primary Core")
    dsp.DSP2CXCTL_PRIMARY |= 0x1 # set SPA
    time.sleep(0.002) # wait for CPA settlement
    while not dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power on")
        time.sleep(0.1)

    log.info("Waiting for IPC acceptance")
    while dsp.HFIPCXIDR & (1 << 31):
        log.info("Waiting for IPC busy bit clear")
        time.sleep(0.1)

    log.info("ACK IPC")
    dsp.HFIPCXIDA |= (1 << 31)

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.ROM_STATUS:x}")
    sd.CTL |= 2 # START flag

    # No timeout: block until the firmware reports it has been entered
    wait_fw_entered(dsp, timeout_s=None)

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info(f"ACE firmware load complete")
616
def fw_is_alive(dsp):
    """Return True when the low 28 bits of ROM_STATUS read 5 ("FW_ENTERED")."""
    status_code = dsp.ROM_STATUS & 0x0fffffff
    return status_code == 5
619
def wait_fw_entered(dsp, timeout_s):
    """Poll at 100 Hz until the firmware reports it is running.

    Args:
        dsp: register view exposing ROM_STATUS.
        timeout_s: how long to keep polling, in seconds; None polls forever.

    The outcome is only logged (info on success, warning on timeout);
    nothing is returned.
    """
    log.info("Waiting %s for firmware handoff, ROM_STATUS = 0x%x",
             "forever" if timeout_s is None else f"{timeout_s} seconds",
             dsp.ROM_STATUS)
    poll_hz = 100
    polls_left = None if timeout_s is None else timeout_s * poll_hz

    alive = fw_is_alive(dsp)
    while not alive:
        if polls_left is not None:
            polls_left -= 1
            if polls_left < 0:
                break
        time.sleep(1 / poll_hz)
        alive = fw_is_alive(dsp)

    if alive:
        log.info("FW alive, ROM_STATUS = 0x%x", dsp.ROM_STATUS)
    else:
        log.warning("Load failed?  ROM_STATUS = 0x%x", dsp.ROM_STATUS)
640
def winstream_offset():
    """Return the offset of SRAM window 3, the winstream log output window."""
    window_base, window_stride = adsp_mem_window_config()
    return window_base + 3 * window_stride
644
645# This SHOULD be just "mem[start:start+length]", but slicing an mmap
646# array seems to be unreliable on one of my machines (python 3.6.9 on
647# Ubuntu 18.04).  Read out bytes individually.
def win_read(base, start, length):
    """Read ``length`` bytes of BAR4 starting at offset ``base + start``.

    The bytes are fetched one index at a time rather than with a slice.
    An IndexError is logged and then re-raised.
    """
    try:
        return bytes([bar4_mmap[base + off]
                      for off in range(start, start + length)])
    except IndexError:
        # A FW in a bad state may cause winstream garbage
        log.error("IndexError in bar4_mmap[%d + %d]", base, start)
        log.error("bar4_mmap.size()=%d", bar4_mmap.size())
        raise
657
def winstream_reg_hdr(base):
    """Return a frozen Regs view of the winstream header at BAR4 offset ``base``.

    The header consists of four consecutive 32-bit words: WLEN, START, END
    and SEQ.
    """
    header = Regs(bar4_mem + base)
    header.WLEN = 0x00
    header.START = 0x04
    header.END = 0x08
    header.SEQ = 0x0c
    header.freeze()
    return header
666
def win_hdr(hdr):
    """Read WLEN, START, END and SEQ, in that order, and return them as a tuple."""
    wlen = hdr.WLEN
    start = hdr.START
    end = hdr.END
    seq = hdr.SEQ
    return (wlen, start, end, seq)
669
670# Python implementation of the same algorithm in sys_winstream_read(),
671# see there for details.
def winstream_read(base, last_seq):
    """Return the text added to the winstream at ``base`` since ``last_seq``.

    Args:
        base: BAR4 offset of the winstream header.
        last_seq: sequence number returned by the previous call; 0 means
            "first call" and selects the starting point according to the
            module-level ``args.no_history``.

    Returns:
        ``(seq, text)``: the stream's current sequence number and the newly
        read text, "" when there is nothing (or nothing valid) to read.
    """
    while True:
        hdr = winstream_reg_hdr(base)
        (wlen, start, end, seq) = win_hdr(hdr)
        if wlen > SHELL_MAX_VALID_SLOT_SIZE:
            log.debug("DSP powered off at winstream_read")
            return (seq, "")
        if wlen == 0:
            return (seq, "")
        if last_seq == 0:
            # First call: either skip what is already buffered, or start
            # from the oldest byte still in the buffer
            last_seq = seq if args.no_history else (seq - ((end - start) % wlen))
        if seq == last_seq or start == end:
            return (seq, "")
        behind = seq - last_seq
        if behind > ((end - start) % wlen):
            # More bytes were written than the buffer still holds
            return (seq, "")
        copy = (end - behind) % wlen
        suffix = min(behind, wlen - copy)
        # Data follows the 16-byte header; read up to the end of the buffer
        # first, then whatever wrapped around to its beginning
        result = win_read(base, 16 + copy, suffix)
        if suffix < behind:
            result += win_read(base, 16, behind - suffix)
        # Re-read the header: if START or SEQ moved while copying, the data
        # may be inconsistent, so loop and try again
        (wlen, start1, end, seq1) = win_hdr(hdr)
        if start1 == start and seq1 == seq:
            # Best effort attempt at decoding, replacing unusable characters
            # Found to be useful when it really goes wrong
            return (seq, result.decode("utf-8", "replace"))
698
def idx_mod(wlen, idx):
    """Wrap ``idx`` into the buffer by subtracting ``wlen`` at most once."""
    return idx - wlen if idx >= wlen else idx
703
def idx_sub(wlen, a, b):
    """Return the circular difference ``a - b`` within a buffer of ``wlen`` bytes."""
    unwrapped = a + wlen - b
    return idx_mod(wlen, unwrapped)
706
707# Python implementation of the same algorithm in sys_winstream_write(),
708# see there for details.
def winstream_write(base, msg):
    """Append ``msg`` (bytes) to the winstream whose header is at ``base``.

    At most ``wlen - 1`` bytes fit in the ring (start == end means "empty").
    When ``msg`` is longer than that, the buffer is reset and only the last
    ``wlen - 1`` bytes are stored; SEQ is still advanced by the full length
    of ``msg``.  Nothing is written when the header looks invalid (length 0
    or implausibly large, as read from a powered-off DSP).
    """
    hdr = winstream_reg_hdr(base)
    (wlen, start, end, seq) = win_hdr(hdr)
    if wlen > SHELL_MAX_VALID_SLOT_SIZE:
        log.debug("DSP powered off at winstream_write")
        return
    if wlen == 0:
        return
    lenmsg0 = len(msg)
    lenmsg = lenmsg0
    # Overflow: if we're truncating then just reset the buffer.  The
    # threshold is wlen - 1, the real capacity of the ring.
    if lenmsg > wlen - 1:
        start = end
        lenmsg = wlen - 1
    # Make room in the buffer by advancing START first
    if seq != 0:
        avail = (wlen - 1) - idx_sub(wlen, end, start)
        if lenmsg > avail:
            hdr.START = idx_mod(wlen, start + (lenmsg - avail))
    # Had to truncate?  Publish the reset and keep the tail of the message;
    # the slice must leave exactly lenmsg bytes for the copy loops below.
    if lenmsg < lenmsg0:
        hdr.START = end
        msg = msg[lenmsg0 - lenmsg:]
    # Copy up to the end of the ring, then wrap around to its beginning.
    # Data lives right after the 16-byte header.
    suffix = min(lenmsg, wlen - end)
    for c in range(0, suffix):
        bar4_mmap[base + 16 + end + c] = msg[c]
    if lenmsg > suffix:
        for c in range(0, lenmsg - suffix):
            bar4_mmap[base + 16 + c] = msg[suffix + c]
    hdr.END = idx_mod(wlen, end + lenmsg)
    hdr.SEQ += lenmsg0 # SEQ counts dropped bytes too
739
def debug_offset():
    """Return the offset of SRAM window 2, the debug-slot window."""
    window_base, window_stride = adsp_mem_window_config()
    return window_base + 2 * window_stride
743
def debug_slot_offset(num):
    """Return the offset of debug slot ``num``.

    Slots are DEBUG_SLOT_SIZE bytes each and begin one DEBUG_SLOT_SIZE
    after the start of the debug window.
    """
    slots_to_skip = 1 + num
    return debug_offset() + DEBUG_SLOT_SIZE * slots_to_skip
746
def debug_slot_offset_by_type(the_type, timeout_s=0.2):
    """Find the debug slot whose descriptor has type ``the_type``.

    The descriptor table at the start of the debug window is re-read at
    100 Hz for up to ``timeout_s`` seconds.  Each of its 15 entries is three
    little-endian 32-bit words: resource id, type id, vma.

    Returns:
        The offset of the first matching slot, or None when no descriptor
        matched before the timeout.
    """
    slot_count = 15
    desc_fmt = '<III'
    desc_size = 3 * 4
    poll_hz = 100
    attempts = timeout_s * poll_hz
    while attempts > 0:
        table = win_read(debug_offset(), 0, slot_count * desc_size)
        for i in range(slot_count):
            resource_id, type_id, vma = struct.unpack_from(desc_fmt, table,
                                                           i * desc_size)
            if type_id != the_type:
                continue
            log.info("found desc %u resource_id 0x%08x type_id 0x%08x vma 0x%08x",
                     i, resource_id, type_id, vma)
            return debug_slot_offset(i)
        log.debug("not found, %u attempts left", attempts)
        attempts -= 1
        time.sleep(1 / poll_hz)
    return None
766
def shell_base_offset():
    """Return the offset of the debug slot used by the shell (DEBUG_SLOT_SHELL)."""
    slots_to_skip = 1 + DEBUG_SLOT_SHELL
    return debug_offset() + DEBUG_SLOT_SIZE * slots_to_skip
769
def read_from_shell_memwindow_winstream(last_seq):
    """Forward new shell output from the DSP to the PTY.

    Reads the winstream located SHELL_RX_SIZE bytes into the shell slot,
    writes any new text to ``shell_client_port`` and returns the sequence
    number to pass to the next call.
    """
    stream_offset = shell_base_offset() + SHELL_RX_SIZE
    (new_seq, text) = winstream_read(stream_offset, last_seq)
    if text:
        os.write(shell_client_port, text.encode("utf-8"))
    return new_seq
776
def write_to_shell_memwindow_winstream():
    """Move one byte typed on the PTY into the winstream at the shell slot start."""
    keystroke = os.read(shell_client_port, 1)
    if len(keystroke) == 0:
        return
    winstream_write(shell_base_offset(), keystroke)
781
def create_shell_pty():
    """Open a PTY for the shell and hook its host side into the event loop.

    Stores the host-side descriptor in the global ``shell_client_port``,
    logs the name of the user-side terminal, and registers
    write_to_shell_memwindow_winstream() as the reader callback.
    """
    global shell_client_port
    ports = pty.openpty()
    shell_client_port = ports[0]
    name = os.ttyname(ports[1])
    log.info(f"shell PTY at: {name}")
    loop = asyncio.get_event_loop()
    loop.add_reader(shell_client_port, write_to_shell_memwindow_winstream)
788
async def ipc_delay_done():
    """Signal IPC "done" to the DSP after a 100 ms delay.

    Non-ACE platforms write bit 31 to HIPCTDA; ACE platforms clear bit 31
    of HFIPCXTDA.
    """
    await asyncio.sleep(0.1)
    if not adsp_is_ace():
        dsp.HIPCTDA = 1<<31
        return
    dsp.HFIPCXTDA = dsp.HFIPCXTDA & ~(1<<31) # Signal done
795
def inbox_offset():
    """Return the offset of SRAM window 1, the IPC inbox."""
    window_base, window_stride = adsp_mem_window_config()
    return window_base + window_stride
799
def outbox_offset():
    """Return the offset of the IPC outbox, 4096 bytes into SRAM window 0."""
    window_base = adsp_mem_window_config()[0]
    return window_base + 4096
803
# Time, in microseconds as returned by time.time() * 1e6, of the previous
# timestamp IPC command (command 4 in ipc_command()); 0 until the first one.
ipc_timestamp = 0
805
806# Super-simple command language, driven by the test code on the DSP
def ipc_command(data, ext_data):
    """Execute one IPC command sent by the DSP, then acknowledge it.

    data selects the command and ext_data carries its argument:

       0  no-op; DONE is signalled synchronously
       1  DONE is signalled later by ipc_delay_done()
       2  send ext_data back as a message
       3  write ext_data to ADSPCS
       4  send back the microseconds elapsed since the previous command 4
       5  copy the 4-byte word outbox[ext_data >> 16] to
          inbox[ext_data & 0xffff]
       6  reset HDA stream (ext_data & 0xff), creating it if absent
       7  configure the HDA stream with length (ext_data >> 8) & 0xffff
       8  start the HDA stream and rewind its buffer
       9  stop the HDA stream
      10  check that the stream buffer begins with bytes 0..255 and send
          back 1 or 0
      11  write the bytes 0..255 to the HDA stream
      12  print (ext_data >> 8) & 0xffff bytes of the stream buffer to stdout

    Any other value is logged as unrecognized; if the firmware is then found
    not to be alive, the function returns without acknowledging.

    Commands 7-12 raise KeyError when the stream was never created by
    command 6, and command 12 asserts that each chunk it reads is a multiple
    of 128 bytes.
    """
    global ipc_timestamp

    reply = False        # send ext_data back to the DSP as a message
    signal_done = True   # signal DONE before returning
    log.debug("ipc data %d, ext_data %x", data, ext_data)

    if data == 0: # noop, with synchronous DONE
        pass
    elif data == 1: # async command: signal DONE after a delay (on 1.8+)
        signal_done = False
        asyncio.ensure_future(ipc_delay_done())
    elif data == 2: # echo back ext_data as a message command
        reply = True
    elif data == 3: # set ADSPCS
        dsp.ADSPCS = ext_data
    elif data == 4: # echo back microseconds since last timestamp command
        now_us = round(time.time() * 1e6)
        ext_data = now_us - ipc_timestamp
        ipc_timestamp = now_us
        reply = True
    elif data == 5: # copy word at outbox[ext_data >> 16] to inbox[ext_data & 0xffff]
        word_src = outbox_offset() + 4 * (ext_data >> 16)
        word_dst = inbox_offset() + 4 * (ext_data & 0xffff)
        # Copied one byte at a time through the BAR4 mapping
        for byte_idx in range(4):
            bar4_mmap[word_dst + byte_idx] = bar4_mmap[word_src + byte_idx]
    elif data == 6: # HDA RESET (init if not exists)
        sid = ext_data & 0xff
        if sid not in hda_streams:
            hda_streams[sid] = HDAStream(sid)
        else:
            hda_streams[sid].reset()
    elif data == 7: # HDA CONFIG
        requested_len = (ext_data >> 8) & 0xFFFF
        hda_streams[ext_data & 0xFF].config(requested_len)
    elif data == 8: # HDA START
        stream = hda_streams[ext_data & 0xFF]
        stream.start()
        stream.mem.seek(0)
    elif data == 9: # HDA STOP
        hda_streams[ext_data & 0xFF].stop()
    elif data == 10: # HDA VALIDATE
        stream = hda_streams[ext_data & 0xFF]
        stream.debug()
        stream.mem.seek(0)
        # Ramp data means byte i of the buffer holds the value i
        is_ramp_data = all(
            idx == byte for (idx, byte) in enumerate(stream.mem.read(256))
        )
        log.info("Is ramp data? " + str(is_ramp_data))
        ext_data = int(is_ramp_data)
        log.info(f"Ext data to send back on ramp status {ext_data}")
        reply = True
    elif data == 11: # HDA HOST OUT SEND
        ramp = bytearray(range(256))
        hda_streams[ext_data & 0xff].write(ramp)
    elif data == 12: # HDA PRINT
        stream = hda_streams[ext_data & 0xFF]
        print_len = (ext_data >> 8) & 0xFFFF
        ring_len = stream.buf_len*2
        # check for wrap here
        pos = stream.mem.tell()
        if pos + print_len >= ring_len:
            first_len = ring_len - pos
            second_len = print_len - first_len
        else:
            first_len = print_len
            second_len = 0
        # validate the read lens
        assert (first_len + pos) <= ring_len
        assert first_len % 128 == 0
        assert second_len % 128 == 0
        sys.stdout.write(stream.mem.read(first_len).decode("utf-8", "replace"))
        if second_len != 0:
            # Wrapped: the remainder starts again at the top of the buffer
            stream.mem.seek(0)
            sys.stdout.write(stream.mem.read(second_len).decode("utf-8", "replace"))
        pos = stream.mem.tell()
        sys.stdout.flush()
    else:
        log.warning(f"cavstool: Unrecognized IPC command 0x{data:x} ext 0x{ext_data:x}")
        if not fw_is_alive(dsp):
            if not args.log_only:
                log.warning("DSP power seems off?!")
                time.sleep(2) # potential spam reduction
            else:
                log.info("DSP power seems off")
                wait_fw_entered(dsp, timeout_s=None)
            # Firmware is not alive: skip the acknowledge sequence entirely
            return

    if not adsp_is_ace():
        dsp.HIPCTDR = 1<<31 # Ack local interrupt
        if signal_done:
            dsp.HIPCTDA = 1<<31 # Signal done
        if reply:
            dsp.HIPCIDD = ext_data
            dsp.HIPCIDR = (1<<31) | ext_data
        return

    dsp.HFIPCXTDR = 1<<31 # Ack local interrupt
    if signal_done:
        dsp.HFIPCXTDA = dsp.HFIPCXTDA & ~(1<<31) # Signal done
    if reply:
        log.debug("ipc: sending msg 0x%08x" % ext_data)
        dsp.HFIPCXIDDY = ext_data
        dsp.HFIPCXIDR = (1<<31) | ext_data
922
def handle_ipc():
    """Poll the host IPC registers once and service whatever is pending.

    If bit 31 of the initiator DONE-ack register is set, it is written back
    to acknowledge the DSP's DONE.  If bit 31 of the target doorbell register
    is set, ipc_command() is called with the remaining doorbell bits as the
    command and the companion data register as its argument.  ACE hardware
    uses the HFIPCX* registers, other hardware the HIPC* ones.
    """
    bit31 = 0x80000000

    if not adsp_is_ace():
        if dsp.HIPCIDA & bit31:
            dsp.HIPCIDA = 1<<31 # must ACK any DONE interrupts that arrive!
        if dsp.HIPCTDR & bit31:
            ipc_command(dsp.HIPCTDR & ~bit31, dsp.HIPCTDD)
        return

    if dsp.HFIPCXIDA & bit31:
        log.debug("ipc: Ack DSP reply with IDA_DONE")
        dsp.HFIPCXIDA = 1<<31 # must ACK any DONE interrupts that arrive!
    if dsp.HFIPCXTDR & bit31:
        ipc_command(dsp.HFIPCXTDR & ~bit31, dsp.HFIPCXTDDY)
936
async def main():
    """Map the device, optionally load firmware, then stream DSP output.

    Steps, in order:
      1. Map the device registers via map_regs(); on any failure log the
         error and exit with status 1.
      2. With --log-only, wait for already-running firmware; otherwise load
         the firmware file given on the command line (exit with status 1 if
         it is missing), pause 0.1 s and, unless --quiet, print a "--"
         separator line.
      3. With --shell-pty, create the shell pseudo-terminal.
      4. Loop every 0.03 s while start_output is true: forward shell output
         to the PTY (if enabled), copy new log winstream text to stdout and,
         unless --log-only, service pending IPC commands.

    Publishes hda, sd, dsp, hda_ostream_id and hda_streams as module globals
    for the rest of the file.
    """
    #TODO this bit me, remove the globals, write a little FirmwareLoader class or something to contain.
    global hda, sd, dsp, hda_ostream_id, hda_streams

    try:
        (hda, sd, dsp, hda_ostream_id) = map_regs(args.log_only)
    except Exception as e:
        # Top-level boundary: report whatever went wrong and bail out
        log.error("Could not map device in sysfs; run as root?")
        log.error(e)
        sys.exit(1)

    log.info("Detected a supported cAVS/ACE hardware version")

    if args.log_only:
        wait_fw_entered(dsp, timeout_s=None)
    else:
        if not args.fw_file:
            log.error("Firmware file argument missing")
            sys.exit(1)

        if adsp_is_ace():
            load_firmware_ace(args.fw_file)
        else:
            load_firmware(args.fw_file)
        # Pause inside the coroutine without blocking the event loop
        await asyncio.sleep(0.1)

        if not args.quiet:
            sys.stdout.write("--\n")

    if args.shell_pty:
        create_shell_pty()

    # HDA streams created on demand by ipc_command(), keyed by stream id
    hda_streams = {}

    last_seq = 0
    last_seq_shell = 0
    while start_output:
        await asyncio.sleep(0.03)
        if args.shell_pty:
            last_seq_shell = read_from_shell_memwindow_winstream(last_seq_shell)
        (last_seq, output) = winstream_read(winstream_offset(), last_seq)
        if output:
            sys.stdout.write(output)
            sys.stdout.flush()
        if not args.log_only:
            handle_ipc()
983
def args_parse():
    """Parse the command line into the global args and set the log level.

    Registers five boolean switches (-q, -v, -l, -p, -n) and an optional
    positional fw_file.  --quiet lowers the logger to WARN and takes
    precedence over --verbose, which raises it to DEBUG.
    """
    global args

    parser = argparse.ArgumentParser(description="DSP loader/logger tool", allow_abbrev=False)

    # (short flag, long flag, help text) for every store_true switch
    switches = (
        ("-q", "--quiet", "No loader output, just DSP logging"),
        ("-v", "--verbose", "More loader output, DEBUG logging level"),
        ("-l", "--log-only", "Don't load firmware, just show log output"),
        ("-p", "--shell-pty", "Create a Zephyr shell pty if enabled in firmware"),
        ("-n", "--no-history", "No current log buffer at start, just new output"),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action="store_true",
                            help=help_text)
    parser.add_argument("fw_file", nargs="?", help="Firmware file")

    args = parser.parse_args()

    if args.quiet:
        log.setLevel(logging.WARN)
    elif args.verbose:
        log.setLevel(logging.DEBUG)
1005
# Script entry point: parse the command line, then run main() on a fresh
# asyncio event loop until it returns or the user interrupts it.
if __name__ == "__main__":
    args_parse()
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Clear the flag that main()'s output loop polls
        start_output = False
1012