1#!/usr/bin/env python3
2# Copyright(c) 2022 Intel Corporation. All rights reserved.
3# SPDX-License-Identifier: Apache-2.0
4import os
5import sys
6import struct
7import logging
8import asyncio
9import time
10import subprocess
11import ctypes
12import mmap
13import argparse
14import pty
15
# NOTE(review): not referenced in the visible part of this file; presumably
# gates console output elsewhere -- confirm against its users.
start_output = True

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("cavs-fw")

# Size of a regular page: map_phys_mem() multiplies the page frame number
# read from /proc/self/pagemap by this to obtain a physical address.
PAGESZ = 4096
# Size of the single hugetlbfs page that is mapped as a DMA buffer.
HUGEPAGESZ = 2 * 1024 * 1024
# Path prefix of the hugetlbfs backing file; the stream id is appended to it.
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp."

# SRAM windows. Base and stride vary depending on ADSP version
#
# Window 0 is the FW_STATUS area, and 4k after that the IPC "outbox"
# Window 1 is the IPC "inbox" (host-writable memory, just 384 bytes currently)
# Window 2 is used for debug slots (Zephyr shell is one user)
# Window 3 is winstream-formatted log output

# Window layout used when adsp_is_ace() is false
WINDOW_BASE = 0x80000
WINDOW_STRIDE = 0x20000

# Window layout used when adsp_is_ace() is true
WINDOW_BASE_ACE = 0x180000
WINDOW_STRIDE_ACE = 0x8000

# Debug window layout: shell_base_offset() places the shell slot at
# DEBUG_SLOT_SIZE * (1 + DEBUG_SLOT_SHELL) bytes into window 2.
DEBUG_SLOT_SIZE = 4096
DEBUG_SLOT_SHELL = 0
# Offset added to the shell slot base to find the winstream the host reads
SHELL_RX_SIZE = 256
# A winstream length above this is treated as "DSP powered off"
SHELL_MAX_VALID_SLOT_SIZE = 16777216

# pylint: disable=duplicate-code

# ADSPCS bits (lowest bit position of each field; mask() widens them
# according to the detected cAVS version)
CRST   = 0
CSTALL = 8
SPA    = 16
CPA    = 24
50
class HDAStream:
    """Host-side handle for one HD Audio stream, driven by DSP test IPCs.

    Construction maps the global HDA registers and the per-stream
    descriptor registers (relative to the module-level ``hdamem`` base) and
    resets the stream.  ``config()`` then allocates a huge page holding two
    buffers of ``buf_len`` bytes plus the buffer descriptor list.
    """

    def __init__(self, stream_id: int):
        self.stream_id = stream_id
        # Stream descriptors are 0x20 bytes apart, starting at offset 0x80
        self.base = hdamem + 0x0080 + (stream_id * 0x20)
        log.info(f"Mapping registers for hda stream {self.stream_id} at base {self.base:x}")

        self.hda = Regs(hdamem)
        self.hda.GCAP    = 0x0000
        self.hda.GCTL    = 0x0008
        self.hda.DPLBASE = 0x0070
        self.hda.DPUBASE = 0x0074
        self.hda.SPBFCH  = 0x0700
        self.hda.SPBFCTL = 0x0704
        self.hda.PPCH    = 0x0800
        self.hda.PPCTL   = 0x0804
        self.hda.PPSTS   = 0x0808
        self.hda.SPIB = 0x0708 + stream_id*0x08
        self.hda.freeze()

        self.regs = Regs(self.base)
        self.regs.CTL  = 0x00
        self.regs.STS  = 0x03
        self.regs.LPIB = 0x04
        self.regs.CBL  = 0x08
        self.regs.LVI  = 0x0c
        self.regs.FIFOW = 0x0e
        self.regs.FIFOS = 0x10
        self.regs.FMT = 0x12
        self.regs.FIFOL= 0x14
        self.regs.BDPL = 0x18
        self.regs.BDPU = 0x1c
        self.regs.freeze()

        self.dbg0 = Regs(hdamem + 0x0084 + (0x20*stream_id))
        self.dbg0.DPIB = 0x00
        self.dbg0.EFIFOS = 0x10
        self.dbg0.freeze()

        self.reset()

    def __del__(self):
        # Leave the hardware stream stopped and reset when the object dies
        self.reset()

    def config(self, buf_len: int):
        """Allocate the DMA buffers and program the stream descriptor.

        buf_len is the size in bytes of each of the two buffers.
        """
        log.info(f"Configuring stream {self.stream_id}")
        self.buf_len = buf_len
        log.info("Allocating huge page and setting up buffers")
        self.mem, self.hugef, self.buf_list_addr, self.pos_buf_addr, self.n_bufs = self.setup_buf(buf_len)

        log.info("Setting buffer list, length, and stream id and traffic priority bit")
        self.regs.CTL = ((self.stream_id & 0xFF) << 20) | (1 << 18) # must be set to something other than 0?
        self.regs.BDPU = (self.buf_list_addr >> 32) & 0xffffffff
        self.regs.BDPL = self.buf_list_addr & 0xffffffff
        self.regs.CBL = buf_len
        self.regs.LVI = self.n_bufs - 1
        self.mem.seek(0)
        self.debug()
        log.info(f"Configured stream {self.stream_id}")

    def write(self, data):
        """Copy up to buf_len bytes of data into both buffers and advance SPIB.

        The same bytes are stored at offset 0 and again directly after
        them; SPIB is enabled for this stream and incremented by the number
        of bytes copied.
        """
        bufl = min(len(data), self.buf_len)
        log.info(f"Writing data to stream {self.stream_id}, len {bufl}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")
        self.mem[0:bufl] = data[0:bufl]
        self.mem[bufl:bufl+bufl] = data[0:bufl]
        self.hda.SPBFCTL |= (1 << self.stream_id)
        self.hda.SPIB += bufl
        log.info(f"Wrote data to stream {self.stream_id}, SPBFCTL {self.hda.SPBFCTL:x}, SPIB {self.hda.SPIB}")

    def start(self):
        """Set the START bit (bit 1 of CTL) to begin DMA."""
        log.info(f"Starting stream {self.stream_id}, CTL {self.regs.CTL:x}")
        self.regs.CTL |= 2
        log.info(f"Started stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def stop(self):
        """Clear START, wait briefly, then set the reset bit (bit 0 of CTL).

        Mirrors the teardown sequence used by reset() and by the firmware
        loaders in this file.
        """
        log.info(f"Stopping stream {self.stream_id}, CTL {self.regs.CTL:x}")
        # Clear only the START bit; masking with plain 2 would instead keep
        # START set and wipe every other field of the register.
        self.regs.CTL &= ~2
        time.sleep(0.1)
        self.regs.CTL |= 1
        log.info(f"Stopped stream {self.stream_id}, CTL {self.regs.CTL:x}")

    def setup_buf(self, buf_len: int):
        """Map a huge page and lay out two buffers followed by the BDL.

        Returns (mem, hugef, bdl_phys_addr, pos_buf_phys_addr, n_bufs)
        where mem is the mmap of the page, hugef its backing file object
        and n_bufs is always 2.
        """
        (mem, phys_addr, hugef) = map_phys_mem(self.stream_id)

        log.info(f"Mapped 2M huge page at 0x{phys_addr:x} for buf size ({buf_len})")

        # create two buffers in the page of buf_len and mark them
        # in a buffer descriptor list for the hardware to use
        buf0_len = buf_len
        buf1_len = buf_len
        bdl_off = buf0_len + buf1_len
        # each BDL entry is two 64-bit words (16 bytes); we have two entries
        mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                                phys_addr,
                                                buf0_len,
                                                phys_addr + buf0_len,
                                                buf1_len)
        # the position buffer address is placed directly after the BDL
        dpib_off = bdl_off+32

        # ensure both buffers start out zeroed, sanity
        mem[0:buf_len * 2] = bytes(buf_len * 2)

        log.info("Filled the buffer descriptor list (BDL) for DMA.")
        return (mem, hugef, phys_addr + bdl_off, phys_addr+dpib_off, 2)

    def debug(self):
        """Dump the stream's register state at debug log level."""
        log.debug("HDA %d: PPROC %d, CTL 0x%x, LPIB 0x%x, BDPU 0x%x, BDPL 0x%x, CBL 0x%x, LVI 0x%x",
                 self.stream_id, (self.hda.PPCTL >> self.stream_id) & 1, self.regs.CTL, self.regs.LPIB, self.regs.BDPU,
                 self.regs.BDPL, self.regs.CBL, self.regs.LVI)
        log.debug("    FIFOW %d, FIFOS %d, FMT %x, FIFOL %d, DPIB %d, EFIFOS %d",
                 self.regs.FIFOW & 0x7, self.regs.FIFOS, self.regs.FMT, self.regs.FIFOL, self.dbg0.DPIB, self.dbg0.EFIFOS)
        log.debug("    status: FIFORDY %d, DESE %d, FIFOE %d, BCIS %d",
                 (self.regs.STS >> 5) & 1, (self.regs.STS >> 4) & 1, (self.regs.STS >> 3) & 1, (self.regs.STS >> 2) & 1)

    def reset(self):
        """Stop DMA, cycle the stream through reset and re-enable PROCEN."""
        # Turn DMA off and reset the stream.  Clearing START first is a
        # noop per the spec, but absolutely required for stability.
        # Apparently the reset doesn't stop the stream, and the next load
        # starts before it's ready and kills the load (and often the DSP).
        # The sleep too is required, on at least one board (a fast
        # chromebook) putting the two writes next each other also hangs
        # the DSP!
        log.info(f"Resetting stream {self.stream_id}")
        self.debug()
        self.regs.CTL &= ~2 # clear START
        time.sleep(0.1)
        # set enter reset bit
        self.regs.CTL = 1
        while (self.regs.CTL & 1) == 0: pass
        # clear enter reset bit to exit reset
        self.regs.CTL = 0
        while (self.regs.CTL & 1) == 1: pass

        log.info(f"Disable SPIB and set position 0 of stream {self.stream_id}")
        self.hda.SPBFCTL = 0
        self.hda.SPIB = 0

        #log.info("Setting dma position buffer and enable it")
        #self.hda.DPUBASE = self.pos_buf_addr >> 32 & 0xffffffff
        #self.hda.DPLBASE = self.pos_buf_addr & 0xfffffff0 | 1

        log.info(f"Enabling dsp capture (PROCEN) of stream {self.stream_id}")
        self.hda.PPCTL |= (1 << self.stream_id)

        self.debug()
        log.info(f"Reset stream {self.stream_id}")
199
def adsp_is_cavs():
    """Return True when the detected device is a cAVS 1.5, 1.8 or 2.5 part.

    Reads the module-level platform flags set by map_regs().
    """
    return cavs15 or cavs18 or cavs25
202
def adsp_is_ace():
    """Return a truthy value when any of the ACE platform flags is set.

    The flags are module-level globals assigned by map_regs().
    """
    if ace15:
        return ace15
    if ace20:
        return ace20
    return ace30
205
def adsp_mem_window_config():
    """Return the (base, stride) pair of the SRAM windows for this platform."""
    if not adsp_is_ace():
        return (WINDOW_BASE, WINDOW_STRIDE)
    return (WINDOW_BASE_ACE, WINDOW_STRIDE_ACE)
215
def map_regs():
    """Locate the audio PCI device, map its BARs and build register blocks.

    Side effects: sets the module-level platform flags (cavs15 ... ace30),
    ``hdamem``, ``bar4_mmap`` and ``bar4_mem``; unless ``args.log_only`` is
    set, unloads any kernel driver bound to the device and forces its
    runtime power management to "on"; rewrites the PCI command register.

    Returns (hda, sd, dsp, hda_ostream_id): the global HDA registers, the
    stream descriptor of the first output stream, the DSP registers and
    the index of that output stream.
    """
    p = runx("grep -iEl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Platform/quirk detection.  ID lists cribbed from the SOF kernel driver
    global cavs15, cavs18, cavs25, ace15, ace20, ace30
    with open(f"{pcidir}/device") as devf:
        did = int(devf.read().rstrip(), 16)
    cavs15 = did in [ 0x5a98, 0x1a98, 0x3198 ]
    cavs18 = did in [ 0x9dc8, 0xa348, 0x02c8, 0x06c8, 0xa3f0 ]
    cavs25 = did in [ 0xa0c8, 0x43c8, 0x4b55, 0x4b58, 0x7ad0, 0x51c8 ]
    ace15 = did in [ 0x7e28 ]
    ace20 = did in [ 0xa828 ]
    ace30 = did in [ 0xe428 ]

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        found_msg = f"Existing driver \"{mod}\" found"
        if args.log_only:
            log.info(found_msg)
        else:
            log.warning(found_msg + ", unloading module")
            runx(f"rmmod -f {mod}")
            # Disengage runtime power management so the kernel doesn't put it to sleep
            log.info(f"Forcing {pcidir}/power/control to always 'on'")
            with open(f"{pcidir}/power/control", "w") as ctrl:
                ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    global hdamem
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP    = 0x0000
    hda.GCTL    = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL   = 0x0804

    # Find the ID of the first output stream: output streams follow the
    # input streams, so it equals the input stream count from GCAP
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()


    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL  = 0x00
    sd.CBL  = 0x08
    sd.LVI  = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    global bar4_mmap
    global bar4_mem
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    if adsp_is_ace():
        dsp.HFDSSCS        = 0x1000
        dsp.HFPWRCTL       = 0x1d18 if ace20 else 0x1d20
        dsp.HFPWRSTS       = 0x1d1c if ace20 else 0x1d24
        dsp.DSP2CXCTL_PRIMARY = 0x178d04
        dsp.HFIPCXTDR      = 0x73200
        dsp.HFIPCXTDA      = 0x73204
        dsp.HFIPCXIDR      = 0x73210
        dsp.HFIPCXIDA      = 0x73214
        dsp.HFIPCXCTL      = 0x73228
        dsp.HFIPCXTDDY     = 0x73300
        dsp.HFIPCXIDDY     = 0x73380
        dsp.ROM_STATUS     = 0x163200 if ace15 else 0x160200
        dsp.SRAM_FW_STATUS = WINDOW_BASE_ACE
    else:
        dsp.ADSPCS         = 0x00004
        dsp.HIPCTDR        = 0x00040 if cavs15 else 0x000c0
        dsp.HIPCTDA        =                        0x000c4 # 1.8+ only
        dsp.HIPCTDD        = 0x00044 if cavs15 else 0x000c8
        dsp.HIPCIDR        = 0x00048 if cavs15 else 0x000d0
        dsp.HIPCIDA        =                        0x000d4 # 1.8+ only
        dsp.HIPCIDD        = 0x0004c if cavs15 else 0x000d8
        dsp.ROM_STATUS     = WINDOW_BASE # Start of first SRAM window
        dsp.SRAM_FW_STATUS = WINDOW_BASE
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id)
307
def setup_dma_mem(fw_bytes):
    """Copy the firmware image into a huge page and build its BDL.

    Returns (bdl_phys_addr, n_bufs) with n_bufs always 2.

    Raises ValueError when the image does not fit in front of the buffer
    descriptor list, since the list would otherwise overwrite its tail.
    """
    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region. Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and put
    # the 2-entry BDL list into the final 128 bytes of the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len

    if len(fw_bytes) > bdl_off:
        raise ValueError("firmware image of %d bytes exceeds the %d byte DMA buffer"
                         % (len(fw_bytes), bdl_off))

    (mem, phys_addr, _) = map_phys_mem(hda_ostream_id)
    mem[0:len(fw_bytes)] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware"
          % (phys_addr, len(fw_bytes)))

    # Each entry is a 64-bit address followed by a 64-bit word holding the length
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)
327
# Every mmap created by map_phys_mem() and bar_map() is appended here so that
# a reference survives after those functions return.
global_mmaps = [] # protect mmap mappings from garbage collection!
329
# Maps 2M of contiguous memory using a single page from hugetlbfs,
# then locates its physical address for use as a DMA buffer.
def map_phys_mem(stream_id):
    """Return (mem, paddr, hugef) for a freshly mapped 2M huge page.

    mem is the mmap object, paddr the physical address computed from
    /proc/self/pagemap, and hugef the (already unlinked) backing file
    object.  stream_id only serves to make the backing file name unique.
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    hugef_name = HUGEPAGE_FILE + str(stream_id)
    hugef = open(hugef_name, "w+")
    hugef.truncate(HUGEPAGESZ)
    mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    log.info("type of mem is %s", str(type(mem)))
    global_mmaps.append(mem)
    # The name is no longer needed once the mapping exists
    os.unlink(hugef_name)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0 # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr >> 12
    # pagemap holds one 8-byte entry per virtual page
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)
        pent = pagemap.read(8)
    paddr = (struct.unpack("Q", pent)[0] & ((1 << 55) - 1)) * PAGESZ
    return (mem, paddr, hugef)
364
# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Map resource file number barnum of the PCI device at pcidir.

    Returns (address, mm): the process address of the mapping and the mmap
    object itself.  The mapping is also recorded in global_mmaps.
    """
    # The mapping stays valid after the file object is closed
    with open(pcidir + "/resource" + str(barnum), "r+") as f:
        size = os.fstat(f.fileno()).st_size
        mm = mmap.mmap(f.fileno(), size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes."
             % (barnum, size))
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)
373
# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named registers as
# fields, call freeze(), then the field acts as a direct alias for the register!
class Regs:
    """Named uint32 registers at fixed offsets from a base address.

    Until freeze() is called, assigning to a new attribute name defines a
    register at that offset.  Assigning to an already defined name (or to
    any name after freeze()) writes the register; reading an attribute
    reads it.  Unknown names raise AttributeError.
    """

    def __init__(self, base_addr):
        # Go through vars() so that __setattr__ is not triggered
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """Stop accepting new register definitions."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        if not self.frozen and name not in self.ptrs:
            addr = self.base_addr + val
            self.ptrs[name] = ctypes.c_uint32.from_address(addr)
        elif name in self.ptrs:
            self.ptrs[name].value = val
        else:
            raise AttributeError(f"no register named {name!r}")

    def __getattr__(self, name):
        # Only called when normal lookup fails.  Look "ptrs" up through
        # vars() so an uninitialised instance cannot recurse back in here.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(f"no register named {name!r}") from None
392
def runx(cmd):
    """Run cmd through the shell and return its stdout as stripped text.

    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    finished = subprocess.run(cmd, shell=True, check=True,
                              stdout=subprocess.PIPE)
    text = finished.stdout.decode()
    return text.rstrip()
395
def mask(bit):
    """Return the ADSPCS field mask starting at bit for this cAVS version.

    The field is 1 bit wide on cAVS 2.5, 4 bits on 1.8 and 2 bits on 1.5.
    Returns None when none of the cAVS flags is set.
    """
    if cavs25:
        width = 1
    elif cavs18:
        width = 4
    elif cavs15:
        width = 2
    else:
        return None
    return ((1 << width) - 1) << bit
403
def load_firmware(fw_file):
    """Load the firmware image fw_file into a cAVS DSP over HDA DMA.

    Exits the process with status 1 when the file cannot be read or is too
    short to hold a manifest header.  Uses the module-level ``hda``,
    ``sd``, ``dsp`` and ``hda_ostream_id`` objects.  The busy-wait loops
    have no timeout.
    """
    try:
        with open(fw_file, "rb") as fw:
            fw_bytes = fw.read()
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    # The header check below unpacks 8 bytes; fail cleanly on a stub file
    if len(fw_bytes) < 8:
        log.error(f"Firmware file too short ({len(fw_bytes)} bytes): `{fw_file}'")
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info(f"Stalling and Resetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS |= mask(CSTALL)
    dsp.ADSPCS |= mask(CRST)
    while (dsp.ADSPCS & mask(CRST)) == 0: pass

    log.info(f"Powering down DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(SPA)
    while dsp.ADSPCS & mask(CPA): pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Cycle the stream through reset before programming it
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL = 1 << 20 # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP.  Host needs to provide power to all cores on 1.5
    # (which also starts them) and 1.8 (merely gates power, DSP also
    # has to set PWRCTL). On 2.5 where the DSP has full control,
    # and only core 0 is set.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = mask(SPA)
    while (dsp.ADSPCS & mask(CPA)) == 0: pass

    log.info(f"Unresetting DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CRST)
    while (dsp.ADSPCS & 1) != 0: pass

    log.info(f"Running DSP cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS &= ~mask(CSTALL)

    # Wait for the ROM to boot and signal it's ready.  This not so short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info(f"Wait for ROM startup, ADSPCS = 0x{dsp.ADSPCS:x}")
    time.sleep(1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5: pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).  But with 1.5 it's the HDA index,
    # and depends on the number of input streams on the device.
    stream_idx = hda_ostream_id if cavs15 else 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HIPIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered()

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info(f"cAVS firmware load complete")
507
def load_firmware_ace(fw_file):
    """Load the firmware image fw_file into an ACE DSP over HDA DMA.

    The image is zero-padded to at least 512 KiB before use.  Exits the
    process with status 1 when the file cannot be read.  Uses the
    module-level ``hda``, ``sd``, ``dsp`` and ``hda_ostream_id`` objects.
    """
    try:
        with open(fw_file, "rb") as fw:
            fw_bytes = fw.read()
        # Resize fw_bytes for MTL
        if len(fw_bytes) < 512 * 1024:
            fw_bytes += b'\x00' * (512 * 1024 - len(fw_bytes))
    except Exception as e:
        log.error(f"Could not read firmware file: `{fw_file}'")
        log.error(e)
        sys.exit(1)

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'$AE1':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:len(fw_bytes)]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30) # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1: pass
    hda.GCTL = 1
    while not hda.GCTL & 1: pass

    log.info("Turning of DSP subsystem")
    dsp.HFDSSCS &= ~(1 << 16) # clear SPA bit
    time.sleep(0.002)
    # wait for CPA bit clear
    while dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power off")
        time.sleep(0.1)

    log.info("Turning on DSP subsystem")
    dsp.HFDSSCS |= (1 << 16) # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFDSSCS & (1 << 24):
        log.info("Waiting for DSP subsystem power on")
        time.sleep(0.1)

    log.info("Turning on Domain0")
    dsp.HFPWRCTL |= 0x1 # set SPA bit
    time.sleep(0.002) # needed as the CPA bit may be unstable
    # wait for CPA bit
    while not dsp.HFPWRSTS & 0x1:
        log.info("Waiting for DSP domain0 power on")
        time.sleep(0.1)

    log.info("Turning off Primary Core")
    dsp.DSP2CXCTL_PRIMARY &= ~(0x1) # clear SPA
    time.sleep(0.002) # wait for CPA settlement
    while dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power off")
        time.sleep(0.1)

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Cycle the stream through reset before programming it
    sd.CTL = 1
    while (sd.CTL & 1) == 0: pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1: pass
    sd.CTL |= (1 << 20) # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)


    # Send the DSP an IPC message to tell the device how to boot.
    # The dma_id field is always 0 here (unlike the cAVS 1.5 path of
    # load_firmware(), which passes the HDA stream index).
    stream_idx = 0
    ipcval = (  (1 << 31)            # BUSY bit
                | (0x01 << 24)       # type = PURGE_FW
                | (1 << 14)          # purge_fw = 1
                | (stream_idx << 9)) # dma_id
    log.info(f"Sending IPC command, HFIPCXIDR = 0x{ipcval:x}")
    dsp.HFIPCXIDR = ipcval

    log.info("Turning on Primary Core")
    dsp.DSP2CXCTL_PRIMARY |= 0x1 # set SPA
    time.sleep(0.002) # wait for CPA settlement
    while not dsp.DSP2CXCTL_PRIMARY & (1 << 8):
        log.info("Waiting for DSP primary core power on")
        time.sleep(0.1)

    log.info("Waiting for IPC acceptance")
    while dsp.HFIPCXIDR & (1 << 31):
        log.info("Waiting for IPC busy bit clear")
        time.sleep(0.1)

    log.info("ACK IPC")
    dsp.HFIPCXIDA |= (1 << 31)

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.ROM_STATUS:x}")
    sd.CTL |= 2 # START flag

    wait_fw_entered()

    # Turn DMA off and reset the stream.  Clearing START first is a
    # noop per the spec, but absolutely required for stability.
    # Apparently the reset doesn't stop the stream, and the next load
    # starts before it's ready and kills the load (and often the DSP).
    # The sleep too is required, on at least one board (a fast
    # chromebook) putting the two writes next each other also hangs
    # the DSP!
    sd.CTL &= ~2 # clear START
    time.sleep(0.1)
    sd.CTL |= 1
    log.info(f"ACE firmware load complete")
629
def fw_is_alive():
    """Return True when the low 28 bits of ROM_STATUS equal 5."""
    status = dsp.ROM_STATUS & 0x0fffffff
    return status == 5 # "FW_ENTERED"
632
def wait_fw_entered(timeout_s=2):
    """Poll fw_is_alive() 100 times a second until it succeeds or times out.

    timeout_s is the timeout in seconds; None waits forever.  The outcome
    is only logged, nothing is returned or raised.
    """
    wait_desc = "forever" if timeout_s is None else f"{timeout_s} seconds"
    log.info("Waiting %s for firmware handoff, ROM_STATUS = 0x%x",
             wait_desc, dsp.ROM_STATUS)
    hertz = 100
    remaining = None if timeout_s is None else timeout_s * hertz

    alive = fw_is_alive()
    while not alive:
        if remaining is not None:
            remaining -= 1
            if remaining < 0:
                break
        time.sleep(1 / hertz)
        alive = fw_is_alive()

    if alive:
        log.info("FW alive, ROM_STATUS = 0x%x", dsp.ROM_STATUS)
    else:
        log.warning("Load failed?  ROM_STATUS = 0x%x", dsp.ROM_STATUS)
653
def winstream_offset():
    """Return the offset of SRAM window 3 (the log output window)."""
    window = adsp_mem_window_config()
    return window[0] + window[1] * 3
657
# Ideally this would be a plain "mem[start:start+length]" slice, but slicing
# an mmap array proved unreliable on one machine (python 3.6.9 on Ubuntu
# 18.04), so each byte is fetched individually.
def win_read(base, start, length):
    """Return length bytes of bar4_mmap beginning at base + start."""
    try:
        return bytes(bar4_mmap[base + pos]
                     for pos in range(start, start + length))
    except IndexError:
        # A FW in a bad state may cause winstream garbage
        log.error("IndexError in bar4_mmap[%d + %d]", base, start)
        log.error("bar4_mmap.size()=%d", bar4_mmap.size())
        raise
670
def winstream_reg_hdr(base):
    """Return a frozen Regs block for the 16-byte winstream header at base."""
    hdr = Regs(bar4_mem + base)
    layout = (("WLEN", 0x00), ("START", 0x04), ("END", 0x08), ("SEQ", 0x0c))
    for (reg, off) in layout:
        setattr(hdr, reg, off)
    hdr.freeze()
    return hdr
679
def win_hdr(hdr):
    """Read the header fields and return them as (WLEN, START, END, SEQ)."""
    return tuple(getattr(hdr, field)
                 for field in ("WLEN", "START", "END", "SEQ"))
682
# Python port of the algorithm used by sys_winstream_read(); refer to that
# function for the details.
def winstream_read(base, last_seq):
    """Read the bytes written to the winstream at base since last_seq.

    Returns (seq, text): the stream's current sequence number and the
    decoded new data ("" when there is nothing to report).  A last_seq of
    0 means "first read"; buffered history is then included unless
    args.no_history is set.
    """
    hdr = winstream_reg_hdr(base)
    while True:
        (wlen, start, end, seq) = win_hdr(hdr)
        if wlen > SHELL_MAX_VALID_SLOT_SIZE:
            log.debug("DSP powered off at winstream_read")
            return (seq, "")
        if wlen == 0:
            return (seq, "")
        buffered = (end - start) % wlen
        if last_seq == 0:
            last_seq = seq if args.no_history else (seq - buffered)
        if seq == last_seq or start == end:
            return (seq, "")
        behind = seq - last_seq
        if behind > buffered:
            # Fell too far behind: the unread data has been overwritten
            return (seq, "")
        first = (end - behind) % wlen
        head_len = min(behind, wlen - first)
        data = win_read(base, 16 + first, head_len)
        if head_len < behind:
            # The unread region wraps around the end of the buffer
            data += win_read(base, 16, behind - head_len)
        # Re-read the header; retry if the writer moved on while we copied
        (_, start_after, _, seq_after) = win_hdr(hdr)
        if start_after == start and seq_after == seq:
            # Best effort attempt at decoding, replacing unusable characters
            # Found to be useful when it really goes wrong
            return (seq, data.decode("utf-8", "replace"))
711
def idx_mod(wlen, idx):
    """Wrap idx into the buffer by subtracting wlen at most once."""
    return idx - wlen if idx >= wlen else idx
716
def idx_sub(wlen, a, b):
    """Return the wrapped distance from index b forward to index a."""
    unwrapped = a + (wlen - b)
    return idx_mod(wlen, unwrapped)
719
# Python implementation of the same algorithm in sys_winstream_write(),
# see there for details.
def winstream_write(base, msg):
    """Append the bytes msg to the winstream whose header is at base.

    Does nothing when the header length is 0 or implausibly large (taken
    to mean the DSP is powered off).  A message longer than the buffer can
    hold is truncated to its last wlen - 1 bytes.
    """
    hdr = winstream_reg_hdr(base)
    (wlen, start, end, seq) = win_hdr(hdr)
    if wlen > SHELL_MAX_VALID_SLOT_SIZE:
        log.debug("DSP powered off at winstream_write")
        return
    if wlen == 0:
        return
    lenmsg = len(msg)
    lenmsg0 = lenmsg
    # Overflow: if we're truncating then just reset the buffer.  At most
    # wlen - 1 bytes can be buffered because start == end means "empty".
    if lenmsg > wlen - 1:
        start = end
        lenmsg = wlen - 1
    lenmsg = min(lenmsg, wlen)
    # Make room in the buffer by advancing START first
    if seq != 0:
        avail = (wlen - 1) - idx_sub(wlen, end, start)
        if lenmsg > avail:
            hdr.START = idx_mod(wlen, start + (lenmsg - avail))
    # Had to truncate?  Keep only the trailing lenmsg bytes of the message
    if lenmsg < lenmsg0:
        hdr.START = end
        drop = lenmsg0 - lenmsg
        msg = msg[drop:]
    # Copy up to the end of the buffer, then wrap to its beginning
    suffix = min(lenmsg, wlen - end)
    for c in range(0, suffix):
        bar4_mmap[base + 16 + end + c] = msg[c]
    if lenmsg > suffix:
        for c in range(0, lenmsg - suffix):
            bar4_mmap[base + 16 + c] = msg[suffix + c]
    hdr.END = idx_mod(wlen, end + lenmsg)
    hdr.SEQ += lenmsg0
752
def debug_offset():
    """Return the offset of SRAM window 2 (the debug slot window)."""
    window = adsp_mem_window_config()
    return window[0] + window[1] * 2
756
def shell_base_offset():
    """Return the offset of the shell's slot inside the debug window."""
    slot_index = 1 + DEBUG_SLOT_SHELL
    return debug_offset() + DEBUG_SLOT_SIZE * slot_index
759
def read_from_shell_memwindow_winstream(last_seq):
    """Forward new shell output from the DSP to the PTY.

    Returns the sequence number to pass as last_seq on the next call.
    """
    rx_base = shell_base_offset() + SHELL_RX_SIZE
    (seq, text) = winstream_read(rx_base, last_seq)
    if text:
        os.write(shell_client_port, text.encode("utf-8"))
    return seq
766
def write_to_shell_memwindow_winstream():
    """Move one byte of PTY input into the shell's winstream, if available."""
    char = os.read(shell_client_port, 1)
    if not char:
        return
    winstream_write(shell_base_offset(), char)
771
def create_shell_pty():
    """Open a PTY pair for the DSP shell and watch it for user input.

    Stores one end in the module-level ``shell_client_port``, logs the
    device name of the other end and registers
    write_to_shell_memwindow_winstream() as the event loop's reader.
    """
    global shell_client_port
    ports = pty.openpty()
    shell_client_port = ports[0]
    log.info(f"shell PTY at: {os.ttyname(ports[1])}")
    loop = asyncio.get_event_loop()
    loop.add_reader(shell_client_port, write_to_shell_memwindow_winstream)
778
async def ipc_delay_done():
    """Signal IPC "done" to the DSP after a 100 ms delay."""
    await asyncio.sleep(0.1)
    if not adsp_is_ace():
        dsp.HIPCTDA = 1<<31
        return
    # ACE: clear bit 31 of the register to signal done
    dsp.HFIPCXTDA &= ~(1<<31)
785
def inbox_offset():
    """Return the offset of SRAM window 1 (the IPC inbox)."""
    window = adsp_mem_window_config()
    return window[0] + window[1]
789
def outbox_offset():
    """Return the offset of the IPC outbox, 4096 bytes into SRAM window 0."""
    window_base = adsp_mem_window_config()[0]
    return window_base + 4096
793
# Time in microseconds of the previous timestamp IPC (command 4 in
# ipc_command()), which echoes back the time elapsed since this value.
ipc_timestamp = 0
795
796# Super-simple command language, driven by the test code on the DSP
def ipc_command(data, ext_data):
    """Execute one IPC command received from the DSP, then acknowledge it.

    Args:
        data: Command number.  Values 0-12 are recognized (see the
            per-branch comments); anything else is logged as unrecognized.
        ext_data: Command argument; its meaning depends on the command.
            Commands 4 and 10 overwrite it with their result, and commands
            2, 4 and 10 send it back to the DSP as a message.

    Returns:
        None.  Returns early, without acknowledging anything, when the
        command is unrecognized and fw_is_alive() is false.
    """
    send_msg = False  # set when ext_data must be sent back as a message
    done = True       # cleared when DONE is signalled later by ipc_delay_done()
    log.debug ("ipc data %d, ext_data %x", data, ext_data)
    if data == 0: # noop, with synchronous DONE
        pass
    elif data == 1: # async command: signal DONE after a delay (on 1.8+)
        if not cavs15:
            done = False
            asyncio.ensure_future(ipc_delay_done())
    elif data == 2: # echo back ext_data as a message command
        send_msg = True
    elif data == 3: # set ADSPCS
        dsp.ADSPCS = ext_data
    elif data == 4: # echo back microseconds since last timestamp command
        global ipc_timestamp
        t = round(time.time() * 1e6)
        ext_data = t - ipc_timestamp
        ipc_timestamp = t
        send_msg = True
    elif data == 5: # copy word at outbox[ext_data >> 16] to inbox[ext_data & 0xffff]
        # Indices are in 4-byte words; the copy is done one byte at a time.
        src = outbox_offset() + 4 * (ext_data >> 16)
        dst = inbox_offset() + 4 * (ext_data & 0xffff)
        for i in range(4):
            bar4_mmap[dst + i] = bar4_mmap[src + i]
    elif data == 6: # HDA RESET (init if not exists)
        stream_id = ext_data & 0xff
        if stream_id in hda_streams:
            hda_streams[stream_id].reset()
        else:
            # First use of this stream id: create it instead of resetting.
            hda_str = HDAStream(stream_id)
            hda_streams[stream_id] = hda_str
    elif data == 7: # HDA CONFIG
        # ext_data layout: bits 0-7 stream id, bits 8-23 buffer length.
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        hda_str.config(buf_len)
    elif data == 8: # HDA START
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].start()
        # Rewind the host-side read position of the stream memory.
        hda_streams[stream_id].mem.seek(0)

    elif data == 9: # HDA STOP
        stream_id = ext_data & 0xFF
        hda_streams[stream_id].stop()
    elif data == 10: # HDA VALIDATE
        # Check that the first 256 bytes of stream memory are the ramp
        # 0, 1, ..., 255 and reply with 1 (ramp) or 0 (not a ramp).
        stream_id = ext_data & 0xFF
        hda_str = hda_streams[stream_id]
        hda_str.debug()
        is_ramp_data = True
        hda_str.mem.seek(0)
        for (i, val) in enumerate(hda_str.mem.read(256)):
            if i != val:
                is_ramp_data = False
            # log.info("stream[%d][%d]: %d", stream_id, i, val) # debug helper
        log.info("Is ramp data? " + str(is_ramp_data))
        ext_data = int(is_ramp_data)
        log.info(f"Ext data to send back on ramp status {ext_data}")
        send_msg = True
    elif data == 11: # HDA HOST OUT SEND
        # Write a 256-byte ramp (0..255) to the stream.
        stream_id = ext_data & 0xff
        buf = bytearray(256)
        for i in range(0, 256):
            buf[i] = i
        hda_streams[stream_id].write(buf)
    elif data == 12: # HDA PRINT
        # Print buf_len bytes of stream memory to stdout as text, starting
        # at the current read position.
        # ext_data layout: bits 0-7 stream id, bits 8-23 byte count.
        stream_id = ext_data & 0xFF
        buf_len = ext_data >> 8 & 0xFFFF
        hda_str = hda_streams[stream_id]
        # check for wrap here
        # The readable region is treated as hda_str.buf_len*2 bytes long; a
        # read that reaches its end is split into a tail part (read_lens[0])
        # and a part taken from offset 0 (read_lens[1]).
        pos = hda_str.mem.tell()
        read_lens = [buf_len, 0]
        if pos + buf_len >= hda_str.buf_len*2:
            read_lens[0] = hda_str.buf_len*2 - pos
            read_lens[1] = buf_len - read_lens[0]
        # validate the read lens
        # Both parts must be multiples of 128 bytes.
        # NOTE(review): presumably the unit the firmware writes in -- confirm.
        assert (read_lens[0] + pos) <= (hda_str.buf_len*2)
        assert read_lens[0] % 128 == 0
        assert read_lens[1] % 128 == 0
        buf_data0 = hda_str.mem.read(read_lens[0])
        hda_msg0 = buf_data0.decode("utf-8", "replace")
        sys.stdout.write(hda_msg0)
        if read_lens[1] != 0:
            hda_str.mem.seek(0)
            buf_data1 = hda_str.mem.read(read_lens[1])
            hda_msg1 = buf_data1.decode("utf-8", "replace")
            sys.stdout.write(hda_msg1)
        # NOTE(review): this final position is assigned but never used.
        pos = hda_str.mem.tell()
        sys.stdout.flush()
    else:
        log.warning(f"cavstool: Unrecognized IPC command 0x{data:x} ext 0x{ext_data:x}")
        if not fw_is_alive():
            if args.log_only:
                log.info("DSP power seems off")
                wait_fw_entered(timeout_s=None)
            else:
                log.warning("DSP power seems off?!")
                time.sleep(2) # potential spam reduction

            # Firmware is not alive: skip the acknowledge sequence below.
            return

    # Acknowledge the command, signal DONE unless that was deferred, and
    # send the reply message if one was requested.
    if adsp_is_ace():
        # NOTE(review): the "v1.5" remark below mirrors the non-ACE branch;
        # whether it applies to ACE hardware is not visible here.
        dsp.HFIPCXTDR = 1<<31 # Ack local interrupt, also signals DONE on v1.5
        if done:
            dsp.HFIPCXTDA = ~(1<<31) & dsp.HFIPCXTDA # Signal done
        if send_msg:
            log.debug("ipc: sending msg 0x%08x" % ext_data)
            dsp.HFIPCXIDDY = ext_data
            dsp.HFIPCXIDR = (1<<31) | ext_data
    else:
        dsp.HIPCTDR = 1<<31 # Ack local interrupt, also signals DONE on v1.5
        if cavs18:
            time.sleep(0.01) # Needed on 1.8, or the command below won't send!
        if done and not cavs15:
            dsp.HIPCTDA = 1<<31 # Signal done
        if send_msg:
            dsp.HIPCIDD = ext_data
            dsp.HIPCIDR = (1<<31) | ext_data
915
def handle_ipc():
    """Poll the IPC registers once and service whatever is pending.

    If bit 31 of the IDA register is set, it is written back to
    acknowledge the DONE interrupt.  If bit 31 of the TDR register is
    set, the remaining TDR bits and the companion data register are
    handed to ipc_command().  ACE hardware uses the HFIPCX* register
    names; other hardware uses the HIPC* names.
    """
    bit31 = 1 << 31

    if not adsp_is_ace():
        if dsp.HIPCIDA & bit31:
            # must ACK any DONE interrupts that arrive!
            dsp.HIPCIDA = bit31
        if dsp.HIPCTDR & bit31:
            ipc_command(dsp.HIPCTDR & ~bit31, dsp.HIPCTDD)
        return

    if dsp.HFIPCXIDA & bit31:
        log.debug("ipc: Ack DSP reply with IDA_DONE")
        # must ACK any DONE interrupts that arrive!
        dsp.HFIPCXIDA = bit31
    if dsp.HFIPCXTDR & bit31:
        ipc_command(dsp.HFIPCXTDR & ~bit31, dsp.HFIPCXTDDY)
929
async def main():
    """Map the device, load (or wait for) firmware, then stream its log.

    Steps, in order:
      1. Map the hardware registers; exit with status 1 on failure.
      2. With --log-only, wait for already-running firmware; otherwise
         load the firmware file (exit with status 1 if none was given).
      3. Optionally create the shell PTY.
      4. Poll every 30 ms: forward shell output, print new log output,
         and (unless --log-only) service IPC commands.
    """
    # TODO (from the original author): this bit me, remove the globals,
    # write a little FirmwareLoader class or something to contain.
    global hda, sd, dsp, hda_ostream_id, hda_streams

    try:
        hda, sd, dsp, hda_ostream_id = map_regs()
    except Exception as err:
        log.error("Could not map device in sysfs; run as root?")
        log.error(err)
        sys.exit(1)

    log.info(f"Detected cAVS {'1.5' if cavs15 else '1.8+'} hardware")

    if not args.log_only:
        if not args.fw_file:
            log.error("Firmware file argument missing")
            sys.exit(1)

        loader = load_firmware_ace if adsp_is_ace() else load_firmware
        loader(args.fw_file)
        time.sleep(0.1)

        if not args.quiet:
            sys.stdout.write("--\n")
    else:
        wait_fw_entered(timeout_s=None)

    if args.shell_pty:
        create_shell_pty()

    hda_streams = {}

    log_seq = 0
    shell_seq = 0
    while start_output is True:
        await asyncio.sleep(0.03)
        if args.shell_pty:
            shell_seq = read_from_shell_memwindow_winstream(shell_seq)
        log_seq, text = winstream_read(winstream_offset(), log_seq)
        if text:
            sys.stdout.write(text)
            sys.stdout.flush()
        if not args.log_only:
            handle_ipc()
976
977
# Command-line interface: five boolean switches plus an optional firmware path.
ap = argparse.ArgumentParser(
    description="DSP loader/logger tool",
    allow_abbrev=False,
)
ap.add_argument(
    "-q", "--quiet",
    action="store_true",
    help="No loader output, just DSP logging",
)
ap.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="More loader output, DEBUG logging level",
)
ap.add_argument(
    "-l", "--log-only",
    action="store_true",
    help="Don't load firmware, just show log output",
)
ap.add_argument(
    "-p", "--shell-pty",
    action="store_true",
    help="Create a Zephyr shell pty if enabled in firmware",
)
ap.add_argument(
    "-n", "--no-history",
    action="store_true",
    help="No current log buffer at start, just new output",
)
ap.add_argument("fw_file", nargs="?", help="Firmware file")

args = ap.parse_args()

# --quiet takes precedence over --verbose when both are given.
if args.quiet or args.verbose:
    log.setLevel(logging.WARN if args.quiet else logging.DEBUG)
997
# Script entry point: run the async main() until it returns or is interrupted.
if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C: exit without a traceback, and clear the flag that
        # main()'s polling loop tests.
        start_output = False
1003