1# Copyright (c) 2019, Nordic Semiconductor
2# SPDX-License-Identifier: BSD-3-Clause
3
4# Tip: You can view just the documentation with 'pydoc3 devicetree.dtlib'
5
6"""
7A library for extracting information from .dts (devicetree) files. See the
8documentation for the DT and Node classes for more information.
9
10The top-level entry point of the library is the DT class. DT.__init__() takes a
11.dts file to parse and a list of directories to search for any /include/d
12files.
13"""
14
15import collections
16import enum
17import errno
18import os
19import re
20import string
21import sys
22import textwrap
23from typing import Any, Dict, Iterable, List, \
24    NamedTuple, NoReturn, Optional, Set, Tuple, TYPE_CHECKING, Union
25
26# NOTE: tests/test_dtlib.py is the test suite for this library.
27
28class DTError(Exception):
29    "Exception raised for devicetree-related errors"
30
31class Node:
32    r"""
33    Represents a node in the devicetree ('node-name { ... };').
34
35    These attributes are available on Node instances:
36
37    name:
38      The name of the node (a string).
39
40    unit_addr:
41      The portion after the '@' in the node's name, or the empty string if the
42      name has no '@' in it.
43
44      Note that this is a string. Run int(node.unit_addr, 16) to get an
45      integer.
46
47    props:
48      A dict that maps the properties defined on the node to
49      their values. 'props' is indexed by property name (a string), and values
50      are Property objects.
51
52      To convert property values to Python numbers or strings, use
53      dtlib.to_num(), dtlib.to_nums(), or dtlib.to_string().
54
55      Property values are represented as 'bytes' arrays to support the full
56      generality of DTS, which allows assignments like
57
58        x = "foo", < 0x12345678 >, [ 9A ];
59
60      This gives x the value b"foo\0\x12\x34\x56\x78\x9A". Numbers in DTS are
61      stored in big-endian format.
62
63    nodes:
64      A dict containing the subnodes of the node, indexed by name.
65
66    labels:
67      A list with all labels pointing to the node, in the same order as the
68      labels appear, but with duplicates removed.
69
70      'label_1: label_2: node { ... };' gives 'labels' the value
71      ["label_1", "label_2"].
72
73    parent:
74      The parent Node of the node. 'None' for the root node.
75
76    path:
77      The path to the node as a string, e.g. "/foo/bar".
78
79    dt:
80      The DT instance this node belongs to.
81    """
82
83    #
84    # Public interface
85    #
86
87    def __init__(self, name: str, parent: Optional['Node'], dt: 'DT'):
88        """
89        Node constructor. Not meant to be called directly by clients.
90        """
91        # Remember to update DT.__deepcopy__() if you change this.
92
93        self._name = name
94        self.props: Dict[str, 'Property'] = {}
95        self.nodes: Dict[str, 'Node'] = {}
96        self.labels: List[str] = []
97        self.parent = parent
98        self.dt = dt
99
100        self._omit_if_no_ref = False
101        self._is_referenced = False
102
103        if name.count("@") > 1:
104            dt._parse_error("multiple '@' in node name")
105        if not name == "/":
106            for char in name:
107                if char not in _nodename_chars:
108                    dt._parse_error(f"{self.path}: bad character '{char}' "
109                                    "in node name")
110
111    @property
112    def name(self) -> str:
113        """
114        See the class documentation.
115        """
116        # Converted to a property to discourage renaming -- that has to be done
117        # via DT.move_node.
118        return self._name
119
120    @property
121    def unit_addr(self) -> str:
122        """
123        See the class documentation.
124        """
125        return self.name.partition("@")[2]
126
127    @property
128    def path(self) -> str:
129        """
130        See the class documentation.
131        """
132        node_names = []
133
134        # This dynamic computation is required to be able to move
135        # nodes in the DT class.
136        cur = self
137        while cur.parent:
138            node_names.append(cur.name)
139            cur = cur.parent
140
141        return "/" + "/".join(reversed(node_names))
142
143    def node_iter(self) -> Iterable['Node']:
144        """
145        Returns a generator for iterating over the node and its children,
146        recursively.
147
148        For example, this will iterate over all nodes in the tree (like
149        dt.node_iter()).
150
151          for node in dt.root.node_iter():
152              ...
153        """
154        yield self
155        for node in self.nodes.values():
156            yield from node.node_iter()
157
158    def _get_prop(self, name: str) -> 'Property':
159        # Returns the property named 'name' on the node, creating it if it
160        # doesn't already exist
161
162        prop = self.props.get(name)
163        if not prop:
164            prop = Property(self, name)
165            self.props[name] = prop
166        return prop
167
168    def _del(self) -> None:
169        # Removes the node from the tree
170        self.parent.nodes.pop(self.name)  # type: ignore
171
172    def __str__(self):
173        """
174        Returns a DTS representation of the node. Called automatically if the
175        node is print()ed.
176        """
177        s = "".join(label + ": " for label in self.labels)
178
179        s += f"{self.name} {{\n"
180
181        for prop in self.props.values():
182            s += "\t" + str(prop) + "\n"
183
184        for child in self.nodes.values():
185            s += textwrap.indent(child.__str__(), "\t") + "\n"
186
187        s += "};"
188
189        return s
190
191    def __repr__(self):
192        """
193        Returns some information about the Node instance. Called automatically
194        if the Node instance is evaluated.
195        """
196        return f"<Node {self.path} in '{self.dt.filename}'>"
197
198# See Property.type
199class Type(enum.IntEnum):
200    EMPTY = 0
201    BYTES = 1
202    NUM = 2
203    NUMS = 3
204    STRING = 4
205    STRINGS = 5
206    PATH = 6
207    PHANDLE = 7
208    PHANDLES = 8
209    PHANDLES_AND_NUMS = 9
210    COMPOUND = 10
211
212class _MarkerType(enum.IntEnum):
213    # Types of markers in property values
214
215    # References
216    PATH = 0     # &foo
217    PHANDLE = 1  # <&foo>
218    LABEL = 2    # foo: <1 2 3>
219
220    # Start of data blocks of specific type
221    UINT8 = 3   # [00 01 02] (and also used for /incbin/)
222    UINT16 = 4  # /bits/ 16 <1 2 3>
223    UINT32 = 5  # <1 2 3>
224    UINT64 = 6  # /bits/ 64 <1 2 3>
225    STRING = 7  # "foo"
226
227class Property:
228    """
229    Represents a property ('x = ...').
230
231    These attributes are available on Property instances:
232
233    name:
234      The name of the property (a string).
235
236    value:
237      The value of the property, as a 'bytes' string. Numbers are stored in
238      big-endian format, and strings are null-terminated. Putting multiple
239      comma-separated values in an assignment (e.g., 'x = < 1 >, "foo"') will
240      concatenate the values.
241
242      See the to_*() methods for converting the value to other types.
243
244    type:
245      The type of the property, inferred from the syntax used in the
246      assignment. This is one of the following constants (with example
247      assignments):
248
249        Assignment                  | Property.type
250        ----------------------------+------------------------
251        foo;                        | dtlib.Type.EMPTY
252        foo = [];                   | dtlib.Type.BYTES
253        foo = [01 02];              | dtlib.Type.BYTES
254        foo = /bits/ 8 <1>;         | dtlib.Type.BYTES
255        foo = <1>;                  | dtlib.Type.NUM
256        foo = <>;                   | dtlib.Type.NUMS
257        foo = <1 2 3>;              | dtlib.Type.NUMS
258        foo = <1 2>, <3>;           | dtlib.Type.NUMS
259        foo = "foo";                | dtlib.Type.STRING
260        foo = "foo", "bar";         | dtlib.Type.STRINGS
261        foo = <&l>;                 | dtlib.Type.PHANDLE
262        foo = <&l1 &l2 &l3>;        | dtlib.Type.PHANDLES
263        foo = <&l1 &l2>, <&l3>;     | dtlib.Type.PHANDLES
264        foo = <&l1 1 2 &l2 3 4>;    | dtlib.Type.PHANDLES_AND_NUMS
265        foo = <&l1 1 2>, <&l2 3 4>; | dtlib.Type.PHANDLES_AND_NUMS
266        foo = &l;                   | dtlib.Type.PATH
267        *Anything else*             | dtlib.Type.COMPOUND
268
269      *Anything else* includes properties mixing phandle (<&label>) and node
270      path (&label) references with other data.
271
272      Data labels in the property value do not influence the type.
273
274    labels:
275      A list with all labels pointing to the property, in the same order as the
276      labels appear, but with duplicates removed.
277
278      'label_1: label2: x = ...' gives 'labels' the value
279      ["label_1", "label_2"].
280
281    offset_labels:
282      A dictionary that maps any labels within the property's value to their
283      offset, in bytes. For example, 'x = < 0 label_1: 1 label_2: >' gives
284      'offset_labels' the value {"label_1": 4, "label_2": 8}.
285
286      Iteration order will match the order of the labels on Python versions
287      that preserve dict insertion order.
288
289    node:
290      The Node the property is on.
291    """
292
293    #
294    # Public interface
295    #
296
297    def __init__(self, node: Node, name: str):
298        # Remember to update DT.__deepcopy__() if you change this.
299
300        if "@" in name:
301            node.dt._parse_error("'@' is only allowed in node names")
302
303        self.name = name
304        self.value = b""
305        self.labels: List[str] = []
306        # We have to wait to set this until later, when we've got
307        # the entire tree.
308        self.offset_labels: Dict[str, int] = {}
309        self.node: Node = node
310
311        self._label_offset_lst: List[Tuple[str, int]] = []
312
313        # A list of [offset, label, type] lists (sorted by offset),
314        # giving the locations of references within the value. 'type'
315        # is either _MarkerType.PATH, for a node path reference,
316        # _MarkerType.PHANDLE, for a phandle reference, or
317        # _MarkerType.LABEL, for a label on/within data. Node paths
318        # and phandles need to be patched in after parsing.
319        self._markers: List[List] = []
320
321    @property
322    def type(self) -> Type:
323        """
324        See the class documentation.
325        """
326        # Data labels (e.g. 'foo = label: <3>') are irrelevant, so filter them
327        # out
328        types = [marker[1] for marker in self._markers
329                 if marker[1] != _MarkerType.LABEL]
330
331        if not types:
332            return Type.EMPTY
333
334        if types == [_MarkerType.UINT8]:
335            return Type.BYTES
336
337        if types == [_MarkerType.UINT32]:
338            return Type.NUM if len(self.value) == 4 else Type.NUMS
339
340        # Treat 'foo = <1 2 3>, <4 5>, ...' as Type.NUMS too
341        if set(types) == {_MarkerType.UINT32}:
342            return Type.NUMS
343
344        if set(types) == {_MarkerType.STRING}:
345            return Type.STRING if len(types) == 1 else Type.STRINGS
346
347        if types == [_MarkerType.PATH]:
348            return Type.PATH
349
350        if types == [_MarkerType.UINT32, _MarkerType.PHANDLE] and \
351                len(self.value) == 4:
352            return Type.PHANDLE
353
354        if set(types) == {_MarkerType.UINT32, _MarkerType.PHANDLE}:
355            if len(self.value) == 4*types.count(_MarkerType.PHANDLE):
356                # Array with just phandles in it
357                return Type.PHANDLES
358            # Array with both phandles and numbers
359            return Type.PHANDLES_AND_NUMS
360
361        return Type.COMPOUND
362
363    def to_num(self, signed=False) -> int:
364        """
365        Returns the value of the property as a number.
366
367        Raises DTError if the property was not assigned with this syntax (has
368        Property.type Type.NUM):
369
370            foo = < 1 >;
371
372        signed (default: False):
373          If True, the value will be interpreted as signed rather than
374          unsigned.
375        """
376        if self.type is not Type.NUM:
377            _err("expected property '{0}' on {1} in {2} to be assigned with "
378                 "'{0} = < (number) >;', not '{3}'"
379                 .format(self.name, self.node.path, self.node.dt.filename,
380                         self))
381
382        return int.from_bytes(self.value, "big", signed=signed)
383
384    def to_nums(self, signed=False) -> List[int]:
385        """
386        Returns the value of the property as a list of numbers.
387
388        Raises DTError if the property was not assigned with this syntax (has
389        Property.type Type.NUM or Type.NUMS):
390
391            foo = < 1 2 ... >;
392
393        signed (default: False):
394          If True, the values will be interpreted as signed rather than
395          unsigned.
396        """
397        if self.type not in (Type.NUM, Type.NUMS):
398            _err("expected property '{0}' on {1} in {2} to be assigned with "
399                 "'{0} = < (number) (number) ... >;', not '{3}'"
400                 .format(self.name, self.node.path, self.node.dt.filename,
401                         self))
402
403        return [int.from_bytes(self.value[i:i + 4], "big", signed=signed)
404                for i in range(0, len(self.value), 4)]
405
406    def to_bytes(self) -> bytes:
407        """
408        Returns the value of the property as a raw 'bytes', like
409        Property.value, except with added type checking.
410
411        Raises DTError if the property was not assigned with this syntax (has
412        Property.type Type.BYTES):
413
414            foo = [ 01 ... ];
415        """
416        if self.type is not Type.BYTES:
417            _err("expected property '{0}' on {1} in {2} to be assigned with "
418                 "'{0} = [ (byte) (byte) ... ];', not '{3}'"
419                 .format(self.name, self.node.path, self.node.dt.filename,
420                         self))
421
422        return self.value
423
424    def to_string(self) -> str:
425        """
426        Returns the value of the property as a string.
427
428        Raises DTError if the property was not assigned with this syntax (has
429        Property.type Type.STRING):
430
431            foo = "string";
432
433        This function might also raise UnicodeDecodeError if the string is
434        not valid UTF-8.
435        """
436        if self.type is not Type.STRING:
437            _err("expected property '{0}' on {1} in {2} to be assigned with "
438                 "'{0} = \"string\";', not '{3}'"
439                 .format(self.name, self.node.path, self.node.dt.filename,
440                         self))
441
442        try:
443            ret = self.value.decode("utf-8")[:-1]  # Strip null
444        except UnicodeDecodeError:
445            _err(f"value of property '{self.name}' ({self.value!r}) "
446                 f"on {self.node.path} in {self.node.dt.filename} "
447                 "is not valid UTF-8")
448
449        return ret  # The separate 'return' appeases the type checker.
450
451    def to_strings(self) -> List[str]:
452        """
453        Returns the value of the property as a list of strings.
454
455        Raises DTError if the property was not assigned with this syntax (has
456        Property.type Type.STRING or Type.STRINGS):
457
458            foo = "string", "string", ... ;
459
460        Also raises DTError if any of the strings are not valid UTF-8.
461        """
462        if self.type not in (Type.STRING, Type.STRINGS):
463            _err("expected property '{0}' on {1} in {2} to be assigned with "
464                 "'{0} = \"string\", \"string\", ... ;', not '{3}'"
465                 .format(self.name, self.node.path, self.node.dt.filename,
466                         self))
467
468        try:
469            ret = self.value.decode("utf-8").split("\0")[:-1]
470        except UnicodeDecodeError:
471            _err(f"value of property '{self.name}' ({self.value!r}) "
472                 f"on {self.node.path} in {self.node.dt.filename} "
473                 "is not valid UTF-8")
474
475        return ret  # The separate 'return' appeases the type checker.
476
477    def to_node(self) -> Node:
478        """
479        Returns the Node the phandle in the property points to.
480
481        Raises DTError if the property was not assigned with this syntax (has
482        Property.type Type.PHANDLE).
483
484            foo = < &bar >;
485        """
486        if self.type is not Type.PHANDLE:
487            _err("expected property '{0}' on {1} in {2} to be assigned with "
488                 "'{0} = < &foo >;', not '{3}'"
489                 .format(self.name, self.node.path, self.node.dt.filename,
490                         self))
491
492        return self.node.dt.phandle2node[int.from_bytes(self.value, "big")]
493
494    def to_nodes(self) -> List[Node]:
495        """
496        Returns a list with the Nodes the phandles in the property point to.
497
498        Raises DTError if the property value contains anything other than
499        phandles. All of the following are accepted:
500
501            foo = < >
502            foo = < &bar >;
503            foo = < &bar &baz ... >;
504            foo = < &bar ... >, < &baz ... >;
505        """
506        def type_ok():
507            if self.type in (Type.PHANDLE, Type.PHANDLES):
508                return True
509            # Also accept 'foo = < >;'
510            return self.type is Type.NUMS and not self.value
511
512        if not type_ok():
513            _err("expected property '{0}' on {1} in {2} to be assigned with "
514                 "'{0} = < &foo &bar ... >;', not '{3}'"
515                 .format(self.name, self.node.path,
516                         self.node.dt.filename, self))
517
518        return [self.node.dt.phandle2node[int.from_bytes(self.value[i:i + 4],
519                                                         "big")]
520                for i in range(0, len(self.value), 4)]
521
522    def to_path(self) -> Node:
523        """
524        Returns the Node referenced by the path stored in the property.
525
526        Raises DTError if the property was not assigned with either of these
527        syntaxes (has Property.type Type.PATH or Type.STRING):
528
529            foo = &bar;
530            foo = "/bar";
531
532        For the second case, DTError is raised if the path does not exist.
533        """
534        if self.type not in (Type.PATH, Type.STRING):
535            _err("expected property '{0}' on {1} in {2} to be assigned with "
536                 "either '{0} = &foo' or '{0} = \"/path/to/node\"', not '{3}'"
537                 .format(self.name, self.node.path, self.node.dt.filename,
538                         self))
539
540        try:
541            path = self.value.decode("utf-8")[:-1]
542        except UnicodeDecodeError:
543            _err(f"value of property '{self.name}' ({self.value!r}) "
544                 f"on {self.node.path} in {self.node.dt.filename} "
545                 "is not valid UTF-8")
546
547        try:
548            ret = self.node.dt.get_node(path)
549        except DTError:
550            _err(f"property '{self.name}' on {self.node.path} in "
551                 f"{self.node.dt.filename} points to the non-existent node "
552                 f'"{path}"')
553
554        return ret  # The separate 'return' appeases the type checker.
555
556    def __str__(self):
557        s = "".join(label + ": " for label in self.labels) + self.name
558        if not self.value:
559            return s + ";"
560
561        s += " ="
562
563        for i, (pos, marker_type, ref) in enumerate(self._markers):
564            if i < len(self._markers) - 1:
565                next_marker = self._markers[i + 1]
566            else:
567                next_marker = None
568
569            # End of current marker
570            end = next_marker[0] if next_marker else len(self.value)
571
572            if marker_type is _MarkerType.STRING:
573                # end - 1 to strip off the null terminator
574                s += f' "{_decode_and_escape(self.value[pos:end - 1])}"'
575                if end != len(self.value):
576                    s += ","
577            elif marker_type is _MarkerType.PATH:
578                s += " &" + ref
579                if end != len(self.value):
580                    s += ","
581            else:
582                # <> or []
583
584                if marker_type is _MarkerType.LABEL:
585                    s += f" {ref}:"
586                elif marker_type is _MarkerType.PHANDLE:
587                    s += " &" + ref
588                    pos += 4
589                    # Subtle: There might be more data between the phandle and
590                    # the next marker, so we can't 'continue' here
591                else:  # marker_type is _MarkerType.UINT*
592                    elm_size = _TYPE_TO_N_BYTES[marker_type]
593                    s += _N_BYTES_TO_START_STR[elm_size]
594
595                while pos != end:
596                    num = int.from_bytes(self.value[pos:pos + elm_size],
597                                         "big")
598                    if elm_size == 1:
599                        s += f" {num:02X}"
600                    else:
601                        s += f" {hex(num)}"
602
603                    pos += elm_size
604
605                if pos != 0 and \
606                   (not next_marker or
607                    next_marker[1] not in (_MarkerType.PHANDLE, _MarkerType.LABEL)):
608
609                    s += _N_BYTES_TO_END_STR[elm_size]
610                    if pos != len(self.value):
611                        s += ","
612
613        return s + ";"
614
615
616    def __repr__(self):
617        return f"<Property '{self.name}' at '{self.node.path}' in " \
618            f"'{self.node.dt.filename}'>"
619
620    #
621    # Internal functions
622    #
623
624    def _add_marker(self, marker_type: _MarkerType, data: Any = None):
625        # Helper for registering markers in the value that are processed after
626        # parsing. See _fixup_props(). 'marker_type' identifies the type of
627        # marker, and 'data' has any optional data associated with the marker.
628
629        # len(self.value) gives the current offset. This function is called
630        # while the value is built. We use a list instead of a tuple to be able
631        # to fix up offsets later (they might increase if the value includes
632        # path references, e.g. 'foo = &bar, <3>;', which are expanded later).
633        self._markers.append([len(self.value), marker_type, data])
634
635        # For phandle references, add a dummy value with the same length as a
636        # phandle. This is handy for the length check in _register_phandles().
637        if marker_type is _MarkerType.PHANDLE:
638            self.value += b"\0\0\0\0"
639
640class _T(enum.IntEnum):
641    # Token IDs used by the DT lexer.
642
643    # These values must be contiguous and start from 1.
644    INCLUDE = 1
645    LINE = 2
646    STRING = 3
647    DTS_V1 = 4
648    PLUGIN = 5
649    MEMRESERVE = 6
650    BITS = 7
651    DEL_PROP = 8
652    DEL_NODE = 9
653    OMIT_IF_NO_REF = 10
654    LABEL = 11
655    CHAR_LITERAL = 12
656    REF = 13
657    INCBIN = 14
658    SKIP = 15
659    EOF = 16
660
661    # These values must be larger than the above contiguous range.
662    NUM = 17
663    PROPNODENAME = 18
664    MISC = 19
665    BYTE = 20
666    BAD = 21
667
668class _FileStackElt(NamedTuple):
669    # Used for maintaining the /include/ stack.
670
671    filename: str
672    lineno: int
673    contents: str
674    pos: int
675
676_TokVal = Union[int, str]
677
678class _Token(NamedTuple):
679    id: int
680    val: _TokVal
681
682    def __repr__(self):
683        id_repr = _T(self.id).name
684        return f'Token(id=_T.{id_repr}, val={repr(self.val)})'
685
686class DT:
687    """
688    Represents a devicetree parsed from a .dts file (or from many files, if the
689    .dts file /include/s other files). Creating many instances of this class is
690    fine. The library has no global state.
691
692    These attributes are available on DT instances:
693
694    root:
695      A Node instance representing the root (/) node.
696
697    alias2node:
698      A dictionary that maps maps alias strings (from /aliases) to Node
699      instances
700
701    label2node:
702      A dictionary that maps each node label (a string) to the Node instance
703      for the node.
704
705    label2prop:
706      A dictionary that maps each property label (a string) to a Property
707      instance.
708
709    label2prop_offset:
710      A dictionary that maps each label (a string) within a property value
711      (e.g., 'x = label_1: < 1 label2: 2 >;') to a (prop, offset) tuple, where
712      'prop' is a Property instance and 'offset' the byte offset (0 for label_1
713      and 4 for label_2 in the example).
714
715    phandle2node:
716      A dictionary that maps each phandle (a number) to a Node instance.
717
718    memreserves:
719      A list of (labels, address, length) tuples for the /memreserve/s in the
720      .dts file, in the same order as they appear in the file.
721
722      'labels' is a possibly empty set with all labels preceding the memreserve
723      (e.g., 'label1: label2: /memreserve/ ...'). 'address' and 'length' are
724      numbers.
725
726    filename:
727      The filename passed to the DT constructor.
728    """
729
730    #
731    # Public interface
732    #
733
734    def __init__(self, filename: Optional[str], include_path: Iterable[str] = (),
735                 force: bool = False):
736        """
737        Parses a DTS file to create a DT instance. Raises OSError if 'filename'
738        can't be opened, and DTError for any parse errors.
739
740        filename:
741          Path to the .dts file to parse. (If None, an empty devicetree
742          is created; this is unlikely to be what you want.)
743
744        include_path:
745          An iterable (e.g. list or tuple) containing paths to search for
746          /include/d and /incbin/'d files. By default, files are only looked up
747          relative to the .dts file that contains the /include/ or /incbin/.
748
749        force:
750          Try not to raise DTError even if the input tree has errors.
751          For experimental use; results not guaranteed.
752        """
753        # Remember to update __deepcopy__() if you change this.
754
755        self._root: Optional[Node] = None
756        self.alias2node: Dict[str, Node] = {}
757        self.label2node: Dict[str, Node] = {}
758        self.label2prop: Dict[str, Property] = {}
759        self.label2prop_offset: Dict[str, Tuple[Property, int]] = {}
760        self.phandle2node: Dict[int, Node] = {}
761        self.memreserves: List[Tuple[Set[str], int, int]] = []
762        self.filename = filename
763
764        self._force = force
765
766        if filename is not None:
767            self._parse_file(filename, include_path)
768        else:
769            self._include_path: List[str] = []
770
771    @property
772    def root(self) -> Node:
773        """
774        See the class documentation.
775        """
776        # This is necessary because mypy can't tell that we never
777        # treat self._root as a non-None value until it's initialized
778        # properly in _parse_dt().
779        return self._root       # type: ignore
780
781    def get_node(self, path: str) -> Node:
782        """
783        Returns the Node instance for the node with path or alias 'path' (a
784        string). Raises DTError if the path or alias doesn't exist.
785
786        For example, both dt.get_node("/foo/bar") and dt.get_node("bar-alias")
787        will return the 'bar' node below:
788
789          /dts-v1/;
790
791          / {
792                  foo {
793                          bar_label: bar {
794                                  baz {
795                                  };
796                          };
797                  };
798
799                  aliases {
800                          bar-alias = &bar-label;
801                  };
802          };
803
804        Fetching subnodes via aliases is supported:
805        dt.get_node("bar-alias/baz") returns the 'baz' node.
806        """
807        if path.startswith("/"):
808            return _root_and_path_to_node(self.root, path, path)
809
810        # Path does not start with '/'. First component must be an alias.
811        alias, _, rest = path.partition("/")
812        if alias not in self.alias2node:
813            _err(f"no alias '{alias}' found -- did you forget the leading "
814                 "'/' in the node path?")
815
816        return _root_and_path_to_node(self.alias2node[alias], rest, path)
817
818    def has_node(self, path: str) -> bool:
819        """
820        Returns True if the path or alias 'path' exists. See DT.get_node().
821        """
822        try:
823            self.get_node(path)
824            return True
825        except DTError:
826            return False
827
828    def move_node(self, node: Node, new_path: str):
829        """
830        Move a node 'node' to a new path 'new_path'. The entire subtree
831        rooted at 'node' is moved along with it.
832
833        You cannot move the root node or provide a 'new_path' value
834        where a node already exists. This method raises an exception
835        in both cases.
836
837        As a restriction on the current implementation, the parent node
838        of the new path must exist.
839        """
840        if node is self.root:
841            _err("the root node can't be moved")
842
843        if self.has_node(new_path):
844            _err(f"can't move '{node.path}' to '{new_path}': "
845                 'destination node exists')
846
847        if not new_path.startswith('/'):
848            _err(f"path '{new_path}' doesn't start with '/'")
849
850        for component in new_path.split('/'):
851            for char in component:
852                if char not in _nodename_chars:
853                    _err(f"new path '{new_path}': bad character '{char}'")
854
855        old_name = node.name
856        old_path = node.path
857
858        new_parent_path, _, new_name = new_path.rpartition('/')
859        if new_parent_path == '':
860            # '/foo'.rpartition('/') is ('', '/', 'foo').
861            new_parent_path = '/'
862        if not self.has_node(new_parent_path):
863            _err(f"can't move '{old_path}' to '{new_path}': "
864                 f"parent node '{new_parent_path}' doesn't exist")
865        new_parent = self.get_node(new_parent_path)
866        if TYPE_CHECKING:
867            assert new_parent is not None
868            assert node.parent is not None
869
870        del node.parent.nodes[old_name]
871        node._name = new_name
872        node.parent = new_parent
873        new_parent.nodes[new_name] = node
874
875    def node_iter(self) -> Iterable[Node]:
876        """
877        Returns a generator for iterating over all nodes in the devicetree.
878
879        For example, this will print the name of each node that has a property
880        called 'foo':
881
882          for node in dt.node_iter():
883              if "foo" in node.props:
884                  print(node.name)
885        """
886        yield from self.root.node_iter()
887
888    def __str__(self):
889        """
890        Returns a DTS representation of the devicetree. Called automatically if
891        the DT instance is print()ed.
892        """
893        s = "/dts-v1/;\n\n"
894
895        if self.memreserves:
896            for labels, address, offset in self.memreserves:
897                # List the labels in a consistent order to help with testing
898                for label in labels:
899                    s += f"{label}: "
900                s += f"/memreserve/ {address:#018x} {offset:#018x};\n"
901            s += "\n"
902
903        return s + str(self.root)
904
905    def __repr__(self):
906        """
907        Returns some information about the DT instance. Called automatically if
908        the DT instance is evaluated.
909        """
910        if self.filename:
911            return f"DT(filename='{self.filename}', " \
912                f"include_path={self._include_path})"
913        return super().__repr__()
914
915    def __deepcopy__(self, memo):
916        """
917        Implements support for the standard library copy.deepcopy()
918        function on DT instances.
919        """
920
921        # We need a new DT, obviously. Make a new, empty one.
922        ret = DT(None, (), self._force)
923
924        # Now allocate new Node objects for every node in self, to use
925        # in the new DT. Set their parents to None for now and leave
926        # them without any properties. We will recursively initialize
927        # copies of parents before copies of children next.
928        path2node_copy = {
929            node.path: Node(node.name, None, ret)
930            for node in self.node_iter()
931        }
932
933        # Point each copy of a node to the copy of its parent and set up
934        # copies of each property.
935        #
936        # Share data when possible. For example, Property.value has
937        # type 'bytes', which is immutable. We therefore don't need a
938        # copy and can just point to the original data.
939
940        for node in self.node_iter():
941            node_copy = path2node_copy[node.path]
942
943            parent = node.parent
944            if parent is not None:
945                node_copy.parent = path2node_copy[parent.path]
946
947            prop_name2prop_copy = {
948                prop.name: Property(node_copy, prop.name)
949                for prop in node.props.values()
950            }
951            for prop_name, prop_copy in prop_name2prop_copy.items():
952                prop = node.props[prop_name]
953                prop_copy.value = prop.value
954                prop_copy.labels = prop.labels[:]
955                prop_copy.offset_labels = prop.offset_labels.copy()
956                prop_copy._label_offset_lst = prop._label_offset_lst[:]
957                prop_copy._markers = [marker[:] for marker in prop._markers]
958            node_copy.props = prop_name2prop_copy
959
960            node_copy.nodes = {
961                child_name: path2node_copy[child_node.path]
962                for child_name, child_node in node.nodes.items()
963            }
964
965            node_copy.labels = node.labels[:]
966
967            node_copy._omit_if_no_ref = node._omit_if_no_ref
968            node_copy._is_referenced = node._is_referenced
969
970        # The copied nodes and properties are initialized, so
971        # we can finish initializing the copied DT object now.
972
973        ret._root = path2node_copy['/']
974
975        def copy_node_lookup_table(attr_name):
976            original = getattr(self, attr_name)
977            copy = {
978                key: path2node_copy[original[key].path]
979                for key in original
980            }
981            setattr(ret, attr_name, copy)
982
983        copy_node_lookup_table('alias2node')
984        copy_node_lookup_table('label2node')
985        copy_node_lookup_table('phandle2node')
986
987        ret_label2prop = {}
988        for label, prop in self.label2prop.items():
989            node_copy = path2node_copy[prop.node.path]
990            prop_copy = node_copy.props[prop.name]
991            ret_label2prop[label] = prop_copy
992        ret.label2prop = ret_label2prop
993
994        ret_label2prop_offset = {}
995        for label, prop_offset in self.label2prop_offset.items():
996            prop, offset = prop_offset
997            node_copy = path2node_copy[prop.node.path]
998            prop_copy = node_copy.props[prop.name]
999            ret_label2prop_offset[label] = (prop_copy, offset)
1000        ret.label2prop_offset = ret_label2prop_offset
1001
1002        ret.memreserves = [
1003            (set(memreserve[0]), memreserve[1], memreserve[2])
1004            for memreserve in self.memreserves
1005        ]
1006
1007        ret.filename = self.filename
1008
1009        return ret
1010
1011    #
1012    # Parsing
1013    #
1014
1015    def _parse_file(self, filename: str, include_path: Iterable[str]):
1016        self._include_path = list(include_path)
1017
1018        with open(filename, encoding="utf-8") as f:
1019            self._file_contents = f.read()
1020
1021        self._tok_i = self._tok_end_i = 0
1022        self._filestack: List[_FileStackElt] = []
1023
1024        self._lexer_state: int = _DEFAULT
1025        self._saved_token: Optional[_Token] = None
1026
1027        self._lineno: int = 1
1028
1029        self._parse_header()
1030        self._parse_memreserves()
1031        self._parse_dt()
1032
1033        self._register_phandles()
1034        self._fixup_props()
1035        self._register_aliases()
1036        self._remove_unreferenced()
1037        self._register_labels()
1038
1039    def _parse_header(self):
1040        # Parses /dts-v1/ (expected) and /plugin/ (unsupported) at the start of
1041        # files. There may be multiple /dts-v1/ at the start of a file.
1042
1043        has_dts_v1 = False
1044
1045        while self._peek_token().id == _T.DTS_V1:
1046            has_dts_v1 = True
1047            self._next_token()
1048            self._expect_token(";")
1049            # /plugin/ always comes after /dts-v1/
1050            if self._peek_token().id == _T.PLUGIN:
1051                self._parse_error("/plugin/ is not supported")
1052
1053        if not has_dts_v1:
1054            self._parse_error("expected '/dts-v1/;' at start of file")
1055
1056    def _parse_memreserves(self):
1057        # Parses /memreserve/, which appears after /dts-v1/
1058
1059        while True:
1060            # Labels before /memreserve/
1061            labels = []
1062            while self._peek_token().id == _T.LABEL:
1063                _append_no_dup(labels, self._next_token().val)
1064
1065            if self._peek_token().id == _T.MEMRESERVE:
1066                self._next_token()
1067                self.memreserves.append(
1068                    (labels, self._eval_prim(), self._eval_prim()))
1069                self._expect_token(";")
1070            elif labels:
1071                self._parse_error("expected /memreserve/ after labels at "
1072                                  "beginning of file")
1073            else:
1074                return
1075
1076    def _parse_dt(self):
1077        # Top-level parsing loop
1078
1079        while True:
1080            tok = self._next_token()
1081
1082            if tok.val == "/":
1083                # '/ { ... };', the root node
1084                if not self._root:
1085                    self._root = Node(name="/", parent=None, dt=self)
1086                self._parse_node(self.root)
1087
1088            elif tok.id in (_T.LABEL, _T.REF):
1089                # '&foo { ... };' or 'label: &foo { ... };'. The C tools only
1090                # support a single label here too.
1091
1092                if tok.id == _T.LABEL:
1093                    label = tok.val
1094                    tok = self._next_token()
1095                    if tok.id != _T.REF:
1096                        self._parse_error("expected label reference (&foo)")
1097                else:
1098                    label = None
1099
1100                try:
1101                    node = self._ref2node(tok.val)
1102                except DTError as e:
1103                    self._parse_error(e)
1104                self._parse_node(node)
1105
1106                if label:
1107                    _append_no_dup(node.labels, label)
1108
1109            elif tok.id == _T.DEL_NODE:
1110                self._next_ref2node()._del()
1111                self._expect_token(";")
1112
1113            elif tok.id == _T.OMIT_IF_NO_REF:
1114                self._next_ref2node()._omit_if_no_ref = True
1115                self._expect_token(";")
1116
1117            elif tok.id == _T.EOF:
1118                if not self._root:
1119                    self._parse_error("no root node defined")
1120                return
1121
1122            else:
1123                self._parse_error("expected '/' or label reference (&foo)")
1124
1125    def _parse_node(self, node):
1126        # Parses the '{ ... };' part of 'node-name { ... };'.
1127
1128        # We need to track which child nodes were defined in this set
1129        # of curly braces in order to reject duplicate node names.
1130        current_child_names = set()
1131
1132        self._expect_token("{")
1133        while True:
1134            labels, omit_if_no_ref = self._parse_propnode_labels()
1135            tok = self._next_token()
1136
1137            if tok.id == _T.PROPNODENAME:
1138                if self._peek_token().val == "{":
1139                    # '<tok> { ...', expect node
1140
1141                    # Fetch the existing node if it already exists. This
1142                    # happens when overriding nodes.
1143                    child = node.nodes.get(tok.val)
1144                    if child:
1145                        if child.name in current_child_names:
1146                            self._parse_error(f'{child.path}: duplicate node name')
1147                    else:
1148                        child = Node(name=tok.val, parent=node, dt=self)
1149                        current_child_names.add(tok.val)
1150
1151                    for label in labels:
1152                        _append_no_dup(child.labels, label)
1153
1154                    if omit_if_no_ref:
1155                        child._omit_if_no_ref = True
1156
1157                    node.nodes[child.name] = child
1158                    self._parse_node(child)
1159
1160                else:
1161                    # Not '<tok> { ...', expect property assignment
1162
1163                    if omit_if_no_ref:
1164                        self._parse_error(
1165                            "/omit-if-no-ref/ can only be used on nodes")
1166
1167                    prop = node._get_prop(tok.val)
1168
1169                    if self._check_token("="):
1170                        self._parse_assignment(prop)
1171                    elif not self._check_token(";"):
1172                        # ';' is for an empty property, like 'foo;'
1173                        self._parse_error("expected '{', '=', or ';'")
1174
1175                    for label in labels:
1176                        _append_no_dup(prop.labels, label)
1177
1178            elif tok.id == _T.DEL_NODE:
1179                tok2 = self._next_token()
1180                if tok2.id != _T.PROPNODENAME:
1181                    self._parse_error("expected node name")
1182                if tok2.val in node.nodes:
1183                    node.nodes[tok2.val]._del()
1184                self._expect_token(";")
1185
1186            elif tok.id == _T.DEL_PROP:
1187                tok2 = self._next_token()
1188                if tok2.id != _T.PROPNODENAME:
1189                    self._parse_error("expected property name")
1190                node.props.pop(tok2.val, None)
1191                self._expect_token(";")
1192
1193            elif tok.val == "}":
1194                self._expect_token(";")
1195                return
1196
1197            else:
1198                self._parse_error("expected node name, property name, or '}'")
1199
1200    def _parse_propnode_labels(self):
1201        # _parse_node() helpers for parsing labels and /omit-if-no-ref/s before
1202        # nodes and properties. Returns a (<label list>, <omit-if-no-ref bool>)
1203        # tuple.
1204
1205        labels = []
1206        omit_if_no_ref = False
1207        while True:
1208            tok = self._peek_token()
1209            if tok.id == _T.LABEL:
1210                _append_no_dup(labels, tok.val)
1211            elif tok.id == _T.OMIT_IF_NO_REF:
1212                omit_if_no_ref = True
1213            elif (labels or omit_if_no_ref) and tok.id != _T.PROPNODENAME:
1214                # Got something like 'foo: bar: }'
1215                self._parse_error("expected node or property name")
1216            else:
1217                return labels, omit_if_no_ref
1218
1219            self._next_token()
1220
1221    def _parse_assignment(self, prop):
1222        # Parses the right-hand side of property assignment
1223        #
1224        # prop:
1225        #   'Property' instance being assigned
1226
1227        # Remove any old value, path/phandle references, and in-value labels,
1228        # in case the property value is being overridden
1229        prop.value = b""
1230        prop._markers = []
1231
1232        while True:
1233            # Parse labels before the value (e.g., '..., label: < 0 >')
1234            self._parse_value_labels(prop)
1235
1236            tok = self._next_token()
1237
1238            if tok.val == "<":
1239                self._parse_cells(prop, 4)
1240
1241            elif tok.id == _T.BITS:
1242                n_bits = self._expect_num()
1243                if n_bits not in {8, 16, 32, 64}:
1244                    self._parse_error("expected 8, 16, 32, or 64")
1245                self._expect_token("<")
1246                self._parse_cells(prop, n_bits//8)
1247
1248            elif tok.val == "[":
1249                self._parse_bytes(prop)
1250
1251            elif tok.id == _T.STRING:
1252                prop._add_marker(_MarkerType.STRING)
1253                prop.value += self._unescape(tok.val.encode("utf-8")) + b"\0"
1254
1255            elif tok.id == _T.REF:
1256                prop._add_marker(_MarkerType.PATH, tok.val)
1257
1258            elif tok.id == _T.INCBIN:
1259                self._parse_incbin(prop)
1260
1261            else:
1262                self._parse_error("malformed value")
1263
1264            # Parse labels after the value (e.g., '< 0 > label:, ...')
1265            self._parse_value_labels(prop)
1266
1267            tok = self._next_token()
1268            if tok.val == ";":
1269                return
1270            if tok.val == ",":
1271                continue
1272            self._parse_error("expected ';' or ','")
1273
1274    def _parse_cells(self, prop, n_bytes):
1275        # Parses '<...>'
1276
1277        prop._add_marker(_N_BYTES_TO_TYPE[n_bytes])
1278
1279        while True:
1280            tok = self._peek_token()
1281            if tok.id == _T.REF:
1282                self._next_token()
1283                if n_bytes != 4:
1284                    self._parse_error("phandle references are only allowed in "
1285                                      "arrays with 32-bit elements")
1286                prop._add_marker(_MarkerType.PHANDLE, tok.val)
1287
1288            elif tok.id == _T.LABEL:
1289                prop._add_marker(_MarkerType.LABEL, tok.val)
1290                self._next_token()
1291
1292            elif self._check_token(">"):
1293                return
1294
1295            else:
1296                # Literal value
1297                num = self._eval_prim()
1298                try:
1299                    prop.value += num.to_bytes(n_bytes, "big")
1300                except OverflowError:
1301                    try:
1302                        # Try again as a signed number, in case it's negative
1303                        prop.value += num.to_bytes(n_bytes, "big", signed=True)
1304                    except OverflowError:
1305                        self._parse_error(
1306                            f"{num} does not fit in {8*n_bytes} bits")
1307
1308    def _parse_bytes(self, prop):
1309        # Parses '[ ... ]'
1310
1311        prop._add_marker(_MarkerType.UINT8)
1312
1313        while True:
1314            tok = self._next_token()
1315            if tok.id == _T.BYTE:
1316                prop.value += tok.val.to_bytes(1, "big")
1317
1318            elif tok.id == _T.LABEL:
1319                prop._add_marker(_MarkerType.LABEL, tok.val)
1320
1321            elif tok.val == "]":
1322                return
1323
1324            else:
1325                self._parse_error("expected two-digit byte or ']'")
1326
1327    def _parse_incbin(self, prop):
1328        # Parses
1329        #
1330        #   /incbin/ ("filename")
1331        #
1332        # and
1333        #
1334        #   /incbin/ ("filename", <offset>, <size>)
1335
1336        prop._add_marker(_MarkerType.UINT8)
1337
1338        self._expect_token("(")
1339
1340        tok = self._next_token()
1341        if tok.id != _T.STRING:
1342            self._parse_error("expected quoted filename")
1343        filename = tok.val
1344
1345        tok = self._next_token()
1346        if tok.val == ",":
1347            offset = self._eval_prim()
1348            self._expect_token(",")
1349            size = self._eval_prim()
1350            self._expect_token(")")
1351        else:
1352            if tok.val != ")":
1353                self._parse_error("expected ',' or ')'")
1354            offset = None
1355
1356        try:
1357            with self._open(filename, "rb") as f:
1358                if offset is None:
1359                    prop.value += f.read()
1360                else:
1361                    f.seek(offset)
1362                    prop.value += f.read(size)
1363        except OSError as e:
1364            self._parse_error(f"could not read '{filename}': {e}")
1365
1366    def _parse_value_labels(self, prop):
1367        # _parse_assignment() helper for parsing labels before/after each
1368        # comma-separated value
1369
1370        while True:
1371            tok = self._peek_token()
1372            if tok.id != _T.LABEL:
1373                return
1374            prop._add_marker(_MarkerType.LABEL, tok.val)
1375            self._next_token()
1376
1377    def _node_phandle(self, node):
1378        # Returns the phandle for Node 'node', creating a new phandle if the
1379        # node has no phandle, and fixing up the value for existing
1380        # self-referential phandles (which get set to b'\0\0\0\0' initially).
1381        # Self-referential phandles must be rewritten instead of recreated, so
1382        # that labels are preserved.
1383
1384        if "phandle" in node.props:
1385            phandle_prop = node.props["phandle"]
1386        else:
1387            phandle_prop = Property(node, "phandle")
1388            phandle_prop._add_marker(_MarkerType.UINT32)  # For displaying
1389            phandle_prop.value = b'\0\0\0\0'
1390
1391        if phandle_prop.value == b'\0\0\0\0':
1392            phandle_i = 1
1393            while phandle_i in self.phandle2node:
1394                phandle_i += 1
1395            self.phandle2node[phandle_i] = node
1396
1397            phandle_prop.value = phandle_i.to_bytes(4, "big")
1398            node.props["phandle"] = phandle_prop
1399
1400        return phandle_prop.value
1401
1402    # Expression evaluation
1403
1404    def _eval_prim(self):
1405        tok = self._peek_token()
1406        if tok.id in (_T.NUM, _T.CHAR_LITERAL):
1407            return self._next_token().val
1408
1409        tok = self._next_token()
1410        if tok.val != "(":
1411            self._parse_error("expected number or parenthesized expression")
1412        val = self._eval_ternary()
1413        self._expect_token(")")
1414        return val
1415
1416    def _eval_ternary(self):
1417        val = self._eval_or()
1418        if self._check_token("?"):
1419            if_val = self._eval_ternary()
1420            self._expect_token(":")
1421            else_val = self._eval_ternary()
1422            return if_val if val else else_val
1423        return val
1424
1425    def _eval_or(self):
1426        val = self._eval_and()
1427        while self._check_token("||"):
1428            val = 1 if self._eval_and() or val else 0
1429        return val
1430
1431    def _eval_and(self):
1432        val = self._eval_bitor()
1433        while self._check_token("&&"):
1434            val = 1 if self._eval_bitor() and val else 0
1435        return val
1436
1437    def _eval_bitor(self):
1438        val = self._eval_bitxor()
1439        while self._check_token("|"):
1440            val |= self._eval_bitxor()
1441        return val
1442
1443    def _eval_bitxor(self):
1444        val = self._eval_bitand()
1445        while self._check_token("^"):
1446            val ^= self._eval_bitand()
1447        return val
1448
1449    def _eval_bitand(self):
1450        val = self._eval_eq()
1451        while self._check_token("&"):
1452            val &= self._eval_eq()
1453        return val
1454
1455    def _eval_eq(self):
1456        val = self._eval_rela()
1457        while True:
1458            if self._check_token("=="):
1459                val = 1 if val == self._eval_rela() else 0
1460            elif self._check_token("!="):
1461                val = 1 if val != self._eval_rela() else 0
1462            else:
1463                return val
1464
1465    def _eval_rela(self):
1466        val = self._eval_shift()
1467        while True:
1468            if self._check_token("<"):
1469                val = 1 if val < self._eval_shift() else 0
1470            elif self._check_token(">"):
1471                val = 1 if val > self._eval_shift() else 0
1472            elif self._check_token("<="):
1473                val = 1 if val <= self._eval_shift() else 0
1474            elif self._check_token(">="):
1475                val = 1 if val >= self._eval_shift() else 0
1476            else:
1477                return val
1478
1479    def _eval_shift(self):
1480        val = self._eval_add()
1481        while True:
1482            if self._check_token("<<"):
1483                val <<= self._eval_add()
1484            elif self._check_token(">>"):
1485                val >>= self._eval_add()
1486            else:
1487                return val
1488
1489    def _eval_add(self):
1490        val = self._eval_mul()
1491        while True:
1492            if self._check_token("+"):
1493                val += self._eval_mul()
1494            elif self._check_token("-"):
1495                val -= self._eval_mul()
1496            else:
1497                return val
1498
1499    def _eval_mul(self):
1500        val = self._eval_unary()
1501        while True:
1502            if self._check_token("*"):
1503                val *= self._eval_unary()
1504            elif self._check_token("/"):
1505                denom = self._eval_unary()
1506                if not denom:
1507                    self._parse_error("division by zero")
1508                val //= denom
1509            elif self._check_token("%"):
1510                denom = self._eval_unary()
1511                if not denom:
1512                    self._parse_error("division by zero")
1513                val %= denom
1514            else:
1515                return val
1516
1517    def _eval_unary(self):
1518        if self._check_token("-"):
1519            return -self._eval_unary()
1520        if self._check_token("~"):
1521            return ~self._eval_unary()
1522        if self._check_token("!"):
1523            return 0 if self._eval_unary() else 1
1524        return self._eval_prim()
1525
1526    #
1527    # Lexing
1528    #
1529
1530    def _check_token(self, val):
1531        if self._peek_token().val == val:
1532            self._next_token()
1533            return True
1534        return False
1535
1536    def _peek_token(self):
1537        if not self._saved_token:
1538            self._saved_token = self._next_token()
1539        return self._saved_token
1540
1541    def _next_token(self):
1542        if self._saved_token:
1543            tmp = self._saved_token
1544            self._saved_token = None
1545            return tmp
1546
1547        while True:
1548            tok_id = None
1549
1550            match = _token_re.match(self._file_contents, self._tok_end_i)
1551            if match:
1552                tok_id = match.lastindex
1553                if tok_id == _T.CHAR_LITERAL:
1554                    val = self._unescape(match.group(tok_id).encode("utf-8"))
1555                    if len(val) != 1:
1556                        self._parse_error("character literals must be length 1")
1557                    tok_val = ord(val)
1558                else:
1559                    tok_val = match.group(tok_id)
1560
1561            elif self._lexer_state is _DEFAULT:
1562                match = _num_re.match(self._file_contents, self._tok_end_i)
1563                if match:
1564                    tok_id = _T.NUM
1565                    num_s = match.group(1)
1566                    tok_val = int(num_s,
1567                                  16 if num_s.startswith(("0x", "0X")) else
1568                                  8 if num_s[0] == "0" else
1569                                  10)
1570
1571            elif self._lexer_state is _EXPECT_PROPNODENAME:
1572                match = _propnodename_re.match(self._file_contents,
1573                                               self._tok_end_i)
1574                if match:
1575                    tok_id = _T.PROPNODENAME
1576                    tok_val = match.group(1)
1577                    self._lexer_state = _DEFAULT
1578
1579            else:  # self._lexer_state is _EXPECT_BYTE
1580                match = _byte_re.match(self._file_contents, self._tok_end_i)
1581                if match:
1582                    tok_id = _T.BYTE
1583                    tok_val = int(match.group(), 16)
1584
1585            if not tok_id:
1586                match = _misc_re.match(self._file_contents, self._tok_end_i)
1587                if match:
1588                    tok_id = _T.MISC
1589                    tok_val = match.group()
1590                else:
1591                    self._tok_i = self._tok_end_i
1592                    # Could get here due to a node/property naming appearing in
1593                    # an unexpected context as well as for bad characters in
1594                    # files. Generate a token for it so that the error can
1595                    # trickle up to some context where we can give a more
1596                    # helpful error message.
1597                    return _Token(_T.BAD, "<unknown token>")
1598
1599            self._tok_i = match.start()
1600            self._tok_end_i = match.end()
1601
1602            if tok_id == _T.SKIP:
1603                self._lineno += tok_val.count("\n")
1604                continue
1605
1606            # /include/ is handled in the lexer in the C tools as well, and can
1607            # appear anywhere
1608            if tok_id == _T.INCLUDE:
1609                # Can have newlines between /include/ and the filename
1610                self._lineno += tok_val.count("\n")
1611                # Do this manual extraction instead of doing it in the regex so
1612                # that we can properly count newlines
1613                filename = tok_val[tok_val.find('"') + 1:-1]
1614                self._enter_file(filename)
1615                continue
1616
1617            if tok_id == _T.LINE:
1618                # #line directive
1619                self._lineno = int(tok_val.split()[0]) - 1
1620                self.filename = tok_val[tok_val.find('"') + 1:-1]
1621                continue
1622
1623            if tok_id == _T.EOF:
1624                if self._filestack:
1625                    self._leave_file()
1626                    continue
1627                return _Token(_T.EOF, "<EOF>")
1628
1629            # State handling
1630
1631            if tok_id in (_T.DEL_PROP, _T.DEL_NODE, _T.OMIT_IF_NO_REF) or \
1632               tok_val in ("{", ";"):
1633
1634                self._lexer_state = _EXPECT_PROPNODENAME
1635
1636            elif tok_val == "[":
1637                self._lexer_state = _EXPECT_BYTE
1638
1639            elif tok_id in (_T.MEMRESERVE, _T.BITS) or tok_val == "]":
1640                self._lexer_state = _DEFAULT
1641
1642            return _Token(tok_id, tok_val)
1643
1644    def _expect_token(self, tok_val):
1645        # Raises an error if the next token does not have the string value
1646        # 'tok_val'. Returns the token.
1647
1648        tok = self._next_token()
1649        if tok.val != tok_val:
1650            self._parse_error(f"expected '{tok_val}', not '{tok.val}'")
1651
1652        return tok
1653
1654    def _expect_num(self):
1655        # Raises an error if the next token is not a number. Returns the token.
1656
1657        tok = self._next_token()
1658        if tok.id != _T.NUM:
1659            self._parse_error("expected number")
1660        return tok.val
1661
1662    def _parse_error(self, s):
1663        # This works out for the first line of the file too, where rfind()
1664        # returns -1
1665        column = self._tok_i - self._file_contents.rfind("\n", 0,
1666                                                         self._tok_i + 1)
1667        _err(f"{self.filename}:{self._lineno} (column {column}): "
1668             f"parse error: {s}")
1669
1670    def _enter_file(self, filename):
1671        # Enters the /include/d file 'filename', remembering the position in
1672        # the /include/ing file for later
1673
1674        self._filestack.append(
1675            _FileStackElt(self.filename, self._lineno,
1676                          self._file_contents, self._tok_end_i))
1677
1678        # Handle escapes in filenames, just for completeness
1679        filename = self._unescape(filename.encode("utf-8"))
1680        try:
1681            filename = filename.decode("utf-8")
1682        except UnicodeDecodeError:
1683            self._parse_error("filename is not valid UTF-8")
1684
1685        with self._open(filename, encoding="utf-8") as f:
1686            try:
1687                self._file_contents = f.read()
1688            except OSError as e:
1689                self._parse_error(e)
1690
1691        # Check for recursive /include/
1692        for i, parent in enumerate(self._filestack):
1693            if filename == parent[0]:
1694                self._parse_error("recursive /include/:\n" + " ->\n".join(
1695                    [f"{parent[0]}:{parent[1]}"
1696                     for parent in self._filestack[i:]] +
1697                    [filename]))
1698
1699        self.filename = f.name
1700        self._lineno = 1
1701        self._tok_end_i = 0
1702
1703    def _leave_file(self):
1704        # Leaves an /include/d file, returning to the file that /include/d it
1705
1706        self.filename, self._lineno, self._file_contents, self._tok_end_i = \
1707            self._filestack.pop()
1708
1709    def _next_ref2node(self):
1710        # Checks that the next token is a label/path reference and returns the
1711        # Node it points to. Only used during parsing, so uses _parse_error()
1712        # on errors to save some code in callers.
1713
1714        label = self._next_token()
1715        if label.id != _T.REF:
1716            self._parse_error(
1717                "expected label (&foo) or path (&{/foo/bar}) reference")
1718        try:
1719            return self._ref2node(label.val)
1720        except DTError as e:
1721            self._parse_error(e)
1722
1723    def _ref2node(self, s):
1724        # Returns the Node the label/path reference 's' points to
1725
1726        if s[0] == "{":
1727            # Path reference (&{/foo/bar})
1728            path = s[1:-1]
1729            if not path.startswith("/"):
1730                _err(f"node path '{path}' does not start with '/'")
1731            # Will raise DTError if the path doesn't exist
1732            return _root_and_path_to_node(self.root, path, path)
1733
1734        # Label reference (&foo).
1735
1736        # label2node hasn't been filled in yet, and using it would get messy
1737        # when nodes are deleted
1738        for node in self.node_iter():
1739            if s in node.labels:
1740                return node
1741
1742        _err(f"undefined node label '{s}'")
1743
1744    #
1745    # Post-processing
1746    #
1747
1748    def _register_phandles(self):
1749        # Registers any manually-inserted phandle properties in
1750        # self.phandle2node, so that we can avoid allocating any phandles from
1751        # that set. Also checks the format of the phandles and does misc.
1752        # sanity checking.
1753
1754        for node in self.node_iter():
1755            phandle = node.props.get("phandle")
1756            if phandle:
1757                if len(phandle.value) != 4:
1758                    _err(f"{node.path}: bad phandle length "
1759                         f"({len(phandle.value)}), expected 4 bytes")
1760
1761                is_self_referential = False
1762                for marker in phandle._markers:
1763                    _, marker_type, ref = marker
1764                    if marker_type is _MarkerType.PHANDLE:
1765                        # The phandle's value is itself a phandle reference
1766                        if self._ref2node(ref) is node:
1767                            # Alright to set a node's phandle equal to its own
1768                            # phandle. It'll force a new phandle to be
1769                            # allocated even if the node is otherwise
1770                            # unreferenced.
1771                            is_self_referential = True
1772                            break
1773
1774                        _err(f"{node.path}: {phandle.name} "
1775                             "refers to another node")
1776
1777                # Could put on else on the 'for' above too, but keep it
1778                # somewhat readable
1779                if not is_self_referential:
1780                    phandle_val = int.from_bytes(phandle.value, "big")
1781
1782                    if phandle_val in {0, 0xFFFFFFFF}:
1783                        _err(f"{node.path}: bad value {phandle_val:#010x} "
1784                             f"for {phandle.name}")
1785
1786                    if phandle_val in self.phandle2node:
1787                        _err(f"{node.path}: duplicated phandle {phandle_val:#x} "
1788                             "(seen before at "
1789                             f"{self.phandle2node[phandle_val].path})")
1790
1791                    self.phandle2node[phandle_val] = node
1792
1793    def _fixup_props(self):
1794        # Fills in node path and phandle references in property values, and
1795        # registers labels within values. This must be done after parsing,
1796        # since forwards references are allowed and nodes and properties might
1797        # be deleted.
1798
1799        for node in self.node_iter():
1800            # The tuple() avoids a 'dictionary changed size during iteration'
1801            # error
1802            for prop in tuple(node.props.values()):
1803                # 'prev_pos' and 'pos' are indices in the unpatched
1804                # property value. The result is built up in 'res'.
1805                prev_pos = 0
1806                res = b""
1807
1808                for marker in prop._markers:
1809                    pos, marker_type, ref = marker
1810
1811                    # Add data before the marker, reading from the unpatched
1812                    # property value
1813                    res += prop.value[prev_pos:pos]
1814
1815                    # Fix the marker offset so that it's correct for the
1816                    # patched property value, for later (not used in this
1817                    # function). The offset might change due to path
1818                    # references, which expand to something like "/foo/bar".
1819                    marker[0] = len(res)
1820
1821                    if marker_type is _MarkerType.LABEL:
1822                        # This is a temporary format so that we can catch
1823                        # duplicate references. prop._label_offset_lst is changed
1824                        # to a dictionary that maps labels to offsets in
1825                        # _register_labels().
1826                        _append_no_dup(prop._label_offset_lst, (ref, len(res)))
1827                    elif marker_type in (_MarkerType.PATH, _MarkerType.PHANDLE):
1828                        # Path or phandle reference
1829                        try:
1830                            ref_node = self._ref2node(ref)
1831                        except DTError as e:
1832                            _err(f"{prop.node.path}: {e}")
1833
1834                        # For /omit-if-no-ref/
1835                        ref_node._is_referenced = True
1836
1837                        if marker_type is _MarkerType.PATH:
1838                            res += ref_node.path.encode("utf-8") + b'\0'
1839                        else:  # marker_type is PHANDLE
1840                            res += self._node_phandle(ref_node)
1841                            # Skip over the dummy phandle placeholder
1842                            pos += 4
1843
1844                    prev_pos = pos
1845
1846                # Store the final fixed-up value. Add the data after the last
1847                # marker.
1848                prop.value = res + prop.value[prev_pos:]
1849
1850    def _register_aliases(self):
1851        # Registers aliases from the /aliases node in self.alias2node. Also
1852        # checks the format of the alias properties.
1853
1854        # We copy this to self.alias2node at the end to avoid get_node()
1855        # looking up paths via other aliases while verifying aliases
1856        alias2node = {}
1857
1858        alias_re = re.compile("[0-9a-z-]+$")
1859
1860        aliases = self.root.nodes.get("aliases")
1861        if aliases:
1862            for prop in aliases.props.values():
1863                if not alias_re.match(prop.name):
1864                    _err(f"/aliases: alias property name '{prop.name}' "
1865                         "should include only characters from [0-9a-z-]")
1866
1867                # Property.to_path() checks that the node exists, has
1868                # the right type, etc. Swallow errors for invalid
1869                # aliases with self._force.
1870                try:
1871                    alias2node[prop.name] = prop.to_path()
1872                except DTError:
1873                    if self._force:
1874                        continue
1875                    raise
1876
1877        self.alias2node = alias2node
1878
1879    def _remove_unreferenced(self):
1880        # Removes any unreferenced nodes marked with /omit-if-no-ref/ from the
1881        # tree
1882
1883        # tuple() is to avoid 'RuntimeError: dictionary changed size during
1884        # iteration' errors
1885        for node in tuple(self.node_iter()):
1886            if node._omit_if_no_ref and not node._is_referenced:
1887                node._del()
1888
1889    def _register_labels(self):
1890        # Checks for duplicate labels and registers labels in label2node,
1891        # label2prop, and label2prop_offset
1892
1893        label2things = collections.defaultdict(set)
1894
1895        # Register all labels and the nodes/props they point to in label2things
1896        for node in self.node_iter():
1897            for label in node.labels:
1898                label2things[label].add(node)
1899                self.label2node[label] = node
1900
1901            for prop in node.props.values():
1902                for label in prop.labels:
1903                    label2things[label].add(prop)
1904                    self.label2prop[label] = prop
1905
1906                for label, offset in prop._label_offset_lst:
1907                    label2things[label].add((prop, offset))
1908                    self.label2prop_offset[label] = (prop, offset)
1909
1910                # See _fixup_props()
1911                prop.offset_labels = dict(prop._label_offset_lst)
1912
1913        for label, things in label2things.items():
1914            if len(things) > 1:
1915                strings = []
1916                for thing in things:
1917                    if isinstance(thing, Node):
1918                        strings.append(f"on {thing.path}")
1919                    elif isinstance(thing, Property):
1920                        strings.append(f"on property '{thing.name}' "
1921                                       f"of node {thing.node.path}")
1922                    else:
1923                        # Label within property value
1924                        strings.append("in the value of property "
1925                                       f"'{thing[0].name}' of node "
1926                                       f"{thing[0].node.path}")
1927
1928                # Give consistent error messages to help with testing
1929                strings.sort()
1930
1931                _err(f"Label '{label}' appears " + " and ".join(strings))
1932
1933
1934    #
1935    # Misc.
1936    #
1937
1938    def _unescape(self, b):
1939        # Replaces backslash escapes in the 'bytes' array 'b'. We can't do this at
1940        # the string level, because the result might not be valid UTF-8 when
1941        # octal/hex escapes are involved.
1942
1943        def sub(match):
1944            esc = match.group(1)
1945            if esc == b"a": return b"\a"
1946            if esc == b"b": return b"\b"
1947            if esc == b"t": return b"\t"
1948            if esc == b"n": return b"\n"
1949            if esc == b"v": return b"\v"
1950            if esc == b"f": return b"\f"
1951            if esc == b"r": return b"\r"
1952
1953            if esc[0] in b"01234567":
1954                # Octal escape
1955                try:
1956                    return int(esc, 8).to_bytes(1, "big")
1957                except OverflowError:
1958                    self._parse_error("octal escape out of range (> 255)")
1959
1960            if esc[0] == ord("x") and len(esc) > 1:
1961                # Hex escape
1962                return int(esc[1:], 16).to_bytes(1, "big")
1963
1964            # Return <char> as-is for other \<char>
1965            return esc[0].to_bytes(1, "big")
1966
1967        return _unescape_re.sub(sub, b)
1968
1969    def _open(self, filename, mode="r", **kwargs):
1970        # Wrapper around standard Python open(), accepting the same params.
1971        # But searches for a 'filename' file in the directory of the current
1972        # file and the include path.
1973
1974        # The C tools support specifying stdin with '-' too
1975        if filename == "-":
1976            return sys.stdin.buffer if "b" in mode else sys.stdin
1977
1978        # Try the directory of the current file first
1979        dirname = os.path.dirname(self.filename)
1980        try:
1981            return open(os.path.join(dirname, filename), mode, **kwargs)
1982        except OSError as e:
1983            if e.errno != errno.ENOENT:
1984                self._parse_error(e)
1985
1986            # Try each directory from the include path
1987            for path in self._include_path:
1988                try:
1989                    return open(os.path.join(path, filename), mode, **kwargs)
1990                except OSError as e:
1991                    if e.errno != errno.ENOENT:
1992                        self._parse_error(e)
1993                    continue
1994
1995            self._parse_error(f"'{filename}' could not be found")
1996
1997#
1998# Public functions
1999#
2000
2001def to_num(data: bytes, length: Optional[int] = None,
2002           signed: bool = False) -> int:
2003    """
2004    Converts the 'bytes' array 'data' to a number. The value is expected to be
2005    in big-endian format, which is standard in devicetree.
2006
2007    length (default: None):
2008      The expected length of the value in bytes, as a simple type check. If
2009      None, the length check is skipped.
2010
2011    signed (default: False):
2012      If True, the value will be interpreted as signed rather than unsigned.
2013    """
2014    _check_is_bytes(data)
2015    if length is not None:
2016        _check_length_positive(length)
2017        if len(data) != length:
2018            _err(f"{data!r} is {len(data)} bytes long, expected {length}")
2019
2020    return int.from_bytes(data, "big", signed=signed)
2021
2022def to_nums(data: bytes, length: int = 4, signed: bool = False) -> List[int]:
2023    """
2024    Like Property.to_nums(), but takes an arbitrary 'bytes' array. The values
2025    are assumed to be in big-endian format, which is standard in devicetree.
2026    """
2027    _check_is_bytes(data)
2028    _check_length_positive(length)
2029
2030    if len(data) % length:
2031        _err(f"{data!r} is {len(data)} bytes long, "
2032             f"expected a length that's a a multiple of {length}")
2033
2034    return [int.from_bytes(data[i:i + length], "big", signed=signed)
2035            for i in range(0, len(data), length)]
2036
2037#
2038# Private helpers
2039#
2040
2041def _check_is_bytes(data):
2042    if not isinstance(data, bytes):
2043        _err(f"'{data}' has type '{type(data).__name__}', expected 'bytes'")
2044
2045def _check_length_positive(length):
2046    if length < 1:
2047        _err("'length' must be greater than zero, was " + str(length))
2048
2049def _append_no_dup(lst, elm):
2050    # Appends 'elm' to 'lst', but only if it isn't already in 'lst'. Lets us
2051    # preserve order, which a set() doesn't.
2052
2053    if elm not in lst:
2054        lst.append(elm)
2055
2056def _decode_and_escape(b):
2057    # Decodes the 'bytes' array 'b' as UTF-8 and backslash-escapes special
2058    # characters
2059
2060    # Hacky but robust way to avoid double-escaping any '\' spit out by
2061    # 'backslashreplace' bytes.translate() can't map to more than a single
2062    # byte, but str.translate() can map to more than one character, so it's
2063    # nice here. There's probably a nicer way to do this.
2064    return b.decode("utf-8", "surrogateescape") \
2065            .translate(_escape_table) \
2066            .encode("utf-8", "surrogateescape") \
2067            .decode("utf-8", "backslashreplace")
2068
2069def _root_and_path_to_node(cur, path, fullpath):
2070    # Returns the node pointed at by 'path', relative to the Node 'cur'. For
2071    # example, if 'cur' has path /foo/bar, and 'path' is "baz/qaz", then the
2072    # node with path /foo/bar/baz/qaz is returned. 'fullpath' is the path as
2073    # given in the .dts file, for error messages.
2074
2075    for component in path.split("/"):
2076        # Collapse multiple / in a row, and allow a / at the end
2077        if not component:
2078            continue
2079
2080        if component not in cur.nodes:
2081            _err(f"component '{component}' in path '{fullpath}' "
2082                 "does not exist")
2083
2084        cur = cur.nodes[component]
2085
2086    return cur
2087
2088def _err(msg) -> NoReturn:
2089    raise DTError(msg)
2090
2091_escape_table = str.maketrans({
2092    "\\": "\\\\",
2093    '"': '\\"',
2094    "\a": "\\a",
2095    "\b": "\\b",
2096    "\t": "\\t",
2097    "\n": "\\n",
2098    "\v": "\\v",
2099    "\f": "\\f",
2100    "\r": "\\r"})
2101
2102# Lexer states
2103_DEFAULT = 0
2104_EXPECT_PROPNODENAME = 1
2105_EXPECT_BYTE = 2
2106
2107_num_re = re.compile(r"(0[xX][0-9a-fA-F]+|[0-9]+)(?:ULL|UL|LL|U|L)?")
2108
2109# A leading \ is allowed property and node names, probably to allow weird node
2110# names that would clash with other stuff
2111_propnodename_re = re.compile(r"\\?([a-zA-Z0-9,._+*#?@-]+)")
2112
2113# Node names are more restrictive than property names.
2114_nodename_chars = set(string.ascii_letters + string.digits + ',._+-@')
2115
2116# Misc. tokens that are tried after a property/node name. This is important, as
2117# there's overlap with the allowed characters in names.
2118_misc_re = re.compile(
2119    "|".join(re.escape(pat) for pat in (
2120        "==", "!=", "!", "=", ",", ";", "+", "-", "*", "/", "%", "~", "?", ":",
2121        "^", "(", ")", "{", "}", "[", "]", "<<", "<=", "<", ">>", ">=", ">",
2122        "||", "|", "&&", "&")))
2123
2124_byte_re = re.compile(r"[0-9a-fA-F]{2}")
2125
2126# Matches a backslash escape within a 'bytes' array. Captures the 'c' part of
2127# '\c', where c might be a single character or an octal/hex escape.
2128_unescape_re = re.compile(br'\\([0-7]{1,3}|x[0-9A-Fa-f]{1,2}|.)')
2129
2130def _init_tokens():
2131    # Builds a (<token 1>)|(<token 2>)|... regex and returns it. The
2132    # way this is constructed makes the token's value as an int appear
2133    # in match.lastindex after a match.
2134
2135    # Each pattern must have exactly one capturing group, which can capture any
2136    # part of the pattern. This makes match.lastindex match the token type.
2137    # _Token.val is based on the captured string.
2138    token_spec = {
2139        _T.INCLUDE: r'(/include/\s*"(?:[^\\"]|\\.)*")',
2140        # #line directive or GCC linemarker
2141        _T.LINE:
2142        r'^#(?:line)?[ \t]+([0-9]+[ \t]+"(?:[^\\"]|\\.)*")(?:[ \t]+[0-9]+){0,4}',
2143
2144        _T.STRING: r'"((?:[^\\"]|\\.)*)"',
2145        _T.DTS_V1: r"(/dts-v1/)",
2146        _T.PLUGIN: r"(/plugin/)",
2147        _T.MEMRESERVE: r"(/memreserve/)",
2148        _T.BITS: r"(/bits/)",
2149        _T.DEL_PROP: r"(/delete-property/)",
2150        _T.DEL_NODE: r"(/delete-node/)",
2151        _T.OMIT_IF_NO_REF: r"(/omit-if-no-ref/)",
2152        _T.LABEL: r"([a-zA-Z_][a-zA-Z0-9_]*):",
2153        _T.CHAR_LITERAL: r"'((?:[^\\']|\\.)*)'",
2154        _T.REF: r"&([a-zA-Z_][a-zA-Z0-9_]*|{[a-zA-Z0-9,._+*#?@/-]*})",
2155        _T.INCBIN: r"(/incbin/)",
2156        # Whitespace, C comments, and C++ comments
2157        _T.SKIP: r"(\s+|(?:/\*(?:.|\n)*?\*/)|//.*$)",
2158        # Return a token for end-of-file so that the parsing code can
2159        # always assume that there are more tokens when looking
2160        # ahead. This simplifies things.
2161        _T.EOF: r"(\Z)",
2162    }
2163
2164    # MULTILINE is needed for C++ comments and #line directives
2165    return re.compile("|".join(token_spec[tok_id] for tok_id in
2166                               range(1, _T.EOF + 1)),
2167                      re.MULTILINE | re.ASCII)
2168
2169_token_re = _init_tokens()
2170
2171_TYPE_TO_N_BYTES = {
2172    _MarkerType.UINT8: 1,
2173    _MarkerType.UINT16: 2,
2174    _MarkerType.UINT32: 4,
2175    _MarkerType.UINT64: 8,
2176}
2177
2178_N_BYTES_TO_TYPE = {
2179    1: _MarkerType.UINT8,
2180    2: _MarkerType.UINT16,
2181    4: _MarkerType.UINT32,
2182    8: _MarkerType.UINT64,
2183}
2184
2185_N_BYTES_TO_START_STR = {
2186    1: " [",
2187    2: " /bits/ 16 <",
2188    4: " <",
2189    8: " /bits/ 64 <",
2190}
2191
2192_N_BYTES_TO_END_STR = {
2193    1: " ]",
2194    2: " >",
2195    4: " >",
2196    8: " >",
2197}
2198