1# Copyright (c) 2019, Nordic Semiconductor
2# SPDX-License-Identifier: BSD-3-Clause
3
4# Tip: You can view just the documentation with 'pydoc3 devicetree.dtlib'
5
6"""
7A library for extracting information from .dts (devicetree) files. See the
8documentation for the DT and Node classes for more information.
9
10The top-level entry point of the library is the DT class. DT.__init__() takes a
11.dts file to parse and a list of directories to search for any /include/d
12files.
13"""
14
15import collections
16import enum
17import errno
18import os
19import re
20import string
21import sys
22import textwrap
23from typing import (Any, Dict, Iterable, List,
24                    NamedTuple, NoReturn, Optional,
25                    Set, Tuple, TYPE_CHECKING, Union)
26
27# NOTE: tests/test_dtlib.py is the test suite for this library.
28
29class DTError(Exception):
30    "Exception raised for devicetree-related errors"
31
32class Node:
33    r"""
34    Represents a node in the devicetree ('node-name { ... };').
35
36    These attributes are available on Node instances:
37
38    name:
39      The name of the node (a string).
40
41    unit_addr:
42      The portion after the '@' in the node's name, or the empty string if the
43      name has no '@' in it.
44
45      Note that this is a string. Run int(node.unit_addr, 16) to get an
46      integer.
47
48    props:
49      A dict that maps the properties defined on the node to
50      their values. 'props' is indexed by property name (a string), and values
51      are Property objects.
52
53      To convert property values to Python numbers or strings, use
54      dtlib.to_num(), dtlib.to_nums(), or dtlib.to_string().
55
56      Property values are represented as 'bytes' arrays to support the full
57      generality of DTS, which allows assignments like
58
59        x = "foo", < 0x12345678 >, [ 9A ];
60
61      This gives x the value b"foo\0\x12\x34\x56\x78\x9A". Numbers in DTS are
62      stored in big-endian format.
63
64    nodes:
65      A dict containing the subnodes of the node, indexed by name.
66
67    labels:
68      A list with all labels pointing to the node, in the same order as the
69      labels appear, but with duplicates removed.
70
71      'label_1: label_2: node { ... };' gives 'labels' the value
72      ["label_1", "label_2"].
73
74    parent:
75      The parent Node of the node. 'None' for the root node.
76
77    path:
78      The path to the node as a string, e.g. "/foo/bar".
79
80    dt:
81      The DT instance this node belongs to.
82    """
83
84    #
85    # Public interface
86    #
87
88    def __init__(self, name: str, parent: Optional['Node'], dt: 'DT'):
89        """
90        Node constructor. Not meant to be called directly by clients.
91        """
92        # Remember to update DT.__deepcopy__() if you change this.
93
94        self._name = name
95        self.props: Dict[str, 'Property'] = {}
96        self.nodes: Dict[str, 'Node'] = {}
97        self.labels: List[str] = []
98        self.parent = parent
99        self.dt = dt
100
101        self._omit_if_no_ref = False
102        self._is_referenced = False
103
104        if name.count("@") > 1:
105            dt._parse_error("multiple '@' in node name")
106        if not name == "/":
107            for char in name:
108                if char not in _nodename_chars:
109                    dt._parse_error(f"{self.path}: bad character '{char}' "
110                                    "in node name")
111
112    @property
113    def name(self) -> str:
114        """
115        See the class documentation.
116        """
117        # Converted to a property to discourage renaming -- that has to be done
118        # via DT.move_node.
119        return self._name
120
121    @property
122    def unit_addr(self) -> str:
123        """
124        See the class documentation.
125        """
126        return self.name.partition("@")[2]
127
128    @property
129    def path(self) -> str:
130        """
131        See the class documentation.
132        """
133        node_names = []
134
135        # This dynamic computation is required to be able to move
136        # nodes in the DT class.
137        cur = self
138        while cur.parent:
139            node_names.append(cur.name)
140            cur = cur.parent
141
142        return "/" + "/".join(reversed(node_names))
143
144    def node_iter(self) -> Iterable['Node']:
145        """
146        Returns a generator for iterating over the node and its children,
147        recursively.
148
149        For example, this will iterate over all nodes in the tree (like
150        dt.node_iter()).
151
152          for node in dt.root.node_iter():
153              ...
154        """
155        yield self
156        for node in self.nodes.values():
157            yield from node.node_iter()
158
159    def _get_prop(self, name: str) -> 'Property':
160        # Returns the property named 'name' on the node, creating it if it
161        # doesn't already exist
162
163        prop = self.props.get(name)
164        if not prop:
165            prop = Property(self, name)
166            self.props[name] = prop
167        return prop
168
169    def _del(self) -> None:
170        # Removes the node from the tree. When called on the root node,
171        # this method will leave it empty but still part of the tree.
172
173        if self.parent is None:
174            self.nodes.clear()
175            self.props.clear()
176            return
177        self.parent.nodes.pop(self.name)  # type: ignore
178
179    def __str__(self):
180        """
181        Returns a DTS representation of the node. Called automatically if the
182        node is print()ed.
183        """
184        s = "".join(label + ": " for label in self.labels)
185
186        s += f"{self.name} {{\n"
187
188        for prop in self.props.values():
189            s += "\t" + str(prop) + "\n"
190
191        for child in self.nodes.values():
192            s += textwrap.indent(child.__str__(), "\t") + "\n"
193
194        s += "};"
195
196        return s
197
198    def __repr__(self):
199        """
200        Returns some information about the Node instance. Called automatically
201        if the Node instance is evaluated.
202        """
203        return f"<Node {self.path} in '{self.dt.filename}'>"
204
205# See Property.type
206class Type(enum.IntEnum):
207    EMPTY = 0
208    BYTES = 1
209    NUM = 2
210    NUMS = 3
211    STRING = 4
212    STRINGS = 5
213    PATH = 6
214    PHANDLE = 7
215    PHANDLES = 8
216    PHANDLES_AND_NUMS = 9
217    COMPOUND = 10
218
219class _MarkerType(enum.IntEnum):
220    # Types of markers in property values
221
222    # References
223    PATH = 0     # &foo
224    PHANDLE = 1  # <&foo>
225    LABEL = 2    # foo: <1 2 3>
226
227    # Start of data blocks of specific type
228    UINT8 = 3   # [00 01 02] (and also used for /incbin/)
229    UINT16 = 4  # /bits/ 16 <1 2 3>
230    UINT32 = 5  # <1 2 3>
231    UINT64 = 6  # /bits/ 64 <1 2 3>
232    STRING = 7  # "foo"
233
234class Property:
235    """
236    Represents a property ('x = ...').
237
238    These attributes are available on Property instances:
239
240    name:
241      The name of the property (a string).
242
243    value:
244      The value of the property, as a 'bytes' string. Numbers are stored in
245      big-endian format, and strings are null-terminated. Putting multiple
246      comma-separated values in an assignment (e.g., 'x = < 1 >, "foo"') will
247      concatenate the values.
248
249      See the to_*() methods for converting the value to other types.
250
251    type:
252      The type of the property, inferred from the syntax used in the
253      assignment. This is one of the following constants (with example
254      assignments):
255
256        Assignment                  | Property.type
257        ----------------------------+------------------------
258        foo;                        | dtlib.Type.EMPTY
259        foo = [];                   | dtlib.Type.BYTES
260        foo = [01 02];              | dtlib.Type.BYTES
261        foo = /bits/ 8 <1>;         | dtlib.Type.BYTES
262        foo = <1>;                  | dtlib.Type.NUM
263        foo = <>;                   | dtlib.Type.NUMS
264        foo = <1 2 3>;              | dtlib.Type.NUMS
265        foo = <1 2>, <3>;           | dtlib.Type.NUMS
266        foo = "foo";                | dtlib.Type.STRING
267        foo = "foo", "bar";         | dtlib.Type.STRINGS
268        foo = <&l>;                 | dtlib.Type.PHANDLE
269        foo = <&l1 &l2 &l3>;        | dtlib.Type.PHANDLES
270        foo = <&l1 &l2>, <&l3>;     | dtlib.Type.PHANDLES
271        foo = <&l1 1 2 &l2 3 4>;    | dtlib.Type.PHANDLES_AND_NUMS
272        foo = <&l1 1 2>, <&l2 3 4>; | dtlib.Type.PHANDLES_AND_NUMS
273        foo = &l;                   | dtlib.Type.PATH
274        *Anything else*             | dtlib.Type.COMPOUND
275
276      *Anything else* includes properties mixing phandle (<&label>) and node
277      path (&label) references with other data.
278
279      Data labels in the property value do not influence the type.
280
281    labels:
282      A list with all labels pointing to the property, in the same order as the
283      labels appear, but with duplicates removed.
284
285      'label_1: label2: x = ...' gives 'labels' the value
286      ["label_1", "label_2"].
287
288    offset_labels:
289      A dictionary that maps any labels within the property's value to their
290      offset, in bytes. For example, 'x = < 0 label_1: 1 label_2: >' gives
291      'offset_labels' the value {"label_1": 4, "label_2": 8}.
292
293      Iteration order will match the order of the labels on Python versions
294      that preserve dict insertion order.
295
296    node:
297      The Node the property is on.
298    """
299
300    #
301    # Public interface
302    #
303
304    def __init__(self, node: Node, name: str):
305        # Remember to update DT.__deepcopy__() if you change this.
306
307        if "@" in name:
308            node.dt._parse_error("'@' is only allowed in node names")
309
310        self.name = name
311        self.value = b""
312        self.labels: List[str] = []
313        # We have to wait to set this until later, when we've got
314        # the entire tree.
315        self.offset_labels: Dict[str, int] = {}
316        self.node: Node = node
317
318        self._label_offset_lst: List[Tuple[str, int]] = []
319
320        # A list of [offset, label, type] lists (sorted by offset),
321        # giving the locations of references within the value. 'type'
322        # is either _MarkerType.PATH, for a node path reference,
323        # _MarkerType.PHANDLE, for a phandle reference, or
324        # _MarkerType.LABEL, for a label on/within data. Node paths
325        # and phandles need to be patched in after parsing.
326        self._markers: List[List] = []
327
328    @property
329    def type(self) -> Type:
330        """
331        See the class documentation.
332        """
333        # Data labels (e.g. 'foo = label: <3>') are irrelevant, so filter them
334        # out
335        types = [marker[1] for marker in self._markers
336                 if marker[1] != _MarkerType.LABEL]
337
338        if not types:
339            return Type.EMPTY
340
341        if types == [_MarkerType.UINT8]:
342            return Type.BYTES
343
344        if types == [_MarkerType.UINT32]:
345            return Type.NUM if len(self.value) == 4 else Type.NUMS
346
347        # Treat 'foo = <1 2 3>, <4 5>, ...' as Type.NUMS too
348        if set(types) == {_MarkerType.UINT32}:
349            return Type.NUMS
350
351        if set(types) == {_MarkerType.STRING}:
352            return Type.STRING if len(types) == 1 else Type.STRINGS
353
354        if types == [_MarkerType.PATH]:
355            return Type.PATH
356
357        if (types == [_MarkerType.UINT32, _MarkerType.PHANDLE]
358            and len(self.value) == 4):
359            return Type.PHANDLE
360
361        if set(types) == {_MarkerType.UINT32, _MarkerType.PHANDLE}:
362            if len(self.value) == 4*types.count(_MarkerType.PHANDLE):
363                # Array with just phandles in it
364                return Type.PHANDLES
365            # Array with both phandles and numbers
366            return Type.PHANDLES_AND_NUMS
367
368        return Type.COMPOUND
369
370    def to_num(self, signed=False) -> int:
371        """
372        Returns the value of the property as a number.
373
374        Raises DTError if the property was not assigned with this syntax (has
375        Property.type Type.NUM):
376
377            foo = < 1 >;
378
379        signed (default: False):
380          If True, the value will be interpreted as signed rather than
381          unsigned.
382        """
383        if self.type is not Type.NUM:
384            _err("expected property '{0}' on {1} in {2} to be assigned with "
385                 "'{0} = < (number) >;', not '{3}'"
386                 .format(self.name, self.node.path, self.node.dt.filename,
387                         self))
388
389        return int.from_bytes(self.value, "big", signed=signed)
390
391    def to_nums(self, signed=False) -> List[int]:
392        """
393        Returns the value of the property as a list of numbers.
394
395        Raises DTError if the property was not assigned with this syntax (has
396        Property.type Type.NUM or Type.NUMS):
397
398            foo = < 1 2 ... >;
399
400        signed (default: False):
401          If True, the values will be interpreted as signed rather than
402          unsigned.
403        """
404        if self.type not in (Type.NUM, Type.NUMS):
405            _err("expected property '{0}' on {1} in {2} to be assigned with "
406                 "'{0} = < (number) (number) ... >;', not '{3}'"
407                 .format(self.name, self.node.path, self.node.dt.filename,
408                         self))
409
410        return [int.from_bytes(self.value[i:i + 4], "big", signed=signed)
411                for i in range(0, len(self.value), 4)]
412
413    def to_bytes(self) -> bytes:
414        """
415        Returns the value of the property as a raw 'bytes', like
416        Property.value, except with added type checking.
417
418        Raises DTError if the property was not assigned with this syntax (has
419        Property.type Type.BYTES):
420
421            foo = [ 01 ... ];
422        """
423        if self.type is not Type.BYTES:
424            _err("expected property '{0}' on {1} in {2} to be assigned with "
425                 "'{0} = [ (byte) (byte) ... ];', not '{3}'"
426                 .format(self.name, self.node.path, self.node.dt.filename,
427                         self))
428
429        return self.value
430
431    def to_string(self) -> str:
432        """
433        Returns the value of the property as a string.
434
435        Raises DTError if the property was not assigned with this syntax (has
436        Property.type Type.STRING):
437
438            foo = "string";
439
440        This function might also raise UnicodeDecodeError if the string is
441        not valid UTF-8.
442        """
443        if self.type is not Type.STRING:
444            _err("expected property '{0}' on {1} in {2} to be assigned with "
445                 "'{0} = \"string\";', not '{3}'"
446                 .format(self.name, self.node.path, self.node.dt.filename,
447                         self))
448
449        try:
450            ret = self.value.decode("utf-8")[:-1]  # Strip null
451        except UnicodeDecodeError:
452            _err(f"value of property '{self.name}' ({self.value!r}) "
453                 f"on {self.node.path} in {self.node.dt.filename} "
454                 "is not valid UTF-8")
455
456        return ret  # The separate 'return' appeases the type checker.
457
458    def to_strings(self) -> List[str]:
459        """
460        Returns the value of the property as a list of strings.
461
462        Raises DTError if the property was not assigned with this syntax (has
463        Property.type Type.STRING or Type.STRINGS):
464
465            foo = "string", "string", ... ;
466
467        Also raises DTError if any of the strings are not valid UTF-8.
468        """
469        if self.type not in (Type.STRING, Type.STRINGS):
470            _err("expected property '{0}' on {1} in {2} to be assigned with "
471                 "'{0} = \"string\", \"string\", ... ;', not '{3}'"
472                 .format(self.name, self.node.path, self.node.dt.filename,
473                         self))
474
475        try:
476            ret = self.value.decode("utf-8").split("\0")[:-1]
477        except UnicodeDecodeError:
478            _err(f"value of property '{self.name}' ({self.value!r}) "
479                 f"on {self.node.path} in {self.node.dt.filename} "
480                 "is not valid UTF-8")
481
482        return ret  # The separate 'return' appeases the type checker.
483
484    def to_node(self) -> Node:
485        """
486        Returns the Node the phandle in the property points to.
487
488        Raises DTError if the property was not assigned with this syntax (has
489        Property.type Type.PHANDLE).
490
491            foo = < &bar >;
492        """
493        if self.type is not Type.PHANDLE:
494            _err("expected property '{0}' on {1} in {2} to be assigned with "
495                 "'{0} = < &foo >;', not '{3}'"
496                 .format(self.name, self.node.path, self.node.dt.filename,
497                         self))
498
499        return self.node.dt.phandle2node[int.from_bytes(self.value, "big")]
500
501    def to_nodes(self) -> List[Node]:
502        """
503        Returns a list with the Nodes the phandles in the property point to.
504
505        Raises DTError if the property value contains anything other than
506        phandles. All of the following are accepted:
507
508            foo = < >
509            foo = < &bar >;
510            foo = < &bar &baz ... >;
511            foo = < &bar ... >, < &baz ... >;
512        """
513        def type_ok():
514            if self.type in (Type.PHANDLE, Type.PHANDLES):
515                return True
516            # Also accept 'foo = < >;'
517            return self.type is Type.NUMS and not self.value
518
519        if not type_ok():
520            _err("expected property '{0}' on {1} in {2} to be assigned with "
521                 "'{0} = < &foo &bar ... >;', not '{3}'"
522                 .format(self.name, self.node.path,
523                         self.node.dt.filename, self))
524
525        return [self.node.dt.phandle2node[int.from_bytes(self.value[i:i + 4],
526                                                         "big")]
527                for i in range(0, len(self.value), 4)]
528
529    def to_path(self) -> Node:
530        """
531        Returns the Node referenced by the path stored in the property.
532
533        Raises DTError if the property was not assigned with either of these
534        syntaxes (has Property.type Type.PATH or Type.STRING):
535
536            foo = &bar;
537            foo = "/bar";
538
539        For the second case, DTError is raised if the path does not exist.
540        """
541        if self.type not in (Type.PATH, Type.STRING):
542            _err("expected property '{0}' on {1} in {2} to be assigned with "
543                 "either '{0} = &foo' or '{0} = \"/path/to/node\"', not '{3}'"
544                 .format(self.name, self.node.path, self.node.dt.filename,
545                         self))
546
547        try:
548            path = self.value.decode("utf-8")[:-1]
549        except UnicodeDecodeError:
550            _err(f"value of property '{self.name}' ({self.value!r}) "
551                 f"on {self.node.path} in {self.node.dt.filename} "
552                 "is not valid UTF-8")
553
554        try:
555            ret = self.node.dt.get_node(path)
556        except DTError:
557            _err(f"property '{self.name}' on {self.node.path} in "
558                 f"{self.node.dt.filename} points to the non-existent node "
559                 f'"{path}"')
560
561        return ret  # The separate 'return' appeases the type checker.
562
563    def __str__(self):
564        s = "".join(label + ": " for label in self.labels) + self.name
565        if not self.value:
566            return s + ";"
567
568        s += " ="
569
570        for i, (pos, marker_type, ref) in enumerate(self._markers):
571            if i < len(self._markers) - 1:
572                next_marker = self._markers[i + 1]
573            else:
574                next_marker = None
575
576            # End of current marker
577            end = next_marker[0] if next_marker else len(self.value)
578
579            if marker_type is _MarkerType.STRING:
580                # end - 1 to strip off the null terminator
581                s += f' "{_decode_and_escape(self.value[pos:end - 1])}"'
582                if end != len(self.value):
583                    s += ","
584            elif marker_type is _MarkerType.PATH:
585                s += " &" + ref
586                if end != len(self.value):
587                    s += ","
588            else:
589                # <> or []
590
591                if marker_type is _MarkerType.LABEL:
592                    s += f" {ref}:"
593                elif marker_type is _MarkerType.PHANDLE:
594                    s += " &" + ref
595                    pos += 4
596                    # Subtle: There might be more data between the phandle and
597                    # the next marker, so we can't 'continue' here
598                else:  # marker_type is _MarkerType.UINT*
599                    elm_size = _TYPE_TO_N_BYTES[marker_type]
600                    s += _N_BYTES_TO_START_STR[elm_size]
601
602                while pos != end:
603                    num = int.from_bytes(self.value[pos:pos + elm_size],
604                                         "big")
605                    if elm_size == 1:
606                        s += f" {num:02X}"
607                    else:
608                        s += f" {hex(num)}"
609
610                    pos += elm_size
611
612                if (pos != 0
613                    and (not next_marker
614                         or next_marker[1]
615                         not in (_MarkerType.PHANDLE, _MarkerType.LABEL))):
616
617                    s += _N_BYTES_TO_END_STR[elm_size]
618                    if pos != len(self.value):
619                        s += ","
620
621        return s + ";"
622
623
624    def __repr__(self):
625        return (f"<Property '{self.name}' at '{self.node.path}' in "
626                f"'{self.node.dt.filename}'>")
627
628    #
629    # Internal functions
630    #
631
632    def _add_marker(self, marker_type: _MarkerType, data: Any = None):
633        # Helper for registering markers in the value that are processed after
634        # parsing. See _fixup_props(). 'marker_type' identifies the type of
635        # marker, and 'data' has any optional data associated with the marker.
636
637        # len(self.value) gives the current offset. This function is called
638        # while the value is built. We use a list instead of a tuple to be able
639        # to fix up offsets later (they might increase if the value includes
640        # path references, e.g. 'foo = &bar, <3>;', which are expanded later).
641        self._markers.append([len(self.value), marker_type, data])
642
643        # For phandle references, add a dummy value with the same length as a
644        # phandle. This is handy for the length check in _register_phandles().
645        if marker_type is _MarkerType.PHANDLE:
646            self.value += b"\0\0\0\0"
647
648class _T(enum.IntEnum):
649    # Token IDs used by the DT lexer.
650
651    # These values must be contiguous and start from 1.
652    INCLUDE = 1
653    LINE = 2
654    STRING = 3
655    DTS_V1 = 4
656    PLUGIN = 5
657    MEMRESERVE = 6
658    BITS = 7
659    DEL_PROP = 8
660    DEL_NODE = 9
661    OMIT_IF_NO_REF = 10
662    LABEL = 11
663    CHAR_LITERAL = 12
664    REF = 13
665    INCBIN = 14
666    SKIP = 15
667    EOF = 16
668
669    # These values must be larger than the above contiguous range.
670    NUM = 17
671    PROPNODENAME = 18
672    MISC = 19
673    BYTE = 20
674    BAD = 21
675
676class _FileStackElt(NamedTuple):
677    # Used for maintaining the /include/ stack.
678
679    filename: str
680    lineno: int
681    contents: str
682    pos: int
683
684_TokVal = Union[int, str]
685
686class _Token(NamedTuple):
687    id: int
688    val: _TokVal
689
690    def __repr__(self):
691        id_repr = _T(self.id).name
692        return f'Token(id=_T.{id_repr}, val={repr(self.val)})'
693
694class DT:
695    """
696    Represents a devicetree parsed from a .dts file (or from many files, if the
697    .dts file /include/s other files). Creating many instances of this class is
698    fine. The library has no global state.
699
700    These attributes are available on DT instances:
701
702    root:
703      A Node instance representing the root (/) node.
704
705    alias2node:
706      A dictionary that maps maps alias strings (from /aliases) to Node
707      instances
708
709    label2node:
710      A dictionary that maps each node label (a string) to the Node instance
711      for the node.
712
713    label2prop:
714      A dictionary that maps each property label (a string) to a Property
715      instance.
716
717    label2prop_offset:
718      A dictionary that maps each label (a string) within a property value
719      (e.g., 'x = label_1: < 1 label2: 2 >;') to a (prop, offset) tuple, where
720      'prop' is a Property instance and 'offset' the byte offset (0 for label_1
721      and 4 for label_2 in the example).
722
723    phandle2node:
724      A dictionary that maps each phandle (a number) to a Node instance.
725
726    memreserves:
727      A list of (labels, address, length) tuples for the /memreserve/s in the
728      .dts file, in the same order as they appear in the file.
729
730      'labels' is a possibly empty set with all labels preceding the memreserve
731      (e.g., 'label1: label2: /memreserve/ ...'). 'address' and 'length' are
732      numbers.
733
734    filename:
735      The filename passed to the DT constructor.
736    """
737
738    #
739    # Public interface
740    #
741
742    def __init__(self, filename: Optional[str], include_path: Iterable[str] = (),
743                 force: bool = False):
744        """
745        Parses a DTS file to create a DT instance. Raises OSError if 'filename'
746        can't be opened, and DTError for any parse errors.
747
748        filename:
749          Path to the .dts file to parse. (If None, an empty devicetree
750          is created; this is unlikely to be what you want.)
751
752        include_path:
753          An iterable (e.g. list or tuple) containing paths to search for
754          /include/d and /incbin/'d files. By default, files are only looked up
755          relative to the .dts file that contains the /include/ or /incbin/.
756
757        force:
758          Try not to raise DTError even if the input tree has errors.
759          For experimental use; results not guaranteed.
760        """
761        # Remember to update __deepcopy__() if you change this.
762
763        self._root: Optional[Node] = None
764        self.alias2node: Dict[str, Node] = {}
765        self.label2node: Dict[str, Node] = {}
766        self.label2prop: Dict[str, Property] = {}
767        self.label2prop_offset: Dict[str, Tuple[Property, int]] = {}
768        self.phandle2node: Dict[int, Node] = {}
769        self.memreserves: List[Tuple[Set[str], int, int]] = []
770        self.filename = filename
771
772        self._force = force
773
774        if filename is not None:
775            self._parse_file(filename, include_path)
776        else:
777            self._include_path: List[str] = []
778
779    @property
780    def root(self) -> Node:
781        """
782        See the class documentation.
783        """
784        # This is necessary because mypy can't tell that we never
785        # treat self._root as a non-None value until it's initialized
786        # properly in _parse_dt().
787        return self._root       # type: ignore
788
789    def get_node(self, path: str) -> Node:
790        """
791        Returns the Node instance for the node with path or alias 'path' (a
792        string). Raises DTError if the path or alias doesn't exist.
793
794        For example, both dt.get_node("/foo/bar") and dt.get_node("bar-alias")
795        will return the 'bar' node below:
796
797          /dts-v1/;
798
799          / {
800                  foo {
801                          bar_label: bar {
802                                  baz {
803                                  };
804                          };
805                  };
806
807                  aliases {
808                          bar-alias = &bar-label;
809                  };
810          };
811
812        Fetching subnodes via aliases is supported:
813        dt.get_node("bar-alias/baz") returns the 'baz' node.
814        """
815        if path.startswith("/"):
816            return _root_and_path_to_node(self.root, path, path)
817
818        # Path does not start with '/'. First component must be an alias.
819        alias, _, rest = path.partition("/")
820        if alias not in self.alias2node:
821            _err(f"no alias '{alias}' found -- did you forget the leading "
822                 "'/' in the node path?")
823
824        return _root_and_path_to_node(self.alias2node[alias], rest, path)
825
826    def has_node(self, path: str) -> bool:
827        """
828        Returns True if the path or alias 'path' exists. See DT.get_node().
829        """
830        try:
831            self.get_node(path)
832            return True
833        except DTError:
834            return False
835
836    def move_node(self, node: Node, new_path: str):
837        """
838        Move a node 'node' to a new path 'new_path'. The entire subtree
839        rooted at 'node' is moved along with it.
840
841        You cannot move the root node or provide a 'new_path' value
842        where a node already exists. This method raises an exception
843        in both cases.
844
845        As a restriction on the current implementation, the parent node
846        of the new path must exist.
847        """
848        if node is self.root:
849            _err("the root node can't be moved")
850
851        if self.has_node(new_path):
852            _err(f"can't move '{node.path}' to '{new_path}': "
853                 'destination node exists')
854
855        if not new_path.startswith('/'):
856            _err(f"path '{new_path}' doesn't start with '/'")
857
858        for component in new_path.split('/'):
859            for char in component:
860                if char not in _nodename_chars:
861                    _err(f"new path '{new_path}': bad character '{char}'")
862
863        old_name = node.name
864        old_path = node.path
865
866        new_parent_path, _, new_name = new_path.rpartition('/')
867        if new_parent_path == '':
868            # '/foo'.rpartition('/') is ('', '/', 'foo').
869            new_parent_path = '/'
870        if not self.has_node(new_parent_path):
871            _err(f"can't move '{old_path}' to '{new_path}': "
872                 f"parent node '{new_parent_path}' doesn't exist")
873        new_parent = self.get_node(new_parent_path)
874        if TYPE_CHECKING:
875            assert new_parent is not None
876            assert node.parent is not None
877
878        del node.parent.nodes[old_name]
879        node._name = new_name
880        node.parent = new_parent
881        new_parent.nodes[new_name] = node
882
883    def node_iter(self) -> Iterable[Node]:
884        """
885        Returns a generator for iterating over all nodes in the devicetree.
886
887        For example, this will print the name of each node that has a property
888        called 'foo':
889
890          for node in dt.node_iter():
891              if "foo" in node.props:
892                  print(node.name)
893        """
894        yield from self.root.node_iter()
895
896    def __str__(self):
897        """
898        Returns a DTS representation of the devicetree. Called automatically if
899        the DT instance is print()ed.
900        """
901        s = "/dts-v1/;\n\n"
902
903        if self.memreserves:
904            for labels, address, offset in self.memreserves:
905                # List the labels in a consistent order to help with testing
906                for label in labels:
907                    s += f"{label}: "
908                s += f"/memreserve/ {address:#018x} {offset:#018x};\n"
909            s += "\n"
910
911        return s + str(self.root)
912
913    def __repr__(self):
914        """
915        Returns some information about the DT instance. Called automatically if
916        the DT instance is evaluated.
917        """
918        if self.filename:
919            return (f"DT(filename='{self.filename}', "
920                    f"include_path={self._include_path})")
921        return super().__repr__()
922
923    def __deepcopy__(self, memo):
924        """
925        Implements support for the standard library copy.deepcopy()
926        function on DT instances.
927        """
928
929        # We need a new DT, obviously. Make a new, empty one.
930        ret = DT(None, (), self._force)
931
932        # Now allocate new Node objects for every node in self, to use
933        # in the new DT. Set their parents to None for now and leave
934        # them without any properties. We will recursively initialize
935        # copies of parents before copies of children next.
936        path2node_copy = {
937            node.path: Node(node.name, None, ret)
938            for node in self.node_iter()
939        }
940
941        # Point each copy of a node to the copy of its parent and set up
942        # copies of each property.
943        #
944        # Share data when possible. For example, Property.value has
945        # type 'bytes', which is immutable. We therefore don't need a
946        # copy and can just point to the original data.
947
948        for node in self.node_iter():
949            node_copy = path2node_copy[node.path]
950
951            parent = node.parent
952            if parent is not None:
953                node_copy.parent = path2node_copy[parent.path]
954
955            prop_name2prop_copy = {
956                prop.name: Property(node_copy, prop.name)
957                for prop in node.props.values()
958            }
959            for prop_name, prop_copy in prop_name2prop_copy.items():
960                prop = node.props[prop_name]
961                prop_copy.value = prop.value
962                prop_copy.labels = prop.labels[:]
963                prop_copy.offset_labels = prop.offset_labels.copy()
964                prop_copy._label_offset_lst = prop._label_offset_lst[:]
965                prop_copy._markers = [marker[:] for marker in prop._markers]
966            node_copy.props = prop_name2prop_copy
967
968            node_copy.nodes = {
969                child_name: path2node_copy[child_node.path]
970                for child_name, child_node in node.nodes.items()
971            }
972
973            node_copy.labels = node.labels[:]
974
975            node_copy._omit_if_no_ref = node._omit_if_no_ref
976            node_copy._is_referenced = node._is_referenced
977
978        # The copied nodes and properties are initialized, so
979        # we can finish initializing the copied DT object now.
980
981        ret._root = path2node_copy['/']
982
983        def copy_node_lookup_table(attr_name):
984            original = getattr(self, attr_name)
985            copy = {
986                key: path2node_copy[original[key].path]
987                for key in original
988            }
989            setattr(ret, attr_name, copy)
990
991        copy_node_lookup_table('alias2node')
992        copy_node_lookup_table('label2node')
993        copy_node_lookup_table('phandle2node')
994
995        ret_label2prop = {}
996        for label, prop in self.label2prop.items():
997            node_copy = path2node_copy[prop.node.path]
998            prop_copy = node_copy.props[prop.name]
999            ret_label2prop[label] = prop_copy
1000        ret.label2prop = ret_label2prop
1001
1002        ret_label2prop_offset = {}
1003        for label, prop_offset in self.label2prop_offset.items():
1004            prop, offset = prop_offset
1005            node_copy = path2node_copy[prop.node.path]
1006            prop_copy = node_copy.props[prop.name]
1007            ret_label2prop_offset[label] = (prop_copy, offset)
1008        ret.label2prop_offset = ret_label2prop_offset
1009
1010        ret.memreserves = [
1011            (set(memreserve[0]), memreserve[1], memreserve[2])
1012            for memreserve in self.memreserves
1013        ]
1014
1015        ret.filename = self.filename
1016
1017        return ret
1018
1019    #
1020    # Parsing
1021    #
1022
1023    def _parse_file(self, filename: str, include_path: Iterable[str]):
1024        self._include_path = list(include_path)
1025
1026        with open(filename, encoding="utf-8") as f:
1027            self._file_contents = f.read()
1028
1029        self._tok_i = self._tok_end_i = 0
1030        self._filestack: List[_FileStackElt] = []
1031
1032        self._lexer_state: int = _DEFAULT
1033        self._saved_token: Optional[_Token] = None
1034
1035        self._lineno: int = 1
1036
1037        self._parse_header()
1038        self._parse_memreserves()
1039        self._parse_dt()
1040
1041        self._register_phandles()
1042        self._fixup_props()
1043        self._register_aliases()
1044        self._remove_unreferenced()
1045        self._register_labels()
1046
1047    def _parse_header(self):
1048        # Parses /dts-v1/ (expected) and /plugin/ (unsupported) at the start of
1049        # files. There may be multiple /dts-v1/ at the start of a file.
1050
1051        has_dts_v1 = False
1052
1053        while self._peek_token().id == _T.DTS_V1:
1054            has_dts_v1 = True
1055            self._next_token()
1056            self._expect_token(";")
1057            # /plugin/ always comes after /dts-v1/
1058            if self._peek_token().id == _T.PLUGIN:
1059                self._parse_error("/plugin/ is not supported")
1060
1061        if not has_dts_v1:
1062            self._parse_error("expected '/dts-v1/;' at start of file")
1063
1064    def _parse_memreserves(self):
1065        # Parses /memreserve/, which appears after /dts-v1/
1066
1067        while True:
1068            # Labels before /memreserve/
1069            labels = []
1070            while self._peek_token().id == _T.LABEL:
1071                _append_no_dup(labels, self._next_token().val)
1072
1073            if self._peek_token().id == _T.MEMRESERVE:
1074                self._next_token()
1075                self.memreserves.append(
1076                    (labels, self._eval_prim(), self._eval_prim()))
1077                self._expect_token(";")
1078            elif labels:
1079                self._parse_error("expected /memreserve/ after labels at "
1080                                  "beginning of file")
1081            else:
1082                return
1083
1084    def _parse_dt(self):
1085        # Top-level parsing loop
1086
1087        while True:
1088            tok = self._next_token()
1089
1090            if tok.val == "/":
1091                # '/ { ... };', the root node
1092                if not self._root:
1093                    self._root = Node(name="/", parent=None, dt=self)
1094                self._parse_node(self.root)
1095
1096            elif tok.id in (_T.LABEL, _T.REF):
1097                # '&foo { ... };' or 'label: &foo { ... };'. The C tools only
1098                # support a single label here too.
1099
1100                if tok.id == _T.LABEL:
1101                    label = tok.val
1102                    tok = self._next_token()
1103                    if tok.id != _T.REF:
1104                        self._parse_error("expected label reference (&foo)")
1105                else:
1106                    label = None
1107
1108                try:
1109                    node = self._ref2node(tok.val)
1110                except DTError as e:
1111                    self._parse_error(e)
1112                self._parse_node(node)
1113
1114                if label:
1115                    _append_no_dup(node.labels, label)
1116
1117            elif tok.id == _T.DEL_NODE:
1118                self._next_ref2node()._del()
1119                self._expect_token(";")
1120
1121            elif tok.id == _T.OMIT_IF_NO_REF:
1122                self._next_ref2node()._omit_if_no_ref = True
1123                self._expect_token(";")
1124
1125            elif tok.id == _T.EOF:
1126                if not self._root:
1127                    self._parse_error("no root node defined")
1128                return
1129
1130            else:
1131                self._parse_error("expected '/' or label reference (&foo)")
1132
1133    def _parse_node(self, node):
1134        # Parses the '{ ... };' part of 'node-name { ... };'.
1135
1136        # We need to track which child nodes were defined in this set
1137        # of curly braces in order to reject duplicate node names.
1138        current_child_names = set()
1139
1140        self._expect_token("{")
1141        while True:
1142            labels, omit_if_no_ref = self._parse_propnode_labels()
1143            tok = self._next_token()
1144
1145            if tok.id == _T.PROPNODENAME:
1146                if self._peek_token().val == "{":
1147                    # '<tok> { ...', expect node
1148
1149                    # Fetch the existing node if it already exists. This
1150                    # happens when overriding nodes.
1151                    child = node.nodes.get(tok.val)
1152                    if child:
1153                        if child.name in current_child_names:
1154                            self._parse_error(f'{child.path}: duplicate node name')
1155                    else:
1156                        child = Node(name=tok.val, parent=node, dt=self)
1157                        current_child_names.add(tok.val)
1158
1159                    for label in labels:
1160                        _append_no_dup(child.labels, label)
1161
1162                    if omit_if_no_ref:
1163                        child._omit_if_no_ref = True
1164
1165                    node.nodes[child.name] = child
1166                    self._parse_node(child)
1167
1168                else:
1169                    # Not '<tok> { ...', expect property assignment
1170
1171                    if omit_if_no_ref:
1172                        self._parse_error(
1173                            "/omit-if-no-ref/ can only be used on nodes")
1174
1175                    prop = node._get_prop(tok.val)
1176
1177                    if self._check_token("="):
1178                        self._parse_assignment(prop)
1179                    elif not self._check_token(";"):
1180                        # ';' is for an empty property, like 'foo;'
1181                        self._parse_error("expected '{', '=', or ';'")
1182
1183                    for label in labels:
1184                        _append_no_dup(prop.labels, label)
1185
1186            elif tok.id == _T.DEL_NODE:
1187                tok2 = self._next_token()
1188                if tok2.id != _T.PROPNODENAME:
1189                    self._parse_error("expected node name")
1190                if tok2.val in node.nodes:
1191                    node.nodes[tok2.val]._del()
1192                self._expect_token(";")
1193
1194            elif tok.id == _T.DEL_PROP:
1195                tok2 = self._next_token()
1196                if tok2.id != _T.PROPNODENAME:
1197                    self._parse_error("expected property name")
1198                node.props.pop(tok2.val, None)
1199                self._expect_token(";")
1200
1201            elif tok.val == "}":
1202                self._expect_token(";")
1203                return
1204
1205            else:
1206                self._parse_error("expected node name, property name, or '}'")
1207
1208    def _parse_propnode_labels(self):
1209        # _parse_node() helpers for parsing labels and /omit-if-no-ref/s before
1210        # nodes and properties. Returns a (<label list>, <omit-if-no-ref bool>)
1211        # tuple.
1212
1213        labels = []
1214        omit_if_no_ref = False
1215        while True:
1216            tok = self._peek_token()
1217            if tok.id == _T.LABEL:
1218                _append_no_dup(labels, tok.val)
1219            elif tok.id == _T.OMIT_IF_NO_REF:
1220                omit_if_no_ref = True
1221            elif (labels or omit_if_no_ref) and tok.id != _T.PROPNODENAME:
1222                # Got something like 'foo: bar: }'
1223                self._parse_error("expected node or property name")
1224            else:
1225                return labels, omit_if_no_ref
1226
1227            self._next_token()
1228
1229    def _parse_assignment(self, prop):
1230        # Parses the right-hand side of property assignment
1231        #
1232        # prop:
1233        #   'Property' instance being assigned
1234
1235        # Remove any old value, path/phandle references, and in-value labels,
1236        # in case the property value is being overridden
1237        prop.value = b""
1238        prop._markers = []
1239
1240        while True:
1241            # Parse labels before the value (e.g., '..., label: < 0 >')
1242            self._parse_value_labels(prop)
1243
1244            tok = self._next_token()
1245
1246            if tok.val == "<":
1247                self._parse_cells(prop, 4)
1248
1249            elif tok.id == _T.BITS:
1250                n_bits = self._expect_num()
1251                if n_bits not in {8, 16, 32, 64}:
1252                    self._parse_error("expected 8, 16, 32, or 64")
1253                self._expect_token("<")
1254                self._parse_cells(prop, n_bits//8)
1255
1256            elif tok.val == "[":
1257                self._parse_bytes(prop)
1258
1259            elif tok.id == _T.STRING:
1260                prop._add_marker(_MarkerType.STRING)
1261                prop.value += self._unescape(tok.val.encode("utf-8")) + b"\0"
1262
1263            elif tok.id == _T.REF:
1264                prop._add_marker(_MarkerType.PATH, tok.val)
1265
1266            elif tok.id == _T.INCBIN:
1267                self._parse_incbin(prop)
1268
1269            else:
1270                self._parse_error("malformed value")
1271
1272            # Parse labels after the value (e.g., '< 0 > label:, ...')
1273            self._parse_value_labels(prop)
1274
1275            tok = self._next_token()
1276            if tok.val == ";":
1277                return
1278            if tok.val == ",":
1279                continue
1280            self._parse_error("expected ';' or ','")
1281
1282    def _parse_cells(self, prop, n_bytes):
1283        # Parses '<...>'
1284
1285        prop._add_marker(_N_BYTES_TO_TYPE[n_bytes])
1286
1287        while True:
1288            tok = self._peek_token()
1289            if tok.id == _T.REF:
1290                self._next_token()
1291                if n_bytes != 4:
1292                    self._parse_error("phandle references are only allowed in "
1293                                      "arrays with 32-bit elements")
1294                prop._add_marker(_MarkerType.PHANDLE, tok.val)
1295
1296            elif tok.id == _T.LABEL:
1297                prop._add_marker(_MarkerType.LABEL, tok.val)
1298                self._next_token()
1299
1300            elif self._check_token(">"):
1301                return
1302
1303            else:
1304                # Literal value
1305                num = self._eval_prim()
1306                try:
1307                    prop.value += num.to_bytes(n_bytes, "big")
1308                except OverflowError:
1309                    try:
1310                        # Try again as a signed number, in case it's negative
1311                        prop.value += num.to_bytes(n_bytes, "big", signed=True)
1312                    except OverflowError:
1313                        self._parse_error(
1314                            f"{num} does not fit in {8*n_bytes} bits")
1315
1316    def _parse_bytes(self, prop):
1317        # Parses '[ ... ]'
1318
1319        prop._add_marker(_MarkerType.UINT8)
1320
1321        while True:
1322            tok = self._next_token()
1323            if tok.id == _T.BYTE:
1324                prop.value += tok.val.to_bytes(1, "big")
1325
1326            elif tok.id == _T.LABEL:
1327                prop._add_marker(_MarkerType.LABEL, tok.val)
1328
1329            elif tok.val == "]":
1330                return
1331
1332            else:
1333                self._parse_error("expected two-digit byte or ']'")
1334
1335    def _parse_incbin(self, prop):
1336        # Parses
1337        #
1338        #   /incbin/ ("filename")
1339        #
1340        # and
1341        #
1342        #   /incbin/ ("filename", <offset>, <size>)
1343
1344        prop._add_marker(_MarkerType.UINT8)
1345
1346        self._expect_token("(")
1347
1348        tok = self._next_token()
1349        if tok.id != _T.STRING:
1350            self._parse_error("expected quoted filename")
1351        filename = tok.val
1352
1353        tok = self._next_token()
1354        if tok.val == ",":
1355            offset = self._eval_prim()
1356            self._expect_token(",")
1357            size = self._eval_prim()
1358            self._expect_token(")")
1359        else:
1360            if tok.val != ")":
1361                self._parse_error("expected ',' or ')'")
1362            offset = None
1363
1364        try:
1365            with self._open(filename, "rb") as f:
1366                if offset is None:
1367                    prop.value += f.read()
1368                else:
1369                    f.seek(offset)
1370                    prop.value += f.read(size)
1371        except OSError as e:
1372            self._parse_error(f"could not read '{filename}': {e}")
1373
1374    def _parse_value_labels(self, prop):
1375        # _parse_assignment() helper for parsing labels before/after each
1376        # comma-separated value
1377
1378        while True:
1379            tok = self._peek_token()
1380            if tok.id != _T.LABEL:
1381                return
1382            prop._add_marker(_MarkerType.LABEL, tok.val)
1383            self._next_token()
1384
1385    def _node_phandle(self, node):
1386        # Returns the phandle for Node 'node', creating a new phandle if the
1387        # node has no phandle, and fixing up the value for existing
1388        # self-referential phandles (which get set to b'\0\0\0\0' initially).
1389        # Self-referential phandles must be rewritten instead of recreated, so
1390        # that labels are preserved.
1391
1392        if "phandle" in node.props:
1393            phandle_prop = node.props["phandle"]
1394        else:
1395            phandle_prop = Property(node, "phandle")
1396            phandle_prop._add_marker(_MarkerType.UINT32)  # For displaying
1397            phandle_prop.value = b'\0\0\0\0'
1398
1399        if phandle_prop.value == b'\0\0\0\0':
1400            phandle_i = 1
1401            while phandle_i in self.phandle2node:
1402                phandle_i += 1
1403            self.phandle2node[phandle_i] = node
1404
1405            phandle_prop.value = phandle_i.to_bytes(4, "big")
1406            node.props["phandle"] = phandle_prop
1407
1408        return phandle_prop.value
1409
1410    # Expression evaluation
1411
1412    def _eval_prim(self):
1413        tok = self._peek_token()
1414        if tok.id in (_T.NUM, _T.CHAR_LITERAL):
1415            return self._next_token().val
1416
1417        tok = self._next_token()
1418        if tok.val != "(":
1419            self._parse_error("expected number or parenthesized expression")
1420        val = self._eval_ternary()
1421        self._expect_token(")")
1422        return val
1423
1424    def _eval_ternary(self):
1425        val = self._eval_or()
1426        if self._check_token("?"):
1427            if_val = self._eval_ternary()
1428            self._expect_token(":")
1429            else_val = self._eval_ternary()
1430            return if_val if val else else_val
1431        return val
1432
1433    def _eval_or(self):
1434        val = self._eval_and()
1435        while self._check_token("||"):
1436            val = 1 if self._eval_and() or val else 0
1437        return val
1438
1439    def _eval_and(self):
1440        val = self._eval_bitor()
1441        while self._check_token("&&"):
1442            val = 1 if self._eval_bitor() and val else 0
1443        return val
1444
1445    def _eval_bitor(self):
1446        val = self._eval_bitxor()
1447        while self._check_token("|"):
1448            val |= self._eval_bitxor()
1449        return val
1450
1451    def _eval_bitxor(self):
1452        val = self._eval_bitand()
1453        while self._check_token("^"):
1454            val ^= self._eval_bitand()
1455        return val
1456
1457    def _eval_bitand(self):
1458        val = self._eval_eq()
1459        while self._check_token("&"):
1460            val &= self._eval_eq()
1461        return val
1462
1463    def _eval_eq(self):
1464        val = self._eval_rela()
1465        while True:
1466            if self._check_token("=="):
1467                val = 1 if val == self._eval_rela() else 0
1468            elif self._check_token("!="):
1469                val = 1 if val != self._eval_rela() else 0
1470            else:
1471                return val
1472
1473    def _eval_rela(self):
1474        val = self._eval_shift()
1475        while True:
1476            if self._check_token("<"):
1477                val = 1 if val < self._eval_shift() else 0
1478            elif self._check_token(">"):
1479                val = 1 if val > self._eval_shift() else 0
1480            elif self._check_token("<="):
1481                val = 1 if val <= self._eval_shift() else 0
1482            elif self._check_token(">="):
1483                val = 1 if val >= self._eval_shift() else 0
1484            else:
1485                return val
1486
1487    def _eval_shift(self):
1488        val = self._eval_add()
1489        while True:
1490            if self._check_token("<<"):
1491                val <<= self._eval_add()
1492            elif self._check_token(">>"):
1493                val >>= self._eval_add()
1494            else:
1495                return val
1496
1497    def _eval_add(self):
1498        val = self._eval_mul()
1499        while True:
1500            if self._check_token("+"):
1501                val += self._eval_mul()
1502            elif self._check_token("-"):
1503                val -= self._eval_mul()
1504            else:
1505                return val
1506
1507    def _eval_mul(self):
1508        val = self._eval_unary()
1509        while True:
1510            if self._check_token("*"):
1511                val *= self._eval_unary()
1512            elif self._check_token("/"):
1513                denom = self._eval_unary()
1514                if not denom:
1515                    self._parse_error("division by zero")
1516                val //= denom
1517            elif self._check_token("%"):
1518                denom = self._eval_unary()
1519                if not denom:
1520                    self._parse_error("division by zero")
1521                val %= denom
1522            else:
1523                return val
1524
1525    def _eval_unary(self):
1526        if self._check_token("-"):
1527            return -self._eval_unary()
1528        if self._check_token("~"):
1529            return ~self._eval_unary()
1530        if self._check_token("!"):
1531            return 0 if self._eval_unary() else 1
1532        return self._eval_prim()
1533
1534    #
1535    # Lexing
1536    #
1537
1538    def _check_token(self, val):
1539        if self._peek_token().val == val:
1540            self._next_token()
1541            return True
1542        return False
1543
1544    def _peek_token(self):
1545        if not self._saved_token:
1546            self._saved_token = self._next_token()
1547        return self._saved_token
1548
1549    def _next_token(self):
1550        if self._saved_token:
1551            tmp = self._saved_token
1552            self._saved_token = None
1553            return tmp
1554
1555        while True:
1556            tok_id = None
1557
1558            match = _token_re.match(self._file_contents, self._tok_end_i)
1559            if match:
1560                tok_id = match.lastindex
1561                if tok_id == _T.CHAR_LITERAL:
1562                    val = self._unescape(match.group(tok_id).encode("utf-8"))
1563                    if len(val) != 1:
1564                        self._parse_error("character literals must be length 1")
1565                    tok_val = ord(val)
1566                else:
1567                    tok_val = match.group(tok_id)
1568
1569            elif self._lexer_state is _DEFAULT:
1570                match = _num_re.match(self._file_contents, self._tok_end_i)
1571                if match:
1572                    tok_id = _T.NUM
1573                    num_s = match.group(1)
1574                    tok_val = int(num_s,
1575                                  16 if num_s.startswith(("0x", "0X")) else
1576                                  8 if num_s[0] == "0" else
1577                                  10)
1578
1579            elif self._lexer_state is _EXPECT_PROPNODENAME:
1580                match = _propnodename_re.match(self._file_contents,
1581                                               self._tok_end_i)
1582                if match:
1583                    tok_id = _T.PROPNODENAME
1584                    tok_val = match.group(1)
1585                    self._lexer_state = _DEFAULT
1586
1587            else:  # self._lexer_state is _EXPECT_BYTE
1588                match = _byte_re.match(self._file_contents, self._tok_end_i)
1589                if match:
1590                    tok_id = _T.BYTE
1591                    tok_val = int(match.group(), 16)
1592
1593            if not tok_id:
1594                match = _misc_re.match(self._file_contents, self._tok_end_i)
1595                if match:
1596                    tok_id = _T.MISC
1597                    tok_val = match.group()
1598                else:
1599                    self._tok_i = self._tok_end_i
1600                    # Could get here due to a node/property naming appearing in
1601                    # an unexpected context as well as for bad characters in
1602                    # files. Generate a token for it so that the error can
1603                    # trickle up to some context where we can give a more
1604                    # helpful error message.
1605                    return _Token(_T.BAD, "<unknown token>")
1606
1607            self._tok_i = match.start()
1608            self._tok_end_i = match.end()
1609
1610            if tok_id == _T.SKIP:
1611                self._lineno += tok_val.count("\n")
1612                continue
1613
1614            # /include/ is handled in the lexer in the C tools as well, and can
1615            # appear anywhere
1616            if tok_id == _T.INCLUDE:
1617                # Can have newlines between /include/ and the filename
1618                self._lineno += tok_val.count("\n")
1619                # Do this manual extraction instead of doing it in the regex so
1620                # that we can properly count newlines
1621                filename = tok_val[tok_val.find('"') + 1:-1]
1622                self._enter_file(filename)
1623                continue
1624
1625            if tok_id == _T.LINE:
1626                # #line directive
1627                self._lineno = int(tok_val.split()[0]) - 1
1628                self.filename = tok_val[tok_val.find('"') + 1:-1]
1629                continue
1630
1631            if tok_id == _T.EOF:
1632                if self._filestack:
1633                    self._leave_file()
1634                    continue
1635                return _Token(_T.EOF, "<EOF>")
1636
1637            # State handling
1638
1639            if (tok_id in (_T.DEL_PROP, _T.DEL_NODE, _T.OMIT_IF_NO_REF)
1640                or tok_val in ("{", ";")):
1641
1642                self._lexer_state = _EXPECT_PROPNODENAME
1643
1644            elif tok_val == "[":
1645                self._lexer_state = _EXPECT_BYTE
1646
1647            elif tok_id in (_T.MEMRESERVE, _T.BITS) or tok_val == "]":
1648                self._lexer_state = _DEFAULT
1649
1650            return _Token(tok_id, tok_val)
1651
1652    def _expect_token(self, tok_val):
1653        # Raises an error if the next token does not have the string value
1654        # 'tok_val'. Returns the token.
1655
1656        tok = self._next_token()
1657        if tok.val != tok_val:
1658            self._parse_error(f"expected '{tok_val}', not '{tok.val}'")
1659
1660        return tok
1661
1662    def _expect_num(self):
1663        # Raises an error if the next token is not a number. Returns the token.
1664
1665        tok = self._next_token()
1666        if tok.id != _T.NUM:
1667            self._parse_error("expected number")
1668        return tok.val
1669
1670    def _parse_error(self, s):
1671        # This works out for the first line of the file too, where rfind()
1672        # returns -1
1673        column = self._tok_i - self._file_contents.rfind("\n", 0,
1674                                                         self._tok_i + 1)
1675        _err(f"{self.filename}:{self._lineno} (column {column}): "
1676             f"parse error: {s}")
1677
1678    def _enter_file(self, filename):
1679        # Enters the /include/d file 'filename', remembering the position in
1680        # the /include/ing file for later
1681
1682        self._filestack.append(
1683            _FileStackElt(self.filename, self._lineno,
1684                          self._file_contents, self._tok_end_i))
1685
1686        # Handle escapes in filenames, just for completeness
1687        filename = self._unescape(filename.encode("utf-8"))
1688        try:
1689            filename = filename.decode("utf-8")
1690        except UnicodeDecodeError:
1691            self._parse_error("filename is not valid UTF-8")
1692
1693        with self._open(filename, encoding="utf-8") as f:
1694            try:
1695                self._file_contents = f.read()
1696            except OSError as e:
1697                self._parse_error(e)
1698
1699        # Check for recursive /include/
1700        for i, parent in enumerate(self._filestack):
1701            if filename == parent[0]:
1702                self._parse_error("recursive /include/:\n" + " ->\n".join(
1703                    [f"{parent[0]}:{parent[1]}"
1704                     for parent in self._filestack[i:]] +
1705                    [filename]))
1706
1707        self.filename = f.name
1708        self._lineno = 1
1709        self._tok_end_i = 0
1710
1711    def _leave_file(self):
1712        # Leaves an /include/d file, returning to the file that /include/d it
1713
1714        self.filename, self._lineno, self._file_contents, self._tok_end_i = (
1715            self._filestack.pop())
1716
1717    def _next_ref2node(self):
1718        # Checks that the next token is a label/path reference and returns the
1719        # Node it points to. Only used during parsing, so uses _parse_error()
1720        # on errors to save some code in callers.
1721
1722        label = self._next_token()
1723        if label.id != _T.REF:
1724            self._parse_error(
1725                "expected label (&foo) or path (&{/foo/bar}) reference")
1726        try:
1727            return self._ref2node(label.val)
1728        except DTError as e:
1729            self._parse_error(e)
1730
1731    def _ref2node(self, s):
1732        # Returns the Node the label/path reference 's' points to
1733
1734        if s[0] == "{":
1735            # Path reference (&{/foo/bar})
1736            path = s[1:-1]
1737            if not path.startswith("/"):
1738                _err(f"node path '{path}' does not start with '/'")
1739            # Will raise DTError if the path doesn't exist
1740            return _root_and_path_to_node(self.root, path, path)
1741
1742        # Label reference (&foo).
1743
1744        # label2node hasn't been filled in yet, and using it would get messy
1745        # when nodes are deleted
1746        for node in self.node_iter():
1747            if s in node.labels:
1748                return node
1749
1750        _err(f"undefined node label '{s}'")
1751
1752    #
1753    # Post-processing
1754    #
1755
1756    def _register_phandles(self):
1757        # Registers any manually-inserted phandle properties in
1758        # self.phandle2node, so that we can avoid allocating any phandles from
1759        # that set. Also checks the format of the phandles and does misc.
1760        # sanity checking.
1761
1762        for node in self.node_iter():
1763            phandle = node.props.get("phandle")
1764            if phandle:
1765                if len(phandle.value) != 4:
1766                    _err(f"{node.path}: bad phandle length "
1767                         f"({len(phandle.value)}), expected 4 bytes")
1768
1769                is_self_referential = False
1770                for marker in phandle._markers:
1771                    _, marker_type, ref = marker
1772                    if marker_type is _MarkerType.PHANDLE:
1773                        # The phandle's value is itself a phandle reference
1774                        if self._ref2node(ref) is node:
1775                            # Alright to set a node's phandle equal to its own
1776                            # phandle. It'll force a new phandle to be
1777                            # allocated even if the node is otherwise
1778                            # unreferenced.
1779                            is_self_referential = True
1780                            break
1781
1782                        _err(f"{node.path}: {phandle.name} "
1783                             "refers to another node")
1784
1785                # Could put on else on the 'for' above too, but keep it
1786                # somewhat readable
1787                if not is_self_referential:
1788                    phandle_val = int.from_bytes(phandle.value, "big")
1789
1790                    if phandle_val in {0, 0xFFFFFFFF}:
1791                        _err(f"{node.path}: bad value {phandle_val:#010x} "
1792                             f"for {phandle.name}")
1793
1794                    if phandle_val in self.phandle2node:
1795                        _err(f"{node.path}: duplicated phandle {phandle_val:#x} "
1796                             "(seen before at "
1797                             f"{self.phandle2node[phandle_val].path})")
1798
1799                    self.phandle2node[phandle_val] = node
1800
1801    def _fixup_props(self):
1802        # Fills in node path and phandle references in property values, and
1803        # registers labels within values. This must be done after parsing,
1804        # since forwards references are allowed and nodes and properties might
1805        # be deleted.
1806
1807        for node in self.node_iter():
1808            # The tuple() avoids a 'dictionary changed size during iteration'
1809            # error
1810            for prop in tuple(node.props.values()):
1811                # 'prev_pos' and 'pos' are indices in the unpatched
1812                # property value. The result is built up in 'res'.
1813                prev_pos = 0
1814                res = b""
1815
1816                for marker in prop._markers:
1817                    pos, marker_type, ref = marker
1818
1819                    # Add data before the marker, reading from the unpatched
1820                    # property value
1821                    res += prop.value[prev_pos:pos]
1822
1823                    # Fix the marker offset so that it's correct for the
1824                    # patched property value, for later (not used in this
1825                    # function). The offset might change due to path
1826                    # references, which expand to something like "/foo/bar".
1827                    marker[0] = len(res)
1828
1829                    if marker_type is _MarkerType.LABEL:
1830                        # This is a temporary format so that we can catch
1831                        # duplicate references. prop._label_offset_lst is changed
1832                        # to a dictionary that maps labels to offsets in
1833                        # _register_labels().
1834                        _append_no_dup(prop._label_offset_lst, (ref, len(res)))
1835                    elif marker_type in (_MarkerType.PATH, _MarkerType.PHANDLE):
1836                        # Path or phandle reference
1837                        try:
1838                            ref_node = self._ref2node(ref)
1839                        except DTError as e:
1840                            _err(f"{prop.node.path}: {e}")
1841
1842                        # For /omit-if-no-ref/
1843                        ref_node._is_referenced = True
1844
1845                        if marker_type is _MarkerType.PATH:
1846                            res += ref_node.path.encode("utf-8") + b'\0'
1847                        else:  # marker_type is PHANDLE
1848                            res += self._node_phandle(ref_node)
1849                            # Skip over the dummy phandle placeholder
1850                            pos += 4
1851
1852                    prev_pos = pos
1853
1854                # Store the final fixed-up value. Add the data after the last
1855                # marker.
1856                prop.value = res + prop.value[prev_pos:]
1857
1858    def _register_aliases(self):
1859        # Registers aliases from the /aliases node in self.alias2node. Also
1860        # checks the format of the alias properties.
1861
1862        # We copy this to self.alias2node at the end to avoid get_node()
1863        # looking up paths via other aliases while verifying aliases
1864        alias2node = {}
1865
1866        alias_re = re.compile("[0-9a-z-]+$")
1867
1868        aliases = self.root.nodes.get("aliases")
1869        if aliases:
1870            for prop in aliases.props.values():
1871                if not alias_re.match(prop.name):
1872                    _err(f"/aliases: alias property name '{prop.name}' "
1873                         "should include only characters from [0-9a-z-]")
1874
1875                # Property.to_path() checks that the node exists, has
1876                # the right type, etc. Swallow errors for invalid
1877                # aliases with self._force.
1878                try:
1879                    alias2node[prop.name] = prop.to_path()
1880                except DTError:
1881                    if self._force:
1882                        continue
1883                    raise
1884
1885        self.alias2node = alias2node
1886
1887    def _remove_unreferenced(self):
1888        # Removes any unreferenced nodes marked with /omit-if-no-ref/ from the
1889        # tree
1890
1891        # tuple() is to avoid 'RuntimeError: dictionary changed size during
1892        # iteration' errors
1893        for node in tuple(self.node_iter()):
1894            if node._omit_if_no_ref and not node._is_referenced:
1895                node._del()
1896
1897    def _register_labels(self):
1898        # Checks for duplicate labels and registers labels in label2node,
1899        # label2prop, and label2prop_offset
1900
1901        label2things = collections.defaultdict(set)
1902
1903        # Register all labels and the nodes/props they point to in label2things
1904        for node in self.node_iter():
1905            for label in node.labels:
1906                label2things[label].add(node)
1907                self.label2node[label] = node
1908
1909            for prop in node.props.values():
1910                for label in prop.labels:
1911                    label2things[label].add(prop)
1912                    self.label2prop[label] = prop
1913
1914                for label, offset in prop._label_offset_lst:
1915                    label2things[label].add((prop, offset))
1916                    self.label2prop_offset[label] = (prop, offset)
1917
1918                # See _fixup_props()
1919                prop.offset_labels = dict(prop._label_offset_lst)
1920
1921        for label, things in label2things.items():
1922            if len(things) > 1:
1923                strings = []
1924                for thing in things:
1925                    if isinstance(thing, Node):
1926                        strings.append(f"on {thing.path}")
1927                    elif isinstance(thing, Property):
1928                        strings.append(f"on property '{thing.name}' "
1929                                       f"of node {thing.node.path}")
1930                    else:
1931                        # Label within property value
1932                        strings.append("in the value of property "
1933                                       f"'{thing[0].name}' of node "
1934                                       f"{thing[0].node.path}")
1935
1936                # Give consistent error messages to help with testing
1937                strings.sort()
1938
1939                _err(f"Label '{label}' appears " + " and ".join(strings))
1940
1941
1942    #
1943    # Misc.
1944    #
1945
1946    def _unescape(self, b):
1947        # Replaces backslash escapes in the 'bytes' array 'b'. We can't do this at
1948        # the string level, because the result might not be valid UTF-8 when
1949        # octal/hex escapes are involved.
1950
1951        def sub(match):
1952            esc = match.group(1)
1953            if esc == b"a": return b"\a"
1954            if esc == b"b": return b"\b"
1955            if esc == b"t": return b"\t"
1956            if esc == b"n": return b"\n"
1957            if esc == b"v": return b"\v"
1958            if esc == b"f": return b"\f"
1959            if esc == b"r": return b"\r"
1960
1961            if esc[0] in b"01234567":
1962                # Octal escape
1963                try:
1964                    return int(esc, 8).to_bytes(1, "big")
1965                except OverflowError:
1966                    self._parse_error("octal escape out of range (> 255)")
1967
1968            if esc[0] == ord("x") and len(esc) > 1:
1969                # Hex escape
1970                return int(esc[1:], 16).to_bytes(1, "big")
1971
1972            # Return <char> as-is for other \<char>
1973            return esc[0].to_bytes(1, "big")
1974
1975        return _unescape_re.sub(sub, b)
1976
1977    def _open(self, filename, mode="r", **kwargs):
1978        # Wrapper around standard Python open(), accepting the same params.
1979        # But searches for a 'filename' file in the directory of the current
1980        # file and the include path.
1981
1982        # The C tools support specifying stdin with '-' too
1983        if filename == "-":
1984            return sys.stdin.buffer if "b" in mode else sys.stdin
1985
1986        # Try the directory of the current file first
1987        dirname = os.path.dirname(self.filename)
1988        try:
1989            return open(os.path.join(dirname, filename), mode, **kwargs)
1990        except OSError as e:
1991            if e.errno != errno.ENOENT:
1992                self._parse_error(e)
1993
1994            # Try each directory from the include path
1995            for path in self._include_path:
1996                try:
1997                    return open(os.path.join(path, filename), mode, **kwargs)
1998                except OSError as e:
1999                    if e.errno != errno.ENOENT:
2000                        self._parse_error(e)
2001                    continue
2002
2003            self._parse_error(f"'{filename}' could not be found")
2004
2005#
2006# Public functions
2007#
2008
2009def to_num(data: bytes, length: Optional[int] = None,
2010           signed: bool = False) -> int:
2011    """
2012    Converts the 'bytes' array 'data' to a number. The value is expected to be
2013    in big-endian format, which is standard in devicetree.
2014
2015    length (default: None):
2016      The expected length of the value in bytes, as a simple type check. If
2017      None, the length check is skipped.
2018
2019    signed (default: False):
2020      If True, the value will be interpreted as signed rather than unsigned.
2021    """
2022    _check_is_bytes(data)
2023    if length is not None:
2024        _check_length_positive(length)
2025        if len(data) != length:
2026            _err(f"{data!r} is {len(data)} bytes long, expected {length}")
2027
2028    return int.from_bytes(data, "big", signed=signed)
2029
2030def to_nums(data: bytes, length: int = 4, signed: bool = False) -> List[int]:
2031    """
2032    Like Property.to_nums(), but takes an arbitrary 'bytes' array. The values
2033    are assumed to be in big-endian format, which is standard in devicetree.
2034    """
2035    _check_is_bytes(data)
2036    _check_length_positive(length)
2037
2038    if len(data) % length:
2039        _err(f"{data!r} is {len(data)} bytes long, "
2040             f"expected a length that's a multiple of {length}")
2041
2042    return [int.from_bytes(data[i:i + length], "big", signed=signed)
2043            for i in range(0, len(data), length)]
2044
2045#
2046# Private helpers
2047#
2048
2049def _check_is_bytes(data):
2050    if not isinstance(data, bytes):
2051        _err(f"'{data}' has type '{type(data).__name__}', expected 'bytes'")
2052
2053def _check_length_positive(length):
2054    if length < 1:
2055        _err("'length' must be greater than zero, was " + str(length))
2056
2057def _append_no_dup(lst, elm):
2058    # Appends 'elm' to 'lst', but only if it isn't already in 'lst'. Lets us
2059    # preserve order, which a set() doesn't.
2060
2061    if elm not in lst:
2062        lst.append(elm)
2063
2064def _decode_and_escape(b):
2065    # Decodes the 'bytes' array 'b' as UTF-8 and backslash-escapes special
2066    # characters
2067
2068    # Hacky but robust way to avoid double-escaping any '\' spit out by
2069    # 'backslashreplace' bytes.translate() can't map to more than a single
2070    # byte, but str.translate() can map to more than one character, so it's
2071    # nice here. There's probably a nicer way to do this.
2072    return (b.decode("utf-8", "surrogateescape")
2073            .translate(_escape_table)
2074            .encode("utf-8", "surrogateescape")
2075            .decode("utf-8", "backslashreplace"))
2076
2077def _root_and_path_to_node(cur, path, fullpath):
2078    # Returns the node pointed at by 'path', relative to the Node 'cur'. For
2079    # example, if 'cur' has path /foo/bar, and 'path' is "baz/qaz", then the
2080    # node with path /foo/bar/baz/qaz is returned. 'fullpath' is the path as
2081    # given in the .dts file, for error messages.
2082
2083    for component in path.split("/"):
2084        # Collapse multiple / in a row, and allow a / at the end
2085        if not component:
2086            continue
2087
2088        if component not in cur.nodes:
2089            _err(f"component '{component}' in path '{fullpath}' "
2090                 "does not exist")
2091
2092        cur = cur.nodes[component]
2093
2094    return cur
2095
2096def _err(msg) -> NoReturn:
2097    raise DTError(msg)
2098
2099_escape_table = str.maketrans({
2100    "\\": "\\\\",
2101    '"': '\\"',
2102    "\a": "\\a",
2103    "\b": "\\b",
2104    "\t": "\\t",
2105    "\n": "\\n",
2106    "\v": "\\v",
2107    "\f": "\\f",
2108    "\r": "\\r"})
2109
2110# Lexer states
2111_DEFAULT = 0
2112_EXPECT_PROPNODENAME = 1
2113_EXPECT_BYTE = 2
2114
2115_num_re = re.compile(r"(0[xX][0-9a-fA-F]+|[0-9]+)(?:ULL|UL|LL|U|L)?")
2116
2117# A leading \ is allowed property and node names, probably to allow weird node
2118# names that would clash with other stuff
2119_propnodename_re = re.compile(r"\\?([a-zA-Z0-9,._+*#?@-]+)")
2120
2121# Node names are more restrictive than property names.
2122_nodename_chars = set(string.ascii_letters + string.digits + ',._+-@')
2123
2124# Misc. tokens that are tried after a property/node name. This is important, as
2125# there's overlap with the allowed characters in names.
2126_misc_re = re.compile(
2127    "|".join(re.escape(pat) for pat in (
2128        "==", "!=", "!", "=", ",", ";", "+", "-", "*", "/", "%", "~", "?", ":",
2129        "^", "(", ")", "{", "}", "[", "]", "<<", "<=", "<", ">>", ">=", ">",
2130        "||", "|", "&&", "&")))
2131
2132_byte_re = re.compile(r"[0-9a-fA-F]{2}")
2133
2134# Matches a backslash escape within a 'bytes' array. Captures the 'c' part of
2135# '\c', where c might be a single character or an octal/hex escape.
2136_unescape_re = re.compile(br'\\([0-7]{1,3}|x[0-9A-Fa-f]{1,2}|.)')
2137
2138def _init_tokens():
2139    # Builds a (<token 1>)|(<token 2>)|... regex and returns it. The
2140    # way this is constructed makes the token's value as an int appear
2141    # in match.lastindex after a match.
2142
2143    # Each pattern must have exactly one capturing group, which can capture any
2144    # part of the pattern. This makes match.lastindex match the token type.
2145    # _Token.val is based on the captured string.
2146    token_spec = {
2147        _T.INCLUDE: r'(/include/\s*"(?:[^\\"]|\\.)*")',
2148        # #line directive or GCC linemarker
2149        _T.LINE:
2150        r'^#(?:line)?[ \t]+([0-9]+[ \t]+"(?:[^\\"]|\\.)*")(?:[ \t]+[0-9]+){0,4}',
2151
2152        _T.STRING: r'"((?:[^\\"]|\\.)*)"',
2153        _T.DTS_V1: r"(/dts-v1/)",
2154        _T.PLUGIN: r"(/plugin/)",
2155        _T.MEMRESERVE: r"(/memreserve/)",
2156        _T.BITS: r"(/bits/)",
2157        _T.DEL_PROP: r"(/delete-property/)",
2158        _T.DEL_NODE: r"(/delete-node/)",
2159        _T.OMIT_IF_NO_REF: r"(/omit-if-no-ref/)",
2160        _T.LABEL: r"([a-zA-Z_][a-zA-Z0-9_]*):",
2161        _T.CHAR_LITERAL: r"'((?:[^\\']|\\.)*)'",
2162        _T.REF: r"&([a-zA-Z_][a-zA-Z0-9_]*|{[a-zA-Z0-9,._+*#?@/-]*})",
2163        _T.INCBIN: r"(/incbin/)",
2164        # Whitespace, C comments, and C++ comments
2165        _T.SKIP: r"(\s+|(?:/\*(?:.|\n)*?\*/)|//.*$)",
2166        # Return a token for end-of-file so that the parsing code can
2167        # always assume that there are more tokens when looking
2168        # ahead. This simplifies things.
2169        _T.EOF: r"(\Z)",
2170    }
2171
2172    # MULTILINE is needed for C++ comments and #line directives
2173    return re.compile("|".join(token_spec[tok_id] for tok_id in
2174                               range(1, _T.EOF + 1)),
2175                      re.MULTILINE | re.ASCII)
2176
2177_token_re = _init_tokens()
2178
2179_TYPE_TO_N_BYTES = {
2180    _MarkerType.UINT8: 1,
2181    _MarkerType.UINT16: 2,
2182    _MarkerType.UINT32: 4,
2183    _MarkerType.UINT64: 8,
2184}
2185
2186_N_BYTES_TO_TYPE = {
2187    1: _MarkerType.UINT8,
2188    2: _MarkerType.UINT16,
2189    4: _MarkerType.UINT32,
2190    8: _MarkerType.UINT64,
2191}
2192
2193_N_BYTES_TO_START_STR = {
2194    1: " [",
2195    2: " /bits/ 16 <",
2196    4: " <",
2197    8: " /bits/ 64 <",
2198}
2199
2200_N_BYTES_TO_END_STR = {
2201    1: " ]",
2202    2: " >",
2203    4: " >",
2204    8: " >",
2205}
2206