1# Copyright (c) 2019, Nordic Semiconductor
2# SPDX-License-Identifier: BSD-3-Clause
3
4# Tip: You can view just the documentation with 'pydoc3 devicetree.dtlib'
5
6"""
7A library for extracting information from .dts (devicetree) files. See the
8documentation for the DT and Node classes for more information.
9
10The top-level entry point of the library is the DT class. DT.__init__() takes a
11.dts file to parse and a list of directories to search for any /include/d
12files.
13"""
14
15import collections
16import enum
17import errno
18import os
19import re
20import string
21import sys
22import textwrap
23from typing import Any, Dict, Iterable, List, \
24    NamedTuple, NoReturn, Optional, Set, Tuple, TYPE_CHECKING, Union
25
26# NOTE: tests/test_dtlib.py is the test suite for this library.
27
28class DTError(Exception):
29    "Exception raised for devicetree-related errors"
30
31class Node:
32    r"""
33    Represents a node in the devicetree ('node-name { ... };').
34
35    These attributes are available on Node instances:
36
37    name:
38      The name of the node (a string).
39
40    unit_addr:
41      The portion after the '@' in the node's name, or the empty string if the
42      name has no '@' in it.
43
44      Note that this is a string. Run int(node.unit_addr, 16) to get an
45      integer.
46
47    props:
48      A dict that maps the properties defined on the node to
49      their values. 'props' is indexed by property name (a string), and values
50      are Property objects.
51
52      To convert property values to Python numbers or strings, use
53      dtlib.to_num(), dtlib.to_nums(), or dtlib.to_string().
54
55      Property values are represented as 'bytes' arrays to support the full
56      generality of DTS, which allows assignments like
57
58        x = "foo", < 0x12345678 >, [ 9A ];
59
60      This gives x the value b"foo\0\x12\x34\x56\x78\x9A". Numbers in DTS are
61      stored in big-endian format.
62
63    nodes:
64      A dict containing the subnodes of the node, indexed by name.
65
66    labels:
67      A list with all labels pointing to the node, in the same order as the
68      labels appear, but with duplicates removed.
69
70      'label_1: label_2: node { ... };' gives 'labels' the value
71      ["label_1", "label_2"].
72
73    parent:
74      The parent Node of the node. 'None' for the root node.
75
76    path:
77      The path to the node as a string, e.g. "/foo/bar".
78
79    dt:
80      The DT instance this node belongs to.
81    """
82
83    #
84    # Public interface
85    #
86
87    def __init__(self, name: str, parent: Optional['Node'], dt: 'DT'):
88        """
89        Node constructor. Not meant to be called directly by clients.
90        """
91        # Remember to update DT.__deepcopy__() if you change this.
92
93        self._name = name
94        self.props: Dict[str, 'Property'] = {}
95        self.nodes: Dict[str, 'Node'] = {}
96        self.labels: List[str] = []
97        self.parent = parent
98        self.dt = dt
99
100        self._omit_if_no_ref = False
101        self._is_referenced = False
102
103        if name.count("@") > 1:
104            dt._parse_error("multiple '@' in node name")
105        if not name == "/":
106            for char in name:
107                if char not in _nodename_chars:
108                    dt._parse_error(f"{self.path}: bad character '{char}' "
109                                    "in node name")
110
111    @property
112    def name(self) -> str:
113        """
114        See the class documentation.
115        """
116        # Converted to a property to discourage renaming -- that has to be done
117        # via DT.move_node.
118        return self._name
119
120    @property
121    def unit_addr(self) -> str:
122        """
123        See the class documentation.
124        """
125        return self.name.partition("@")[2]
126
127    @property
128    def path(self) -> str:
129        """
130        See the class documentation.
131        """
132        node_names = []
133
134        # This dynamic computation is required to be able to move
135        # nodes in the DT class.
136        cur = self
137        while cur.parent:
138            node_names.append(cur.name)
139            cur = cur.parent
140
141        return "/" + "/".join(reversed(node_names))
142
143    def node_iter(self) -> Iterable['Node']:
144        """
145        Returns a generator for iterating over the node and its children,
146        recursively.
147
148        For example, this will iterate over all nodes in the tree (like
149        dt.node_iter()).
150
151          for node in dt.root.node_iter():
152              ...
153        """
154        yield self
155        for node in self.nodes.values():
156            yield from node.node_iter()
157
158    def _get_prop(self, name: str) -> 'Property':
159        # Returns the property named 'name' on the node, creating it if it
160        # doesn't already exist
161
162        prop = self.props.get(name)
163        if not prop:
164            prop = Property(self, name)
165            self.props[name] = prop
166        return prop
167
168    def _del(self) -> None:
169        # Removes the node from the tree. When called on the root node,
170        # this method will leave it empty but still part of the tree.
171
172        if self.parent is None:
173            self.nodes.clear()
174            self.props.clear()
175            return
176        self.parent.nodes.pop(self.name)  # type: ignore
177
178    def __str__(self):
179        """
180        Returns a DTS representation of the node. Called automatically if the
181        node is print()ed.
182        """
183        s = "".join(label + ": " for label in self.labels)
184
185        s += f"{self.name} {{\n"
186
187        for prop in self.props.values():
188            s += "\t" + str(prop) + "\n"
189
190        for child in self.nodes.values():
191            s += textwrap.indent(child.__str__(), "\t") + "\n"
192
193        s += "};"
194
195        return s
196
197    def __repr__(self):
198        """
199        Returns some information about the Node instance. Called automatically
200        if the Node instance is evaluated.
201        """
202        return f"<Node {self.path} in '{self.dt.filename}'>"
203
204# See Property.type
205class Type(enum.IntEnum):
206    EMPTY = 0
207    BYTES = 1
208    NUM = 2
209    NUMS = 3
210    STRING = 4
211    STRINGS = 5
212    PATH = 6
213    PHANDLE = 7
214    PHANDLES = 8
215    PHANDLES_AND_NUMS = 9
216    COMPOUND = 10
217
218class _MarkerType(enum.IntEnum):
219    # Types of markers in property values
220
221    # References
222    PATH = 0     # &foo
223    PHANDLE = 1  # <&foo>
224    LABEL = 2    # foo: <1 2 3>
225
226    # Start of data blocks of specific type
227    UINT8 = 3   # [00 01 02] (and also used for /incbin/)
228    UINT16 = 4  # /bits/ 16 <1 2 3>
229    UINT32 = 5  # <1 2 3>
230    UINT64 = 6  # /bits/ 64 <1 2 3>
231    STRING = 7  # "foo"
232
233class Property:
234    """
235    Represents a property ('x = ...').
236
237    These attributes are available on Property instances:
238
239    name:
240      The name of the property (a string).
241
242    value:
243      The value of the property, as a 'bytes' string. Numbers are stored in
244      big-endian format, and strings are null-terminated. Putting multiple
245      comma-separated values in an assignment (e.g., 'x = < 1 >, "foo"') will
246      concatenate the values.
247
248      See the to_*() methods for converting the value to other types.
249
250    type:
251      The type of the property, inferred from the syntax used in the
252      assignment. This is one of the following constants (with example
253      assignments):
254
255        Assignment                  | Property.type
256        ----------------------------+------------------------
257        foo;                        | dtlib.Type.EMPTY
258        foo = [];                   | dtlib.Type.BYTES
259        foo = [01 02];              | dtlib.Type.BYTES
260        foo = /bits/ 8 <1>;         | dtlib.Type.BYTES
261        foo = <1>;                  | dtlib.Type.NUM
262        foo = <>;                   | dtlib.Type.NUMS
263        foo = <1 2 3>;              | dtlib.Type.NUMS
264        foo = <1 2>, <3>;           | dtlib.Type.NUMS
265        foo = "foo";                | dtlib.Type.STRING
266        foo = "foo", "bar";         | dtlib.Type.STRINGS
267        foo = <&l>;                 | dtlib.Type.PHANDLE
268        foo = <&l1 &l2 &l3>;        | dtlib.Type.PHANDLES
269        foo = <&l1 &l2>, <&l3>;     | dtlib.Type.PHANDLES
270        foo = <&l1 1 2 &l2 3 4>;    | dtlib.Type.PHANDLES_AND_NUMS
271        foo = <&l1 1 2>, <&l2 3 4>; | dtlib.Type.PHANDLES_AND_NUMS
272        foo = &l;                   | dtlib.Type.PATH
273        *Anything else*             | dtlib.Type.COMPOUND
274
275      *Anything else* includes properties mixing phandle (<&label>) and node
276      path (&label) references with other data.
277
278      Data labels in the property value do not influence the type.
279
280    labels:
281      A list with all labels pointing to the property, in the same order as the
282      labels appear, but with duplicates removed.
283
284      'label_1: label2: x = ...' gives 'labels' the value
285      ["label_1", "label_2"].
286
287    offset_labels:
288      A dictionary that maps any labels within the property's value to their
289      offset, in bytes. For example, 'x = < 0 label_1: 1 label_2: >' gives
290      'offset_labels' the value {"label_1": 4, "label_2": 8}.
291
292      Iteration order will match the order of the labels on Python versions
293      that preserve dict insertion order.
294
295    node:
296      The Node the property is on.
297    """
298
299    #
300    # Public interface
301    #
302
303    def __init__(self, node: Node, name: str):
304        # Remember to update DT.__deepcopy__() if you change this.
305
306        if "@" in name:
307            node.dt._parse_error("'@' is only allowed in node names")
308
309        self.name = name
310        self.value = b""
311        self.labels: List[str] = []
312        # We have to wait to set this until later, when we've got
313        # the entire tree.
314        self.offset_labels: Dict[str, int] = {}
315        self.node: Node = node
316
317        self._label_offset_lst: List[Tuple[str, int]] = []
318
319        # A list of [offset, label, type] lists (sorted by offset),
320        # giving the locations of references within the value. 'type'
321        # is either _MarkerType.PATH, for a node path reference,
322        # _MarkerType.PHANDLE, for a phandle reference, or
323        # _MarkerType.LABEL, for a label on/within data. Node paths
324        # and phandles need to be patched in after parsing.
325        self._markers: List[List] = []
326
327    @property
328    def type(self) -> Type:
329        """
330        See the class documentation.
331        """
332        # Data labels (e.g. 'foo = label: <3>') are irrelevant, so filter them
333        # out
334        types = [marker[1] for marker in self._markers
335                 if marker[1] != _MarkerType.LABEL]
336
337        if not types:
338            return Type.EMPTY
339
340        if types == [_MarkerType.UINT8]:
341            return Type.BYTES
342
343        if types == [_MarkerType.UINT32]:
344            return Type.NUM if len(self.value) == 4 else Type.NUMS
345
346        # Treat 'foo = <1 2 3>, <4 5>, ...' as Type.NUMS too
347        if set(types) == {_MarkerType.UINT32}:
348            return Type.NUMS
349
350        if set(types) == {_MarkerType.STRING}:
351            return Type.STRING if len(types) == 1 else Type.STRINGS
352
353        if types == [_MarkerType.PATH]:
354            return Type.PATH
355
356        if types == [_MarkerType.UINT32, _MarkerType.PHANDLE] and \
357                len(self.value) == 4:
358            return Type.PHANDLE
359
360        if set(types) == {_MarkerType.UINT32, _MarkerType.PHANDLE}:
361            if len(self.value) == 4*types.count(_MarkerType.PHANDLE):
362                # Array with just phandles in it
363                return Type.PHANDLES
364            # Array with both phandles and numbers
365            return Type.PHANDLES_AND_NUMS
366
367        return Type.COMPOUND
368
369    def to_num(self, signed=False) -> int:
370        """
371        Returns the value of the property as a number.
372
373        Raises DTError if the property was not assigned with this syntax (has
374        Property.type Type.NUM):
375
376            foo = < 1 >;
377
378        signed (default: False):
379          If True, the value will be interpreted as signed rather than
380          unsigned.
381        """
382        if self.type is not Type.NUM:
383            _err("expected property '{0}' on {1} in {2} to be assigned with "
384                 "'{0} = < (number) >;', not '{3}'"
385                 .format(self.name, self.node.path, self.node.dt.filename,
386                         self))
387
388        return int.from_bytes(self.value, "big", signed=signed)
389
390    def to_nums(self, signed=False) -> List[int]:
391        """
392        Returns the value of the property as a list of numbers.
393
394        Raises DTError if the property was not assigned with this syntax (has
395        Property.type Type.NUM or Type.NUMS):
396
397            foo = < 1 2 ... >;
398
399        signed (default: False):
400          If True, the values will be interpreted as signed rather than
401          unsigned.
402        """
403        if self.type not in (Type.NUM, Type.NUMS):
404            _err("expected property '{0}' on {1} in {2} to be assigned with "
405                 "'{0} = < (number) (number) ... >;', not '{3}'"
406                 .format(self.name, self.node.path, self.node.dt.filename,
407                         self))
408
409        return [int.from_bytes(self.value[i:i + 4], "big", signed=signed)
410                for i in range(0, len(self.value), 4)]
411
412    def to_bytes(self) -> bytes:
413        """
414        Returns the value of the property as a raw 'bytes', like
415        Property.value, except with added type checking.
416
417        Raises DTError if the property was not assigned with this syntax (has
418        Property.type Type.BYTES):
419
420            foo = [ 01 ... ];
421        """
422        if self.type is not Type.BYTES:
423            _err("expected property '{0}' on {1} in {2} to be assigned with "
424                 "'{0} = [ (byte) (byte) ... ];', not '{3}'"
425                 .format(self.name, self.node.path, self.node.dt.filename,
426                         self))
427
428        return self.value
429
430    def to_string(self) -> str:
431        """
432        Returns the value of the property as a string.
433
434        Raises DTError if the property was not assigned with this syntax (has
435        Property.type Type.STRING):
436
437            foo = "string";
438
439        This function might also raise UnicodeDecodeError if the string is
440        not valid UTF-8.
441        """
442        if self.type is not Type.STRING:
443            _err("expected property '{0}' on {1} in {2} to be assigned with "
444                 "'{0} = \"string\";', not '{3}'"
445                 .format(self.name, self.node.path, self.node.dt.filename,
446                         self))
447
448        try:
449            ret = self.value.decode("utf-8")[:-1]  # Strip null
450        except UnicodeDecodeError:
451            _err(f"value of property '{self.name}' ({self.value!r}) "
452                 f"on {self.node.path} in {self.node.dt.filename} "
453                 "is not valid UTF-8")
454
455        return ret  # The separate 'return' appeases the type checker.
456
457    def to_strings(self) -> List[str]:
458        """
459        Returns the value of the property as a list of strings.
460
461        Raises DTError if the property was not assigned with this syntax (has
462        Property.type Type.STRING or Type.STRINGS):
463
464            foo = "string", "string", ... ;
465
466        Also raises DTError if any of the strings are not valid UTF-8.
467        """
468        if self.type not in (Type.STRING, Type.STRINGS):
469            _err("expected property '{0}' on {1} in {2} to be assigned with "
470                 "'{0} = \"string\", \"string\", ... ;', not '{3}'"
471                 .format(self.name, self.node.path, self.node.dt.filename,
472                         self))
473
474        try:
475            ret = self.value.decode("utf-8").split("\0")[:-1]
476        except UnicodeDecodeError:
477            _err(f"value of property '{self.name}' ({self.value!r}) "
478                 f"on {self.node.path} in {self.node.dt.filename} "
479                 "is not valid UTF-8")
480
481        return ret  # The separate 'return' appeases the type checker.
482
483    def to_node(self) -> Node:
484        """
485        Returns the Node the phandle in the property points to.
486
487        Raises DTError if the property was not assigned with this syntax (has
488        Property.type Type.PHANDLE).
489
490            foo = < &bar >;
491        """
492        if self.type is not Type.PHANDLE:
493            _err("expected property '{0}' on {1} in {2} to be assigned with "
494                 "'{0} = < &foo >;', not '{3}'"
495                 .format(self.name, self.node.path, self.node.dt.filename,
496                         self))
497
498        return self.node.dt.phandle2node[int.from_bytes(self.value, "big")]
499
500    def to_nodes(self) -> List[Node]:
501        """
502        Returns a list with the Nodes the phandles in the property point to.
503
504        Raises DTError if the property value contains anything other than
505        phandles. All of the following are accepted:
506
507            foo = < >
508            foo = < &bar >;
509            foo = < &bar &baz ... >;
510            foo = < &bar ... >, < &baz ... >;
511        """
512        def type_ok():
513            if self.type in (Type.PHANDLE, Type.PHANDLES):
514                return True
515            # Also accept 'foo = < >;'
516            return self.type is Type.NUMS and not self.value
517
518        if not type_ok():
519            _err("expected property '{0}' on {1} in {2} to be assigned with "
520                 "'{0} = < &foo &bar ... >;', not '{3}'"
521                 .format(self.name, self.node.path,
522                         self.node.dt.filename, self))
523
524        return [self.node.dt.phandle2node[int.from_bytes(self.value[i:i + 4],
525                                                         "big")]
526                for i in range(0, len(self.value), 4)]
527
528    def to_path(self) -> Node:
529        """
530        Returns the Node referenced by the path stored in the property.
531
532        Raises DTError if the property was not assigned with either of these
533        syntaxes (has Property.type Type.PATH or Type.STRING):
534
535            foo = &bar;
536            foo = "/bar";
537
538        For the second case, DTError is raised if the path does not exist.
539        """
540        if self.type not in (Type.PATH, Type.STRING):
541            _err("expected property '{0}' on {1} in {2} to be assigned with "
542                 "either '{0} = &foo' or '{0} = \"/path/to/node\"', not '{3}'"
543                 .format(self.name, self.node.path, self.node.dt.filename,
544                         self))
545
546        try:
547            path = self.value.decode("utf-8")[:-1]
548        except UnicodeDecodeError:
549            _err(f"value of property '{self.name}' ({self.value!r}) "
550                 f"on {self.node.path} in {self.node.dt.filename} "
551                 "is not valid UTF-8")
552
553        try:
554            ret = self.node.dt.get_node(path)
555        except DTError:
556            _err(f"property '{self.name}' on {self.node.path} in "
557                 f"{self.node.dt.filename} points to the non-existent node "
558                 f'"{path}"')
559
560        return ret  # The separate 'return' appeases the type checker.
561
562    def __str__(self):
563        s = "".join(label + ": " for label in self.labels) + self.name
564        if not self.value:
565            return s + ";"
566
567        s += " ="
568
569        for i, (pos, marker_type, ref) in enumerate(self._markers):
570            if i < len(self._markers) - 1:
571                next_marker = self._markers[i + 1]
572            else:
573                next_marker = None
574
575            # End of current marker
576            end = next_marker[0] if next_marker else len(self.value)
577
578            if marker_type is _MarkerType.STRING:
579                # end - 1 to strip off the null terminator
580                s += f' "{_decode_and_escape(self.value[pos:end - 1])}"'
581                if end != len(self.value):
582                    s += ","
583            elif marker_type is _MarkerType.PATH:
584                s += " &" + ref
585                if end != len(self.value):
586                    s += ","
587            else:
588                # <> or []
589
590                if marker_type is _MarkerType.LABEL:
591                    s += f" {ref}:"
592                elif marker_type is _MarkerType.PHANDLE:
593                    s += " &" + ref
594                    pos += 4
595                    # Subtle: There might be more data between the phandle and
596                    # the next marker, so we can't 'continue' here
597                else:  # marker_type is _MarkerType.UINT*
598                    elm_size = _TYPE_TO_N_BYTES[marker_type]
599                    s += _N_BYTES_TO_START_STR[elm_size]
600
601                while pos != end:
602                    num = int.from_bytes(self.value[pos:pos + elm_size],
603                                         "big")
604                    if elm_size == 1:
605                        s += f" {num:02X}"
606                    else:
607                        s += f" {hex(num)}"
608
609                    pos += elm_size
610
611                if pos != 0 and \
612                   (not next_marker or
613                    next_marker[1] not in (_MarkerType.PHANDLE, _MarkerType.LABEL)):
614
615                    s += _N_BYTES_TO_END_STR[elm_size]
616                    if pos != len(self.value):
617                        s += ","
618
619        return s + ";"
620
621
622    def __repr__(self):
623        return f"<Property '{self.name}' at '{self.node.path}' in " \
624            f"'{self.node.dt.filename}'>"
625
626    #
627    # Internal functions
628    #
629
630    def _add_marker(self, marker_type: _MarkerType, data: Any = None):
631        # Helper for registering markers in the value that are processed after
632        # parsing. See _fixup_props(). 'marker_type' identifies the type of
633        # marker, and 'data' has any optional data associated with the marker.
634
635        # len(self.value) gives the current offset. This function is called
636        # while the value is built. We use a list instead of a tuple to be able
637        # to fix up offsets later (they might increase if the value includes
638        # path references, e.g. 'foo = &bar, <3>;', which are expanded later).
639        self._markers.append([len(self.value), marker_type, data])
640
641        # For phandle references, add a dummy value with the same length as a
642        # phandle. This is handy for the length check in _register_phandles().
643        if marker_type is _MarkerType.PHANDLE:
644            self.value += b"\0\0\0\0"
645
646class _T(enum.IntEnum):
647    # Token IDs used by the DT lexer.
648
649    # These values must be contiguous and start from 1.
650    INCLUDE = 1
651    LINE = 2
652    STRING = 3
653    DTS_V1 = 4
654    PLUGIN = 5
655    MEMRESERVE = 6
656    BITS = 7
657    DEL_PROP = 8
658    DEL_NODE = 9
659    OMIT_IF_NO_REF = 10
660    LABEL = 11
661    CHAR_LITERAL = 12
662    REF = 13
663    INCBIN = 14
664    SKIP = 15
665    EOF = 16
666
667    # These values must be larger than the above contiguous range.
668    NUM = 17
669    PROPNODENAME = 18
670    MISC = 19
671    BYTE = 20
672    BAD = 21
673
674class _FileStackElt(NamedTuple):
675    # Used for maintaining the /include/ stack.
676
677    filename: str
678    lineno: int
679    contents: str
680    pos: int
681
682_TokVal = Union[int, str]
683
684class _Token(NamedTuple):
685    id: int
686    val: _TokVal
687
688    def __repr__(self):
689        id_repr = _T(self.id).name
690        return f'Token(id=_T.{id_repr}, val={repr(self.val)})'
691
692class DT:
693    """
694    Represents a devicetree parsed from a .dts file (or from many files, if the
695    .dts file /include/s other files). Creating many instances of this class is
696    fine. The library has no global state.
697
698    These attributes are available on DT instances:
699
700    root:
701      A Node instance representing the root (/) node.
702
703    alias2node:
704      A dictionary that maps maps alias strings (from /aliases) to Node
705      instances
706
707    label2node:
708      A dictionary that maps each node label (a string) to the Node instance
709      for the node.
710
711    label2prop:
712      A dictionary that maps each property label (a string) to a Property
713      instance.
714
715    label2prop_offset:
716      A dictionary that maps each label (a string) within a property value
717      (e.g., 'x = label_1: < 1 label2: 2 >;') to a (prop, offset) tuple, where
718      'prop' is a Property instance and 'offset' the byte offset (0 for label_1
719      and 4 for label_2 in the example).
720
721    phandle2node:
722      A dictionary that maps each phandle (a number) to a Node instance.
723
724    memreserves:
725      A list of (labels, address, length) tuples for the /memreserve/s in the
726      .dts file, in the same order as they appear in the file.
727
728      'labels' is a possibly empty set with all labels preceding the memreserve
729      (e.g., 'label1: label2: /memreserve/ ...'). 'address' and 'length' are
730      numbers.
731
732    filename:
733      The filename passed to the DT constructor.
734    """
735
736    #
737    # Public interface
738    #
739
740    def __init__(self, filename: Optional[str], include_path: Iterable[str] = (),
741                 force: bool = False):
742        """
743        Parses a DTS file to create a DT instance. Raises OSError if 'filename'
744        can't be opened, and DTError for any parse errors.
745
746        filename:
747          Path to the .dts file to parse. (If None, an empty devicetree
748          is created; this is unlikely to be what you want.)
749
750        include_path:
751          An iterable (e.g. list or tuple) containing paths to search for
752          /include/d and /incbin/'d files. By default, files are only looked up
753          relative to the .dts file that contains the /include/ or /incbin/.
754
755        force:
756          Try not to raise DTError even if the input tree has errors.
757          For experimental use; results not guaranteed.
758        """
759        # Remember to update __deepcopy__() if you change this.
760
761        self._root: Optional[Node] = None
762        self.alias2node: Dict[str, Node] = {}
763        self.label2node: Dict[str, Node] = {}
764        self.label2prop: Dict[str, Property] = {}
765        self.label2prop_offset: Dict[str, Tuple[Property, int]] = {}
766        self.phandle2node: Dict[int, Node] = {}
767        self.memreserves: List[Tuple[Set[str], int, int]] = []
768        self.filename = filename
769
770        self._force = force
771
772        if filename is not None:
773            self._parse_file(filename, include_path)
774        else:
775            self._include_path: List[str] = []
776
777    @property
778    def root(self) -> Node:
779        """
780        See the class documentation.
781        """
782        # This is necessary because mypy can't tell that we never
783        # treat self._root as a non-None value until it's initialized
784        # properly in _parse_dt().
785        return self._root       # type: ignore
786
787    def get_node(self, path: str) -> Node:
788        """
789        Returns the Node instance for the node with path or alias 'path' (a
790        string). Raises DTError if the path or alias doesn't exist.
791
792        For example, both dt.get_node("/foo/bar") and dt.get_node("bar-alias")
793        will return the 'bar' node below:
794
795          /dts-v1/;
796
797          / {
798                  foo {
799                          bar_label: bar {
800                                  baz {
801                                  };
802                          };
803                  };
804
805                  aliases {
806                          bar-alias = &bar-label;
807                  };
808          };
809
810        Fetching subnodes via aliases is supported:
811        dt.get_node("bar-alias/baz") returns the 'baz' node.
812        """
813        if path.startswith("/"):
814            return _root_and_path_to_node(self.root, path, path)
815
816        # Path does not start with '/'. First component must be an alias.
817        alias, _, rest = path.partition("/")
818        if alias not in self.alias2node:
819            _err(f"no alias '{alias}' found -- did you forget the leading "
820                 "'/' in the node path?")
821
822        return _root_and_path_to_node(self.alias2node[alias], rest, path)
823
824    def has_node(self, path: str) -> bool:
825        """
826        Returns True if the path or alias 'path' exists. See DT.get_node().
827        """
828        try:
829            self.get_node(path)
830            return True
831        except DTError:
832            return False
833
834    def move_node(self, node: Node, new_path: str):
835        """
836        Move a node 'node' to a new path 'new_path'. The entire subtree
837        rooted at 'node' is moved along with it.
838
839        You cannot move the root node or provide a 'new_path' value
840        where a node already exists. This method raises an exception
841        in both cases.
842
843        As a restriction on the current implementation, the parent node
844        of the new path must exist.
845        """
846        if node is self.root:
847            _err("the root node can't be moved")
848
849        if self.has_node(new_path):
850            _err(f"can't move '{node.path}' to '{new_path}': "
851                 'destination node exists')
852
853        if not new_path.startswith('/'):
854            _err(f"path '{new_path}' doesn't start with '/'")
855
856        for component in new_path.split('/'):
857            for char in component:
858                if char not in _nodename_chars:
859                    _err(f"new path '{new_path}': bad character '{char}'")
860
861        old_name = node.name
862        old_path = node.path
863
864        new_parent_path, _, new_name = new_path.rpartition('/')
865        if new_parent_path == '':
866            # '/foo'.rpartition('/') is ('', '/', 'foo').
867            new_parent_path = '/'
868        if not self.has_node(new_parent_path):
869            _err(f"can't move '{old_path}' to '{new_path}': "
870                 f"parent node '{new_parent_path}' doesn't exist")
871        new_parent = self.get_node(new_parent_path)
872        if TYPE_CHECKING:
873            assert new_parent is not None
874            assert node.parent is not None
875
876        del node.parent.nodes[old_name]
877        node._name = new_name
878        node.parent = new_parent
879        new_parent.nodes[new_name] = node
880
881    def node_iter(self) -> Iterable[Node]:
882        """
883        Returns a generator for iterating over all nodes in the devicetree.
884
885        For example, this will print the name of each node that has a property
886        called 'foo':
887
888          for node in dt.node_iter():
889              if "foo" in node.props:
890                  print(node.name)
891        """
892        yield from self.root.node_iter()
893
894    def __str__(self):
895        """
896        Returns a DTS representation of the devicetree. Called automatically if
897        the DT instance is print()ed.
898        """
899        s = "/dts-v1/;\n\n"
900
901        if self.memreserves:
902            for labels, address, offset in self.memreserves:
903                # List the labels in a consistent order to help with testing
904                for label in labels:
905                    s += f"{label}: "
906                s += f"/memreserve/ {address:#018x} {offset:#018x};\n"
907            s += "\n"
908
909        return s + str(self.root)
910
911    def __repr__(self):
912        """
913        Returns some information about the DT instance. Called automatically if
914        the DT instance is evaluated.
915        """
916        if self.filename:
917            return f"DT(filename='{self.filename}', " \
918                f"include_path={self._include_path})"
919        return super().__repr__()
920
921    def __deepcopy__(self, memo):
922        """
923        Implements support for the standard library copy.deepcopy()
924        function on DT instances.
925        """
926
927        # We need a new DT, obviously. Make a new, empty one.
928        ret = DT(None, (), self._force)
929
930        # Now allocate new Node objects for every node in self, to use
931        # in the new DT. Set their parents to None for now and leave
932        # them without any properties. We will recursively initialize
933        # copies of parents before copies of children next.
934        path2node_copy = {
935            node.path: Node(node.name, None, ret)
936            for node in self.node_iter()
937        }
938
939        # Point each copy of a node to the copy of its parent and set up
940        # copies of each property.
941        #
942        # Share data when possible. For example, Property.value has
943        # type 'bytes', which is immutable. We therefore don't need a
944        # copy and can just point to the original data.
945
946        for node in self.node_iter():
947            node_copy = path2node_copy[node.path]
948
949            parent = node.parent
950            if parent is not None:
951                node_copy.parent = path2node_copy[parent.path]
952
953            prop_name2prop_copy = {
954                prop.name: Property(node_copy, prop.name)
955                for prop in node.props.values()
956            }
957            for prop_name, prop_copy in prop_name2prop_copy.items():
958                prop = node.props[prop_name]
959                prop_copy.value = prop.value
960                prop_copy.labels = prop.labels[:]
961                prop_copy.offset_labels = prop.offset_labels.copy()
962                prop_copy._label_offset_lst = prop._label_offset_lst[:]
963                prop_copy._markers = [marker[:] for marker in prop._markers]
964            node_copy.props = prop_name2prop_copy
965
966            node_copy.nodes = {
967                child_name: path2node_copy[child_node.path]
968                for child_name, child_node in node.nodes.items()
969            }
970
971            node_copy.labels = node.labels[:]
972
973            node_copy._omit_if_no_ref = node._omit_if_no_ref
974            node_copy._is_referenced = node._is_referenced
975
976        # The copied nodes and properties are initialized, so
977        # we can finish initializing the copied DT object now.
978
979        ret._root = path2node_copy['/']
980
981        def copy_node_lookup_table(attr_name):
982            original = getattr(self, attr_name)
983            copy = {
984                key: path2node_copy[original[key].path]
985                for key in original
986            }
987            setattr(ret, attr_name, copy)
988
989        copy_node_lookup_table('alias2node')
990        copy_node_lookup_table('label2node')
991        copy_node_lookup_table('phandle2node')
992
993        ret_label2prop = {}
994        for label, prop in self.label2prop.items():
995            node_copy = path2node_copy[prop.node.path]
996            prop_copy = node_copy.props[prop.name]
997            ret_label2prop[label] = prop_copy
998        ret.label2prop = ret_label2prop
999
1000        ret_label2prop_offset = {}
1001        for label, prop_offset in self.label2prop_offset.items():
1002            prop, offset = prop_offset
1003            node_copy = path2node_copy[prop.node.path]
1004            prop_copy = node_copy.props[prop.name]
1005            ret_label2prop_offset[label] = (prop_copy, offset)
1006        ret.label2prop_offset = ret_label2prop_offset
1007
1008        ret.memreserves = [
1009            (set(memreserve[0]), memreserve[1], memreserve[2])
1010            for memreserve in self.memreserves
1011        ]
1012
1013        ret.filename = self.filename
1014
1015        return ret
1016
1017    #
1018    # Parsing
1019    #
1020
1021    def _parse_file(self, filename: str, include_path: Iterable[str]):
1022        self._include_path = list(include_path)
1023
1024        with open(filename, encoding="utf-8") as f:
1025            self._file_contents = f.read()
1026
1027        self._tok_i = self._tok_end_i = 0
1028        self._filestack: List[_FileStackElt] = []
1029
1030        self._lexer_state: int = _DEFAULT
1031        self._saved_token: Optional[_Token] = None
1032
1033        self._lineno: int = 1
1034
1035        self._parse_header()
1036        self._parse_memreserves()
1037        self._parse_dt()
1038
1039        self._register_phandles()
1040        self._fixup_props()
1041        self._register_aliases()
1042        self._remove_unreferenced()
1043        self._register_labels()
1044
1045    def _parse_header(self):
1046        # Parses /dts-v1/ (expected) and /plugin/ (unsupported) at the start of
1047        # files. There may be multiple /dts-v1/ at the start of a file.
1048
1049        has_dts_v1 = False
1050
1051        while self._peek_token().id == _T.DTS_V1:
1052            has_dts_v1 = True
1053            self._next_token()
1054            self._expect_token(";")
1055            # /plugin/ always comes after /dts-v1/
1056            if self._peek_token().id == _T.PLUGIN:
1057                self._parse_error("/plugin/ is not supported")
1058
1059        if not has_dts_v1:
1060            self._parse_error("expected '/dts-v1/;' at start of file")
1061
1062    def _parse_memreserves(self):
1063        # Parses /memreserve/, which appears after /dts-v1/
1064
1065        while True:
1066            # Labels before /memreserve/
1067            labels = []
1068            while self._peek_token().id == _T.LABEL:
1069                _append_no_dup(labels, self._next_token().val)
1070
1071            if self._peek_token().id == _T.MEMRESERVE:
1072                self._next_token()
1073                self.memreserves.append(
1074                    (labels, self._eval_prim(), self._eval_prim()))
1075                self._expect_token(";")
1076            elif labels:
1077                self._parse_error("expected /memreserve/ after labels at "
1078                                  "beginning of file")
1079            else:
1080                return
1081
1082    def _parse_dt(self):
1083        # Top-level parsing loop
1084
1085        while True:
1086            tok = self._next_token()
1087
1088            if tok.val == "/":
1089                # '/ { ... };', the root node
1090                if not self._root:
1091                    self._root = Node(name="/", parent=None, dt=self)
1092                self._parse_node(self.root)
1093
1094            elif tok.id in (_T.LABEL, _T.REF):
1095                # '&foo { ... };' or 'label: &foo { ... };'. The C tools only
1096                # support a single label here too.
1097
1098                if tok.id == _T.LABEL:
1099                    label = tok.val
1100                    tok = self._next_token()
1101                    if tok.id != _T.REF:
1102                        self._parse_error("expected label reference (&foo)")
1103                else:
1104                    label = None
1105
1106                try:
1107                    node = self._ref2node(tok.val)
1108                except DTError as e:
1109                    self._parse_error(e)
1110                self._parse_node(node)
1111
1112                if label:
1113                    _append_no_dup(node.labels, label)
1114
1115            elif tok.id == _T.DEL_NODE:
1116                self._next_ref2node()._del()
1117                self._expect_token(";")
1118
1119            elif tok.id == _T.OMIT_IF_NO_REF:
1120                self._next_ref2node()._omit_if_no_ref = True
1121                self._expect_token(";")
1122
1123            elif tok.id == _T.EOF:
1124                if not self._root:
1125                    self._parse_error("no root node defined")
1126                return
1127
1128            else:
1129                self._parse_error("expected '/' or label reference (&foo)")
1130
1131    def _parse_node(self, node):
1132        # Parses the '{ ... };' part of 'node-name { ... };'.
1133
1134        # We need to track which child nodes were defined in this set
1135        # of curly braces in order to reject duplicate node names.
1136        current_child_names = set()
1137
1138        self._expect_token("{")
1139        while True:
1140            labels, omit_if_no_ref = self._parse_propnode_labels()
1141            tok = self._next_token()
1142
1143            if tok.id == _T.PROPNODENAME:
1144                if self._peek_token().val == "{":
1145                    # '<tok> { ...', expect node
1146
1147                    # Fetch the existing node if it already exists. This
1148                    # happens when overriding nodes.
1149                    child = node.nodes.get(tok.val)
1150                    if child:
1151                        if child.name in current_child_names:
1152                            self._parse_error(f'{child.path}: duplicate node name')
1153                    else:
1154                        child = Node(name=tok.val, parent=node, dt=self)
1155                        current_child_names.add(tok.val)
1156
1157                    for label in labels:
1158                        _append_no_dup(child.labels, label)
1159
1160                    if omit_if_no_ref:
1161                        child._omit_if_no_ref = True
1162
1163                    node.nodes[child.name] = child
1164                    self._parse_node(child)
1165
1166                else:
1167                    # Not '<tok> { ...', expect property assignment
1168
1169                    if omit_if_no_ref:
1170                        self._parse_error(
1171                            "/omit-if-no-ref/ can only be used on nodes")
1172
1173                    prop = node._get_prop(tok.val)
1174
1175                    if self._check_token("="):
1176                        self._parse_assignment(prop)
1177                    elif not self._check_token(";"):
1178                        # ';' is for an empty property, like 'foo;'
1179                        self._parse_error("expected '{', '=', or ';'")
1180
1181                    for label in labels:
1182                        _append_no_dup(prop.labels, label)
1183
1184            elif tok.id == _T.DEL_NODE:
1185                tok2 = self._next_token()
1186                if tok2.id != _T.PROPNODENAME:
1187                    self._parse_error("expected node name")
1188                if tok2.val in node.nodes:
1189                    node.nodes[tok2.val]._del()
1190                self._expect_token(";")
1191
1192            elif tok.id == _T.DEL_PROP:
1193                tok2 = self._next_token()
1194                if tok2.id != _T.PROPNODENAME:
1195                    self._parse_error("expected property name")
1196                node.props.pop(tok2.val, None)
1197                self._expect_token(";")
1198
1199            elif tok.val == "}":
1200                self._expect_token(";")
1201                return
1202
1203            else:
1204                self._parse_error("expected node name, property name, or '}'")
1205
1206    def _parse_propnode_labels(self):
1207        # _parse_node() helpers for parsing labels and /omit-if-no-ref/s before
1208        # nodes and properties. Returns a (<label list>, <omit-if-no-ref bool>)
1209        # tuple.
1210
1211        labels = []
1212        omit_if_no_ref = False
1213        while True:
1214            tok = self._peek_token()
1215            if tok.id == _T.LABEL:
1216                _append_no_dup(labels, tok.val)
1217            elif tok.id == _T.OMIT_IF_NO_REF:
1218                omit_if_no_ref = True
1219            elif (labels or omit_if_no_ref) and tok.id != _T.PROPNODENAME:
1220                # Got something like 'foo: bar: }'
1221                self._parse_error("expected node or property name")
1222            else:
1223                return labels, omit_if_no_ref
1224
1225            self._next_token()
1226
1227    def _parse_assignment(self, prop):
1228        # Parses the right-hand side of property assignment
1229        #
1230        # prop:
1231        #   'Property' instance being assigned
1232
1233        # Remove any old value, path/phandle references, and in-value labels,
1234        # in case the property value is being overridden
1235        prop.value = b""
1236        prop._markers = []
1237
1238        while True:
1239            # Parse labels before the value (e.g., '..., label: < 0 >')
1240            self._parse_value_labels(prop)
1241
1242            tok = self._next_token()
1243
1244            if tok.val == "<":
1245                self._parse_cells(prop, 4)
1246
1247            elif tok.id == _T.BITS:
1248                n_bits = self._expect_num()
1249                if n_bits not in {8, 16, 32, 64}:
1250                    self._parse_error("expected 8, 16, 32, or 64")
1251                self._expect_token("<")
1252                self._parse_cells(prop, n_bits//8)
1253
1254            elif tok.val == "[":
1255                self._parse_bytes(prop)
1256
1257            elif tok.id == _T.STRING:
1258                prop._add_marker(_MarkerType.STRING)
1259                prop.value += self._unescape(tok.val.encode("utf-8")) + b"\0"
1260
1261            elif tok.id == _T.REF:
1262                prop._add_marker(_MarkerType.PATH, tok.val)
1263
1264            elif tok.id == _T.INCBIN:
1265                self._parse_incbin(prop)
1266
1267            else:
1268                self._parse_error("malformed value")
1269
1270            # Parse labels after the value (e.g., '< 0 > label:, ...')
1271            self._parse_value_labels(prop)
1272
1273            tok = self._next_token()
1274            if tok.val == ";":
1275                return
1276            if tok.val == ",":
1277                continue
1278            self._parse_error("expected ';' or ','")
1279
1280    def _parse_cells(self, prop, n_bytes):
1281        # Parses '<...>'
1282
1283        prop._add_marker(_N_BYTES_TO_TYPE[n_bytes])
1284
1285        while True:
1286            tok = self._peek_token()
1287            if tok.id == _T.REF:
1288                self._next_token()
1289                if n_bytes != 4:
1290                    self._parse_error("phandle references are only allowed in "
1291                                      "arrays with 32-bit elements")
1292                prop._add_marker(_MarkerType.PHANDLE, tok.val)
1293
1294            elif tok.id == _T.LABEL:
1295                prop._add_marker(_MarkerType.LABEL, tok.val)
1296                self._next_token()
1297
1298            elif self._check_token(">"):
1299                return
1300
1301            else:
1302                # Literal value
1303                num = self._eval_prim()
1304                try:
1305                    prop.value += num.to_bytes(n_bytes, "big")
1306                except OverflowError:
1307                    try:
1308                        # Try again as a signed number, in case it's negative
1309                        prop.value += num.to_bytes(n_bytes, "big", signed=True)
1310                    except OverflowError:
1311                        self._parse_error(
1312                            f"{num} does not fit in {8*n_bytes} bits")
1313
1314    def _parse_bytes(self, prop):
1315        # Parses '[ ... ]'
1316
1317        prop._add_marker(_MarkerType.UINT8)
1318
1319        while True:
1320            tok = self._next_token()
1321            if tok.id == _T.BYTE:
1322                prop.value += tok.val.to_bytes(1, "big")
1323
1324            elif tok.id == _T.LABEL:
1325                prop._add_marker(_MarkerType.LABEL, tok.val)
1326
1327            elif tok.val == "]":
1328                return
1329
1330            else:
1331                self._parse_error("expected two-digit byte or ']'")
1332
1333    def _parse_incbin(self, prop):
1334        # Parses
1335        #
1336        #   /incbin/ ("filename")
1337        #
1338        # and
1339        #
1340        #   /incbin/ ("filename", <offset>, <size>)
1341
1342        prop._add_marker(_MarkerType.UINT8)
1343
1344        self._expect_token("(")
1345
1346        tok = self._next_token()
1347        if tok.id != _T.STRING:
1348            self._parse_error("expected quoted filename")
1349        filename = tok.val
1350
1351        tok = self._next_token()
1352        if tok.val == ",":
1353            offset = self._eval_prim()
1354            self._expect_token(",")
1355            size = self._eval_prim()
1356            self._expect_token(")")
1357        else:
1358            if tok.val != ")":
1359                self._parse_error("expected ',' or ')'")
1360            offset = None
1361
1362        try:
1363            with self._open(filename, "rb") as f:
1364                if offset is None:
1365                    prop.value += f.read()
1366                else:
1367                    f.seek(offset)
1368                    prop.value += f.read(size)
1369        except OSError as e:
1370            self._parse_error(f"could not read '{filename}': {e}")
1371
1372    def _parse_value_labels(self, prop):
1373        # _parse_assignment() helper for parsing labels before/after each
1374        # comma-separated value
1375
1376        while True:
1377            tok = self._peek_token()
1378            if tok.id != _T.LABEL:
1379                return
1380            prop._add_marker(_MarkerType.LABEL, tok.val)
1381            self._next_token()
1382
1383    def _node_phandle(self, node):
1384        # Returns the phandle for Node 'node', creating a new phandle if the
1385        # node has no phandle, and fixing up the value for existing
1386        # self-referential phandles (which get set to b'\0\0\0\0' initially).
1387        # Self-referential phandles must be rewritten instead of recreated, so
1388        # that labels are preserved.
1389
1390        if "phandle" in node.props:
1391            phandle_prop = node.props["phandle"]
1392        else:
1393            phandle_prop = Property(node, "phandle")
1394            phandle_prop._add_marker(_MarkerType.UINT32)  # For displaying
1395            phandle_prop.value = b'\0\0\0\0'
1396
1397        if phandle_prop.value == b'\0\0\0\0':
1398            phandle_i = 1
1399            while phandle_i in self.phandle2node:
1400                phandle_i += 1
1401            self.phandle2node[phandle_i] = node
1402
1403            phandle_prop.value = phandle_i.to_bytes(4, "big")
1404            node.props["phandle"] = phandle_prop
1405
1406        return phandle_prop.value
1407
1408    # Expression evaluation
1409
1410    def _eval_prim(self):
1411        tok = self._peek_token()
1412        if tok.id in (_T.NUM, _T.CHAR_LITERAL):
1413            return self._next_token().val
1414
1415        tok = self._next_token()
1416        if tok.val != "(":
1417            self._parse_error("expected number or parenthesized expression")
1418        val = self._eval_ternary()
1419        self._expect_token(")")
1420        return val
1421
1422    def _eval_ternary(self):
1423        val = self._eval_or()
1424        if self._check_token("?"):
1425            if_val = self._eval_ternary()
1426            self._expect_token(":")
1427            else_val = self._eval_ternary()
1428            return if_val if val else else_val
1429        return val
1430
1431    def _eval_or(self):
1432        val = self._eval_and()
1433        while self._check_token("||"):
1434            val = 1 if self._eval_and() or val else 0
1435        return val
1436
1437    def _eval_and(self):
1438        val = self._eval_bitor()
1439        while self._check_token("&&"):
1440            val = 1 if self._eval_bitor() and val else 0
1441        return val
1442
1443    def _eval_bitor(self):
1444        val = self._eval_bitxor()
1445        while self._check_token("|"):
1446            val |= self._eval_bitxor()
1447        return val
1448
1449    def _eval_bitxor(self):
1450        val = self._eval_bitand()
1451        while self._check_token("^"):
1452            val ^= self._eval_bitand()
1453        return val
1454
1455    def _eval_bitand(self):
1456        val = self._eval_eq()
1457        while self._check_token("&"):
1458            val &= self._eval_eq()
1459        return val
1460
1461    def _eval_eq(self):
1462        val = self._eval_rela()
1463        while True:
1464            if self._check_token("=="):
1465                val = 1 if val == self._eval_rela() else 0
1466            elif self._check_token("!="):
1467                val = 1 if val != self._eval_rela() else 0
1468            else:
1469                return val
1470
1471    def _eval_rela(self):
1472        val = self._eval_shift()
1473        while True:
1474            if self._check_token("<"):
1475                val = 1 if val < self._eval_shift() else 0
1476            elif self._check_token(">"):
1477                val = 1 if val > self._eval_shift() else 0
1478            elif self._check_token("<="):
1479                val = 1 if val <= self._eval_shift() else 0
1480            elif self._check_token(">="):
1481                val = 1 if val >= self._eval_shift() else 0
1482            else:
1483                return val
1484
1485    def _eval_shift(self):
1486        val = self._eval_add()
1487        while True:
1488            if self._check_token("<<"):
1489                val <<= self._eval_add()
1490            elif self._check_token(">>"):
1491                val >>= self._eval_add()
1492            else:
1493                return val
1494
1495    def _eval_add(self):
1496        val = self._eval_mul()
1497        while True:
1498            if self._check_token("+"):
1499                val += self._eval_mul()
1500            elif self._check_token("-"):
1501                val -= self._eval_mul()
1502            else:
1503                return val
1504
1505    def _eval_mul(self):
1506        val = self._eval_unary()
1507        while True:
1508            if self._check_token("*"):
1509                val *= self._eval_unary()
1510            elif self._check_token("/"):
1511                denom = self._eval_unary()
1512                if not denom:
1513                    self._parse_error("division by zero")
1514                val //= denom
1515            elif self._check_token("%"):
1516                denom = self._eval_unary()
1517                if not denom:
1518                    self._parse_error("division by zero")
1519                val %= denom
1520            else:
1521                return val
1522
1523    def _eval_unary(self):
1524        if self._check_token("-"):
1525            return -self._eval_unary()
1526        if self._check_token("~"):
1527            return ~self._eval_unary()
1528        if self._check_token("!"):
1529            return 0 if self._eval_unary() else 1
1530        return self._eval_prim()
1531
1532    #
1533    # Lexing
1534    #
1535
1536    def _check_token(self, val):
1537        if self._peek_token().val == val:
1538            self._next_token()
1539            return True
1540        return False
1541
1542    def _peek_token(self):
1543        if not self._saved_token:
1544            self._saved_token = self._next_token()
1545        return self._saved_token
1546
1547    def _next_token(self):
1548        if self._saved_token:
1549            tmp = self._saved_token
1550            self._saved_token = None
1551            return tmp
1552
1553        while True:
1554            tok_id = None
1555
1556            match = _token_re.match(self._file_contents, self._tok_end_i)
1557            if match:
1558                tok_id = match.lastindex
1559                if tok_id == _T.CHAR_LITERAL:
1560                    val = self._unescape(match.group(tok_id).encode("utf-8"))
1561                    if len(val) != 1:
1562                        self._parse_error("character literals must be length 1")
1563                    tok_val = ord(val)
1564                else:
1565                    tok_val = match.group(tok_id)
1566
1567            elif self._lexer_state is _DEFAULT:
1568                match = _num_re.match(self._file_contents, self._tok_end_i)
1569                if match:
1570                    tok_id = _T.NUM
1571                    num_s = match.group(1)
1572                    tok_val = int(num_s,
1573                                  16 if num_s.startswith(("0x", "0X")) else
1574                                  8 if num_s[0] == "0" else
1575                                  10)
1576
1577            elif self._lexer_state is _EXPECT_PROPNODENAME:
1578                match = _propnodename_re.match(self._file_contents,
1579                                               self._tok_end_i)
1580                if match:
1581                    tok_id = _T.PROPNODENAME
1582                    tok_val = match.group(1)
1583                    self._lexer_state = _DEFAULT
1584
1585            else:  # self._lexer_state is _EXPECT_BYTE
1586                match = _byte_re.match(self._file_contents, self._tok_end_i)
1587                if match:
1588                    tok_id = _T.BYTE
1589                    tok_val = int(match.group(), 16)
1590
1591            if not tok_id:
1592                match = _misc_re.match(self._file_contents, self._tok_end_i)
1593                if match:
1594                    tok_id = _T.MISC
1595                    tok_val = match.group()
1596                else:
1597                    self._tok_i = self._tok_end_i
1598                    # Could get here due to a node/property naming appearing in
1599                    # an unexpected context as well as for bad characters in
1600                    # files. Generate a token for it so that the error can
1601                    # trickle up to some context where we can give a more
1602                    # helpful error message.
1603                    return _Token(_T.BAD, "<unknown token>")
1604
1605            self._tok_i = match.start()
1606            self._tok_end_i = match.end()
1607
1608            if tok_id == _T.SKIP:
1609                self._lineno += tok_val.count("\n")
1610                continue
1611
1612            # /include/ is handled in the lexer in the C tools as well, and can
1613            # appear anywhere
1614            if tok_id == _T.INCLUDE:
1615                # Can have newlines between /include/ and the filename
1616                self._lineno += tok_val.count("\n")
1617                # Do this manual extraction instead of doing it in the regex so
1618                # that we can properly count newlines
1619                filename = tok_val[tok_val.find('"') + 1:-1]
1620                self._enter_file(filename)
1621                continue
1622
1623            if tok_id == _T.LINE:
1624                # #line directive
1625                self._lineno = int(tok_val.split()[0]) - 1
1626                self.filename = tok_val[tok_val.find('"') + 1:-1]
1627                continue
1628
1629            if tok_id == _T.EOF:
1630                if self._filestack:
1631                    self._leave_file()
1632                    continue
1633                return _Token(_T.EOF, "<EOF>")
1634
1635            # State handling
1636
1637            if tok_id in (_T.DEL_PROP, _T.DEL_NODE, _T.OMIT_IF_NO_REF) or \
1638               tok_val in ("{", ";"):
1639
1640                self._lexer_state = _EXPECT_PROPNODENAME
1641
1642            elif tok_val == "[":
1643                self._lexer_state = _EXPECT_BYTE
1644
1645            elif tok_id in (_T.MEMRESERVE, _T.BITS) or tok_val == "]":
1646                self._lexer_state = _DEFAULT
1647
1648            return _Token(tok_id, tok_val)
1649
1650    def _expect_token(self, tok_val):
1651        # Raises an error if the next token does not have the string value
1652        # 'tok_val'. Returns the token.
1653
1654        tok = self._next_token()
1655        if tok.val != tok_val:
1656            self._parse_error(f"expected '{tok_val}', not '{tok.val}'")
1657
1658        return tok
1659
1660    def _expect_num(self):
1661        # Raises an error if the next token is not a number. Returns the token.
1662
1663        tok = self._next_token()
1664        if tok.id != _T.NUM:
1665            self._parse_error("expected number")
1666        return tok.val
1667
1668    def _parse_error(self, s):
1669        # This works out for the first line of the file too, where rfind()
1670        # returns -1
1671        column = self._tok_i - self._file_contents.rfind("\n", 0,
1672                                                         self._tok_i + 1)
1673        _err(f"{self.filename}:{self._lineno} (column {column}): "
1674             f"parse error: {s}")
1675
1676    def _enter_file(self, filename):
1677        # Enters the /include/d file 'filename', remembering the position in
1678        # the /include/ing file for later
1679
1680        self._filestack.append(
1681            _FileStackElt(self.filename, self._lineno,
1682                          self._file_contents, self._tok_end_i))
1683
1684        # Handle escapes in filenames, just for completeness
1685        filename = self._unescape(filename.encode("utf-8"))
1686        try:
1687            filename = filename.decode("utf-8")
1688        except UnicodeDecodeError:
1689            self._parse_error("filename is not valid UTF-8")
1690
1691        with self._open(filename, encoding="utf-8") as f:
1692            try:
1693                self._file_contents = f.read()
1694            except OSError as e:
1695                self._parse_error(e)
1696
1697        # Check for recursive /include/
1698        for i, parent in enumerate(self._filestack):
1699            if filename == parent[0]:
1700                self._parse_error("recursive /include/:\n" + " ->\n".join(
1701                    [f"{parent[0]}:{parent[1]}"
1702                     for parent in self._filestack[i:]] +
1703                    [filename]))
1704
1705        self.filename = f.name
1706        self._lineno = 1
1707        self._tok_end_i = 0
1708
1709    def _leave_file(self):
1710        # Leaves an /include/d file, returning to the file that /include/d it
1711
1712        self.filename, self._lineno, self._file_contents, self._tok_end_i = \
1713            self._filestack.pop()
1714
1715    def _next_ref2node(self):
1716        # Checks that the next token is a label/path reference and returns the
1717        # Node it points to. Only used during parsing, so uses _parse_error()
1718        # on errors to save some code in callers.
1719
1720        label = self._next_token()
1721        if label.id != _T.REF:
1722            self._parse_error(
1723                "expected label (&foo) or path (&{/foo/bar}) reference")
1724        try:
1725            return self._ref2node(label.val)
1726        except DTError as e:
1727            self._parse_error(e)
1728
1729    def _ref2node(self, s):
1730        # Returns the Node the label/path reference 's' points to
1731
1732        if s[0] == "{":
1733            # Path reference (&{/foo/bar})
1734            path = s[1:-1]
1735            if not path.startswith("/"):
1736                _err(f"node path '{path}' does not start with '/'")
1737            # Will raise DTError if the path doesn't exist
1738            return _root_and_path_to_node(self.root, path, path)
1739
1740        # Label reference (&foo).
1741
1742        # label2node hasn't been filled in yet, and using it would get messy
1743        # when nodes are deleted
1744        for node in self.node_iter():
1745            if s in node.labels:
1746                return node
1747
1748        _err(f"undefined node label '{s}'")
1749
1750    #
1751    # Post-processing
1752    #
1753
1754    def _register_phandles(self):
1755        # Registers any manually-inserted phandle properties in
1756        # self.phandle2node, so that we can avoid allocating any phandles from
1757        # that set. Also checks the format of the phandles and does misc.
1758        # sanity checking.
1759
1760        for node in self.node_iter():
1761            phandle = node.props.get("phandle")
1762            if phandle:
1763                if len(phandle.value) != 4:
1764                    _err(f"{node.path}: bad phandle length "
1765                         f"({len(phandle.value)}), expected 4 bytes")
1766
1767                is_self_referential = False
1768                for marker in phandle._markers:
1769                    _, marker_type, ref = marker
1770                    if marker_type is _MarkerType.PHANDLE:
1771                        # The phandle's value is itself a phandle reference
1772                        if self._ref2node(ref) is node:
1773                            # Alright to set a node's phandle equal to its own
1774                            # phandle. It'll force a new phandle to be
1775                            # allocated even if the node is otherwise
1776                            # unreferenced.
1777                            is_self_referential = True
1778                            break
1779
1780                        _err(f"{node.path}: {phandle.name} "
1781                             "refers to another node")
1782
1783                # Could put on else on the 'for' above too, but keep it
1784                # somewhat readable
1785                if not is_self_referential:
1786                    phandle_val = int.from_bytes(phandle.value, "big")
1787
1788                    if phandle_val in {0, 0xFFFFFFFF}:
1789                        _err(f"{node.path}: bad value {phandle_val:#010x} "
1790                             f"for {phandle.name}")
1791
1792                    if phandle_val in self.phandle2node:
1793                        _err(f"{node.path}: duplicated phandle {phandle_val:#x} "
1794                             "(seen before at "
1795                             f"{self.phandle2node[phandle_val].path})")
1796
1797                    self.phandle2node[phandle_val] = node
1798
1799    def _fixup_props(self):
1800        # Fills in node path and phandle references in property values, and
1801        # registers labels within values. This must be done after parsing,
1802        # since forwards references are allowed and nodes and properties might
1803        # be deleted.
1804
1805        for node in self.node_iter():
1806            # The tuple() avoids a 'dictionary changed size during iteration'
1807            # error
1808            for prop in tuple(node.props.values()):
1809                # 'prev_pos' and 'pos' are indices in the unpatched
1810                # property value. The result is built up in 'res'.
1811                prev_pos = 0
1812                res = b""
1813
1814                for marker in prop._markers:
1815                    pos, marker_type, ref = marker
1816
1817                    # Add data before the marker, reading from the unpatched
1818                    # property value
1819                    res += prop.value[prev_pos:pos]
1820
1821                    # Fix the marker offset so that it's correct for the
1822                    # patched property value, for later (not used in this
1823                    # function). The offset might change due to path
1824                    # references, which expand to something like "/foo/bar".
1825                    marker[0] = len(res)
1826
1827                    if marker_type is _MarkerType.LABEL:
1828                        # This is a temporary format so that we can catch
1829                        # duplicate references. prop._label_offset_lst is changed
1830                        # to a dictionary that maps labels to offsets in
1831                        # _register_labels().
1832                        _append_no_dup(prop._label_offset_lst, (ref, len(res)))
1833                    elif marker_type in (_MarkerType.PATH, _MarkerType.PHANDLE):
1834                        # Path or phandle reference
1835                        try:
1836                            ref_node = self._ref2node(ref)
1837                        except DTError as e:
1838                            _err(f"{prop.node.path}: {e}")
1839
1840                        # For /omit-if-no-ref/
1841                        ref_node._is_referenced = True
1842
1843                        if marker_type is _MarkerType.PATH:
1844                            res += ref_node.path.encode("utf-8") + b'\0'
1845                        else:  # marker_type is PHANDLE
1846                            res += self._node_phandle(ref_node)
1847                            # Skip over the dummy phandle placeholder
1848                            pos += 4
1849
1850                    prev_pos = pos
1851
1852                # Store the final fixed-up value. Add the data after the last
1853                # marker.
1854                prop.value = res + prop.value[prev_pos:]
1855
1856    def _register_aliases(self):
1857        # Registers aliases from the /aliases node in self.alias2node. Also
1858        # checks the format of the alias properties.
1859
1860        # We copy this to self.alias2node at the end to avoid get_node()
1861        # looking up paths via other aliases while verifying aliases
1862        alias2node = {}
1863
1864        alias_re = re.compile("[0-9a-z-]+$")
1865
1866        aliases = self.root.nodes.get("aliases")
1867        if aliases:
1868            for prop in aliases.props.values():
1869                if not alias_re.match(prop.name):
1870                    _err(f"/aliases: alias property name '{prop.name}' "
1871                         "should include only characters from [0-9a-z-]")
1872
1873                # Property.to_path() checks that the node exists, has
1874                # the right type, etc. Swallow errors for invalid
1875                # aliases with self._force.
1876                try:
1877                    alias2node[prop.name] = prop.to_path()
1878                except DTError:
1879                    if self._force:
1880                        continue
1881                    raise
1882
1883        self.alias2node = alias2node
1884
1885    def _remove_unreferenced(self):
1886        # Removes any unreferenced nodes marked with /omit-if-no-ref/ from the
1887        # tree
1888
1889        # tuple() is to avoid 'RuntimeError: dictionary changed size during
1890        # iteration' errors
1891        for node in tuple(self.node_iter()):
1892            if node._omit_if_no_ref and not node._is_referenced:
1893                node._del()
1894
1895    def _register_labels(self):
1896        # Checks for duplicate labels and registers labels in label2node,
1897        # label2prop, and label2prop_offset
1898
1899        label2things = collections.defaultdict(set)
1900
1901        # Register all labels and the nodes/props they point to in label2things
1902        for node in self.node_iter():
1903            for label in node.labels:
1904                label2things[label].add(node)
1905                self.label2node[label] = node
1906
1907            for prop in node.props.values():
1908                for label in prop.labels:
1909                    label2things[label].add(prop)
1910                    self.label2prop[label] = prop
1911
1912                for label, offset in prop._label_offset_lst:
1913                    label2things[label].add((prop, offset))
1914                    self.label2prop_offset[label] = (prop, offset)
1915
1916                # See _fixup_props()
1917                prop.offset_labels = dict(prop._label_offset_lst)
1918
1919        for label, things in label2things.items():
1920            if len(things) > 1:
1921                strings = []
1922                for thing in things:
1923                    if isinstance(thing, Node):
1924                        strings.append(f"on {thing.path}")
1925                    elif isinstance(thing, Property):
1926                        strings.append(f"on property '{thing.name}' "
1927                                       f"of node {thing.node.path}")
1928                    else:
1929                        # Label within property value
1930                        strings.append("in the value of property "
1931                                       f"'{thing[0].name}' of node "
1932                                       f"{thing[0].node.path}")
1933
1934                # Give consistent error messages to help with testing
1935                strings.sort()
1936
1937                _err(f"Label '{label}' appears " + " and ".join(strings))
1938
1939
1940    #
1941    # Misc.
1942    #
1943
1944    def _unescape(self, b):
1945        # Replaces backslash escapes in the 'bytes' array 'b'. We can't do this at
1946        # the string level, because the result might not be valid UTF-8 when
1947        # octal/hex escapes are involved.
1948
1949        def sub(match):
1950            esc = match.group(1)
1951            if esc == b"a": return b"\a"
1952            if esc == b"b": return b"\b"
1953            if esc == b"t": return b"\t"
1954            if esc == b"n": return b"\n"
1955            if esc == b"v": return b"\v"
1956            if esc == b"f": return b"\f"
1957            if esc == b"r": return b"\r"
1958
1959            if esc[0] in b"01234567":
1960                # Octal escape
1961                try:
1962                    return int(esc, 8).to_bytes(1, "big")
1963                except OverflowError:
1964                    self._parse_error("octal escape out of range (> 255)")
1965
1966            if esc[0] == ord("x") and len(esc) > 1:
1967                # Hex escape
1968                return int(esc[1:], 16).to_bytes(1, "big")
1969
1970            # Return <char> as-is for other \<char>
1971            return esc[0].to_bytes(1, "big")
1972
1973        return _unescape_re.sub(sub, b)
1974
1975    def _open(self, filename, mode="r", **kwargs):
1976        # Wrapper around standard Python open(), accepting the same params.
1977        # But searches for a 'filename' file in the directory of the current
1978        # file and the include path.
1979
1980        # The C tools support specifying stdin with '-' too
1981        if filename == "-":
1982            return sys.stdin.buffer if "b" in mode else sys.stdin
1983
1984        # Try the directory of the current file first
1985        dirname = os.path.dirname(self.filename)
1986        try:
1987            return open(os.path.join(dirname, filename), mode, **kwargs)
1988        except OSError as e:
1989            if e.errno != errno.ENOENT:
1990                self._parse_error(e)
1991
1992            # Try each directory from the include path
1993            for path in self._include_path:
1994                try:
1995                    return open(os.path.join(path, filename), mode, **kwargs)
1996                except OSError as e:
1997                    if e.errno != errno.ENOENT:
1998                        self._parse_error(e)
1999                    continue
2000
2001            self._parse_error(f"'{filename}' could not be found")
2002
2003#
2004# Public functions
2005#
2006
2007def to_num(data: bytes, length: Optional[int] = None,
2008           signed: bool = False) -> int:
2009    """
2010    Converts the 'bytes' array 'data' to a number. The value is expected to be
2011    in big-endian format, which is standard in devicetree.
2012
2013    length (default: None):
2014      The expected length of the value in bytes, as a simple type check. If
2015      None, the length check is skipped.
2016
2017    signed (default: False):
2018      If True, the value will be interpreted as signed rather than unsigned.
2019    """
2020    _check_is_bytes(data)
2021    if length is not None:
2022        _check_length_positive(length)
2023        if len(data) != length:
2024            _err(f"{data!r} is {len(data)} bytes long, expected {length}")
2025
2026    return int.from_bytes(data, "big", signed=signed)
2027
2028def to_nums(data: bytes, length: int = 4, signed: bool = False) -> List[int]:
2029    """
2030    Like Property.to_nums(), but takes an arbitrary 'bytes' array. The values
2031    are assumed to be in big-endian format, which is standard in devicetree.
2032    """
2033    _check_is_bytes(data)
2034    _check_length_positive(length)
2035
2036    if len(data) % length:
2037        _err(f"{data!r} is {len(data)} bytes long, "
2038             f"expected a length that's a multiple of {length}")
2039
2040    return [int.from_bytes(data[i:i + length], "big", signed=signed)
2041            for i in range(0, len(data), length)]
2042
2043#
2044# Private helpers
2045#
2046
2047def _check_is_bytes(data):
2048    if not isinstance(data, bytes):
2049        _err(f"'{data}' has type '{type(data).__name__}', expected 'bytes'")
2050
2051def _check_length_positive(length):
2052    if length < 1:
2053        _err("'length' must be greater than zero, was " + str(length))
2054
2055def _append_no_dup(lst, elm):
2056    # Appends 'elm' to 'lst', but only if it isn't already in 'lst'. Lets us
2057    # preserve order, which a set() doesn't.
2058
2059    if elm not in lst:
2060        lst.append(elm)
2061
2062def _decode_and_escape(b):
2063    # Decodes the 'bytes' array 'b' as UTF-8 and backslash-escapes special
2064    # characters
2065
2066    # Hacky but robust way to avoid double-escaping any '\' spit out by
2067    # 'backslashreplace' bytes.translate() can't map to more than a single
2068    # byte, but str.translate() can map to more than one character, so it's
2069    # nice here. There's probably a nicer way to do this.
2070    return b.decode("utf-8", "surrogateescape") \
2071            .translate(_escape_table) \
2072            .encode("utf-8", "surrogateescape") \
2073            .decode("utf-8", "backslashreplace")
2074
2075def _root_and_path_to_node(cur, path, fullpath):
2076    # Returns the node pointed at by 'path', relative to the Node 'cur'. For
2077    # example, if 'cur' has path /foo/bar, and 'path' is "baz/qaz", then the
2078    # node with path /foo/bar/baz/qaz is returned. 'fullpath' is the path as
2079    # given in the .dts file, for error messages.
2080
2081    for component in path.split("/"):
2082        # Collapse multiple / in a row, and allow a / at the end
2083        if not component:
2084            continue
2085
2086        if component not in cur.nodes:
2087            _err(f"component '{component}' in path '{fullpath}' "
2088                 "does not exist")
2089
2090        cur = cur.nodes[component]
2091
2092    return cur
2093
2094def _err(msg) -> NoReturn:
2095    raise DTError(msg)
2096
2097_escape_table = str.maketrans({
2098    "\\": "\\\\",
2099    '"': '\\"',
2100    "\a": "\\a",
2101    "\b": "\\b",
2102    "\t": "\\t",
2103    "\n": "\\n",
2104    "\v": "\\v",
2105    "\f": "\\f",
2106    "\r": "\\r"})
2107
2108# Lexer states
2109_DEFAULT = 0
2110_EXPECT_PROPNODENAME = 1
2111_EXPECT_BYTE = 2
2112
2113_num_re = re.compile(r"(0[xX][0-9a-fA-F]+|[0-9]+)(?:ULL|UL|LL|U|L)?")
2114
2115# A leading \ is allowed property and node names, probably to allow weird node
2116# names that would clash with other stuff
2117_propnodename_re = re.compile(r"\\?([a-zA-Z0-9,._+*#?@-]+)")
2118
2119# Node names are more restrictive than property names.
2120_nodename_chars = set(string.ascii_letters + string.digits + ',._+-@')
2121
2122# Misc. tokens that are tried after a property/node name. This is important, as
2123# there's overlap with the allowed characters in names.
2124_misc_re = re.compile(
2125    "|".join(re.escape(pat) for pat in (
2126        "==", "!=", "!", "=", ",", ";", "+", "-", "*", "/", "%", "~", "?", ":",
2127        "^", "(", ")", "{", "}", "[", "]", "<<", "<=", "<", ">>", ">=", ">",
2128        "||", "|", "&&", "&")))
2129
2130_byte_re = re.compile(r"[0-9a-fA-F]{2}")
2131
2132# Matches a backslash escape within a 'bytes' array. Captures the 'c' part of
2133# '\c', where c might be a single character or an octal/hex escape.
2134_unescape_re = re.compile(br'\\([0-7]{1,3}|x[0-9A-Fa-f]{1,2}|.)')
2135
2136def _init_tokens():
2137    # Builds a (<token 1>)|(<token 2>)|... regex and returns it. The
2138    # way this is constructed makes the token's value as an int appear
2139    # in match.lastindex after a match.
2140
2141    # Each pattern must have exactly one capturing group, which can capture any
2142    # part of the pattern. This makes match.lastindex match the token type.
2143    # _Token.val is based on the captured string.
2144    token_spec = {
2145        _T.INCLUDE: r'(/include/\s*"(?:[^\\"]|\\.)*")',
2146        # #line directive or GCC linemarker
2147        _T.LINE:
2148        r'^#(?:line)?[ \t]+([0-9]+[ \t]+"(?:[^\\"]|\\.)*")(?:[ \t]+[0-9]+){0,4}',
2149
2150        _T.STRING: r'"((?:[^\\"]|\\.)*)"',
2151        _T.DTS_V1: r"(/dts-v1/)",
2152        _T.PLUGIN: r"(/plugin/)",
2153        _T.MEMRESERVE: r"(/memreserve/)",
2154        _T.BITS: r"(/bits/)",
2155        _T.DEL_PROP: r"(/delete-property/)",
2156        _T.DEL_NODE: r"(/delete-node/)",
2157        _T.OMIT_IF_NO_REF: r"(/omit-if-no-ref/)",
2158        _T.LABEL: r"([a-zA-Z_][a-zA-Z0-9_]*):",
2159        _T.CHAR_LITERAL: r"'((?:[^\\']|\\.)*)'",
2160        _T.REF: r"&([a-zA-Z_][a-zA-Z0-9_]*|{[a-zA-Z0-9,._+*#?@/-]*})",
2161        _T.INCBIN: r"(/incbin/)",
2162        # Whitespace, C comments, and C++ comments
2163        _T.SKIP: r"(\s+|(?:/\*(?:.|\n)*?\*/)|//.*$)",
2164        # Return a token for end-of-file so that the parsing code can
2165        # always assume that there are more tokens when looking
2166        # ahead. This simplifies things.
2167        _T.EOF: r"(\Z)",
2168    }
2169
2170    # MULTILINE is needed for C++ comments and #line directives
2171    return re.compile("|".join(token_spec[tok_id] for tok_id in
2172                               range(1, _T.EOF + 1)),
2173                      re.MULTILINE | re.ASCII)
2174
2175_token_re = _init_tokens()
2176
2177_TYPE_TO_N_BYTES = {
2178    _MarkerType.UINT8: 1,
2179    _MarkerType.UINT16: 2,
2180    _MarkerType.UINT32: 4,
2181    _MarkerType.UINT64: 8,
2182}
2183
2184_N_BYTES_TO_TYPE = {
2185    1: _MarkerType.UINT8,
2186    2: _MarkerType.UINT16,
2187    4: _MarkerType.UINT32,
2188    8: _MarkerType.UINT64,
2189}
2190
2191_N_BYTES_TO_START_STR = {
2192    1: " [",
2193    2: " /bits/ 16 <",
2194    4: " <",
2195    8: " /bits/ 64 <",
2196}
2197
2198_N_BYTES_TO_END_STR = {
2199    1: " ]",
2200    2: " >",
2201    4: " >",
2202    8: " >",
2203}
2204