1# Copyright (c) 2019, Nordic Semiconductor
2# SPDX-License-Identifier: BSD-3-Clause
3
4# Tip: You can view just the documentation with 'pydoc3 devicetree.dtlib'
5
6"""
7A library for extracting information from .dts (devicetree) files. See the
8documentation for the DT and Node classes for more information.
9
10The top-level entry point of the library is the DT class. DT.__init__() takes a
11.dts file to parse and a list of directories to search for any /include/d
12files.
13"""
14
15import collections
16import enum
17import errno
18import os
19import re
20import string
21import sys
22import textwrap
23from collections.abc import Iterable
24from typing import TYPE_CHECKING, Any, NamedTuple, NoReturn, Optional, Union
25
26# NOTE: tests/test_dtlib.py is the test suite for this library.
27
28class DTError(Exception):
29    "Exception raised for devicetree-related errors"
30
31class Node:
32    r"""
33    Represents a node in the devicetree ('node-name { ... };').
34
35    These attributes are available on Node instances:
36
37    name:
38      The name of the node (a string).
39
40    filename:
41      The name of the .dts file where the node is defined
42
43    lineno:
44      The line number in the .dts file where the node starts.
45
46    unit_addr:
47      The portion after the '@' in the node's name, or the empty string if the
48      name has no '@' in it.
49
50      Note that this is a string. Run int(node.unit_addr, 16) to get an
51      integer.
52
53    props:
54      A dict that maps the properties defined on the node to
55      their values. 'props' is indexed by property name (a string), and values
56      are Property objects.
57
58      To convert property values to Python numbers or strings, use
59      dtlib.to_num(), dtlib.to_nums(), or dtlib.to_string().
60
61      Property values are represented as 'bytes' arrays to support the full
62      generality of DTS, which allows assignments like
63
64        x = "foo", < 0x12345678 >, [ 9A ];
65
66      This gives x the value b"foo\0\x12\x34\x56\x78\x9A". Numbers in DTS are
67      stored in big-endian format.
68
69    nodes:
70      A dict containing the subnodes of the node, indexed by name.
71
72    labels:
73      A list with all labels pointing to the node, in the same order as the
74      labels appear, but with duplicates removed.
75
76      'label_1: label_2: node { ... };' gives 'labels' the value
77      ["label_1", "label_2"].
78
79    parent:
80      The parent Node of the node. 'None' for the root node.
81
82    path:
83      The path to the node as a string, e.g. "/foo/bar".
84
85    dt:
86      The DT instance this node belongs to.
87    """
88
89    #
90    # Public interface
91    #
92
93    def __init__(self, name: str, parent: Optional["Node"], dt: "DT", filename: str, lineno: int):
94        """
95        Node constructor. Not meant to be called directly by clients.
96        """
97        # Remember to update DT.__deepcopy__() if you change this.
98
99        self._name = name
100        self._filename = filename
101        self._lineno = lineno
102        self.props: dict[str, Property] = {}
103        self.nodes: dict[str, Node] = {}
104        self.labels: list[str] = []
105        self.parent = parent
106        self.dt = dt
107
108        self._omit_if_no_ref = False
109        self._is_referenced = False
110
111        if name.count("@") > 1:
112            dt._parse_error("multiple '@' in node name")
113        if name != "/":
114            for char in name:
115                if char not in _nodename_chars:
116                    dt._parse_error(f"{self.path}: bad character '{char}' "
117                                    "in node name")
118
119    @property
120    def name(self) -> str:
121        """
122        See the class documentation.
123        """
124        # Converted to a property to discourage renaming -- that has to be done
125        # via DT.move_node.
126        return self._name
127
128    @property
129    def lineno(self) -> int:
130        """
131        See the class documentation.
132        """
133        return self._lineno
134
135    @property
136    def filename(self) -> str:
137        """
138        See the class documentation.
139        """
140        return self._filename
141
142    @property
143    def unit_addr(self) -> str:
144        """
145        See the class documentation.
146        """
147        return self.name.partition("@")[2]
148
149    @property
150    def path(self) -> str:
151        """
152        See the class documentation.
153        """
154        node_names = []
155
156        # This dynamic computation is required to be able to move
157        # nodes in the DT class.
158        cur = self
159        while cur.parent:
160            node_names.append(cur.name)
161            cur = cur.parent
162
163        return "/" + "/".join(reversed(node_names))
164
165    def node_iter(self) -> Iterable['Node']:
166        """
167        Returns a generator for iterating over the node and its children,
168        recursively.
169
170        For example, this will iterate over all nodes in the tree (like
171        dt.node_iter()).
172
173          for node in dt.root.node_iter():
174              ...
175        """
176        yield self
177        for node in self.nodes.values():
178            yield from node.node_iter()
179
180    def _get_prop(self, name: str) -> 'Property':
181        # Returns the property named 'name' on the node, creating it if it
182        # doesn't already exist. Move the entry to the end of the props
183        # dictionary so the order is preserved in the output DTS file.
184
185        prop = self.props.pop(name, None)
186        if not prop:
187            prop = Property(self, name)
188        self.props[name] = prop
189        return prop
190
191    def _del(self) -> None:
192        # Removes the node from the tree. When called on the root node,
193        # this method will leave it empty but still part of the tree.
194
195        if self.parent is None:
196            self.nodes.clear()
197            self.props.clear()
198            return
199        self.parent.nodes.pop(self.name)  # type: ignore
200
201    def __str__(self):
202        """
203        Returns a DTS representation of the node. Called automatically if the
204        node is print()ed.
205        """
206        def safe_relpath(filename):
207            # Returns a relative path to the file, or the absolute path if
208            # a relative path cannot be established (on Windows with files
209            # in different drives, for example).
210            try:
211                return os.path.relpath(filename, start=self.dt._base_dir)
212            except ValueError:
213                return filename
214
215        rel_filename = safe_relpath(self.filename)
216        s = f"\n/* node '{self.path}' defined in {rel_filename}:{self.lineno} */\n"
217        s += "".join(label + ": " for label in self.labels)
218
219        s += f"{self.name} {{\n"
220
221        lines = []
222        comments = {}
223        for prop in self.props.values():
224            prop_str = textwrap.indent(str(prop), "\t")
225            lines.extend(prop_str.splitlines(True))
226
227            rel_filename = safe_relpath(prop.filename)
228            comment = f"/* in {rel_filename}:{prop.lineno} */"
229            comments[len(lines)-1] = comment
230
231        if lines:
232            max_len = max([ len(line.rstrip()) for line in lines ])
233            for i, line in enumerate(lines):
234                if i in comments:
235                    line += " " * (max_len - len(line) + 1) + comments[i] + "\n"
236                s += line
237
238        for child in self.nodes.values():
239            s += textwrap.indent(child.__str__(), "\t") + "\n"
240
241        s += "};"
242
243        return s
244
245    def __repr__(self):
246        """
247        Returns some information about the Node instance. Called automatically
248        if the Node instance is evaluated.
249        """
250        return f"<Node {self.path} in {self.filename}:{self.lineno}>"
251
252# See Property.type
253class Type(enum.IntEnum):
254    EMPTY = 0
255    BYTES = 1
256    NUM = 2
257    NUMS = 3
258    STRING = 4
259    STRINGS = 5
260    PATH = 6
261    PHANDLE = 7
262    PHANDLES = 8
263    PHANDLES_AND_NUMS = 9
264    COMPOUND = 10
265
266class _MarkerType(enum.IntEnum):
267    # Types of markers in property values
268
269    # References
270    PATH = 0     # &foo
271    PHANDLE = 1  # <&foo>
272    LABEL = 2    # foo: <1 2 3>
273
274    # Start of data blocks of specific type
275    UINT8 = 3   # [00 01 02] (and also used for /incbin/)
276    UINT16 = 4  # /bits/ 16 <1 2 3>
277    UINT32 = 5  # <1 2 3>
278    UINT64 = 6  # /bits/ 64 <1 2 3>
279    STRING = 7  # "foo"
280
281class Property:
282    """
283    Represents a property ('x = ...').
284
285    These attributes are available on Property instances:
286
287    name:
288      The name of the property (a string).
289
290    filename:
291      The name of the .dts file where the property is defined
292
293    lineno:
294      The line number in the .dts file where the property starts.
295
296    value:
297      The value of the property, as a 'bytes' string. Numbers are stored in
298      big-endian format, and strings are null-terminated. Putting multiple
299      comma-separated values in an assignment (e.g., 'x = < 1 >, "foo"') will
300      concatenate the values.
301
302      See the to_*() methods for converting the value to other types.
303
304    type:
305      The type of the property, inferred from the syntax used in the
306      assignment. This is one of the following constants (with example
307      assignments):
308
309        Assignment                  | Property.type
310        ----------------------------+------------------------
311        foo;                        | dtlib.Type.EMPTY
312        foo = [];                   | dtlib.Type.BYTES
313        foo = [01 02];              | dtlib.Type.BYTES
314        foo = /bits/ 8 <1>;         | dtlib.Type.BYTES
315        foo = <1>;                  | dtlib.Type.NUM
316        foo = <>;                   | dtlib.Type.NUMS
317        foo = <1 2 3>;              | dtlib.Type.NUMS
318        foo = <1 2>, <3>;           | dtlib.Type.NUMS
319        foo = "foo";                | dtlib.Type.STRING
320        foo = "foo", "bar";         | dtlib.Type.STRINGS
321        foo = <&l>;                 | dtlib.Type.PHANDLE
322        foo = <&l1 &l2 &l3>;        | dtlib.Type.PHANDLES
323        foo = <&l1 &l2>, <&l3>;     | dtlib.Type.PHANDLES
324        foo = <&l1 1 2 &l2 3 4>;    | dtlib.Type.PHANDLES_AND_NUMS
325        foo = <&l1 1 2>, <&l2 3 4>; | dtlib.Type.PHANDLES_AND_NUMS
326        foo = &l;                   | dtlib.Type.PATH
327        *Anything else*             | dtlib.Type.COMPOUND
328
329      *Anything else* includes properties mixing phandle (<&label>) and node
330      path (&label) references with other data.
331
332      Data labels in the property value do not influence the type.
333
334    labels:
335      A list with all labels pointing to the property, in the same order as the
336      labels appear, but with duplicates removed.
337
338      'label_1: label2: x = ...' gives 'labels' the value
339      ["label_1", "label_2"].
340
341    offset_labels:
342      A dictionary that maps any labels within the property's value to their
343      offset, in bytes. For example, 'x = < 0 label_1: 1 label_2: >' gives
344      'offset_labels' the value {"label_1": 4, "label_2": 8}.
345
346      Iteration order will match the order of the labels on Python versions
347      that preserve dict insertion order.
348
349    node:
350      The Node the property is on.
351    """
352
353    #
354    # Public interface
355    #
356
357    def __init__(self, node: Node, name: str):
358        # Remember to update DT.__deepcopy__() if you change this.
359
360        if "@" in name:
361            node.dt._parse_error("'@' is only allowed in node names")
362
363        self.name = name
364        self.filename = ""
365        self.lineno = -1
366        self.value = b""
367        self.labels: list[str] = []
368        # We have to wait to set this until later, when we've got
369        # the entire tree.
370        self.offset_labels: dict[str, int] = {}
371        self.node: Node = node
372
373        self._label_offset_lst: list[tuple[str, int]] = []
374
375        # A list of [offset, label, type] lists (sorted by offset),
376        # giving the locations of references within the value. 'type'
377        # is either _MarkerType.PATH, for a node path reference,
378        # _MarkerType.PHANDLE, for a phandle reference, or
379        # _MarkerType.LABEL, for a label on/within data. Node paths
380        # and phandles need to be patched in after parsing.
381        self._markers: list[list] = []
382
383    @property
384    def type(self) -> Type:
385        """
386        See the class documentation.
387        """
388        # Data labels (e.g. 'foo = label: <3>') are irrelevant, so filter them
389        # out
390        types = [marker[1] for marker in self._markers
391                 if marker[1] != _MarkerType.LABEL]
392
393        if not types:
394            return Type.EMPTY
395
396        if types == [_MarkerType.UINT8]:
397            return Type.BYTES
398
399        if types == [_MarkerType.UINT32]:
400            return Type.NUM if len(self.value) == 4 else Type.NUMS
401
402        # Treat 'foo = <1 2 3>, <4 5>, ...' as Type.NUMS too
403        if set(types) == {_MarkerType.UINT32}:
404            return Type.NUMS
405
406        if set(types) == {_MarkerType.STRING}:
407            return Type.STRING if len(types) == 1 else Type.STRINGS
408
409        if types == [_MarkerType.PATH]:
410            return Type.PATH
411
412        if (types == [_MarkerType.UINT32, _MarkerType.PHANDLE]
413            and len(self.value) == 4):
414            return Type.PHANDLE
415
416        if set(types) == {_MarkerType.UINT32, _MarkerType.PHANDLE}:
417            if len(self.value) == 4*types.count(_MarkerType.PHANDLE):
418                # Array with just phandles in it
419                return Type.PHANDLES
420            # Array with both phandles and numbers
421            return Type.PHANDLES_AND_NUMS
422
423        return Type.COMPOUND
424
425    def to_num(self, signed=False) -> int:
426        """
427        Returns the value of the property as a number.
428
429        Raises DTError if the property was not assigned with this syntax (has
430        Property.type Type.NUM):
431
432            foo = < 1 >;
433
434        signed (default: False):
435          If True, the value will be interpreted as signed rather than
436          unsigned.
437        """
438        if self.type is not Type.NUM:
439            _err(f"expected property '{self.name}' on {self.node.path} in "
440                 f"{self.filename}:{self.lineno} to be assigned with "
441                 f"'{self.name} = < (number) >;', not '{self}'")
442
443        return int.from_bytes(self.value, "big", signed=signed)
444
445    def to_nums(self, signed=False) -> list[int]:
446        """
447        Returns the value of the property as a list of numbers.
448
449        Raises DTError if the property was not assigned with this syntax (has
450        Property.type Type.NUM or Type.NUMS):
451
452            foo = < 1 2 ... >;
453
454        signed (default: False):
455          If True, the values will be interpreted as signed rather than
456          unsigned.
457        """
458        if self.type not in (Type.NUM, Type.NUMS):
459            _err(f"expected property '{self.name}' on {self.node.path} in "
460                 f"{self.filename}:{self.lineno} to be assigned with "
461                 f"'{self.name} = < (number) (number) ... >;', not '{self}'")
462
463        return [int.from_bytes(self.value[i:i + 4], "big", signed=signed)
464                for i in range(0, len(self.value), 4)]
465
466    def to_bytes(self) -> bytes:
467        """
468        Returns the value of the property as a raw 'bytes', like
469        Property.value, except with added type checking.
470
471        Raises DTError if the property was not assigned with this syntax (has
472        Property.type Type.BYTES):
473
474            foo = [ 01 ... ];
475        """
476        if self.type is not Type.BYTES:
477            _err(f"expected property '{self.name}' on {self.node.path} "
478                 f"in {self.filename}:{self.lineno} to be assigned with "
479                 f"'{self.name} = [ (byte) (byte) ... ];', not '{self}'")
480
481        return self.value
482
483    def to_string(self) -> str:
484        """
485        Returns the value of the property as a string.
486
487        Raises DTError if the property was not assigned with this syntax (has
488        Property.type Type.STRING):
489
490            foo = "string";
491
492        This function might also raise UnicodeDecodeError if the string is
493        not valid UTF-8.
494        """
495        if self.type is not Type.STRING:
496            _err(f"expected property '{self.name}' on {self.node.path} "
497                 f"in {self.filename}:{self.lineno} to be assigned with "
498                 f"'{self.name} = \"string\";', not '{self}'")
499
500        try:
501            ret = self.value.decode("utf-8")[:-1]  # Strip null
502        except UnicodeDecodeError:
503            _err(f"value of property '{self.name}' ({self.value!r}) "
504                 f"on {self.node.path} in {self.filename}:{self.lineno} "
505                 "is not valid UTF-8")
506
507        return ret  # The separate 'return' appeases the type checker.
508
509    def to_strings(self) -> list[str]:
510        """
511        Returns the value of the property as a list of strings.
512
513        Raises DTError if the property was not assigned with this syntax (has
514        Property.type Type.STRING or Type.STRINGS):
515
516            foo = "string", "string", ... ;
517
518        Also raises DTError if any of the strings are not valid UTF-8.
519        """
520        if self.type not in (Type.STRING, Type.STRINGS):
521            _err(f"expected property '{self.name}' on {self.node.path} in "
522                 f"{self.filename}:{self.lineno} to be assigned with "
523                 f"'{self.name} = \"string\", \"string\", ... ;', not '{self}'")
524
525        try:
526            ret = self.value.decode("utf-8").split("\0")[:-1]
527        except UnicodeDecodeError:
528            _err(f"value of property '{self.name}' ({self.value!r}) "
529                 f"on {self.node.path} in {self.filename}:{self.lineno} "
530                 "is not valid UTF-8")
531
532        return ret  # The separate 'return' appeases the type checker.
533
534    def to_node(self) -> Node:
535        """
536        Returns the Node the phandle in the property points to.
537
538        Raises DTError if the property was not assigned with this syntax (has
539        Property.type Type.PHANDLE).
540
541            foo = < &bar >;
542        """
543        if self.type is not Type.PHANDLE:
544            _err(f"expected property '{self.name}' on {self.node.path} in "
545                 f"{self.filename}:{self.lineno} to be assigned with "
546                 f"'{self.name} = < &foo >;', not '{self}'")
547
548        return self.node.dt.phandle2node[int.from_bytes(self.value, "big")]
549
550    def to_nodes(self) -> list[Node]:
551        """
552        Returns a list with the Nodes the phandles in the property point to.
553
554        Raises DTError if the property value contains anything other than
555        phandles. All of the following are accepted:
556
557            foo = < >
558            foo = < &bar >;
559            foo = < &bar &baz ... >;
560            foo = < &bar ... >, < &baz ... >;
561        """
562        def type_ok():
563            if self.type in (Type.PHANDLE, Type.PHANDLES):
564                return True
565            # Also accept 'foo = < >;'
566            return self.type is Type.NUMS and not self.value
567
568        if not type_ok():
569            _err(f"expected property '{self.name}' on {self.node.path} in "
570                 f"{self.filename}:{self.lineno} to be assigned with "
571                 f"'{self.name} = < &foo &bar ... >;', not '{self}'")
572
573        return [self.node.dt.phandle2node[int.from_bytes(self.value[i:i + 4],
574                                                         "big")]
575                for i in range(0, len(self.value), 4)]
576
577    def to_path(self) -> Node:
578        """
579        Returns the Node referenced by the path stored in the property.
580
581        Raises DTError if the property was not assigned with either of these
582        syntaxes (has Property.type Type.PATH or Type.STRING):
583
584            foo = &bar;
585            foo = "/bar";
586
587        For the second case, DTError is raised if the path does not exist.
588        """
589        if self.type not in (Type.PATH, Type.STRING):
590            _err(f"expected property '{self.name}' on {self.node.path} in "
591                 f"{self.filename}:{self.lineno} to be assigned with either "
592                 f"'{self.name} = &foo' or '{self.name} = \"/path/to/node\"', "
593                 f"not '{self}'")
594
595        try:
596            path = self.value.decode("utf-8")[:-1]
597        except UnicodeDecodeError:
598            _err(f"value of property '{self.name}' ({self.value!r}) "
599                 f"on {self.node.path} in {self.filename}:{self.lineno} "
600                 "is not valid UTF-8")
601
602        try:
603            ret = self.node.dt.get_node(path)
604        except DTError:
605            _err(f"property '{self.name}' on {self.node.path} in "
606                 f"{self.filename}:{self.lineno} points to the non-existent "
607                 f'node "{path}"')
608
609        return ret  # The separate 'return' appeases the type checker.
610
611    def __str__(self):
612        s = "".join(label + ": " for label in self.labels) + self.name
613        if not self.value:
614            return s + ";"
615
616        s += " ="
617        newline = "\n" + " " * len(s)
618
619        for i, (pos, marker_type, ref) in enumerate(self._markers):
620            if i < len(self._markers) - 1:
621                next_marker = self._markers[i + 1]
622            else:
623                next_marker = None
624
625            # End of current marker
626            end = next_marker[0] if next_marker else len(self.value)
627
628            if marker_type is _MarkerType.STRING:
629                # end - 1 to strip off the null terminator
630                s += f' "{_decode_and_escape(self.value[pos:end - 1])}"'
631                if end != len(self.value):
632                    s += f",{newline}"
633            elif marker_type is _MarkerType.PATH:
634                s += " &" + ref
635                if end != len(self.value):
636                    s += f",{newline}"
637            else:
638                # <> or []
639
640                if marker_type is _MarkerType.LABEL:
641                    s += f" {ref}:"
642                elif marker_type is _MarkerType.PHANDLE:
643                    s += " &" + ref
644                    pos += 4
645                    if pos != len(self.value) and len(s) - array_start > 80:  # noqa: F821
646                        s += array_newline                                    # noqa: F821
647                        array_start = len(s)
648                    # Subtle: There might be more data between the phandle and
649                    # the next marker, so we can't 'continue' here
650                else:  # marker_type is _MarkerType.UINT*
651                    elm_size = _TYPE_TO_N_BYTES[marker_type]
652                    s += _N_BYTES_TO_START_STR[elm_size]
653                    array_start = len(s)
654                    array_newline = "\n" + " " * array_start
655
656                while pos != end:
657                    num = int.from_bytes(self.value[pos:pos + elm_size],
658                                         "big")
659                    if elm_size == 1:
660                        s += f" {num:02X}"
661                    else:
662                        s += f" {hex(num)}"
663
664                    pos += elm_size
665                    if pos != len(self.value) and len(s) - array_start > 80:
666                        s += array_newline
667                        array_start = len(s)
668
669                if (pos != 0
670                    and (not next_marker
671                         or next_marker[1]
672                         not in (_MarkerType.PHANDLE, _MarkerType.LABEL))):
673
674                    s += _N_BYTES_TO_END_STR[elm_size]
675                    if pos != len(self.value):
676                        s += f",{newline}"
677
678        return s + ";"
679
680    def __repr__(self):
681        return (f"<Property '{self.name}' on {self.node.path} in "
682                f"{self.filename}:{self.lineno}>")
683
684    #
685    # Internal functions
686    #
687
688    def _add_marker(self, marker_type: _MarkerType, data: Any = None):
689        # Helper for registering markers in the value that are processed after
690        # parsing. See _fixup_props(). 'marker_type' identifies the type of
691        # marker, and 'data' has any optional data associated with the marker.
692
693        # len(self.value) gives the current offset. This function is called
694        # while the value is built. We use a list instead of a tuple to be able
695        # to fix up offsets later (they might increase if the value includes
696        # path references, e.g. 'foo = &bar, <3>;', which are expanded later).
697        self._markers.append([len(self.value), marker_type, data])
698
699        # For phandle references, add a dummy value with the same length as a
700        # phandle. This is handy for the length check in _register_phandles().
701        if marker_type is _MarkerType.PHANDLE:
702            self.value += b"\0\0\0\0"
703
704class _T(enum.IntEnum):
705    # Token IDs used by the DT lexer.
706
707    # These values must be contiguous and start from 1.
708    INCLUDE = 1
709    LINE = 2
710    STRING = 3
711    DTS_V1 = 4
712    PLUGIN = 5
713    MEMRESERVE = 6
714    BITS = 7
715    DEL_PROP = 8
716    DEL_NODE = 9
717    OMIT_IF_NO_REF = 10
718    LABEL = 11
719    CHAR_LITERAL = 12
720    REF = 13
721    INCBIN = 14
722    SKIP = 15
723    EOF = 16
724
725    # These values must be larger than the above contiguous range.
726    NUM = 17
727    PROPNODENAME = 18
728    MISC = 19
729    BYTE = 20
730    BAD = 21
731
732class _FileStackElt(NamedTuple):
733    # Used for maintaining the /include/ stack.
734
735    filename: str
736    lineno: int
737    contents: str
738    pos: int
739
740_TokVal = Union[int, str]
741
742class _Token(NamedTuple):
743    id: int
744    val: _TokVal
745
746    def __repr__(self):
747        id_repr = _T(self.id).name
748        return f'Token(id=_T.{id_repr}, val={repr(self.val)})'
749
750class DT:
751    """
752    Represents a devicetree parsed from a .dts file (or from many files, if the
753    .dts file /include/s other files). Creating many instances of this class is
754    fine. The library has no global state.
755
756    These attributes are available on DT instances:
757
758    root:
759      A Node instance representing the root (/) node.
760
761    alias2node:
762      A dictionary that maps maps alias strings (from /aliases) to Node
763      instances
764
765    label2node:
766      A dictionary that maps each node label (a string) to the Node instance
767      for the node.
768
769    label2prop:
770      A dictionary that maps each property label (a string) to a Property
771      instance.
772
773    label2prop_offset:
774      A dictionary that maps each label (a string) within a property value
775      (e.g., 'x = label_1: < 1 label2: 2 >;') to a (prop, offset) tuple, where
776      'prop' is a Property instance and 'offset' the byte offset (0 for label_1
777      and 4 for label_2 in the example).
778
779    phandle2node:
780      A dictionary that maps each phandle (a number) to a Node instance.
781
782    memreserves:
783      A list of (labels, address, length) tuples for the /memreserve/s in the
784      .dts file, in the same order as they appear in the file.
785
786      'labels' is a possibly empty set with all labels preceding the memreserve
787      (e.g., 'label1: label2: /memreserve/ ...'). 'address' and 'length' are
788      numbers.
789
790    filename:
791      The filename passed to the DT constructor.
792    """
793
794    #
795    # Public interface
796    #
797
798    def __init__(self, filename: Optional[str], include_path: Iterable[str] = (),
799                 force: bool = False, base_dir: Optional[str] = None):
800        """
801        Parses a DTS file to create a DT instance. Raises OSError if 'filename'
802        can't be opened, and DTError for any parse errors.
803
804        filename:
805          Path to the .dts file to parse. (If None, an empty devicetree
806          is created; this is unlikely to be what you want.)
807
808        include_path:
809          An iterable (e.g. list or tuple) containing paths to search for
810          /include/d and /incbin/'d files. By default, files are only looked up
811          relative to the .dts file that contains the /include/ or /incbin/.
812
813        force:
814          Try not to raise DTError even if the input tree has errors.
815          For experimental use; results not guaranteed.
816
817        base_dir:
818          Path to the directory that is to be used as the reference for
819          the generated relative paths in comments. When not provided, the
820          current working directory is used.
821        """
822        # Remember to update __deepcopy__() if you change this.
823
824        self._root: Optional[Node] = None
825        self.alias2node: dict[str, Node] = {}
826        self.label2node: dict[str, Node] = {}
827        self.label2prop: dict[str, Property] = {}
828        self.label2prop_offset: dict[str, tuple[Property, int]] = {}
829        self.phandle2node: dict[int, Node] = {}
830        self.memreserves: list[tuple[set[str], int, int]] = []
831        self.filename = filename
832
833        self._force = force
834        self._base_dir = base_dir or os.getcwd()
835
836        if filename is not None:
837            self._parse_file(filename, include_path)
838        else:
839            self._include_path: list[str] = []
840
841    @property
842    def root(self) -> Node:
843        """
844        See the class documentation.
845        """
846        # This is necessary because mypy can't tell that we never
847        # treat self._root as a non-None value until it's initialized
848        # properly in _parse_dt().
849        return self._root       # type: ignore
850
851    def get_node(self, path: str) -> Node:
852        """
853        Returns the Node instance for the node with path or alias 'path' (a
854        string). Raises DTError if the path or alias doesn't exist.
855
856        For example, both dt.get_node("/foo/bar") and dt.get_node("bar-alias")
857        will return the 'bar' node below:
858
859          /dts-v1/;
860
861          / {
862                  foo {
863                          bar_label: bar {
864                                  baz {
865                                  };
866                          };
867                  };
868
869                  aliases {
870                          bar-alias = &bar-label;
871                  };
872          };
873
874        Fetching subnodes via aliases is supported:
875        dt.get_node("bar-alias/baz") returns the 'baz' node.
876        """
877        if path.startswith("/"):
878            return _root_and_path_to_node(self.root, path, path)
879
880        # Path does not start with '/'. First component must be an alias.
881        alias, _, rest = path.partition("/")
882        if alias not in self.alias2node:
883            _err(f"no alias '{alias}' found -- did you forget the leading "
884                 "'/' in the node path?")
885
886        return _root_and_path_to_node(self.alias2node[alias], rest, path)
887
888    def has_node(self, path: str) -> bool:
889        """
890        Returns True if the path or alias 'path' exists. See DT.get_node().
891        """
892        try:
893            self.get_node(path)
894            return True
895        except DTError:
896            return False
897
898    def move_node(self, node: Node, new_path: str):
899        """
900        Move a node 'node' to a new path 'new_path'. The entire subtree
901        rooted at 'node' is moved along with it.
902
903        You cannot move the root node or provide a 'new_path' value
904        where a node already exists. This method raises an exception
905        in both cases.
906
907        As a restriction on the current implementation, the parent node
908        of the new path must exist.
909        """
910        if node is self.root:
911            _err("the root node can't be moved")
912
913        if self.has_node(new_path):
914            _err(f"can't move '{node.path}' to '{new_path}': "
915                 'destination node exists')
916
917        if not new_path.startswith('/'):
918            _err(f"path '{new_path}' doesn't start with '/'")
919
920        for component in new_path.split('/'):
921            for char in component:
922                if char not in _nodename_chars:
923                    _err(f"new path '{new_path}': bad character '{char}'")
924
925        old_name = node.name
926        old_path = node.path
927
928        new_parent_path, _, new_name = new_path.rpartition('/')
929        if new_parent_path == '':
930            # '/foo'.rpartition('/') is ('', '/', 'foo').
931            new_parent_path = '/'
932        if not self.has_node(new_parent_path):
933            _err(f"can't move '{old_path}' to '{new_path}': "
934                 f"parent node '{new_parent_path}' doesn't exist")
935        new_parent = self.get_node(new_parent_path)
936        if TYPE_CHECKING:
937            assert new_parent is not None
938            assert node.parent is not None
939
940        del node.parent.nodes[old_name]
941        node._name = new_name
942        node.parent = new_parent
943        new_parent.nodes[new_name] = node
944
945    def node_iter(self) -> Iterable[Node]:
946        """
947        Returns a generator for iterating over all nodes in the devicetree.
948
949        For example, this will print the name of each node that has a property
950        called 'foo':
951
952          for node in dt.node_iter():
953              if "foo" in node.props:
954                  print(node.name)
955        """
956        yield from self.root.node_iter()
957
958    def __str__(self):
959        """
960        Returns a DTS representation of the devicetree. Called automatically if
961        the DT instance is print()ed.
962        """
963        s = "/dts-v1/;\n"
964
965        if self.memreserves:
966            s += "\n"
967            for labels, address, offset in self.memreserves:
968                # List the labels in a consistent order to help with testing
969                for label in labels:
970                    s += f"{label}: "
971                s += f"/memreserve/ {address:#018x} {offset:#018x};\n"
972
973        return s + str(self.root)
974
975    def __repr__(self):
976        """
977        Returns some information about the DT instance. Called automatically if
978        the DT instance is evaluated.
979        """
980        if self.filename:
981            return (f"DT(filename='{self.filename}', "
982                    f"include_path={self._include_path})")
983        return super().__repr__()
984
985    def __deepcopy__(self, memo):
986        """
987        Implements support for the standard library copy.deepcopy()
988        function on DT instances.
989        """
990
991        # We need a new DT, obviously. Make a new, empty one.
992        ret = DT(None, (), self._force, self._base_dir)
993
994        # Now allocate new Node objects for every node in self, to use
995        # in the new DT. Set their parents to None for now and leave
996        # them without any properties. We will recursively initialize
997        # copies of parents before copies of children next.
998        path2node_copy = {
999            node.path: Node(node.name, None, ret, node.filename, node.lineno)
1000            for node in self.node_iter()
1001        }
1002
1003        # Point each copy of a node to the copy of its parent and set up
1004        # copies of each property.
1005        #
1006        # Share data when possible. For example, Property.value has
1007        # type 'bytes', which is immutable. We therefore don't need a
1008        # copy and can just point to the original data.
1009
1010        for node in self.node_iter():
1011            node_copy = path2node_copy[node.path]
1012
1013            parent = node.parent
1014            if parent is not None:
1015                node_copy.parent = path2node_copy[parent.path]
1016
1017            prop_name2prop_copy = {
1018                prop.name: Property(node_copy, prop.name)
1019                for prop in node.props.values()
1020            }
1021            for prop_name, prop_copy in prop_name2prop_copy.items():
1022                prop = node.props[prop_name]
1023                prop_copy.value = prop.value
1024                prop_copy.labels = prop.labels[:]
1025                prop_copy.offset_labels = prop.offset_labels.copy()
1026                prop_copy._label_offset_lst = prop._label_offset_lst[:]
1027                prop_copy._markers = [marker[:] for marker in prop._markers]
1028                prop_copy.filename = prop.filename
1029                prop_copy.lineno = prop.lineno
1030            node_copy.props = prop_name2prop_copy
1031
1032            node_copy.nodes = {
1033                child_name: path2node_copy[child_node.path]
1034                for child_name, child_node in node.nodes.items()
1035            }
1036
1037            node_copy.labels = node.labels[:]
1038
1039            node_copy._omit_if_no_ref = node._omit_if_no_ref
1040            node_copy._is_referenced = node._is_referenced
1041
1042        # The copied nodes and properties are initialized, so
1043        # we can finish initializing the copied DT object now.
1044
1045        ret._root = path2node_copy['/']
1046
1047        def copy_node_lookup_table(attr_name):
1048            original = getattr(self, attr_name)
1049            copy = {
1050                key: path2node_copy[original[key].path]
1051                for key in original
1052            }
1053            setattr(ret, attr_name, copy)
1054
1055        copy_node_lookup_table('alias2node')
1056        copy_node_lookup_table('label2node')
1057        copy_node_lookup_table('phandle2node')
1058
1059        ret_label2prop = {}
1060        for label, prop in self.label2prop.items():
1061            node_copy = path2node_copy[prop.node.path]
1062            prop_copy = node_copy.props[prop.name]
1063            ret_label2prop[label] = prop_copy
1064        ret.label2prop = ret_label2prop
1065
1066        ret_label2prop_offset = {}
1067        for label, prop_offset in self.label2prop_offset.items():
1068            prop, offset = prop_offset
1069            node_copy = path2node_copy[prop.node.path]
1070            prop_copy = node_copy.props[prop.name]
1071            ret_label2prop_offset[label] = (prop_copy, offset)
1072        ret.label2prop_offset = ret_label2prop_offset
1073
1074        ret.memreserves = [
1075            (set(memreserve[0]), memreserve[1], memreserve[2])
1076            for memreserve in self.memreserves
1077        ]
1078
1079        ret.filename = self.filename
1080
1081        return ret
1082
1083    #
1084    # Parsing
1085    #
1086
1087    def _parse_file(self, filename: str, include_path: Iterable[str]):
1088        self._include_path = list(include_path)
1089
1090        with open(filename, encoding="utf-8") as f:
1091            self._file_contents = f.read()
1092
1093        self._tok_i = self._tok_end_i = 0
1094        self._filestack: list[_FileStackElt] = []
1095
1096        self._lexer_state: int = _DEFAULT
1097        self._saved_token: Optional[_Token] = None
1098
1099        self._lineno: int = 1
1100
1101        self._parse_header()
1102        self._parse_memreserves()
1103        self._parse_dt()
1104
1105        self._register_phandles()
1106        self._fixup_props()
1107        self._register_aliases()
1108        self._remove_unreferenced()
1109        self._register_labels()
1110
1111    def _parse_header(self):
1112        # Parses /dts-v1/ (expected) and /plugin/ (unsupported) at the start of
1113        # files. There may be multiple /dts-v1/ at the start of a file.
1114
1115        has_dts_v1 = False
1116
1117        while self._peek_token().id == _T.DTS_V1:
1118            has_dts_v1 = True
1119            self._next_token()
1120            self._expect_token(";")
1121            # /plugin/ always comes after /dts-v1/
1122            if self._peek_token().id == _T.PLUGIN:
1123                self._parse_error("/plugin/ is not supported")
1124
1125        if not has_dts_v1:
1126            self._parse_error("expected '/dts-v1/;' at start of file")
1127
1128    def _parse_memreserves(self):
1129        # Parses /memreserve/, which appears after /dts-v1/
1130
1131        while True:
1132            # Labels before /memreserve/
1133            labels = []
1134            while self._peek_token().id == _T.LABEL:
1135                _append_no_dup(labels, self._next_token().val)
1136
1137            if self._peek_token().id == _T.MEMRESERVE:
1138                self._next_token()
1139                self.memreserves.append(
1140                    (labels, self._eval_prim(), self._eval_prim()))
1141                self._expect_token(";")
1142            elif labels:
1143                self._parse_error("expected /memreserve/ after labels at "
1144                                  "beginning of file")
1145            else:
1146                return
1147
1148    def _parse_dt(self):
1149        # Top-level parsing loop
1150
1151        while True:
1152            tok = self._next_token()
1153
1154            if tok.val == "/":
1155                # '/ { ... };', the root node
1156                if not self._root:
1157                    self._root = Node(
1158                        name="/", parent=None, dt=self, filename=self.filename, lineno=self._lineno
1159                    )
1160                self._parse_node(self.root)
1161
1162            elif tok.id in (_T.LABEL, _T.REF):
1163                # '&foo { ... };' or 'label: &foo { ... };'. The C tools only
1164                # support a single label here too.
1165
1166                if tok.id == _T.LABEL:
1167                    label = tok.val
1168                    tok = self._next_token()
1169                    if tok.id != _T.REF:
1170                        self._parse_error("expected label reference (&foo)")
1171                else:
1172                    label = None
1173
1174                try:
1175                    node = self._ref2node(tok.val)
1176                except DTError as e:
1177                    self._parse_error(e)
1178                self._parse_node(node)
1179
1180                if label:
1181                    _append_no_dup(node.labels, label)
1182
1183            elif tok.id == _T.DEL_NODE:
1184                self._next_ref2node()._del()
1185                self._expect_token(";")
1186
1187            elif tok.id == _T.OMIT_IF_NO_REF:
1188                self._next_ref2node()._omit_if_no_ref = True
1189                self._expect_token(";")
1190
1191            elif tok.id == _T.EOF:
1192                if not self._root:
1193                    self._parse_error("no root node defined")
1194                return
1195
1196            else:
1197                self._parse_error("expected '/' or label reference (&foo)")
1198
1199    def _parse_node(self, node):
1200        # Parses the '{ ... };' part of 'node-name { ... };'.
1201
1202        # We need to track which child nodes were defined in this set
1203        # of curly braces in order to reject duplicate node names.
1204        current_child_names = set()
1205
1206        self._expect_token("{")
1207        while True:
1208            labels, omit_if_no_ref = self._parse_propnode_labels()
1209            tok = self._next_token()
1210
1211            if tok.id == _T.PROPNODENAME:
1212                if self._peek_token().val == "{":
1213                    # '<tok> { ...', expect node
1214
1215                    # Fetch the existing node if it already exists. This
1216                    # happens when overriding nodes.
1217                    child = node.nodes.get(tok.val)
1218                    if child:
1219                        if child.name in current_child_names:
1220                            self._parse_error(f'{child.path}: duplicate node name')
1221                    else:
1222                        child = Node(
1223                            name=tok.val,
1224                            parent=node,
1225                            dt=self,
1226                            filename=self.filename,
1227                            lineno=self._lineno,
1228                        )
1229                        current_child_names.add(tok.val)
1230
1231                    for label in labels:
1232                        _append_no_dup(child.labels, label)
1233
1234                    if omit_if_no_ref:
1235                        child._omit_if_no_ref = True
1236
1237                    node.nodes[child.name] = child
1238                    self._parse_node(child)
1239
1240                else:
1241                    # Not '<tok> { ...', expect property assignment
1242
1243                    if omit_if_no_ref:
1244                        self._parse_error(
1245                            "/omit-if-no-ref/ can only be used on nodes")
1246
1247                    prop = node._get_prop(tok.val)
1248                    prop.filename = self.filename
1249                    prop.lineno = self._lineno
1250
1251                    if self._check_token("="):
1252                        self._parse_assignment(prop)
1253                    elif not self._check_token(";"):
1254                        # ';' is for an empty property, like 'foo;'
1255                        self._parse_error("expected '{', '=', or ';'")
1256
1257                    for label in labels:
1258                        _append_no_dup(prop.labels, label)
1259
1260            elif tok.id == _T.DEL_NODE:
1261                tok2 = self._next_token()
1262                if tok2.id != _T.PROPNODENAME:
1263                    self._parse_error("expected node name")
1264                if tok2.val in node.nodes:
1265                    node.nodes[tok2.val]._del()
1266                self._expect_token(";")
1267
1268            elif tok.id == _T.DEL_PROP:
1269                tok2 = self._next_token()
1270                if tok2.id != _T.PROPNODENAME:
1271                    self._parse_error("expected property name")
1272                node.props.pop(tok2.val, None)
1273                self._expect_token(";")
1274
1275            elif tok.val == "}":
1276                self._expect_token(";")
1277                return
1278
1279            else:
1280                self._parse_error("expected node name, property name, or '}'")
1281
1282    def _parse_propnode_labels(self):
1283        # _parse_node() helpers for parsing labels and /omit-if-no-ref/s before
1284        # nodes and properties. Returns a (<label list>, <omit-if-no-ref bool>)
1285        # tuple.
1286
1287        labels = []
1288        omit_if_no_ref = False
1289        while True:
1290            tok = self._peek_token()
1291            if tok.id == _T.LABEL:
1292                _append_no_dup(labels, tok.val)
1293            elif tok.id == _T.OMIT_IF_NO_REF:
1294                omit_if_no_ref = True
1295            elif (labels or omit_if_no_ref) and tok.id != _T.PROPNODENAME:
1296                # Got something like 'foo: bar: }'
1297                self._parse_error("expected node or property name")
1298            else:
1299                return labels, omit_if_no_ref
1300
1301            self._next_token()
1302
1303    def _parse_assignment(self, prop):
1304        # Parses the right-hand side of property assignment
1305        #
1306        # prop:
1307        #   'Property' instance being assigned
1308
1309        # Remove any old value, path/phandle references, and in-value labels,
1310        # in case the property value is being overridden
1311        prop.value = b""
1312        prop._markers = []
1313
1314        while True:
1315            # Parse labels before the value (e.g., '..., label: < 0 >')
1316            self._parse_value_labels(prop)
1317
1318            tok = self._next_token()
1319
1320            if tok.val == "<":
1321                self._parse_cells(prop, 4)
1322
1323            elif tok.id == _T.BITS:
1324                n_bits = self._expect_num()
1325                if n_bits not in {8, 16, 32, 64}:
1326                    self._parse_error("expected 8, 16, 32, or 64")
1327                self._expect_token("<")
1328                self._parse_cells(prop, n_bits//8)
1329
1330            elif tok.val == "[":
1331                self._parse_bytes(prop)
1332
1333            elif tok.id == _T.STRING:
1334                prop._add_marker(_MarkerType.STRING)
1335                prop.value += self._unescape(tok.val.encode("utf-8")) + b"\0"
1336
1337            elif tok.id == _T.REF:
1338                prop._add_marker(_MarkerType.PATH, tok.val)
1339
1340            elif tok.id == _T.INCBIN:
1341                self._parse_incbin(prop)
1342
1343            else:
1344                self._parse_error("malformed value")
1345
1346            # Parse labels after the value (e.g., '< 0 > label:, ...')
1347            self._parse_value_labels(prop)
1348
1349            tok = self._next_token()
1350            if tok.val == ";":
1351                return
1352            if tok.val == ",":
1353                continue
1354            self._parse_error("expected ';' or ','")
1355
1356    def _parse_cells(self, prop, n_bytes):
1357        # Parses '<...>'
1358
1359        prop._add_marker(_N_BYTES_TO_TYPE[n_bytes])
1360
1361        while True:
1362            tok = self._peek_token()
1363            if tok.id == _T.REF:
1364                self._next_token()
1365                if n_bytes != 4:
1366                    self._parse_error("phandle references are only allowed in "
1367                                      "arrays with 32-bit elements")
1368                prop._add_marker(_MarkerType.PHANDLE, tok.val)
1369
1370            elif tok.id == _T.LABEL:
1371                prop._add_marker(_MarkerType.LABEL, tok.val)
1372                self._next_token()
1373
1374            elif self._check_token(">"):
1375                return
1376
1377            else:
1378                # Literal value
1379                num = self._eval_prim()
1380                try:
1381                    prop.value += num.to_bytes(n_bytes, "big")
1382                except OverflowError:
1383                    try:
1384                        # Try again as a signed number, in case it's negative
1385                        prop.value += num.to_bytes(n_bytes, "big", signed=True)
1386                    except OverflowError:
1387                        self._parse_error(
1388                            f"{num} does not fit in {8*n_bytes} bits")
1389
1390    def _parse_bytes(self, prop):
1391        # Parses '[ ... ]'
1392
1393        prop._add_marker(_MarkerType.UINT8)
1394
1395        while True:
1396            tok = self._next_token()
1397            if tok.id == _T.BYTE:
1398                prop.value += tok.val.to_bytes(1, "big")
1399
1400            elif tok.id == _T.LABEL:
1401                prop._add_marker(_MarkerType.LABEL, tok.val)
1402
1403            elif tok.val == "]":
1404                return
1405
1406            else:
1407                self._parse_error("expected two-digit byte or ']'")
1408
1409    def _parse_incbin(self, prop):
1410        # Parses
1411        #
1412        #   /incbin/ ("filename")
1413        #
1414        # and
1415        #
1416        #   /incbin/ ("filename", <offset>, <size>)
1417
1418        prop._add_marker(_MarkerType.UINT8)
1419
1420        self._expect_token("(")
1421
1422        tok = self._next_token()
1423        if tok.id != _T.STRING:
1424            self._parse_error("expected quoted filename")
1425        filename = tok.val
1426
1427        tok = self._next_token()
1428        if tok.val == ",":
1429            offset = self._eval_prim()
1430            self._expect_token(",")
1431            size = self._eval_prim()
1432            self._expect_token(")")
1433        else:
1434            if tok.val != ")":
1435                self._parse_error("expected ',' or ')'")
1436            offset = None
1437
1438        try:
1439            with self._open(filename, "rb") as f:
1440                if offset is None:
1441                    prop.value += f.read()
1442                else:
1443                    f.seek(offset)
1444                    prop.value += f.read(size)
1445        except OSError as e:
1446            self._parse_error(f"could not read '{filename}': {e}")
1447
1448    def _parse_value_labels(self, prop):
1449        # _parse_assignment() helper for parsing labels before/after each
1450        # comma-separated value
1451
1452        while True:
1453            tok = self._peek_token()
1454            if tok.id != _T.LABEL:
1455                return
1456            prop._add_marker(_MarkerType.LABEL, tok.val)
1457            self._next_token()
1458
1459    def _node_phandle(self, node, src_prop):
1460        # Returns the phandle for Node 'node', creating a new phandle if the
1461        # node has no phandle, and fixing up the value for existing
1462        # self-referential phandles (which get set to b'\0\0\0\0' initially).
1463        # Self-referential phandles must be rewritten instead of recreated, so
1464        # that labels are preserved.
1465
1466        if "phandle" in node.props:
1467            phandle_prop = node.props["phandle"]
1468        else:
1469            phandle_prop = Property(node, "phandle")
1470            phandle_prop._add_marker(_MarkerType.UINT32)  # For displaying
1471            phandle_prop.value = b'\0\0\0\0'
1472
1473        if phandle_prop.value == b'\0\0\0\0':
1474            phandle_i = 1
1475            while phandle_i in self.phandle2node:
1476                phandle_i += 1
1477            self.phandle2node[phandle_i] = node
1478
1479            phandle_prop.filename = src_prop.filename
1480            phandle_prop.lineno = src_prop.lineno
1481            phandle_prop.value = phandle_i.to_bytes(4, "big")
1482            node.props["phandle"] = phandle_prop
1483
1484        return phandle_prop.value
1485
1486    # Expression evaluation
1487
1488    def _eval_prim(self):
1489        tok = self._peek_token()
1490        if tok.id in (_T.NUM, _T.CHAR_LITERAL):
1491            return self._next_token().val
1492
1493        tok = self._next_token()
1494        if tok.val != "(":
1495            self._parse_error("expected number or parenthesized expression")
1496        val = self._eval_ternary()
1497        self._expect_token(")")
1498        return val
1499
1500    def _eval_ternary(self):
1501        val = self._eval_or()
1502        if self._check_token("?"):
1503            if_val = self._eval_ternary()
1504            self._expect_token(":")
1505            else_val = self._eval_ternary()
1506            return if_val if val else else_val
1507        return val
1508
1509    def _eval_or(self):
1510        val = self._eval_and()
1511        while self._check_token("||"):
1512            val = 1 if self._eval_and() or val else 0
1513        return val
1514
1515    def _eval_and(self):
1516        val = self._eval_bitor()
1517        while self._check_token("&&"):
1518            val = 1 if self._eval_bitor() and val else 0
1519        return val
1520
1521    def _eval_bitor(self):
1522        val = self._eval_bitxor()
1523        while self._check_token("|"):
1524            val |= self._eval_bitxor()
1525        return val
1526
1527    def _eval_bitxor(self):
1528        val = self._eval_bitand()
1529        while self._check_token("^"):
1530            val ^= self._eval_bitand()
1531        return val
1532
1533    def _eval_bitand(self):
1534        val = self._eval_eq()
1535        while self._check_token("&"):
1536            val &= self._eval_eq()
1537        return val
1538
1539    def _eval_eq(self):
1540        val = self._eval_rela()
1541        while True:
1542            if self._check_token("=="):
1543                val = 1 if val == self._eval_rela() else 0
1544            elif self._check_token("!="):
1545                val = 1 if val != self._eval_rela() else 0
1546            else:
1547                return val
1548
1549    def _eval_rela(self):
1550        val = self._eval_shift()
1551        while True:
1552            if self._check_token("<"):
1553                val = 1 if val < self._eval_shift() else 0
1554            elif self._check_token(">"):
1555                val = 1 if val > self._eval_shift() else 0
1556            elif self._check_token("<="):
1557                val = 1 if val <= self._eval_shift() else 0
1558            elif self._check_token(">="):
1559                val = 1 if val >= self._eval_shift() else 0
1560            else:
1561                return val
1562
1563    def _eval_shift(self):
1564        val = self._eval_add()
1565        while True:
1566            if self._check_token("<<"):
1567                val <<= self._eval_add()
1568            elif self._check_token(">>"):
1569                val >>= self._eval_add()
1570            else:
1571                return val
1572
1573    def _eval_add(self):
1574        val = self._eval_mul()
1575        while True:
1576            if self._check_token("+"):
1577                val += self._eval_mul()
1578            elif self._check_token("-"):
1579                val -= self._eval_mul()
1580            else:
1581                return val
1582
1583    def _eval_mul(self):
1584        val = self._eval_unary()
1585        while True:
1586            if self._check_token("*"):
1587                val *= self._eval_unary()
1588            elif self._check_token("/"):
1589                denom = self._eval_unary()
1590                if not denom:
1591                    self._parse_error("division by zero")
1592                val //= denom
1593            elif self._check_token("%"):
1594                denom = self._eval_unary()
1595                if not denom:
1596                    self._parse_error("division by zero")
1597                val %= denom
1598            else:
1599                return val
1600
1601    def _eval_unary(self):
1602        if self._check_token("-"):
1603            return -self._eval_unary()
1604        if self._check_token("~"):
1605            return ~self._eval_unary()
1606        if self._check_token("!"):
1607            return 0 if self._eval_unary() else 1
1608        return self._eval_prim()
1609
1610    #
1611    # Lexing
1612    #
1613
1614    def _check_token(self, val):
1615        if self._peek_token().val == val:
1616            self._next_token()
1617            return True
1618        return False
1619
1620    def _peek_token(self):
1621        if not self._saved_token:
1622            self._saved_token = self._next_token()
1623        return self._saved_token
1624
1625    def _next_token(self):
1626        if self._saved_token:
1627            tmp = self._saved_token
1628            self._saved_token = None
1629            return tmp
1630
1631        while True:
1632            tok_id = None
1633
1634            match = _token_re.match(self._file_contents, self._tok_end_i)
1635            if match:
1636                tok_id = match.lastindex
1637                if tok_id == _T.CHAR_LITERAL:
1638                    val = self._unescape(match.group(tok_id).encode("utf-8"))
1639                    if len(val) != 1:
1640                        self._parse_error("character literals must be length 1")
1641                    tok_val = ord(val)
1642                else:
1643                    tok_val = match.group(tok_id)
1644
1645            elif self._lexer_state is _DEFAULT:
1646                match = _num_re.match(self._file_contents, self._tok_end_i)
1647                if match:
1648                    tok_id = _T.NUM
1649                    num_s = match.group(1)
1650                    tok_val = int(num_s,
1651                                  16 if num_s.startswith(("0x", "0X")) else
1652                                  8 if num_s[0] == "0" else
1653                                  10)
1654
1655            elif self._lexer_state is _EXPECT_PROPNODENAME:
1656                match = _propnodename_re.match(self._file_contents,
1657                                               self._tok_end_i)
1658                if match:
1659                    tok_id = _T.PROPNODENAME
1660                    tok_val = match.group(1)
1661                    self._lexer_state = _DEFAULT
1662
1663            else:  # self._lexer_state is _EXPECT_BYTE
1664                match = _byte_re.match(self._file_contents, self._tok_end_i)
1665                if match:
1666                    tok_id = _T.BYTE
1667                    tok_val = int(match.group(), 16)
1668
1669            if not tok_id:
1670                match = _misc_re.match(self._file_contents, self._tok_end_i)
1671                if match:
1672                    tok_id = _T.MISC
1673                    tok_val = match.group()
1674                else:
1675                    self._tok_i = self._tok_end_i
1676                    # Could get here due to a node/property naming appearing in
1677                    # an unexpected context as well as for bad characters in
1678                    # files. Generate a token for it so that the error can
1679                    # trickle up to some context where we can give a more
1680                    # helpful error message.
1681                    return _Token(_T.BAD, "<unknown token>")
1682
1683            self._tok_i = match.start()
1684            self._tok_end_i = match.end()
1685
1686            if tok_id == _T.SKIP:
1687                self._lineno += tok_val.count("\n")
1688                continue
1689
1690            # /include/ is handled in the lexer in the C tools as well, and can
1691            # appear anywhere
1692            if tok_id == _T.INCLUDE:
1693                # Can have newlines between /include/ and the filename
1694                self._lineno += tok_val.count("\n")
1695                # Do this manual extraction instead of doing it in the regex so
1696                # that we can properly count newlines
1697                filename = tok_val[tok_val.find('"') + 1:-1]
1698                self._enter_file(filename)
1699                continue
1700
1701            if tok_id == _T.LINE:
1702                # #line directive
1703                self._lineno = int(tok_val.split()[0]) - 1
1704                self.filename = tok_val[tok_val.find('"') + 1:-1]
1705                continue
1706
1707            if tok_id == _T.EOF:
1708                if self._filestack:
1709                    self._leave_file()
1710                    continue
1711                return _Token(_T.EOF, "<EOF>")
1712
1713            # State handling
1714
1715            if (tok_id in (_T.DEL_PROP, _T.DEL_NODE, _T.OMIT_IF_NO_REF)
1716                or tok_val in ("{", ";")):
1717
1718                self._lexer_state = _EXPECT_PROPNODENAME
1719
1720            elif tok_val == "[":
1721                self._lexer_state = _EXPECT_BYTE
1722
1723            elif tok_id in (_T.MEMRESERVE, _T.BITS) or tok_val == "]":
1724                self._lexer_state = _DEFAULT
1725
1726            return _Token(tok_id, tok_val)
1727
1728    def _expect_token(self, tok_val):
1729        # Raises an error if the next token does not have the string value
1730        # 'tok_val'. Returns the token.
1731
1732        tok = self._next_token()
1733        if tok.val != tok_val:
1734            self._parse_error(f"expected '{tok_val}', not '{tok.val}'")
1735
1736        return tok
1737
1738    def _expect_num(self):
1739        # Raises an error if the next token is not a number. Returns the token.
1740
1741        tok = self._next_token()
1742        if tok.id != _T.NUM:
1743            self._parse_error("expected number")
1744        return tok.val
1745
1746    def _parse_error(self, s):
1747        # This works out for the first line of the file too, where rfind()
1748        # returns -1
1749        column = self._tok_i - self._file_contents.rfind("\n", 0,
1750                                                         self._tok_i + 1)
1751        _err(f"{self.filename}:{self._lineno} (column {column}): "
1752             f"parse error: {s}")
1753
1754    def _enter_file(self, filename):
1755        # Enters the /include/d file 'filename', remembering the position in
1756        # the /include/ing file for later
1757
1758        self._filestack.append(
1759            _FileStackElt(self.filename, self._lineno,
1760                          self._file_contents, self._tok_end_i))
1761
1762        # Handle escapes in filenames, just for completeness
1763        filename = self._unescape(filename.encode("utf-8"))
1764        try:
1765            filename = filename.decode("utf-8")
1766        except UnicodeDecodeError:
1767            self._parse_error("filename is not valid UTF-8")
1768
1769        with self._open(filename, encoding="utf-8") as f:
1770            try:
1771                self._file_contents = f.read()
1772            except OSError as e:
1773                self._parse_error(e)
1774
1775        # Check for recursive /include/
1776        for i, parent in enumerate(self._filestack):
1777            if filename == parent[0]:
1778                self._parse_error("recursive /include/:\n" + " ->\n".join(
1779                    [f"{parent[0]}:{parent[1]}"
1780                     for parent in self._filestack[i:]] +
1781                    [filename]))
1782
1783        self.filename = f.name
1784        self._lineno = 1
1785        self._tok_end_i = 0
1786
1787    def _leave_file(self):
1788        # Leaves an /include/d file, returning to the file that /include/d it
1789
1790        self.filename, self._lineno, self._file_contents, self._tok_end_i = (
1791            self._filestack.pop())
1792
1793    def _next_ref2node(self):
1794        # Checks that the next token is a label/path reference and returns the
1795        # Node it points to. Only used during parsing, so uses _parse_error()
1796        # on errors to save some code in callers.
1797
1798        label = self._next_token()
1799        if label.id != _T.REF:
1800            self._parse_error(
1801                "expected label (&foo) or path (&{/foo/bar}) reference")
1802        try:
1803            return self._ref2node(label.val)
1804        except DTError as e:
1805            self._parse_error(e)
1806
1807    def _ref2node(self, s):
1808        # Returns the Node the label/path reference 's' points to
1809
1810        if s[0] == "{":
1811            # Path reference (&{/foo/bar})
1812            path = s[1:-1]
1813            if not path.startswith("/"):
1814                _err(f"node path '{path}' does not start with '/'")
1815            # Will raise DTError if the path doesn't exist
1816            return _root_and_path_to_node(self.root, path, path)
1817
1818        # Label reference (&foo).
1819
1820        # label2node hasn't been filled in yet, and using it would get messy
1821        # when nodes are deleted
1822        for node in self.node_iter():
1823            if s in node.labels:
1824                return node
1825
1826        _err(f"undefined node label '{s}'")
1827
1828    #
1829    # Post-processing
1830    #
1831
1832    def _register_phandles(self):
1833        # Registers any manually-inserted phandle properties in
1834        # self.phandle2node, so that we can avoid allocating any phandles from
1835        # that set. Also checks the format of the phandles and does misc.
1836        # sanity checking.
1837
1838        for node in self.node_iter():
1839            phandle = node.props.get("phandle")
1840            if phandle:
1841                if len(phandle.value) != 4:
1842                    _err(f"{node.path}: bad phandle length "
1843                         f"({len(phandle.value)}), expected 4 bytes")
1844
1845                is_self_referential = False
1846                for marker in phandle._markers:
1847                    _, marker_type, ref = marker
1848                    if marker_type is _MarkerType.PHANDLE:
1849                        # The phandle's value is itself a phandle reference
1850                        if self._ref2node(ref) is node:
1851                            # Alright to set a node's phandle equal to its own
1852                            # phandle. It'll force a new phandle to be
1853                            # allocated even if the node is otherwise
1854                            # unreferenced.
1855                            is_self_referential = True
1856                            break
1857
1858                        _err(f"{node.path}: {phandle.name} "
1859                             "refers to another node")
1860
1861                # Could put on else on the 'for' above too, but keep it
1862                # somewhat readable
1863                if not is_self_referential:
1864                    phandle_val = int.from_bytes(phandle.value, "big")
1865
1866                    if phandle_val in {0, 0xFFFFFFFF}:
1867                        _err(f"{node.path}: bad value {phandle_val:#010x} "
1868                             f"for {phandle.name}")
1869
1870                    if phandle_val in self.phandle2node:
1871                        _err(f"{node.path}: duplicated phandle {phandle_val:#x} "
1872                             "(seen before at "
1873                             f"{self.phandle2node[phandle_val].path})")
1874
1875                    self.phandle2node[phandle_val] = node
1876
1877    def _fixup_props(self):
1878        # Fills in node path and phandle references in property values, and
1879        # registers labels within values. This must be done after parsing,
1880        # since forwards references are allowed and nodes and properties might
1881        # be deleted.
1882
1883        for node in self.node_iter():
1884            # The tuple() avoids a 'dictionary changed size during iteration'
1885            # error
1886            for prop in tuple(node.props.values()):
1887                # 'prev_pos' and 'pos' are indices in the unpatched
1888                # property value. The result is built up in 'res'.
1889                prev_pos = 0
1890                res = b""
1891
1892                for marker in prop._markers:
1893                    pos, marker_type, ref = marker
1894
1895                    # Add data before the marker, reading from the unpatched
1896                    # property value
1897                    res += prop.value[prev_pos:pos]
1898
1899                    # Fix the marker offset so that it's correct for the
1900                    # patched property value, for later (not used in this
1901                    # function). The offset might change due to path
1902                    # references, which expand to something like "/foo/bar".
1903                    marker[0] = len(res)
1904
1905                    if marker_type is _MarkerType.LABEL:
1906                        # This is a temporary format so that we can catch
1907                        # duplicate references. prop._label_offset_lst is changed
1908                        # to a dictionary that maps labels to offsets in
1909                        # _register_labels().
1910                        _append_no_dup(prop._label_offset_lst, (ref, len(res)))
1911                    elif marker_type in (_MarkerType.PATH, _MarkerType.PHANDLE):
1912                        # Path or phandle reference
1913                        try:
1914                            ref_node = self._ref2node(ref)
1915                        except DTError as e:
1916                            _err(f"{prop.node.path}: {e}")
1917
1918                        # For /omit-if-no-ref/
1919                        ref_node._is_referenced = True
1920
1921                        if marker_type is _MarkerType.PATH:
1922                            res += ref_node.path.encode("utf-8") + b'\0'
1923                        else:  # marker_type is PHANDLE
1924                            res += self._node_phandle(ref_node, prop)
1925                            # Skip over the dummy phandle placeholder
1926                            pos += 4
1927
1928                    prev_pos = pos
1929
1930                # Store the final fixed-up value. Add the data after the last
1931                # marker.
1932                prop.value = res + prop.value[prev_pos:]
1933
1934    def _register_aliases(self):
1935        # Registers aliases from the /aliases node in self.alias2node. Also
1936        # checks the format of the alias properties.
1937
1938        # We copy this to self.alias2node at the end to avoid get_node()
1939        # looking up paths via other aliases while verifying aliases
1940        alias2node = {}
1941
1942        alias_re = re.compile("[0-9a-z-]+$")
1943
1944        aliases = self.root.nodes.get("aliases")
1945        if aliases:
1946            for prop in aliases.props.values():
1947                if not alias_re.match(prop.name):
1948                    _err(f"/aliases: alias property name '{prop.name}' "
1949                         "should include only characters from [0-9a-z-]")
1950
1951                # Property.to_path() checks that the node exists, has
1952                # the right type, etc. Swallow errors for invalid
1953                # aliases with self._force.
1954                try:
1955                    alias2node[prop.name] = prop.to_path()
1956                except DTError:
1957                    if self._force:
1958                        continue
1959                    raise
1960
1961        self.alias2node = alias2node
1962
1963    def _remove_unreferenced(self):
1964        # Removes any unreferenced nodes marked with /omit-if-no-ref/ from the
1965        # tree
1966
1967        # tuple() is to avoid 'RuntimeError: dictionary changed size during
1968        # iteration' errors
1969        for node in tuple(self.node_iter()):
1970            if node._omit_if_no_ref and not node._is_referenced:
1971                node._del()
1972
1973    def _register_labels(self):
1974        # Checks for duplicate labels and registers labels in label2node,
1975        # label2prop, and label2prop_offset
1976
1977        label2things = collections.defaultdict(set)
1978
1979        # Register all labels and the nodes/props they point to in label2things
1980        for node in self.node_iter():
1981            for label in node.labels:
1982                label2things[label].add(node)
1983                self.label2node[label] = node
1984
1985            for prop in node.props.values():
1986                for label in prop.labels:
1987                    label2things[label].add(prop)
1988                    self.label2prop[label] = prop
1989
1990                for label, offset in prop._label_offset_lst:
1991                    label2things[label].add((prop, offset))
1992                    self.label2prop_offset[label] = (prop, offset)
1993
1994                # See _fixup_props()
1995                prop.offset_labels = dict(prop._label_offset_lst)
1996
1997        for label, things in label2things.items():
1998            if len(things) > 1:
1999                strings = []
2000                for thing in things:
2001                    if isinstance(thing, Node):
2002                        strings.append(f"on {thing.path}")
2003                    elif isinstance(thing, Property):
2004                        strings.append(f"on property '{thing.name}' "
2005                                       f"of node {thing.node.path}")
2006                    else:
2007                        # Label within property value
2008                        strings.append("in the value of property "
2009                                       f"'{thing[0].name}' of node "
2010                                       f"{thing[0].node.path}")
2011
2012                # Give consistent error messages to help with testing
2013                strings.sort()
2014
2015                _err(f"Label '{label}' appears " + " and ".join(strings))
2016
2017
2018    #
2019    # Misc.
2020    #
2021
2022    def _unescape(self, b):
2023        # Replaces backslash escapes in the 'bytes' array 'b'. We can't do this at
2024        # the string level, because the result might not be valid UTF-8 when
2025        # octal/hex escapes are involved.
2026
2027        def sub(match):
2028            esc = match.group(1)
2029            if esc == b"a":
2030                return b"\a"
2031            if esc == b"b":
2032                return b"\b"
2033            if esc == b"t":
2034                return b"\t"
2035            if esc == b"n":
2036                return b"\n"
2037            if esc == b"v":
2038                return b"\v"
2039            if esc == b"f":
2040                return b"\f"
2041            if esc == b"r":
2042                return b"\r"
2043
2044            if esc[0] in b"01234567":
2045                # Octal escape
2046                try:
2047                    return int(esc, 8).to_bytes(1, "big")
2048                except OverflowError:
2049                    self._parse_error("octal escape out of range (> 255)")
2050
2051            if esc[0] == ord("x") and len(esc) > 1:
2052                # Hex escape
2053                return int(esc[1:], 16).to_bytes(1, "big")
2054
2055            # Return <char> as-is for other \<char>
2056            return esc[0].to_bytes(1, "big")
2057
2058        return _unescape_re.sub(sub, b)
2059
2060    def _open(self, filename, mode="r", **kwargs):
2061        # Wrapper around standard Python open(), accepting the same params.
2062        # But searches for a 'filename' file in the directory of the current
2063        # file and the include path.
2064
2065        # The C tools support specifying stdin with '-' too
2066        if filename == "-":
2067            return sys.stdin.buffer if "b" in mode else sys.stdin
2068
2069        # Try the directory of the current file first
2070        dirname = os.path.dirname(self.filename)
2071        try:
2072            return open(os.path.join(dirname, filename), mode, **kwargs)
2073        except OSError as e:
2074            if e.errno != errno.ENOENT:
2075                self._parse_error(e)
2076
2077            # Try each directory from the include path
2078            for path in self._include_path:
2079                try:
2080                    return open(os.path.join(path, filename), mode, **kwargs)
2081                except OSError as e:
2082                    if e.errno != errno.ENOENT:
2083                        self._parse_error(e)
2084                    continue
2085
2086            self._parse_error(f"'{filename}' could not be found")
2087
2088#
2089# Public functions
2090#
2091
2092def to_num(data: bytes, length: Optional[int] = None,
2093           signed: bool = False) -> int:
2094    """
2095    Converts the 'bytes' array 'data' to a number. The value is expected to be
2096    in big-endian format, which is standard in devicetree.
2097
2098    length (default: None):
2099      The expected length of the value in bytes, as a simple type check. If
2100      None, the length check is skipped.
2101
2102    signed (default: False):
2103      If True, the value will be interpreted as signed rather than unsigned.
2104    """
2105    _check_is_bytes(data)
2106    if length is not None:
2107        _check_length_positive(length)
2108        if len(data) != length:
2109            _err(f"{data!r} is {len(data)} bytes long, expected {length}")
2110
2111    return int.from_bytes(data, "big", signed=signed)
2112
2113def to_nums(data: bytes, length: int = 4, signed: bool = False) -> list[int]:
2114    """
2115    Like Property.to_nums(), but takes an arbitrary 'bytes' array. The values
2116    are assumed to be in big-endian format, which is standard in devicetree.
2117    """
2118    _check_is_bytes(data)
2119    _check_length_positive(length)
2120
2121    if len(data) % length:
2122        _err(f"{data!r} is {len(data)} bytes long, "
2123             f"expected a length that's a multiple of {length}")
2124
2125    return [int.from_bytes(data[i:i + length], "big", signed=signed)
2126            for i in range(0, len(data), length)]
2127
2128#
2129# Private helpers
2130#
2131
2132def _check_is_bytes(data):
2133    if not isinstance(data, bytes):
2134        _err(f"'{data}' has type '{type(data).__name__}', expected 'bytes'")
2135
2136def _check_length_positive(length):
2137    if length < 1:
2138        _err("'length' must be greater than zero, was " + str(length))
2139
2140def _append_no_dup(lst, elm):
2141    # Appends 'elm' to 'lst', but only if it isn't already in 'lst'. Lets us
2142    # preserve order, which a set() doesn't.
2143
2144    if elm not in lst:
2145        lst.append(elm)
2146
2147def _decode_and_escape(b):
2148    # Decodes the 'bytes' array 'b' as UTF-8 and backslash-escapes special
2149    # characters
2150
2151    # Hacky but robust way to avoid double-escaping any '\' spit out by
2152    # 'backslashreplace' bytes.translate() can't map to more than a single
2153    # byte, but str.translate() can map to more than one character, so it's
2154    # nice here. There's probably a nicer way to do this.
2155    return (b.decode("utf-8", "surrogateescape")
2156            .translate(_escape_table)
2157            .encode("utf-8", "surrogateescape")
2158            .decode("utf-8", "backslashreplace"))
2159
2160def _root_and_path_to_node(cur, path, fullpath):
2161    # Returns the node pointed at by 'path', relative to the Node 'cur'. For
2162    # example, if 'cur' has path /foo/bar, and 'path' is "baz/qaz", then the
2163    # node with path /foo/bar/baz/qaz is returned. 'fullpath' is the path as
2164    # given in the .dts file, for error messages.
2165
2166    for component in path.split("/"):
2167        # Collapse multiple / in a row, and allow a / at the end
2168        if not component:
2169            continue
2170
2171        if component not in cur.nodes:
2172            _err(f"component '{component}' in path '{fullpath}' "
2173                 "does not exist")
2174
2175        cur = cur.nodes[component]
2176
2177    return cur
2178
2179def _err(msg) -> NoReturn:
2180    raise DTError(msg)
2181
2182_escape_table = str.maketrans({
2183    "\\": "\\\\",
2184    '"': '\\"',
2185    "\a": "\\a",
2186    "\b": "\\b",
2187    "\t": "\\t",
2188    "\n": "\\n",
2189    "\v": "\\v",
2190    "\f": "\\f",
2191    "\r": "\\r"})
2192
2193# Lexer states
2194_DEFAULT = 0
2195_EXPECT_PROPNODENAME = 1
2196_EXPECT_BYTE = 2
2197
2198_num_re = re.compile(r"(0[xX][0-9a-fA-F]+|[0-9]+)(?:ULL|UL|LL|U|L)?")
2199
2200# A leading \ is allowed property and node names, probably to allow weird node
2201# names that would clash with other stuff
2202_propnodename_re = re.compile(r"\\?([a-zA-Z0-9,._+*#?@-]+)")
2203
2204# Node names are more restrictive than property names.
2205_nodename_chars = set(string.ascii_letters + string.digits + ',._+-@')
2206
2207# Misc. tokens that are tried after a property/node name. This is important, as
2208# there's overlap with the allowed characters in names.
2209_misc_re = re.compile(
2210    "|".join(re.escape(pat) for pat in (
2211        "==", "!=", "!", "=", ",", ";", "+", "-", "*", "/", "%", "~", "?", ":",
2212        "^", "(", ")", "{", "}", "[", "]", "<<", "<=", "<", ">>", ">=", ">",
2213        "||", "|", "&&", "&")))
2214
2215_byte_re = re.compile(r"[0-9a-fA-F]{2}")
2216
2217# Matches a backslash escape within a 'bytes' array. Captures the 'c' part of
2218# '\c', where c might be a single character or an octal/hex escape.
2219_unescape_re = re.compile(br'\\([0-7]{1,3}|x[0-9A-Fa-f]{1,2}|.)')
2220
2221def _init_tokens():
2222    # Builds a (<token 1>)|(<token 2>)|... regex and returns it. The
2223    # way this is constructed makes the token's value as an int appear
2224    # in match.lastindex after a match.
2225
2226    # Each pattern must have exactly one capturing group, which can capture any
2227    # part of the pattern. This makes match.lastindex match the token type.
2228    # _Token.val is based on the captured string.
2229    token_spec = {
2230        _T.INCLUDE: r'(/include/\s*"(?:[^\\"]|\\.)*")',
2231        # #line directive or GCC linemarker
2232        _T.LINE:
2233        r'^#(?:line)?[ \t]+([0-9]+[ \t]+"(?:[^\\"]|\\.)*")(?:[ \t]+[0-9]+){0,4}',
2234
2235        _T.STRING: r'"((?:[^\\"]|\\.)*)"',
2236        _T.DTS_V1: r"(/dts-v1/)",
2237        _T.PLUGIN: r"(/plugin/)",
2238        _T.MEMRESERVE: r"(/memreserve/)",
2239        _T.BITS: r"(/bits/)",
2240        _T.DEL_PROP: r"(/delete-property/)",
2241        _T.DEL_NODE: r"(/delete-node/)",
2242        _T.OMIT_IF_NO_REF: r"(/omit-if-no-ref/)",
2243        _T.LABEL: r"([a-zA-Z_][a-zA-Z0-9_]*):",
2244        _T.CHAR_LITERAL: r"'((?:[^\\']|\\.)*)'",
2245        _T.REF: r"&([a-zA-Z_][a-zA-Z0-9_]*|{[a-zA-Z0-9,._+*#?@/-]*})",
2246        _T.INCBIN: r"(/incbin/)",
2247        # Whitespace, C comments, and C++ comments
2248        _T.SKIP: r"(\s+|(?:/\*(?:.|\n)*?\*/)|//.*$)",
2249        # Return a token for end-of-file so that the parsing code can
2250        # always assume that there are more tokens when looking
2251        # ahead. This simplifies things.
2252        _T.EOF: r"(\Z)",
2253    }
2254
2255    # MULTILINE is needed for C++ comments and #line directives
2256    return re.compile("|".join(token_spec[tok_id] for tok_id in
2257                               range(1, _T.EOF + 1)),
2258                      re.MULTILINE | re.ASCII)
2259
2260_token_re = _init_tokens()
2261
2262_TYPE_TO_N_BYTES = {
2263    _MarkerType.UINT8: 1,
2264    _MarkerType.UINT16: 2,
2265    _MarkerType.UINT32: 4,
2266    _MarkerType.UINT64: 8,
2267}
2268
2269_N_BYTES_TO_TYPE = {
2270    1: _MarkerType.UINT8,
2271    2: _MarkerType.UINT16,
2272    4: _MarkerType.UINT32,
2273    8: _MarkerType.UINT64,
2274}
2275
2276_N_BYTES_TO_START_STR = {
2277    1: " [",
2278    2: " /bits/ 16 <",
2279    4: " <",
2280    8: " /bits/ 64 <",
2281}
2282
2283_N_BYTES_TO_END_STR = {
2284    1: " ]",
2285    2: " >",
2286    4: " >",
2287    8: " >",
2288}
2289