1"""Helper functions to parse C code in heavily constrained scenarios.
2
3Currently supported functionality:
4
5* read_function_declarations: read function declarations from a header file.
6"""
7
8# Copyright The Mbed TLS Contributors
9# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
10
11### WARNING: the code in this file has not been extensively reviewed yet.
12### We do not think it is harmful, but it may be below our normal standards
13### for robustness and maintainability.
14
15import re
16from typing import Dict, Iterable, Iterator, List, Optional, Tuple
17
18
class ArgumentInfo:
    """Information about an argument to an API function.

    Attributes:
    * decl: the argument declaration, stripped of surrounding whitespace.
    * type: the type part of the declaration, with normalized whitespace.
    * name: the argument name, or None if the declaration has no name.
    * suffix: the array suffix including brackets (e.g. ``[4]``), or an
      empty string if there is none.

    The constructor raises ValueError (with the stripped declaration as
    its argument) if the declaration does not have the expected shape.
    """
    #pylint: disable=too-few-public-methods

    # Words that can end a type and therefore must not be mistaken for the
    # argument name (e.g. the ``int`` in ``unsigned int``).
    _KEYWORDS = [
        'const', 'register', 'restrict',
        'int', 'long', 'short', 'signed', 'unsigned',
    ]
    _DECLARATION_RE = re.compile(
        r'(?P<type>\w[\w\s*]*?)\s*' +
        # The \b closing the lookahead restricts the exclusion to whole
        # keywords. Without it, a name that merely starts with a keyword
        # (``interval``, ``longer``, ``constant``) would be rejected as a
        # name and end up glued to the type.
        r'(?!(?:' + r'|'.join(_KEYWORDS) + r')\b)(?P<name>\b\w+\b)?' +
        r'\s*(?P<suffix>\[[^][]*\])?\Z',
        re.A | re.S)

    @classmethod
    def normalize_type(cls, typ: str) -> str:
        """Normalize whitespace in a type.

        Each run of whitespace becomes a single space, and each ``*`` ends
        up preceded by exactly one space.
        """
        typ = re.sub(r'\s+', r' ', typ)
        typ = re.sub(r'\s*\*', r' *', typ)
        return typ

    def __init__(self, decl: str) -> None:
        """Parse the declaration of one function argument.

        Raise ValueError if decl cannot be parsed.
        """
        self.decl = decl.strip()
        m = self._DECLARATION_RE.match(self.decl)
        if not m:
            raise ValueError(self.decl)
        self.type = self.normalize_type(m.group('type')) #type: str
        self.name = m.group('name') #type: Optional[str]
        # The suffix group is None when there is no array suffix.
        self.suffix = m.group('suffix') or '' #type: str
48
49
class FunctionInfo:
    """Description of one declared API function.

    Attributes:
    * filename: the file name passed to the constructor.
    * line_number: the line number passed to the constructor.
    * qualifiers: the qualifiers, as a frozenset of strings.
    * return_type: the return type, as a string.
    * name: the function name.
    * arguments: one ArgumentInfo object per argument declaration.
    """
    #pylint: disable=too-few-public-methods

    # Pattern for a type that is just "void": the word void at the very end
    # of the string, with optional whitespace on either side.
    VOID_RE = re.compile(r'\s*\bvoid\s*\Z', re.ASCII)

    def __init__(self, #pylint: disable=too-many-arguments
                 filename: str,
                 line_number: int,
                 qualifiers: Iterable[str],
                 return_type: str,
                 name: str,
                 arguments: List[str]) -> None:
        """Record a function declaration.

        Each element of arguments is the declaration of one argument; it
        is parsed with ArgumentInfo.
        """
        # Where the declaration was found.
        self.filename = filename
        self.line_number = line_number
        # What was declared.
        self.name = name
        self.return_type = return_type
        self.qualifiers = frozenset(qualifiers)
        self.arguments = [ArgumentInfo(declaration)
                          for declaration in arguments]

    def returns_void(self) -> bool:
        """Tell whether the return type ends with the word void."""
        found = self.VOID_RE.search(self.return_type)
        return found is not None
74
75
# Match one C comment.
# Note that we match both comment types, so things like // in a /*...*/
# comment are handled correctly.
# In a // comment, the backslash-newline alternative must come before the
# "any character except newline" alternative. Otherwise the backslash is
# consumed on its own and the comment stops at the newline instead of
# continuing onto the next physical line.
_C_COMMENT_RE = re.compile(r'//(?:\\\n|[^\n])*|/\*.*?\*/', re.S)
_NOT_NEWLINES_RE = re.compile(r'[^\n]+')

def read_logical_lines(filename: str) -> Iterator[Tuple[int, str]]:
    """Read logical lines from a file.

    Logical lines are one or more physical lines, with balanced parentheses.
    Comments are removed before splitting the content into lines.

    Yield pairs (line_number, line) where line_number is the 1-based number
    of the first physical line of the logical line, and line is the text of
    the logical line, with its physical lines joined by newlines.

    If the parentheses are still unbalanced at the end of the file, the
    last logical line extends to the end of the file.
    """
    with open(filename, encoding='utf-8') as inp:
        content = inp.read()
    # Strip comments, but keep newlines for line numbering
    content = _C_COMMENT_RE.sub(
        lambda m: _NOT_NEWLINES_RE.sub("", m.group(0)),
        content)
    lines = enumerate(content.splitlines(), 1)
    for line_number, line in lines:
        # Read a logical line, containing balanced parentheses.
        # We assume that parentheses are balanced (this should be ok
        # since comments have been stripped), otherwise there will be
        # a gigantic logical line at the end.
        paren_level = line.count('(') - line.count(')')
        while paren_level > 0:
            # Use a default value rather than letting StopIteration escape:
            # inside a generator, an escaping StopIteration is turned into
            # a RuntimeError (PEP 479) and the last line would be lost.
            following = next(lines, None)
            if following is None:
                break
            more = following[1]
            paren_level += more.count('(') - more.count(')')
            line += '\n' + more
        yield line_number, line
105
# Match a function declaration at the start of a logical line.
_C_FUNCTION_DECLARATION_RE = re.compile(
    # Any number of extern/inline/static qualifiers.
    r'(?P<qualifiers>(?:(?:extern|inline|static)\b\s*)*)'
    # Return type: words, whitespace and asterisks, as short as possible.
    r'(?P<return_type>\w[\w\s*]*?)\s*'
    # Function name: the last word before the opening parenthesis.
    r'\b(?P<name>\w+)'
    # Everything between the parentheses, then the terminating semicolon.
    r'\s*\((?P<arguments>.*)\)\s*;',
    re.ASCII | re.DOTALL)
112
def read_function_declarations(functions: Dict[str, FunctionInfo],
                               filename: str) -> None:
    """Collect function declarations from a C header file.

    Read filename one logical line at a time. For each logical line that
    starts with a function declaration, store a FunctionInfo object in
    functions, keyed by the function name. Other lines are ignored.

    A parameter list that is empty or consists only of ``void`` results
    in an empty list of arguments.

    Raise ValueError (from ArgumentInfo) if a parameter declaration
    cannot be parsed.
    """
    for line_number, line in read_logical_lines(filename):
        m = _C_FUNCTION_DECLARATION_RE.match(line)
        if not m:
            continue
        qualifiers = m.group('qualifiers').split()
        return_type = m.group('return_type')
        name = m.group('name')
        arguments = m.group('arguments').split(',')
        # Splitting "" or "void" on commas gives a single element which is
        # not a real parameter. In particular, an empty string must not
        # reach ArgumentInfo, which would reject it with a ValueError.
        if len(arguments) == 1 and (
                not arguments[0].strip() or
                FunctionInfo.VOID_RE.match(arguments[0])):
            arguments = []
        # Note: we replace any existing declaration for the same name.
        functions[name] = FunctionInfo(filename, line_number,
                                       qualifiers,
                                       return_type,
                                       name,
                                       arguments)
132