#!/usr/bin/env python3
#
# Copyright (c) 2017 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

"""
Script to scan Zephyr include directories and emit system call and subsystem metadata

System calls require a great deal of boilerplate code in order to implement
completely. This script is the first step in the build system's process of
auto-generating this code by doing a text scan of directories containing
C or header files, and building up a database of system calls and their
function call prototypes. This information is emitted to a generated
JSON file for further processing.

This script also scans for struct definitions marked with tags such as
__subsystem and __net_socket, emitting a JSON dictionary mapping each tag to
all the struct declarations found that were marked with it.

If an output JSON file already exists, its contents are compared against
what this script would write; if the file would be unchanged, it is left
untouched so that it does not trigger unnecessary incremental rebuilds.
"""
26
import argparse
import json
import os
import re
import sys
from pathlib import PurePath
33
# Flags shared by every pattern in this file. re.VERBOSE lets the patterns
# below carry whitespace and "#" comments that are not part of the match.
regex_flags = re.MULTILINE | re.VERBOSE

# Matches a system call declaration. Group 1 is the return type together with
# the function name, group 2 is the raw text between the parentheses.
syscall_regex = re.compile(
    r'''
(?:__syscall|__syscall_always_inline)\s+   # __syscall attribute, must be first
([^(]+)                                    # type and name of system call (split later)
[(]                                        # Function opening parenthesis
([^)]*)                                    # Arg list (split later)
[)]                                        # Closing parenthesis
''',
    regex_flags,
)

# Struct tags that are searched for in every parsed file.
struct_tags = ["__subsystem", "__net_socket"]

# Pattern template for a tagged struct declaration. The "%s" placeholder is
# filled with one tag via %-formatting before the pattern is compiled;
# group 1 is the text between the "struct" keyword and the opening brace.
tagged_struct_decl_template = r'''
%s\s+                           # tag, must be first
struct\s+                       # struct keyword is next
([^{]+)                         # name of subsystem
[{]                             # Open curly bracket
'''
55
56
def tagged_struct_update(target_list, tag, contents):
    """Append the names of all structs in ``contents`` marked with ``tag``.

    Args:
        target_list: List that is extended in place with the names found.
        tag: Tag text to look for. It is substituted into
            ``tagged_struct_decl_template`` as-is (not regex-escaped).
        contents: Text of the file being scanned.
    """
    regex = re.compile(tagged_struct_decl_template % tag, regex_flags)
    # Group 1 runs up to the opening brace, so it carries trailing
    # whitespace that has to be stripped off the struct name.
    target_list.extend(mo.group(1).strip() for mo in regex.finditer(contents))
61
62
def analyze_headers(include_dir, scan_dir, file_list, emit_all_syscalls=None):
    """Collect system call prototypes and tagged struct names from source files.

    Args:
        include_dir: Directories (list or None) walked recursively for .h/.c
            files. Files found under these directories are marked to be
            emitted.
        scan_dir: Directories (list or None) walked the same way. Files found
            only under these directories are parsed but not marked to be
            emitted.
        file_list: Path of a text file holding a semicolon-separated list of
            files that are parsed and marked to be emitted, or a false value
            to skip this step.
        emit_all_syscalls: When true, every system call found is marked to be
            emitted. When None (the default), the value is taken from the
            module-level ``args`` namespace if parse_args() has set one, and
            is otherwise treated as False.

    Returns:
        A ``(syscalls, tagged)`` tuple. ``syscalls`` is a list of
        ``((type_and_name, arg_list), file_basename, to_emit)`` tuples, one
        per match of ``syscall_regex``. ``tagged`` maps each entry of
        ``struct_tags`` to the list of struct names found with that tag.

    The process exits with status 1 if an entry of ``file_list`` does not
    name an existing file. Errors raised while reading or parsing a file are
    re-raised after a short note has been written to stderr.
    """
    if emit_all_syscalls is None:
        # Backward compatible fallback: older callers rely on the namespace
        # that parse_args() publishes as the module-level "args".
        emit_all_syscalls = getattr(globals().get("args"), "emit_all_syscalls", False)

    tagged_ret = {tag: [] for tag in struct_tags}

    # Maps file path -> {"emit": bool}. Insertion order is the parse order.
    syscall_files = {}

    # Get the list of header files which contains syscalls to be emitted.
    # If file_list does not exist, we emit all syscalls.
    if file_list:
        with open(file_list, encoding="utf-8") as fp:
            listed_files = fp.read().split(";")

        for one_file in listed_files:
            # Tolerate a trailing newline or a stray ";" in the list file
            # instead of reporting an empty name as a missing file.
            one_file = one_file.strip()
            if not one_file:
                continue
            if not os.path.isfile(one_file):
                sys.stderr.write(f"{one_file} does not exists!\n")
                sys.exit(1)
            # NOTE(review): these keys are used as written, while walked
            # paths below are normalized; the same file spelled differently
            # would be parsed twice. Confirm whether callers rely on that.
            syscall_files[one_file] = {"emit": True}

    emit_roots = set(include_dir or ())
    # Sorted to keep the output deterministic.
    search_roots = sorted(emit_roots | set(scan_dir or ()))

    # toolchain/common.h has the definitions of these tags which we
    # don't want to trip over
    skipped_header = os.path.join(os.sep, 'toolchain', 'common.h')

    # Look for source files under various directories.
    # Due to "syscalls/*.h" being included unconditionally in various
    # other header files. We must generate the associated syscall
    # header files (e.g. for function stubs).
    for base_path in search_roots:
        emit = base_path in emit_roots
        for root, dirs, files in os.walk(base_path, topdown=True):
            # Sorting in place also fixes the order in which os.walk descends.
            dirs.sort()
            for fn in sorted(files):
                path = os.path.join(root, fn)
                if not path.endswith((".h", ".c")) or path.endswith(skipped_header):
                    continue

                path = PurePath(os.path.normpath(path)).as_posix()

                # Keep the flag of an entry that is already known.
                syscall_files.setdefault(path, {"emit": emit})

    # Parse files to extract syscall functions
    syscall_ret = []
    for one_file, info in syscall_files.items():
        with open(one_file, encoding="utf-8") as fp:
            try:
                contents = fp.read()
            except Exception:
                sys.stderr.write(f"Error decoding {one_file}\n")
                raise

        fn = os.path.basename(one_file)
        to_emit = bool(info["emit"] or emit_all_syscalls)

        try:
            syscall_result = [(mo.groups(), fn, to_emit) for mo in syscall_regex.finditer(contents)]
            for tag in struct_tags:
                tagged_struct_update(tagged_ret[tag], tag, contents)
        except Exception:
            sys.stderr.write(f"While parsing {fn}\n")
            raise

        syscall_ret.extend(syscall_result)

    return syscall_ret, tagged_ret
143
144
def update_file_if_changed(path, new):
    """Write ``new`` to ``path`` unless the file already holds exactly that text.

    Leaving an up-to-date file untouched keeps its modification time intact.

    Args:
        path: File to create or update.
        new: Complete text the file should contain.
    """
    try:
        with open(path, encoding="utf-8") as fp:
            if fp.read() == new:
                return
    except FileNotFoundError:
        # Nothing written yet: fall through and create the file.
        pass
    except UnicodeDecodeError:
        # The existing content is not valid UTF-8, so it cannot be equal to
        # the new text: fall through and replace it.
        pass

    with open(path, 'w', encoding="utf-8") as fp:
        fp.write(new)
156
157
def parse_args(argv=None):
    """Parse the command line and publish the result as the global ``args``.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv``.

    Returns:
        The parsed ``argparse.Namespace``; the same object is stored in the
        module-level ``args``.
    """
    global args
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
    )

    parser.add_argument(
        "-i",
        "--include",
        required=False,
        action="append",
        help="Include directories recursively scanned for .h files "
        "containing syscalls that must be present in final binary. "
        "Can be specified multiple times: -i topdir1 -i topdir2 ...",
    )
    parser.add_argument(
        "--scan",
        required=False,
        action="append",
        help="Scan directories recursively for .h files containing "
        "syscalls that need stubs generated but may not need to "
        "be present in final binary. Can be specified multiple "
        "times.",
    )
    parser.add_argument(
        "-j",
        "--json-file",
        required=True,
        help="Write system call prototype information as json to file",
    )
    parser.add_argument(
        "-t",
        "--tag-struct-file",
        required=True,
        help="Write tagged struct name information as json to file",
    )
    parser.add_argument(
        "--file-list",
        required=False,
        help="Text file containing semi-colon separated list of "
        "header file where only syscalls in these files "
        "are emitted.",
    )
    parser.add_argument(
        "--emit-all-syscalls",
        required=False,
        action="store_true",
        help="Emit all potential syscalls in the tree",
    )

    # Passing None makes argparse fall back to sys.argv, as before.
    args = parser.parse_args(argv)
    return args
211
212
def main():
    """Parse the command line, scan the sources and write both JSON files."""
    parse_args()

    syscalls, tagged = analyze_headers(args.include, args.scan, args.file_list)

    # Only write json files if they don't exist or have changes since
    # they will force an incremental rebuild.

    syscalls_in_json = json.dumps(syscalls, indent=4, sort_keys=True)
    update_file_if_changed(args.json_file, syscalls_in_json)

    tagged_struct_in_json = json.dumps(tagged, indent=4, sort_keys=True)
    update_file_if_changed(args.tag_struct_file, tagged_struct_in_json)
226
227
# Run the scan only when this file is executed as a script.
if __name__ == "__main__":
    main()
230