1#!/usr/bin/env python3
2#
3# Copyright (c) 2017 Intel Corporation
4#
5# SPDX-License-Identifier: Apache-2.0
6
7"""
8Script to scan Zephyr include directories and emit system call and subsystem metadata
9
10System calls require a great deal of boilerplate code in order to implement
11completely. This script is the first step in the build system's process of
12auto-generating this code by doing a text scan of directories containing
13C or header files, and building up a database of system calls and their
14function call prototypes. This information is emitted to a generated
15JSON file for further processing.
16
17This script also scans for struct definitions such as __subsystem and
18__net_socket, emitting a JSON dictionary mapping tags to all the struct
19declarations found that were tagged with them.
20
21If the output JSON file already exists, its contents are checked against
22what information this script would have outputted; if the result is that the
23file would be unchanged, it is not modified to prevent unnecessary
24incremental builds.
25"""
26
27import sys
28import re
29import argparse
30import os
31import json
32from pathlib import PurePath
33
# Flags shared by every pattern compiled in this file. re.VERBOSE is what
# allows the patterns below to be laid out over several lines with inline
# comments; re.MULTILINE is set alongside it.
regex_flags = re.MULTILINE | re.VERBOSE

# Matches a function prototype introduced by __syscall or
# __syscall_always_inline.
#   group 1: everything between the attribute and the opening parenthesis
#            (return type and function name, still joined together)
#   group 2: the raw text between the parentheses (the argument list)
# The argument group is "[^)]*", so it ends at the first ')' encountered.
syscall_regex = re.compile(r'''
(?:__syscall|__syscall_always_inline)\s+   # __syscall attribute, must be first
([^(]+)                                    # type and name of system call (split later)
[(]                                        # Function opening parenthesis
([^)]*)                                    # Arg list (split later)
[)]                                        # Closing parenthesis
''', regex_flags)

# Tags whose struct declarations are collected; analyze_headers() creates one
# result list per entry of this list.
struct_tags = ["__subsystem", "__net_socket"]

# Pattern template for "<tag> struct <name> {". The %s placeholder is filled
# in with a tag by tagged_struct_update() before the pattern is compiled;
# group 1 captures the text between the "struct" keyword and the opening
# curly bracket.
tagged_struct_decl_template = r'''
%s\s+                           # tag, must be first
struct\s+                       # struct keyword is next
([^{]+)                         # name of subsystem
[{]                             # Open curly bracket
'''
52
def tagged_struct_update(target_list, tag, contents):
    """Append the name of every struct in *contents* declared with *tag*.

    The declaration pattern is built from ``tagged_struct_decl_template`` for
    the given tag. Each captured name is stripped of surrounding whitespace
    and added to *target_list* in place, in order of appearance. Nothing is
    returned.
    """
    pattern = re.compile(tagged_struct_decl_template % tag, regex_flags)
    for match in pattern.finditer(contents):
        target_list.append(match.group(1).strip())
57
58
def analyze_headers(include_dir, scan_dir, file_list):
    """Scan source trees for system call prototypes and tagged structs.

    The module-level ``args`` namespace must already be set: when its
    ``emit_all_syscalls`` flag is true, every system call found is marked to
    be emitted regardless of where it was found.

    Args:
        include_dir: Optional list of directories walked recursively; the
            ``.h``/``.c`` files found under them are marked to be emitted.
        scan_dir: Optional list of directories walked recursively; files
            found only under these are parsed but not marked to be emitted.
        file_list: Optional path of a text file holding a semicolon-separated
            list of files which are always marked to be emitted.

    Returns:
        A ``(syscalls, tagged)`` tuple. ``syscalls`` is a list of
        ``(groups, file_name, emit)`` entries where ``groups`` is the pair
        captured by ``syscall_regex``, ``file_name`` is the basename of the
        file the prototype was found in and ``emit`` is a bool. ``tagged``
        maps every entry of ``struct_tags`` to the list of struct names
        declared with that tag.

    Raises:
        SystemExit: if an entry of *file_list* is not an existing file.
        Exception: read/decoding and parsing errors are re-raised unchanged
            after the offending file has been named on stderr.
    """
    syscall_ret = []
    tagged_ret = {tag: [] for tag in struct_tags}

    # Maps file path -> {"emit": bool}; insertion order is the parse order.
    syscall_files = {}

    # Get the list of files which contain syscalls to be emitted. Without a
    # file list the emit decision comes solely from the directory walk below.
    if file_list:
        with open(file_list, "r", encoding="utf-8") as fp:
            contents = fp.read()

        for entry in contents.split(";"):
            # Tolerate a trailing newline or separator in the list file
            # instead of reporting a bogus "missing" file.
            one_file = entry.strip()
            if not one_file:
                continue
            if not os.path.isfile(one_file):
                sys.stderr.write(f"{one_file} does not exists!\n")
                sys.exit(1)
            syscall_files[one_file] = {"emit": True}

    include_dirs = set(include_dir or ())
    # Sorted so that the output stays deterministic.
    search_dirs = sorted(include_dirs | set(scan_dir or ()))

    # toolchain/common.h has the definitions of these tags which we
    # don't want to trip over.
    skipped_suffix = os.path.join(os.sep, 'toolchain', 'common.h')

    # Look for source files under various directories.
    # Due to "syscalls/*.h" being included unconditionally in various
    # other header files, we must generate the associated syscall
    # header files (e.g. for function stubs).
    for base_path in search_dirs:
        emit_default = base_path in include_dirs
        for root, dirs, files in os.walk(base_path, topdown=True):
            # Sorting in place also fixes the order in which os.walk()
            # descends into the sub-directories.
            dirs.sort()
            files.sort()
            for fn in files:
                path = os.path.join(root, fn)
                if (not path.endswith((".h", ".c"))
                        or path.endswith(skipped_suffix)):
                    continue

                path = PurePath(os.path.normpath(path)).as_posix()

                # The first registration of a file wins, so entries coming
                # from the file list (or an earlier directory) are kept.
                syscall_files.setdefault(path, {"emit": emit_default})

    # Parse files to extract syscall functions
    for one_file, file_info in syscall_files.items():
        with open(one_file, "r", encoding="utf-8") as fp:
            try:
                contents = fp.read()
            except Exception:
                # Only the file being read is known here; name it so the
                # failure can be traced, then propagate the original error.
                sys.stderr.write("Error decoding %s\n" % one_file)
                raise

        fn = os.path.basename(one_file)

        try:
            to_emit = file_info["emit"] or args.emit_all_syscalls

            syscall_result = [(mo.groups(), fn, to_emit)
                              for mo in syscall_regex.finditer(contents)]
            for tag in struct_tags:
                tagged_struct_update(tagged_ret[tag], tag, contents)
        except Exception:
            sys.stderr.write("While parsing %s\n" % fn)
            raise

        syscall_ret.extend(syscall_result)

    return syscall_ret, tagged_ret
141
142
def update_file_if_changed(path, new):
    """Write *new* to *path* unless the file already holds exactly that text.

    An up-to-date file is left untouched so that it is not modified
    needlessly, which would otherwise trigger an incremental rebuild.

    Args:
        path: Location of the output file; it is created when missing.
        new: Complete text the file must contain.

    Raises:
        OSError: if the file cannot be read or written for a reason other
            than not existing yet.
    """
    try:
        # UTF-8 is used explicitly, like for every other file this script
        # opens, so the comparison does not depend on the locale.
        with open(path, 'r', encoding='utf-8') as fp:
            old = fp.read()
    except (FileNotFoundError, UnicodeDecodeError):
        # Missing or undecodable output: it cannot match, so regenerate it.
        old = None

    if new != old:
        with open(path, 'w', encoding='utf-8') as fp:
            fp.write(new)
154
155
def parse_args():
    """Parse the command line into the module-level ``args`` namespace.

    Nothing is returned; the parsed options are published through the global
    ``args`` for the rest of the script to read.
    """
    global args

    cli = argparse.ArgumentParser(
        allow_abbrev=False,
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (flags, keyword arguments) for every option. The registration order is
    # kept as-is because it is the order in which --help lists the options.
    option_table = (
        (("-i", "--include"),
         dict(required=False, action="append",
              help="Include directories recursively scanned for .h files "
                   "containing syscalls that must be present in final binary. "
                   "Can be specified multiple times: -i topdir1 -i topdir2 ...")),
        (("--scan",),
         dict(required=False, action="append",
              help="Scan directories recursively for .h files containing "
                   "syscalls that need stubs generated but may not need to "
                   "be present in final binary. Can be specified multiple "
                   "times.")),
        (("-j", "--json-file"),
         dict(required=True,
              help="Write system call prototype information as json to file")),
        (("-t", "--tag-struct-file"),
         dict(required=True,
              help="Write tagged struct name information as json to file")),
        (("--file-list",),
         dict(required=False,
              help="Text file containing semi-colon separated list of "
                   "header file where only syscalls in these files "
                   "are emitted.")),
        (("--emit-all-syscalls",),
         dict(required=False, action="store_true",
              help="Emit all potential syscalls in the tree")),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    args = cli.parse_args()
189
190
def main():
    """Parse the options, scan the sources and refresh both JSON outputs."""
    parse_args()

    syscalls, tagged = analyze_headers(args.include, args.scan,
                                       args.file_list)

    # Each JSON file is only written when it does not exist yet or when its
    # content changed, since touching it forces an incremental rebuild.
    outputs = (
        (args.json_file, syscalls),
        (args.tag_struct_file, tagged),
    )
    for destination, payload in outputs:
        serialized = json.dumps(payload, indent=4, sort_keys=True)
        update_file_if_changed(destination, serialized)


if __name__ == "__main__":
    main()
217