1#!/usr/bin/env python3
2# SPDX-License-Identifier: Apache-2.0
3# Copyright (c) 2021 Intel Corporation
4
5# A script to generate twister options based on modified files.
6
7import re, os
8import argparse
9import yaml
10import fnmatch
11import subprocess
12import json
13import logging
14import sys
15import glob
16from pathlib import Path
17from git import Repo
18from west.manifest import Manifest
19
# Prefer the C-accelerated YAML loader when available; fall back to the
# pure-Python implementation otherwise.
try:
    from yaml import CSafeLoader as SafeLoader
except ImportError:
    from yaml import SafeLoader

if "ZEPHYR_BASE" not in os.environ:
    exit("$ZEPHYR_BASE environment variable undefined.")

# These are globally used variables. They are assigned in __main__ and are visible in further methods
# however, pylint complains that it doesn't recognize them when used (used-before-assignment).
zephyr_base = Path(os.environ['ZEPHYR_BASE'])
repository_path = zephyr_base
repo_to_scan = Repo(zephyr_base)
args = None
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
# Silence pykwalify schema-validation chatter (50 == logging.CRITICAL).
logging.getLogger("pykwalify.core").setLevel(50)

# Make in-tree helper scripts (list_boards) importable.
sys.path.append(os.path.join(zephyr_base, 'scripts'))
import list_boards
39
40
41def _get_match_fn(globs, regexes):
42    # Constructs a single regex that tests for matches against the globs in
43    # 'globs' and the regexes in 'regexes'. Parts are joined with '|' (OR).
44    # Returns the search() method of the compiled regex.
45    #
46    # Returns None if there are neither globs nor regexes, which should be
47    # interpreted as no match.
48
49    if not (globs or regexes):
50        return None
51
52    regex = ""
53
54    if globs:
55        glob_regexes = []
56        for glob in globs:
57            # Construct a regex equivalent to the glob
58            glob_regex = glob.replace(".", "\\.").replace("*", "[^/]*") \
59                             .replace("?", "[^/]")
60
61            if not glob.endswith("/"):
62                # Require a full match for globs that don't end in /
63                glob_regex += "$"
64
65            glob_regexes.append(glob_regex)
66
67        # The glob regexes must anchor to the beginning of the path, since we
68        # return search(). (?:) is a non-capturing group.
69        regex += "^(?:{})".format("|".join(glob_regexes))
70
71    if regexes:
72        if regex:
73            regex += "|"
74        regex += "|".join(regexes)
75
76    return re.compile(regex).search
77
class Tag:
    """
    Represents an entry for a tag in tags.yaml.

    These attributes are set by the caller (find_tags):

    name:
        The tag name, i.e. the key of the entry in tags.yaml.

    exclude:
        True if no modified file matched this tag's patterns.

    _match_fn / _exclude_match_fn:
        search() predicates built by _get_match_fn() from the entry's
        files/files-regex (and the -exclude variants), or None when the
        entry has no such keys.
    """
    def _contains(self, path):
        """Return True if this tag's patterns match 'path' and the exclude
        patterns do not; False otherwise.

        Wrapped in bool() so callers always get a real boolean rather than
        a regex match object or None.
        """
        return bool(self._match_fn and self._match_fn(path) and not
                    (self._exclude_match_fn and self._exclude_match_fn(path)))

    def __repr__(self):
        return "<Tag {}>".format(self.name)
100
class Filters:
    """Derives a twister test-plan scope from a list of modified files.

    Each find_*() method inspects self.modified_files for changes in one
    scope (manifest modules, tags, test directories, architectures,
    boards) and, on a match, invokes twister via get_plan() to collect the
    matching test suites into self.all_tests.  Files for which a scope was
    found are recorded in self.resolved_files; any file left unresolved in
    find_excludes() forces a full twister run (self.full_twister).
    """

    def __init__(self, modified_files, ignore_path, alt_tags, testsuite_root,
                 pull_request=False, platforms=None, detailed_test_id=True,
                 quarantine_list=None, tc_roots_th=20):
        # Changed/added/deleted file paths, relative to the repo root.
        self.modified_files = modified_files
        self.testsuite_root = testsuite_root
        # Files for which a matching test scope was found.
        self.resolved_files = []
        self.twister_options = []
        self.full_twister = False
        # Accumulated test suite entries from all partial twister runs.
        self.all_tests = []
        self.tag_options = []
        self.pull_request = pull_request
        # 'platforms' previously defaulted to a shared mutable list ([]);
        # normalize None (and empty) to a fresh list per instance so
        # instances never share state.
        self.platforms = platforms or []
        self.detailed_test_id = detailed_test_id
        self.ignore_path = ignore_path
        self.tag_cfg_file = alt_tags
        self.quarantine_list = quarantine_list
        # Modified-testcase-root count above which we revert to a full run.
        self.tc_roots_th = tc_roots_th

    def process(self):
        """Run all scope finders in order, then decide on excludes/full run."""
        self.find_modules()
        self.find_tags()
        self.find_tests()
        if not self.platforms:
            # Arch/board based filtering only applies when the caller did
            # not already pin an explicit platform list.
            self.find_archs()
            self.find_boards()
        self.find_excludes()

    def get_plan(self, options, integration=False, use_testsuite_root=True):
        """Invoke twister with 'options' to produce a partial test plan and
        merge its test suites into self.all_tests.

        options: extra twister command-line arguments.
        integration: add --integration to the invocation.
        use_testsuite_root: pass each self.testsuite_root entry via -T.
        """
        fname = "_test_plan_partial.json"
        cmd = [f"{zephyr_base}/scripts/twister", "-c"] + options + ["--save-tests", fname]
        if not self.detailed_test_id:
            cmd += ["--no-detailed-test-id"]
        if self.testsuite_root and use_testsuite_root:
            for root in self.testsuite_root:
                cmd += ["-T", root]
        if integration:
            cmd.append("--integration")
        if self.quarantine_list:
            for q in self.quarantine_list:
                cmd += ["--quarantine-list", q]

        logging.info(" ".join(cmd))
        # Twister's exit code is intentionally ignored; only the saved test
        # plan file is consumed.
        _ = subprocess.call(cmd)
        with open(fname, newline='') as jsonfile:
            json_data = json.load(jsonfile)
            suites = json_data.get("testsuites", [])
            self.all_tests.extend(suites)
        if os.path.exists(fname):
            os.remove(fname)

    def find_modules(self):
        """If west.yml changed, run twister tagged with every manifest
        project that was added, updated or removed between the old and new
        manifests."""
        if 'west.yml' in self.modified_files and args.commits is not None:
            print("Manifest file 'west.yml' changed")
            print("=========")
            # NOTE(review): [:-2] assumes args.commits has the form "<ref>.."
            # (trailing two dots stripped to get the base ref) -- confirm
            # against how -c/--commits is passed by CI.
            old_manifest_content = repo_to_scan.git.show(f"{args.commits[:-2]}:west.yml")
            with open("west_old.yml", "w") as manifest:
                manifest.write(old_manifest_content)
            old_manifest = Manifest.from_file("west_old.yml")
            new_manifest = Manifest.from_file("west.yml")
            old_projs = set((p.name, p.revision) for p in old_manifest.projects)
            new_projs = set((p.name, p.revision) for p in new_manifest.projects)
            logging.debug(f'old_projs: {old_projs}')
            logging.debug(f'new_projs: {new_projs}')
            # Removed projects
            rprojs = set(filter(lambda p: p[0] not in list(p[0] for p in new_projs),
                old_projs - new_projs))
            # Updated projects
            uprojs = set(filter(lambda p: p[0] in list(p[0] for p in old_projs),
                new_projs - old_projs))
            # Added projects
            aprojs = new_projs - old_projs - uprojs

            # All projs
            projs = rprojs | uprojs | aprojs
            projs_names = [name for name, rev in projs]

            logging.info(f'rprojs: {rprojs}')
            logging.info(f'uprojs: {uprojs}')
            logging.info(f'aprojs: {aprojs}')
            logging.info(f'project: {projs_names}')

            _options = []
            for p in projs_names:
                _options.extend(["-t", p])

            if self.platforms:
                for platform in self.platforms:
                    _options.extend(["-p", platform])

            self.get_plan(_options, True)

    def find_archs(self):
        """Collect architectures touched by the modified files and run an
        arch-filtered twister plan."""
        # we match both arch/<arch>/* and include/zephyr/arch/<arch> and skip common.
        archs = set()

        for f in self.modified_files:
            p = re.match(r"^arch\/([^/]+)\/", f)
            if not p:
                p = re.match(r"^include\/zephyr\/arch\/([^/]+)\/", f)
            if p:
                if p.group(1) != 'common':
                    archs.add(p.group(1))
                    # Modified file is treated as resolved, since a matching scope was found
                    self.resolved_files.append(f)

        _options = []
        for arch in archs:
            _options.extend(["-a", arch])

        if _options:
            logging.info('Potential architecture filters...')
            # Both branches previously made the identical get_plan() call;
            # only the platform options differ.
            if self.platforms:
                for platform in self.platforms:
                    _options.extend(["-p", platform])
            self.get_plan(_options, True)

    def find_boards(self):
        """Map modified files under boards/ to known board identifiers and
        run a platform-filtered twister plan for them."""
        changed_boards = set()
        matched_boards = {}
        resolved_files = []

        for file in self.modified_files:
            # Documentation/image-only changes never affect test scope.
            if file.endswith(".rst") or file.endswith(".png") or file.endswith(".jpg"):
                continue
            if file.startswith("boards/"):
                changed_boards.add(file)
                resolved_files.append(file)

        roots = [zephyr_base]
        if repository_path != zephyr_base:
            roots.append(repository_path)

        # Look for boards in monitored repositories
        lb_args = argparse.Namespace(**{'arch_roots': roots, 'board_roots': roots, 'board': None, 'soc_roots':roots,
                                        'board_dir': None})
        known_boards = list_boards.find_v2_boards(lb_args)

        for changed in changed_boards:
            # Resolve once per changed file (loop-invariant for the inner loop).
            c = (zephyr_base / changed).resolve()
            for board in known_boards:
                if c.is_relative_to(board.dir.resolve()):
                    for file in glob.glob(os.path.join(board.dir, f"{board.name}*.yaml")):
                        with open(file, 'r') as f:
                            b = yaml.load(f.read(), Loader=SafeLoader)
                            matched_boards[b['identifier']] = board

        logging.info(f"found boards: {','.join(matched_boards.keys())}")
        # If modified file is caught by "find_boards" workflow (change in "boards" dir AND board recognized)
        # it means a proper testing scope for this file was found and this file can be removed
        # from further consideration
        for _, board in matched_boards.items():
            self.resolved_files.extend(list(filter(lambda f: str(board.dir.relative_to(zephyr_base)) in f, resolved_files)))

        _options = []
        # NOTE(review): threshold is hard-coded at 20 here, unlike find_tests
        # which honours self.tc_roots_th -- confirm whether it should be
        # configurable too.
        if len(matched_boards) > 20:
            logging.warning(f"{len(matched_boards)} boards changed, this looks like a global change, skipping test handling, revert to default.")
            self.full_twister = True
            return

        for board in matched_boards:
            _options.extend(["-p", board])

        if _options:
            logging.info('Potential board filters...')
            self.get_plan(_options)

    def find_tests(self):
        """Walk up from each modified file looking for a testcase.yaml or
        sample.yaml scope, then run a test-root-filtered twister plan."""
        tests = set()
        for f in self.modified_files:
            if f.endswith(".rst"):
                continue
            d = os.path.dirname(f)
            scope_found = False
            while not scope_found and d:
                head, tail = os.path.split(d)
                if os.path.exists(os.path.join(d, "testcase.yaml")) or \
                    os.path.exists(os.path.join(d, "sample.yaml")):
                    tests.add(d)
                    # Modified file is treated as resolved, since a matching scope was found
                    self.resolved_files.append(f)
                    scope_found = True
                elif tail == "common":
                    # Look for yamls in directories collocated with common.
                    # (loop variable renamed so it no longer shadows the
                    # imported 'yaml' module)
                    yamls_found = list(glob.iglob(head + '/**/testcase.yaml', recursive=True))
                    yamls_found.extend(glob.iglob(head + '/**/sample.yaml', recursive=True))
                    if yamls_found:
                        for yaml_path in yamls_found:
                            tests.add(os.path.dirname(yaml_path))
                        self.resolved_files.append(f)
                        scope_found = True
                    else:
                        d = os.path.dirname(d)
                else:
                    d = os.path.dirname(d)

        _options = []
        for t in tests:
            _options.extend(["-T", t])

        if len(tests) > self.tc_roots_th:
            logging.warning(f"{len(tests)} tests changed, this looks like a global change, skipping test handling, revert to default")
            self.full_twister = True
            return

        if _options:
            logging.info(f'Potential test filters...({len(tests)} changed...)')
            if self.platforms:
                for platform in self.platforms:
                    _options.extend(["-p", platform])
            self.get_plan(_options, use_testsuite_root=False)

    def find_tags(self):
        """Build -e (exclude-tag) options for every tag in the tags config
        whose file patterns match none of the modified files."""
        with open(self.tag_cfg_file, 'r') as ymlfile:
            tags_config = yaml.safe_load(ymlfile)

        tags = {}
        for t, x in tags_config.items():
            tag = Tag()
            tag.exclude = True
            tag.name = t

            # tag._match_fn(path) tests if the path matches files and/or
            # files-regex
            tag._match_fn = _get_match_fn(x.get("files"), x.get("files-regex"))

            # Like tag._match_fn(path), but for files-exclude and
            # files-regex-exclude
            tag._exclude_match_fn = \
                _get_match_fn(x.get("files-exclude"), x.get("files-regex-exclude"))

            tags[tag.name] = tag

        for f in self.modified_files:
            for t in tags.values():
                if t._contains(f):
                    t.exclude = False

        exclude_tags = set()
        for t in tags.values():
            if t.exclude:
                exclude_tags.add(t.name)

        for tag in exclude_tags:
            self.tag_options.extend(["-e", tag])

        if exclude_tags:
            logging.info(f'Potential tag based filters: {exclude_tags}')

    def find_excludes(self, skip=None):
        """Match still-unresolved modified files against the twister-ignore
        patterns; any leftover unmatched file triggers a full twister run.

        skip: unused; kept (with a safe None default replacing the previous
        mutable [] default) for backward compatibility.
        """
        with open(self.ignore_path, "r") as twister_ignore:
            ignores = twister_ignore.read().splitlines()
            ignores = filter(lambda x: not x.startswith("#"), ignores)

        found = set()
        files_not_resolved = list(filter(lambda x: x not in self.resolved_files, self.modified_files))

        for pattern in ignores:
            if pattern:
                found.update(fnmatch.filter(files_not_resolved, pattern))

        logging.debug(found)
        logging.debug(files_not_resolved)

        # Full twister run can be ordered by detecting great number of tests/boards changed
        # or if not all modified files were resolved (corresponding scope found)
        self.full_twister = self.full_twister or sorted(files_not_resolved) != sorted(found)

        if self.full_twister:
            _options = []
            logging.info('Need to run full or partial twister...')
            if self.platforms:
                for platform in self.platforms:
                    _options.extend(["-p", platform])

                _options.extend(self.tag_options)
                self.get_plan(_options)
            else:
                _options.extend(self.tag_options)
                self.get_plan(_options, True)
        else:
            logging.info('No twister needed or partial twister run only...')
389
def parse_args(argv=None):
    """Parse command-line options for the test-plan generator.

    argv: optional list of argument strings; defaults to sys.argv[1:].
          Passing an explicit list makes the function usable (and
          testable) without touching the process arguments.

    Returns the populated argparse.Namespace.
    """
    parser = argparse.ArgumentParser(
                description="Generate twister argument files based on modified file",
                allow_abbrev=False)
    parser.add_argument('-c', '--commits', default=None,
            help="Commit range in the form: a..b")
    parser.add_argument('-m', '--modified-files', default=None,
            help="File with information about changed/deleted/added files.")
    parser.add_argument('-o', '--output-file', default="testplan.json",
            help="JSON file with the test plan to be passed to twister")
    parser.add_argument('-P', '--pull-request', action="store_true",
            help="This is a pull request")
    parser.add_argument('-p', '--platform', action="append",
            help="Limit this for a platform or a list of platforms.")
    parser.add_argument('-t', '--tests_per_builder', default=700, type=int,
            help="Number of tests per builder")
    # TODO(review): this help text is a duplicate of -t's; confirm the
    # intended description for --default-matrix.
    parser.add_argument('-n', '--default-matrix', default=10, type=int,
            help="Number of tests per builder")
    parser.add_argument('--testcase-roots-threshold', default=20, type=int,
            help="Threshold value for number of modified testcase roots, up to which an optimized scope is still applied."
                 "When exceeded, full scope will be triggered")
    parser.add_argument('--detailed-test-id', action='store_true',
            help="Include paths to tests' locations in tests' names.")
    parser.add_argument("--no-detailed-test-id", dest='detailed_test_id', action="store_false",
            help="Don't put paths into tests' names.")
    parser.add_argument('-r', '--repo-to-scan', default=None,
            help="Repo to scan")
    parser.add_argument('--ignore-path',
            default=os.path.join(zephyr_base, 'scripts', 'ci', 'twister_ignore.txt'),
            help="Path to a text file with patterns of files to be matched against changed files")
    parser.add_argument('--alt-tags',
            default=os.path.join(zephyr_base, 'scripts', 'ci', 'tags.yaml'),
            help="Path to a file describing relations between directories and tags")
    parser.add_argument(
            "-T", "--testsuite-root", action="append", default=[],
            help="Base directory to recursively search for test cases. All "
                "testcase.yaml files under here will be processed. May be "
                "called multiple times. Defaults to the 'samples/' and "
                "'tests/' directories at the base of the Zephyr tree.")
    parser.add_argument(
            "--quarantine-list", action="append", metavar="FILENAME",
            help="Load list of test scenarios under quarantine. The entries in "
                "the file need to correspond to the test scenarios names as in "
                "corresponding tests .yaml files. These scenarios "
                "will be skipped with quarantine as the reason.")

    # Include paths in names by default.
    parser.set_defaults(detailed_test_id=True)

    return parser.parse_args(argv)
440
441
if __name__ == "__main__":

    args = parse_args()
    files = []
    errors = 0
    # Allow scanning a repository other than the default zephyr tree.
    if args.repo_to_scan:
        repository_path = Path(args.repo_to_scan)
        repo_to_scan = Repo(repository_path)
    # Collect the changed-file list either from a git commit range or from
    # a JSON file prepared by the caller (e.g. the CI workflow).
    if args.commits:
        commit = repo_to_scan.git.diff("--name-only", args.commits)
        files = commit.split("\n")
    elif args.modified_files:
        with open(args.modified_files, "r") as fp:
            files = json.load(fp)

    if files:
        print("Changed files:\n=========")
        print("\n".join(files))
        print("=========")

    f = Filters(files, args.ignore_path, args.alt_tags, args.testsuite_root,
                args.pull_request, args.platform, args.detailed_test_id, args.quarantine_list,
                args.testcase_roots_threshold)
    f.process()

    # remove dupes and filtered cases
    dup_free = []
    dup_free_set = set()
    logging.info(f'Total tests gathered: {len(f.all_tests)}')
    for ts in f.all_tests:
        if ts.get('status') == 'filtered':
            continue
        n = ts.get("name")
        a = ts.get("arch")
        p = ts.get("platform")
        if ts.get('status') == 'error':
            logging.info(f"Error found: {n} on {p} ({ts.get('reason')})")
            errors += 1
        # A suite is identified by (name, arch, platform); keep only the
        # first occurrence of each combination.
        if (n, a, p,) not in dup_free_set:
            dup_free.append(ts)
            dup_free_set.add((n, a, p,))

    logging.info(f'Total tests to be run: {len(dup_free)}')
    # Write builder-sizing hints consumed by the surrounding CI workflow.
    with open(".testplan", "w") as tp:
        total_tests = len(dup_free)
        if total_tests and total_tests < args.tests_per_builder:
            nodes = 1
        else:
            # NOTE(review): round() yields 0 nodes when total_tests is 0 and
            # uses banker's rounding -- presumably acceptable for sizing;
            # confirm with the workflow that consumes TWISTER_NODES.
            nodes = round(total_tests / args.tests_per_builder)

        tp.write(f"TWISTER_TESTS={total_tests}\n")
        tp.write(f"TWISTER_NODES={nodes}\n")
        tp.write(f"TWISTER_FULL={f.full_twister}\n")
        logging.info(f'Total nodes to launch: {nodes}')

    # NOTE(review): 'header' is not used anywhere below -- looks like a
    # leftover; kept as-is.
    header = ['test', 'arch', 'platform', 'status', 'extra_args', 'handler',
            'handler_time', 'used_ram', 'used_rom']

    # write plan
    if dup_free:
        data = {}
        data['testsuites'] = dup_free
        with open(args.output_file, 'w', newline='') as json_file:
            json.dump(data, json_file, indent=4, separators=(',',':'))

    # Exit code is the number of suites that reported an 'error' status.
    sys.exit(errors)
508