1#!/usr/bin/env python3
2
3# Copyright (c) 2022-2024 Intel Corporation
4# SPDX-License-Identifier: Apache-2.0
5
6"""
7This script uploads ``twister.json`` file to Elasticsearch index for reporting and analysis.
8see  https://kibana.zephyrproject.io/
9
The script expects two environment variables with the Elasticsearch server connection parameters:
11    `ELASTICSEARCH_SERVER`
12    `ELASTICSEARCH_KEY`
13"""
14
15from elasticsearch import Elasticsearch
16from elasticsearch.helpers import bulk, BulkIndexError
17import sys
18import os
19import json
20import argparse
21import re
22
23
def flatten(name, value, name_sep="_", names_dict=None, parent_name=None, escape_sep=""):
    """
    Recursively flatten ``value`` into a plain, single-level dictionary.

    :param name: the flattened name of ``value``, used as a name prefix for all its items.
    :param value: object to flatten; nested lists and dictionaries are traversed,
                  any other value (including an empty container) becomes a leaf
                  stored under its flattened name, for example, a dictionary:
                  {
                    "ROM":{
                        "symbols":{
                            "name":"Root",
                            "size":4320,
                            "identifier":"root",
                            "address":0,
                            "children":[
                                {
                                    "name":"(no paths)",
                                    "size":2222,
                                    "identifier":":",
                                    "address":0,
                                    "children":[
                                        {
                                            "name":"var1",
                                            "size":20,
                                            "identifier":":/var1",
                                            "address":1234
                                        }, ...
                                    ]
                                } ...
                           ]
                        }
                   } ...
                 }
    :param name_sep: string to separate the concatenated name components; if the same
                     string is already present in a component it is kept and prefixed
                     with ``escape_sep`` (so with the default empty ``escape_sep``
                     it is simply repeated twice).
    :param names_dict: An optional dictionary with 'foo':'bar' items to flatten 'foo' list
                       properties where each item should be a dictionary with the 'bar' item
                       storing an unique name, so it will be taken as a part of the flattened
                       item's name instead of the item's index in its parent list.
                       Items without the 'bar' key fall back to their list index.
    :param parent_name: the short, single-level, name of the ``value``.
    :param escape_sep: string prepended to ``name_sep`` occurrences found inside
                       name components; empty by default (no escaping).

    :return: the ``value`` flattened to a plain dictionary where each key is concatenated
             from names of its initially nested items separated by ``name_sep``;
             with ``names_dict={'children': 'name'}`` and ``name_sep='/'`` the above
             example yields:
             {
                 "ROM/symbols/name": "Root",
                 "ROM/symbols/size": 4320,
                 "ROM/symbols/identifier": "root",
                 "ROM/symbols/address": 0,
                 "ROM/symbols/(no paths)/size": 2222,
                 "ROM/symbols/(no paths)/identifier": ":",
                 "ROM/symbols/(no paths)/address": 0,
                 "ROM/symbols/(no paths)/var1/size": 20,
                 "ROM/symbols/(no paths)/var1/identifier": ":/var1",
                 "ROM/symbols/(no paths)/var1/address": 1234,
             }
    """
    res_dict = {}
    name_prefix = name + name_sep if name else ''
    if isinstance(value, list) and len(value):
        # The property name holding each list item's unique name, if configured
        # for this list's parent property; None disables name-based flattening.
        key_name = names_dict.get(parent_name) if isinstance(names_dict, dict) and parent_name else None
        for idx, val in enumerate(value):
            # Fall back to the list index when the item has no unique-name key;
            # previously a missing key raised an unhandled KeyError.
            if isinstance(val, dict) and key_name is not None and key_name in val:
                flat_name = name_prefix + str(val[key_name]).replace(name_sep, escape_sep + name_sep)
                val_ = val.copy()
                val_.pop(key_name)
                flat_item = flatten(flat_name, val_, name_sep, names_dict, parent_name, escape_sep)
            else:
                flat_name = name_prefix + str(idx)
                flat_item = flatten(flat_name, val, name_sep, names_dict, parent_name, escape_sep)
            res_dict.update(flat_item)
    elif isinstance(value, dict) and len(value):
        for key, val in value.items():
            if names_dict and key in names_dict:
                # A named list container: skip its own name; its items will be
                # named by their unique name property instead.
                name_k = name
            else:
                name_k = name_prefix + str(key).replace(name_sep, escape_sep + name_sep)
            res_dict.update(flatten(name_k, val, name_sep, names_dict, key, escape_sep))
    elif len(name):
        # A leaf value (or an empty container) is stored under its flattened name.
        res_dict[name] = value
    return res_dict
104
def unflatten(src_dict, name_sep):
    """
    Unflat ``src_dict`` at its deepest level.

    Each key is split at its rightmost ``name_sep`` occurrence: the left part
    becomes a group key and the right part a property name inside that group.
    Keys without the separator are grouped under the empty string. If two
    source keys map to the same group/property pair, their values are
    collected into a list in source order.

    :param src_dict: a flat dictionary to unflat, for example:
                     {
                      "ROM/symbols/name": "Root",
                      "ROM/symbols/size": 4320,
                      "ROM/symbols/(no paths)/size": 2222,
                     }
    :param name_sep: string to split the dictionary keys.
    :return: the unflattened dictionary, for the above example:
             {
              "ROM/symbols": {"name": "Root", "size": 4320},
              "ROM/symbols/(no paths)": {"size": 2222}
             }
    """
    result = {}
    for key, value in src_dict.items():
        group_name, _, prop_name = key.rpartition(name_sep)
        group = result.setdefault(group_name, {})
        if prop_name not in group:
            group[prop_name] = value
        else:
            # Colliding property: accumulate values into a list.
            existing = group[prop_name]
            if not isinstance(existing, list):
                group[prop_name] = existing = [existing]
            existing.append(value)
    return result
158
159
def transform(t, args):
    """
    Post-process one index record in place.

    When ``args.transform`` is set, it is parsed as a JSON dictionary of
    ``{property_name: regexp}`` rules; each regexp is matched against the
    record's string property of that name and all named groups are merged
    into the record (unmatched groups become empty strings).
    Properties listed in ``args.exclude`` are then removed.

    :param t: the record dictionary; mutated in place.
    :param args: parsed command line arguments providing ``transform`` and ``exclude``.
    :return: the same (mutated) record ``t``.
    """
    if args.transform:
        # The rule string uses single quotes for shell convenience; normalize
        # to strict JSON and double the backslashes so regexps survive json.loads.
        normalized = str(args.transform).replace("'", "\"").replace("\\", "\\\\")
        for prop, pattern in json.loads(normalized).items():
            if prop not in t:
                continue
            found = re.match(pattern, t[prop])
            if found:
                t.update(found.groupdict(default=""))

    for prop in args.exclude:
        t.pop(prop, None)

    return t
175
def gendata(f, args):
    """
    Read a twister.json file and generate Elasticsearch bulk index actions.

    Each test suite record is augmented with the run environment and with
    'component'/'sub_component' values derived from the suite name. When
    ``args.flatten`` names one of the suite's properties, that property is
    flattened: a dictionary value is first transposed into a list of flat
    dictionaries; a list value then produces one cloned suite record per item,
    the item's keys prefixed with the property name.

    :param f: path of the twister.json report file.
    :param args: parsed command line arguments, see ``parse_args()``.
    :return: generator of ``{"_index": ..., "_source": ...}`` bulk actions.
    """
    with open(f, "r") as j:
        data = json.load(j)
        for t in data['testsuites']:
            name = t['name']
            _grouping = name.split("/")[-1]
            # Guard against suite names without a '.' so one malformed name
            # cannot abort the whole upload with an IndexError.
            _groups = _grouping.split(".")
            main_group = _groups[0]
            sub_group = _groups[1] if len(_groups) > 1 else ""
            env = data['environment']
            if args.run_date:
                env['run_date'] = args.run_date
            if args.run_id:
                env['run_id'] = args.run_id
            if args.run_attempt:
                env['run_attempt'] = args.run_attempt
            if args.run_branch:
                env['run_branch'] = args.run_branch
            if args.run_workflow:
                env['run_workflow'] = args.run_workflow
            t['environment'] = env
            t['component'] = main_group
            t['sub_component'] = sub_group

            yield_records = 0
            # If the flattened property is a dictionary, convert it to a plain list
            # where each item is a flat dictionary.
            if args.flatten and args.flatten in t and isinstance(t[args.flatten], dict):
                flat = t.pop(args.flatten)
                flat_list_dict = {}
                if args.flatten_list_names:
                    flat_list_dict = json.loads(str(args.flatten_list_names).replace("'", "\"").replace("\\", "\\\\"))
                #
                # Normalize flattening to a plain dictionary.
                # NOTE: escape_sep must be passed by keyword here; passing it as
                # the 5th positional argument binds it to ``parent_name`` instead,
                # silently disabling the --escape-separator option.
                flat = flatten('', flat, args.transpose_separator, flat_list_dict,
                               escape_sep=str(args.escape_separator))
                # Unflat one, the deepest level, expecting similar set of property names there.
                flat = unflatten(flat, args.transpose_separator)
                # Keep dictionary names as their properties and flatten the dictionary to a list of dictionaries.
                as_name = args.flatten_dict_name
                if len(as_name):
                    flat_list = []
                    for k, v in flat.items():
                        v[as_name] = k + args.transpose_separator + v[as_name] if as_name in v else k
                        v[as_name + '_depth'] = v[as_name].count(args.transpose_separator)
                        flat_list.append(v)
                    t[args.flatten] = flat_list
                else:
                    t[args.flatten] = flat

            # Flatten lists or dictionaries cloning the records with the rest of their items and
            # rename them composing the flattened property name with the item's name or index respectively.
            if args.flatten and args.flatten in t and isinstance(t[args.flatten], list):
                flat = t.pop(args.flatten)
                for flat_item in flat:
                    t_clone = t.copy()
                    if isinstance(flat_item, dict):
                        t_clone.update({ args.flatten + args.flatten_separator + k : v for k,v in flat_item.items() })
                    elif isinstance(flat_item, list):
                        t_clone.update({ args.flatten + args.flatten_separator + str(idx) : v for idx,v in enumerate(flat_item) })
                    yield {
                        "_index": args.index,
                        "_source": transform(t_clone, args)
                    }
                    yield_records += 1

            if not yield_records:
                # Nothing was flattened (or the flattened list was empty):
                # yield the suite as a single index record.
                yield {
                        "_index": args.index,
                        "_source": transform(t, args)
                }
246
def main():
    """
    Entry point: parse arguments, then either print the generated records
    (dry run), create the Elasticsearch index, or bulk-upload the input files.
    """
    args = parse_args()

    # Index sharding configuration applied when the index is created.
    settings = {
            "index": {
                "number_of_shards": 4
                }
            }

    # Explicit field mappings: either loaded from a user-provided map file or
    # the minimal defaults so numeric fields aren't dynamically mapped as text.
    if args.map_file:
        with open(args.map_file, "rt") as json_map:
            mappings = json.load(json_map)
    else:
        mappings = {
            "properties": {
                "execution_time": {"type": "float"},
                "retries": {"type": "integer"},
                "testcases.execution_time": {"type": "float"},
                }
            }

    if args.dry_run:
        # Print the generated records instead of uploading them, then exit.
        for f in args.files:
            for record in gendata(f, args):
                print(json.dumps(record, indent=4))
        sys.exit(0)

    # Connection parameters come from the environment (see module docstring).
    es = Elasticsearch(
        [os.environ['ELASTICSEARCH_SERVER']],
        api_key=os.environ['ELASTICSEARCH_KEY'],
        verify_certs=False
        )

    if args.create_index:
        es.indices.create(index=args.index, mappings=mappings, settings=settings)
    else:
        if args.run_date:
            print(f"Setting run date from command line: {args.run_date}")

        for f in args.files:
            print(f"Process: '{f}'")
            try:
                bulk(es, gendata(f, args), request_timeout=args.bulk_timeout)
            except BulkIndexError as e:
                # Surface the first error's reason before re-raising: the full
                # exception text is overwhelming for large bulk requests.
                print(f"ERROR adding '{f}' exception: {e}")
                error_0 = e.errors[0].get("index", {}).get("error", {})
                reason_0 = error_0.get('reason')
                print(f"ERROR reason: {reason_0}")
                raise e
303
def parse_args():
    """Build the command line interface and return the parsed arguments."""
    p = argparse.ArgumentParser(allow_abbrev=False,
                                formatter_class=argparse.RawTextHelpFormatter,
                                description=__doc__)
    # Mode switches and basic index options.
    p.add_argument('-y','--dry-run', action="store_true", help='Dry run.')
    p.add_argument('-c','--create-index', action="store_true", help='Create index.')
    p.add_argument('-m', '--map-file', required=False,
                   help='JSON map file with Elasticsearch index structure and data types.')
    p.add_argument('-i', '--index', required=True, default='tests-zephyr-1',
                   help='Elasticsearch index to push to.')
    p.add_argument('-r', '--run-date', help='Run date in ISO format', required=False)
    # Flattening configuration.
    p.add_argument('--flatten', required=False, default=None,
                   metavar='TESTSUITE_PROPERTY',
                   help="Flatten one of the test suite's properties:\n"
                   "it will be converted to a list where each list item becomes a separate index record\n"
                   "with all other properties of the test suite object duplicated and the flattened\n"
                   "property name used as a prefix for all its items, e.g.\n"
                   "'recording.cycles' becomes 'recording_cycles'.")
    p.add_argument('--flatten-dict-name', required=False, default="name",
                   metavar='PROPERTY_NAME',
                   help="For dictionaries flattened into a list, use this name for additional property\n"
                   "to store the item's flat concatenated name. One more property with that name\n"
                   "and'_depth' suffix will be added for number of `--transpose_separator`s in the name.\n"
                   "Default: '%(default)s'. Set empty string to disable.")
    p.add_argument('--flatten-list-names', required=False, default=None,
                   metavar='DICT',
                   help="An optional string with json dictionary like {'children':'name', ...}\n"
                   "to use it for flattening lists of dictionaries named 'children' which should\n"
                   "contain keys 'name' with unique string value as an actual name for the item.\n"
                   "This name value will be composed instead of the container's name 'children' and\n"
                   "the item's numeric index.")
    p.add_argument('--flatten-separator', required=False, default="_",
                   help="Separator to use it for the flattened property names. Default: '%(default)s'")
    p.add_argument('--transpose-separator', required=False, default="/",
                   help="Separator to use it for the transposed dictionary names stored in\n"
                   "`flatten-dict-name` properties. Default: '%(default)s'")
    p.add_argument('--escape-separator', required=False, default='',
                   help="Prepend name separators with the escape string if already present in names. "
                        "Default: '%(default)s'.")
    # Record post-processing.
    p.add_argument('--transform', required=False,
                   metavar='RULE',
                   help="Apply regexp group parsing to selected string properties after flattening.\n"
                   "The string is a json dictionary with property names and regexp strings to apply\n"
                   "on them to extract values, for example:\n"
                   r"\"{ 'recording_metric': '(?P<object>[^\.]+)\.(?P<action>[^\.]+)\.' }\"")
    p.add_argument('--exclude', required=False, nargs='*', default=[],
                   metavar='TESTSUITE_PROPERTY',
                   help="Don't store these properties in the Elasticsearch index.")
    # Run context metadata (typically from the CI environment).
    p.add_argument('--run-workflow', required=False,
                   help="Source workflow identificator, e.g. the workflow short name "
                   "and its triggering event name.")
    p.add_argument('--run-branch', required=False,
                   help="Source branch identificator.")
    p.add_argument('--run-id', required=False,
                   help="unique run-id (e.g. from github.run_id context)")
    p.add_argument('--run-attempt', required=False,
                   help="unique run attempt number (e.g. from github.run_attempt context)")
    p.add_argument('--bulk-timeout', required=False, type=int, default=60,
                   help="Elasticsearch bulk request timeout, seconds. Default %(default)s.")
    p.add_argument('files', metavar='FILE', nargs='+', help='file with test data.')

    return p.parse_args()
368
369
# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
372