1#!/usr/bin/env python3
2#
3# Copyright (c) 2010-2023 Antmicro
4#
5# This file is licensed under the MIT License.
6# Full license text is available in 'licenses/MIT.txt'.
7#
8
9import argparse
10import sys
11from dataclasses import dataclass
12from typing import List, Optional
13import csv
14import resd
15
16from grammar import SAMPLE_TYPE, BLOCK_TYPE
17
18
@dataclass
class Mapping:
    """Describes how columns of a CSV row are converted into samples of one RESD channel."""
    # RESD sample type the mapped columns produce
    sample_type: SAMPLE_TYPE
    # CSV column labels (or numeric indices before resolution) to read values from
    map_from: List[str]
    # optional target property names; when given, samples are emitted as dicts
    map_to: Optional[List[str]]
    # RESD channel number the samples belong to
    channel: int

    def remap(self, row):
        """Extract this mapping's values from a single CSV *row* dict.

        Returns a dict when `map_to` names were provided, a bare int when a
        single unnamed column is mapped, or a list of converted values otherwise.
        """
        output = [self._retype(row[key]) for key in self.map_from]
        if self.map_to:
            output = dict(zip(self.map_to, output))
        # A single unnamed column is emitted as a scalar integer sample.
        if isinstance(output, list) and len(output) == 1:
            output = int(output[0])
        return output

    def _retype(self, value):
        """Best-effort conversion of a raw CSV string to int, float or unquoted str.

        Falls back to returning *value* unchanged when no conversion applies.
        """
        try:
            if all(c.isdigit() for c in value.lstrip('-')):
                return int(value)
            elif all(c.isdigit() or c == '.' for c in value.lstrip('-')):
                return float(value)
        except ValueError:
            # e.g. a lone '-' or multiple dots: keep the raw string
            return value
        # Strip explicit quoting; require both quote characters so a lone '"'
        # is not mangled into an empty string.
        if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
            return value[1:-1]
        # Fix: unquoted non-numeric strings previously fell off the end of the
        # if-chain and were silently turned into None; return them unchanged.
        return value
44
45
def parse_mapping(mapping):
    """Parse a '<type>:<from>[:<to_property>:<channel>]' mapping specification.

    Returns a (sample_type, map_from, map_to, channel) tuple, or None (with an
    explanatory message printed) when the specification is invalid.
    """
    chunks = mapping.split(':')

    # An empty <to_property> is allowed as a placeholder when only the channel
    # is given; normalize it to the explicit '_' wildcard.
    if len(chunks) >= 3 and not chunks[2]:
        chunks[2] = '_'

    if not all(chunks) or (len(chunks) < 2 or len(chunks) > 4):
        print(f'{mapping} is invalid mapping')
        return None

    # Accept any case-insensitive substring of a sample type name, as long as
    # it identifies the type unambiguously.
    possible_types = [type_ for type_ in SAMPLE_TYPE.encmapping if chunks[0].lower() in type_.lower()]
    if not possible_types:
        print(f'Invalid type: {chunks[0]}')
        print(f'Possible types: {", ".join(SAMPLE_TYPE.ksymapping.values())}')
        return None

    if len(possible_types) > 1:
        # Fix: possible_types holds plain type names (strings) — unpacking them
        # as `for _, type_ in ...` pairs raised ValueError instead of printing
        # the list of ambiguous matches.
        print(f'More than one type matches: {", ".join(possible_types)}')
        return None

    type_ = possible_types[0]
    map_from = chunks[1].split(',')
    map_to = chunks[2].split(',') if len(chunks) >= 3 and chunks[2] != '_' else None
    channel = int(chunks[3]) if len(chunks) >= 4 else 0

    return type_, map_from, map_to, channel
72
73
def parse_arguments():
    """Parse the command line into a list of per-input argument groups.

    Each '-i/--input' flag starts a new group; every group is parsed with the
    same argparse parser. Exits with an error when a group lacks both a
    frequency and a timestamp column, or when the final group has no output
    path. Returns the list of parsed namespaces.
    """
    argv = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=True, help='path to csv file')
    parser.add_argument('-m', '--map', action='append', type=parse_mapping,
        help='mapping in format <type>:<index/label>[:<to_property>:<channel>], multiple mappings are possible')
    parser.add_argument('-s', '--start-time', type=int, help='start time (in nanoseconds)')
    parser.add_argument('-f', '--frequency', type=float, help='frequency of the data (in Hz)')
    parser.add_argument('-t', '--timestamp', help='index/label of a column in the csv file for the timestamps (in nanoseconds)')
    parser.add_argument('-o', '--offset', type=int, default=0, help='number of samples to skip from the beginning of the file')
    parser.add_argument('-c', '--count', type=int, default=sys.maxsize, help='number of samples to parse')
    parser.add_argument('output', nargs='?', help='output file path')

    # With no arguments, or an explicit help flag anywhere, show usage and quit.
    if not argv or any(token in ('-h', '--help') for token in argv):
        parser.parse_args(['--help'])
        sys.exit(0)

    # Slice the command line at each '-i/--input' occurrence; every slice is an
    # independent entry describing one input file.
    boundaries = [position for position, token in enumerate(argv) if token in ('-i', '--input')]
    boundaries.append(len(argv))
    groups = [argv[begin:end] for begin, end in zip(boundaries, boundaries[1:])]

    entries = []
    for group in groups:
        entry = parser.parse_args(group)
        if entry.frequency is None and entry.timestamp is None:
            print(f'{entry.input}: either frequency or timestamp should be provided')
            sys.exit(1)
        if entry.frequency and entry.timestamp:
            print(f'Data will be resampled to {entry.frequency}Hz based on provided timestamps')

        entries.append(entry)

    # The output path is a positional of the last group; without it there is
    # nowhere to write, so fall back to usage and fail.
    if entries and entries[-1].output is None:
        parser.parse_args(['--help'])
        sys.exit(1)

    return entries
112
113
def map_source(labels, source):
    """Resolve *source* — a column label or a 0-based numeric index — to a label.

    Returns the matching label from *labels*, or None (printing a message for
    anything other than a None input) when the source cannot be resolved.
    """
    if source is None:
        return None

    # Fix: str.isdigit() is False for '' (unlike all() over an empty string,
    # which is vacuously True), so an empty source is reported as invalid
    # instead of raising ValueError in int('').
    if source.isdigit():
        index = int(source)
        if 0 <= index < len(labels):
            return labels[index]
        # Out-of-range index: fall through and report it as invalid.
        source = index

    if source not in labels:
        print(f'{source} is invalid source')
        return None

    return source
127
128
def rebuild_mapping(labels, mapping):
    """Turn a parsed mapping tuple into a Mapping with resolved column labels.

    Each source (index or label) is resolved against *labels*; returns None as
    soon as any source fails to resolve.
    """
    sample_type, sources, targets, channel = mapping

    for position, candidate in enumerate(sources):
        resolved = map_source(labels, candidate)
        if resolved is None:
            return None
        sources[position] = resolved

    return Mapping(sample_type, sources, targets, channel)
139
140
if __name__ == '__main__':
    arguments = parse_arguments()
    # A single RESD file is produced for all input groups; its path is the
    # positional argument of the last group on the command line.
    output_file = arguments[-1].output

    resd_file = resd.RESD(output_file)
    for group in arguments:
        # Without an explicit frequency, every sample carries its own timestamp.
        block_type = BLOCK_TYPE.ARBITRARY_TIMESTAMP
        resampling_mode = False
        if group.frequency is not None:
            block_type = BLOCK_TYPE.CONSTANT_FREQUENCY
            if group.timestamp is not None:
                # In resampling mode we use provided timestamps to generate constant frequency sample blocks.
                # It allows to reconstruct RESD stream spanning long time periods from the sparse data.
                # The idea is based on the default behavior of RESD, that allows for gaps between RESD blocks.
                # On the other side, constant frequency sample blocks contain continuous, densely packed data,
                # so we split samples into separate groups that are used to generate separate blocks.
                # It is based on a simple heuristic:
                # Samples with the same timestamps are grouped together and resampled to the frequency passed from the command line.
                # Start time of the generated block is calculated as an offset to the previous timestamp + the initial start-time passed from the command line.
                # Therefore for sparse data you often end up with the RESD file that consists of multiple blocks made of just one sample.
                # Start time of the block calculated from the provided timestamps is crucial,
                # because it translates to the virtual time during emulation, when the first sample from the block appears.
                # Gaps can be handled directly in the model using RESD APIs.
                # Usual behavior is to provide a default sample or repeat the last sample in the place of gaps.
                # If your CSV file contains well spaced samples, it is better to not provide timestamps explicitly
                # and generate a single block containing all samples.
                resampling_mode = True

        with open(group.input, 'rt') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            # Labels and mappings are resolved lazily on the first row, once the
            # CSV header is known.
            # NOTE(review): 'mapping' initialized here is rebound by the loops
            # below before any read — it looks vestigial.
            labels = mapping = None
            timestamp_source = None

            to_skip = group.offset
            to_parse = group.count

            # These fields are used only in resampling mode to keep track of the block's start time.
            # In resampling mode, data is automatically split into multiple blocks based on the timestamps.
            prev_timestamp = None
            start_offset = group.start_time

            for row in csv_reader:
                if labels is None:
                    labels = list(row.keys())
                    # NOTE(review): rebuild_mapping returns None for an invalid
                    # source, so a bad -m spec leaves a None in 'mappings' and
                    # fails later at mapping.sample_type — confirm whether this
                    # should abort immediately like the timestamp check below.
                    mappings = [rebuild_mapping(labels, mapping) for mapping in group.map]
                    if block_type == BLOCK_TYPE.ARBITRARY_TIMESTAMP or resampling_mode:
                        timestamp_source = map_source(labels, group.timestamp)
                        if timestamp_source is None:
                            sys.exit(1)

                # Honor --offset: skip the requested number of leading samples.
                if to_skip > 0:
                    to_skip -= 1
                    continue

                # Honor --count: stop once enough samples have been parsed.
                if to_parse == 0:
                    break

                for mapping in mappings:
                    block = resd_file.get_block_or_create(mapping.sample_type, block_type, mapping.channel)
                    if block_type == BLOCK_TYPE.CONSTANT_FREQUENCY:
                        if resampling_mode:
                            current_sample = mapping.remap(row)
                            current_timestamp = int(row[timestamp_source])

                            if prev_timestamp is None:
                                # First block
                                # NOTE(review): start_offset is group.start_time,
                                # which is None when -s was not given; the
                                # '+=' below would then raise — presumably -s is
                                # expected in resampling mode, verify.
                                prev_timestamp = current_timestamp
                                block.frequency = group.frequency
                                block.start_time = start_offset

                            # A changed timestamp starts a new constant-frequency
                            # block; flush the finished one first.
                            if current_timestamp != prev_timestamp:
                                resd_file.flush()
                                block = resd_file.get_block_or_create(mapping.sample_type, block_type, mapping.channel)
                                block.frequency = group.frequency
                                start_offset += (current_timestamp - prev_timestamp) # Gap between blocks
                                block.start_time = start_offset

                            block.add_sample(current_sample)
                            prev_timestamp = current_timestamp
                        else:
                            block.add_sample(mapping.remap(row))
                    else:
                        # Arbitrary-timestamp blocks take the timestamp per sample.
                        block.add_sample(mapping.remap(row), int(row[timestamp_source]))

                to_parse -= 1

        # In resampling mode, multiple blocks are usually generated from the single input
        # so block properties are tracked ad hoc.
        if not resampling_mode:
            for mapping in mappings:
                block = resd_file.get_block(mapping.sample_type, mapping.channel)
                if block_type == BLOCK_TYPE.CONSTANT_FREQUENCY:
                    block.frequency = group.frequency
                if group.start_time is not None:
                    block.start_time = group.start_time

        resd_file.flush()
238