#!/usr/bin/env python3
#
# Copyright (c) 2010-2023 Antmicro
#
# This file is licensed under the MIT License.
# Full license text is available in 'licenses/MIT.txt'.
#
"""Convert CSV sample data into a RESD file.

Each '-i/--input' option starts a new group of command-line arguments;
every group describes one CSV file, the column-to-sample mappings
('-m/--map') and the timing information (constant '--frequency' and/or
an explicit '--timestamp' column) used to build RESD blocks.
"""

import argparse
import csv
import sys
from dataclasses import dataclass
from typing import List, Optional

import resd
from grammar import SAMPLE_TYPE, BLOCK_TYPE


@dataclass
class Mapping:
    """Describes how CSV columns are converted into a RESD sample."""
    sample_type: SAMPLE_TYPE     # RESD sample type the data maps to
    map_from: List[str]          # CSV column labels to read values from
    map_to: Optional[List[str]]  # optional property names for multi-field samples
    channel: int                 # RESD channel number

    def remap(self, row):
        """Convert one CSV row (a dict) into a sample value.

        Returns a dict when 'map_to' property names were provided,
        otherwise a single int value.
        """
        output = [self._retype(row[key]) for key in self.map_from]
        if self.map_to:
            output = dict(zip(self.map_to, output))
        if isinstance(output, list) and len(output) == 1:
            # A single unnamed field is emitted as a bare integer sample.
            output = int(output[0])
        return output

    def _retype(self, value):
        """Best-effort conversion of a raw CSV string to int/float/str."""
        try:
            if all(c.isdigit() for c in value.lstrip('-')):
                return int(value)
            elif all(c.isdigit() or c == '.' for c in value.lstrip('-')):
                return float(value)
            elif value[0] == '"' and value[-1] == '"':
                return value[1:-1]
        except ValueError:
            return value
        # Fix: previously fell through and returned None for plain,
        # unquoted, non-numeric strings; keep the original text instead.
        return value


def parse_mapping(mapping):
    """Parse one '-m/--map' argument of the form
    <type>:<index/label>[:<to_property>:<channel>].

    Returns a (sample_type, map_from, map_to, channel) tuple,
    or None (after printing an error) when the mapping is invalid.
    """
    chunks = mapping.split(':')

    # An empty third chunk (e.g. 'temp:0::2') means "no property names";
    # normalize it to the explicit placeholder '_'.
    if len(chunks) >= 3 and not chunks[2]:
        chunks[2] = '_'

    if not all(chunks) or (len(chunks) < 2 or len(chunks) > 4):
        print(f'{mapping} is invalid mapping')
        return None

    # Substring match against the known sample types (case-insensitive).
    possible_types = [type_ for type_ in SAMPLE_TYPE.encmapping if chunks[0].lower() in type_.lower()]
    if not possible_types:
        print(f'Invalid type: {chunks[0]}')
        print(f'Possible types: {", ".join(SAMPLE_TYPE.ksymapping.values())}')
        return None

    if len(possible_types) > 1:
        # Fix: 'possible_types' is a flat list of types (see its use just
        # below), so the previous 'for _, type_ in possible_types'
        # pair-unpacking was wrong and would fail on the error path.
        print(f'More than one type matches: {", ".join(str(type_) for type_ in possible_types)}')
        return None

    type_ = possible_types[0]
    map_from = chunks[1].split(',')
    map_to = chunks[2].split(',') if len(chunks) >= 3 and chunks[2] != '_' else None
    channel = int(chunks[3]) if len(chunks) >= 4 else 0

    return type_, map_from, map_to, channel


def parse_arguments():
    """Parse sys.argv as one or more '-i/--input'-separated argument groups.

    Exits with usage information when no arguments (or '-h/--help') are
    given, when a group lacks timing information or a valid mapping, or
    when no output path is provided. Returns the list of parsed groups.
    """
    arguments = sys.argv[1:]

    entry_parser = argparse.ArgumentParser()
    entry_parser.add_argument('-i', '--input', required=True, help='path to csv file')
    entry_parser.add_argument('-m', '--map', action='append', type=parse_mapping,
        help='mapping in format <type>:<index/label>[:<to_property>:<channel>], multiple mappings are possible')
    entry_parser.add_argument('-s', '--start-time', type=int, help='start time (in nanoseconds)')
    entry_parser.add_argument('-f', '--frequency', type=float, help='frequency of the data (in Hz)')
    entry_parser.add_argument('-t', '--timestamp', help='index/label of a column in the csv file for the timestamps (in nanoseconds)')
    entry_parser.add_argument('-o', '--offset', type=int, default=0, help='number of samples to skip from the beginning of the file')
    entry_parser.add_argument('-c', '--count', type=int, default=sys.maxsize, help='number of samples to parse')
    entry_parser.add_argument('output', nargs='?', help='output file path')

    if not arguments or any(v in ('-h', '--help') for v in arguments):
        entry_parser.parse_args(['--help'])
        sys.exit(0)

    # Split the command line on each '-i/--input' occurrence, so every
    # chunk is parsed as an independent argument group.
    split_indices = [i for i, v in enumerate(arguments) if v in ('-i', '--input')]
    split_indices.append(len(arguments))
    subentries = [arguments[a:b] for a, b in zip(split_indices, split_indices[1:])]

    entries = []
    for subentry in subentries:
        parsed = entry_parser.parse_args(subentry)
        if parsed.frequency is None and parsed.timestamp is None:
            print(f'{parsed.input}: either frequency or timestamp should be provided')
            sys.exit(1)
        # Fix: exit cleanly instead of crashing later in __main__ when no
        # mapping was given (parsed.map is None) or any '-m' argument
        # failed to parse (parse_mapping returned None).
        if not parsed.map or any(m is None for m in parsed.map):
            print(f'{parsed.input}: at least one valid mapping (-m/--map) is required')
            sys.exit(1)
        if parsed.frequency and parsed.timestamp:
            print(f'Data will be resampled to {parsed.frequency}Hz based on provided timestamps')

        entries.append(parsed)

    # The output path is taken from the last group; without it, show usage.
    if entries and entries[-1].output is None:
        entry_parser.parse_args(['--help'])
        sys.exit(1)

    return entries


def map_source(labels, source):
    """Resolve a column reference (numeric index or label) to a CSV label.

    Returns None (after printing an error) when the reference does not
    match any column; passes a None 'source' through unchanged.
    """
    if source is None:
        return None

    # Fix: use str.isdigit() directly — the previous
    # all(c.isdigit() for c in source) expression was vacuously True for
    # the empty string and then crashed in int('').
    source = int(source) if source.isdigit() else source
    if isinstance(source, int) and 0 <= source < len(labels):
        source = labels[source]

    if source not in labels:
        print(f'{source} is invalid source')
        return None

    return source


def rebuild_mapping(labels, mapping):
    """Resolve the raw (type, from, to, channel) tuple from parse_mapping
    into a Mapping with its source columns mapped to actual CSV labels.

    Returns None when any source column reference is invalid.
    """
    map_from = mapping[1]

    for i, src in enumerate(map_from):
        src = map_source(labels, src)
        if src is None:
            return None
        map_from[i] = src

    return Mapping(mapping[0], map_from, mapping[2], mapping[3])


if __name__ == '__main__':
    arguments = parse_arguments()
    output_file = arguments[-1].output

    resd_file = resd.RESD(output_file)
    for group in arguments:
        block_type = BLOCK_TYPE.ARBITRARY_TIMESTAMP
        resampling_mode = False
        if group.frequency is not None:
            block_type = BLOCK_TYPE.CONSTANT_FREQUENCY
            if group.timestamp is not None:
                # In resampling mode we use provided timestamps to generate constant frequency sample blocks.
                # It allows to reconstruct RESD stream spanning long time periods from the sparse data.
                # The idea is based on the default behavior of RESD, that allows for gaps between RESD blocks.
                # On the other side, constant frequency sample blocks contain continuous, densely packed data,
                # so we split samples into separate groups that are used to generate separate blocks.
                # It is based on a simple heuristic:
                # Samples with the same timestamps are grouped together and resampled to the frequency passed from the command line.
                # Start time of the generated block is calculated as an offset to the previous timestamp + the initial start-time passed from the command line.
                # Therefore for sparse data you often end up with the RESD file that consists of multiple blocks made of just one sample.
                # Start time of the block calculated from the provided timestamps is crucial,
                # because it translates to the virtual time during emulation, when the first sample from the block appears.
                # Gaps can be handled directly in the model using RESD APIs.
                # Usual behavior is to provide a default sample or repeat the last sample in the place of gaps.
                # If your CSV file contains well spaced samples, it is better to not provide timestamps explicitly
                # and generate a single block containing all samples.
                resampling_mode = True

        with open(group.input, 'rt') as csv_file:
            csv_reader = csv.DictReader(csv_file)
            labels = None
            # Fix: 'mappings' used to be uninitialized (the original
            # 'labels = mapping = None' initialized an unused name), so an
            # empty CSV raised NameError in the post-loop update below.
            mappings = []
            timestamp_source = None

            to_skip = group.offset
            to_parse = group.count

            # These fields are used only in resampling mode to keep track of the block's start time.
            # In resampling mode, data is automatically split into multiple blocks based on the timestamps.
            prev_timestamp = None
            start_offset = group.start_time

            for row in csv_reader:
                if labels is None:
                    # First row: capture the column labels and resolve mappings.
                    labels = list(row.keys())
                    mappings = [rebuild_mapping(labels, mapping) for mapping in group.map]
                    # Fix: abort cleanly when any mapping references an
                    # invalid column (rebuild_mapping returned None) instead
                    # of crashing with AttributeError below.
                    if any(m is None for m in mappings):
                        sys.exit(1)
                    if block_type == BLOCK_TYPE.ARBITRARY_TIMESTAMP or resampling_mode:
                        timestamp_source = map_source(labels, group.timestamp)
                        if timestamp_source is None:
                            sys.exit(1)

                if to_skip > 0:
                    to_skip -= 1
                    continue

                if to_parse == 0:
                    break

                # NOTE(review): 'prev_timestamp' is shared across mappings —
                # with more than one '-m' mapping in resampling mode, blocks
                # recreated after a flush for the second and later mappings do
                # not get 'frequency'/'start_time' set here; confirm against
                # resd block defaults.
                for mapping in mappings:
                    block = resd_file.get_block_or_create(mapping.sample_type, block_type, mapping.channel)
                    if block_type == BLOCK_TYPE.CONSTANT_FREQUENCY:
                        if resampling_mode:
                            current_sample = mapping.remap(row)
                            current_timestamp = int(row[timestamp_source])

                            if prev_timestamp is None:
                                # First block
                                prev_timestamp = current_timestamp
                                block.frequency = group.frequency
                                block.start_time = start_offset

                            if current_timestamp != prev_timestamp:
                                # Timestamp changed: close the current block and
                                # start a new one shifted by the observed gap.
                                resd_file.flush()
                                block = resd_file.get_block_or_create(mapping.sample_type, block_type, mapping.channel)
                                block.frequency = group.frequency
                                start_offset += (current_timestamp - prev_timestamp)  # Gap between blocks
                                block.start_time = start_offset

                            block.add_sample(current_sample)
                            prev_timestamp = current_timestamp
                        else:
                            block.add_sample(mapping.remap(row))
                    else:
                        block.add_sample(mapping.remap(row), int(row[timestamp_source]))

                to_parse -= 1

            # In resampling mode, multiple blocks are usually generated from the single input
            # so block properties are tracked ad hoc.
            if not resampling_mode:
                for mapping in mappings:
                    block = resd_file.get_block(mapping.sample_type, mapping.channel)
                    if block_type == BLOCK_TYPE.CONSTANT_FREQUENCY:
                        block.frequency = group.frequency
                    if group.start_time is not None:
                        block.start_time = group.start_time

    resd_file.flush()