#!/usr/bin/env python

# A check script that just works at the time of writing...
#
# It also builds a structure tree for further reference.
#
# The input file format must be similar to the headers generated by regtool,
# otherwise this script makes no sense at all.
#
# Known limitations:
#   1. comments are stripped textually; comment markers inside string
#      literals are not recognised as such
#   2. multiple expressions on the same line are not accepted (an INFO
#      message is printed for them)
#   3. single-line struct/union definitions are not accepted
#
# Check list:
#   1. a structure should not contain bitfield members alongside nested
#      struct/union
#   2. the bitfield sum in a struct should be 32 (i.e. it is well padded)
#   3. each bitfield type should be uint32_t
#   4. a union is expected to be `union { struct {xxx}; uint32_t val; }`;
#      anything else is reported but does not fail the check
#   5. typedef volatile struct xxx{}: xxx must exist
#
# Anything else only produces a warning and does not fail the check.

import os
import re
import sys
from typing import Any
from typing import Optional
from typing import Tuple

# Coloured severity tags used as prefixes of the printed diagnostics.
_ERROR = '\033[0;31mERROR\033[0m'
_WARN = '\033[0;33mWARN\033[0m'
_INFO = '\033[0;34mINFO\033[0m'


class MemberField:
    """Leaf of the reference tree: a member's type and optional bit width."""

    member_type = ''
    bitfield = None  # type: Optional[int]

    def __init__(self, m_type: str, m_bits: Optional[int] = None) -> None:
        """Store the member type and its bitfield width (None if not a bitfield)."""
        self.member_type = m_type
        self.bitfield = m_bits

    def __unicode__(self) -> str:
        return self.__str__()

    def __repr__(self) -> str:
        return self.__str__()

    def __str__(self) -> str:
        if self.bitfield is None:
            return '"Field type={}"'.format(self.member_type)
        return '"Field type={} bit={}"'.format(self.member_type, self.bitfield)


class SoCStructureHeaderChecker:
    """Line-oriented checker for register-description C headers.

    ``check()`` parses one file, prints diagnostics and returns 0 on success
    or a negative value on failure.  ``get_ref_tree()`` returns the tree that
    was built from the root-level ``extern`` declarations.
    """

    # capture: typedef, volatile, struct name
    __REGEXP_MATCH_STRUCTURE_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+struct[\s]+([\w]+)?[\s\S]*$'
    # capture: typedef, volatile, union name
    __REGEXP_MATCH_UNION_BEGIN = r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+union[\s]+([\w]+)?[\s\S]*$'
    # capture: type_var_name
    __REGEXP_MATCH_STRUCT_UNION_END_NAME = r'^[\s]*}[\s]*([\w\[\]\*]*)[\s]*;[\s\S]*$'
    # capture: type, name, bitfield
    __REGEXP_MATCH_BITFIELD_MEMBER = (r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w\*]+)[\s]+([\w\*]+(?:(?:\[[\s\S]*\])|(?:)))'
                                      r'[\s]*(?:(?:[\s]*;)|(?::[\s]*([\d]+)[\s]*;))[\s\S]*$')
    # should be useless and can be safely deleted
    __REGEXP_MATCH_MULTILINE_COMMENT = r'^[\s]*[\/]{0,2}\*[\/]?[\s\S]*$'
    __REGEX_MATCH_SIMPLE_VAL_FIELD = r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w]+)[\s]+([\w\[\]\*]+)[\s]*;[\s]*$'
    # capture: type, name
    __REGEX_MATCH_ROOT_EXTERNAL = r'^[\s]*extern[\s]+([\w]+)[\s]+([\w]+)[\s]*;[\s]*$'
    # capture: array base name (used to normalise `name[N]` into `name[]`)
    __REGEX_MATCH_ARRAY_NAME = r'^([\w]*)\[[\s\S]*\]$'

    __linecount = 0
    __fd = None  # type: Any
    __is_eof = False
    # True when the previously returned line ended inside an open /* comment
    __in_comment = False

    # generated reference tree
    # NOTE: class-level on purpose (kept from the original design), so it is
    # shared by every checker instance in the process.
    __ref_tree = dict()  # type: dict
    # middle result of generated tree, shared
    # named typedef, or named struct/union. referred to but never deleted
    __temp_ref_types = dict()  # type: dict

    def __expand_type(self, member_type: str, bitfield: Optional[int] = None) -> Any:
        """Resolve a member type into a tree node.

        Returns a ``MemberField`` for ``uint32_t``, the registered subtree for
        a previously seen typedef name, or None when the type is unknown (or
        when a non-u32 type carries a bitfield, which is reported as an error).
        """
        if member_type == 'uint32_t':
            return MemberField(member_type, bitfield)
        if bitfield is not None:
            print('\033[0;31mERROR\033[0m: non-u32 type with bitfield')
            return None
        if member_type in self.__temp_ref_types:
            return self.__temp_ref_types[member_type]
        return None

    @staticmethod
    def __strip_comments(line: str, in_comment: bool) -> Tuple[str, bool]:
        """Remove `//` and `/* */` comments from one physical line.

        ``in_comment`` tells whether the line starts inside a block comment.
        Returns the remaining code and whether a block comment is still open
        at the end of the line.  Each closed block comment is replaced by one
        space, as a C preprocessor does, so tokens around it never fuse.
        """
        pieces = []
        pos = 0
        while True:
            if in_comment:
                end = line.find('*/', pos)
                if end == -1:
                    return ''.join(pieces), True
                pieces.append(' ')
                pos = end + 2
                in_comment = False
                continue
            line_start = line.find('//', pos)
            block_start = line.find('/*', pos)
            if line_start != -1 and (block_start == -1 or line_start < block_start):
                # a line comment swallows the rest of the line (newline included)
                pieces.append(line[pos:line_start])
                return ''.join(pieces), False
            if block_start == -1:
                pieces.append(line[pos:])
                return ''.join(pieces), False
            pieces.append(line[pos:block_start])
            pos = block_start + 2
            in_comment = True

    def __getline(self, incomment: bool = False) -> Any:
        """Return the next line that still contains code, or None at EOF.

        Comments are stripped and blank lines skipped.  ``incomment`` forces
        the reader to start inside a block comment; a comment left open by the
        previously returned line is tracked automatically.  The loop is
        iterative so long comment blocks cannot exhaust the recursion limit.
        """
        in_comment = incomment or self.__in_comment
        while True:
            rawline = self.__fd.readline()
            if not rawline:
                self.__is_eof = True
                self.__in_comment = False
                return None
            self.__linecount += 1
            rawline, in_comment = self.__strip_comments(rawline, in_comment)
            if not re.match(r'^[\s]*$', rawline):
                break
        # remember an unterminated /* so the next call resumes inside it
        self.__in_comment = in_comment
        if rawline.count(';') > 1:
            print('\033[0;34mINFO\033[0m: line: {}: possibily multiple expression within same line'.format(self.__linecount))
            print(rawline)
        return rawline

    def __process_nested(self, rawline: str) -> Optional[Tuple[int, Any]]:
        """Parse a struct/union body if ``rawline`` opens one.

        Returns ``(ret, node_tree)`` from the matching handler, or None when
        the line starts neither a struct nor a union.  Struct is tried first.
        """
        handlers = (
            (self.__REGEXP_MATCH_STRUCTURE_BEGIN, self.__process_structure),
            (self.__REGEXP_MATCH_UNION_BEGIN, self.__process_union),
        )
        for pattern, handler in handlers:
            match_obj = re.match(pattern, rawline)
            if match_obj is not None:
                typedef_kw, volatile_kw, name = match_obj.groups()
                return handler(name, typedef_kw == 'typedef', volatile_kw == 'volatile')
        return None

    def __member_key(self, member_name: str) -> str:
        """Return the tree key for a member, collapsing `name[N]` to `name[]`."""
        array_match = re.match(self.__REGEX_MATCH_ARRAY_NAME, member_name)
        if array_match is not None:
            return array_match.groups()[0] + '[]'
        return member_name

    def __wrap_member(self, varname: str, node_tree: Any) -> Any:
        """Wrap the fields of a named member under its name (None if it has no fields)."""
        if not node_tree:
            return None
        return {self.__member_key(varname): node_tree}

    def __register_type(self, type_name: str, node_tree: Any) -> None:
        """Record a typedef'd subtree so later members/externs can refer to it."""
        if type_name in self.__temp_ref_types:
            # duplication, script bug: we are putting all types into same namespace
            print('script run into bug...')
        self.__temp_ref_types[type_name] = dict(node_tree)

    def __report_naming(self, name: Optional[str], is_typedef: bool, is_volatile: bool, parsed_varname: str) -> None:
        """Print naming hints for `typedef volatile` types (xxx_s tag vs xxx_t typedef)."""
        if not (is_typedef and is_volatile):
            return
        if name is None:
            if parsed_varname != '':
                print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
        elif parsed_varname.rstrip('t') != name.rstrip('s'):
            print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))

    def __process_structure(self, name: Optional[str], is_typedef: bool, is_volatile: bool) -> Tuple[int, Any]:
        """Parse a struct body up to its closing `}`.

        Returns ``(ret_val, node_tree)``: ``ret_val`` is 0 on success and
        negative when a check failed; ``node_tree`` maps member names to their
        nodes (wrapped under the variable name for a named member).
        """
        ret_val = 0
        # first check for anonymous register structs
        if is_typedef and is_volatile and name is None:
            print('\033[0;31mERROR\033[0m: line {}: annoymous struct'.format(self.__linecount))
            ret_val = -1
        node_tree = dict()  # type: Any
        bitcount = 0
        has_nested_struct_union = False
        has_non_bitfield_member = False
        parsed_varname = ''
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break
            # check for nested structure / union
            nested = self.__process_nested(rawline)
            if nested is not None:
                has_nested_struct_union = True
                ret, inherited_node_tree = nested
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue
            # check if end of struct
            match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
            if match_obj is not None:
                parsed_varname = match_obj.groups()[0]
                if bitcount not in (0, 32):
                    ret_val = -2
                    if is_typedef:
                        print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}'.format(self.__linecount, bitcount, parsed_varname))
                    else:
                        print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}, varname "{}"'
                              .format(self.__linecount, bitcount, name, parsed_varname))
                if is_typedef:
                    if parsed_varname == '' or '[' in parsed_varname:
                        # should be c error
                        print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
                        ret_val = -3
                    self.__register_type(parsed_varname, node_tree)
                elif name is not None:
                    # currently this kind of expression doesn't exist
                    print('!!!!!!UNDEALED CONDITION!!!!!')
                elif parsed_varname != '':
                    # named member, wrap and overwrite
                    node_tree = self.__wrap_member(parsed_varname, node_tree)
                # else: not a type, no member name, treat its fields as its parent's
                break
            # check member
            match_obj = re.match(self.__REGEXP_MATCH_BITFIELD_MEMBER, rawline)
            if match_obj is not None:
                member_type, member_name, bits = match_obj.groups()
                field_bit = None
                if bits is not None:
                    field_bit = int(bits)
                    bitcount += field_bit
                    # bitfield should be u32
                    if member_type != 'uint32_t':
                        print('\033[0;33mWARN\033[0m: line: {}: {} has type {}'.format(self.__linecount, member_name, member_type))
                else:
                    has_non_bitfield_member = True
                # append to node tree
                member_node = self.__expand_type(member_type, field_bit)
                if member_node is not None:
                    node_tree[self.__member_key(member_name)] = member_node
                elif '*' not in member_type:
                    print('\033[0;33mWARN\033[0m: line {}: unknown type {}'.format(self.__linecount, member_type))
                else:
                    print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, member_type))
                continue
            # check comments
            if re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline) is not None:
                continue
            # dump out unmatched condition
            print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))

        if bitcount != 0 and has_nested_struct_union:
            print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and nested structure/union'.format(self.__linecount))
        if bitcount != 0 and has_non_bitfield_member:
            print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and non-bitfield member'.format(self.__linecount))
        self.__report_naming(name, is_typedef, is_volatile, parsed_varname)
        return ret_val, node_tree

    def __process_union(self, name: Optional[str], is_typedef: bool, is_volatile: bool) -> Tuple[int, Any]:
        """Parse a union body up to its closing `}`.

        Returns ``(ret_val, node_tree)`` like ``__process_structure``; for a
        typedef'd union the tree is registered as a type and None is returned
        in its place.
        """
        ret_val = 0
        # first check for anonymous register structs
        if is_typedef and is_volatile and name is None:
            print('\033[0;31mERROR\033[0m: line {}: annoymous union'.format(self.__linecount))
            ret_val = -1
        node_tree = dict()  # type: Any
        has_struct_count = 0
        has_val_field_count = 0
        # initialised up front: the closing brace may never be reached (EOF)
        parsed_varname = ''
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break
            # check for nested structure / union
            nested = self.__process_nested(rawline)
            if nested is not None:
                has_struct_count += 1
                ret, inherited_node_tree = nested
                if ret != 0:
                    ret_val = -2
                if inherited_node_tree is not None:
                    node_tree.update(inherited_node_tree)
                continue
            # check if end of union
            match_obj = re.match(self.__REGEXP_MATCH_STRUCT_UNION_END_NAME, rawline)
            if match_obj is not None:
                parsed_varname = match_obj.groups()[0]
                if is_typedef:
                    if parsed_varname == '':
                        # should be c error
                        print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
                        ret_val = -3
                    self.__register_type(parsed_varname, node_tree)
                    node_tree = None
                elif name is not None:
                    # currently this kind of expression doesn't exist
                    print('!!!!!!UNDEALED CONDITION!!!!!')
                elif parsed_varname != '':
                    # named member, wrap and overwrite
                    node_tree = self.__wrap_member(parsed_varname, node_tree)
                # else: not a type, no member name, treat its fields as its parent's
                break
            # check comments
            if re.match(self.__REGEXP_MATCH_MULTILINE_COMMENT, rawline) is not None:
                continue
            match_obj = re.match(self.__REGEX_MATCH_SIMPLE_VAL_FIELD, rawline)
            if match_obj is not None:
                member_type, member_name = match_obj.groups()
                # expecting to see 'uint32_t val;'
                if member_type != 'uint32_t' or member_name != 'val':
                    print(('\033[0;33mWARN\033[0m: unexpected union member at {}: {}'.format(self.__linecount, rawline)).replace('\n', ''))
                else:
                    has_val_field_count += 1
                # append to node tree
                member_node = self.__expand_type(member_type, None)
                if member_node is not None:
                    node_tree[member_name] = member_node
                elif '*' not in member_type:
                    print('\033[0;31mERROR\033[0m: line {}: unknown type {}'.format(self.__linecount, member_type))
                else:
                    print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, member_type))
                continue
            # dump out unmatched condition
            print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))

        if not (has_struct_count == 1 and has_val_field_count == 1):
            print('\033[0;34mINFO\033[0m: line: {}: not a typical union: {} nested structures, {} u32 val member'
                  .format(self.__linecount, has_struct_count, has_val_field_count))
        self.__report_naming(name, is_typedef, is_volatile, parsed_varname)
        return ret_val, node_tree

    def __process_root(self) -> int:
        """Parse the file's top level; return 0, or -2 if any struct/union failed."""
        ret_val = 0
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break
            # start checking by finding any of structure or union
            nested = self.__process_nested(rawline)
            if nested is not None:
                if nested[0] != 0:
                    ret_val = -2
                continue
            # processing root level external declaration
            match_obj = re.match(self.__REGEX_MATCH_ROOT_EXTERNAL, rawline)
            if match_obj is not None:
                type_name, var_name = match_obj.groups()
                self.__ref_tree[var_name] = self.__expand_type(type_name)
        return ret_val

    def check(self, file: str) -> int:
        """Check one header file, print the verdict and return 0 on success."""
        self.__linecount = 0
        self.__is_eof = False
        self.__in_comment = False
        # `with` guarantees the handle is closed even if parsing raises
        with open(file, 'r', encoding='utf8') as fd:
            self.__fd = fd
            try:
                ret_val = self.__process_root()
            finally:
                self.__fd = None

        if ret_val != 0:
            print('\033[0;31mCHECK FAILED\033[0m:\t{}'.format(file))
        else:
            print('\033[0;32mCHECK PASSED\033[0m:\t{}'.format(file))
        return ret_val

    def get_ref_tree(self) -> Any:
        """Return the reference tree built from root-level `extern` declarations."""
        return self.__ref_tree


def main() -> None:
    """Command-line entry point: `script <header> [print]`; exits with the check result."""
    if len(sys.argv) <= 1 or not os.path.isfile(sys.argv[1]):
        print('file not exist')
        sys.exit(-1)
    checker = SoCStructureHeaderChecker()
    print('CHECKING:\t{}'.format(sys.argv[1]))
    ret = checker.check(sys.argv[1])
    if len(sys.argv) == 3 and sys.argv[2] == 'print':
        print(checker.get_ref_tree())
    sys.exit(ret)


if __name__ == '__main__':
    main()