1#!/usr/bin/env python
2
3# A check script that just works at the time of writing...
4#
5# also builds a structure tree for further reference
6#
# Input file format must be similar to the headers generated by regtool, or this script makes no sense at all
8#
# Known limitations:
# 1. `/* ... */ /* ... */` on one line may behave badly together with multiline comments
# 2. won't accept multiple expressions within the same line (but will report that as INFO)
# 3. won't accept single-line struct/union definitions
13#
# Check list:
# 1. a structure should not contain bitfield members alongside a nested struct/union
# 2. the bitfield sum in a struct should be 32 (i.e. the struct is well padded)
# 3. each bitfield type should be uint32_t
# 4. a union is expected to look like `union { struct {xxx}; uint32_t val; }`; complain (but do not fail) if it has no u32 `val` member
# 5. typedef volatile struct xxx{}: xxx must exist
#
# Anything else only produces a warning and does not fail the check
22
import os
import re
import sys
from typing import Any, Optional
27
28
class MemberField:
    """Leaf of the structure tree: one member described by type and bit width.

    ``member_type`` holds the type string handed to the constructor and
    ``bitfield`` the bit width, or ``None`` when no width was given.
    """

    # class-level fallbacks; both are overwritten per instance in __init__
    bitfield = None
    member_type = ''

    def __init__(self, m_type: str, m_bits: int=None) -> None:
        self.bitfield = m_bits
        self.member_type = m_type

    def __str__(self) -> str:
        # the bit width is only shown when one was given (0 still counts)
        if self.bitfield is not None:
            return '"Field type={} bit={}"'.format(self.member_type, self.bitfield)
        return '"Field type={}"'.format(self.member_type)

    def __repr__(self) -> str:
        # same text as str() so that printed trees stay readable
        return self.__str__()

    def __unicode__(self) -> str:
        return self.__str__()
47
48
class SoCStructureHeaderChecker:
    """Check a regtool-style SoC register header and build a reference tree.

    The header is read line by line; comments are removed before a line is
    matched against the regular expressions below. ``check()`` returns 0 when
    no error was reported and a negative value otherwise; warnings and infos
    are only printed. Types introduced with ``typedef`` are remembered per
    checker instance so that later members and root-level ``extern``
    declarations can be expanded into nested dictionaries, available through
    ``get_ref_tree()``.
    """

    # capture: typedef, volatile, struct name
    __REGEXP_MATCH_STRUCTURE_BEGIN = re.compile(
        r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+struct[\s]+([\w]+)?[\s\S]*$')
    # capture: typedef, volatile, union name
    __REGEXP_MATCH_UNION_BEGIN = re.compile(
        r'^[\s]*(typedef)?(?:[\s]+(volatile))?[\s]+union[\s]+([\w]+)?[\s\S]*$')
    # capture: type_var_name
    __REGEXP_MATCH_STRUCT_UNION_END_NAME = re.compile(r'^[\s]*}[\s]*([\w\[\]\*]*)[\s]*;[\s\S]*$')
    # capture: type, name, bitfield
    __REGEXP_MATCH_BITFIELD_MEMBER = re.compile(
        r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w\*]+)[\s]+([\w\*]+(?:(?:\[[\s\S]*\])|(?:)))'
        r'[\s]*(?:(?:[\s]*;)|(?::[\s]*([\d]+)[\s]*;))[\s\S]*$')
    # leftover comment fragments (e.g. a stray `*` or `*/` line); normally
    # nothing reaches this because comments are stripped while reading
    __REGEXP_MATCH_MULTILINE_COMMENT = re.compile(r'^[\s]*[\/]{0,2}\*[\/]?[\s\S]*$')
    # capture: type, name
    __REGEX_MATCH_SIMPLE_VAL_FIELD = re.compile(
        r'^[\s]*(?:(?:volatile[\s]+)|(?:))([\w]+)[\s]+([\w\[\]\*]+)[\s]*;[\s]*$')
    # capture: type, name
    __REGEX_MATCH_ROOT_EXTERNAL = re.compile(r'^[\s]*extern[\s]+([\w]+)[\s]+([\w]+)[\s]*;[\s]*$')
    # capture: name in front of an array subscript
    __REGEXP_MATCH_ARRAY_NAME = re.compile(r'^([\w]*)\[[\s\S]*\]$')

    def __init__(self) -> None:
        """Create a checker with empty, instance-private parser state."""
        self.__linecount = 0
        self.__fd = None     # type: Any
        self.__is_eof = False
        # generated reference tree: extern variable name -> expanded type
        self.__ref_tree = dict()     # type: dict
        # typedef name -> member tree; entries are referred to by later
        # members and are never deleted. Kept per instance so that separate
        # checkers do not see each other's types.
        self.__temp_ref_types = dict()   # type: dict

    def __expand_type(self, member_type: str, bitfield: Optional[int] = None) -> Any:
        """Turn a member type into a tree node.

        Returns a ``MemberField`` for ``uint32_t``, the recorded member tree
        for a known typedef, and ``None`` for anything else (an error is
        printed when a non-``uint32_t`` type carries a bit width).
        """
        if member_type == 'uint32_t':
            return MemberField(member_type, bitfield)
        if bitfield is not None:
            print('\033[0;31mERROR\033[0m: non-u32 type with bitfield')
            return None
        return self.__temp_ref_types.get(member_type)

    @staticmethod
    def __strip_comments(line: str) -> Any:
        """Remove ``//`` and ``/* */`` comments from one physical line.

        Returns ``(text, in_comment)``: ``text`` is the line without comments
        and ``in_comment`` is True when a ``/*`` was opened but not closed on
        this line. Comment markers are handled in the order they appear, so a
        ``//`` inside a block comment (and vice versa) is not misread.
        """
        kept = []
        pos = 0
        while True:
            line_start = line.find('//', pos)
            block_start = line.find('/*', pos)
            if block_start == -1 or (line_start != -1 and line_start < block_start):
                # no block comment before the end of the code part
                end = len(line) if line_start == -1 else line_start
                kept.append(line[pos:end])
                return ''.join(kept), False
            kept.append(line[pos:block_start])
            block_end = line.find('*/', block_start + 2)
            if block_end == -1:
                return ''.join(kept), True
            pos = block_end + 2

    def __getline(self, incomment: bool = False) -> Any:
        """Return the next line that still has content after comment removal.

        ``incomment`` tells the reader that it starts inside a block comment.
        Blank lines are skipped. Code in front of a block comment that spans
        several lines is joined with whatever follows the closing ``*/``.
        Returns ``None`` (and sets the EOF flag) when the file is exhausted.
        """
        pending = ''
        # iterate instead of recursing: long comments or blank runs must not
        # be limited by the interpreter's recursion depth
        while True:
            rawline = self.__fd.readline()
            if not rawline:
                self.__is_eof = True
                return None
            self.__linecount += 1

            if incomment:
                block_end = rawline.find('*/')
                if block_end == -1:
                    # still inside the comment
                    continue
                # keep only what is behind the comment
                rawline = rawline[block_end + 2:]

            text, incomment = self.__strip_comments(rawline)
            pending += text
            if incomment:
                continue
            if not pending.strip():
                # skip empty line
                pending = ''
                continue
            if pending.count(';') > 1:
                print('\033[0;34mINFO\033[0m: line: {}: possibily multiple expression within same line'.format(self.__linecount))
                print(pending)
            return pending

    def __member_key(self, varname: str) -> str:
        """Normalise an array member name such as ``foo[4]`` to ``foo[]``."""
        array_match = self.__REGEXP_MATCH_ARRAY_NAME.match(varname)
        if array_match is not None:
            return array_match.group(1) + '[]'
        return varname

    def __register_typedef(self, type_name: str, node_tree: dict) -> None:
        """Remember a shallow copy of ``node_tree`` under ``type_name``."""
        if type_name in self.__temp_ref_types:
            # duplication, script bug: we are putting all types into same namespace
            print('script run into bug...')
        self.__temp_ref_types[type_name] = dict(node_tree)

    def __wrap_named_member(self, varname: str, node_tree: dict) -> Any:
        """Wrap the fields of a named anonymous struct/union member.

        Returns ``None`` when there are no fields to wrap.
        """
        if not node_tree:
            return None
        return {self.__member_key(varname): node_tree}

    def __process_nested(self, rawline: str, node_tree: dict) -> Optional[int]:
        """Parse a struct/union definition if ``rawline`` opens one.

        The fields returned by the nested definition are merged into
        ``node_tree``. Returns ``None`` when the line does not open a
        struct/union, otherwise the nested parser's result code.
        """
        openers = (
            (self.__REGEXP_MATCH_STRUCTURE_BEGIN, self.__process_structure),
            (self.__REGEXP_MATCH_UNION_BEGIN, self.__process_union),
        )
        for pattern, handler in openers:
            match_obj = pattern.match(rawline)
            if match_obj is None:
                continue
            kw_typedef, kw_volatile, type_name = match_obj.groups()
            ret, inherited_node_tree = handler(type_name, kw_typedef == 'typedef', kw_volatile == 'volatile')
            if inherited_node_tree is not None:
                node_tree.update(inherited_node_tree)
            return ret
        return None

    def __check_typedef_naming(self, name: Any, is_typedef: bool, is_volatile: bool, parsed_varname: str) -> None:
        """Compare the tag name and the typedef name of a volatile typedef.

        Prints a suggestion when the tag is missing and a warning when the
        two names do not agree after removing their trailing ``s``/``t``.
        """
        if not (is_typedef and is_volatile):
            return
        if name is None:
            if parsed_varname != '':
                print('SUGGEST: {}'.format(parsed_varname.rstrip('t') + 's'))
        elif parsed_varname.rstrip('t') != name.rstrip('s'):
            print('\033[0;33mWARN\033[0m: line: {}: different type and typedef name: {} {}'.format(self.__linecount, name, parsed_varname))

    def __process_structure(self, name: str, is_typedef: bool, is_volatile: bool) -> Any:
        """Parse the body of a struct whose opening line was already read.

        ``name`` is the struct tag (``None`` if absent). Returns
        ``(ret_val, node_tree)``: ``ret_val`` is 0 or a negative error code,
        ``node_tree`` maps member names to their nodes (``None`` for a named
        member without usable fields).
        """
        ret_val = 0
        # first check for anonymous register structs
        if is_typedef and is_volatile and name is None:
            print('\033[0;31mERROR\033[0m: line {}: annoymous struct'.format(self.__linecount))
            ret_val = -1
        node_tree = dict()  # type: Any
        bitcount = 0
        has_nested_struct_union = False
        has_non_bitfield_member = False
        parsed_varname = ''
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break

            # nested struct/union
            nested_ret = self.__process_nested(rawline, node_tree)
            if nested_ret is not None:
                has_nested_struct_union = True
                if nested_ret != 0:
                    ret_val = -2
                continue

            # end of struct
            match_obj = self.__REGEXP_MATCH_STRUCT_UNION_END_NAME.match(rawline)
            if match_obj is not None:
                parsed_varname = match_obj.group(1)
                if bitcount not in (0, 32):
                    ret_val = -2
                    if is_typedef:
                        print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}'.format(self.__linecount, bitcount, parsed_varname))
                    else:
                        print('\033[0;31mERROR\033[0m: line {}: bitfield count is {}, type {}, varname "{}"'
                              .format(self.__linecount, bitcount, name, parsed_varname))
                if is_typedef:
                    if parsed_varname == '' or '[' in parsed_varname:
                        # should be c error
                        print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
                        ret_val = -3
                    self.__register_typedef(parsed_varname, node_tree)
                elif name is not None:
                    # currently this kind of expression doesn't exist
                    print('!!!!!!UNDEALED CONDITION!!!!!')
                elif parsed_varname != '':
                    # named member, wrap and overwrite
                    node_tree = self.__wrap_named_member(parsed_varname, node_tree)
                # otherwise: not a type, no member name, its fields are
                # treated as its parent's
                break

            # plain or bitfield member
            match_obj = self.__REGEXP_MATCH_BITFIELD_MEMBER.match(rawline)
            if match_obj is not None:
                member_type, member_name, member_bits = match_obj.groups()
                field_bit = None
                if member_bits is not None:
                    field_bit = int(member_bits)
                    bitcount += field_bit
                    # bitfield should be u32
                    if member_type != 'uint32_t':
                        print('\033[0;33mWARN\033[0m: line: {}: {} has type {}'.format(self.__linecount, member_name, member_type))
                else:
                    has_non_bitfield_member = True
                # append to node tree
                member_node = self.__expand_type(member_type, field_bit)
                if member_node is not None:
                    node_tree[self.__member_key(member_name)] = member_node
                elif '*' not in member_type:
                    print('\033[0;33mWARN\033[0m: line {}: unknown type {}'.format(self.__linecount, member_type))
                else:
                    print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, member_type))
                continue

            # leftover comment fragments
            if self.__REGEXP_MATCH_MULTILINE_COMMENT.match(rawline) is not None:
                continue
            # dump out unmatched condition
            print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))

        if bitcount != 0 and has_nested_struct_union:
            print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and nested structure/union'.format(self.__linecount))
        if bitcount != 0 and has_non_bitfield_member:
            print('\033[0;33mWARN\033[0m: line: {}: mixed bitfield member and non-bitfield member'.format(self.__linecount))
        self.__check_typedef_naming(name, is_typedef, is_volatile, parsed_varname)
        return ret_val, node_tree

    def __process_union(self, name: str, is_typedef: bool, is_volatile: bool) -> Any:
        """Parse the body of a union whose opening line was already read.

        ``name`` is the union tag (``None`` if absent). Returns
        ``(ret_val, node_tree)``: ``ret_val`` is 0 or a negative error code;
        ``node_tree`` is ``None`` for a typedef (its fields are only recorded
        as a type) and for a named member without usable fields.
        """
        ret_val = 0
        # first check for anonymous register unions
        if is_typedef and is_volatile and name is None:
            print('\033[0;31mERROR\033[0m: line {}: annoymous union'.format(self.__linecount))
            ret_val = -1
        node_tree = dict()  # type: Any
        has_struct_count = 0
        has_val_field_count = 0
        # must exist even when the closing brace is never reached (EOF)
        parsed_varname = ''
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break

            # nested struct/union
            nested_ret = self.__process_nested(rawline, node_tree)
            if nested_ret is not None:
                has_struct_count += 1
                if nested_ret != 0:
                    ret_val = -2
                continue

            # end of union
            match_obj = self.__REGEXP_MATCH_STRUCT_UNION_END_NAME.match(rawline)
            if match_obj is not None:
                parsed_varname = match_obj.group(1)
                if is_typedef:
                    if parsed_varname == '':
                        # should be c error
                        print('\033[0;31mERROR\033[0m: line {}: C error'.format(self.__linecount))
                        ret_val = -3
                    self.__register_typedef(parsed_varname, node_tree)
                    node_tree = None
                elif name is not None:
                    # currently this kind of expression doesn't exist
                    print('!!!!!!UNDEALED CONDITION!!!!!')
                elif parsed_varname != '':
                    # named member, wrap and overwrite
                    node_tree = self.__wrap_named_member(parsed_varname, node_tree)
                # otherwise: not a type, no member name, its fields are
                # treated as its parent's
                break

            # leftover comment fragments
            if self.__REGEXP_MATCH_MULTILINE_COMMENT.match(rawline) is not None:
                continue

            match_obj = self.__REGEX_MATCH_SIMPLE_VAL_FIELD.match(rawline)
            if match_obj is not None:
                member_type, member_name = match_obj.groups()
                # expecting to see 'uint32_t val;'
                if member_type != 'uint32_t' or member_name != 'val':
                    print(('\033[0;33mWARN\033[0m: unexpected union member at {}: {}'.format(self.__linecount, rawline)).replace('\n', ''))
                else:
                    has_val_field_count += 1
                # append to node tree
                member_node = self.__expand_type(member_type, None)
                if member_node is not None:
                    node_tree[member_name] = member_node
                elif '*' not in member_type:
                    print('\033[0;31mERROR\033[0m: line {}: unknown type {}'.format(self.__linecount, member_type))
                else:
                    print('\033[0;33mWARN\033[0m: line {}: pointer type {}'.format(self.__linecount, member_type))
                continue
            # dump out unmatched condition
            print(('\033[0;33mWARN\033[0m: line: {}: unexpected expression: {}'.format(self.__linecount, rawline)).replace('\n', ''))

        if not (has_struct_count == 1 and has_val_field_count == 1):
            print('\033[0;34mINFO\033[0m: line: {}: not a typical union: {} nested structures, {} u32 val member'
                  .format(self.__linecount, has_struct_count, has_val_field_count))
        self.__check_typedef_naming(name, is_typedef, is_volatile, parsed_varname)
        return ret_val, node_tree

    def __process_root(self) -> int:
        """Parse the file level: struct/union definitions and externs.

        Returns 0 when every definition parsed without error, -2 otherwise.
        """
        ret_val = 0
        while not self.__is_eof:
            rawline = self.__getline()
            if rawline is None:
                break
            # start checking by finding any of structure or union; fields
            # handed back to the root level are not needed afterwards
            nested_ret = self.__process_nested(rawline, dict())
            if nested_ret is not None:
                if nested_ret != 0:
                    ret_val = -2
                continue
            # processing root level external declaration
            match_obj = self.__REGEX_MATCH_ROOT_EXTERNAL.match(rawline)
            if match_obj is not None:
                type_name, var_name = match_obj.groups()
                self.__ref_tree[var_name] = self.__expand_type(type_name)
        return ret_val

    def check(self, file: str) -> int:
        """Check the header at path ``file`` and print the verdict.

        Returns 0 when the check passed and a negative value when it failed.
        Types and externs found are added to this checker's reference tree.
        """
        # the context manager closes the file even if parsing raises
        with open(file, 'r', encoding='utf8') as header:
            self.__fd = header
            self.__linecount = 0
            self.__is_eof = False
            try:
                ret_val = self.__process_root()
            finally:
                self.__fd = None

        if ret_val != 0:
            print('\033[0;31mCHECK FAILED\033[0m:\t{}'.format(file))
        else:
            print('\033[0;32mCHECK PASSED\033[0m:\t{}'.format(file))
        return ret_val

    def get_ref_tree(self) -> Any:
        """Return the reference tree built by ``check()`` (extern name -> type tree)."""
        return self.__ref_tree
403
404
def main() -> None:
    """Command-line entry point: check the header given as first argument.

    Usage: ``<script> <header> [print]``. Exits with -1 when the argument is
    missing or is not an existing file; otherwise exits with the result of
    ``SoCStructureHeaderChecker.check``. When the second argument is the word
    ``print`` (and nothing follows it), the reference tree is printed too.
    """
    if len(sys.argv) <= 1 or not os.path.isfile(sys.argv[1]):
        print('file not exist')
        # sys.exit instead of the builtin exit(): the latter is injected by
        # the site module and is not guaranteed to exist
        sys.exit(-1)
    header_path = sys.argv[1]
    checker = SoCStructureHeaderChecker()
    print('CHECKING:\t{}'.format(header_path))
    ret = checker.check(header_path)
    if len(sys.argv) == 3 and sys.argv[2] == 'print':
        print(checker.get_ref_tree())
    sys.exit(ret)


if __name__ == '__main__':
    main()
421