diff options
author | Siddhesh Poyarekar <siddhesh@sourceware.org> | 2019-01-09 16:48:27 +0530 |
---|---|---|
committer | Siddhesh Poyarekar <siddhesh@sourceware.org> | 2019-01-12 16:46:42 +0530 |
commit | c716fcd0b018cd8583c1438707ee165d45e3b1db (patch) | |
tree | 30e7f9e73a455c5b7556ed65f9a0e3fee5d36357 | |
parent | f6f5699f8747f6bb26e08d0216141ff7f7811782 (diff) | |
download | glibc-c716fcd0b018cd8583c1438707ee165d45e3b1db.tar.gz glibc-c716fcd0b018cd8583c1438707ee165d45e3b1db.tar.xz glibc-c716fcd0b018cd8583c1438707ee165d45e3b1db.zip |
Miscellaneous fixes. Also improve macrocall detection.
-rwxr-xr-x | scripts/gen-changed-entities.py | 112 |
1 files changed, 91 insertions, 21 deletions
diff --git a/scripts/gen-changed-entities.py b/scripts/gen-changed-entities.py index 59ebd47c69..9b062a3d42 100755 --- a/scripts/gen-changed-entities.py +++ b/scripts/gen-changed-entities.py @@ -168,7 +168,7 @@ actions = {0:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}, # The __attribute__ are written in a bunch of different ways in glibc. ATTRIBUTE = \ - r'((_*(attribute|ATTRIBUTE)_*(\s*\(\([^)]+\)\)|\w+))|weak_function)'; +r'(attribute_\w+)|(__attribute__\(\([^;]\)\))|(weak_function)|(_*ATTRIBUTE_*\w+)' # Function regex FUNC_RE = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^(][^{]+\)\s*{') @@ -176,7 +176,7 @@ FUNC_RE = re.compile(ATTRIBUTE + r'*\s*(\w+)\s*\([^(][^{]+\)\s*{') # The macrocall_re peeks into the next line to ensure that it doesn't eat up # a FUNC by accident. The func_re regex is also quite crude and only # intends to ensure that the function name gets picked up correctly. -MACROCALL_RE = re.compile(r'(\w+)\s*\((?!void).(\s*,\s*[\w\.]+)*\)$') +MACROCALL_RE = re.compile(r'(\w+)\s*\((\w+)(\s*,\s*[\w\.]+)*\)$') # Composite types such as structs and unions. COMPOSITE_RE = re.compile(r'(struct|union|enum)\s*(\w*)\s*{') @@ -185,13 +185,16 @@ COMPOSITE_RE = re.compile(r'(struct|union|enum)\s*(\w*)\s*{') ASSIGN_RE = re.compile(r'(\w+)\s*(\[[^\]]*\])*\s*([^\s]*attribute[\s\w()]+)?\s*=') # Function Declarations. -FNDECL_RE = re.compile(r'(\w+)\s*\([^;]+\)\s*' + ATTRIBUTE + '*;') +FNDECL_RE = re.compile(r'(\w+)\s*\([^;]+\)\s*(__THROW)?\s*' + ATTRIBUTE + '*;') # Function pointer typedefs. TYPEDEF_FN_RE = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);') # Simple decls. -DECL_RE = re.compile(r'(\w+)(\[\w+\])?\s*' + ATTRIBUTE + '?;') +DECL_RE = re.compile(r'(\w+)(\[\w+\])*\s*' + ATTRIBUTE + '?;') + +# __typeof decls. +TYPEOF_DECL_RE = re.compile(r'__typeof\(\w+\)\s*(\w+);') def remove_comments(op): @@ -406,7 +409,8 @@ def parse_func(name, cur, op, loc, code, blocktype): return loc -def parse_macrocall(name, cur, op, loc, code, blocktype): +SYM_MACROS = [] +def parse_macrocall(cur, op, loc, code, blocktype): ''' Parse a macro call. Match a symbol hack macro calls that get added without semicolons. @@ -419,11 +423,16 @@ def parse_macrocall(name, cur, op, loc, code, blocktype): - Returns: The next location to be read in the array. ''' - debug_print('FOUND MACROCALL: %s' % name) - new_block(name, blocktype, [cur], code) + found = re.search(MACROCALL_RE, cur) + if found: + name = found.group(1) + if name in SYM_MACROS: + debug_print('FOUND MACROCALL: %s' % name) + new_block(found.group(2), blocktype, [cur], code) + return '', loc - return loc + return cur, loc # Regular expressions and token consumption functions for expressions we @@ -433,16 +442,19 @@ c_expr_parsers = [ 'type' : block_type.composite}, {'regex' : ASSIGN_RE, 'func' : parse_assign, 'name' : 1, 'type' : block_type.assign}, + {'regex' : TYPEOF_DECL_RE, 'func' : parse_decl, 'name' : 1, + 'type' : block_type.decl}, {'regex' : TYPEDEF_FN_RE, 'func' : parse_decl, 'name' : 1, 'type' : block_type.decl}, {'regex' : FNDECL_RE, 'func' : parse_decl, 'name' : 1, 'type' : block_type.fndecl}, {'regex' : FUNC_RE, 'func' : parse_func, 'name' : 5, 'type' : block_type.func}, - {'regex' : MACROCALL_RE, 'func' : parse_macrocall, 'name' : 1, - 'type' : block_type.macrocall}, {'regex' : DECL_RE, 'func' : parse_decl, 'name' : 1, - 'type' : block_type.decl}] + 'type' : block_type.decl}, + # Special case at the end: macro call. + {'regex' : None, 'func' : parse_macrocall, 'name' : 1, + 'type' : block_type.macrocall}] def parse_c_expr(cur, op, loc, code): @@ -456,6 +468,9 @@ def parse_c_expr(cur, op, loc, code): debug_print('PARSING: %s' % cur) for p in c_expr_parsers: + if not p['regex']: + return p['func'](cur, op, loc, code, p['type']) + found = re.search(p['regex'], cur) if found: return '', p['func'](found.group(p['name']), cur, op, loc, code, @@ -499,6 +514,61 @@ def c_parse(op, loc, code, start = ''): return loc +def compact_tree(tree): + ''' Try to reduce the tree to its minimal form. + + A source code tree in its simplest form may have a lot of duplicated + information that may be difficult to compare and come up with a minimal + difference. Currently this function only consolidates macro scope blocks + so that if there are multiple blocks with the same condition in the same + nesting scope, they are collapsed into a single scope. + ''' + + # Macro conditions that nest the entire file aren't very interesting. This + # should take care of the header guards. + if tree['type'] == block_type.file \ + and len(tree['contents']) == 1 \ + and tree['contents'][0]['type'] == block_type.macro_cond: + tree['contents'] = tree['contents'][0]['contents'] + + # Nothing to do for non-nesting blocks. + if tree['type'] != block_type.macro_cond \ + and tree['type'] != block_type.file: + return + + # Now for nesting blocks, get the list of unique condition names and + # consolidate code under them. The result also bunches up all the + # conditions at the top. + newcontents = [] + macro_names = sorted(set([x['name'] for x in tree['contents'] \ + if x['type'] == block_type.macro_cond])) + for m in macro_names: + nc = [x['contents'] for x in tree['contents'] if x['name'] == m] + b = new_block(m, block_type.macro_cond, sum(nc, []), tree) + compact_tree(b) + newcontents.append(b) + + newcontents.extend([x for x in tree['contents'] \ + if x['type'] != block_type.macro_cond]) + + tree['contents'] = newcontents + + +def build_macrocalls(): + ''' Build a list of macro calls used for symbol versioning and attributes. + + glibc uses a set of macro calls that do not end with a semi-colon and hence + breaks our parser. Identify those calls from include/libc-symbols.h and + filter them out. + ''' + with open('include/libc-symbols.h') as macrofile: + global SYM_MACROS + op = macrofile.readlines() + op = remove_comments(op) + SYM_MACROS = [re.sub(r'.*define (\w+).*', r'\1', x[:-1]) for x in op \ + if 'define ' in x] + + def c_parse_output(op): ''' File parser. @@ -506,10 +576,13 @@ def c_parse_output(op): represent the file. This tree structure is then used for comparison between the old and new file. ''' + build_macrocalls() tree = new_block('', block_type.file, [], None) op = remove_comments(op) op = [re.sub(r'#\s+', '#', x) for x in op] op = c_parse(op, 0, tree) + #compact_tree(tree) + c_dump_tree(tree, 0) return tree @@ -610,17 +683,15 @@ def c_compare(oldfile, newfile): Parse the two files into trees and compare them. Print the result of the comparison in the ChangeLog-like format. ''' - left = c_parse_output(oldfile) - right = c_parse_output(newfile) - - c_compare_trees(left, right) - debug_print('LEFT TREE') debug_print('-' * 80) - c_dump_tree(left, 0) + left = c_parse_output(oldfile) + debug_print('RIGHT TREE') debug_print('-' * 80) - c_dump_tree(right, 0) + right = c_parse_output(newfile) + + c_compare_trees(left, right) #------------------------------------------------------------------------------ @@ -628,8 +699,8 @@ def c_compare(oldfile, newfile): # Register backends for specific file extensions. BACKENDS = \ - {'.c': {'compare': c_compare}, - '.h': {'compare': c_compare}} + {'.c': {'parse_output': c_parse_output, 'compare': c_compare}, + '.h': {'parse_output': c_parse_output, 'compare': c_compare}} def get_backend(filename): @@ -801,7 +872,6 @@ def backend_file_test(f): op = srcfile.readlines() op = [x[:-1] for x in op] tree = backend['parse_output'](op) - backend['dump_tree'](tree, 0) # Program Entry point. If -d is specified, the second argument is assumed to be |