diff options
-rw-r--r-- | ChangeLog | 8 | ||||
-rw-r--r-- | conform/Makefile | 13 | ||||
-rw-r--r-- | conform/linknamespace.pl | 233 | ||||
-rw-r--r-- | conform/linknamespace.py | 217 |
4 files changed, 231 insertions, 240 deletions
diff --git a/ChangeLog b/ChangeLog index 1649229cbb..86040c06a6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2018-10-30 Joseph Myers <joseph@codesourcery.com> + + * conform/linknamespace.py: New file. + * conform/linknamespace.pl: Remove file. + * conform/Makefile ($(linknamespace-header-tests)): Use + linknamespace.py instead of linknamespace.pl. Do not use --tmpdir + option. + 2018-10-30 Florian Weimer <fweimer@redhat.com> * stdlib/test-bz22786.c (do_test): Additional free calls to avoid diff --git a/conform/Makefile b/conform/Makefile index a2bbe0fb24..fbc4110688 100644 --- a/conform/Makefile +++ b/conform/Makefile @@ -218,17 +218,16 @@ $(linknamespace-symlist-stdlibs-tests): $(objpfx)symlist-stdlibs-%: \ $(evaluate-test) $(linknamespace-header-tests): $(objpfx)%/linknamespace.out: \ - linknamespace.pl \ + linknamespace.py \ $(linknamespace-symlists-tests) \ $(linknamespace-symlist-stdlibs-tests) (set -e; std_hdr=$*; std=$${std_hdr%%/*}; hdr=$${std_hdr#*/}; \ mkdir -p $(@D)/scratch; \ - $(PERL) -I. -w $< --tmpdir=$(@D)/scratch --cc='$(CC)' \ - --flags='$(conformtest-cc-flags)' --standard=$$std \ - --stdsyms=$(objpfx)symlist-$$std --header=$$hdr \ - --libsyms=$(objpfx)symlist-stdlibs-$$std \ - --readelf='$(READELF)' \ - > $@ 2>&1); \ + $(PYTHON) $< --cc='$(CC)' --flags='$(conformtest-cc-flags)' \ + --standard=$$std --stdsyms=$(objpfx)symlist-$$std \ + --header=$$hdr --libsyms=$(objpfx)symlist-stdlibs-$$std \ + --readelf='$(READELF)' \ + > $@ 2>&1); \ $(evaluate-test) # Pre-standard C feature no longer supported by GCC (obsoleted in diff --git a/conform/linknamespace.pl b/conform/linknamespace.pl deleted file mode 100644 index 3fc6aca621..0000000000 --- a/conform/linknamespace.pl +++ /dev/null @@ -1,233 +0,0 @@ -#!/usr/bin/perl - -# Check that use of symbols declared in a given header does not result -# in any symbols being brought in that are not reserved with external -# linkage for the given standard. 
- -# Copyright (C) 2014-2018 Free Software Foundation, Inc. -# This file is part of the GNU C Library. - -# The GNU C Library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. - -# The GNU C Library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. - -# You should have received a copy of the GNU Lesser General Public -# License along with the GNU C Library; if not, see -# <http://www.gnu.org/licenses/>. - -use GlibcConform; -use Getopt::Long; - -GetOptions ('header=s' => \$header, 'standard=s' => \$standard, - 'flags=s' => \$flags, 'cc=s' => \$CC, 'tmpdir=s' => \$tmpdir, - 'stdsyms=s' => \$stdsyms_file, 'libsyms=s' => \$libsyms_file, - 'readelf=s' => \$READELF); - -# Load the list of symbols that are OK. -%stdsyms = (); -open (STDSYMS, "<$stdsyms_file") || die ("open $stdsyms_file: $!\n"); -while (<STDSYMS>) { - chomp; - $stdsyms{$_} = 1; -} -close (STDSYMS) || die ("close $stdsyms_file: $!\n"); - -# The following whitelisted symbols are also allowed for now. -# -# * Bug 17576: stdin, stdout, stderr only reserved with external -# linkage when stdio.h included (and possibly not then), not -# generally. -# -# * Bug 18442: re_syntax_options wrongly brought in by regcomp and -# used by re_comp. -# -@whitelist = qw(stdin stdout stderr re_syntax_options); -foreach my $sym (@whitelist) { - $stdsyms{$sym} = 1; -} - -# Return information about GLOBAL and WEAK symbols listed in readelf -# -s output. 
-sub list_syms { - my ($syms_file) = @_; - open (SYMS, "<$syms_file") || die ("open $syms_file: $!\n"); - my ($file) = $syms_file; - my (@ret) = (); - while (<SYMS>) { - chomp; - if (/^File: (.*)/) { - $file = $1; - $file =~ s|^.*/||; - next; - } - s/^\s*//; - # Architecture-specific st_other bits appear inside [] and disrupt - # the format of readelf output. - s/\[.*?\]//; - my (@fields) = split (/\s+/, $_); - if (@fields < 8) { - next; - } - my ($bind) = $fields[4]; - my ($ndx) = $fields[6]; - my ($sym) = $fields[7]; - if ($bind ne "GLOBAL" && $bind ne "WEAK") { - next; - } - if ($sym !~ /^\w+$/) { - next; - } - push (@ret, [$file, $sym, $bind, $ndx ne "UND"]); - } - close (SYMS) || die ("close $syms_file: $!\n"); - return @ret; -} - -# Load information about GLOBAL and WEAK symbols defined or used in -# the standard libraries. -# Symbols from a given object, except for weak defined symbols. -%seen_syms = (); -# Strong undefined symbols from a given object. -%strong_undef_syms = (); -# Objects defining a given symbol (strongly or weakly). -%sym_objs = (); -@sym_data = list_syms ($libsyms_file); -foreach my $sym (@sym_data) { - my ($file, $name, $bind, $defined) = @$sym; - if ($defined) { - if (!defined ($sym_objs{$name})) { - $sym_objs{$name} = []; - } - push (@{$sym_objs{$name}}, $file); - } - if ($bind eq "GLOBAL" || !$defined) { - if (!defined ($seen_syms{$file})) { - $seen_syms{$file} = []; - } - push (@{$seen_syms{$file}}, $name); - } - if ($bind eq "GLOBAL" && !$defined) { - if (!defined ($strong_undef_syms{$file})) { - $strong_undef_syms{$file} = []; - } - push (@{$strong_undef_syms{$file}}, $name); - } -} - -# Determine what ELF-level symbols are brought in by use of C-level -# symbols declared in the given header. -# -# The rules followed are heuristic and so may produce false positives -# and false negatives. 
-# -# * All undefined symbols are considered of signficance, but it is -# possible that (a) any standard library definition is weak, so can be -# overridden by the user's definition, and (b) the symbol is only used -# conditionally and not if the program is limited to standard -# functionality. -# -# * If a symbol reference is only brought in by the user using a data -# symbol rather than a function from the standard library, this will -# not be detected. -# -# * If a symbol reference is only brought in by crt*.o or libgcc, this -# will not be detected. -# -# * If a symbol reference is only brought in through __builtin_foo in -# a standard macro being compiled to call foo, this will not be -# detected. -# -# * Header inclusions should be compiled several times with different -# options such as -O2, -D_FORTIFY_SOURCE and -D_FILE_OFFSET_BITS=64 to -# find out what symbols are undefined from such a compilation; this is -# not yet implemented. -# -# * This script finds symbols referenced through use of macros on the -# basis that if a macro calls an internal function, that function must -# also be declared in the header. However, the header might also -# declare implementation-namespace functions that are not called by -# any standard macro in the header, resulting in false positives for -# any symbols brought in only through use of those -# implementation-namespace functions. -# -# * Namespace issues can apply for dynamic linking as well as static -# linking, when a call is from one shared library to another or uses a -# PLT entry for a call within a shared library; such issues are only -# detected by this script if the same namespace issue applies for -# static linking. 
- -@c_syms = list_exported_functions ("$CC $flags", $standard, $header, $tmpdir); -$cincfile = "$tmpdir/undef-$$.c"; -$cincfile_o = "$tmpdir/undef-$$.o"; -$cincfile_sym = "$tmpdir/undef-$$.sym"; -open (CINCFILE, ">$cincfile") || die ("open $cincfile: $!\n"); -print CINCFILE "#include <$header>\n"; -foreach my $sym (sort @c_syms) { - print CINCFILE "void *__glibc_test_$sym = (void *) &$sym;\n"; -} -close CINCFILE || die ("close $cincfile: $!\n"); -system ("$CC $flags -D_ISOMAC $CFLAGS{$standard} -c $cincfile -o $cincfile_o") - && die ("compiling failed\n"); -system ("LC_ALL=C $READELF -W -s $cincfile_o > $cincfile_sym") - && die ("readelf failed\n"); -@elf_syms = list_syms ($cincfile_sym); -unlink ($cincfile) || die ("unlink $cincfile: $!\n"); -unlink ($cincfile_o) || die ("unlink $cincfile_o: $!\n"); -unlink ($cincfile_sym) || die ("unlink $cincfile_sym: $!\n"); - -%seen_where = (); -%files_seen = (); -%all_undef = (); -%current_undef = (); -foreach my $sym (@elf_syms) { - my ($file, $name, $bind, $defined) = @$sym; - if ($bind eq "GLOBAL" && !$defined) { - $seen_where{$name} = "[initial] $name"; - $all_undef{$name} = "[initial] $name"; - $current_undef{$name} = "[initial] $name"; - } -} - -while (%current_undef) { - %new_undef = (); - foreach my $sym (sort keys %current_undef) { - foreach my $file (@{$sym_objs{$sym}}) { - if (defined ($files_seen{$file})) { - next; - } - $files_seen{$file} = 1; - foreach my $ssym (@{$seen_syms{$file}}) { - if (!defined ($seen_where{$ssym})) { - $seen_where{$ssym} = "$current_undef{$sym} -> [$file] $ssym"; - } - } - foreach my $usym (@{$strong_undef_syms{$file}}) { - if (!defined ($all_undef{$usym})) { - $all_undef{$usym} = "$current_undef{$sym} -> [$file] $usym"; - $new_undef{$usym} = "$current_undef{$sym} -> [$file] $usym"; - } - } - } - } - %current_undef = %new_undef; -} - -$ret = 0; -foreach my $sym (sort keys %seen_where) { - if ($sym =~ /^_/) { - next; - } - if (defined ($stdsyms{$sym})) { - next; - } - print 
"$seen_where{$sym}\n"; - $ret = 1; -} - -exit $ret; diff --git a/conform/linknamespace.py b/conform/linknamespace.py new file mode 100644 index 0000000000..07a775499d --- /dev/null +++ b/conform/linknamespace.py @@ -0,0 +1,217 @@ +#!/usr/bin/python +# Check that use of symbols declared in a given header does not result +# in any symbols being brought in that are not reserved with external +# linkage for the given standard. +# Copyright (C) 2014-2018 Free Software Foundation, Inc. +# This file is part of the GNU C Library. +# +# The GNU C Library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# The GNU C Library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with the GNU C Library; if not, see +# <http://www.gnu.org/licenses/>. + +import argparse +from collections import defaultdict +import os.path +import re +import subprocess +import sys +import tempfile + +import glibcconform + +# The following whitelisted symbols are also allowed for now. +# +# * Bug 17576: stdin, stdout, stderr only reserved with external +# linkage when stdio.h included (and possibly not then), not +# generally. +# +# * Bug 18442: re_syntax_options wrongly brought in by regcomp and +# used by re_comp. 
#
WHITELIST = {'stdin', 'stdout', 'stderr', 're_syntax_options'}


def list_syms(filename):
    """Return information about GLOBAL and WEAK symbols listed in readelf
    -s output.

    FILENAME names a text file holding the output of 'readelf -W -s'.
    The result is a list of tuples (OBJECT, SYMBOL, BIND, DEFINED):

    * OBJECT is the basename taken from the most recent 'File: ' line,
      or FILENAME itself if no such line has been seen yet;
    * SYMBOL is the symbol name;
    * BIND is 'GLOBAL' or 'WEAK';
    * DEFINED is false when the section index column is 'UND'.

    Lines with fewer than eight fields, symbols with other bindings
    and symbol names containing anything other than letters, digits
    and underscores are skipped.

    """
    ret = []
    cur_file = filename
    with open(filename, 'r') as syms_file:
        for line in syms_file:
            line = line.rstrip()
            if line.startswith('File: '):
                cur_file = line[len('File: '):]
                cur_file = cur_file.split('/')[-1]
                continue
            # Architecture-specific st_other bits appear inside [] and
            # disrupt the format of readelf output.
            line = re.sub(r'\[.*?\]', '', line)
            fields = line.split()
            if len(fields) < 8:
                continue
            bind = fields[4]
            ndx = fields[6]
            sym = fields[7]
            if bind not in ('GLOBAL', 'WEAK'):
                continue
            if not re.fullmatch('[A-Za-z0-9_]+', sym):
                continue
            ret.append((cur_file, sym, bind, ndx != 'UND'))
    return ret


def main():
    """The main entry point.

    Parse the command line, work out which symbols are brought in by
    using the functions declared in the given header, and print one
    line (the chain of references leading to the symbol) for every
    such symbol that neither starts with an underscore nor is listed
    in the standard symbol file or WHITELIST.  Exit with status 1 if
    any line was printed and 0 otherwise.

    """
    # Imported here so that this function is self-contained; only
    # needed to quote the temporary file names passed to the shell.
    import shlex

    parser = argparse.ArgumentParser(description='Check link-time namespace.')
    # Every option is used unconditionally below, so each one is
    # required; a missing option is reported by argparse rather than
    # failing later with an obscure error.
    parser.add_argument('--header', metavar='HEADER', required=True,
                        help='name of header')
    parser.add_argument('--standard', metavar='STD', required=True,
                        help='standard to use when processing header')
    parser.add_argument('--cc', metavar='CC', required=True,
                        help='C compiler to use')
    parser.add_argument('--flags', metavar='CFLAGS', required=True,
                        help='Compiler flags to use with CC')
    parser.add_argument('--stdsyms', metavar='FILE', required=True,
                        help='File with list of standard symbols')
    parser.add_argument('--libsyms', metavar='FILE', required=True,
                        help='File with symbol information from libraries')
    parser.add_argument('--readelf', metavar='READELF', required=True,
                        help='readelf program to use')
    args = parser.parse_args()

    # Load the list of symbols that are OK.
    stdsyms = set()
    with open(args.stdsyms, 'r') as stdsyms_file:
        for line in stdsyms_file:
            stdsyms.add(line.rstrip())
    stdsyms |= WHITELIST

    # Load information about GLOBAL and WEAK symbols defined or used
    # in the standard libraries.
    # Symbols from a given object, except for weak defined symbols.
    seen_syms = defaultdict(list)
    # Strong undefined symbols from a given object.
    strong_undef_syms = defaultdict(list)
    # Objects defining a given symbol (strongly or weakly).
    sym_objs = defaultdict(list)
    for file, name, bind, defined in list_syms(args.libsyms):
        if defined:
            sym_objs[name].append(file)
        if bind == 'GLOBAL' or not defined:
            seen_syms[file].append(name)
        if bind == 'GLOBAL' and not defined:
            strong_undef_syms[file].append(name)

    # Determine what ELF-level symbols are brought in by use of C-level
    # symbols declared in the given header.
    #
    # The rules followed are heuristic and so may produce false
    # positives and false negatives.
    #
    # * All undefined symbols are considered of significance, but it is
    #   possible that (a) any standard library definition is weak, so
    #   can be overridden by the user's definition, and (b) the symbol
    #   is only used conditionally and not if the program is limited to
    #   standard functionality.
    #
    # * If a symbol reference is only brought in by the user using a
    #   data symbol rather than a function from the standard library,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in by crt*.o or libgcc,
    #   this will not be detected.
    #
    # * If a symbol reference is only brought in through __builtin_foo
    #   in a standard macro being compiled to call foo, this will not be
    #   detected.
    #
    # * Header inclusions should be compiled several times with
    #   different options such as -O2, -D_FORTIFY_SOURCE and
    #   -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
    #   from such a compilation; this is not yet implemented.
    #
    # * This script finds symbols referenced through use of macros on
    #   the basis that if a macro calls an internal function, that
    #   function must also be declared in the header.  However, the
    #   header might also declare implementation-namespace functions
    #   that are not called by any standard macro in the header,
    #   resulting in false positives for any symbols brought in only
    #   through use of those implementation-namespace functions.
    #
    # * Namespace issues can apply for dynamic linking as well as
    #   static linking, when a call is from one shared library to
    #   another or uses a PLT entry for a call within a shared library;
    #   such issues are only detected by this script if the same
    #   namespace issue applies for static linking.

    # For each symbol brought in, the chain of references explaining
    # why it was brought in.
    seen_where = {}
    # Library objects already pulled in.
    files_seen = set()
    # Every strong undefined symbol found so far, with its chain.
    all_undef = {}
    # Strong undefined symbols still to be resolved in this round.
    current_undef = {}
    compiler = '%s %s' % (args.cc, args.flags)
    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
                                                  args.header)
    with tempfile.TemporaryDirectory() as temp_dir:
        cincfile_name = os.path.join(temp_dir, 'undef.c')
        cincfile_o_name = os.path.join(temp_dir, 'undef.o')
        cincfile_sym_name = os.path.join(temp_dir, 'undef.sym')
        # Take the address of every exported function so that the
        # object file has an undefined reference to each of them.
        cincfile_text = ('#include <%s>\n%s\n'
                         % (args.header,
                            '\n'.join('void *__glibc_test_%s = (void *) &%s;'
                                      % (sym, sym) for sym in sorted(c_syms))))
        with open(cincfile_name, 'w') as cincfile:
            cincfile.write(cincfile_text)
        # The compiler, its flags and readelf are passed through to
        # the shell as given, but the temporary file names are quoted
        # so that a temporary directory whose name contains spaces or
        # shell metacharacters does not break the commands.
        cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
               % (args.cc, args.flags, glibcconform.CFLAGS[args.standard],
                  shlex.quote(cincfile_name), shlex.quote(cincfile_o_name)))
        subprocess.check_call(cmd, shell=True)
        cmd = ('LC_ALL=C %s -W -s %s > %s'
               % (args.readelf, shlex.quote(cincfile_o_name),
                  shlex.quote(cincfile_sym_name)))
        subprocess.check_call(cmd, shell=True)
        for file, name, bind, defined in list_syms(cincfile_sym_name):
            if bind == 'GLOBAL' and not defined:
                sym_text = '[initial] %s' % name
                seen_where[name] = sym_text
                all_undef[name] = sym_text
                current_undef[name] = sym_text

    # Repeatedly pull in the objects defining the currently undefined
    # symbols, recording the symbols those objects bring in and the
    # further undefined symbols they reference, until nothing new is
    # found.
    while current_undef:
        new_undef = {}
        for sym, cu_sym in sorted(current_undef.items()):
            for file in sym_objs[sym]:
                if file in files_seen:
                    continue
                files_seen.add(file)
                for ssym in seen_syms[file]:
                    if ssym not in seen_where:
                        seen_where[ssym] = ('%s -> [%s] %s'
                                            % (cu_sym, file, ssym))
                for usym in strong_undef_syms[file]:
                    if usym not in all_undef:
                        usym_text = '%s -> [%s] %s' % (cu_sym, file, usym)
                        all_undef[usym] = usym_text
                        new_undef[usym] = usym_text
        current_undef = new_undef

    # Report every symbol brought in that is neither in the
    # implementation namespace (leading underscore) nor allowed for
    # this standard.
    ret = 0
    for sym in sorted(seen_where):
        if sym.startswith('_'):
            continue
        if sym in stdsyms:
            continue
        print(seen_where[sym])
        ret = 1
    sys.exit(ret)


if __name__ == '__main__':
    main()