about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog                   8
-rw-r--r--  conform/Makefile           13
-rw-r--r--  conform/linknamespace.pl  233
-rw-r--r--  conform/linknamespace.py  217
4 files changed, 231 insertions, 240 deletions
diff --git a/ChangeLog b/ChangeLog
index 1649229cbb..86040c06a6 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2018-10-30  Joseph Myers  <joseph@codesourcery.com>
+
+	* conform/linknamespace.py: New file.
+	* conform/linknamespace.pl: Remove file.
+	* conform/Makefile ($(linknamespace-header-tests)): Use
+	linknamespace.py instead of linknamespace.pl.  Do not use --tmpdir
+	option.
+
 2018-10-30  Florian Weimer  <fweimer@redhat.com>
 
 	* stdlib/test-bz22786.c (do_test): Additional free calls to avoid
diff --git a/conform/Makefile b/conform/Makefile
index a2bbe0fb24..fbc4110688 100644
--- a/conform/Makefile
+++ b/conform/Makefile
@@ -218,17 +218,16 @@ $(linknamespace-symlist-stdlibs-tests): $(objpfx)symlist-stdlibs-%: \
 	$(evaluate-test)
 
 $(linknamespace-header-tests): $(objpfx)%/linknamespace.out: \
-			       linknamespace.pl \
+			       linknamespace.py \
 			       $(linknamespace-symlists-tests) \
 			       $(linknamespace-symlist-stdlibs-tests)
 	(set -e; std_hdr=$*; std=$${std_hdr%%/*}; hdr=$${std_hdr#*/}; \
 	 mkdir -p $(@D)/scratch; \
-	 $(PERL) -I. -w $< --tmpdir=$(@D)/scratch --cc='$(CC)' \
-		 --flags='$(conformtest-cc-flags)' --standard=$$std \
-		 --stdsyms=$(objpfx)symlist-$$std --header=$$hdr \
-		 --libsyms=$(objpfx)symlist-stdlibs-$$std \
-		 --readelf='$(READELF)' \
-		 > $@ 2>&1); \
+	 $(PYTHON) $< --cc='$(CC)' --flags='$(conformtest-cc-flags)' \
+		   --standard=$$std --stdsyms=$(objpfx)symlist-$$std \
+		   --header=$$hdr --libsyms=$(objpfx)symlist-stdlibs-$$std \
+		   --readelf='$(READELF)' \
+		   > $@ 2>&1); \
 	$(evaluate-test)
 
 # Pre-standard C feature no longer supported by GCC (obsoleted in
diff --git a/conform/linknamespace.pl b/conform/linknamespace.pl
deleted file mode 100644
index 3fc6aca621..0000000000
--- a/conform/linknamespace.pl
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/usr/bin/perl
-
-# Check that use of symbols declared in a given header does not result
-# in any symbols being brought in that are not reserved with external
-# linkage for the given standard.
-
-# Copyright (C) 2014-2018 Free Software Foundation, Inc.
-# This file is part of the GNU C Library.
-
-# The GNU C Library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-
-# The GNU C Library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# Lesser General Public License for more details.
-
-# You should have received a copy of the GNU Lesser General Public
-# License along with the GNU C Library; if not, see
-# <http://www.gnu.org/licenses/>.
-
-use GlibcConform;
-use Getopt::Long;
-
-GetOptions ('header=s' => \$header, 'standard=s' => \$standard,
-	    'flags=s' => \$flags, 'cc=s' => \$CC, 'tmpdir=s' => \$tmpdir,
-	    'stdsyms=s' => \$stdsyms_file, 'libsyms=s' => \$libsyms_file,
-	    'readelf=s' => \$READELF);
-
-# Load the list of symbols that are OK.
-%stdsyms = ();
-open (STDSYMS, "<$stdsyms_file") || die ("open $stdsyms_file: $!\n");
-while (<STDSYMS>) {
-  chomp;
-  $stdsyms{$_} = 1;
-}
-close (STDSYMS) || die ("close $stdsyms_file: $!\n");
-
-# The following whitelisted symbols are also allowed for now.
-#
-# * Bug 17576: stdin, stdout, stderr only reserved with external
-# linkage when stdio.h included (and possibly not then), not
-# generally.
-#
-# * Bug 18442: re_syntax_options wrongly brought in by regcomp and
-# used by re_comp.
-#
-@whitelist = qw(stdin stdout stderr re_syntax_options);
-foreach my $sym (@whitelist) {
-  $stdsyms{$sym} = 1;
-}
-
-# Return information about GLOBAL and WEAK symbols listed in readelf
-# -s output.
-sub list_syms {
-  my ($syms_file) = @_;
-  open (SYMS, "<$syms_file") || die ("open $syms_file: $!\n");
-  my ($file) = $syms_file;
-  my (@ret) = ();
-  while (<SYMS>) {
-    chomp;
-    if (/^File: (.*)/) {
-      $file = $1;
-      $file =~ s|^.*/||;
-      next;
-    }
-    s/^\s*//;
-    # Architecture-specific st_other bits appear inside [] and disrupt
-    # the format of readelf output.
-    s/\[.*?\]//;
-    my (@fields) = split (/\s+/, $_);
-    if (@fields < 8) {
-      next;
-    }
-    my ($bind) = $fields[4];
-    my ($ndx) = $fields[6];
-    my ($sym) = $fields[7];
-    if ($bind ne "GLOBAL" && $bind ne "WEAK") {
-      next;
-    }
-    if ($sym !~ /^\w+$/) {
-      next;
-    }
-    push (@ret, [$file, $sym, $bind, $ndx ne "UND"]);
-  }
-  close (SYMS) || die ("close $syms_file: $!\n");
-  return @ret;
-}
-
-# Load information about GLOBAL and WEAK symbols defined or used in
-# the standard libraries.
-# Symbols from a given object, except for weak defined symbols.
-%seen_syms = ();
-# Strong undefined symbols from a given object.
-%strong_undef_syms = ();
-# Objects defining a given symbol (strongly or weakly).
-%sym_objs = ();
-@sym_data = list_syms ($libsyms_file);
-foreach my $sym (@sym_data) {
-  my ($file, $name, $bind, $defined) = @$sym;
-  if ($defined) {
-    if (!defined ($sym_objs{$name})) {
-      $sym_objs{$name} = [];
-    }
-    push (@{$sym_objs{$name}}, $file);
-  }
-  if ($bind eq "GLOBAL" || !$defined) {
-    if (!defined ($seen_syms{$file})) {
-      $seen_syms{$file} = [];
-    }
-    push (@{$seen_syms{$file}}, $name);
-  }
-  if ($bind eq "GLOBAL" && !$defined) {
-    if (!defined ($strong_undef_syms{$file})) {
-      $strong_undef_syms{$file} = [];
-    }
-    push (@{$strong_undef_syms{$file}}, $name);
-  }
-}
-
-# Determine what ELF-level symbols are brought in by use of C-level
-# symbols declared in the given header.
-#
-# The rules followed are heuristic and so may produce false positives
-# and false negatives.
-#
-# * All undefined symbols are considered of signficance, but it is
-# possible that (a) any standard library definition is weak, so can be
-# overridden by the user's definition, and (b) the symbol is only used
-# conditionally and not if the program is limited to standard
-# functionality.
-#
-# * If a symbol reference is only brought in by the user using a data
-# symbol rather than a function from the standard library, this will
-# not be detected.
-#
-# * If a symbol reference is only brought in by crt*.o or libgcc, this
-# will not be detected.
-#
-# * If a symbol reference is only brought in through __builtin_foo in
-# a standard macro being compiled to call foo, this will not be
-# detected.
-#
-# * Header inclusions should be compiled several times with different
-# options such as -O2, -D_FORTIFY_SOURCE and -D_FILE_OFFSET_BITS=64 to
-# find out what symbols are undefined from such a compilation; this is
-# not yet implemented.
-#
-# * This script finds symbols referenced through use of macros on the
-# basis that if a macro calls an internal function, that function must
-# also be declared in the header.  However, the header might also
-# declare implementation-namespace functions that are not called by
-# any standard macro in the header, resulting in false positives for
-# any symbols brought in only through use of those
-# implementation-namespace functions.
-#
-# * Namespace issues can apply for dynamic linking as well as static
-# linking, when a call is from one shared library to another or uses a
-# PLT entry for a call within a shared library; such issues are only
-# detected by this script if the same namespace issue applies for
-# static linking.
-
-@c_syms = list_exported_functions ("$CC $flags", $standard, $header, $tmpdir);
-$cincfile = "$tmpdir/undef-$$.c";
-$cincfile_o = "$tmpdir/undef-$$.o";
-$cincfile_sym = "$tmpdir/undef-$$.sym";
-open (CINCFILE, ">$cincfile") || die ("open $cincfile: $!\n");
-print CINCFILE "#include <$header>\n";
-foreach my $sym (sort @c_syms) {
-  print CINCFILE "void *__glibc_test_$sym = (void *) &$sym;\n";
-}
-close CINCFILE || die ("close $cincfile: $!\n");
-system ("$CC $flags -D_ISOMAC $CFLAGS{$standard} -c $cincfile -o $cincfile_o")
-  && die ("compiling failed\n");
-system ("LC_ALL=C $READELF -W -s $cincfile_o > $cincfile_sym")
-  && die ("readelf failed\n");
-@elf_syms = list_syms ($cincfile_sym);
-unlink ($cincfile) || die ("unlink $cincfile: $!\n");
-unlink ($cincfile_o) || die ("unlink $cincfile_o: $!\n");
-unlink ($cincfile_sym) || die ("unlink $cincfile_sym: $!\n");
-
-%seen_where = ();
-%files_seen = ();
-%all_undef = ();
-%current_undef = ();
-foreach my $sym (@elf_syms) {
-  my ($file, $name, $bind, $defined) = @$sym;
-  if ($bind eq "GLOBAL" && !$defined) {
-    $seen_where{$name} = "[initial] $name";
-    $all_undef{$name} = "[initial] $name";
-    $current_undef{$name} = "[initial] $name";
-  }
-}
-
-while (%current_undef) {
-  %new_undef = ();
-  foreach my $sym (sort keys %current_undef) {
-    foreach my $file (@{$sym_objs{$sym}}) {
-      if (defined ($files_seen{$file})) {
-	next;
-      }
-      $files_seen{$file} = 1;
-      foreach my $ssym (@{$seen_syms{$file}}) {
-	if (!defined ($seen_where{$ssym})) {
-	  $seen_where{$ssym} = "$current_undef{$sym} -> [$file] $ssym";
-	}
-      }
-      foreach my $usym (@{$strong_undef_syms{$file}}) {
-	if (!defined ($all_undef{$usym})) {
-	  $all_undef{$usym} = "$current_undef{$sym} -> [$file] $usym";
-	  $new_undef{$usym} = "$current_undef{$sym} -> [$file] $usym";
-	}
-      }
-    }
-  }
-  %current_undef = %new_undef;
-}
-
-$ret = 0;
-foreach my $sym (sort keys %seen_where) {
-  if ($sym =~ /^_/) {
-    next;
-  }
-  if (defined ($stdsyms{$sym})) {
-    next;
-  }
-  print "$seen_where{$sym}\n";
-  $ret = 1;
-}
-
-exit $ret;
diff --git a/conform/linknamespace.py b/conform/linknamespace.py
new file mode 100644
index 0000000000..07a775499d
--- /dev/null
+++ b/conform/linknamespace.py
@@ -0,0 +1,217 @@
+#!/usr/bin/python
+# Check that use of symbols declared in a given header does not result
+# in any symbols being brought in that are not reserved with external
+# linkage for the given standard.
+# Copyright (C) 2014-2018 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+
+import argparse
+from collections import defaultdict
+import os.path
+import re
+import subprocess
+import sys
+import tempfile
+
+import glibcconform
+
+# The following whitelisted symbols are also allowed for now.
+#
+# * Bug 17576: stdin, stdout, stderr only reserved with external
+# linkage when stdio.h included (and possibly not then), not
+# generally.
+#
+# * Bug 18442: re_syntax_options wrongly brought in by regcomp and
+# used by re_comp.
+#
+WHITELIST = {'stdin', 'stdout', 'stderr', 're_syntax_options'}
+
+
+def list_syms(filename):
+    """Return information about GLOBAL and WEAK symbols listed in readelf
+    -s output."""
+    ret = []
+    cur_file = filename
+    with open(filename, 'r') as syms_file:
+        for line in syms_file:
+            line = line.rstrip()
+            if line.startswith('File: '):
+                cur_file = line[len('File: '):]
+                cur_file = cur_file.split('/')[-1]
+                continue
+            # Architecture-specific st_other bits appear inside [] and
+            # disrupt the format of readelf output.
+            line = re.sub(r'\[.*?\]', '', line)
+            fields = line.split()
+            if len(fields) < 8:
+                continue
+            bind = fields[4]
+            ndx = fields[6]
+            sym = fields[7]
+            if bind not in ('GLOBAL', 'WEAK'):
+                continue
+            if not re.fullmatch('[A-Za-z0-9_]+', sym):
+                continue
+            ret.append((cur_file, sym, bind, ndx != 'UND'))
+    return ret
+
+
+def main():
+    """The main entry point."""
+    parser = argparse.ArgumentParser(description='Check link-time namespace.')
+    parser.add_argument('--header', metavar='HEADER',
+                        help='name of header')
+    parser.add_argument('--standard', metavar='STD',
+                        help='standard to use when processing header')
+    parser.add_argument('--cc', metavar='CC',
+                        help='C compiler to use')
+    parser.add_argument('--flags', metavar='CFLAGS',
+                        help='Compiler flags to use with CC')
+    parser.add_argument('--stdsyms', metavar='FILE',
+                        help='File with list of standard symbols')
+    parser.add_argument('--libsyms', metavar='FILE',
+                        help='File with symbol information from libraries')
+    parser.add_argument('--readelf', metavar='READELF',
+                        help='readelf program to use')
+    args = parser.parse_args()
+
+    # Load the list of symbols that are OK.
+    stdsyms = set()
+    with open(args.stdsyms, 'r') as stdsyms_file:
+        for line in stdsyms_file:
+            stdsyms.add(line.rstrip())
+    stdsyms |= WHITELIST
+
+    # Load information about GLOBAL and WEAK symbols defined or used
+    # in the standard libraries.
+    # Symbols from a given object, except for weak defined symbols.
+    seen_syms = defaultdict(list)
+    # Strong undefined symbols from a given object.
+    strong_undef_syms = defaultdict(list)
+    # Objects defining a given symbol (strongly or weakly).
+    sym_objs = defaultdict(list)
+    for file, name, bind, defined in list_syms(args.libsyms):
+        if defined:
+            sym_objs[name].append(file)
+        if bind == 'GLOBAL' or not defined:
+            seen_syms[file].append(name)
+        if bind == 'GLOBAL' and not defined:
+            strong_undef_syms[file].append(name)
+
+    # Determine what ELF-level symbols are brought in by use of C-level
+    # symbols declared in the given header.
+    #
+    # The rules followed are heuristic and so may produce false
+    # positives and false negatives.
+    #
+    # * All undefined symbols are considered of significance, but it is
+    # possible that (a) any standard library definition is weak, so
+    # can be overridden by the user's definition, and (b) the symbol
+    # is only used conditionally and not if the program is limited to
+    # standard functionality.
+    #
+    # * If a symbol reference is only brought in by the user using a
+    # data symbol rather than a function from the standard library,
+    # this will not be detected.
+    #
+    # * If a symbol reference is only brought in by crt*.o or libgcc,
+    # this will not be detected.
+    #
+    # * If a symbol reference is only brought in through __builtin_foo
+    # in a standard macro being compiled to call foo, this will not be
+    # detected.
+    #
+    # * Header inclusions should be compiled several times with
+    # different options such as -O2, -D_FORTIFY_SOURCE and
+    # -D_FILE_OFFSET_BITS=64 to find out what symbols are undefined
+    # from such a compilation; this is not yet implemented.
+    #
+    # * This script finds symbols referenced through use of macros on
+    # the basis that if a macro calls an internal function, that
+    # function must also be declared in the header.  However, the
+    # header might also declare implementation-namespace functions
+    # that are not called by any standard macro in the header,
+    # resulting in false positives for any symbols brought in only
+    # through use of those implementation-namespace functions.
+    #
+    # * Namespace issues can apply for dynamic linking as well as
+    # static linking, when a call is from one shared library to
+    # another or uses a PLT entry for a call within a shared library;
+    # such issues are only detected by this script if the same
+    # namespace issue applies for static linking.
+    seen_where = {}
+    files_seen = set()
+    all_undef = {}
+    current_undef = {}
+    compiler = '%s %s' % (args.cc, args.flags)
+    c_syms = glibcconform.list_exported_functions(compiler, args.standard,
+                                                  args.header)
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cincfile_name = os.path.join(temp_dir, 'undef.c')
+        cincfile_o_name = os.path.join(temp_dir, 'undef.o')
+        cincfile_sym_name = os.path.join(temp_dir, 'undef.sym')
+        cincfile_text = ('#include <%s>\n%s\n'
+                         % (args.header,
+                            '\n'.join('void *__glibc_test_%s = (void *) &%s;'
+                                      % (sym, sym) for sym in sorted(c_syms))))
+        with open(cincfile_name, 'w') as cincfile:
+            cincfile.write(cincfile_text)
+        cmd = ('%s %s -D_ISOMAC %s -c %s -o %s'
+               % (args.cc, args.flags, glibcconform.CFLAGS[args.standard],
+                  cincfile_name, cincfile_o_name))
+        subprocess.check_call(cmd, shell=True)
+        cmd = ('LC_ALL=C %s -W -s %s > %s'
+               % (args.readelf, cincfile_o_name, cincfile_sym_name))
+        subprocess.check_call(cmd, shell=True)
+        for file, name, bind, defined in list_syms(cincfile_sym_name):
+            if bind == 'GLOBAL' and not defined:
+                sym_text = '[initial] %s' % name
+                seen_where[name] = sym_text
+                all_undef[name] = sym_text
+                current_undef[name] = sym_text
+
+    while current_undef:
+        new_undef = {}
+        for sym, cu_sym in sorted(current_undef.items()):
+            for file in sym_objs[sym]:
+                if file in files_seen:
+                    continue
+                files_seen.add(file)
+                for ssym in seen_syms[file]:
+                    if ssym not in seen_where:
+                        seen_where[ssym] = ('%s -> [%s] %s'
+                                            % (cu_sym, file, ssym))
+                for usym in strong_undef_syms[file]:
+                    if usym not in all_undef:
+                        usym_text = '%s -> [%s] %s' % (cu_sym, file, usym)
+                        all_undef[usym] = usym_text
+                        new_undef[usym] = usym_text
+        current_undef = new_undef
+
+    ret = 0
+    for sym in sorted(seen_where):
+        if sym.startswith('_'):
+            continue
+        if sym in stdsyms:
+            continue
+        print(seen_where[sym])
+        ret = 1
+    sys.exit(ret)
+
+
+if __name__ == '__main__':
+    main()