about summary refs log tree commit diff
path: root/posix
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2002-07-10 23:09:16 +0000
committerUlrich Drepper <drepper@redhat.com>2002-07-10 23:09:16 +0000
commit81c64d407c82933f18dc09de9bf58cc76d6a6148 (patch)
tree25a6b5492c2edae9d761ca208d147d726d516804 /posix
parentf4741da2e730191150e4394237716fb29d61845c (diff)
downloadglibc-81c64d407c82933f18dc09de9bf58cc76d6a6148.tar.gz
glibc-81c64d407c82933f18dc09de9bf58cc76d6a6148.tar.xz
glibc-81c64d407c82933f18dc09de9bf58cc76d6a6148.zip
Update.
2002-07-10  Ulrich Drepper  <drepper@redhat.com>
	* Versions.def [libpthread]: Add GLIBC_2.2.6.
	* posix/Versions [libc] (GLIBC_2.2.6): Add __nanosleep.

2002-07-06  Bruno Haible  <bruno@clisp.org>

	* sysdeps/unix/sysv/sysv4/bits/sigset.h (__NSSBITS): Correct value.
	* sysdeps/unix/sysv/linux/bits/statvfs.h (ST_NODIRATIME): Set to 2048.
Diffstat (limited to 'posix')
-rw-r--r--posix/Makefile3
-rw-r--r--posix/Versions4
-rw-r--r--posix/bug-regex4.c1
-rw-r--r--posix/bug-regex6.c74
-rw-r--r--posix/regcomp.c60
-rw-r--r--posix/regex_internal.h45
-rw-r--r--posix/regexec.c94
7 files changed, 195 insertions, 86 deletions
diff --git a/posix/Makefile b/posix/Makefile
index 008154de9a..db58cf5c7c 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -71,7 +71,7 @@ tests		:= tstgetopt testfnm runtests runptests	     \
 		   tst-getlogin tst-mmap tst-getaddrinfo tst-truncate \
 		   tst-truncate64 tst-fork tst-fnmatch tst-regexloc tst-dir \
 		   tst-chmod bug-regex1 bug-regex2 bug-regex3 bug-regex4 \
-		   tst-gnuglob tst-regex bug-regex5
+		   tst-gnuglob tst-regex bug-regex5 bug-regex6
 ifeq (yes,$(build-shared))
 test-srcs	:= globtest
 tests           += wordexp-test tst-exec tst-spawn
@@ -125,6 +125,7 @@ tst-regexloc-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex1-ENV = LOCPATH=$(common-objpfx)localedata
 tst-regex-ENV = LOCPATH=$(common-objpfx)localedata
 bug-regex5-ENV = LOCPATH=$(common-objpfx)localedata
+bug-regex6-ENV = LOCPATH=$(common-objpfx)localedata
 
 testcases.h: TESTS TESTS2C.sed
 	sed -f TESTS2C.sed < $< > $@T
diff --git a/posix/Versions b/posix/Versions
index c79a042aa3..07dc49a4d4 100644
--- a/posix/Versions
+++ b/posix/Versions
@@ -105,6 +105,10 @@ libc {
     # Extended Interface.
     fnmatch;
   }
+  GLIBC_2.2.6 {
+    # For syscall wrapper
+    __nanosleep;
+  }
   GLIBC_PRIVATE {
     # functions which have an additional interface since they are
     # are cancelable.
diff --git a/posix/bug-regex4.c b/posix/bug-regex4.c
index 76841e9db3..5ecca1a8d0 100644
--- a/posix/bug-regex4.c
+++ b/posix/bug-regex4.c
@@ -35,7 +35,6 @@ main (void)
 
   setlocale (LC_ALL, "C");
 
-  setlocale (LC_ALL, "C");
   s = re_compile_pattern ("ab[cde]", 7, &regex);
   if (s != NULL)
     {
diff --git a/posix/bug-regex6.c b/posix/bug-regex6.c
new file mode 100644
index 0000000000..9a06898a26
--- /dev/null
+++ b/posix/bug-regex6.c
@@ -0,0 +1,74 @@
+/* Test for regexec.
+   Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Jakub Jelinek <jakub@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sys/types.h>
+#include <regex.h>
+#include <locale.h>
+#include <stdio.h>
+
+int
+main (int argc, char *argv[])
+{
+  regex_t re;
+  regmatch_t mat[10];
+  int i, j, ret = 0;
+  char *locales[] = { "C", "de_DE.UTF-8" };
+  char *string = "http://www.regex.com/pattern/matching.html#intro";
+  regmatch_t expect[10] = {
+    { 0, 48 }, { 0, 5 }, { 0, 4 }, { 5, 20 }, { 7, 20 }, { 20, 42 },
+    { -1, -1 }, { -1, -1 }, { 42, 48 }, { 43, 48 } };
+
+  for (i = 0; i < sizeof (locales) / sizeof (locales[0]); ++i)
+    {
+      if (setlocale (LC_ALL, locales[i]) == NULL)
+	{
+	  puts ("cannot set locale");
+	  ret = 1;
+	}
+      else if (regcomp (&re,
+			"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?",
+			REG_EXTENDED) != REG_NOERROR)
+	{
+	  puts ("cannot compile expression \"[a-f]*\"");
+	  ret = 1;
+	}
+      else if (regexec (&re, string, 10, mat, 0) == REG_NOMATCH)
+	{
+	  puts ("no match");
+	  ret = 1;
+	}
+      else
+	{
+	  if (! memcmp (mat, expect, sizeof (mat)))
+	    printf ("matching ok for %s locale\n", locales[i]);
+	  else
+	    {
+	      printf ("matching failed for %s locale:\n", locales[i]);
+	      ret = 1;
+	      for (j = 0; j < 9; ++j)
+		if (mat[j].rm_so != -1)
+		  printf ("%d: %.*s\n", j, mat[j].rm_eo - mat[j].rm_so,
+			  string + mat[j].rm_so);
+	    }
+	}
+    }
+
+  return ret;
+}
diff --git a/posix/regcomp.c b/posix/regcomp.c
index b9b056065f..513604274c 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -724,7 +724,6 @@ re_compile_internal (preg, pattern, length, syntax)
       dfa = re_realloc (preg->buffer, re_dfa_t, 1);
       if (dfa == NULL)
 	return REG_ESPACE;
-      memset (dfa, '\0', sizeof (re_dfa_t));
       preg->allocated = sizeof (re_dfa_t);
     }
   preg->buffer = (unsigned char *) dfa;
@@ -781,6 +780,9 @@ init_dfa (dfa, pat_len)
      int pat_len;
 {
   int table_size;
+
+  memset (dfa, '\0', sizeof (re_dfa_t));
+
   dfa->nodes_alloc = pat_len + 1;
   dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
 
@@ -1001,8 +1003,6 @@ calc_first (dfa, node)
   switch (type)
     {
 #ifdef DEBUG
-    case OP_OPEN_SUBEXP:
-    case OP_CLOSE_SUBEXP:
     case OP_OPEN_BRACKET:
     case OP_CLOSE_BRACKET:
     case OP_OPEN_DUP_NUM:
@@ -1028,6 +1028,8 @@ calc_first (dfa, node)
     case SIMPLE_BRACKET:
     case OP_BACK_REF:
     case ANCHOR:
+    case OP_OPEN_SUBEXP:
+    case OP_CLOSE_SUBEXP:
       node->first = idx;
       break;
     case OP_DUP_PLUS:
@@ -1041,14 +1043,6 @@ calc_first (dfa, node)
     case OP_ALT:
       node->first = idx;
       break;
-    case SUBEXP:
-      if (node->left == NULL)
-        {
-          if (node->next == -1)
-            calc_next (dfa, node);
-          node->first = node->next;
-          break;
-        }
       /* else fall through */
     default:
 #ifdef DEBUG
@@ -1161,7 +1155,9 @@ calc_epsdest (dfa, node)
             }
           re_node_set_init_2 (dfa->edests + idx, left, right);
         }
-      else if (dfa->nodes[idx].type == ANCHOR)
+      else if (dfa->nodes[idx].type == ANCHOR
+               || dfa->nodes[idx].type == OP_OPEN_SUBEXP
+               || dfa->nodes[idx].type == OP_CLOSE_SUBEXP)
         re_node_set_init_1 (dfa->edests + idx, node->next);
     }
 }
@@ -2055,8 +2051,9 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
      reg_errcode_t *err;
 {
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  bin_tree_t *tree;
+  bin_tree_t *tree, *left_par, *right_par;
   size_t cur_nsub;
+  int new_idx;
   cur_nsub = preg->re_nsub++;
   if (dfa->subexps_alloc < preg->re_nsub)
     {
@@ -2073,30 +2070,39 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
     }
   dfa->subexps[cur_nsub].start = dfa->nodes_len;
   dfa->subexps[cur_nsub].end = -1;
+
+  new_idx = re_dfa_add_node (dfa, *token, 0);
+  left_par = create_tree (NULL, NULL, 0, new_idx);
+  if (BE (new_idx == -1 || left_par == NULL, 0))
+    return *err = REG_ESPACE, NULL;
+  dfa->nodes[new_idx].opr.idx = cur_nsub;
   *token = fetch_token (regexp, syntax);
 
   /* The subexpression may be a null string.  */
   if (token->type == OP_CLOSE_SUBEXP)
-    {
-      tree = create_tree (NULL, NULL, SUBEXP, 0);
-      if (BE (tree == NULL, 0))
-        return *err = REG_ESPACE, NULL;
-      dfa->subexps[cur_nsub].end = dfa->nodes_len;
-    }
+    tree = NULL;
   else
     {
       tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
       if (BE (*err != REG_NOERROR && tree == NULL, 0))
         return NULL;
-      dfa->subexps[cur_nsub].end = dfa->nodes_len;
-      if (BE (token->type != OP_CLOSE_SUBEXP, 0))
-        {
-          free_bin_tree (tree);
-          *err = REG_BADPAT;
-          return NULL;
-        }
-      tree = create_tree (tree, NULL, SUBEXP, 0);
     }
+  if (BE (token->type != OP_CLOSE_SUBEXP, 0))
+    {
+      free_bin_tree (tree);
+      *err = REG_BADPAT;
+      return NULL;
+    }
+  new_idx = re_dfa_add_node (dfa, *token, 0);
+  dfa->subexps[cur_nsub].end = dfa->nodes_len;
+  right_par = create_tree (NULL, NULL, 0, new_idx);
+  tree = ((tree == NULL) ? right_par
+          : create_tree (tree, right_par, CONCAT, 0));
+  tree = create_tree (left_par, tree, CONCAT, 0);
+  if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
+    return *err = REG_ESPACE, NULL;
+  dfa->nodes[new_idx].opr.idx = cur_nsub;
+
   return tree;
 }
 
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 95ae46e297..20622540e8 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -96,8 +96,6 @@ typedef enum
   NON_TYPE = 0,
 
   /* Token type, these are used only by token.  */
-  OP_OPEN_SUBEXP,
-  OP_CLOSE_SUBEXP,
   OP_OPEN_BRACKET,
   OP_CLOSE_BRACKET,
   OP_CHARSET_RANGE,
@@ -124,6 +122,8 @@ typedef enum
 #endif /* RE_ENABLE_I18N */
 
   /* Node type, These are used by token, node, tree.  */
+  OP_OPEN_SUBEXP,
+  OP_CLOSE_SUBEXP,
   OP_PERIOD,
   CHARACTER,
   END_OF_RE,
@@ -142,24 +142,18 @@ typedef enum
 #ifdef RE_ENABLE_I18N
 typedef struct
 {
-  /* If this character set is the non-matching list.  */
-  unsigned int non_match : 1;
-
   /* Multibyte characters.  */
   wchar_t *mbchars;
-  int nmbchars;
 
   /* Collating symbols.  */
 # ifdef _LIBC
   int32_t *coll_syms;
 # endif
-  int ncoll_syms;
 
   /* Equivalence classes. */
 # ifdef _LIBC
   int32_t *equiv_classes;
 # endif
-  int nequiv_classes;
 
   /* Range expressions. */
 # ifdef _LIBC
@@ -169,17 +163,32 @@ typedef struct
   wchar_t *range_starts;
   wchar_t *range_ends;
 # endif /* not _LIBC */
-  int nranges;
 
   /* Character classes. */
   wctype_t *char_classes;
+
+  /* If this character set is the non-matching list.  */
+  unsigned int non_match : 1;
+
+  /* # of multibyte characters.  */
+  int nmbchars;
+
+  /* # of collating symbols.  */
+  int ncoll_syms;
+
+  /* # of equivalence classes. */
+  int nequiv_classes;
+
+  /* # of range expressions. */
+  int nranges;
+
+  /* # of character classes. */
   int nchar_classes;
 } re_charset_t;
 #endif /* RE_ENABLE_I18N */
 
 typedef struct
 {
-  re_token_type_t type;
   union
   {
     unsigned char c;		/* for CHARACTER */
@@ -195,6 +204,11 @@ typedef struct
       re_node_set *bkref_eclosure;
     } *ctx_info;
   } opr;
+#if __GNUC__ >= 2
+  re_token_type_t type : 8;
+#else
+  re_token_type_t type;
+#endif
   unsigned int constraint : 10;	/* context constraint */
   unsigned int duplicated : 1;
 #ifdef RE_ENABLE_I18N
@@ -203,8 +217,9 @@ typedef struct
 } re_token_t;
 
 #define IS_EPSILON_NODE(type) \
-  ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS || \
-   (type) == OP_DUP_QUESTION || (type) == ANCHOR)
+  ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
+   || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
+   || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
 
 #define ACCEPT_MB_NODE(type) \
   ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
@@ -214,9 +229,6 @@ struct re_string_t
   /* Indicate the raw buffer which is the original string passed as an
      argument of regexec(), re_search(), etc..  */
   const unsigned char *raw_mbs;
-  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
-     raw_mbs[raw_mbs_idx + i].  */
-  int raw_mbs_idx;
   /* Store the multibyte string.  In case of "case insensitive mode" like
      REG_ICASE, upper cases of the string are stored, otherwise MBS points
      the same address that RAW_MBS points.  */
@@ -230,6 +242,9 @@ struct re_string_t
   wint_t *wcs;
   mbstate_t cur_state;
 #endif
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
   /* The length of the valid characters in the buffers.  */
   int valid_len;
   /* The length of the buffers MBS, MBS_CASE, and WCS.  */
diff --git a/posix/regexec.c b/posix/regexec.c
index 2c7a2774eb..5dd3a06827 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -58,6 +58,8 @@ static int check_halt_node_context (const re_dfa_t *dfa, int node,
 static int check_halt_state_context (const regex_t *preg,
                                      const re_dfastate_t *state,
                                      const re_match_context_t *mctx, int idx);
+static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
+                         int cur_idx, int nmatch);
 static int proceed_next_node (const regex_t *preg,
                               const re_match_context_t *mctx,
                               int *pidx, int node, re_node_set *eps_via_nodes);
@@ -886,24 +888,38 @@ proceed_next_node (preg, mctx, pidx, node, eps_via_nodes)
     re_node_set *eps_via_nodes;
 {
   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
-  int i, dest_node = -1, err;
+  int i, err, dest_node, cur_entity;
+  dest_node = -1;
+  cur_entity = ((dfa->nodes[node].type == OP_CONTEXT_NODE)
+                ? dfa->nodes[node].opr.ctx_info->entity : node);
   if (IS_EPSILON_NODE (dfa->nodes[node].type))
     {
+      int dest_entity = INT_MAX;
       err = re_node_set_insert (eps_via_nodes, node);
       if (BE (err < 0, 0))
         return -1;
       for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
         {
-          int candidate = mctx->state_log[*pidx]->nodes.elems[i];
-          if (!re_node_set_contains (dfa->edests + node, candidate)
-              && !(dfa->nodes[candidate].type == OP_CONTEXT_NODE
-                   && re_node_set_contains (dfa->edests + node,
-                            dfa->nodes[candidate].opr.ctx_info->entity)))
-            continue;
-          dest_node = candidate;
+          int candidate, candidate_entity;
+          candidate = mctx->state_log[*pidx]->nodes.elems[i];
+          candidate_entity = ((dfa->nodes[candidate].type == OP_CONTEXT_NODE)
+                              ? dfa->nodes[candidate].opr.ctx_info->entity
+                              : candidate);
+          if (!re_node_set_contains (dfa->edests + node, candidate))
+            if (candidate == candidate_entity
+                || !re_node_set_contains (dfa->edests + node, candidate_entity))
+              continue;
+
           /* In order to avoid infinite loop like "(a*)*".  */
-          if (!re_node_set_contains (eps_via_nodes, dest_node))
-            break;
+          if (cur_entity > candidate_entity
+              && re_node_set_contains (eps_via_nodes, candidate))
+            continue;
+
+          if (dest_entity > candidate_entity)
+            {
+              dest_node = candidate;
+              dest_entity = candidate_entity;
+            }
         }
 #ifdef DEBUG
       assert (dest_node != -1);
@@ -986,9 +1002,8 @@ set_regs (preg, mctx, nmatch, pmatch, last_node)
     int last_node;
 {
   re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
-  int idx, cur_node, node_entity, real_nmatch;
+  int idx, cur_node, real_nmatch;
   re_node_set eps_via_nodes;
-  int i;
 #ifdef DEBUG
   assert (nmatch > 1);
   assert (mctx->state_log != NULL);
@@ -998,36 +1013,7 @@ set_regs (preg, mctx, nmatch, pmatch, last_node)
   re_node_set_init_empty (&eps_via_nodes);
   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
     {
-      node_entity = ((dfa->nodes[cur_node].type == OP_CONTEXT_NODE)
-                     ? dfa->nodes[cur_node].opr.ctx_info->entity : cur_node);
-      for (i = 1; i < real_nmatch; ++i)
-        {
-          if (dfa->subexps[i - 1].start == dfa->subexps[i - 1].end)
-            {
-              /* In case of the null subexpression like '()'.  */
-              if (dfa->subexps[i - 1].start == node_entity)
-                {
-                  pmatch[i].rm_so = idx;
-                  pmatch[i].rm_eo = idx;
-                }
-            }
-          else if (dfa->subexps[i - 1].start <= node_entity
-                   && node_entity < dfa->subexps[i - 1].end)
-            {
-              if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo != -1)
-                /* We are at the first node of this sub expression.  */
-                {
-                  pmatch[i].rm_so = idx;
-                  pmatch[i].rm_eo = -1;
-                }
-            }
-          else
-            {
-              if (pmatch[i].rm_so != -1 && pmatch[i].rm_eo == -1)
-                /* We are at the last node of this sub expression.  */
-                pmatch[i].rm_eo = idx;
-            }
-        }
+      update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
       if (idx == pmatch[0].rm_eo && cur_node == last_node)
         break;
 
@@ -1040,6 +1026,30 @@ set_regs (preg, mctx, nmatch, pmatch, last_node)
   return REG_NOERROR;
 }
 
+static void
+update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
+     re_dfa_t *dfa;
+     regmatch_t *pmatch;
+     int cur_node, cur_idx, nmatch;
+{
+  int type = dfa->nodes[cur_node].type;
+  int reg_num;
+  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
+    return;
+  reg_num = dfa->nodes[cur_node].opr.idx + 1;
+  if (reg_num >= nmatch)
+    return;
+  if (type == OP_OPEN_SUBEXP)
+    {
+      /* We are at the first node of this sub expression.  */
+      pmatch[reg_num].rm_so = cur_idx;
+      pmatch[reg_num].rm_eo = -1;
+    }
+  else if (type == OP_CLOSE_SUBEXP)
+    /* We are at the first node of this sub expression.  */
+    pmatch[reg_num].rm_eo = cur_idx;
+ }
+
 #define NUMBER_OF_STATE 1
 
 /* This function checks the STATE_LOG from the MCTX->match_last to 0