about summary refs log tree commit diff
path: root/posix/regexec.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2002-07-27 08:20:17 +0000
committerUlrich Drepper <drepper@redhat.com>2002-07-27 08:20:17 +0000
commitac3d553b8b5bcfbc7a13bd746966036422cf5275 (patch)
treebbec8a946e4ecb0f3e36fadc722aa00d8e000d1c /posix/regexec.c
parent85ae1f3949469ce2bb01a3f55708b83d09e84ec8 (diff)
downloadglibc-ac3d553b8b5bcfbc7a13bd746966036422cf5275.tar.gz
glibc-ac3d553b8b5bcfbc7a13bd746966036422cf5275.tar.xz
glibc-ac3d553b8b5bcfbc7a13bd746966036422cf5275.zip
Update.
2002-07-27  Ulrich Drepper  <drepper@redhat.com>

	* iconvdata/Makefile (CPPFLAGS): Add NOT_IN_libc.

	* iconv/iconv_prog.c (main): Improve error message for the cases
	where -t and/or -f parameter are missing.

2002-07-10  Stepan Kasal  <kasal@math.cas.cz>

	* posix/regexec.c (re_match): Reorganize to remove duplicate code.
	(re_search): Likewise.
	(re_match_2): Likewise.
	(re_search_2): Likewise.
	(re_search_stub): New function.
	(re_search_2_stub): New function.
	* regcomp.c (re_compile_pattern): Typo in a comment fixed.

2002-07-10  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regex_internal.h: Add new member STOP to struct re_string_t.
	(re_string_eoi): Use STOP instead of LEN.
	* posix/regex_internal.c (re_string_allocate): Initialize pstr->len.
	(re_string_construct): Likewise.
	(re_string_reconstruct): Adjust pstr->stop like pstr->len.
	* posix/regexec.c (re_search_internal): Add a new argument STOP
	so that it can handle the argument STOP of re_search_2 and re_match_2
	correctly.
	(regexec): Adapt to new I/F of re_search_internal.
	(re_search_stub): Likewise.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r--posix/regexec.c480
1 files changed, 244 insertions, 236 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index 5dd3a06827..74e3ab5cd2 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -45,8 +45,20 @@ static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
                                           int from, int to);
 static reg_errcode_t re_search_internal (const regex_t *preg,
                                          const char *string, int length,
-                                         int start, int range, size_t nmatch,
-                                         regmatch_t pmatch[], int eflags);
+                                         int start, int range, int stop,
+                                         size_t nmatch, regmatch_t pmatch[],
+                                         int eflags);
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+                             const char *string1, int length1,
+                             const char *string2, int length2,
+                             int start, int range, struct re_registers *regs,
+                             int stop, int ret_len);
+static int re_search_stub (struct re_pattern_buffer *bufp,
+                           const char *string, int length, int start,
+                           int range, int stop, struct re_registers *regs,
+                           int ret_len);
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+                              int nregs, int regs_allocated);
 static inline re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
                                                          const regex_t *preg,
                                                          const re_match_context_t *mctx,
@@ -150,10 +162,10 @@ regexec (preg, string, nmatch, pmatch, eflags)
   reg_errcode_t err;
   int length = strlen (string);
   if (preg->no_sub)
-    err = re_search_internal (preg, string, length, 0, length, 0,
+    err = re_search_internal (preg, string, length, 0, length, length, 0,
                               NULL, eflags);
   else
-    err = re_search_internal (preg, string, length, 0, length, nmatch,
+    err = re_search_internal (preg, string, length, 0, length, length, nmatch,
                               pmatch, eflags);
   return err != REG_NOERROR;
 }
@@ -163,285 +175,279 @@ weak_alias (__regexec, regexec)
 
 /* Entry points for GNU code.  */
 
-/* re_match is like re_match_2 except it takes only a single string.  */
+/* re_match, re_search, re_match_2, re_search_2
+
+   The former two functions operate on STRING with length LENGTH,
+   while the later two operate on concatenation of STRING1 and STRING2
+   with lengths LENGTH1 and LENGTH2, respectively.
+
+   re_match() matches the compiled pattern in BUFP against the string,
+   starting at index START.
+
+   re_search() first tries matching at index START, then it tries to match
+   starting from index START + 1, and so on.  The last start position tried
+   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
+   way as re_match().)
+
+   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+   the first STOP characters of the concatenation of the strings should be
+   concerned.
+
+   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+   and all groups is stroed in REGS.  (For the "_2" variants, the offsets are
+   computed relative to the concatenation, not relative to the individual
+   strings.)
+
+   On success, re_match* functions return the length of the match, re_search*
+   return the position of the start of the match.  Return value -1 means no
+   match was found and -2 indicates an internal error.  */
 
 int
-re_match (buffer, string, length, start, regs)
-    struct re_pattern_buffer *buffer;
+re_match (bufp, string, length, start, regs)
+    struct re_pattern_buffer *bufp;
     const char *string;
     int length, start;
     struct re_registers *regs;
 {
-  reg_errcode_t result;
-  int i, tmp_nregs, nregs, rval, eflags = 0;
-  regmatch_t *pmatch;
-
-  eflags |= (buffer->not_bol) ? REG_NOTBOL : 0;
-  eflags |= (buffer->not_eol) ? REG_NOTEOL : 0;
-
-  /* We need at least 1 register.  */
-  tmp_nregs = ((buffer->no_sub || regs == NULL || regs->num_regs < 1) ? 1
-               : regs->num_regs);
-  nregs = ((tmp_nregs < buffer->re_nsub + 1
-            && buffer->regs_allocated == REGS_FIXED) ? tmp_nregs
-           : buffer->re_nsub + 1);
-  pmatch = re_malloc (regmatch_t, nregs);
-  if (BE (pmatch == NULL, 0))
-    return -2;
-  result = re_search_internal (buffer, string, length, start, 0,
-                               nregs, pmatch, eflags);
-
-  /* If caller wants register contents data back, do it.  */
-  if (regs && !buffer->no_sub)
-    {
-      /* Have the register data arrays been allocated?  */
-      if (buffer->regs_allocated == REGS_UNALLOCATED)
-        { /* No.  So allocate them with malloc.  We need one
-             extra element beyond `num_regs' for the `-1' marker
-             GNU code uses.  */
-          regs->num_regs = buffer->re_nsub + 1;
-          regs->start = re_malloc (regoff_t, regs->num_regs);
-          regs->end = re_malloc (regoff_t, regs->num_regs);
-          if (BE (regs->start == NULL || regs->end == NULL, 0))
-            {
-              re_free (pmatch);
-              return -2;
-            }
-          buffer->regs_allocated = REGS_REALLOCATE;
-        }
-      else if (buffer->regs_allocated == REGS_REALLOCATE)
-        { /* Yes.  If we need more elements than were already
-             allocated, reallocate them.  If we need fewer, just
-             leave it alone.  */
-          if (regs->num_regs < buffer->re_nsub + 1)
-            {
-              regs->num_regs = buffer->re_nsub + 1;
-              regs->start = re_realloc (regs->start, regoff_t, regs->num_regs);
-              regs->end = re_realloc (regs->end, regoff_t, regs->num_regs);
-              if (BE (regs->start == NULL || regs->end == NULL, 0))
-                {
-                  re_free (pmatch);
-                  return -2;
-                }
-            }
-        }
-      else
-        {
-          /* These braces fend off a "empty body in an else-statement"
-             warning under GCC when assert expands to nothing.  */
-          assert (buffer->regs_allocated == REGS_FIXED);
-        }
-    }
-
-  /* Restore registers.  */
-  if (regs != NULL)
-    {
-      int max_regs = ((regs->num_regs < buffer->re_nsub + 1) ? regs->num_regs
-                      : buffer->re_nsub + 1);
-      for (i = 0; i < max_regs; ++i)
-        {
-          regs->start[i] = pmatch[i].rm_so;
-          regs->end[i] = pmatch[i].rm_eo;
-        }
-      for ( ; i < regs->num_regs; ++i)
-        {
-          regs->start[i] = -1;
-          regs->end[i] = -1;
-        }
-    }
-  /* Return value is -1 if not match, the length of mathing otherwise.  */
-  rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_eo - pmatch[0].rm_so;
-  re_free (pmatch);
-  return rval;
+  return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
 }
 #ifdef _LIBC
 weak_alias (__re_match, re_match)
 #endif
 
-/* re_match_2 matches the compiled pattern in BUFP against the
-   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
-   and SIZE2, respectively).  We start matching at POS, and stop
-   matching at STOP.
-
-   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
-   store offsets for the substring each group matched in REGS.  See the
-   documentation for exactly how many groups we fill.
-
-   We return -1 if no match, -2 if an internal error.
-   Otherwise, we return the length of the matched substring.  */
+int
+re_search (bufp, string, length, start, range, regs)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start, range;
+    struct re_registers *regs;
+{
+  return re_search_stub (bufp, string, length, start, range, length, regs, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search, re_search)
+#endif
 
 int
-re_match_2 (buffer, string1, length1, string2, length2, start, regs, stop)
-     struct re_pattern_buffer *buffer;
-     const char *string1, *string2;
-     int length1, length2, start, stop;
-     struct re_registers *regs;
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, stop;
+    struct re_registers *regs;
 {
-  int len, ret;
-  char *str = re_malloc (char, length1 + length2);
-  if (BE (str == NULL, 0))
-    return -2;
-  memcpy (str, string1, length1);
-  memcpy (str + length1, string2, length2);
-  len = (length1 + length2 < stop) ? length1 + length2 : stop;
-  ret = re_match (buffer, str, len, start, regs);
-  re_free (str);
-  return ret;
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, 0, regs, stop, 1);
 }
 #ifdef _LIBC
 weak_alias (__re_match_2, re_match_2)
 #endif
 
-/* Like re_search_2, below, but only one string is specified, and
-   doesn't let you say where to stop matching.  */
-
 int
-re_search (bufp, string, size, startpos, range, regs)
-     struct re_pattern_buffer *bufp;
-     const char *string;
-     int size, startpos, range;
-     struct re_registers *regs;
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, range, stop;
+    struct re_registers *regs;
+{
+  return re_search_2_stub (bufp, string1, length1, string2, length2,
+                           start, range, regs, stop, 0);
+}
+#ifdef _LIBC
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+                  stop, ret_len)
+    struct re_pattern_buffer *bufp;
+    const char *string1, *string2;
+    int length1, length2, start, range, stop, ret_len;
+    struct re_registers *regs;
+{
+  const char *str;
+  int rval;
+  int len = length1 + length2;
+  int free_str = 0;
+
+  if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+    return -2;
+
+  /* Concatenate the strings.  */
+  if (length2 > 0)
+    if (length1 > 0)
+      {
+        char *s = re_malloc (char, len);
+
+        if (BE (s == NULL, 0))
+          return -2;
+        memcpy (s, string1, length1);
+        memcpy (s + length1, string2, length2);
+        str = s;
+        free_str = 1;
+      }
+    else
+      str = string2;
+  else
+    str = string1;
+
+  rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+                         ret_len);
+  if (free_str)
+      re_free ((char *) str);
+  return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+   Additional parameters:
+   If RET_LEN is nonzero the length of the match is returned (re_match style);
+   otherwise the position of the match is returned.  */
+
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+    struct re_pattern_buffer *bufp;
+    const char *string;
+    int length, start, range, stop, ret_len;
+    struct re_registers *regs;
 {
   reg_errcode_t result;
-  int i, tmp_nregs, nregs, real_range, rval, eflags = 0;
   regmatch_t *pmatch;
+  int nregs, rval;
+  int eflags = 0;
+
+  /* Check for out-of-range.  */
+  if (BE (start < 0 || start > length || range < 0, 0))
+    return -1;
+  if (BE (start + range > length, 0))
+    range = length - start;
 
   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
 
-  /* Check for out-of-range.  */
-  if (BE (startpos < 0 || startpos > size, 0))
-    return -1;
+  /* Compile fastmap if we haven't yet.  */
+  if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+    re_compile_fastmap (bufp);
+
+  if (BE (bufp->no_sub, 0))
+    regs = NULL;
 
   /* We need at least 1 register.  */
-  tmp_nregs = ((bufp->no_sub || regs == NULL || regs->num_regs < 1) ? 1
-               : regs->num_regs);
-  nregs = ((tmp_nregs < bufp->re_nsub + 1
-            && bufp->regs_allocated == REGS_FIXED) ? tmp_nregs
-           : bufp->re_nsub + 1);
+  if (regs == NULL)
+    nregs = 1;
+  else if (BE (bufp->regs_allocated == REGS_FIXED &&
+               regs->num_regs < bufp->re_nsub + 1, 0))
+    {
+      nregs = regs->num_regs;
+      if (BE (nregs < 1, 0))
+        {
+          /* Nothing can be copied to regs.  */
+          regs = NULL;
+          nregs = 1;
+        }
+    }
+  else
+    nregs = bufp->re_nsub + 1;
   pmatch = re_malloc (regmatch_t, nregs);
   if (BE (pmatch == NULL, 0))
     return -2;
 
-  /* Correct range if we need.  */
-  real_range = ((startpos + range > size) ? size - startpos
-                : ((startpos + range < 0) ? -startpos : range));
-
-  /* Compile fastmap if we haven't yet.  */
-  if (bufp->fastmap != NULL && !bufp->fastmap_accurate)
-    re_compile_fastmap (bufp);
-
-  result = re_search_internal (bufp, string, size, startpos, real_range,
+  result = re_search_internal (bufp, string, length, start, range, stop,
                                nregs, pmatch, eflags);
 
-  /* If caller wants register contents data back, do it.  */
-  if (regs && !bufp->no_sub)
+  rval = 0;
+
+  /* I hope we needn't fill ther regs with -1's when no match was found.  */
+  if (result != REG_NOERROR)
+    rval = -1;
+  else if (regs != NULL)
     {
-      /* Have the register data arrays been allocated?  */
-      if (bufp->regs_allocated == REGS_UNALLOCATED)
-        { /* No.  So allocate them with malloc.  We need one
-             extra element beyond `num_regs' for the `-1' marker
-             GNU code uses.  */
-          regs->num_regs = bufp->re_nsub + 1;
-          regs->start = re_malloc (regoff_t, regs->num_regs);
-          regs->end = re_malloc (regoff_t, regs->num_regs);
-          if (BE (regs->start == NULL || regs->end == NULL, 0))
-            {
-              re_free (pmatch);
-              return -2;
-            }
-          bufp->regs_allocated = REGS_REALLOCATE;
-        }
-      else if (bufp->regs_allocated == REGS_REALLOCATE)
-        { /* Yes.  If we need more elements than were already
-             allocated, reallocate them.  If we need fewer, just
-             leave it alone.  */
-          if (regs->num_regs < bufp->re_nsub + 1)
-            {
-              regs->num_regs = bufp->re_nsub + 1;
-              regs->start = re_realloc (regs->start, regoff_t, regs->num_regs);
-              regs->end = re_realloc (regs->end, regoff_t, regs->num_regs);
-              if (BE (regs->start == NULL || regs->end == NULL, 0))
-                {
-                  re_free (pmatch);
-                  return -2;
-                }
-            }
-        }
-      else
-        {
-          /* These braces fend off a "empty body in an else-statement"
-             warning under GCC when assert expands to nothing.  */
-          assert (bufp->regs_allocated == REGS_FIXED);
-        }
+      /* If caller wants register contents data back, copy them.  */
+      bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+                                           bufp->regs_allocated);
+      if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+        rval = -2;
     }
 
-  /* Restore registers.  */
-  if (regs != NULL)
+  if (BE (rval == 0, 1))
     {
-      int max_regs = ((regs->num_regs < bufp->re_nsub + 1) ? regs->num_regs
-                      : bufp->re_nsub + 1);
-      for (i = 0; i < max_regs; ++i)
-        {
-          regs->start[i] = pmatch[i].rm_so;
-          regs->end[i] = pmatch[i].rm_eo;
-        }
-      for ( ; i < regs->num_regs; ++i)
+      if (ret_len)
         {
-          regs->start[i] = -1;
-          regs->end[i] = -1;
+          assert (pmatch[0].rm_so == 0);
+          rval = pmatch[0].rm_eo;
         }
+      else
+        rval = pmatch[0].rm_so;
     }
-  /* Return value is -1 if not match, the position where the mathing starts
-     otherwise.  */
-  rval = (result != REG_NOERROR) ? -1 : pmatch[0].rm_so;
   re_free (pmatch);
   return rval;
 }
-#ifdef _LIBC
-weak_alias (__re_search, re_search)
-#endif
-
-/* Using the compiled pattern in BUFP, first tries to match the virtual
-   concatenation of STRING1 and STRING2, starting first at index
-   STARTPOS, then at STARTPOS + 1, and so on.
-
-   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
-
-   RANGE is how far to scan while trying to match.  RANGE = 0 means try
-   only at STARTPOS; in general, the last start tried is STARTPOS +
-   RANGE.
-
-   In REGS, return the indices of the virtual concatenation of STRING1
-   and STRING2 that matched the entire BUFP->buffer and its contained
-   subexpressions.
-
-   Do not consider matching one past the index STOP in the virtual
-   concatenation of STRING1 and STRING2.
-
-   We return either the position in the strings at which the match was
-   found, -1 if no match, or -2 if error.  */
 
-int
-re_search_2 (bufp, string1, length1, string2, length2, start, range, regs,
-             stop)
-    struct re_pattern_buffer *bufp;
-    const char *string1, *string2;
-    int length1, length2, start, range, stop;
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
     struct re_registers *regs;
+    regmatch_t *pmatch;
+    int nregs, regs_allocated;
 {
-  int len, ret;
-  char *str = re_malloc (char, length1 + length2);
-  memcpy (str, string1, length1);
-  memcpy (str + length1, string2, length2);
-  len = (length1 + length2 < stop) ? length1 + length2 : stop;
-  ret = re_search (bufp, str, len, start, range, regs);
-  re_free (str);
-  return ret;
+  int rval = REGS_REALLOCATE;
+  int i;
+  int need_regs = nregs + 1;
+  /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+     uses.  */
+
+  /* Have the register data arrays been allocated?  */
+  if (regs_allocated == REGS_UNALLOCATED)
+    { /* No.  So allocate them with malloc.  */
+      regs->start = re_malloc (regoff_t, need_regs);
+      if (BE (regs->start == NULL, 0))
+        return REGS_UNALLOCATED;
+      regs->end = re_malloc (regoff_t, need_regs);
+      if (BE (regs->end == NULL, 0))
+        {
+          re_free (regs->start);
+          return REGS_UNALLOCATED;
+        }
+      regs->num_regs = need_regs;
+    }
+  else if (regs_allocated == REGS_REALLOCATE)
+    { /* Yes.  If we need more elements than were already
+         allocated, reallocate them.  If we need fewer, just
+         leave it alone.  */
+      if (need_regs > regs->num_regs)
+        {
+          regs->start = re_realloc (regs->start, regoff_t, need_regs);
+          if (BE (regs->start == NULL, 0))
+            {
+              if (regs->end != NULL)
+                re_free (regs->end);
+              return REGS_UNALLOCATED;
+            }
+          regs->end = re_realloc (regs->end, regoff_t, need_regs);
+          if (BE (regs->end == NULL, 0))
+            {
+              re_free (regs->start);
+              return REGS_UNALLOCATED;
+            }
+          regs->num_regs = need_regs;
+        }
+    }
+  else
+    {
+      assert (regs_allocated == REGS_FIXED);
+      /* This function may not be called with REGS_FIXED and nregs too big.  */
+      assert (regs->num_regs >= nregs);
+      rval = REGS_FIXED;
+    }
+
+  /* Copy the regs.  */
+  for (i = 0; i < nregs; ++i)
+    {
+      regs->start[i] = pmatch[i].rm_so;
+      regs->end[i] = pmatch[i].rm_eo;
+    }
+  for ( ; i < regs->num_regs; ++i)
+    regs->start[i] = regs->end[i] = -1;
+
+  return rval;
 }
-#ifdef _LIBC
-weak_alias (__re_search_2, re_search_2)
-#endif
 
 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
@@ -510,10 +516,11 @@ static re_node_set empty_set;
    (START + RANGE >= 0 && START + RANGE <= LENGTH)  */
 
 static reg_errcode_t
-re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+                    eflags)
     const regex_t *preg;
     const char *string;
-    int length, start, range, eflags;
+    int length, start, range, stop, eflags;
     size_t nmatch;
     regmatch_t pmatch[];
 {
@@ -541,6 +548,7 @@ re_search_internal (preg, string, length, start, range, nmatch, pmatch, eflags)
                             preg->translate, preg->syntax & RE_ICASE);
   if (BE (err != REG_NOERROR, 0))
     return err;
+  input.stop = stop;
 
   err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
   if (BE (err != REG_NOERROR, 0))