about summary refs log tree commit diff
path: root/posix/regex.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2001-06-27 16:15:39 +0000
committerUlrich Drepper <drepper@redhat.com>2001-06-27 16:15:39 +0000
commit64333c6623d36b69906590454cbe097d1bb598a3 (patch)
tree0a66e08235101934a498d62484bb89bc55a2d974 /posix/regex.c
parent7bcad280635fa97b7929f3427284cf5189363e95 (diff)
downloadglibc-64333c6623d36b69906590454cbe097d1bb598a3.tar.gz
glibc-64333c6623d36b69906590454cbe097d1bb598a3.tar.xz
glibc-64333c6623d36b69906590454cbe097d1bb598a3.zip
Update.
	* posix/Makefile: Add rules to build and run tst-regex.

2001-06-20 Isamu Hasegawa <isamu@yamato.ibm.com>

	* posix/regex.c (FREE_WCS_BUFFERS): New macro to free buffers.
	(re_search_2): invoke convert_mbs_to_wcs and FREE_WCS_BUFFERS.
	(wcs_re_match_2_internal): Check whether the wcs buffers need
	seting up or not, and skip seting up routin if not needed.

2001-06-26 Isamu Hasegawa <isamu@yamato.ibm.com>

	* posix/regex.c (count_mbs_length): Use binary search for
	optimization.

2001-06-27  Ulrich Drepper  <drepper@redhat.com>
Diffstat (limited to 'posix/regex.c')
-rw-r--r--posix/regex.c276
1 files changed, 207 insertions, 69 deletions
diff --git a/posix/regex.c b/posix/regex.c
index aedabc6f93..a2f20103cc 100644
--- a/posix/regex.c
+++ b/posix/regex.c
@@ -416,11 +416,14 @@ static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
 					     struct re_registers *regs,
 					     int stop));
 static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
-					    const char *string1, int size1,
-					    const char *string2, int size2,
+					    const char *cstring1, int csize1,
+					    const char *cstring2, int csize2,
 					    int pos,
 					    struct re_registers *regs,
-					    int stop));
+					    int stop,
+					    wchar_t *string1, int size1,
+					    wchar_t *string2, int size2,
+					    int *mbs_offset1, int *mbs_offset2));
 static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
 				     const char *string1, int size1,
 				     const char *string2, int size2,
@@ -1253,7 +1256,7 @@ convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
   size_t wc_count = 0;
 
   mbstate_t mbs;
-  int consumed;
+  int i, consumed;
   size_t mb_remain = len;
   size_t mb_count = 0;
 
@@ -1286,6 +1289,10 @@ convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
       offset_buffer[wc_count + 1] = mb_count += consumed;
     }
 
+  /* Fill remain of the buffer with sentinel.  */
+  for (i = wc_count + 1 ; i <= len ; i++)
+    offset_buffer[i] = mb_count + 1;
+
   return wc_count;
 }
 
@@ -5047,6 +5054,23 @@ weak_alias (__re_search_2, re_search_2)
 
 #ifdef INSIDE_RECURSION
 
+#ifdef MATCH_MAY_ALLOCATE
+# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
+#else
+# define FREE_VAR(var) if (var) free (var); var = NULL
+#endif
+
+#ifdef WCHAR
+# define FREE_WCS_BUFFERS()    \
+  do {                         \
+    FREE_VAR (string1);                \
+    FREE_VAR (string2);                \
+    FREE_VAR (mbs_offset1);    \
+    FREE_VAR (mbs_offset2);    \
+  } while (0)
+
+#endif
+
 static int
 PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
 		     regs, stop)
@@ -5063,6 +5087,16 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
   register RE_TRANSLATE_TYPE translate = bufp->translate;
   int total_size = size1 + size2;
   int endpos = startpos + range;
+#ifdef WCHAR
+  /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
+  wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
+  /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
+  int wcs_size1 = 0, wcs_size2 = 0;
+  /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
+  int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
+  /* They hold whether each wchar_t is binary data or not.  */
+  char *is_binary = NULL;
+#endif /* WCHAR */
 
   /* Check for out-of-range STARTPOS.  */
   if (startpos < 0 || startpos > total_size)
@@ -5106,6 +5140,45 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
     if (re_compile_fastmap (bufp) == -2)
       return -2;
 
+#ifdef WCHAR
+  /* Allocate wchar_t array for wcs_string1 and wcs_string2 and
+     fill them with converted string.  */
+  if (size1 != 0)
+    {
+      wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
+      mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
+      is_binary = REGEX_TALLOC (size1 + 1, char);
+      if (!wcs_string1 || !mbs_offset1 || !is_binary)
+	{
+	  FREE_VAR (wcs_string1);
+	  FREE_VAR (mbs_offset1);
+	  FREE_VAR (is_binary);
+	  return -2;
+	}
+      wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
+				     mbs_offset1, is_binary);
+      wcs_string1[wcs_size1] = L'\0'; /* for a sentinel  */
+      FREE_VAR (is_binary);
+    }
+  if (size2 != 0)
+    {
+      wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
+      mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
+      is_binary = REGEX_TALLOC (size2 + 1, char);
+      if (!wcs_string2 || !mbs_offset2 || !is_binary)
+	{
+	  FREE_WCS_BUFFERS ();
+	  FREE_VAR (is_binary);
+	  return -2;
+	}
+      wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
+				     mbs_offset2, is_binary);
+      wcs_string2[wcs_size2] = L'\0'; /* for a sentinel  */
+      FREE_VAR (is_binary);
+    }
+#endif /* WCHAR */
+
+
   /* Loop through the string, looking for a place to start matching.  */
   for (;;)
     {
@@ -5153,10 +5226,24 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
       /* If can't match the null string, and that's all we have left, fail.  */
       if (range >= 0 && startpos == total_size && fastmap
           && !bufp->can_be_null)
-	return -1;
+       {
+#ifdef WCHAR
+         FREE_WCS_BUFFERS ();
+#endif
+         return -1;
+       }
+
+#ifdef WCHAR
+      val = wcs_re_match_2_internal (bufp, string1, size1, string2,
+				     size2, startpos, regs, stop,
+				     wcs_string1, wcs_size1,
+				     wcs_string2, wcs_size2,
+				     mbs_offset1, mbs_offset2);
+#else /* BYTE */
+      val = byte_re_match_2_internal (bufp, string1, size1, string2,
+				      size2, startpos, regs, stop);
+#endif /* BYTE */
 
-    val = PREFIX(re_match_2_internal) (bufp, string1, size1, string2,
-				       size2, startpos, regs, stop);
 #ifndef REGEX_MALLOC
 # ifdef C_ALLOCA
       alloca (0);
@@ -5164,10 +5251,20 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
 #endif
 
       if (val >= 0)
-	return startpos;
+	{
+#ifdef WCHAR
+	  FREE_WCS_BUFFERS ();
+#endif
+	  return startpos;
+	}
 
       if (val == -2)
-	return -2;
+	{
+#ifdef WCHAR
+	  FREE_WCS_BUFFERS ();
+#endif
+	  return -2;
+	}
 
     advance:
       if (!range)
@@ -5183,6 +5280,9 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
           startpos--;
         }
     }
+#ifdef WCHAR
+  FREE_WCS_BUFFERS ();
+#endif
   return -1;
 }
 
@@ -5255,7 +5355,6 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
 
 /* Free everything we malloc.  */
 #ifdef MATCH_MAY_ALLOCATE
-# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
 # ifdef WCHAR
 #  define FREE_VARIABLES()						\
   do {									\
@@ -5269,10 +5368,13 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
     FREE_VAR (reg_info);						\
     FREE_VAR (reg_dummy);						\
     FREE_VAR (reg_info_dummy);						\
-    FREE_VAR (string1);							\
-    FREE_VAR (string2);							\
-    FREE_VAR (mbs_offset1);						\
-    FREE_VAR (mbs_offset2);						\
+    if (!cant_free_wcs_buf)						\
+      {									\
+        FREE_VAR (string1);						\
+        FREE_VAR (string2);						\
+        FREE_VAR (mbs_offset1);						\
+        FREE_VAR (mbs_offset2);						\
+      }									\
   } while (0)
 # else /* BYTE */
 #  define FREE_VARIABLES()						\
@@ -5290,14 +5392,16 @@ PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
   } while (0)
 # endif /* WCHAR */
 #else
-# define FREE_VAR(var) if (var) free (var); var = NULL
 # ifdef WCHAR
 #  define FREE_VARIABLES()						\
   do {									\
-    FREE_VAR (string1);							\
-    FREE_VAR (string2);							\
-    FREE_VAR (mbs_offset1);						\
-    FREE_VAR (mbs_offset2);						\
+    if (!cant_free_wcs_buf)						\
+      {									\
+        FREE_VAR (string1);						\
+        FREE_VAR (string2);						\
+        FREE_VAR (mbs_offset1);						\
+        FREE_VAR (mbs_offset2);						\
+      }									\
   } while (0)
 # else /* BYTE */
 #  define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
@@ -5331,7 +5435,8 @@ re_match (bufp, string, size, pos, regs)
 # ifdef MBS_SUPPORT
   if (MB_CUR_MAX != 1)
     result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
-				  pos, regs, size);
+				      pos, regs, size,
+				      NULL, 0, NULL, 0, NULL, NULL);
   else
 # endif
     result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
@@ -5390,7 +5495,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
 # ifdef MBS_SUPPORT
   if (MB_CUR_MAX != 1)
     result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
-				  pos, regs, stop);
+				      pos, regs, stop,
+				      NULL, 0, NULL, 0, NULL, NULL);
   else
 # endif
     result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
@@ -5424,7 +5530,7 @@ count_mbs_length(offset_buffer, length)
      int *offset_buffer;
      int length;
 {
-  int wcs_size;
+  int upper, lower;
 
   /* Check whether the size is valid.  */
   if (length < 0)
@@ -5433,45 +5539,73 @@ count_mbs_length(offset_buffer, length)
   if (offset_buffer == NULL)
     return 0;
 
-  for (wcs_size = 0 ; offset_buffer[wcs_size] != -1 ; wcs_size++)
+  /* If there are no multibyte character, offset_buffer[i] == i.
+   Optmize for this case.  */
+  if (offset_buffer[length] == length)
+    return length;
+
+  /* Set up upper with length. (because for all i, offset_buffer[i] >= i)  */
+  upper = length;
+  lower = 0;
+
+  while (true)
     {
-      if (offset_buffer[wcs_size] == length)
-	return wcs_size;
-      if (offset_buffer[wcs_size] > length)
-	/* It is a fragment of a wide character.  */
-	return -1;
+      int middle = (lower + upper) / 2;
+      if (middle == lower || middle == upper)
+	break;
+      if (offset_buffer[middle] > length)
+	upper = middle;
+      else if (offset_buffer[middle] < length)
+	lower = middle;
+      else
+	return middle;
     }
 
-  /* We reached at the sentinel.  */
   return -1;
 }
 #endif /* WCHAR */
 
 /* This is a separate function so that we can force an alloca cleanup
    afterwards.  */
+#ifdef WCHAR
 static int
-PREFIX(re_match_2_internal) (bufp, ARG_PREFIX(string1), ARG_PREFIX(size1),
-			     ARG_PREFIX(string2), ARG_PREFIX(size2), pos,
-			     regs, stop)
+wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
+			 regs, stop, string1, size1, string2, size2,
+			 mbs_offset1, mbs_offset2)
      struct re_pattern_buffer *bufp;
-     const char *ARG_PREFIX(string1), *ARG_PREFIX(string2);
-     int ARG_PREFIX(size1), ARG_PREFIX(size2);
+     const char *cstring1, *cstring2;
+     int csize1, csize2;
      int pos;
      struct re_registers *regs;
      int stop;
+     /* string1 == string2 == NULL means string1/2, size1/2 and
+	mbs_offset1/2 need seting up in this function.  */
+     /* We need wchar_t* buffers correspond to cstring1, cstring2.  */
+     wchar_t *string1, *string2;
+     /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
+     int size1, size2;
+     /* offset buffer for optimizatoin. See convert_mbs_to_wc.  */
+     int *mbs_offset1, *mbs_offset2;
+#else /* BYTE */
+static int
+byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
+			  regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int pos;
+     struct re_registers *regs;
+     int stop;
+#endif /* BYTE */
 {
   /* General temporaries.  */
   int mcnt;
   UCHAR_T *p1;
 #ifdef WCHAR
-  /* We need wchar_t* buffers correspond to string1, string2.  */
-  CHAR_T *string1 = NULL, *string2 = NULL;
-  /* We need the size of wchar_t buffers correspond to csize1, csize2.  */
-  int size1 = 0, size2 = 0;
-  /* offset buffer for optimization. See convert_mbs_to_wc.  */
-  int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
   /* They hold whether each wchar_t is binary data or not.  */
   char *is_binary = NULL;
+  /* If true, we can't free string1/2, mbs_offset1/2.  */
+  int cant_free_wcs_buf = 1;
 #endif /* WCHAR */
 
   /* Just past the end of the corresponding string.  */
@@ -5648,41 +5782,45 @@ PREFIX(re_match_2_internal) (bufp, ARG_PREFIX(string1), ARG_PREFIX(size1),
 #ifdef WCHAR
   /* Allocate wchar_t array for string1 and string2 and
      fill them with converted string.  */
-  if (csize1 != 0)
+  if (string1 == NULL && string2 == NULL)
     {
-      string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
-      mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
-      is_binary = REGEX_TALLOC (csize1 + 1, char);
-      if (!string1 || !mbs_offset1 || !is_binary)
+      /* We need seting up buffers here.  */
+
+      /* We must free wcs buffers in this function.  */
+      cant_free_wcs_buf = 0;
+
+      if (csize1 != 0)
 	{
-	  FREE_VAR (string1);
-	  FREE_VAR (mbs_offset1);
-	  FREE_VAR (is_binary);
-	  return -2;
+	  string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
+	  mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
+	  is_binary = REGEX_TALLOC (csize1 + 1, char);
+	  if (!string1 || !mbs_offset1 || !is_binary)
+	    {
+	      FREE_VAR (string1);
+	      FREE_VAR (mbs_offset1);
+	      FREE_VAR (is_binary);
+	      return -2;
+	    }
 	}
-      size1 = convert_mbs_to_wcs(string1, cstring1, csize1,
-				 mbs_offset1, is_binary);
-      string1[size1] = L'\0'; /* for a sentinel  */
-      FREE_VAR (is_binary);
-    }
-  if (csize2 != 0)
-    {
-      string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
-      mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
-      is_binary = REGEX_TALLOC (csize2 + 1, char);
-      if (!string2 || !mbs_offset2 || !is_binary)
+      if (csize2 != 0)
 	{
-	  FREE_VAR (string1);
-	  FREE_VAR (mbs_offset1);
-	  FREE_VAR (string2);
-	  FREE_VAR (mbs_offset2);
+	  string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
+	  mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
+	  is_binary = REGEX_TALLOC (csize2 + 1, char);
+	  if (!string2 || !mbs_offset2 || !is_binary)
+	    {
+	      FREE_VAR (string1);
+	      FREE_VAR (mbs_offset1);
+	      FREE_VAR (string2);
+	      FREE_VAR (mbs_offset2);
+	      FREE_VAR (is_binary);
+	      return -2;
+	    }
+	  size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
+				     mbs_offset2, is_binary);
+	  string2[size2] = L'\0'; /* for a sentinel  */
 	  FREE_VAR (is_binary);
-	  return -2;
 	}
-      size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
-				 mbs_offset2, is_binary);
-      string2[size2] = L'\0'; /* for a sentinel  */
-      FREE_VAR (is_binary);
     }
 
   /* We need to cast pattern to (wchar_t*), because we casted this compiled