about summary refs log tree commit diff
path: root/posix/regex_internal.c
diff options
context:
space:
mode:
Diffstat (limited to 'posix/regex_internal.c')
-rw-r--r--posix/regex_internal.c120
1 files changed, 69 insertions, 51 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 821ed7f45f..240e8872b3 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -26,12 +26,13 @@ static void re_string_construct_common (const char *str, int len,
 static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx,
 				 wint_t *last_wc) internal_function;
 #endif /* RE_ENABLE_I18N */
-static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate,
+static reg_errcode_t register_state (const re_dfa_t *dfa,
+				     re_dfastate_t *newstate,
 				     unsigned int hash) internal_function;
-static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa,
+static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
 					  const re_node_set *nodes,
 					  unsigned int hash) internal_function;
-static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa,
+static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
 					  const re_node_set *nodes,
 					  unsigned int context,
 					  unsigned int hash) internal_function;
@@ -654,37 +655,50 @@ re_string_reconstruct (pstr, idx, eflags)
 		     byte other than 0x80 - 0xbf.  */
 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
 		  end = raw + (offset - pstr->mb_cur_max);
-		  for (p = raw + offset - 1; p >= end; --p)
-		    if ((*p & 0xc0) != 0x80)
-		      {
-			mbstate_t cur_state;
-			wchar_t wc2;
-			int mlen = raw + pstr->len - p;
-			unsigned char buf[6];
-
-			q = p;
-			if (BE (pstr->trans != NULL, 0))
-			  {
-			    int i = mlen < 6 ? mlen : 6;
-			    while (--i >= 0)
-			      buf[i] = pstr->trans[p[i]];
-			    q = buf;
-			  }
-			/* XXX Don't use mbrtowc, we know which conversion
-			   to use (UTF-8 -> UCS4).  */
-			memset (&cur_state, 0, sizeof (cur_state));
-			mlen = (mbrtowc (&wc2, (const char *) p, mlen,
-					 &cur_state)
-				- (raw + offset - p));
-			if (mlen >= 0)
-			  {
-			    memset (&pstr->cur_state, '\0',
-				    sizeof (mbstate_t));
-			    pstr->valid_len = mlen;
-			    wc = wc2;
-			  }
-			break;
-		      }
+		  p = raw + offset - 1;
+#ifdef _LIBC
+		  /* We know the wchar_t encoding is UCS4, so for the simple
+		     case, ASCII characters, skip the conversion step.  */
+		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
+		    {
+		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
+		      pstr->valid_len = 0;
+		      wc = (wchar_t) *p;
+		    }
+		  else
+#endif
+		    for (; p >= end; --p)
+		      if ((*p & 0xc0) != 0x80)
+			{
+			  mbstate_t cur_state;
+			  wchar_t wc2;
+			  int mlen = raw + pstr->len - p;
+			  unsigned char buf[6];
+			  size_t mbclen;
+
+			  q = p;
+			  if (BE (pstr->trans != NULL, 0))
+			    {
+			      int i = mlen < 6 ? mlen : 6;
+			      while (--i >= 0)
+				buf[i] = pstr->trans[p[i]];
+			      q = buf;
+			    }
+			  /* XXX Don't use mbrtowc, we know which conversion
+			     to use (UTF-8 -> UCS4).  */
+			  memset (&cur_state, 0, sizeof (cur_state));
+			  mbclen = mbrtowc (&wc2, (const char *) p, mlen,
+					    &cur_state);
+			  if (raw + offset - p <= mbclen
+			      && mbclen < (size_t) -2)
+			    {
+			      memset (&pstr->cur_state, '\0',
+				      sizeof (mbstate_t));
+			      pstr->valid_len = mbclen - (raw + offset - p);
+			      wc = wc2;
+			    }
+			  break;
+			}
 		}
 
 	      if (wc == WEOF)
@@ -738,15 +752,15 @@ re_string_reconstruct (pstr, idx, eflags)
     }
   else
 #endif /* RE_ENABLE_I18N */
-  if (BE (pstr->mbs_allocated, 0))
-    {
-      if (pstr->icase)
-	build_upper_buffer (pstr);
-      else if (pstr->trans != NULL)
-	re_string_translate_buffer (pstr);
-    }
-  else
-    pstr->valid_len = pstr->len;
+    if (BE (pstr->mbs_allocated, 0))
+      {
+	if (pstr->icase)
+	  build_upper_buffer (pstr);
+	else if (pstr->trans != NULL)
+	  re_string_translate_buffer (pstr);
+      }
+    else
+      pstr->valid_len = pstr->len;
 
   pstr->cur_idx = 0;
   return REG_NOERROR;
@@ -1345,12 +1359,16 @@ re_dfa_add_node (dfa, token)
   int type = token.type;
   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
     {
-      int new_nodes_alloc = dfa->nodes_alloc * 2;
+      size_t new_nodes_alloc = dfa->nodes_alloc * 2;
       int *new_nexts, *new_indices;
       re_node_set *new_edests, *new_eclosures;
+      re_token_t *new_nodes;
+
+      /* Avoid overflows.  */
+      if (BE (new_nodes_alloc < dfa->nodes_alloc, 0))
+	return -1;
 
-      re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t,
-					  new_nodes_alloc);
+      new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
       if (BE (new_nodes == NULL, 0))
 	return -1;
       dfa->nodes = new_nodes;
@@ -1403,7 +1421,7 @@ calc_state_hash (nodes, context)
 static re_dfastate_t*
 re_acquire_state (err, dfa, nodes)
      reg_errcode_t *err;
-     re_dfa_t *dfa;
+     const re_dfa_t *dfa;
      const re_node_set *nodes;
 {
   unsigned int hash;
@@ -1448,7 +1466,7 @@ re_acquire_state (err, dfa, nodes)
 static re_dfastate_t*
 re_acquire_state_context (err, dfa, nodes, context)
      reg_errcode_t *err;
-     re_dfa_t *dfa;
+     const re_dfa_t *dfa;
      const re_node_set *nodes;
      unsigned int context;
 {
@@ -1486,7 +1504,7 @@ re_acquire_state_context (err, dfa, nodes, context)
 
 static reg_errcode_t
 register_state (dfa, newstate, hash)
-     re_dfa_t *dfa;
+     const re_dfa_t *dfa;
      re_dfastate_t *newstate;
      unsigned int hash;
 {
@@ -1525,7 +1543,7 @@ register_state (dfa, newstate, hash)
 
 static re_dfastate_t *
 create_ci_newstate (dfa, nodes, hash)
-     re_dfa_t *dfa;
+     const re_dfa_t *dfa;
      const re_node_set *nodes;
      unsigned int hash;
 {
@@ -1576,7 +1594,7 @@ create_ci_newstate (dfa, nodes, hash)
 
 static re_dfastate_t *
 create_cd_newstate (dfa, nodes, context, hash)
-     re_dfa_t *dfa;
+     const re_dfa_t *dfa;
      const re_node_set *nodes;
      unsigned int context, hash;
 {