about summary refs log tree commit diff
path: root/posix/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'posix/regcomp.c')
-rw-r--r--posix/regcomp.c115
1 files changed, 60 insertions, 55 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 37e06797ac..c93f79ea24 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -19,11 +19,11 @@
    02111-1307 USA.  */
 
 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
-					  int length, reg_syntax_t syntax);
+					  size_t length, reg_syntax_t syntax);
 static void re_compile_fastmap_iter (regex_t *bufp,
 				     const re_dfastate_t *init_state,
 				     char *fastmap);
-static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
+static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
 static void init_word_char (re_dfa_t *dfa);
 #ifdef RE_ENABLE_I18N
 static void free_charset (re_charset_t *cset);
@@ -34,7 +34,6 @@ static reg_errcode_t create_initial_state (re_dfa_t *dfa);
 static void optimize_utf8 (re_dfa_t *dfa);
 #endif
 static reg_errcode_t analyze (regex_t *preg);
-static reg_errcode_t create_initial_state (re_dfa_t *dfa);
 static reg_errcode_t preorder (bin_tree_t *root,
 			       reg_errcode_t (fn (void *, bin_tree_t *)),
 			       void *extra);
@@ -51,9 +50,8 @@ static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
 static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
 					     int top_clone_node, int root_node,
 					     unsigned int constraint);
-static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
-				     unsigned int constraint);
-static int search_duplicated_node (re_dfa_t *dfa, int org_node,
+static int duplicate_node (re_dfa_t *dfa, int org_idx, unsigned int constraint);
+static int search_duplicated_node (const re_dfa_t *dfa, int org_node,
 				   unsigned int constraint);
 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
 static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
@@ -370,7 +368,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
 	  int i, j, ch;
 	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
 	    for (j = 0; j < UINT_BITS; ++j, ++ch)
-	      if (dfa->nodes[node].opr.sbcset[i] & (1 << j))
+	      if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
 		re_set_fastmap (fastmap, icase, ch);
 	}
 #ifdef RE_ENABLE_I18N
@@ -536,8 +534,8 @@ weak_alias (__regcomp, regcomp)
 size_t
 regerror (errcode, preg, errbuf, errbuf_size)
     int errcode;
-    const regex_t *preg;
-    char *errbuf;
+    const regex_t *__restrict preg;
+    char *__restrict errbuf;
     size_t errbuf_size;
 {
   const char *msg;
@@ -742,7 +740,7 @@ static reg_errcode_t
 re_compile_internal (preg, pattern, length, syntax)
      regex_t *preg;
      const char * pattern;
-     int length;
+     size_t length;
      reg_syntax_t syntax;
 {
   reg_errcode_t err = REG_NOERROR;
@@ -783,6 +781,7 @@ re_compile_internal (preg, pattern, length, syntax)
       return err;
     }
 #ifdef DEBUG
+  /* Note: length+1 will not overflow since it is checked in init_dfa.  */
   dfa->re_str = re_malloc (char, length + 1);
   strncpy (dfa->re_str, pattern, length + 1);
 #endif
@@ -842,9 +841,9 @@ re_compile_internal (preg, pattern, length, syntax)
 static reg_errcode_t
 init_dfa (dfa, pat_len)
      re_dfa_t *dfa;
-     int pat_len;
+     size_t pat_len;
 {
-  int table_size;
+  unsigned int table_size;
 #ifndef _LIBC
   char *codeset_name;
 #endif
@@ -854,13 +853,15 @@ init_dfa (dfa, pat_len)
   /* Force allocation of str_tree_storage the first time.  */
   dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
 
+  /* Avoid overflows.  */
+  if (pat_len == SIZE_MAX)
+    return REG_ESPACE;
+
   dfa->nodes_alloc = pat_len + 1;
   dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
 
-  dfa->states_alloc = pat_len + 1;
-
   /*  table_size = 2 ^ ceil(log pat_len) */
-  for (table_size = 1; table_size > 0; table_size <<= 1)
+  for (table_size = 1; ; table_size <<= 1)
     if (table_size > pat_len)
       break;
 
@@ -918,11 +919,11 @@ init_dfa (dfa, pat_len)
 	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
 	    for (j = 0; j < UINT_BITS; ++j, ++ch)
 	      {
-		wchar_t wch = __btowc (ch);
+		wint_t wch = __btowc (ch);
 		if (wch != WEOF)
-		  dfa->sb_char[i] |= 1 << j;
+		  dfa->sb_char[i] |= 1u << j;
 # ifndef _LIBC
-		if (isascii (ch) && wch != (wchar_t) ch)
+		if (isascii (ch) && wch != ch)
 		  dfa->map_notascii = 1;
 # endif
 	      }
@@ -948,7 +949,7 @@ init_word_char (dfa)
   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
     for (j = 0; j < UINT_BITS; ++j, ++ch)
       if (isalnum (ch) || ch == '_')
-	dfa->word_char[i] |= 1 << j;
+	dfa->word_char[i] |= 1u << j;
 }
 
 /* Free the work area which are only used while compiling.  */
@@ -1281,8 +1282,8 @@ optimize_subexps (extra, node)
         node->left->parent = node;
 
       dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
-      if (other_idx < 8 * sizeof (dfa->used_bkref_map))
-	dfa->used_bkref_map &= ~(1 << other_idx);
+      if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
+	dfa->used_bkref_map &= ~(1u << other_idx);
     }
 
   return REG_NOERROR;
@@ -1330,8 +1331,8 @@ lower_subexp (err, preg, node)
 	 very common, so we do not lose much.  An example that triggers
 	 this case is the sed "script" /\(\)/x.  */
       && node->left != NULL
-      && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
-	  || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
+      && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
+	  || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
     return node->left;
 
   /* Convert the SUBEXP node to the concatenation of an
@@ -1469,7 +1470,6 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
      int top_org_node, top_clone_node, root_node;
      unsigned int init_constraint;
 {
-  reg_errcode_t err;
   int org_node, clone_node, ret;
   unsigned int constraint = init_constraint;
   for (org_node = top_org_node, clone_node = top_clone_node;;)
@@ -1483,9 +1483,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
 	     edests of the back reference.  */
 	  org_dest = dfa->nexts[org_node];
 	  re_node_set_empty (dfa->edests + clone_node);
-	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
-	  if (BE (err != REG_NOERROR, 0))
-	    return err;
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
 	  dfa->nexts[clone_node] = dfa->nexts[org_node];
 	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
 	  if (BE (ret < 0, 0))
@@ -1521,9 +1521,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
 		}
 	      constraint |= dfa->nodes[org_node].opr.ctx_type;
 	    }
-	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
-	  if (BE (err != REG_NOERROR, 0))
-	    return err;
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
 	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
 	  if (BE (ret < 0, 0))
 	    return REG_ESPACE;
@@ -1539,9 +1539,10 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
 	  if (clone_dest == -1)
 	    {
 	      /* There are no such a duplicated node, create a new one.  */
-	      err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
-	      if (BE (err != REG_NOERROR, 0))
-		return err;
+	      reg_errcode_t err;
+	      clone_dest = duplicate_node (dfa, org_dest, constraint);
+	      if (BE (clone_dest == -1, 0))
+		return REG_ESPACE;
 	      ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
 	      if (BE (ret < 0, 0))
 		return REG_ESPACE;
@@ -1560,9 +1561,9 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
 	    }
 
 	  org_dest = dfa->edests[org_node].elems[1];
-	  err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
-	  if (BE (err != REG_NOERROR, 0))
-	    return err;
+	  clone_dest = duplicate_node (dfa, org_dest, constraint);
+	  if (BE (clone_dest == -1, 0))
+	    return REG_ESPACE;
 	  ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
 	  if (BE (ret < 0, 0))
 	    return REG_ESPACE;
@@ -1578,7 +1579,7 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
 
 static int
 search_duplicated_node (dfa, org_node, constraint)
-     re_dfa_t *dfa;
+     const re_dfa_t *dfa;
      int org_node;
      unsigned int constraint;
 {
@@ -1593,27 +1594,27 @@ search_duplicated_node (dfa, org_node, constraint)
 }
 
 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
-   The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
-   otherwise return the error code.  */
+   Return the index of the new node, or -1 if insufficient storage is
+   available.  */
 
-static reg_errcode_t
-duplicate_node (new_idx, dfa, org_idx, constraint)
+static int
+duplicate_node (dfa, org_idx, constraint)
      re_dfa_t *dfa;
-     int *new_idx, org_idx;
+     int org_idx;
      unsigned int constraint;
 {
   int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
-  if (BE (dup_idx == -1, 0))
-    return REG_ESPACE;
-  dfa->nodes[dup_idx].constraint = constraint;
-  if (dfa->nodes[org_idx].type == ANCHOR)
-    dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
-  dfa->nodes[dup_idx].duplicated = 1;
-
-  /* Store the index of the original node.  */
-  dfa->org_indices[dup_idx] = org_idx;
-  *new_idx = dup_idx;
-  return REG_NOERROR;
+  if (BE (dup_idx != -1, 1))
+    {
+      dfa->nodes[dup_idx].constraint = constraint;
+      if (dfa->nodes[org_idx].type == ANCHOR)
+	dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
+      dfa->nodes[dup_idx].duplicated = 1;
+
+      /* Store the index of the original node.  */
+      dfa->org_indices[dup_idx] = org_idx;
+    }
+  return dup_idx;
 }
 
 static reg_errcode_t
@@ -2496,7 +2497,9 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
       if (BE (*err != REG_NOERROR, 0))
 	return NULL;
     }
-  dfa->completed_bkref_map |= 1 << cur_nsub;
+
+  if (cur_nsub <= '9' - '1')
+    dfa->completed_bkref_map |= 1 << cur_nsub;
 
   tree = create_tree (dfa, tree, NULL, SUBEXP);
   if (BE (tree == NULL, 0))
@@ -2683,7 +2686,9 @@ build_range_exp (sbcset, start_elem, end_elem)
 
 # ifdef RE_ENABLE_I18N
   {
-    wchar_t wc, start_wc, end_wc;
+    wchar_t wc;
+    wint_t start_wc;
+    wint_t end_wc;
     wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
 
     start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch