about summary refs log tree commit diff
path: root/posix/regexec.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2003-12-23 01:43:19 +0000
committerUlrich Drepper <drepper@redhat.com>2003-12-23 01:43:19 +0000
commit3ce12656a8f3bf28042231df9407a9193626f26f (patch)
tree950fdc73389f5ffec292891d79daa131db11f76a /posix/regexec.c
parentd3e4ed994c517460a344153aa0a7d48cbcfa0237 (diff)
downloadglibc-3ce12656a8f3bf28042231df9407a9193626f26f.tar.gz
glibc-3ce12656a8f3bf28042231df9407a9193626f26f.tar.xz
glibc-3ce12656a8f3bf28042231df9407a9193626f26f.zip
(build_trtable): Don't allocate the trtable until state->word_trtable is known. Don't hardcode UINT_BITS iterations on each bitset item.
(match_ctx_init, match_ctx_clean,
match_ctx_free, match_ctx_free_subtops,
match_ctx_add_entry, search_cur_bkref_entry,
match_ctx_clear_flag, match_ctx_add_subtop,
match_ctx_add_sublast, sift_ctx_init,
re_search_internal, re_search_2_stub, re_search_stub,
re_copy_regs, acquire_init_state_context,
prune_impossible_nodes, check_matching,
check_halt_node_context, check_halt_state_context
update_regs, proceed_next_node, push_fail_stack,
pop_fail_stack, set_regs, free_fail_stack_return,
sift_states_iter_mb, sift_states_backward
update_cur_sifted_state, add_epsilon_src_nodes,
sub_epsilon_src_nodes, check_dst_limits,
check_dst_limits_calc_pos, check_subexp_limits,
sift_states_bkref, clean_state_log_if_need,
merge_state_array, transit_state,
check_subexp_matching_top, transit_state_sb,
transit_state_mb, transit_state_bkref,
get_subexp, get_subexp_sub, find_subexp_node,
check_arrival, check_arrival_add_next_nodes,
find_collation_sequence_value, check_arrival_expand_ecl,
check_arrival_expand_ecl_sub, expand_bkref_cache,
build_trtable, check_node_accept_bytes, extend_buffers,
group_nodes_into_DFAstates, check_node_accept): Likewise.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r--posix/regexec.c243
1 files changed, 129 insertions, 114 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index 7f8fac8961..b0f9a53cfb 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -19,176 +19,176 @@
    02111-1307 USA.  */
 
 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
-				     re_string_t *input, int n);
-static void match_ctx_clean (re_match_context_t *mctx);
-static void match_ctx_free (re_match_context_t *cache);
-static void match_ctx_free_subtops (re_match_context_t *mctx);
+				     re_string_t *input, int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static void match_ctx_free_subtops (re_match_context_t *mctx) internal_function;
 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
-					  int str_idx, int from, int to);
-static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx);
-static void match_ctx_clear_flag (re_match_context_t *mctx);
+					  int str_idx, int from, int to) internal_function;
+static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx) internal_function;
+static void match_ctx_clear_flag (re_match_context_t *mctx) internal_function;
 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
-					   int str_idx);
+					   int str_idx) internal_function;
 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
-						   int node, int str_idx);
+						   int node, int str_idx) internal_function;
 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
 			   re_dfastate_t **limited_sts, int last_node,
-			   int last_str_idx, int check_subexp);
+			   int last_str_idx, int check_subexp) internal_function;
 static reg_errcode_t re_search_internal (const regex_t *preg,
 					 const char *string, int length,
 					 int start, int range, int stop,
 					 size_t nmatch, regmatch_t pmatch[],
-					 int eflags);
+					 int eflags) internal_function;
 static int re_search_2_stub (struct re_pattern_buffer *bufp,
 			     const char *string1, int length1,
 			     const char *string2, int length2,
 			     int start, int range, struct re_registers *regs,
-			     int stop, int ret_len);
+			     int stop, int ret_len) internal_function;
 static int re_search_stub (struct re_pattern_buffer *bufp,
 			   const char *string, int length, int start,
 			   int range, int stop, struct re_registers *regs,
-			   int ret_len);
+			   int ret_len) internal_function;
 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
-			      int nregs, int regs_allocated);
+			      int nregs, int regs_allocated) internal_function;
 static inline re_dfastate_t *acquire_init_state_context
   (reg_errcode_t *err, const regex_t *preg, const re_match_context_t *mctx,
-   int idx) __attribute ((always_inline));
+   int idx) __attribute ((always_inline)) internal_function;
 static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
-					     re_match_context_t *mctx);
+					     re_match_context_t *mctx) internal_function;
 static int check_matching (const regex_t *preg, re_match_context_t *mctx,
-			   int fl_longest_match);
+			   int fl_longest_match) internal_function;
 static int check_halt_node_context (const re_dfa_t *dfa, int node,
-				    unsigned int context);
+				    unsigned int context) internal_function;
 static int check_halt_state_context (const regex_t *preg,
 				     const re_dfastate_t *state,
-				     const re_match_context_t *mctx, int idx);
+				     const re_match_context_t *mctx, int idx) internal_function;
 static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
-			 int cur_idx, int nmatch);
+			 int cur_idx, int nmatch) internal_function;
 static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
 			      const re_match_context_t *mctx,
 			      int *pidx, int node, re_node_set *eps_via_nodes,
-			      struct re_fail_stack_t *fs);
+			      struct re_fail_stack_t *fs) internal_function;
 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
 				      int str_idx, int *dests, int nregs,
 				      regmatch_t *regs,
-				      re_node_set *eps_via_nodes);
+				      re_node_set *eps_via_nodes) internal_function;
 static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,
-			   regmatch_t *regs, re_node_set *eps_via_nodes);
+			   regmatch_t *regs, re_node_set *eps_via_nodes) internal_function;
 static reg_errcode_t set_regs (const regex_t *preg,
 			       const re_match_context_t *mctx,
 			       size_t nmatch, regmatch_t *pmatch,
-			       int fl_backtrack);
-static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
+			       int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function;
 
 #ifdef RE_ENABLE_I18N
 static int sift_states_iter_mb (const regex_t *preg,
 				const re_match_context_t *mctx,
 				re_sift_context_t *sctx,
-				int node_idx, int str_idx, int max_str_idx);
+				int node_idx, int str_idx, int max_str_idx) internal_function;
 #endif /* RE_ENABLE_I18N */
 static reg_errcode_t sift_states_backward (const regex_t *preg,
 					   re_match_context_t *mctx,
-					   re_sift_context_t *sctx);
+					   re_sift_context_t *sctx) internal_function;
 static reg_errcode_t update_cur_sifted_state (const regex_t *preg,
 					      re_match_context_t *mctx,
 					      re_sift_context_t *sctx,
 					      int str_idx,
-					      re_node_set *dest_nodes);
+					      re_node_set *dest_nodes) internal_function;
 static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa,
 					    re_node_set *dest_nodes,
-					    const re_node_set *candidates);
+					    const re_node_set *candidates) internal_function;
 static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
 					    re_node_set *dest_nodes,
-					    const re_node_set *and_nodes);
+					    const re_node_set *and_nodes) internal_function;
 static int check_dst_limits (re_dfa_t *dfa, re_node_set *limits,
 			     re_match_context_t *mctx, int dst_node,
-			     int dst_idx, int src_node, int src_idx);
+			     int dst_idx, int src_node, int src_idx) internal_function;
 static int check_dst_limits_calc_pos (re_dfa_t *dfa, re_match_context_t *mctx,
 				      int limit, re_node_set *eclosures,
-				      int subexp_idx, int node, int str_idx);
+				      int subexp_idx, int node, int str_idx) internal_function;
 static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
 					  re_node_set *dest_nodes,
 					  const re_node_set *candidates,
 					  re_node_set *limits,
 					  struct re_backref_cache_entry *bkref_ents,
-					  int str_idx);
+					  int str_idx) internal_function;
 static reg_errcode_t sift_states_bkref (const regex_t *preg,
 					re_match_context_t *mctx,
 					re_sift_context_t *sctx,
-					int str_idx, re_node_set *dest_nodes);
+					int str_idx, re_node_set *dest_nodes) internal_function;
 static reg_errcode_t clean_state_log_if_need (re_match_context_t *mctx,
-					      int next_state_log_idx);
+					      int next_state_log_idx) internal_function;
 static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
-					re_dfastate_t **src, int num);
+					re_dfastate_t **src, int num) internal_function;
 static re_dfastate_t *transit_state (reg_errcode_t *err, const regex_t *preg,
 				     re_match_context_t *mctx,
-				     re_dfastate_t *state);
+				     re_dfastate_t *state) internal_function;
 static reg_errcode_t check_subexp_matching_top (re_dfa_t *dfa,
 						re_match_context_t *mctx,
 						re_node_set *cur_nodes,
-						int str_idx);
+						int str_idx) internal_function;
 #if 0
 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
 					re_dfastate_t *pstate,
-					re_match_context_t *mctx);
+					re_match_context_t *mctx) internal_function;
 #endif
 #ifdef RE_ENABLE_I18N
 static reg_errcode_t transit_state_mb (const regex_t *preg,
 				       re_dfastate_t *pstate,
-				       re_match_context_t *mctx);
+				       re_match_context_t *mctx) internal_function;
 #endif /* RE_ENABLE_I18N */
 static reg_errcode_t transit_state_bkref (const regex_t *preg,
 					  const re_node_set *nodes,
-					  re_match_context_t *mctx);
+					  re_match_context_t *mctx) internal_function;
 static reg_errcode_t get_subexp (const regex_t *preg, re_match_context_t *mctx,
-				 int bkref_node, int bkref_str_idx);
+				 int bkref_node, int bkref_str_idx) internal_function;
 static reg_errcode_t get_subexp_sub (const regex_t *preg,
 				     re_match_context_t *mctx,
 				     const re_sub_match_top_t *sub_top,
 				     re_sub_match_last_t *sub_last,
-				     int bkref_node, int bkref_str);
+				     int bkref_node, int bkref_str) internal_function;
 static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
-			     int subexp_idx, int type);
+			     int subexp_idx, int type) internal_function;
 static reg_errcode_t check_arrival (const regex_t *preg,
 				    re_match_context_t *mctx,
 				    state_array_t *path, int top_node,
 				    int top_str, int last_node, int last_str,
-				    int type);
+				    int type) internal_function;
 static reg_errcode_t check_arrival_add_next_nodes (const regex_t *preg,
 						   re_dfa_t *dfa,
 						   re_match_context_t *mctx,
 						   int str_idx,
 						   re_node_set *cur_nodes,
-						   re_node_set *next_nodes);
+						   re_node_set *next_nodes) internal_function;
 static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa,
 					       re_node_set *cur_nodes,
-					       int ex_subexp, int type);
+					       int ex_subexp, int type) internal_function;
 static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
 						   re_node_set *dst_nodes,
 						   int target, int ex_subexp,
-						   int type);
+						   int type) internal_function;
 static reg_errcode_t expand_bkref_cache (const regex_t *preg,
 					 re_match_context_t *mctx,
 					 re_node_set *cur_nodes, int cur_str,
 					 int last_str, int subexp_num,
-					 int type);
+					 int type) internal_function;
 static re_dfastate_t **build_trtable (const regex_t *dfa,
-				      re_dfastate_t *state);
+				      re_dfastate_t *state) internal_function;
 #ifdef RE_ENABLE_I18N
 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
-				    const re_string_t *input, int idx);
+				    const re_string_t *input, int idx) internal_function;
 # ifdef _LIBC
 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
-						   size_t name_len);
+						   size_t name_len) internal_function;
 # endif /* _LIBC */
 #endif /* RE_ENABLE_I18N */
 static int group_nodes_into_DFAstates (const regex_t *dfa,
 				       const re_dfastate_t *state,
 				       re_node_set *states_node,
-				       bitset *states_ch);
+				       bitset *states_ch) internal_function;
 static int check_node_accept (const regex_t *preg, const re_token_t *node,
-			      const re_match_context_t *mctx, int idx);
-static reg_errcode_t extend_buffers (re_match_context_t *mctx);
+			      const re_match_context_t *mctx, int idx) internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function;
 
 /* Entry point for POSIX code.  */
 
@@ -3132,7 +3132,8 @@ build_trtable (preg, state)
 {
   reg_errcode_t err;
   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
-  int i, j, k, ch;
+  int i, j, ch;
+  unsigned int elem, mask;
   int dests_node_malloced = 0, dest_states_malloced = 0;
   int ndests; /* Number of the destination states from `state'.  */
   re_dfastate_t **trtable;
@@ -3161,14 +3162,7 @@ build_trtable (preg, state)
   dests_ch = (bitset *) (dests_node + SBC_MAX);
 
   /* Initialize transiton table.  */
-  trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
   state->word_trtable = 0;
-  if (BE (trtable == NULL, 0))
-    {
-      if (dests_node_malloced)
-	free (dests_node);
-      return NULL;
-    }
 
   /* At first, group all nodes belonging to `state' into several
      destinations.  */
@@ -3180,10 +3174,10 @@ build_trtable (preg, state)
       /* Return NULL in case of an error, trtable otherwise.  */
       if (ndests == 0)
 	{
-	  state->trtable = trtable;
-	  return trtable;
+	  state->trtable = (re_dfastate_t **)
+	    calloc (sizeof (re_dfastate_t *), SBC_MAX);;
+	  return state->trtable;
 	}
-      free (trtable);
       return NULL;
     }
 
@@ -3209,7 +3203,6 @@ out_free:
 	  re_node_set_free (&follows);
 	  for (i = 0; i < ndests; ++i)
 	    re_node_set_free (dests_node + i);
-	  free (trtable);
 	  if (dests_node_malloced)
 	    free (dests_node);
 	  return NULL;
@@ -3247,11 +3240,16 @@ out_free:
 							  CONTEXT_WORD);
 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
 	    goto out_free;
+
+	  if (dest_states[i] != dest_states_word[i]
+	      && dfa->mb_cur_max > 1)
+	    state->word_trtable = 1;
+
 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
 							CONTEXT_NEWLINE);
 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
 	    goto out_free;
-	}
+ 	}
       else
 	{
 	  dest_states_word[i] = dest_states[i];
@@ -3260,59 +3258,76 @@ out_free:
       bitset_merge (acceptable, dests_ch[i]);
     }
 
-  /* Update the transition table.  */
-  /* For all characters ch...:  */
-  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
-    for (j = 0; j < UINT_BITS; ++j, ++ch)
-      if ((acceptable[i] >> j) & 1)
-	{
-	  for (k = 0; k < ndests; ++k)
-	    if ((dests_ch[k][i] >> j) & 1)
-	      {
-		/* k-th destination accepts the word character ch.  */
-		if (state->word_trtable)
-		  {
-		    trtable[ch] = dest_states[k];
-		    trtable[ch + SBC_MAX] = dest_states_word[k];
-		  }
-		else if (dfa->mb_cur_max > 1
-			 && dest_states[k] != dest_states_word[k])
-		  {
-		    re_dfastate_t **new_trtable;
-
-		    new_trtable = (re_dfastate_t **)
-				  realloc (trtable,
-					   sizeof (re_dfastate_t *)
-					   * 2 * SBC_MAX);
-		    if (BE (new_trtable == NULL, 0))
-		      goto out_free;
-		    memcpy (new_trtable + SBC_MAX, new_trtable,
-			    sizeof (re_dfastate_t *) * SBC_MAX);
-		    trtable = new_trtable;
-		    state->word_trtable = 1;
-		    trtable[ch] = dest_states[k];
-		    trtable[ch + SBC_MAX] = dest_states_word[k];
-		  }
-		else if (IS_WORD_CHAR (ch))
-		  trtable[ch] = dest_states_word[k];
-		else
-		  trtable[ch] = dest_states[k];
-		/* There must be only one destination which accepts
-		   character ch.  See group_nodes_into_DFAstates.  */
-		break;
-	      }
-	}
+  if (!BE (state->word_trtable, 0))
+    {
+      /* We don't care about whether the following character is a word
+	 character, or we are in a single-byte character set so we can
+	 discern by looking at the character code: allocate a
+	 256-entry transition table.  */
+      trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_UINTS; ++i)
+	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+	      
+	      /* j-th destination accepts the word character ch.  */
+	      if (IS_WORD_CHAR (ch))
+		trtable[ch] = dest_states_word[j];
+	      else
+		trtable[ch] = dest_states[j];
+	    }
+    }
+  else
+    {
+      /* We care about whether the following character is a word
+	 character, and we are in a multi-byte character set: discern
+	 by looking at the character code: build two 256-entry
+	 transition tables, one starting at trtable[0] and one
+	 starting at trtable[SBC_MAX].  */
+      trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
+					   2 * SBC_MAX);
+      if (BE (trtable == NULL, 0))
+	goto out_free;
+      
+      /* For all characters ch...:  */
+      for (i = 0; i < BITSET_UINTS; ++i)
+	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+	     elem;
+	     mask <<= 1, elem >>= 1, ++ch)
+	  if (BE (elem & 1, 0))
+	    {
+	      /* There must be exactly one destination which accepts
+		 character ch.  See group_nodes_into_DFAstates.  */
+	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
+		;
+	      
+	      /* j-th destination accepts the word character ch.  */
+	      trtable[ch] = dest_states[j];
+	      trtable[ch + SBC_MAX] = dest_states_word[j];
+	    }
+    }
+  
   /* new line */
   if (bitset_contain (acceptable, NEWLINE_CHAR))
     {
       /* The current state accepts newline character.  */
-      for (k = 0; k < ndests; ++k)
-	if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
+      for (j = 0; j < ndests; ++j)
+	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
 	  {
 	    /* k-th destination accepts newline character.  */
-	    trtable[NEWLINE_CHAR] = dest_states_nl[k];
+	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
 	    if (state->word_trtable)
-	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[k];
+	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
 	    /* There must be only one destination which accepts
 	       newline.  See group_nodes_into_DFAstates.  */
 	    break;