about summary refs log tree commit diff
path: root/posix/regexec.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2003-12-27 23:40:06 +0000
committerUlrich Drepper <drepper@redhat.com>2003-12-27 23:40:06 +0000
commit6b6557e8b3b094132c619e3a2e00fe28422fd16f (patch)
tree0b8826d04903f41e1e32dd4ddb5c8b756df8d9f4 /posix/regexec.c
parentcb5b9388dad6d0524322d45eafaa7b5d7b00b554 (diff)
downloadglibc-6b6557e8b3b094132c619e3a2e00fe28422fd16f.tar.gz
glibc-6b6557e8b3b094132c619e3a2e00fe28422fd16f.tar.xz
glibc-6b6557e8b3b094132c619e3a2e00fe28422fd16f.zip
Update.
2003-12-23  Paolo Bonzini  <bonzini@gnu.org>

	* posix/regex_internal.c (re_dfa_add_node): Initialize opt_subexp.
	* posix/regex_internal.h (re_token_type_t): Put OP_DUP_PLUS
	among the tokens, rather than among the epsilon-transiting nodes.
	(re_token_t): Add the opt_subexp flag.
	* posix/regcomp.c (optimize_utf8, calc_first,
	calc_next, calc_epsdest): Don't consider OP_DUP_PLUS.
	(mark_opt_subexp, mark_opt_subexp_iter): New functions.
	(parse_dup_op): Mostly rewritten, lowering OP_DUP_PLUS to
	OP_DUP_ASTERISK and marking optional subexpressions
	as such using mark_opt_subexp.
	* posix/regexec.c (set_regs): Initialize PREV_INDEX_MATCH
	and pass it to update_regs.
	(update_regs): Use the PREV_INDEX_MATCH parameter, together
	with the opt_subexp flag, in order to discard a final empty
	match of a repeated subexpression.
	* posix/BOOST.tests: Adjust test vectors.
	* posix/PCRE.tests: Likewise.
	* posix/rxspencer/tests: Likewise.

2003-12-17  Paolo Bonzini  <bonzini@gnu.org>
2003-12-16  Paolo Bonzini  <bonzini@gnu.org>
2003-12-17  Paolo Bonzini  <bonzini@gnu.org>
2003-12-16  Jakub Jelinek  <jakub@redhat.com>
2003-04-06  Kaz Kojima  <kkojima@rr.iij4u.or.jp>
2003-02-20  Paolo Bonzini  <bonzini@gnu.org>
2003-01-12  Franz Sirl  <Franz.Sirl-kernel@lauterbach.com>
2003-01-09  Richard Henderson  <rth@redhat.com>
2003-01-09  Richard Henderson  <rth@redhat.com>
2003-01-03  Paul Eggert  <eggert@twinsun.com>
Diffstat (limited to 'posix/regexec.c')
-rw-r--r--posix/regexec.c79
1 files changed, 57 insertions, 22 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index b0f9a53cfb..973d1c788f 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -62,7 +62,8 @@ static int check_halt_node_context (const re_dfa_t *dfa, int node,
 static int check_halt_state_context (const regex_t *preg,
 				     const re_dfastate_t *state,
 				     const re_match_context_t *mctx, int idx) internal_function;
-static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, int cur_node,
+static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch,
+			 regmatch_t *prev_idx_match, int cur_node,
 			 int cur_idx, int nmatch) internal_function;
 static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs,
 			      const re_match_context_t *mctx,
@@ -1277,11 +1278,13 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
      regmatch_t *pmatch;
      int fl_backtrack;
 {
-  re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
+  re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
   int idx, cur_node, real_nmatch;
   re_node_set eps_via_nodes;
   struct re_fail_stack_t *fs;
-  struct re_fail_stack_t fs_body = {0, 2, NULL};
+  struct re_fail_stack_t fs_body = { 0, 2, NULL };
+  regmatch_t *prev_idx_match;
+
 #ifdef DEBUG
   assert (nmatch > 1);
   assert (mctx->state_log != NULL);
@@ -1290,15 +1293,23 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
     {
       fs = &fs_body;
       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
+      if (fs->stack == NULL)
+	return REG_ESPACE;
     }
   else
     fs = NULL;
+
   cur_node = dfa->init_node;
   real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
   re_node_set_init_empty (&eps_via_nodes);
+
+  prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
+  memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
+
   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
     {
-      update_regs (dfa, pmatch, cur_node, idx, real_nmatch);
+      update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
+
       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
 	{
 	  int reg_idx;
@@ -1362,31 +1373,55 @@ free_fail_stack_return (fs)
 }
 
 static void
-update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
+update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch)
      re_dfa_t *dfa;
-     regmatch_t *pmatch;
+     regmatch_t *pmatch, *prev_idx_match;
      int cur_node, cur_idx, nmatch;
 {
   int type = dfa->nodes[cur_node].type;
-  int reg_num;
-  if (type != OP_OPEN_SUBEXP && type != OP_CLOSE_SUBEXP)
-    return;
-  reg_num = dfa->nodes[cur_node].opr.idx + 1;
-  if (reg_num >= nmatch)
-    return;
   if (type == OP_OPEN_SUBEXP)
     {
+      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
+
       /* We are at the first node of this sub expression.  */
-      pmatch[reg_num].rm_so = cur_idx;
-      pmatch[reg_num].rm_eo = -1;
+      if (reg_num < nmatch)
+	{
+	  pmatch[reg_num].rm_so = cur_idx;
+	  pmatch[reg_num].rm_eo = -1;
+	}
     }
   else if (type == OP_CLOSE_SUBEXP)
-    /* We are at the first node of this sub expression.  */
-    pmatch[reg_num].rm_eo = cur_idx;
+    {
+      int reg_num = dfa->nodes[cur_node].opr.idx + 1;
+      if (reg_num < nmatch)
+	{
+	  /* We are at the last node of this sub expression.  */
+	  if (pmatch[reg_num].rm_so < cur_idx)
+	    {
+	      pmatch[reg_num].rm_eo = cur_idx;
+	      /* This is a non-empty match or we are not inside an optional
+		 subexpression.  Accept this right away.  */
+	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+	    }
+	  else
+	    {
+	      if (dfa->nodes[cur_node].opt_subexp
+		  && prev_idx_match[reg_num].rm_so != -1)
+		/* We transited through an empty match for an optional
+		   subexpression, like (a?)*, and this is not the subexp's
+		   first match.  Copy back the old content of the registers
+		   so that matches of an inner subexpression are undone as
+		   well, like in ((a?))*.  */
+		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
+	      else
+		/* We completed a subexpression, but it may be part of
+		   an optional one, so do not update PREV_IDX_MATCH.  */
+		pmatch[reg_num].rm_eo = cur_idx;
+	    }
+	}
+    }
 }
 
-#define NUMBER_OF_STATE 1
-
 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
    and sift the nodes in each states according to the following rules.
    Updated state_log will be wrote to STATE_LOG.
@@ -3279,7 +3314,7 @@ out_free:
 		 character ch.  See group_nodes_into_DFAstates.  */
 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
 		;
-	      
+
 	      /* j-th destination accepts the word character ch.  */
 	      if (IS_WORD_CHAR (ch))
 		trtable[ch] = dest_states_word[j];
@@ -3298,7 +3333,7 @@ out_free:
 					   2 * SBC_MAX);
       if (BE (trtable == NULL, 0))
 	goto out_free;
-      
+
       /* For all characters ch...:  */
       for (i = 0; i < BITSET_UINTS; ++i)
 	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
@@ -3310,13 +3345,13 @@ out_free:
 		 character ch.  See group_nodes_into_DFAstates.  */
 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
 		;
-	      
+
 	      /* j-th destination accepts the word character ch.  */
 	      trtable[ch] = dest_states[j];
 	      trtable[ch + SBC_MAX] = dest_states_word[j];
 	    }
     }
-  
+
   /* new line */
   if (bitset_contain (acceptable, NEWLINE_CHAR))
     {