about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  ChangeLog               | 12
-rw-r--r--  posix/regcomp.c         |  9
-rw-r--r--  posix/regex.h           |  5
-rw-r--r--  posix/regex_internal.h  |  6
-rw-r--r--  posix/regexec.c         |  1
5 files changed, 28 insertions, 5 deletions
diff --git a/ChangeLog b/ChangeLog
index 32c26375cd..0a6301cfba 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2003-09-20  Paolo Bonzini  <bonzini@gnu.org>
+
+	* posix/regcomp.c (peek_token): Don't look back for ( or |
+	to check whether to treat a caret as special.  It fails
+	for the (extended) regex \(^.
+	(parse, parse_reg_exp): Pass RE_CARET_ANCHORS_HERE to fetch_token.
+	* posix/regex.h: Define RE_CARET_ANCHORS_HERE.
+
+	* posix/regexec.c: Check out of bounds value before shifting.
+
+	* posix/regex_internal.h: Define __attribute for non-gcc.
+
 2003-09-22  Jakub Jelinek  <jakub@redhat.com>
 
 	* include/atomic.h (atomic_compare_and_exchange_val_rel,
diff --git a/posix/regcomp.c b/posix/regcomp.c
index d9212de3b9..4682ca6c1b 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1650,12 +1650,11 @@ peek_token (token, input, syntax)
       token->type = OP_PERIOD;
       break;
     case '^':
-      if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
+      if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&
 	  re_string_cur_idx (input) != 0)
 	{
 	  char prev = re_string_peek_byte (input, -1);
-	  if (prev != '|' && prev != '(' &&
-	      (!(syntax & RE_NEWLINE_ALT) || prev != '\n'))
+	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')
 	    break;
 	}
       token->type = ANCHOR;
@@ -1790,7 +1789,7 @@ parse (regexp, preg, syntax, err)
   bin_tree_t *tree, *eor, *root;
   re_token_t current_token;
   int new_idx;
-  current_token = fetch_token (regexp, syntax);
+  current_token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE);
   tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
   if (BE (*err != REG_NOERROR && tree == NULL, 0))
     return NULL;
@@ -1837,7 +1836,7 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
     {
       re_token_t alt_token = *token;
       new_idx = re_dfa_add_node (dfa, alt_token, 0);
-      *token = fetch_token (regexp, syntax);
+      *token = fetch_token (regexp, syntax | RE_CARET_ANCHORS_HERE);
       if (token->type != OP_ALT && token->type != END_OF_RE
 	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))
 	{
diff --git a/posix/regex.h b/posix/regex.h
index e251962cef..1ab3e243a7 100644
--- a/posix/regex.h
+++ b/posix/regex.h
@@ -170,6 +170,11 @@ typedef unsigned long int reg_syntax_t;
    If not set, then case is significant.  */
 #define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1)
 
+/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only
+   for ^, because it is difficult to scan the regex backwards to find
+   whether ^ should be special.  */
+#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1)
+
 /* This global variable defines the particular regexp syntax to use (for
    some interfaces).  When a regexp is compiled, the syntax used is
    stored in the pattern buffer, so changing this does not affect
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index bf84ad6270..9dd3fe55b3 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -106,6 +106,12 @@
 # define attribute_hidden
 #endif /* not _LIBC */
 
+#ifdef __GNUC__
+# define __attribute __attribute__
+#else
+# define __attribute
+#endif
+
 extern const char __re_error_msgid[] attribute_hidden;
 extern const size_t __re_error_msgid_idx[] attribute_hidden;
 
diff --git a/posix/regexec.c b/posix/regexec.c
index 277c935231..39a27d2fed 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -2193,6 +2193,7 @@ check_subexp_matching_top (dfa, mctx, cur_nodes, str_idx)
     {
       int node = cur_nodes->elems[node_idx];
       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
+	  && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map))
 	  && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx))
 	{
 	  err = match_ctx_add_subtop (mctx, node, str_idx);