about summary refs log tree commit diff
path: root/posix/regexec.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2002-09-10 18:40:35 +0000
committer Ulrich Drepper <drepper@redhat.com> 2002-09-10 18:40:35 +0000
commit c202c2c50523dd6721e9e2a9c80c1dc018f373bc (patch)
tree 70e18e68fb0e4cf5ac46628a839a7dd7a9d15bdd /posix/regexec.c
parent 62439eac8bea0958c8ed57ffa69f74006c0a9055 (diff)
download glibc-c202c2c50523dd6721e9e2a9c80c1dc018f373bc.tar.gz
glibc-c202c2c50523dd6721e9e2a9c80c1dc018f373bc.tar.xz
glibc-c202c2c50523dd6721e9e2a9c80c1dc018f373bc.zip
Update.
2002-09-10  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regexec.c (build_trtable): Fix the destination state for
	newline so that it is not overwritten by a wrong state.
	Add break statements as an optimization.

2002-09-10  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c: Wrap #include wchar.h and wctype.h in #if.
	(build_range_exp): Add casts to strlen invocations.
	(build_collating_symbol): Restore the type of characters from "char"
	to "unsigned char", and add the necessary casts.
	(build_collating_symbol): Likewise.
	(build_equiv_class): Likewise.
	(build_charclass): Likewise.
	(seek_collating_symbol_entry): Likewise.
	(parse_bracket_exp): Likewise.
	(build_word_op): Supplement a casting.
	* posix/regex_internal.c: Wrap #include wchar.h and wctype.h in #if.
	(re_string_allocate): Fix castings.
	(re_string_construct): Likewise.
	(re_string_construct_common): Likewise.
	(re_string_realloc_buffers): Likewise.
	(build_wcs_buffer): Likewise.
	(build_wcs_upper_buffer): Likewise.
	(re_string_skip_chars): Likewise.
	(re_string_reconstruct): Likewise.
	* posix/regex_internal.h: Restore the type of characters in
	re_string_t and bracket_elem_t from "char" to "unsigned char".
	(re_string_elem_size_at): Fix castings.
	* posix/regexec.c: Wrap #include wchar.h and wctype.h in #if.
	(transit_state_bkref_loop): Restore the type of characters from
	"char" to "unsigned char", and append a cast to "char*" pointer in
	array subscript.
	(check_node_accept_bytes): Likewise.
	(find_collation_sequence_value): Likewise.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r-- posix/regexec.c 69
1 files changed, 51 insertions, 18 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index 4a9c64a191..142127883d 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -23,8 +23,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <wchar.h>
-#include <wctype.h>
+
+#if defined HAVE_WCHAR_H || defined _LIBC
+# include <wchar.h>
+#endif /* HAVE_WCHAR_H || _LIBC */
+#if defined HAVE_WCTYPE_H || defined _LIBC
+# include <wctype.h>
+#endif /* HAVE_WCTYPE_H || _LIBC */
 
 #ifdef _LIBC
 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
@@ -123,7 +128,7 @@ static re_dfastate_t **build_trtable (const regex_t *dfa,
 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
                                     const re_string_t *input, int idx);
 # ifdef _LIBC
-static unsigned int find_collation_sequence_value (const char *mbs,
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
                                                    size_t name_len);
 # endif /* _LIBC */
 #endif /* RE_ENABLE_I18N */
@@ -1674,7 +1679,7 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
           if (BE (err != REG_NOERROR, 0))
             return err;
         }
-      buf = re_string_get_buffer (mctx->input);
+      buf = (char *) re_string_get_buffer (mctx->input);
       if (strncmp (buf + cur_regs[subexp_idx].rm_so, buf + cur_str_idx,
                    subexp_len) != 0)
         continue;
@@ -1855,27 +1860,51 @@ build_trtable (preg, state, fl_search)
     }
 
   /* Update the transition table.  */
+  /* For all characters ch...:  */
   for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
     for (j = 0; j < UINT_BITS; ++j, ++ch)
       if ((acceptable[i] >> j) & 1)
         {
+          /* The current state accepts the character ch.  */
           if (IS_WORD_CHAR (ch))
             {
               for (k = 0; k < ndests; ++k)
                 if ((dests_ch[k][i] >> j) & 1)
-                  trtable[ch] = dest_states_word[k];
+                  {
+                    /* k-th destination accepts the word character ch.  */
+                    trtable[ch] = dest_states_word[k];
+                    /* There must be only one destination which accepts
+                       character ch.  See group_nodes_into_DFAstates.  */
+                    break;
+                  }
             }
           else /* not WORD_CHAR */
             {
               for (k = 0; k < ndests; ++k)
                 if ((dests_ch[k][i] >> j) & 1)
-                  trtable[ch] = dest_states[k];
+                  {
+                    /* k-th destination accepts the non-word character ch.  */
+                    trtable[ch] = dest_states[k];
+                    /* There must be only one destination which accepts
+                       character ch.  See group_nodes_into_DFAstates.  */
+                    break;
+                  }
             }
         }
   /* new line */
-  for (k = 0; k < ndests; ++k)
-    if (bitset_contain (acceptable, NEWLINE_CHAR))
-      trtable[NEWLINE_CHAR] = dest_states_nl[k];
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
+      /* The current state accepts newline character.  */
+      for (k = 0; k < ndests; ++k)
+        if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
+          {
+            /* k-th destination accepts newline character.  */
+            trtable[NEWLINE_CHAR] = dest_states_nl[k];
+            /* There must be only one destination which accepts
+               newline.  See group_nodes_into_DFAstates.  */
+            break;
+          }
+    }
 
   re_free (dest_states_nl);
   re_free (dest_states_word);
@@ -2069,7 +2098,7 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
     {
       const re_charset_t *cset = node->opr.mbcset;
 # ifdef _LIBC
-      const char *pin = re_string_get_buffer (input) + str_idx;
+      const unsigned char *pin = re_string_get_buffer (input) + str_idx;
 # endif /* _LIBC */
       int match_len = 0;
       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
@@ -2098,17 +2127,19 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
         {
           unsigned int in_collseq = 0;
           const int32_t *table, *indirect;
-          const char *weights, *extra, *collseqwc;
+          const unsigned char *weights, *extra;
+          const char *collseqwc;
           int32_t idx;
           /* This #include defines a local function!  */
 #  include <locale/weight.h>
 
           /* match with collating_symbol?  */
           if (cset->ncoll_syms)
-            extra = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+            extra = (const unsigned char *)
+              _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
           for (i = 0; i < cset->ncoll_syms; ++i)
             {
-              const char *coll_sym = extra + cset->coll_syms[i];
+              const unsigned char *coll_sym = extra + cset->coll_syms[i];
               /* Compare the length of input collating element and
                  the length of current collating element.  */
               if (*coll_sym != elem_len)
@@ -2147,11 +2178,13 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
           /* match with equivalence_class?  */
           if (cset->nequiv_classes)
             {
-              const unsigned char *cp = (const unsigned char *) pin;
+              const unsigned char *cp = pin;
               table = (const int32_t *)
                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-              weights = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
-              extra = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+              weights = (const unsigned char *)
+                _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+              extra = (const unsigned char *)
+                _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
               indirect = (const int32_t *)
                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
               idx = findidx (&cp);
@@ -2215,7 +2248,7 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
 # ifdef _LIBC
 static unsigned int
 find_collation_sequence_value (mbs, mbs_len)
-    const char *mbs;
+    const unsigned char *mbs;
     size_t mbs_len;
 {
   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -2226,7 +2259,7 @@ find_collation_sequence_value (mbs, mbs_len)
           /* No valid character.  Match it as a single byte character.  */
           const unsigned char *collseq = (const unsigned char *)
             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
-          return collseq[*(unsigned char *) mbs];
+          return collseq[mbs[0]];
         }
       return UINT_MAX;
     }