about summary refs log tree commit diff
path: root/posix/regexec.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2002-04-26 20:52:02 +0000
committer Ulrich Drepper <drepper@redhat.com> 2002-04-26 20:52:02 +0000
commit 434d3784f194e382d86edd72c9c6a1d0051a7b96 (patch)
tree 3678759b0c16bc0f694e6892eb7f111c0605dc8d /posix/regexec.c
parent 58fe8d109631d84a4392c7a8f77db3d163e37345 (diff)
download glibc-434d3784f194e382d86edd72c9c6a1d0051a7b96.tar.gz
glibc-434d3784f194e382d86edd72c9c6a1d0051a7b96.tar.xz
glibc-434d3784f194e382d86edd72c9c6a1d0051a7b96.zip
Update.
2002-04-26  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c (re_compile_fastmap_iter): Fix fastmap in case of
	not _LIBC and RE_ENABLE_I18N.
	(build_range_exp): Implement for not _LIBC.
	(build_collating_symbol): Likewise.
	(parse_bracket_exp): Unify redundant error handlings.
	Don't erase mbcset for non matching list in multibyte envs.
	(build_word_op): Add '_' to matching list for \w operator.
	* posix/regex_internal.c (re_string_construct): Invoke
	build_upper_buffer in case of not RE_ENABLE_I18N.
	(re_string_reconstruct): Don't touch cur_state in case of not
	RE_ENABLE_I18N.
	* posix/regex_internal.h (attribute_hidden): New macro in case of
	not _LIBC.
	(re_charset_t): Define range_starts/ends in case of not _LIBC.
	* posix/regexec.c (sift_states_iter_mb): Hide in case of not
	RE_ENABLE_I18N.
	(transit_state_mb): Likewise.
	(check_node_accept_bytes): Implement the code evaluating range
	expression in case of not _LIBC.
	(find_collation_sequence_value): Hide in case of not _LIBC.

2002-04-26  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/unix/sysv/linux/sparc/sparc32/semctl.c: Copied from
	i386/semctl.c.
	(__old_semctl, __new_semctl): Only use va_arg if the argument will
	be used.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r-- posix/regexec.c 123
1 files changed, 84 insertions, 39 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index e888970936..2c7a2774eb 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -64,9 +64,11 @@ static int proceed_next_node (const regex_t *preg,
 static reg_errcode_t set_regs (const regex_t *preg,
                                const re_match_context_t *mctx,
                                size_t nmatch, regmatch_t *pmatch, int last);
+#ifdef RE_ENABLE_I18N
 static int sift_states_iter_mb (const regex_t *preg,
                                 const re_match_context_t *mctx,
                                 int node_idx, int str_idx, int max_str_idx);
+#endif /* RE_ENABLE_I18N */
 static int sift_states_iter_bkref (const re_dfa_t *dfa,
                                    re_dfastate_t **state_log,
                                    struct re_backref_cache_entry *mctx_entry,
@@ -88,9 +90,11 @@ static re_dfastate_t *transit_state_sb (reg_errcode_t *err, const regex_t *preg,
                                         re_dfastate_t *pstate,
                                         int fl_search,
                                         re_match_context_t *mctx);
+#ifdef RE_ENABLE_I18N
 static reg_errcode_t transit_state_mb (const regex_t *preg,
                                        re_dfastate_t *pstate,
                                        re_match_context_t *mctx);
+#endif /* RE_ENABLE_I18N */
 static reg_errcode_t transit_state_bkref (const regex_t *preg,
                                           re_dfastate_t *pstate,
                                           re_match_context_t *mctx);
@@ -101,10 +105,14 @@ static reg_errcode_t transit_state_bkref_loop (const regex_t *preg,
 static re_dfastate_t **build_trtable (const regex_t *dfa,
                                       const re_dfastate_t *state,
                                       int fl_search);
+#ifdef RE_ENABLE_I18N
 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
                                     const re_string_t *input, int idx);
+# ifdef _LIBC
 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
                                                    size_t name_len);
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
 static int group_nodes_into_DFAstates (const regex_t *dfa,
                                        const re_dfastate_t *state,
                                        re_node_set *states_node,
@@ -912,9 +920,12 @@ proceed_next_node (preg, mctx, pidx, node, eps_via_nodes)
           type = dfa->nodes[entity].type;
         }
 
+#ifdef RE_ENABLE_I18N
       if (ACCEPT_MB_NODE (type))
         naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx);
-      else if (type == OP_BACK_REF)
+      else
+#endif /* RE_ENABLE_I18N */
+      if (type == OP_BACK_REF)
         {
           for (i = 0; i < mctx->nbkref_ents; ++i)
             {
@@ -1121,13 +1132,16 @@ sift_states_backward (preg, mctx, last_node)
               type = dfa->nodes[entity].type;
             }
 
+#ifdef RE_ENABLE_I18N
           /* If the node may accept `multi byte'.  */
           if (ACCEPT_MB_NODE (type))
             naccepted = sift_states_iter_mb (preg, mctx, entity, str_idx,
                                              mctx->match_last);
 
           /* If the node is a back reference.  */
-          else if (type == OP_BACK_REF)
+          else
+#endif /* RE_ENABLE_I18N */
+          if (type == OP_BACK_REF)
             for (j = 0; j < mctx->nbkref_ents; ++j)
               {
                 naccepted = sift_states_iter_bkref (dfa, mctx->state_log,
@@ -1201,6 +1215,7 @@ clean_state_log_if_need (mctx, next_state_log_idx)
   return REG_NOERROR;
 }
 
+#ifdef RE_ENABLE_I18N
 static int
 sift_states_iter_mb (preg, mctx, node_idx, str_idx, max_str_idx)
     const regex_t *preg;
@@ -1222,6 +1237,7 @@ sift_states_iter_mb (preg, mctx, node_idx, str_idx, max_str_idx)
      `naccepted' bytes input.  */
   return naccepted;
 }
+#endif /* RE_ENABLE_I18N */
 
 static int
 sift_states_iter_bkref (dfa, state_log, mctx_entry, node_idx, idx, match_last)
@@ -1317,6 +1333,7 @@ transit_state (err, preg, mctx, state, fl_search)
     }
   else
     {
+#ifdef RE_ENABLE_I18N
       /* If the current state can accept multibyte.  */
       if (state->accept_mb)
         {
@@ -1324,6 +1341,7 @@ transit_state (err, preg, mctx, state, fl_search)
           if (BE (*err != REG_NOERROR, 0))
             return NULL;
         }
+#endif /* RE_ENABLE_I18N */
 
       /* Then decide the next state with the single byte.  */
       if (1)
@@ -1474,6 +1492,7 @@ transit_state_sb (err, preg, state, fl_search, mctx)
   return next_state;
 }
 
+#ifdef RE_ENABLE_I18N
 static reg_errcode_t
 transit_state_mb (preg, pstate, mctx)
     const regex_t *preg;
@@ -1543,6 +1562,7 @@ transit_state_mb (preg, pstate, mctx)
     }
   return REG_NOERROR;
 }
+#endif /* RE_ENABLE_I18N */
 
 static reg_errcode_t
 transit_state_bkref (preg, pstate, mctx)
@@ -1991,7 +2011,14 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
   return ndests;
 }
 
-/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.  */
+#ifdef RE_ENABLE_I18N
+/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
+   Return the number of the bytes the node accepts.
+   STR_IDX is the current index of the input string.
+
+   This function handles the nodes which can accept one character or
+   one collating element, like '.' and '[a-z]', as opposed to the other
+   nodes, which can only accept one byte.  */
 
 static int
 check_node_accept_bytes (preg, node_idx, input, str_idx)
@@ -2003,14 +2030,16 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
   const re_token_t *node = dfa->nodes + node_idx;
   int elem_len = re_string_elem_size_at (input, str_idx);
   int char_len = re_string_char_size_at (input, str_idx);
-  int i, j;
-#ifdef _LIBC
+  int i;
+# ifdef _LIBC
+  int j;
   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
-#endif /* _LIBC */
+# endif /* _LIBC */
   if (elem_len <= 1 && char_len <= 1)
     return 0;
   if (node->type == OP_PERIOD)
     {
+      /* '.' accepts any one character except the following two cases.  */
       if ((!(preg->syntax & RE_DOT_NEWLINE) &&
            re_string_byte_at (input, str_idx) == '\n') ||
           ((preg->syntax & RE_DOT_NOT_NULL) &&
@@ -2021,18 +2050,40 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
   else if (node->type == COMPLEX_BRACKET)
     {
       const re_charset_t *cset = node->opr.mbcset;
+# ifdef _LIBC
       const unsigned char *pin = re_string_get_buffer (input) + str_idx;
-#ifdef _LIBC
+# endif /* _LIBC */
+      int match_len = 0;
+      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
+                    ? re_string_wchar_at (input, str_idx) : 0);
+
+      /* match with multibyte character?  */
+      for (i = 0; i < cset->nmbchars; ++i)
+        if (wc == cset->mbchars[i])
+          {
+            match_len = char_len;
+            goto check_node_accept_bytes_match;
+          }
+      /* match with character_class?  */
+      for (i = 0; i < cset->nchar_classes; ++i)
+        {
+          wctype_t wt = cset->char_classes[i];
+          if (__iswctype (wc, wt))
+            {
+              match_len = char_len;
+              goto check_node_accept_bytes_match;
+            }
+        }
+
+# ifdef _LIBC
       if (nrules != 0)
         {
-          int match_len = 0;
           unsigned int in_collseq = 0;
           const int32_t *table, *indirect;
           const unsigned char *weights, *extra, *collseqwc;
           int32_t idx;
-          wchar_t wc = 0;
           /* This #include defines a local function!  */
-# include <locale/weight.h>
+#  include <locale/weight.h>
 
           /* match with collating_symbol?  */
           if (cset->ncoll_syms)
@@ -2057,9 +2108,6 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
                 }
             }
 
-          if (cset->nranges || cset->nchar_classes || cset->nmbchars)
-            wc = re_string_wchar_at (input, str_idx);
-
           if (cset->nranges)
             {
               if (elem_len <= char_len)
@@ -2112,43 +2160,39 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
                       }
                   }
             }
-
-          /* match with multibyte character?  */
-          for (i = 0; i < cset->nmbchars; ++i)
-            if (wc == cset->mbchars[i])
-              {
-                match_len = char_len;
-                goto check_node_accept_bytes_match;
-              }
-
-          /* match with character_class?  */
-          for (i = 0; i < cset->nchar_classes; ++i)
+        }
+      else
+# endif /* _LIBC */
+        {
+          /* match with range expression?  */
+          wchar_t cmp_buf[6] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
+          for (i = 0; i < cset->nranges; ++i)
             {
-              wctype_t wt = cset->char_classes[i];
-              if (__iswctype (wc, wt))
+              cmp_buf[0] = cset->range_starts[i];
+              cmp_buf[4] = cset->range_ends[i];
+              if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
+                  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
                 {
                   match_len = char_len;
                   goto check_node_accept_bytes_match;
                 }
             }
-
-        check_node_accept_bytes_match:
-          if (!cset->non_match)
-            return match_len;
+        }
+    check_node_accept_bytes_match:
+      if (!cset->non_match)
+        return match_len;
+      else
+        {
+          if (match_len > 0)
+            return 0;
           else
-            {
-              if (match_len > 0)
-                return 0;
-              else
-                return re_string_elem_size_at (input, str_idx);
-            }
+            return (elem_len > char_len) ? elem_len : char_len;
         }
-#endif
     }
   return 0;
 }
 
-#ifdef _LIBC
+# ifdef _LIBC
 static unsigned int
 find_collation_sequence_value (mbs, mbs_len)
     const unsigned char *mbs;
@@ -2204,7 +2248,8 @@ find_collation_sequence_value (mbs, mbs_len)
         }
     }
 }
-#endif
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
 
 /* Check whether the node accepts the byte which is IDX-th
    byte of the INPUT.  */