about summary refs log tree commit diff
path: root/posix/regex_internal.h
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2002-04-24 21:54:53 +0000
committer Ulrich Drepper <drepper@redhat.com> 2002-04-24 21:54:53 +0000
commit 612546c60dd28d7af44fbb2bc98c69c33b4a0c49 (patch)
tree 11dc6bf94bd7beb3271366b33a86be162b3bfa6e /posix/regex_internal.h
parent be479a6dfe81c5b426e2cbabd62be2c042ab2d45 (diff)
download glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.tar.gz
glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.tar.xz
glibc-612546c60dd28d7af44fbb2bc98c69c33b4a0c49.zip
Update.
2002-04-22  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c (re_compile_internal): Adapt it to new interface
	of buffer building functions.
	* posix/regex_internal.c (re_string_allocate): New function.
	(re_string_realloc_buffers): New function.
	(re_string_skip_chars): New function.
	(re_string_reconstruct): New function.
	(re_string_construct): Adapt it to new interface of buffer building
	functions.
	(re_string_construct_common): Likewise.
	(build_wcs_buffer): Likewise.
	(build_wcs_upper_buffer): Likewise.
	(build_upper_buffer): Likewise.
	(re_string_translate_buffer): Likewise.
	(re_string_context_at): Adapt it to variable length buffers.
	* posix/regex_internal.h (re_string_t): Add new fields to handle
	variable length buffers.
	(re_match_context_t): Likewise.
	* posix/regexec.c (re_search_internal): Adapt it to new interface
	of re_string_t and re_match_context_t.
	(acquire_init_state_context): Likewise.
	(check_matching): Likewise.
	(check_halt_state_context): Likewise.
	(proceed_next_node): Likewise.
	(set_regs): Likewise.
	(sift_states_backward): Likewise.
	(clean_state_log_if_need): Likewise.
	(sift_states_iter_mb): Likewise.
	(sift_states_iter_bkref): Likewise.
	(add_epsilon_backreference): Likewise.
	(transit_state): Likewise.
	(transit_state_sb): Likewise.
	(transit_state_mb): Likewise.
	(transit_state_bkref): Likewise.
	(transit_state_bkref_loop): Likewise.
	(check_node_accept): Likewise.
	(match_ctx_init): Likewise.
	(extend_buffers): New function.

2002-04-21  Bruno Haible  <bruno@clisp.org>

	* iconvdata/tst-table.sh: For the second check, use the truncated
	GB18030 charmap table, like for the first check.
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r-- posix/regex_internal.h 112
1 files changed, 75 insertions, 37 deletions
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index bb28102cc9..f676ae2746 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -201,33 +201,67 @@ typedef struct
 
 struct re_string_t
 {
+  /* Indicate the raw buffer which is the original string passed as an
+     argument of regexec(), re_search(), etc..  */
+  const unsigned char *raw_mbs;
+  /* Index in RAW_MBS.  Each character mbs[i] corresponds to
+     raw_mbs[raw_mbs_idx + i].  */
+  int raw_mbs_idx;
   /* Store the multibyte string.  In case of "case insensitive mode" like
-     REG_ICASE, upper cases of the string are stored.  */
-  const unsigned char *mbs;
+     REG_ICASE, upper cases of the string are stored, otherwise MBS points
+     the same address that RAW_MBS points.  */
+  unsigned char *mbs;
   /* Store the case sensitive multibyte string.  In case of
      "case insensitive mode", the original string are stored,
      otherwise MBS_CASE points the same address that MBS points.  */
-  const unsigned char *mbs_case;
-  int cur_idx;
-  int len;
+  unsigned char *mbs_case;
 #ifdef RE_ENABLE_I18N
   /* Store the wide character string which is corresponding to MBS.  */
   wchar_t *wcs;
+  mbstate_t cur_state;
 #endif
-  /* 1 if mbs is allocated by regex library.  */
-  unsigned int mbs_alloc : 1;
-  /* 1 if mbs_case is allocated by regex library.  */
-  unsigned int mbs_case_alloc : 1;
+  /* The length of the valid characters in the buffers.  */
+  int valid_len;
+  /* The length of the buffers MBS, MBS_CASE, and WCS.  */
+  int bufs_len;
+  /* The index in MBS, which is updated by re_string_fetch_byte.  */
+  int cur_idx;
+  /* This is length_of_RAW_MBS - RAW_MBS_IDX.  */
+  int len;
+  /* The context of mbs[0].  We store the context independently, since
+     the context of mbs[0] may be different from raw_mbs[0], which is
+     the beginning of the input string.  */
+  unsigned int tip_context;
+  /* The translation passed as a part of an argument of re_compile_pattern.  */
+  RE_TRANSLATE_TYPE trans;
+  /* 1 if REG_ICASE.  */
+  unsigned int icase : 1;
 };
 typedef struct re_string_t re_string_t;
+/* In case of REG_ICASE, we allocate the buffer dynamically for mbs.  */
+#define MBS_ALLOCATED(pstr) (pstr->icase)
+/* In case that we need translation, we allocate the buffer dynamically
+   for mbs_case.  Note that mbs == mbs_case if not REG_ICASE.  */
+#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
+
 
+static reg_errcode_t re_string_allocate (re_string_t *pstr,
+                                         const unsigned char *str, int len,
+                                         int init_len,
+                                         RE_TRANSLATE_TYPE trans, int icase);
 static reg_errcode_t re_string_construct (re_string_t *pstr,
 					  const unsigned char *str, int len,
-					  RE_TRANSLATE_TYPE trans);
-static reg_errcode_t re_string_construct_toupper (re_string_t *pstr,
-						  const unsigned char *str,
-						  int len,
-						  RE_TRANSLATE_TYPE trans);
+                                          RE_TRANSLATE_TYPE trans, int icase);
+static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
+                                            int eflags, int newline);
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+                                                int new_buf_len);
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr);
+static void build_wcs_upper_buffer (re_string_t *pstr);
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr);
+static void re_string_translate_buffer (re_string_t *pstr);
 static void re_string_destruct (re_string_t *pstr);
 #ifdef RE_ENABLE_I18N
 static int re_string_elem_size_at (const re_string_t *pstr, int idx);
@@ -253,8 +287,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
 #define re_string_cur_idx(pstr) ((pstr)->cur_idx)
 #define re_string_get_buffer(pstr) ((pstr)->mbs)
 #define re_string_length(pstr) ((pstr)->len)
-#define re_string_byte_at(pstr,idx) \
-  ((pstr)->mbs[idx])
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
 #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
 #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
 
@@ -279,27 +312,6 @@ struct bin_tree_t
 };
 typedef struct bin_tree_t bin_tree_t;
 
-struct re_backref_cache_entry
-{
-  int node;
-  int from;
-  int to;
-  int flag;
-};
-
-typedef struct
-{
-  int eflags;
-  int match_first;
-  int match_last;
-  int state_log_top;
-  /* Back reference cache.  */
-  int nbkref_ents;
-  int abkref_ents;
-  struct re_backref_cache_entry *bkref_ents;
-  int max_bkref_len;
-} re_match_context_t;
-
 
 #define CONTEXT_WORD 1
 #define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
@@ -363,6 +375,32 @@ struct re_state_table_entry
   re_dfastate_t **array;
 };
 
+struct re_backref_cache_entry
+{
+  int node;
+  int from;
+  int to;
+  int flag;
+};
+
+typedef struct
+{
+  /* EFLAGS of the argument of regexec.  */
+  int eflags;
+  /* Where the matching ends.  */
+  int match_last;
+  /* The string object corresponding to the input string.  */
+  re_string_t *input;
+  /* The state log used by the matcher.  */
+  re_dfastate_t **state_log;
+  int state_log_top;
+  /* Back reference cache.  */
+  int nbkref_ents;
+  int abkref_ents;
+  struct re_backref_cache_entry *bkref_ents;
+  int max_bkref_len;
+} re_match_context_t;
+
 struct re_dfa_t
 {
   re_bitset_ptr_t word_char;