From 612546c60dd28d7af44fbb2bc98c69c33b4a0c49 Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Wed, 24 Apr 2002 21:54:53 +0000 Subject: Update. 2002-04-22 Isamu Hasegawa * posix/regcomp.c (re_compile_internal): Adapt it to new interface of buffer building functions. * posix/regex_internal.c (re_string_allocate): New function. (re_string_realloc_buffers): New function. (re_string_skip_chars): New function. (re_string_reconstruct): New function. (re_string_construct): Adapt it to new interface of buffer building functions. (re_string_construct_common): Likewise. (build_wcs_buffer): Likewise. (build_wcs_upper_buffer): Likewise. (build_upper_buffer): Likewise. (re_string_translate_buffer): Likewise. (re_string_context_at): Adapt it to variable length buffers. * posix/regex_internal.h (re_string_t): Add new fields to handle variable length buffers. (re_match_context_t): Likewise. * posix/regexec.c (re_search_internal): Adapt it to new interface of re_string_t and re_match_context_t. (acquire_init_state_context): Likewise. (check_matching): Likewise. (check_halt_state_context): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (clean_state_log_if_need): Likewise. (sift_states_iter_mb): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref): Likewise. (transit_state_bkref_loop): Likewise. (check_node_accept): Likewise. (match_ctx_init): Likewise. (extend_buffers): New function. 2002-04-21 Bruno Haible * iconvdata/tst-table.sh: For the second check, use the truncated GB18030 charmap table, like for the first check. --- posix/regex_internal.h | 112 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 75 insertions(+), 37 deletions(-) (limited to 'posix/regex_internal.h') diff --git a/posix/regex_internal.h b/posix/regex_internal.h index bb28102cc9..f676ae2746 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -201,33 +201,67 @@ typedef struct struct re_string_t { + /* Indicate the raw buffer which is the original string passed as an + argument of regexec(), re_search(), etc.. */ + const unsigned char *raw_mbs; + /* Index in RAW_MBS. Each character mbs[i] corresponds to + raw_mbs[raw_mbs_idx + i]. */ + int raw_mbs_idx; /* Store the multibyte string. In case of "case insensitive mode" like - REG_ICASE, upper cases of the string are stored. */ - const unsigned char *mbs; + REG_ICASE, upper cases of the string are stored, otherwise MBS points + the same address that RAW_MBS points. */ + unsigned char *mbs; /* Store the case sensitive multibyte string. In case of "case insensitive mode", the original string are stored, otherwise MBS_CASE points the same address that MBS points. */ - const unsigned char *mbs_case; - int cur_idx; - int len; + unsigned char *mbs_case; #ifdef RE_ENABLE_I18N /* Store the wide character string which is corresponding to MBS. */ wchar_t *wcs; + mbstate_t cur_state; #endif - /* 1 if mbs is allocated by regex library. */ - unsigned int mbs_alloc : 1; - /* 1 if mbs_case is allocated by regex library. */ - unsigned int mbs_case_alloc : 1; + /* The length of the valid characters in the buffers. */ + int valid_len; + /* The length of the buffers MBS, MBS_CASE, and WCS. */ + int bufs_len; + /* The index in MBS, which is updated by re_string_fetch_byte. */ + int cur_idx; + /* This is length_of_RAW_MBS - RAW_MBS_IDX. */ + int len; + /* The context of mbs[0]. We store the context independently, since + the context of mbs[0] may be different from raw_mbs[0], which is + the beginning of the input string. */ + unsigned int tip_context; + /* The translation passed as a part of an argument of re_compile_pattern. */ + RE_TRANSLATE_TYPE trans; + /* 1 if REG_ICASE. */ + unsigned int icase : 1; }; typedef struct re_string_t re_string_t; +/* In case of REG_ICASE, we allocate the buffer dynamically for mbs. */ +#define MBS_ALLOCATED(pstr) (pstr->icase) +/* In case that we need translation, we allocate the buffer dynamically + for mbs_case. Note that mbs == mbs_case if not REG_ICASE. */ +#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL) + +static reg_errcode_t re_string_allocate (re_string_t *pstr, + const unsigned char *str, int len, + int init_len, + RE_TRANSLATE_TYPE trans, int icase); static reg_errcode_t re_string_construct (re_string_t *pstr, const unsigned char *str, int len, - RE_TRANSLATE_TYPE trans); -static reg_errcode_t re_string_construct_toupper (re_string_t *pstr, - const unsigned char *str, - int len, - RE_TRANSLATE_TYPE trans); + RE_TRANSLATE_TYPE trans, int icase); +static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx, + int eflags, int newline); +static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, + int new_buf_len); +#ifdef RE_ENABLE_I18N +static void build_wcs_buffer (re_string_t *pstr); +static void build_wcs_upper_buffer (re_string_t *pstr); +#endif /* RE_ENABLE_I18N */ +static void build_upper_buffer (re_string_t *pstr); +static void re_string_translate_buffer (re_string_t *pstr); static void re_string_destruct (re_string_t *pstr); #ifdef RE_ENABLE_I18N static int re_string_elem_size_at (const re_string_t *pstr, int idx); @@ -253,8 +287,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx, #define re_string_cur_idx(pstr) ((pstr)->cur_idx) #define re_string_get_buffer(pstr) ((pstr)->mbs) #define re_string_length(pstr) ((pstr)->len) -#define re_string_byte_at(pstr,idx) \ - ((pstr)->mbs[idx]) +#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) #define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) #define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) @@ -279,27 +312,6 @@ struct bin_tree_t }; typedef struct bin_tree_t bin_tree_t; -struct re_backref_cache_entry -{ - int node; - int from; - int to; - int flag; -}; - -typedef struct -{ - int eflags; - int match_first; - int match_last; - int state_log_top; - /* Back reference cache. */ - int nbkref_ents; - int abkref_ents; - struct re_backref_cache_entry *bkref_ents; - int max_bkref_len; -} re_match_context_t; - #define CONTEXT_WORD 1 #define CONTEXT_NEWLINE (CONTEXT_WORD << 1) @@ -363,6 +375,32 @@ struct re_state_table_entry re_dfastate_t **array; }; +struct re_backref_cache_entry +{ + int node; + int from; + int to; + int flag; +}; + +typedef struct +{ + /* EFLAGS of the argument of regexec. */ + int eflags; + /* Where the matching ends. */ + int match_last; + /* The string object corresponding to the input string. */ + re_string_t *input; + /* The state log used by the matcher. */ + re_dfastate_t **state_log; + int state_log_top; + /* Back reference cache. */ + int nbkref_ents; + int abkref_ents; + struct re_backref_cache_entry *bkref_ents; + int max_bkref_len; +} re_match_context_t; + struct re_dfa_t { re_bitset_ptr_t word_char; -- cgit 1.4.1