diff options
Diffstat (limited to 'posix/regexec.c')
-rw-r--r-- | posix/regexec.c | 936 |
1 files changed, 483 insertions, 453 deletions
diff --git a/posix/regexec.c b/posix/regexec.c index 4b1ab4ecff..63aef97535 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -15,100 +15,98 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#include <stdint.h> + <https://www.gnu.org/licenses/>. */ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, - int n); + Idx n); static void match_ctx_clean (re_match_context_t *mctx); static void match_ctx_free (re_match_context_t *cache); -static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, - int str_idx, int from, int to); -static int search_cur_bkref_entry (const re_match_context_t *mctx, - int str_idx); -static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, - int str_idx); +static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node, + Idx str_idx, Idx from, Idx to); +static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx); +static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node, + Idx str_idx); static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, - int node, int str_idx); + Idx node, Idx str_idx); static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, - re_dfastate_t **limited_sts, int last_node, - int last_str_idx); + re_dfastate_t **limited_sts, Idx last_node, + Idx last_str_idx); static reg_errcode_t re_search_internal (const regex_t *preg, - const char *string, int length, - int start, int range, int stop, + const char *string, Idx length, + Idx start, Idx last_start, Idx stop, size_t nmatch, regmatch_t pmatch[], int eflags); -static int re_search_2_stub (struct re_pattern_buffer *bufp, - const char *string1, int length1, - const char *string2, int length2, - int start, int range, struct re_registers *regs, - int stop, int ret_len); -static int re_search_stub (struct re_pattern_buffer *bufp, - const char *string, int length, int start, - int range, int stop, struct re_registers *regs, - int ret_len); +static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, + const char *string1, Idx length1, + const char *string2, Idx length2, + Idx start, regoff_t range, + struct re_registers *regs, + Idx stop, bool ret_len); +static regoff_t re_search_stub (struct re_pattern_buffer *bufp, + const char *string, Idx length, Idx start, + regoff_t range, Idx stop, + struct re_registers *regs, + bool ret_len); static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - int nregs, int regs_allocated); + Idx nregs, int regs_allocated); static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx); -static int check_matching (re_match_context_t *mctx, int fl_longest_match, - int *p_match_first); -static int check_halt_state_context (const re_match_context_t *mctx, - const re_dfastate_t *state, int idx); +static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, + Idx *p_match_first); +static Idx check_halt_state_context (const re_match_context_t *mctx, + const re_dfastate_t *state, Idx idx); static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, - regmatch_t *prev_idx_match, int cur_node, - int cur_idx, int nmatch); + regmatch_t *prev_idx_match, Idx cur_node, + Idx cur_idx, Idx nmatch); static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, - int str_idx, int dest_node, int nregs, + Idx str_idx, Idx dest_node, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes); static reg_errcode_t set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, - int fl_backtrack); + bool fl_backtrack); static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs); #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, - int node_idx, int str_idx, int max_str_idx); + Idx node_idx, Idx str_idx, Idx max_str_idx); #endif /* RE_ENABLE_I18N */ static reg_errcode_t sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx); static reg_errcode_t build_sifted_states (const re_match_context_t *mctx, - re_sift_context_t *sctx, int str_idx, + re_sift_context_t *sctx, Idx str_idx, re_node_set *cur_dest); static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, re_sift_context_t *sctx, - int str_idx, + Idx str_idx, re_node_set *dest_nodes); static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates); -static int check_dst_limits (const re_match_context_t *mctx, - re_node_set *limits, - int dst_node, int dst_idx, int src_node, - int src_idx); +static bool check_dst_limits (const re_match_context_t *mctx, + const re_node_set *limits, + Idx dst_node, Idx dst_idx, Idx src_node, + Idx src_idx); static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, - int boundaries, int subexp_idx, - int from_node, int bkref_idx); + int boundaries, Idx subexp_idx, + Idx from_node, Idx bkref_idx); static int check_dst_limits_calc_pos (const re_match_context_t *mctx, - int limit, int subexp_idx, - int node, int str_idx, - int bkref_idx); + Idx limit, Idx subexp_idx, + Idx node, Idx str_idx, + Idx bkref_idx); static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, struct re_backref_cache_entry *bkref_ents, - int str_idx); + Idx str_idx); static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, - int str_idx, - const re_node_set *candidates); + Idx str_idx, const re_node_set *candidates); static reg_errcode_t merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, int num); + re_dfastate_t **src, Idx num); static re_dfastate_t *find_recover_state (reg_errcode_t *err, re_match_context_t *mctx); static re_dfastate_t *transit_state (reg_errcode_t *err, @@ -119,7 +117,7 @@ static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, re_dfastate_t *next_state); static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, - int str_idx); + Idx str_idx); #if 0 static re_dfastate_t *transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, @@ -132,46 +130,46 @@ static reg_errcode_t transit_state_mb (re_match_context_t *mctx, static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes); static reg_errcode_t get_subexp (re_match_context_t *mctx, - int bkref_node, int bkref_str_idx); + Idx bkref_node, Idx bkref_str_idx); static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, re_sub_match_last_t *sub_last, - int bkref_node, int bkref_str); -static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, - int subexp_idx, int type); + Idx bkref_node, Idx bkref_str); +static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, + Idx subexp_idx, int type); static reg_errcode_t check_arrival (re_match_context_t *mctx, - state_array_t *path, int top_node, - int top_str, int last_node, int last_str, + state_array_t *path, Idx top_node, + Idx top_str, Idx last_node, Idx last_str, int type); static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, - int str_idx, + Idx str_idx, re_node_set *cur_nodes, re_node_set *next_nodes); static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, - int ex_subexp, int type); + Idx ex_subexp, int type); static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, - int target, int ex_subexp, + Idx target, Idx ex_subexp, int type); static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, - re_node_set *cur_nodes, int cur_str, - int subexp_num, int type); -static int build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); + re_node_set *cur_nodes, Idx cur_str, + Idx subexp_num, int type); +static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state); #ifdef RE_ENABLE_I18N -static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, - const re_string_t *input, int idx); +static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, + const re_string_t *input, Idx idx); # ifdef _LIBC static unsigned int find_collation_sequence_value (const unsigned char *mbs, size_t name_len); # endif /* _LIBC */ #endif /* RE_ENABLE_I18N */ -static int group_nodes_into_DFAstates (const re_dfa_t *dfa, +static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, re_node_set *states_node, bitset_t *states_ch); -static int check_node_accept (const re_match_context_t *mctx, - const re_token_t *node, int idx); +static bool check_node_accept (const re_match_context_t *mctx, + const re_token_t *node, Idx idx); static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len); /* Entry point for POSIX code. */ @@ -180,23 +178,23 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len); string STRING. If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at least NMATCH elements, and we set them to the offsets of the corresponding matched substrings. - EFLAGS specifies `execution flags' which affect matching: if + EFLAGS specifies "execution flags" which affect matching: if REG_NOTBOL is set, then ^ does not match at the beginning of the string; if REG_NOTEOL is set, then $ does not match at the end. We return 0 if we find a match and REG_NOMATCH if not. */ int -regexec (const regex_t *__restrict preg, const char *__restrict string, +regexec (const regex_t *_Restrict_ preg, const char *_Restrict_ string, size_t nmatch, regmatch_t pmatch[], int eflags) { reg_errcode_t err; - int start, length; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; + Idx start, length; + re_dfa_t *dfa = preg->buffer; if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) return REG_BADPAT; @@ -212,14 +210,14 @@ regexec (const regex_t *__restrict preg, const char *__restrict string, length = strlen (string); } - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); if (preg->no_sub) - err = re_search_internal (preg, string, length, start, length - start, + err = re_search_internal (preg, string, length, start, length, length, 0, NULL, eflags); else - err = re_search_internal (preg, string, length, start, length - start, + err = re_search_internal (preg, string, length, start, length, length, nmatch, pmatch, eflags); - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return err != REG_NOERROR; } @@ -234,8 +232,8 @@ __typeof__ (__regexec) __compat_regexec; int attribute_compat_text_section -__compat_regexec (const regex_t *__restrict preg, - const char *__restrict string, size_t nmatch, +__compat_regexec (const regex_t *_Restrict_ preg, + const char *_Restrict_ string, size_t nmatch, regmatch_t pmatch[], int eflags) { return regexec (preg, string, nmatch, pmatch, @@ -274,62 +272,65 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); return the position of the start of the match. Return value -1 means no match was found and -2 indicates an internal error. */ -int -re_match (struct re_pattern_buffer *bufp, const char *string, int length, - int start, struct re_registers *regs) +regoff_t +re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, + Idx start, struct re_registers *regs) { - return re_search_stub (bufp, string, length, start, 0, length, regs, 1); + return re_search_stub (bufp, string, length, start, 0, length, regs, true); } #ifdef _LIBC weak_alias (__re_match, re_match) #endif -int -re_search (struct re_pattern_buffer *bufp, const char *string, int length, - int start, int range, struct re_registers *regs) +regoff_t +re_search (struct re_pattern_buffer *bufp, const char *string, Idx length, + Idx start, regoff_t range, struct re_registers *regs) { - return re_search_stub (bufp, string, length, start, range, length, regs, 0); + return re_search_stub (bufp, string, length, start, range, length, regs, + false); } #ifdef _LIBC weak_alias (__re_search, re_search) #endif -int -re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int length1, - const char *string2, int length2, int start, - struct re_registers *regs, int stop) +regoff_t +re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, + const char *string2, Idx length2, Idx start, + struct re_registers *regs, Idx stop) { return re_search_2_stub (bufp, string1, length1, string2, length2, - start, 0, regs, stop, 1); + start, 0, regs, stop, true); } #ifdef _LIBC weak_alias (__re_match_2, re_match_2) #endif -int -re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int length1, - const char *string2, int length2, int start, int range, - struct re_registers *regs, int stop) +regoff_t +re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1, + const char *string2, Idx length2, Idx start, regoff_t range, + struct re_registers *regs, Idx stop) { return re_search_2_stub (bufp, string1, length1, string2, length2, - start, range, regs, stop, 0); + start, range, regs, stop, false); } #ifdef _LIBC weak_alias (__re_search_2, re_search_2) #endif -static int +static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, - int length1, const char *string2, int length2, int start, - int range, struct re_registers *regs, - int stop, int ret_len) + Idx length1, const char *string2, Idx length2, Idx start, + regoff_t range, struct re_registers *regs, + Idx stop, bool ret_len) { const char *str; - int rval; - int len = length1 + length2; + regoff_t rval; + Idx len; char *s = NULL; - if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0)) + if (BE ((length1 < 0 || length2 < 0 || stop < 0 + || INT_ADD_WRAPV (length1, length2, &len)), + 0)) return -2; /* Concatenate the strings. */ @@ -353,42 +354,45 @@ re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, else str = string1; - rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len); + rval = re_search_stub (bufp, str, len, start, range, stop, regs, + ret_len); re_free (s); return rval; } /* The parameters have the same meaning as those of re_search. Additional parameters: - If RET_LEN is nonzero the length of the match is returned (re_match style); + If RET_LEN is true the length of the match is returned (re_match style); otherwise the position of the match is returned. */ -static int -re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length, - int start, int range, int stop, struct re_registers *regs, - int ret_len) +static regoff_t +re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, + Idx start, regoff_t range, Idx stop, struct re_registers *regs, + bool ret_len) { reg_errcode_t result; regmatch_t *pmatch; - int nregs, rval; + Idx nregs; + regoff_t rval; int eflags = 0; - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; + re_dfa_t *dfa = bufp->buffer; + Idx last_start = start + range; /* Check for out-of-range. */ if (BE (start < 0 || start > length, 0)) return -1; - if (BE (start + range > length, 0)) - range = length - start; - else if (BE (start + range < 0, 0)) - range = -start; + if (BE (length < last_start || (0 <= range && last_start < start), 0)) + last_start = length; + else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) + last_start = 0; - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; /* Compile fastmap if we haven't yet. */ - if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) + if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate) re_compile_fastmap (bufp); if (BE (bufp->no_sub, 0)) @@ -397,8 +401,8 @@ re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length, /* We need at least 1 register. */ if (regs == NULL) nregs = 1; - else if (BE (bufp->regs_allocated == REGS_FIXED && - regs->num_regs < bufp->re_nsub + 1, 0)) + else if (BE (bufp->regs_allocated == REGS_FIXED + && regs->num_regs <= bufp->re_nsub, 0)) { nregs = regs->num_regs; if (BE (nregs < 1, 0)) @@ -417,14 +421,14 @@ re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length, goto out; } - result = re_search_internal (bufp, string, length, start, range, stop, + result = re_search_internal (bufp, string, length, start, last_start, stop, nregs, pmatch, eflags); rval = 0; - /* I hope we needn't fill ther regs with -1's when no match was found. */ + /* I hope we needn't fill their regs with -1's when no match was found. */ if (result != REG_NOERROR) - rval = -1; + rval = result == REG_NOMATCH ? -1 : -2; else if (regs != NULL) { /* If caller wants register contents data back, copy them. */ @@ -446,18 +450,18 @@ re_search_stub (struct re_pattern_buffer *bufp, const char *string, int length, } re_free (pmatch); out: - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return rval; } static unsigned -re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs, +re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, int regs_allocated) { int rval = REGS_REALLOCATE; - int i; - int need_regs = nregs + 1; - /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + Idx i; + Idx need_regs = nregs + 1; + /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code uses. */ /* Have the register data arrays been allocated? */ @@ -530,7 +534,7 @@ re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, int nregs, void re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends) + __re_size_t num_regs, regoff_t *starts, regoff_t *ends) { if (num_regs) { @@ -543,7 +547,7 @@ re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs, { bufp->regs_allocated = REGS_UNALLOCATED; regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; + regs->start = regs->end = NULL; } } #ifdef _LIBC @@ -568,32 +572,38 @@ re_exec (const char *s) /* Searches for a compiled pattern PREG in the string STRING, whose length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same - meaning as with regexec. START, and RANGE have the same meanings - with re_search. + meaning as with regexec. LAST_START is START + RANGE, where + START and RANGE have the same meaning as with re_search. Return REG_NOERROR if we find a match, and REG_NOMATCH if not, otherwise return the error code. Note: We assume front end functions already check ranges. - (START + RANGE >= 0 && START + RANGE <= LENGTH) */ + (0 <= LAST_START && LAST_START <= LENGTH) */ static reg_errcode_t __attribute_warn_unused_result__ -re_search_internal (const regex_t *preg, const char *string, int length, - int start, int range, int stop, size_t nmatch, +re_search_internal (const regex_t *preg, const char *string, Idx length, + Idx start, Idx last_start, Idx stop, size_t nmatch, regmatch_t pmatch[], int eflags) { reg_errcode_t err; - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; - int left_lim, right_lim, incr; - int fl_longest_match, match_first, match_kind, match_last = -1; - int extra_nmatch; - int sb, ch; + const re_dfa_t *dfa = preg->buffer; + Idx left_lim, right_lim; + int incr; + bool fl_longest_match; + int match_kind; + Idx match_first; + Idx match_last = -1; + Idx extra_nmatch; + bool sb; + int ch; #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) re_match_context_t mctx = { .dfa = dfa }; #else re_match_context_t mctx; #endif - char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate - && range && !preg->can_be_null) ? preg->fastmap : NULL; + char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate + && start != last_start && !preg->can_be_null) + ? preg->fastmap : NULL); RE_TRANSLATE_TYPE t = preg->translate; #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) @@ -612,7 +622,7 @@ re_search_internal (const regex_t *preg, const char *string, int length, #ifdef DEBUG /* We assume front-end functions already check them. */ - assert (start + range >= 0 && start + range <= length); + assert (0 <= last_start && last_start <= length); #endif /* If initial states with non-begbuf contexts have no elements, @@ -623,16 +633,17 @@ re_search_internal (const regex_t *preg, const char *string, int length, && (dfa->init_state_nl->nodes.nelem == 0 || !preg->newline_anchor)) { - if (start != 0 && start + range != 0) - return REG_NOMATCH; - start = range = 0; + if (start != 0 && last_start != 0) + return REG_NOMATCH; + start = last_start = 0; } /* We must check the longest matching, if nmatch > 0. */ fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, - preg->translate, preg->syntax & RE_ICASE, dfa); + preg->translate, (preg->syntax & RE_ICASE) != 0, + dfa); if (BE (err != REG_NOERROR, 0)) goto free_return; mctx.input.stop = stop; @@ -650,7 +661,8 @@ re_search_internal (const regex_t *preg, const char *string, int length, if (nmatch > 1 || dfa->has_mb_node) { /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) + if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) + <= mctx.input.bufs_len), 0)) { err = REG_ESPACE; goto free_return; @@ -670,15 +682,15 @@ re_search_internal (const regex_t *preg, const char *string, int length, mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF; - /* Check incrementally whether of not the input string match. */ - incr = (range < 0) ? -1 : 1; - left_lim = (range < 0) ? start + range : start; - right_lim = (range < 0) ? start : start + range; + /* Check incrementally whether the input string matches. */ + incr = (last_start < start) ? -1 : 1; + left_lim = (last_start < start) ? last_start : start; + right_lim = (last_start < start) ? start : last_start; sb = dfa->mb_cur_max == 1; match_kind = (fastmap ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) - | (range >= 0 ? 2 : 0) + | (start <= last_start ? 2 : 0) | (t != NULL ? 1 : 0)) : 8); @@ -745,8 +757,8 @@ re_search_internal (const regex_t *preg, const char *string, int length, { /* If MATCH_FIRST is out of the valid range, reconstruct the buffers. */ - unsigned int offset = match_first - mctx.input.raw_mbs_idx; - if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) + __re_size_t offset = match_first - mctx.input.raw_mbs_idx; + if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0)) { err = re_string_reconstruct (&mctx.input, match_first, eflags); @@ -788,7 +800,7 @@ re_search_internal (const regex_t *preg, const char *string, int length, /* We assume that the matching starts from 0. */ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; match_last = check_matching (&mctx, fl_longest_match, - range >= 0 ? &match_first : NULL); + start <= last_start ? &match_first : NULL); if (match_last != -1) { if (BE (match_last == -2, 0)) @@ -831,7 +843,7 @@ re_search_internal (const regex_t *preg, const char *string, int length, /* Set pmatch[] if we need. */ if (nmatch > 0) { - int reg_idx; + Idx reg_idx; /* Initialize registers. */ for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) @@ -840,6 +852,9 @@ re_search_internal (const regex_t *preg, const char *string, int length, /* Set the points where matching start/end. */ pmatch[0].rm_so = 0; pmatch[0].rm_eo = mctx.match_last; + /* FIXME: This function should fail if mctx.match_last exceeds + the maximum possible regoff_t value. We need a new error + code REG_OVERFLOW. */ if (!preg->no_sub && nmatch > 1) { @@ -903,7 +918,7 @@ __attribute_warn_unused_result__ prune_impossible_nodes (re_match_context_t *mctx) { const re_dfa_t *const dfa = mctx->dfa; - int halt_node, match_last; + Idx halt_node, match_last; reg_errcode_t ret; re_dfastate_t **sifted_states; re_dfastate_t **lim_states = NULL; @@ -915,7 +930,7 @@ prune_impossible_nodes (re_match_context_t *mctx) halt_node = mctx->last_node; /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0)) return REG_ESPACE; sifted_states = re_malloc (re_dfastate_t *, match_last + 1); @@ -995,9 +1010,9 @@ prune_impossible_nodes (re_match_context_t *mctx) since initial states may have constraints like "\<", "^", etc.. */ static inline re_dfastate_t * -__attribute ((always_inline)) +__attribute__ ((always_inline)) acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, - int idx) + Idx idx) { const re_dfa_t *const dfa = mctx->dfa; if (dfa->init_state->has_constraint) @@ -1028,27 +1043,27 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, } /* Check whether the regular expression match input string INPUT or not, - and return the index where the matching end, return -1 if not match, - or return -2 in case of an error. + and return the index where the matching end. Return -1 if + there is no match, and return -2 in case of an error. FL_LONGEST_MATCH means we want the POSIX longest matching. If P_MATCH_FIRST is not NULL, and the match fails, it is set to the next place where we may want to try matching. - Note that the matcher assume that the maching starts from the current + Note that the matcher assumes that the matching starts from the current index of the buffer. */ -static int +static Idx __attribute_warn_unused_result__ -check_matching (re_match_context_t *mctx, int fl_longest_match, - int *p_match_first) +check_matching (re_match_context_t *mctx, bool fl_longest_match, + Idx *p_match_first) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; - int match = 0; - int match_last = -1; - int cur_str_idx = re_string_cur_idx (&mctx->input); + Idx match = 0; + Idx match_last = -1; + Idx cur_str_idx = re_string_cur_idx (&mctx->input); re_dfastate_t *cur_state; - int at_init_state = p_match_first != NULL; - int next_start_idx = cur_str_idx; + bool at_init_state = p_match_first != NULL; + Idx next_start_idx = cur_str_idx; err = REG_NOERROR; cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); @@ -1067,7 +1082,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, later. E.g. Processing back references. */ if (BE (dfa->nbackref, 0)) { - at_init_state = 0; + at_init_state = false; err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); if (BE (err != REG_NOERROR, 0)) return err; @@ -1100,7 +1115,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, while (!re_string_eoi (&mctx->input)) { re_dfastate_t *old_state = cur_state; - int next_char_idx = re_string_cur_idx (&mctx->input) + 1; + Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; if ((BE (next_char_idx >= mctx->input.bufs_len, 0) && mctx->input.bufs_len < mctx->input.len) @@ -1138,7 +1153,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, if (old_state == cur_state) next_start_idx = next_char_idx; else - at_init_state = 0; + at_init_state = false; } if (cur_state->halt) @@ -1169,29 +1184,29 @@ check_matching (re_match_context_t *mctx, int fl_longest_match, /* Check NODE match the current context. */ -static int -check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context) +static bool +check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context) { re_token_type_t type = dfa->nodes[node].type; unsigned int constraint = dfa->nodes[node].constraint; if (type != END_OF_RE) - return 0; + return false; if (!constraint) - return 1; + return true; if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) - return 0; - return 1; + return false; + return true; } /* Check the halt state STATE match the current context. Return 0 if not match, if the node, STATE has, is a halt node and match the context, return the node. */ -static int +static Idx check_halt_state_context (const re_match_context_t *mctx, - const re_dfastate_t *state, int idx) + const re_dfastate_t *state, Idx idx) { - int i; + Idx i; unsigned int context; #ifdef DEBUG assert (state->halt); @@ -1205,31 +1220,33 @@ check_halt_state_context (const re_match_context_t *mctx, /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA corresponding to the DFA). - Return the destination node, and update EPS_VIA_NODES, return -1 in case - of errors. */ + Return the destination node, and update EPS_VIA_NODES; + return -1 in case of errors. */ -static int -proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, - int *pidx, int node, re_node_set *eps_via_nodes, +static Idx +proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, + Idx *pidx, Idx node, re_node_set *eps_via_nodes, struct re_fail_stack_t *fs) { const re_dfa_t *const dfa = mctx->dfa; - int i, err; + Idx i; + bool ok; if (IS_EPSILON_NODE (dfa->nodes[node].type)) { re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; re_node_set *edests = &dfa->edests[node]; - int dest_node; - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) + Idx dest_node; + ok = re_node_set_insert (eps_via_nodes, node); + if (BE (! ok, 0)) return -2; - /* Pick up a valid destination, or return -1 if none is found. */ + /* Pick up a valid destination, or return -1 if none + is found. */ for (dest_node = -1, i = 0; i < edests->nelem; ++i) { - int candidate = edests->elems[i]; + Idx candidate = edests->elems[i]; if (!re_node_set_contains (cur_nodes, candidate)) continue; - if (dest_node == -1) + if (dest_node == -1) dest_node = candidate; else @@ -1253,7 +1270,7 @@ proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, } else { - int naccepted = 0; + Idx naccepted = 0; re_token_type_t type = dfa->nodes[node].type; #ifdef RE_ENABLE_I18N @@ -1263,7 +1280,7 @@ proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, #endif /* RE_ENABLE_I18N */ if (type == OP_BACK_REF) { - int subexp_idx = dfa->nodes[node].opr.idx + 1; + Idx subexp_idx = dfa->nodes[node].opr.idx + 1; naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; if (fs != NULL) { @@ -1280,9 +1297,9 @@ proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, if (naccepted == 0) { - int dest_node; - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) + Idx dest_node; + ok = re_node_set_insert (eps_via_nodes, node); + if (BE (! ok, 0)) return -2; dest_node = dfa->edests[node].elems[0]; if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, @@ -1294,7 +1311,7 @@ proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, if (naccepted != 0 || check_node_accept (mctx, dfa->nodes + node, *pidx)) { - int dest_node = dfa->nexts[node]; + Idx dest_node = dfa->nexts[node]; *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, @@ -1309,16 +1326,16 @@ proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs, static reg_errcode_t __attribute_warn_unused_result__ -push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, - int nregs, regmatch_t *regs, re_node_set *eps_via_nodes) +push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, + Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) { reg_errcode_t err; - int num = fs->num++; + Idx num = fs->num++; if (fs->num == fs->alloc) { struct re_fail_stack_ent_t *new_array; - new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) - * fs->alloc * 2)); + new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t, + fs->alloc * 2); if (new_array == NULL) return REG_ESPACE; fs->alloc *= 2; @@ -1334,11 +1351,11 @@ push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node, return err; } -static int -pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, +static Idx +pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) { - int num = --fs->num; + Idx num = --fs->num; assert (num >= 0); *pidx = fs->stack[num].idx; memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); @@ -1356,15 +1373,15 @@ pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, static reg_errcode_t __attribute_warn_unused_result__ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, - regmatch_t *pmatch, int fl_backtrack) + regmatch_t *pmatch, bool fl_backtrack) { - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; - int idx, cur_node; + const re_dfa_t *dfa = preg->buffer; + Idx idx, cur_node; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; struct re_fail_stack_t fs_body = { 0, 2, NULL }; regmatch_t *prev_idx_match; - int prev_idx_match_malloced = 0; + bool prev_idx_match_malloced = false; #ifdef DEBUG assert (nmatch > 1); @@ -1393,7 +1410,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, free_fail_stack_return (fs); return REG_ESPACE; } - prev_idx_match_malloced = 1; + prev_idx_match_malloced = true; } memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); @@ -1403,7 +1420,7 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) { - int reg_idx; + Idx reg_idx; if (fs) { for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) @@ -1465,7 +1482,7 @@ free_fail_stack_return (struct re_fail_stack_t *fs) { if (fs) { - int fs_idx; + Idx fs_idx; for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) { re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); @@ -1478,12 +1495,12 @@ free_fail_stack_return (struct re_fail_stack_t *fs) static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, - regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch) + regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch) { int type = dfa->nodes[cur_node].type; if (type == OP_OPEN_SUBEXP) { - int reg_num = dfa->nodes[cur_node].opr.idx + 1; + Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; /* We are at the first node of this sub expression. */ if (reg_num < nmatch) @@ -1494,7 +1511,7 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, } else if (type == OP_CLOSE_SUBEXP) { - int reg_num = dfa->nodes[cur_node].opr.idx + 1; + Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; if (reg_num < nmatch) { /* We are at the last node of this sub expression. */ @@ -1528,21 +1545,21 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, and sift the nodes in each states according to the following rules. Updated state_log will be wrote to STATE_LOG. - Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... 1. When STR_IDX == MATCH_LAST(the last index in the state_log): - If `a' isn't the LAST_NODE and `a' can't epsilon transit to - the LAST_NODE, we throw away the node `a'. - 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts - string `s' and transit to `b': + If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to + the LAST_NODE, we throw away the node 'a'. + 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts + string 's' and transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw - away the node `a'. + away the node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - thrown away, we throw away the node `a'. + thrown away, we throw away the node 'a'. 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the - node `a'. + node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, - we throw away the node `a'. */ + we throw away the node 'a'. */ #define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) @@ -1552,7 +1569,7 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) { reg_errcode_t err; int null_cnt = 0; - int str_idx = sctx->last_str_idx; + Idx str_idx = sctx->last_str_idx; re_node_set cur_dest; #ifdef DEBUG @@ -1607,31 +1624,31 @@ sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx) static reg_errcode_t __attribute_warn_unused_result__ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, - int str_idx, re_node_set *cur_dest) + Idx str_idx, re_node_set *cur_dest) { const re_dfa_t *const dfa = mctx->dfa; const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; - int i; + Idx i; /* Then build the next sifted state. - We build the next sifted state on `cur_dest', and update - `sifted_states[str_idx]' with `cur_dest'. + We build the next sifted state on 'cur_dest', and update + 'sifted_states[str_idx]' with 'cur_dest'. Note: - `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]' + 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. + 'cur_src' points the node_set of the old 'state_log[str_idx]' (with the epsilon nodes pre-filtered out). */ for (i = 0; i < cur_src->nelem; i++) { - int prev_node = cur_src->elems[i]; + Idx prev_node = cur_src->elems[i]; int naccepted = 0; - int ret; + bool ok; #ifdef DEBUG re_token_type_t type = dfa->nodes[prev_node].type; assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[prev_node].accept_mb) naccepted = sift_states_iter_mb (mctx, sctx, prev_node, str_idx, sctx->last_str_idx); @@ -1650,14 +1667,14 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, if (sctx->limits.nelem) { - int to_idx = str_idx + naccepted; + Idx to_idx = str_idx + naccepted; if (check_dst_limits (mctx, &sctx->limits, dfa->nexts[prev_node], to_idx, prev_node, str_idx)) continue; } - ret = re_node_set_insert (cur_dest, prev_node); - if (BE (ret == -1, 0)) + ok = re_node_set_insert (cur_dest, prev_node); + if (BE (! ok, 0)) return REG_ESPACE; } @@ -1667,9 +1684,9 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, /* Helper functions. */ static reg_errcode_t -clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) +clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) { - int top = mctx->state_log_top; + Idx top = mctx->state_log_top; if ((next_state_log_idx >= mctx->input.bufs_len && mctx->input.bufs_len < mctx->input.len) @@ -1693,9 +1710,9 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx) static reg_errcode_t merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, int num) + re_dfastate_t **src, Idx num) { - int st_idx; + Idx st_idx; reg_errcode_t err; for (st_idx = 0; st_idx < num; ++st_idx) { @@ -1719,7 +1736,7 @@ merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst, static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx, - re_sift_context_t *sctx, int str_idx, + re_sift_context_t *sctx, Idx str_idx, re_node_set *dest_nodes) { const re_dfa_t *const dfa = mctx->dfa; @@ -1770,7 +1787,7 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates) { reg_errcode_t err = REG_NOERROR; - int i; + Idx i; re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); if (BE (err != REG_NOERROR, 0)) @@ -1794,23 +1811,23 @@ add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes, } static reg_errcode_t -sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, +sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes, const re_node_set *candidates) { - int ecl_idx; + Idx ecl_idx; reg_errcode_t err; re_node_set *inv_eclosure = dfa->inveclosures + node; re_node_set except_nodes; re_node_set_init_empty (&except_nodes); for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { - int cur_node = inv_eclosure->elems[ecl_idx]; + Idx cur_node = inv_eclosure->elems[ecl_idx]; if (cur_node == node) continue; if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) { - int edst1 = dfa->edests[cur_node].elems[0]; - int edst2 = ((dfa->edests[cur_node].nelem > 1) + Idx edst1 = dfa->edests[cur_node].elems[0]; + Idx edst2 = ((dfa->edests[cur_node].nelem > 1) ? dfa->edests[cur_node].elems[1] : -1); if ((!re_node_set_contains (inv_eclosure, edst1) && re_node_set_contains (dest_nodes, edst1)) @@ -1830,10 +1847,10 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, } for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) { - int cur_node = inv_eclosure->elems[ecl_idx]; + Idx cur_node = inv_eclosure->elems[ecl_idx]; if (!re_node_set_contains (&except_nodes, cur_node)) { - int idx = re_node_set_contains (dest_nodes, cur_node) - 1; + Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1; re_node_set_remove_at (dest_nodes, idx); } } @@ -1841,18 +1858,18 @@ sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes, return REG_NOERROR; } -static int -check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, - int dst_node, int dst_idx, int src_node, int src_idx) +static bool +check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits, + Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx) { const re_dfa_t *const dfa = mctx->dfa; - int lim_idx, src_pos, dst_pos; + Idx lim_idx, src_pos, dst_pos; - int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); - int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); + Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); + Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { - int subexp_idx; + Idx subexp_idx; struct re_backref_cache_entry *ent; ent = mctx->bkref_ents + limits->elems[lim_idx]; subexp_idx = dfa->nodes[ent->node].opr.idx; @@ -1871,24 +1888,24 @@ check_dst_limits (const re_match_context_t *mctx, re_node_set *limits, if (src_pos == dst_pos) continue; /* This is unrelated limitation. */ else - return 1; + return true; } - return 0; + return false; } static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, - int subexp_idx, int from_node, int bkref_idx) + Idx subexp_idx, Idx from_node, Idx bkref_idx) { const re_dfa_t *const dfa = mctx->dfa; const re_node_set *eclosures = dfa->eclosures + from_node; - int node_idx; + Idx node_idx; /* Else, we are on the boundary: examine the nodes on the epsilon closure. */ for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) { - int node = eclosures->elems[node_idx]; + Idx node = eclosures->elems[node_idx]; switch (dfa->nodes[node].type) { case OP_BACK_REF: @@ -1897,7 +1914,8 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; do { - int dst, cpos; + Idx dst; + int cpos; if (ent->node != node) continue; @@ -1957,9 +1975,9 @@ check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries, } static int -check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, - int subexp_idx, int from_node, int str_idx, - int bkref_idx) +check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit, + Idx subexp_idx, Idx from_node, Idx str_idx, + Idx bkref_idx) { struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; int boundaries; @@ -1988,14 +2006,14 @@ check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit, static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, const re_node_set *candidates, re_node_set *limits, - struct re_backref_cache_entry *bkref_ents, int str_idx) + struct re_backref_cache_entry *bkref_ents, Idx str_idx) { reg_errcode_t err; - int node_idx, lim_idx; + Idx node_idx, lim_idx; for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) { - int subexp_idx; + Idx subexp_idx; struct re_backref_cache_entry *ent; ent = bkref_ents + limits->elems[lim_idx]; @@ -2005,11 +2023,11 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, subexp_idx = dfa->nodes[ent->node].opr.idx; if (ent->subexp_to == str_idx) { - int ops_node = -1; - int cls_node = -1; + Idx ops_node = -1; + Idx cls_node = -1; for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { - int node = dest_nodes->elems[node_idx]; + Idx node = dest_nodes->elems[node_idx]; re_token_type_t type = dfa->nodes[node].type; if (type == OP_OPEN_SUBEXP && subexp_idx == dfa->nodes[node].opr.idx) @@ -2033,7 +2051,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, if (cls_node >= 0) for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { - int node = dest_nodes->elems[node_idx]; + Idx node = dest_nodes->elems[node_idx]; if (!re_node_set_contains (dfa->inveclosures + node, cls_node) && !re_node_set_contains (dfa->eclosures + node, @@ -2053,7 +2071,7 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, { for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) { - int node = dest_nodes->elems[node_idx]; + Idx node = dest_nodes->elems[node_idx]; re_token_type_t type = dfa->nodes[node].type; if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) { @@ -2075,13 +2093,13 @@ check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes, static reg_errcode_t __attribute_warn_unused_result__ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, - int str_idx, const re_node_set *candidates) + Idx str_idx, const re_node_set *candidates) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; - int node_idx, node; + Idx node_idx, node; re_sift_context_t local_sctx; - int first_idx = search_cur_bkref_entry (mctx, str_idx); + Idx first_idx = search_cur_bkref_entry (mctx, str_idx); if (first_idx == -1) return REG_NOERROR; @@ -2090,7 +2108,7 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) { - int enabled_idx; + Idx enabled_idx; re_token_type_t type; struct re_backref_cache_entry *entry; node = candidates->elems[node_idx]; @@ -2105,10 +2123,10 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, enabled_idx = first_idx; do { - int subexp_len; - int to_idx; - int dst_node; - int ret; + Idx subexp_len; + Idx to_idx; + Idx dst_node; + bool ok; re_dfastate_t *cur_state; if (entry->node != node) @@ -2134,8 +2152,8 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, } local_sctx.last_node = node; local_sctx.last_str_idx = str_idx; - ret = re_node_set_insert (&local_sctx.limits, enabled_idx); - if (BE (ret < 0, 0)) + ok = re_node_set_insert (&local_sctx.limits, enabled_idx); + if (BE (! ok, 0)) { err = REG_ESPACE; goto free_return; @@ -2174,21 +2192,21 @@ sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx, #ifdef RE_ENABLE_I18N static int sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, - int node_idx, int str_idx, int max_str_idx) + Idx node_idx, Idx str_idx, Idx max_str_idx) { const re_dfa_t *const dfa = mctx->dfa; int naccepted; - /* Check the node can accept `multi byte'. */ + /* Check the node can accept "multi byte". */ naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); if (naccepted > 0 && str_idx + naccepted <= max_str_idx && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], dfa->nexts[node_idx])) - /* The node can't accept the `multi byte', or the + /* The node can't accept the "multi byte", or the destination was already thrown away, then the node - could't accept the current input `multi byte'. */ + could't accept the current input "multi byte". */ naccepted = 0; /* Otherwise, it is sure that the node could accept - `naccepted' bytes input. */ + 'naccepted' bytes input. */ return naccepted; } #endif /* RE_ENABLE_I18N */ @@ -2259,12 +2277,12 @@ transit_state (reg_errcode_t *err, re_match_context_t *mctx, } /* Update the state_log if we need */ -re_dfastate_t * +static re_dfastate_t * merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, re_dfastate_t *next_state) { const re_dfa_t *const dfa = mctx->dfa; - int cur_idx = re_string_cur_idx (&mctx->input); + Idx cur_idx = re_string_cur_idx (&mctx->input); if (cur_idx > mctx->state_log_top) { @@ -2337,14 +2355,14 @@ merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx, /* Skip bytes in the input that correspond to part of a multi-byte match, then look in the log for a state from which to restart matching. */ -re_dfastate_t * +static re_dfastate_t * find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) { re_dfastate_t *cur_state; do { - int max = mctx->state_log_top; - int cur_str_idx = re_string_cur_idx (&mctx->input); + Idx max = mctx->state_log_top; + Idx cur_str_idx = re_string_cur_idx (&mctx->input); do { @@ -2369,10 +2387,10 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, - int str_idx) + Idx str_idx) { const re_dfa_t *const dfa = mctx->dfa; - int node_idx; + Idx node_idx; reg_errcode_t err; /* TODO: This isn't efficient. @@ -2382,7 +2400,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes, E.g. RE: (a){2} */ for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) { - int node = cur_nodes->elems[node_idx]; + Idx node = cur_nodes->elems[node_idx]; if (dfa->nodes[node].type == OP_OPEN_SUBEXP && dfa->nodes[node].opr.idx < BITSET_WORD_BITS && (dfa->used_bkref_map @@ -2407,7 +2425,7 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, const re_dfa_t *const dfa = mctx->dfa; re_node_set next_nodes; re_dfastate_t *next_state; - int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); + Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); unsigned int context; *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); @@ -2415,7 +2433,7 @@ transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, return NULL; for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) { - int cur_node = state->nodes.elems[node_cnt]; + Idx cur_node = state->nodes.elems[node_cnt]; if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) { *err = re_node_set_merge (&next_nodes, @@ -2444,13 +2462,14 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; - int i; + Idx i; for (i = 0; i < pstate->nodes.nelem; ++i) { re_node_set dest_nodes, *new_nodes; - int cur_node_idx = pstate->nodes.elems[i]; - int naccepted, dest_idx; + Idx cur_node_idx = pstate->nodes.elems[i]; + int naccepted; + Idx dest_idx; unsigned int context; re_dfastate_t *dest_state; @@ -2473,7 +2492,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) if (naccepted == 0) continue; - /* The node can accepts `naccepted' bytes. */ + /* The node can accepts 'naccepted' bytes. */ dest_idx = re_string_cur_idx (&mctx->input) + naccepted; mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted : mctx->max_mb_elem_len); @@ -2513,18 +2532,18 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; - int i; - int cur_str_idx = re_string_cur_idx (&mctx->input); + Idx i; + Idx cur_str_idx = re_string_cur_idx (&mctx->input); for (i = 0; i < nodes->nelem; ++i) { - int dest_str_idx, prev_nelem, bkc_idx; - int node_idx = nodes->elems[i]; + Idx dest_str_idx, prev_nelem, bkc_idx; + Idx node_idx = nodes->elems[i]; unsigned int context; const re_token_t *node = dfa->nodes + node_idx; re_node_set *new_dest_nodes; - /* Check whether `node' is a backreference or not. */ + /* Check whether 'node' is a backreference or not. */ if (node->type != OP_BACK_REF) continue; @@ -2536,21 +2555,21 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) continue; } - /* `node' is a backreference. + /* 'node' is a backreference. Check the substring which the substring matched. */ bkc_idx = mctx->nbkref_ents; err = get_subexp (mctx, node_idx, cur_str_idx); if (BE (err != REG_NOERROR, 0)) goto free_return; - /* And add the epsilon closures (which is `new_dest_nodes') of + /* And add the epsilon closures (which is 'new_dest_nodes') of the backreference to appropriate state_log. */ #ifdef DEBUG assert (dfa->nexts[node_idx] != -1); #endif for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) { - int subexp_len; + Idx subexp_len; re_dfastate_t *dest_state; struct re_backref_cache_entry *bkref_ent; bkref_ent = mctx->bkref_ents + bkc_idx; @@ -2567,7 +2586,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) dest_state = mctx->state_log[dest_str_idx]; prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 : mctx->state_log[cur_str_idx]->nodes.nelem); - /* Add `new_dest_node' to state_log. */ + /* Add 'new_dest_node' to state_log. */ if (dest_state == NULL) { mctx->state_log[dest_str_idx] @@ -2623,13 +2642,13 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) static reg_errcode_t __attribute_warn_unused_result__ -get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) +get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) { const re_dfa_t *const dfa = mctx->dfa; - int subexp_num, sub_top_idx; + Idx subexp_num, sub_top_idx; const char *buf = (const char *) re_string_get_buffer (&mctx->input); /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ - int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); + Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); if (cache_idx != -1) { const struct re_backref_cache_entry *entry @@ -2648,7 +2667,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) reg_errcode_t err; re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; re_sub_match_last_t *sub_last; - int sub_last_idx, sl_str, bkref_str_off; + Idx sub_last_idx, sl_str, bkref_str_off; if (dfa->nodes[sub_top->node].opr.idx != subexp_num) continue; /* It isn't related. */ @@ -2659,7 +2678,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) evaluated. */ for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) { - int sl_str_diff; + regoff_t sl_str_diff; sub_last = sub_top->lasts[sub_last_idx]; sl_str_diff = sub_last->str_idx - sl_str; /* The matched string by the sub expression match with the substring @@ -2705,7 +2724,8 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) /* Then, search for the other last nodes of the sub expression. */ for (; sl_str <= bkref_str_idx; ++sl_str) { - int cls_node, sl_str_off; + Idx cls_node; + regoff_t sl_str_off; const re_node_set *nodes; sl_str_off = sl_str - sub_top->str_idx; /* The matched string by the sub expression match with the substring @@ -2772,10 +2792,10 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx) static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, - re_sub_match_last_t *sub_last, int bkref_node, int bkref_str) + re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str) { reg_errcode_t err; - int to_idx; + Idx to_idx; /* Can the subexpression arrive the back reference? */ err = check_arrival (mctx, &sub_last->path, sub_last->node, sub_last->str_idx, bkref_node, bkref_str, @@ -2798,14 +2818,14 @@ get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top, nodes. E.g. RE: (a){2} */ -static int +static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, - int subexp_idx, int type) + Idx subexp_idx, int type) { - int cls_idx; + Idx cls_idx; for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) { - int cls_node = nodes->elems[cls_idx]; + Idx cls_node = nodes->elems[cls_idx]; const re_token_t *node = dfa->nodes + cls_node; if (node->type == type && node->opr.idx == subexp_idx) @@ -2821,12 +2841,12 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, static reg_errcode_t __attribute_warn_unused_result__ -check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, - int top_str, int last_node, int last_str, int type) +check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, + Idx top_str, Idx last_node, Idx last_str, int type) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err = REG_NOERROR; - int subexp_num, backup_cur_idx, str_idx, null_cnt; + Idx subexp_num, backup_cur_idx, str_idx, null_cnt; re_dfastate_t *cur_state = NULL; re_node_set *cur_nodes, next_nodes; re_dfastate_t **backup_state_log; @@ -2837,20 +2857,24 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) { re_dfastate_t **new_array; - int old_alloc = path->alloc; - path->alloc += last_str + mctx->max_mb_elem_len + 1; - new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); + Idx old_alloc = path->alloc; + Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; + Idx new_alloc; + if (BE (IDX_MAX - old_alloc < incr_alloc, 0)) + return REG_ESPACE; + new_alloc = old_alloc + incr_alloc; + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) + return REG_ESPACE; + new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); if (BE (new_array == NULL, 0)) - { - path->alloc = old_alloc; - return REG_ESPACE; - } + return REG_ESPACE; path->array = new_array; + path->alloc = new_alloc; memset (new_array + old_alloc, '\0', sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); } - str_idx = path->next_idx ?: top_str; + str_idx = path->next_idx ? path->next_idx : top_str; /* Temporary modify MCTX. */ backup_state_log = mctx->state_log; @@ -2982,12 +3006,12 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node, static reg_errcode_t __attribute_warn_unused_result__ -check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, +check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, re_node_set *cur_nodes, re_node_set *next_nodes) { const re_dfa_t *const dfa = mctx->dfa; - int result; - int cur_idx; + bool ok; + Idx cur_idx; #ifdef RE_ENABLE_I18N reg_errcode_t err = REG_NOERROR; #endif @@ -2996,13 +3020,13 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) { int naccepted = 0; - int cur_node = cur_nodes->elems[cur_idx]; + Idx cur_node = cur_nodes->elems[cur_idx]; #ifdef DEBUG re_token_type_t type = dfa->nodes[cur_node].type; assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[cur_node].accept_mb) { naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, @@ -3010,8 +3034,8 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, if (naccepted > 1) { re_dfastate_t *dest_state; - int next_node = dfa->nexts[cur_node]; - int next_idx = str_idx + naccepted; + Idx next_node = dfa->nexts[cur_node]; + Idx next_idx = str_idx + naccepted; dest_state = mctx->state_log[next_idx]; re_node_set_empty (&union_set); if (dest_state) @@ -3023,8 +3047,8 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, return err; } } - result = re_node_set_insert (&union_set, next_node); - if (BE (result < 0, 0)) + ok = re_node_set_insert (&union_set, next_node); + if (BE (! ok, 0)) { re_node_set_free (&union_set); return REG_ESPACE; @@ -3043,8 +3067,8 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, if (naccepted || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) { - result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); - if (BE (result < 0, 0)) + ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); + if (BE (! ok, 0)) { re_node_set_free (&union_set); return REG_ESPACE; @@ -3063,10 +3087,10 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx, static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, - int ex_subexp, int type) + Idx ex_subexp, int type) { reg_errcode_t err; - int idx, outside_node; + Idx idx, outside_node; re_node_set new_nodes; #ifdef DEBUG assert (cur_nodes->nelem); @@ -3079,7 +3103,7 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, for (idx = 0; idx < cur_nodes->nelem; ++idx) { - int cur_node = cur_nodes->elems[idx]; + Idx cur_node = cur_nodes->elems[idx]; const re_node_set *eclosure = dfa->eclosures + cur_node; outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); if (outside_node == -1) @@ -3116,31 +3140,32 @@ check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes, static reg_errcode_t __attribute_warn_unused_result__ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, - int target, int ex_subexp, int type) + Idx target, Idx ex_subexp, int type) { - int cur_node; + Idx cur_node; for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) { - int err; + bool ok; if (dfa->nodes[cur_node].type == type && dfa->nodes[cur_node].opr.idx == ex_subexp) { if (type == OP_CLOSE_SUBEXP) { - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) + ok = re_node_set_insert (dst_nodes, cur_node); + if (BE (! ok, 0)) return REG_ESPACE; } break; } - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) + ok = re_node_set_insert (dst_nodes, cur_node); + if (BE (! ok, 0)) return REG_ESPACE; if (dfa->edests[cur_node].nelem == 0) break; if (dfa->edests[cur_node].nelem == 2) { + reg_errcode_t err; err = check_arrival_expand_ecl_sub (dfa, dst_nodes, dfa->edests[cur_node].elems[1], ex_subexp, type); @@ -3160,11 +3185,11 @@ check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes, static reg_errcode_t __attribute_warn_unused_result__ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, - int cur_str, int subexp_num, int type) + Idx cur_str, Idx subexp_num, int type) { const re_dfa_t *const dfa = mctx->dfa; reg_errcode_t err; - int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); + Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str); struct re_backref_cache_entry *ent; if (cache_idx_start == -1) @@ -3174,7 +3199,7 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, ent = mctx->bkref_ents + cache_idx_start; do { - int to_idx, next_node; + Idx to_idx, next_node; /* Is this entry ENT is appropriate? */ if (!re_node_set_contains (cur_nodes, ent->node)) @@ -3212,14 +3237,14 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, next_node = dfa->nexts[ent->node]; if (mctx->state_log[to_idx]) { - int ret; + bool ok; if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, next_node)) continue; err = re_node_set_init_copy (&union_set, &mctx->state_log[to_idx]->nodes); - ret = re_node_set_insert (&union_set, next_node); - if (BE (err != REG_NOERROR || ret < 0, 0)) + ok = re_node_set_insert (&union_set, next_node); + if (BE (err != REG_NOERROR || ! ok, 0)) { re_node_set_free (&union_set); err = err != REG_NOERROR ? err : REG_ESPACE; @@ -3244,17 +3269,19 @@ expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes, } /* Build transition table for the state. - Return 1 if succeeded, otherwise return NULL. */ + Return true if successful. */ -static int +static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) { reg_errcode_t err; - int i, j, ch, need_word_trtable = 0; + Idx i, j; + int ch; + bool need_word_trtable = false; bitset_word_t elem, mask; bool dests_node_malloced = false; bool dest_states_malloced = false; - int ndests; /* Number of the destination states from `state'. */ + Idx ndests; /* Number of the destination states from 'state'. */ re_dfastate_t **trtable; re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; @@ -3268,8 +3295,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) } *dests_alloc; /* We build DFA states which corresponds to the destination nodes - from `state'. `dests_node[i]' represents the nodes which i-th - destination state contains, and `dests_ch[i]' represents the + from 'state'. 'dests_node[i]' represents the nodes which i-th + destination state contains, and 'dests_ch[i]' represents the characters which i-th destination state accepts. */ if (__libc_use_alloca (sizeof (struct dests_alloc))) dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); @@ -3277,32 +3304,32 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) { dests_alloc = re_malloc (struct dests_alloc, 1); if (BE (dests_alloc == NULL, 0)) - return 0; + return false; dests_node_malloced = true; } dests_node = dests_alloc->dests_node; dests_ch = dests_alloc->dests_ch; - /* Initialize transiton table. */ + /* Initialize transition table. */ state->word_trtable = state->trtable = NULL; - /* At first, group all nodes belonging to `state' into several + /* At first, group all nodes belonging to 'state' into several destinations. */ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); if (BE (ndests <= 0, 0)) { if (dests_node_malloced) - free (dests_alloc); - /* Return 0 in case of an error, 1 otherwise. */ + re_free (dests_alloc); + /* Return false in case of an error, true otherwise. */ if (ndests == 0) { state->trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); - if (BE (state->trtable == NULL, 0)) - return 0; - return 1; + if (BE (state->trtable == NULL, 0)) + return false; + return true; } - return 0; + return false; } err = re_node_set_alloc (&follows, ndests + 1); @@ -3322,19 +3349,18 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) alloca (ndests * 3 * sizeof (re_dfastate_t *)); else { - dest_states = (re_dfastate_t **) - malloc (ndests * 3 * sizeof (re_dfastate_t *)); + dest_states = re_malloc (re_dfastate_t *, ndests * 3); if (BE (dest_states == NULL, 0)) { out_free: if (dest_states_malloced) - free (dest_states); + re_free (dest_states); re_node_set_free (&follows); for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_alloc); - return 0; + re_free (dests_alloc); + return false; } dest_states_malloced = true; } @@ -3345,7 +3371,7 @@ out_free: /* Then build the states for all destinations. */ for (i = 0; i < ndests; ++i) { - int next_node; + Idx next_node; re_node_set_empty (&follows); /* Merge the follows of this destination states. */ for (j = 0; j < dests_node[i].nelem; ++j) @@ -3371,13 +3397,13 @@ out_free: goto out_free; if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) - need_word_trtable = 1; + need_word_trtable = true; dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, CONTEXT_NEWLINE); if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) goto out_free; - } + } else { dest_states_word[i] = dest_states[i]; @@ -3464,16 +3490,16 @@ out_free: } if (dest_states_malloced) - free (dest_states); + re_free (dest_states); re_node_set_free (&follows); for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_alloc); + re_free (dests_alloc); - return 1; + return true; } /* Group all nodes belonging to STATE into several destinations. @@ -3481,20 +3507,20 @@ out_free: to DESTS_NODE[i] and set the characters accepted by the destination to DEST_CH[i]. This function return the number of destinations. */ -static int +static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, re_node_set *dests_node, bitset_t *dests_ch) { reg_errcode_t err; - int result; - int i, j, k; - int ndests; /* Number of the destinations from `state'. */ + bool ok; + Idx i, j, k; + Idx ndests; /* Number of the destinations from 'state'. */ bitset_t accepts; /* Characters a node can accept. */ const re_node_set *cur_nodes = &state->nodes; bitset_empty (accepts); ndests = 0; - /* For all the nodes belonging to `state', */ + /* For all the nodes belonging to 'state', */ for (i = 0; i < cur_nodes->nelem; ++i) { re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; @@ -3524,7 +3550,10 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, #ifdef RE_ENABLE_I18N else if (type == OP_UTF8_PERIOD) { - memset (accepts, '\xff', sizeof (bitset_t) / 2); + if (ASCII_CHARS % BITSET_WORD_BITS == 0) + memset (accepts, -1, ASCII_CHARS / CHAR_BIT); + else + bitset_merge (accepts, utf8_sb_map); if (!(dfa->syntax & RE_DOT_NEWLINE)) bitset_clear (accepts, '\n'); if (dfa->syntax & RE_DOT_NOT_NULL) @@ -3534,7 +3563,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, else continue; - /* Check the `accepts' and sift the characters which are not + /* Check the 'accepts' and sift the characters which are not match it the context. */ if (constraint) { @@ -3593,7 +3622,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } } - /* Then divide `accepts' into DFA states, or create a new + /* Then divide 'accepts' into DFA states, or create a new state. Above, we make sure that accepts is not empty. */ for (j = 0; j < ndests; ++j) { @@ -3606,7 +3635,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) continue; - /* Enumerate the intersection set of this state and `accepts'. */ + /* Enumerate the intersection set of this state and 'accepts'. */ has_intersec = 0; for (k = 0; k < BITSET_WORDS; ++k) has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; @@ -3614,7 +3643,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, if (!has_intersec) continue; - /* Then check if this state is a subset of `accepts'. */ + /* Then check if this state is a subset of 'accepts'. */ not_subset = not_consumed = 0; for (k = 0; k < BITSET_WORDS; ++k) { @@ -3622,8 +3651,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; } - /* If this state isn't a subset of `accepts', create a - new group state, which has the `remains'. */ + /* If this state isn't a subset of 'accepts', create a + new group state, which has the 'remains'. */ if (not_subset) { bitset_copy (dests_ch[ndests], remains); @@ -3635,8 +3664,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } /* Put the position in the current group. */ - result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); - if (BE (result < 0, 0)) + ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); + if (BE (! ok, 0)) goto error_return; /* If all characters are consumed, go to next node. */ @@ -3662,7 +3691,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. +/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. Return the number of the bytes the node accepts. STR_IDX is the current index of the input string. @@ -3675,12 +3704,12 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, # endif static int -check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, - const re_string_t *input, int str_idx) +check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, + const re_string_t *input, Idx str_idx) { const re_token_t *node = dfa->nodes + node_idx; int char_len, elem_len; - int i; + Idx i; if (BE (node->type == OP_UTF8_PERIOD, 0)) { @@ -3759,7 +3788,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, # ifdef _LIBC const unsigned char *pin = ((const unsigned char *) re_string_get_buffer (input) + str_idx); - int j; + Idx j; uint32_t nrules; # endif /* _LIBC */ int match_len = 0; @@ -3827,6 +3856,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, in_collseq = find_collation_sequence_value (pin, elem_len); } /* match with range expression? */ + /* FIXME: Implement rational ranges here, too. */ for (i = 0; i < cset->nranges; ++i) if (cset->range_starts[i] <= in_collseq && in_collseq <= cset->range_ends[i]) @@ -3856,7 +3886,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, if (weight_len == weights[equiv_class_idx & 0xffffff] && (idx >> 24) == (equiv_class_idx >> 24)) { - int cnt = 0; + Idx cnt = 0; idx &= 0xffffff; equiv_class_idx &= 0xffffff; @@ -3878,18 +3908,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, int node_idx, # endif /* _LIBC */ { /* match with range expression? */ -#if __GNUC__ >= 2 - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif for (i = 0; i < cset->nranges; ++i) { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (__wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && __wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) { match_len = char_len; goto check_node_accept_bytes_match; @@ -3936,7 +3957,8 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) for (idx = 0; idx < extrasize;) { - int mbs_cnt, found = 0; + int mbs_cnt; + bool found = false; int32_t elem_mbs_len; /* Skip the name of collating element name. */ idx = idx + extra[idx] + 1; @@ -3948,7 +3970,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) break; if (mbs_cnt == elem_mbs_len) /* Found the entry. */ - found = 1; + found = true; } /* Skip the byte sequence of the collating element. */ idx += elem_mbs_len; @@ -3973,9 +3995,9 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) /* Check whether the node accepts the byte which is IDX-th byte of the INPUT. */ -static int +static bool check_node_accept (const re_match_context_t *mctx, const re_token_t *node, - int idx) + Idx idx) { unsigned char ch; ch = re_string_byte_at (&mctx->input, idx); @@ -3983,28 +4005,28 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, { case CHARACTER: if (node->opr.c != ch) - return 0; + return false; break; case SIMPLE_BRACKET: if (!bitset_contain (node->opr.sbcset, ch)) - return 0; + return false; break; #ifdef RE_ENABLE_I18N case OP_UTF8_PERIOD: - if (ch >= 0x80) - return 0; - /* FALLTHROUGH */ + if (ch >= ASCII_CHARS) + return false; + FALLTHROUGH; #endif case OP_PERIOD: if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) - return 0; + return false; break; default: - return 0; + return false; } if (node->constraint) @@ -4014,10 +4036,10 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, unsigned int context = re_string_context_at (&mctx->input, idx, mctx->eflags); if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - return 0; + return false; } - return 1; + return true; } /* Extend the buffers, if the buffers have run out. */ @@ -4030,10 +4052,11 @@ extend_buffers (re_match_context_t *mctx, int min_len) re_string_t *pstr = &mctx->input; /* Avoid overflow. */ - if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 + <= pstr->bufs_len, 0)) return REG_ESPACE; - /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */ + /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ ret = re_string_realloc_buffers (pstr, MAX (min_len, MIN (pstr->len, pstr->bufs_len * 2))); @@ -4089,12 +4112,19 @@ extend_buffers (re_match_context_t *mctx, int min_len) static reg_errcode_t __attribute_warn_unused_result__ -match_ctx_init (re_match_context_t *mctx, int eflags, int n) +match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) { mctx->eflags = eflags; mctx->match_last = -1; if (n > 0) { + /* Avoid overflow. */ + size_t max_object_size = + MAX (sizeof (struct re_backref_cache_entry), + sizeof (re_sub_match_top_t *)); + if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) + return REG_ESPACE; + mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) @@ -4118,10 +4148,10 @@ match_ctx_init (re_match_context_t *mctx, int eflags, int n) static void match_ctx_clean (re_match_context_t *mctx) { - int st_idx; + Idx st_idx; for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) { - int sl_idx; + Idx sl_idx; re_sub_match_top_t *top = mctx->sub_tops[st_idx]; for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) { @@ -4135,7 +4165,7 @@ match_ctx_clean (re_match_context_t *mctx) re_free (top->path->array); re_free (top->path); } - free (top); + re_free (top); } mctx->nsub_tops = 0; @@ -4160,8 +4190,8 @@ match_ctx_free (re_match_context_t *mctx) static reg_errcode_t __attribute_warn_unused_result__ -match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, - int to) +match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from, + Idx to) { if (mctx->nbkref_ents >= mctx->abkref_ents) { @@ -4196,7 +4226,7 @@ match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, A backreference does not epsilon-transition unless it is empty, so set to all zeros if FROM != TO. */ mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map - = (from == to ? ~0 : 0); + = (from == to ? -1 : 0); mctx->bkref_ents[mctx->nbkref_ents++].more = 0; if (mctx->max_mb_elem_len < to - from) @@ -4204,13 +4234,13 @@ match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from, return REG_NOERROR; } -/* Search for the first entry which has the same str_idx, or -1 if none is +/* Return the first entry with the same str_idx, or -1 if none is found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ -static int -search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) +static Idx +search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx) { - int left, right, mid, last; + Idx left, right, mid, last; last = right = mctx->nbkref_ents; for (left = 0; left < right;) { @@ -4231,7 +4261,7 @@ search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx) static reg_errcode_t __attribute_warn_unused_result__ -match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) +match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx) { #ifdef DEBUG assert (mctx->sub_tops != NULL); @@ -4239,7 +4269,7 @@ match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) #endif if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) { - int new_asub_tops = mctx->asub_tops * 2; + Idx new_asub_tops = mctx->asub_tops * 2; re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, re_sub_match_top_t *, new_asub_tops); @@ -4260,12 +4290,12 @@ match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx) at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ static re_sub_match_last_t * -match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) +match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) { re_sub_match_last_t *new_entry; if (BE (subtop->nlasts == subtop->alasts, 0)) { - int new_alasts = 2 * subtop->alasts + 1; + Idx new_alasts = 2 * subtop->alasts + 1; re_sub_match_last_t **new_array = re_realloc (subtop->lasts, re_sub_match_last_t *, new_alasts); @@ -4287,7 +4317,7 @@ match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx) static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, - re_dfastate_t **limited_sts, int last_node, int last_str_idx) + re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx) { sctx->sifted_states = sifted_sts; sctx->limited_states = limited_sts; |