diff options
author | Ulrich Drepper <drepper@redhat.com> | 2002-11-06 19:57:50 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2002-11-06 19:57:50 +0000 |
commit | 1b2c2628354003bd97af125a164e830c9d295e3a (patch) | |
tree | 32abaefed6d40cf6c559bcd6e0b47016419e7dc3 | |
parent | b14993a2356cd7b8c3f74ca2b8e743c056f9401d (diff) | |
download | glibc-1b2c2628354003bd97af125a164e830c9d295e3a.tar.gz glibc-1b2c2628354003bd97af125a164e830c9d295e3a.tar.xz glibc-1b2c2628354003bd97af125a164e830c9d295e3a.zip |
Update.
2002-11-06 Jakub Jelinek <jakub@redhat.com> * posix/regcomp.c (re_compile_pattern): Don't set regs_allocated here. (regcomp): Don't set can_be_null here. (re_comp): Clear whole re_comp_buf with the exception of fastmap. (re_compile_internal): Clear can_be_null, set regs_allocated. * posix/regcomp.c (re_set_fastmap): New function. (re_compile_fastmap_iter): Use it. Remove redundant type == COMPLEX_BRACKET check. * posix/regexec.c (re_search_internal): Optimize searching with fastmap. Call re_string_reconstruct even if match_first is smaller than raw_mbs_idx. 2002-11-06 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp (free_dfa_content): Use free_state. * posix/regex_internal.c (re_string_realloc_buffers): Don't edit pointers in case that realloc failed. (re_node_set_merge): Likewise. (register_state): Likewise. (create_newstate_common): Invoke memory release functions in case of error conditions. (create_ci_newstate): Likewise. (create_cd_newstate): Likewise. (free_state): New function. * posix/regexec.c (re_search_internal): Invoke memory release functions in case of error conditions. (sift_states_backward): Likewise. (merge_state_array): Likewise. (add_epsilon_src_nodes): Likewise. (sub_epsilon_src_nodes): Likewise. (search_subexp): Likewise. (sift_states_bkref): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref_loop): Likewise. (group_nodes_into_DFAstates): Likewise. (push_fail_stack): Don't edit pointers in case that realloc failed. (extend_buffers): Likewise. (match_ctx_add_entry): Likewise.
-rw-r--r-- | ChangeLog | 43 | ||||
-rw-r--r-- | posix/regcomp.c | 35 | ||||
-rw-r--r-- | posix/regex_internal.c | 607 | ||||
-rw-r--r-- | posix/regexec.c | 362 |
4 files changed, 624 insertions, 423 deletions
diff --git a/ChangeLog b/ChangeLog index 07bfd50665..40b5e138d6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,46 @@ +2002-11-06 Jakub Jelinek <jakub@redhat.com> + + * posix/regcomp.c (re_compile_pattern): Don't set regs_allocated + here. + (regcomp): Don't set can_be_null here. + (re_comp): Clear whole re_comp_buf with the exception of fastmap. + (re_compile_internal): Clear can_be_null, set regs_allocated. + + * posix/regcomp.c (re_set_fastmap): New function. + (re_compile_fastmap_iter): Use it. Remove redundant type == + COMPLEX_BRACKET check. + * posix/regexec.c (re_search_internal): Optimize searching with + fastmap. Call re_string_reconstruct even if match_first is + smaller than raw_mbs_idx. + +2002-11-06 Isamu Hasegawa <isamu@yamato.ibm.com> + + * posix/regcomp (free_dfa_content): Use free_state. + * posix/regex_internal.c (re_string_realloc_buffers): Don't edit + pointers in case that realloc failed. + (re_node_set_merge): Likewise. + (register_state): Likewise. + (create_newstate_common): Invoke memory release functions in case of + error conditions. + (create_ci_newstate): Likewise. + (create_cd_newstate): Likewise. + (free_state): New function. + * posix/regexec.c (re_search_internal): Invoke memory release + functions in case of error conditions. + (sift_states_backward): Likewise. + (merge_state_array): Likewise. + (add_epsilon_src_nodes): Likewise. + (sub_epsilon_src_nodes): Likewise. + (search_subexp): Likewise. + (sift_states_bkref): Likewise. + (transit_state_sb): Likewise. + (transit_state_mb): Likewise. + (transit_state_bkref_loop): Likewise. + (group_nodes_into_DFAstates): Likewise. + (push_fail_stack): Don't edit pointers in case that realloc failed. + (extend_buffers): Likewise. + (match_ctx_add_entry): Likewise. + 2002-11-06 Roland McGrath <roland@redhat.com> * configure.in: Add checks on as and ld for binutils 2.13 or later. diff --git a/posix/regcomp.c b/posix/regcomp.c index 3cf07b17d4..03d5cfce14 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -267,10 +267,6 @@ re_compile_pattern (pattern, length, bufp) { reg_errcode_t ret; - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). */ - bufp->regs_allocated = REGS_UNALLOCATED; - /* And GNU code determines whether or not to get register information by passing null for the REGS argument to re_match, etc., not by setting no_sub. */ @@ -339,6 +335,14 @@ re_compile_fastmap (bufp) weak_alias (__re_compile_fastmap, re_compile_fastmap) #endif +static inline void +re_set_fastmap (char *fastmap, int icase, int ch) +{ + fastmap[ch] = 1; + if (icase) + fastmap[tolower (ch)] = 1; +} + /* Helper function for re_compile_fastmap. Compile fastmap for the initial_state INIT_STATE. */ @@ -350,20 +354,21 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) { re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; int node_cnt; + int icase = (MB_CUR_MAX == 1 && (bufp->syntax & RE_ICASE)); for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) { int node = init_state->nodes.elems[node_cnt]; re_token_type_t type = dfa->nodes[node].type; if (type == CHARACTER) - fastmap[dfa->nodes[node].opr.c] = 1; + re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); else if (type == SIMPLE_BRACKET) { int i, j, ch; for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) - fastmap[ch] = 1; + re_set_fastmap (fastmap, icase, ch); } #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) @@ -388,28 +393,24 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) for (i = 0, ch = 0; i < BITSET_UINTS; ++i) for (j = 0; j < UINT_BITS; ++j, ++ch) if (table[ch] < 0) - fastmap[ch] = 1; + re_set_fastmap (fastmap, icase, ch); } # else if (MB_CUR_MAX > 1) for (i = 0; i < SBC_MAX; ++i) if (__btowc (i) == WEOF) - fastmap[i] = 1; + re_set_fastmap (fastmap, icase, i); # endif /* not _LIBC */ } for (i = 0; i < cset->nmbchars; ++i) { char buf[256]; wctomb (buf, cset->mbchars[i]); - fastmap[*(unsigned char *) buf] = 1; + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); } } #endif /* RE_ENABLE_I18N */ - else if (type == END_OF_RE || type == OP_PERIOD -#ifdef RE_ENABLE_I18N - || type == COMPLEX_BRACKET -#endif /* RE_ENABLE_I18N */ - ) + else if (type == END_OF_RE || type == OP_PERIOD) { memset (fastmap, '\1', sizeof (char) * SBC_MAX); if (type == END_OF_RE) @@ -468,7 +469,6 @@ regcomp (preg, pattern, cflags) preg->buffer = NULL; preg->allocated = 0; preg->used = 0; - preg->can_be_null = 0; /* Try to allocate space for the fastmap. */ preg->fastmap = re_malloc (char, SBC_MAX); @@ -667,8 +667,7 @@ re_comp (s) fastmap = re_comp_buf.fastmap; re_comp_buf.fastmap = NULL; __regfree (&re_comp_buf); - re_comp_buf.buffer = NULL; - re_comp_buf.allocated = 0; + memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); re_comp_buf.fastmap = fastmap; } @@ -725,6 +724,8 @@ re_compile_internal (preg, pattern, length, syntax) preg->not_bol = preg->not_eol = 0; preg->used = 0; preg->re_nsub = 0; + preg->can_be_null = 0; + preg->regs_allocated = REGS_UNALLOCATED; /* Initialize the dfa. */ dfa = (re_dfa_t *) preg->buffer; diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 69fb9a65f5..74c8e8b7f6 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -63,25 +63,25 @@ #include "regex_internal.h" static void re_string_construct_common (const char *str, int len, - re_string_t *pstr, - RE_TRANSLATE_TYPE trans, int icase); + re_string_t *pstr, + RE_TRANSLATE_TYPE trans, int icase); #ifdef RE_ENABLE_I18N static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx); #endif /* RE_ENABLE_I18N */ static re_dfastate_t *create_newstate_common (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int hash); + const re_node_set *nodes, + unsigned int hash); static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, - unsigned int hash); + unsigned int hash); static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int hash); + const re_node_set *nodes, + unsigned int hash); static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int context, - unsigned int hash); + const re_node_set *nodes, + unsigned int context, + unsigned int hash); static unsigned int inline calc_state_hash (const re_node_set *nodes, - unsigned int context); + unsigned int context); /* Functions for string operation. */ @@ -105,10 +105,10 @@ re_string_allocate (pstr, str, len, init_len, trans, icase) return ret; pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case - : (unsigned char *) str); + : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr) - || MB_CUR_MAX > 1) ? pstr->valid_len : len; + || MB_CUR_MAX > 1) ? pstr->valid_len : len; return REG_NOERROR; } @@ -131,34 +131,34 @@ re_string_construct (pstr, str, len, trans, icase) { ret = re_string_realloc_buffers (pstr, len + 1); if (BE (ret != REG_NOERROR, 0)) - return ret; + return ret; } pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case - : (unsigned char *) str); + : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; if (icase) { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) - build_wcs_upper_buffer (pstr); + build_wcs_upper_buffer (pstr); else #endif /* RE_ENABLE_I18N */ - build_upper_buffer (pstr); + build_upper_buffer (pstr); } else { #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) - build_wcs_buffer (pstr); + build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ - { - if (trans != NULL) - re_string_translate_buffer (pstr); - else - pstr->valid_len = len; - } + { + if (trans != NULL) + re_string_translate_buffer (pstr); + else + pstr->valid_len = len; + } } /* Initialized whole buffers, then valid_len == bufs_len. */ @@ -176,24 +176,29 @@ re_string_realloc_buffers (pstr, new_buf_len) #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) { - pstr->wcs = re_realloc (pstr->wcs, wint_t, new_buf_len); - if (BE (pstr->wcs == NULL, 0)) - return REG_ESPACE; + wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + pstr->wcs = new_array; } #endif /* RE_ENABLE_I18N */ if (MBS_ALLOCATED (pstr)) { - pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len); - if (BE (pstr->mbs == NULL, 0)) - return REG_ESPACE; + unsigned char *new_array = re_realloc (pstr->mbs, unsigned char, + new_buf_len); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + pstr->mbs = new_array; } if (MBS_CASE_ALLOCATED (pstr)) { - pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len); - if (BE (pstr->mbs_case == NULL, 0)) - return REG_ESPACE; + unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char, + new_buf_len); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + pstr->mbs_case = new_array; if (!MBS_ALLOCATED (pstr)) - pstr->mbs = pstr->mbs_case; + pstr->mbs = pstr->mbs_case; } pstr->bufs_len = new_buf_len; return REG_NOERROR; @@ -243,32 +248,32 @@ build_wcs_buffer (pstr) remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx - + byte_idx), remain_len, &pstr->cur_state); + + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a singlebyte character. */ - mbclen = 1; - wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - pstr->cur_state = prev_st; - } + { + /* We treat these cases as a singlebyte character. */ + mbclen = 1; + wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + pstr->cur_state = prev_st; + } /* Apply the translateion if we need. */ if (pstr->trans != NULL && mbclen == 1) - { - int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]]; - pstr->mbs_case[byte_idx] = ch; - } + { + int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]]; + pstr->mbs_case[byte_idx] = ch; + } /* Write wide character and padding. */ pstr->wcs[byte_idx++] = wc; /* Write paddings. */ for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; + pstr->wcs[byte_idx++] = WEOF; } pstr->valid_len = byte_idx; } @@ -291,40 +296,40 @@ build_wcs_upper_buffer (pstr) remain_len = end_idx - byte_idx; prev_st = pstr->cur_state; mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx - + byte_idx), remain_len, &pstr->cur_state); + + byte_idx), remain_len, &pstr->cur_state); if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) - { - /* In case of a singlebyte character. */ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - /* Apply the translateion if we need. */ - if (pstr->trans != NULL && mbclen == 1) - { - ch = pstr->trans[ch]; - pstr->mbs_case[byte_idx] = ch; - } - pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc; - pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; - if (BE (mbclen == (size_t) -1, 0)) - pstr->cur_state = prev_st; - } + { + /* In case of a singlebyte character. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + /* Apply the translateion if we need. */ + if (pstr->trans != NULL && mbclen == 1) + { + ch = pstr->trans[ch]; + pstr->mbs_case[byte_idx] = ch; + } + pstr->wcs[byte_idx] = iswlower (wc) ? toupper (wc) : wc; + pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } else /* mbclen > 1 */ - { - if (iswlower (wc)) - wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); - else - memcpy (pstr->mbs + byte_idx, - pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); - pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } + { + if (iswlower (wc)) + wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = iswlower (wc) ? toupper (wc) : wc; + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } } pstr->valid_len = byte_idx; } @@ -347,13 +352,13 @@ re_string_skip_chars (pstr, new_raw_idx) int remain_len = pstr->len - rawbuf_idx; prev_st = pstr->cur_state; mbclen = mbrlen ((const char *) pstr->raw_mbs + rawbuf_idx, remain_len, - &pstr->cur_state); + &pstr->cur_state); if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a singlebyte character. */ - mbclen = 1; - pstr->cur_state = prev_st; - } + { + /* We treat these cases as a singlebyte character. */ + mbclen = 1; + pstr->cur_state = prev_st; + } /* Then proceed the next character. */ rawbuf_idx += mbclen; } @@ -361,7 +366,7 @@ re_string_skip_chars (pstr, new_raw_idx) } #endif /* RE_ENABLE_I18N */ -/* Build the buffer PSTR->MBS, and apply the translation if we need. +/* Build the buffer PSTR->MBS, and apply the translation if we need. This function is used in case of REG_ICASE. */ static void @@ -375,14 +380,14 @@ build_upper_buffer (pstr) { int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; if (pstr->trans != NULL) - { - ch = pstr->trans[ch]; - pstr->mbs_case[char_idx] = ch; - } + { + ch = pstr->trans[ch]; + pstr->mbs_case[char_idx] = ch; + } if (islower (ch)) - pstr->mbs[char_idx] = toupper (ch); + pstr->mbs[char_idx] = toupper (ch); else - pstr->mbs[char_idx] = ch; + pstr->mbs[char_idx] = ch; } pstr->valid_len = char_idx; } @@ -420,65 +425,65 @@ re_string_reconstruct (pstr, idx, eflags, newline) /* Reset buffer. */ #ifdef RE_ENABLE_I18N if (MB_CUR_MAX > 1) - memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); #endif /* RE_ENABLE_I18N */ pstr->len += pstr->raw_mbs_idx; pstr->stop += pstr->raw_mbs_idx; pstr->valid_len = pstr->raw_mbs_idx = 0; pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF - : CONTEXT_NEWLINE | CONTEXT_BEGBUF); + : CONTEXT_NEWLINE | CONTEXT_BEGBUF); if (!MBS_CASE_ALLOCATED (pstr)) - pstr->mbs_case = (unsigned char *) pstr->raw_mbs; + pstr->mbs_case = (unsigned char *) pstr->raw_mbs; if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr)) - pstr->mbs = (unsigned char *) pstr->raw_mbs; + pstr->mbs = (unsigned char *) pstr->raw_mbs; offset = idx; } if (offset != 0) { pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags, - newline); + newline); /* Are the characters which are already checked remain? */ if (offset < pstr->valid_len) - { - /* Yes, move them to the front of the buffer. */ + { + /* Yes, move them to the front of the buffer. */ #ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - memmove (pstr->wcs, pstr->wcs + offset, - (pstr->valid_len - offset) * sizeof (wint_t)); + if (MB_CUR_MAX > 1) + memmove (pstr->wcs, pstr->wcs + offset, + (pstr->valid_len - offset) * sizeof (wint_t)); #endif /* RE_ENABLE_I18N */ - if (MBS_ALLOCATED (pstr)) - memmove (pstr->mbs, pstr->mbs + offset, - pstr->valid_len - offset); - if (MBS_CASE_ALLOCATED (pstr)) - memmove (pstr->mbs_case, pstr->mbs_case + offset, - pstr->valid_len - offset); - pstr->valid_len -= offset; + if (MBS_ALLOCATED (pstr)) + memmove (pstr->mbs, pstr->mbs + offset, + pstr->valid_len - offset); + if (MBS_CASE_ALLOCATED (pstr)) + memmove (pstr->mbs_case, pstr->mbs_case + offset, + pstr->valid_len - offset); + pstr->valid_len -= offset; #if DEBUG - assert (pstr->valid_len > 0); + assert (pstr->valid_len > 0); #endif - } + } else - { - /* No, skip all characters until IDX. */ - pstr->valid_len = 0; + { + /* No, skip all characters until IDX. */ + pstr->valid_len = 0; #ifdef RE_ENABLE_I18N - if (MB_CUR_MAX > 1) - { - int wcs_idx; - pstr->valid_len = re_string_skip_chars (pstr, idx) - idx; - for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) - pstr->wcs[wcs_idx] = WEOF; - } + if (MB_CUR_MAX > 1) + { + int wcs_idx; + pstr->valid_len = re_string_skip_chars (pstr, idx) - idx; + for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) + pstr->wcs[wcs_idx] = WEOF; + } #endif /* RE_ENABLE_I18N */ - } + } if (!MBS_CASE_ALLOCATED (pstr)) - { - pstr->mbs_case += offset; - /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */ - if (!MBS_ALLOCATED (pstr)) - pstr->mbs += offset; - } + { + pstr->mbs_case += offset; + /* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */ + if (!MBS_ALLOCATED (pstr)) + pstr->mbs += offset; + } } pstr->raw_mbs_idx = idx; pstr->len -= offset; @@ -489,17 +494,17 @@ re_string_reconstruct (pstr, idx, eflags, newline) if (MB_CUR_MAX > 1) { if (pstr->icase) - build_wcs_upper_buffer (pstr); + build_wcs_upper_buffer (pstr); else - build_wcs_buffer (pstr); + build_wcs_buffer (pstr); } else #endif /* RE_ENABLE_I18N */ { if (pstr->icase) - build_upper_buffer (pstr); + build_upper_buffer (pstr); else if (pstr->trans != NULL) - re_string_translate_buffer (pstr); + re_string_translate_buffer (pstr); } pstr->cur_idx = 0; @@ -530,12 +535,12 @@ re_string_context_at (input, idx, eflags, newline_anchor) if (idx < 0 || idx == input->len) { if (idx < 0) - /* In this case, we use the value stored in input->tip_context, - since we can't know the character in input->mbs[-1] here. */ - return input->tip_context; + /* In this case, we use the value stored in input->tip_context, + since we can't know the character in input->mbs[-1] here. */ + return input->tip_context; else /* (idx == input->len) */ - return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF - : CONTEXT_NEWLINE | CONTEXT_ENDBUF); + return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF + : CONTEXT_NEWLINE | CONTEXT_ENDBUF); } c = re_string_byte_at (input, idx); if (IS_WORD_CHAR (c)) @@ -590,15 +595,15 @@ re_node_set_init_2 (set, elem1, elem2) { set->nelem = 2; if (elem1 < elem2) - { - set->elems[0] = elem1; - set->elems[1] = elem2; - } + { + set->elems[0] = elem1; + set->elems[1] = elem2; + } else - { - set->elems[0] = elem2; - set->elems[1] = elem1; - } + { + set->elems[0] = elem2; + set->elems[1] = elem1; + } } return REG_NOERROR; } @@ -614,7 +619,7 @@ re_node_set_init_copy (dest, src) dest->alloc = dest->nelem; dest->elems = re_malloc (int, dest->alloc); if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; + return REG_ESPACE; memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); } else @@ -635,12 +640,12 @@ re_node_set_add_intersect (dest, src1, src2) if (src1->nelem > 0 && src2->nelem > 0) { if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) - { - dest->alloc = src1->nelem + src2->nelem + dest->nelem; - dest->elems = re_realloc (dest->elems, int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; - } + { + dest->alloc = src1->nelem + src2->nelem + dest->nelem; + dest->elems = re_realloc (dest->elems, int, dest->alloc); + if (BE (dest->elems == NULL, 0)) + return REG_ESPACE; + } } else return REG_NOERROR; @@ -648,24 +653,24 @@ re_node_set_add_intersect (dest, src1, src2) for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) { if (src1->elems[i1] > src2->elems[i2]) - { - ++i2; - continue; - } + { + ++i2; + continue; + } if (src1->elems[i1] == src2->elems[i2]) - { - while (id < dest->nelem && dest->elems[id] < src2->elems[i2]) - ++id; - if (id < dest->nelem && dest->elems[id] == src2->elems[i2]) - ++id; - else - { - memmove (dest->elems + id + 1, dest->elems + id, - sizeof (int) * (dest->nelem - id)); - dest->elems[id++] = src2->elems[i2++]; - ++dest->nelem; - } - } + { + while (id < dest->nelem && dest->elems[id] < src2->elems[i2]) + ++id; + if (id < dest->nelem && dest->elems[id] == src2->elems[i2]) + ++id; + else + { + memmove (dest->elems + id + 1, dest->elems + id, + sizeof (int) * (dest->nelem - id)); + dest->elems[id++] = src2->elems[i2++]; + ++dest->nelem; + } + } ++i1; } return REG_NOERROR; @@ -685,39 +690,39 @@ re_node_set_init_union (dest, src1, src2) dest->alloc = src1->nelem + src2->nelem; dest->elems = re_malloc (int, dest->alloc); if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; + return REG_ESPACE; } else { if (src1 != NULL && src1->nelem > 0) - return re_node_set_init_copy (dest, src1); + return re_node_set_init_copy (dest, src1); else if (src2 != NULL && src2->nelem > 0) - return re_node_set_init_copy (dest, src2); + return re_node_set_init_copy (dest, src2); else - re_node_set_init_empty (dest); + re_node_set_init_empty (dest); return REG_NOERROR; } for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) { if (src1->elems[i1] > src2->elems[i2]) - { - dest->elems[id++] = src2->elems[i2++]; - continue; - } + { + dest->elems[id++] = src2->elems[i2++]; + continue; + } if (src1->elems[i1] == src2->elems[i2]) - ++i2; + ++i2; dest->elems[id++] = src1->elems[i1++]; } if (i1 < src1->nelem) { memcpy (dest->elems + id, src1->elems + i1, - (src1->nelem - i1) * sizeof (int)); + (src1->nelem - i1) * sizeof (int)); id += src1->nelem - i1; } else if (i2 < src2->nelem) { memcpy (dest->elems + id, src2->elems + i2, - (src2->nelem - i2) * sizeof (int)); + (src2->nelem - i2) * sizeof (int)); id += src2->nelem - i2; } dest->nelem = id; @@ -737,10 +742,12 @@ re_node_set_merge (dest, src) return REG_NOERROR; if (dest->alloc < src->nelem + dest->nelem) { + int *new_buffer; dest->alloc = 2 * (src->nelem + dest->alloc); - dest->elems = re_realloc (dest->elems, int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; + new_buffer = re_realloc (dest->elems, int, dest->alloc); + if (BE (new_buffer == NULL, 0)) + return REG_ESPACE; + dest->elems = new_buffer; } for (si = 0, di = 0 ; si < src->nelem && di < dest->nelem ;) @@ -749,34 +756,34 @@ re_node_set_merge (dest, src) /* Binary search the spot we will add the new element. */ right = dest->nelem; while (di < right) - { - mid = (di + right) / 2; - if (dest->elems[mid] < src_elem) - di = mid + 1; - else - right = mid; - } + { + mid = (di + right) / 2; + if (dest->elems[mid] < src_elem) + di = mid + 1; + else + right = mid; + } if (di >= dest->nelem) - break; + break; if (dest->elems[di] == src_elem) - { - /* Skip since, DEST already has the element. */ - ++di; - ++si; - continue; - } + { + /* Skip since, DEST already has the element. */ + ++di; + ++si; + continue; + } /* Skip the src elements which are less than dest->elems[di]. */ cp_from = si; while (si < src->nelem && src->elems[si] < dest->elems[di]) - ++si; + ++si; /* Copy these src elements. */ ncp = si - cp_from; memmove (dest->elems + di + ncp, dest->elems + di, - sizeof (int) * (dest->nelem - di)); + sizeof (int) * (dest->nelem - di)); memcpy (dest->elems + di, src->elems + cp_from, - sizeof (int) * ncp); + sizeof (int) * ncp); /* Update counters. */ di += ncp; dest->nelem += ncp; @@ -786,7 +793,7 @@ re_node_set_merge (dest, src) if (si < src->nelem) { memcpy (dest->elems + di, src->elems + si, - sizeof (int) * (src->nelem - si)); + sizeof (int) * (src->nelem - si)); dest->nelem += src->nelem - si; } return REG_NOERROR; @@ -806,9 +813,9 @@ re_node_set_insert (set, elem) if (set->elems == NULL || set->alloc == 0) { if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) - return 1; + return 1; else - return -1; + return -1; } /* Binary search the spot we will add the new element. */ @@ -818,9 +825,9 @@ re_node_set_insert (set, elem) { mid = (idx + right) / 2; if (set->elems[mid] < elem) - idx = mid + 1; + idx = mid + 1; else - right = mid; + right = mid; } /* Realloc if we need. */ @@ -830,13 +837,13 @@ re_node_set_insert (set, elem) set->alloc = set->alloc * 2; new_array = re_malloc (int, set->alloc); if (BE (new_array == NULL, 0)) - return -1; + return -1; /* Copy the elements they are followed by the new element. */ if (idx > 0) - memcpy (new_array, set->elems, sizeof (int) * (idx)); + memcpy (new_array, set->elems, sizeof (int) * (idx)); /* Copy the elements which follows the new element. */ if (set->nelem - idx > 0) - memcpy (new_array + idx + 1, set->elems + idx, + memcpy (new_array + idx + 1, set->elems + idx, sizeof (int) * (set->nelem - idx)); re_free (set->elems); set->elems = new_array; @@ -845,8 +852,8 @@ re_node_set_insert (set, elem) { /* Move the elements which follows the new element. */ if (set->nelem - idx > 0) - memmove (set->elems + idx + 1, set->elems + idx, - sizeof (int) * (set->nelem - idx)); + memmove (set->elems + idx + 1, set->elems + idx, + sizeof (int) * (set->nelem - idx)); } /* Insert the new element. */ set->elems[idx] = elem; @@ -888,9 +895,9 @@ re_node_set_contains (set, elem) { mid = (idx + right) / 2; if (set->elems[mid] < elem) - idx = mid + 1; + idx = mid + 1; else - right = mid; + right = mid; } return set->elems[idx] == elem ? idx + 1 : 0; } @@ -904,7 +911,7 @@ re_node_set_remove_at (set, idx) return; if (idx < set->nelem - 1) memmove (set->elems + idx, set->elems + idx + 1, - sizeof (int) * (set->nelem - idx - 1)); + sizeof (int) * (set->nelem - idx - 1)); --set->nelem; } @@ -924,28 +931,28 @@ re_dfa_add_node (dfa, token, mode) dfa->nodes_alloc *= 2; new_array = re_realloc (dfa->nodes, re_token_t, dfa->nodes_alloc); if (BE (new_array == NULL, 0)) - return -1; + return -1; else - dfa->nodes = new_array; + dfa->nodes = new_array; if (mode) - { - int *new_nexts; - re_node_set *new_edests, *new_eclosures, *new_inveclosures; - - new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc); - new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc); - new_eclosures = re_realloc (dfa->eclosures, re_node_set, - dfa->nodes_alloc); - new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, - dfa->nodes_alloc); - if (BE (new_nexts == NULL || new_edests == NULL - || new_eclosures == NULL || new_inveclosures == NULL, 0)) - return -1; - dfa->nexts = new_nexts; - dfa->edests = new_edests; - dfa->eclosures = new_eclosures; - dfa->inveclosures = new_inveclosures; - } + { + int *new_nexts; + re_node_set *new_edests, *new_eclosures, *new_inveclosures; + + new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc); + new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc); + new_eclosures = re_realloc (dfa->eclosures, re_node_set, + dfa->nodes_alloc); + new_inveclosures = re_realloc (dfa->inveclosures, re_node_set, + dfa->nodes_alloc); + if (BE (new_nexts == NULL || new_edests == NULL + || new_eclosures == NULL || new_inveclosures == NULL, 0)) + return -1; + dfa->nexts = new_nexts; + dfa->edests = new_edests; + dfa->eclosures = new_eclosures; + dfa->inveclosures = new_inveclosures; + } } dfa->nodes[dfa->nodes_len] = token; dfa->nodes[dfa->nodes_len].duplicated = 0; @@ -970,9 +977,9 @@ calc_state_hash (nodes, context) Otherwise create the new one and return it. In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. - - We never return non-NULL value in case of any errors, it is for - optimization. */ + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. */ static re_dfastate_t* re_acquire_state (err, dfa, nodes) @@ -996,9 +1003,9 @@ re_acquire_state (err, dfa, nodes) { re_dfastate_t *state = spot->array[i]; if (hash != state->hash) - continue; + continue; if (re_node_set_compare (&state->nodes, nodes)) - return state; + return state; } /* There are no appropriate state in the dfa, create the new one. */ @@ -1018,9 +1025,9 @@ re_acquire_state (err, dfa, nodes) Otherwise create the new one and return it. In case of an error return NULL and set the error code in ERR. Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. - - We never return non-NULL value in case of any errors, it is for - optimization. */ + return value is NULL and ERR is REG_NOERROR. + - We never return non-NULL value in case of any errors, it is for + optimization. */ static re_dfastate_t* re_acquire_state_context (err, dfa, nodes, context) @@ -1045,10 +1052,10 @@ re_acquire_state_context (err, dfa, nodes, context) { re_dfastate_t *state = spot->array[i]; if (hash != state->hash) - continue; + continue; if (re_node_set_compare (state->entrance_nodes, nodes) - && state->context == context) - return state; + && state->context == context) + return state; } /* There are no appropriate state in `dfa', create the new one. */ new_state = create_cd_newstate (dfa, nodes, context, hash); @@ -1071,10 +1078,16 @@ create_newstate_common (dfa, nodes, hash) unsigned int hash; { re_dfastate_t *newstate; + reg_errcode_t err; newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); if (BE (newstate == NULL, 0)) return NULL; - re_node_set_init_copy (&newstate->nodes, nodes); + err = re_node_set_init_copy (&newstate->nodes, nodes); + if (BE (err != REG_NOERROR, 0)) + { + re_free (newstate); + return NULL; + } newstate->trtable = NULL; newstate->trtable_search = NULL; newstate->hash = hash; @@ -1095,10 +1108,12 @@ register_state (dfa, newstate, hash) if (spot->alloc <= spot->num) { + re_dfastate_t **new_array; spot->alloc = 2 * spot->num + 2; - spot->array = re_realloc (spot->array, re_dfastate_t *, spot->alloc); - if (BE (spot->array == NULL, 0)) - return REG_ESPACE; + new_array = re_realloc (spot->array, re_dfastate_t *, spot->alloc); + if (BE (new_array == NULL, 0)) + return REG_ESPACE; + spot->array = new_array; } spot->array[spot->num++] = newstate; return REG_NOERROR; @@ -1126,23 +1141,28 @@ create_ci_newstate (dfa, nodes, hash) re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (type == CHARACTER && !node->constraint) - continue; + continue; /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) - newstate->halt = 1; + newstate->halt = 1; #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET - || (type == OP_PERIOD && MB_CUR_MAX > 1)) - newstate->accept_mb = 1; + || (type == OP_PERIOD && MB_CUR_MAX > 1)) + newstate->accept_mb = 1; #endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) - newstate->has_backref = 1; + newstate->has_backref = 1; else if (type == ANCHOR || node->constraint) - newstate->has_constraint = 1; + newstate->has_constraint = 1; } err = register_state (dfa, newstate, hash); - return (err != REG_NOERROR) ? NULL : newstate; + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; } /* Create the new state which is depend on the context CONTEXT. @@ -1170,42 +1190,65 @@ create_cd_newstate (dfa, nodes, context, hash) re_token_t *node = dfa->nodes + nodes->elems[i]; re_token_type_t type = node->type; if (node->constraint) - constraint = node->constraint; + constraint = node->constraint; if (type == CHARACTER && !constraint) - continue; + continue; /* If the state has the halt node, the state is a halt state. */ else if (type == END_OF_RE) - newstate->halt = 1; + newstate->halt = 1; #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET - || (type == OP_PERIOD && MB_CUR_MAX > 1)) - newstate->accept_mb = 1; + || (type == OP_PERIOD && MB_CUR_MAX > 1)) + newstate->accept_mb = 1; #endif /* RE_ENABLE_I18N */ else if (type == OP_BACK_REF) - newstate->has_backref = 1; + newstate->has_backref = 1; else if (type == ANCHOR) - constraint = node->opr.ctx_type; + constraint = node->opr.ctx_type; if (constraint) - { - if (newstate->entrance_nodes == &newstate->nodes) - { - newstate->entrance_nodes = re_malloc (re_node_set, 1); + { + if (newstate->entrance_nodes == &newstate->nodes) + { + newstate->entrance_nodes = re_malloc (re_node_set, 1); if (BE (newstate->entrance_nodes == NULL, 0)) - return NULL; - re_node_set_init_copy (newstate->entrance_nodes, nodes); - nctx_nodes = 0; - newstate->has_constraint = 1; - } - - if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) - { - re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); - ++nctx_nodes; - } - } + { + free_state (newstate); + return NULL; + } + re_node_set_init_copy (newstate->entrance_nodes, nodes); + nctx_nodes = 0; + newstate->has_constraint = 1; + } + + if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) + { + re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); + ++nctx_nodes; + } + } } err = register_state (dfa, newstate, hash); - return (err != REG_NOERROR) ? NULL : newstate; + if (BE (err != REG_NOERROR, 0)) + { + free_state (newstate); + newstate = NULL; + } + return newstate; +} + +static void +free_state (state) + re_dfastate_t *state; +{ + if (state->entrance_nodes != &state->nodes) + { + re_node_set_free (state->entrance_nodes); + re_free (state->entrance_nodes); + } + re_node_set_free (&state->nodes); + re_free (state->trtable); + re_free (state->trtable_search); + re_free (state); } diff --git a/posix/regexec.c b/posix/regexec.c index 511b979f2a..17c469c47f 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -85,7 +85,7 @@ static int proceed_next_node (const regex_t *preg, int nregs, regmatch_t *regs, const re_match_context_t *mctx, int *pidx, int node, re_node_set *eps_via_nodes, struct re_fail_stack_t *fs); -static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, +static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int *dests, int nregs, regmatch_t *regs, re_node_set *eps_via_nodes); @@ -337,7 +337,7 @@ re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len); if (free_str) - re_free ((char *) str); + re_free ((char *) str); return rval; } @@ -575,9 +575,10 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, re_string_t input; int left_lim, right_lim, incr; int fl_longest_match, match_first, match_last = -1; + int fast_translate, sb; re_match_context_t mctx; - char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate) - ? preg->fastmap : NULL); + char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate + && range && !preg->can_be_null) ? preg->fastmap : NULL); /* Check if the DFA haven't been compiled. */ if (BE (preg->used == 0 || dfa->init_state == NULL @@ -586,6 +587,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, return REG_NOMATCH; re_node_set_init_empty (&empty_set); + memset (&mctx, '\0', sizeof (re_match_context_t)); /* We must check the longest matching, if nmatch > 0. */ fl_longest_match = (nmatch != 0); @@ -593,12 +595,12 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, err = re_string_allocate (&input, string, length, dfa->nodes_len + 1, preg->translate, preg->syntax & RE_ICASE); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; input.stop = stop; err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; /* We will log all the DFA states through which the dfa pass, if nmatch > 1, or this dfa has "multibyte node", which is a @@ -608,7 +610,10 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, { mctx.state_log = re_malloc (re_dfastate_t *, dfa->nodes_len + 1); if (BE (mctx.state_log == NULL, 0)) - return REG_ESPACE; + { + err = REG_ESPACE; + goto free_return; + } } else mctx.state_log = NULL; @@ -626,65 +631,117 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, incr = (range < 0) ? -1 : 1; left_lim = (range < 0) ? start + range : start; right_lim = (range < 0) ? start : start + range; + sb = MB_CUR_MAX == 1; + fast_translate = sb || !(preg->syntax & RE_ICASE || preg->translate); for (;;) { /* At first get the current byte from input string. */ - int ch; - if (MB_CUR_MAX > 1 && (preg->syntax & RE_ICASE || preg->translate)) - { - /* In this case, we can't determin easily the current byte, - since it might be a component byte of a multibyte character. - Then we use the constructed buffer instead. */ - /* If MATCH_FIRST is out of the valid range, reconstruct the - buffers. */ - if (input.raw_mbs_idx + input.valid_len <= match_first) - re_string_reconstruct (&input, match_first, eflags, - preg->newline_anchor); - /* If MATCH_FIRST is out of the buffer, leave it as '\0'. - Note that MATCH_FIRST must not be smaller than 0. */ - ch = ((match_first >= length) ? 0 - : re_string_byte_at (&input, match_first - input.raw_mbs_idx)); - } - else - { - /* We apply translate/conversion manually, since it is trivial - in this case. */ - /* If MATCH_FIRST is out of the buffer, leave it as '\0'. - Note that MATCH_FIRST must not be smaller than 0. */ - ch = (match_first < length) ? (unsigned char)string[match_first] : 0; - /* Apply translation if we need. */ - ch = preg->translate ? preg->translate[ch] : ch; - /* In case of case insensitive mode, convert to upper case. */ - ch = ((preg->syntax & RE_ICASE) && islower (ch)) ? toupper (ch) : ch; - } + if (fastmap) + { + if (BE (fast_translate, 1)) + { + unsigned RE_TRANSLATE_TYPE t + = (unsigned RE_TRANSLATE_TYPE) preg->translate; + if (BE (range >= 0, 1)) + { + if (BE (t != NULL, 0)) + { + while (BE (match_first < right_lim, 1) + && !fastmap[t[(unsigned char) string[match_first]]]) + ++match_first; + } + else + { + while (BE (match_first < right_lim, 1) + && !fastmap[(unsigned char) string[match_first]]) + ++match_first; + } + if (BE (match_first == right_lim, 0)) + { + int ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (!fastmap[t ? t[ch] : ch]) + break; + } + } + else + { + while (match_first >= left_lim) + { + int ch = match_first >= length + ? 0 : (unsigned char) string[match_first]; + if (fastmap[t ? t[ch] : ch]) + break; + --match_first; + } + if (match_first < left_lim) + break; + } + } + else + { + int ch; + + do + { + /* In this case, we can't determine easily the current byte, + since it might be a component byte of a multibyte + character. Then we use the constructed buffer + instead. */ + /* If MATCH_FIRST is out of the valid range, reconstruct the + buffers. */ + if (input.raw_mbs_idx + input.valid_len <= match_first + || match_first < input.raw_mbs_idx) + { + err = re_string_reconstruct (&input, match_first, eflags, + preg->newline_anchor); + if (BE (err != REG_NOERROR, 0)) + goto free_return; + } + /* If MATCH_FIRST is out of the buffer, leave it as '\0'. + Note that MATCH_FIRST must not be smaller than 0. */ + ch = ((match_first >= length) ? 0 + : re_string_byte_at (&input, + match_first - input.raw_mbs_idx)); + if (fastmap[ch]) + break; + match_first += incr; + } + while (match_first >= left_lim && match_first <= right_lim); + if (! fastmap[ch]) + break; + } + } - /* Eliminate inappropriate one by fastmap. */ - if (preg->can_be_null || fastmap == NULL || fastmap[ch]) - { - /* Reconstruct the buffers so that the matcher can assume that - the matching starts from the begining of the buffer. */ - re_string_reconstruct (&input, match_first, eflags, - preg->newline_anchor); + /* Reconstruct the buffers so that the matcher can assume that + the matching starts from the begining of the buffer. */ + err = re_string_reconstruct (&input, match_first, eflags, + preg->newline_anchor); + if (BE (err != REG_NOERROR, 0)) + goto free_return; #ifdef RE_ENABLE_I18N - /* Eliminate it when it is a component of a multibyte character - and isn't the head of a multibyte character. */ - if (MB_CUR_MAX == 1 || re_string_first_byte (&input, 0)) + /* Eliminate it when it is a component of a multibyte character + and isn't the head of a multibyte character. */ + if (sb || re_string_first_byte (&input, 0)) #endif - { - /* It seems to be appropriate one, then use the matcher. */ - /* We assume that the matching starts from 0. */ - mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; - match_last = check_matching (preg, &mctx, 0, fl_longest_match); - if (match_last != -1) - { - if (BE (match_last == -2, 0)) - return REG_ESPACE; - else - break; /* We found a matching. */ - } - } - } + { + /* It seems to be appropriate one, then use the matcher. */ + /* We assume that the matching starts from 0. */ + mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; + match_last = check_matching (preg, &mctx, 0, fl_longest_match); + if (match_last != -1) + { + if (BE (match_last == -2, 0)) + { + err = REG_ESPACE; + goto free_return; + } + else + break; /* We found a matching. */ + } + } + /* Update counter. */ match_first += incr; if (match_first < left_lim || right_lim < match_first) @@ -722,28 +779,42 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, match_ctx_clear_flag (&mctx); sifted_states = re_malloc (re_dfastate_t *, match_last + 1); if (BE (sifted_states == NULL, 0)) - return REG_ESPACE; + { + err = REG_ESPACE; + goto free_return; + } if (dfa->nbackref) { lim_states = calloc (sizeof (re_dfastate_t *), match_last + 1); if (BE (lim_states == NULL, 0)) - return REG_ESPACE; + { + re_free (sifted_states); + err = REG_ESPACE; + goto free_return; + } } sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, mctx.match_last, 0); err = sift_states_backward (preg, &mctx, &sctx); + re_node_set_free (&sctx.limits); if (BE (err != REG_NOERROR, 0)) - return err; + { + re_free (sifted_states); + re_free (lim_states); + goto free_return; + } if (lim_states != NULL) { err = merge_state_array (dfa, sifted_states, lim_states, match_last + 1); - if (BE (err != REG_NOERROR, 0)) - return err; re_free (lim_states); + if (BE (err != REG_NOERROR, 0)) + { + re_free (sifted_states); + goto free_return; + } } - re_node_set_free (&sctx.limits); re_free (mctx.state_log); mctx.state_log = sifted_states; } @@ -751,7 +822,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, err = set_regs (preg, &mctx, nmatch, pmatch, dfa->has_plural_match && dfa->nbackref > 0); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } /* At last, add the offset to the each registers, since we slided @@ -763,13 +834,13 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, pmatch[reg_idx].rm_eo += match_first; } } - + err = (match_last == -1) ? REG_NOMATCH : REG_NOERROR; + free_return: re_free (mctx.state_log); if (dfa->nbackref) match_ctx_free (&mctx); re_string_destruct (&input); - - return (match_last == -1) ? REG_NOMATCH : REG_NOERROR; + return err; } /* Acquire an initial state and return it. @@ -1099,11 +1170,13 @@ push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes) int num = fs->num++; if (fs->num == fs->alloc) { + struct re_fail_stack_ent_t *new_array; fs->alloc *= 2; - fs->stack = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) + new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) * fs->alloc)); - if (fs->stack == NULL) + if (new_array == NULL) return REG_ESPACE; + fs->stack = new_array; } fs->stack[num].idx = str_idx; fs->stack[num].node = dests[1]; @@ -1112,7 +1185,7 @@ push_fail_stack (fs, str_idx, dests, nregs, regs, eps_via_nodes) err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); return err; } - + static int pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) struct re_fail_stack_t *fs; @@ -1301,7 +1374,7 @@ sift_states_backward (preg, mctx, sctx) return err; err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; /* Then check each states in the state_log. */ while (str_idx > 0) @@ -1365,7 +1438,10 @@ sift_states_backward (preg, mctx, sctx) } ret = re_node_set_insert (&cur_dest, prev_node); if (BE (ret == -1, 0)) - return err; + { + err = REG_ESPACE; + goto free_return; + } } /* Add all the nodes which satisfy the following conditions: @@ -1374,11 +1450,12 @@ sift_states_backward (preg, mctx, sctx) And update state_log. */ err = update_cur_sifted_state (preg, mctx, sctx, str_idx, &cur_dest); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } - + err = REG_NOERROR; + free_return: re_node_set_free (&cur_dest); - return REG_NOERROR; + return err; } /* Helper functions. */ @@ -1409,7 +1486,8 @@ clean_state_log_if_need (mctx, next_state_log_idx) return REG_NOERROR; } -static reg_errcode_t merge_state_array (dfa, dst, src, num) +static reg_errcode_t +merge_state_array (dfa, dst, src, num) re_dfa_t *dfa; re_dfastate_t **dst; re_dfastate_t **src; @@ -1429,9 +1507,9 @@ static reg_errcode_t merge_state_array (dfa, dst, src, num) if (BE (err != REG_NOERROR, 0)) return err; dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); + re_node_set_free (&merged_set); if (BE (err != REG_NOERROR, 0)) return err; - re_node_set_free (&merged_set); } } return REG_NOERROR; @@ -1512,7 +1590,10 @@ add_epsilon_src_nodes (dfa, dest_nodes, candidates) dfa->inveclosures + src_copy.elems[src_idx]); if (BE (err != REG_NOERROR, 0)) - return err; + { + re_node_set_free (&src_copy); + return err; + } } re_node_set_free (&src_copy); return REG_NOERROR; @@ -1549,7 +1630,10 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates) err = re_node_set_add_intersect (&except_nodes, candidates, dfa->inveclosures + cur_node); if (BE (err != REG_NOERROR, 0)) - return err; + { + re_node_set_free (&except_nodes); + return err; + } } } } @@ -1791,7 +1875,7 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) local_sctx = *sctx; err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } local_sctx.check_subexp = -sctx->check_subexp; local_sctx.limited_states = sctx->limited_states; @@ -1801,7 +1885,7 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) err = sift_states_backward (preg, mctx, &local_sctx); local_sctx.sifted_states[str_idx] = cur_state; if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; /* There must not 2 same node in a node set. */ break; } @@ -1823,7 +1907,7 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) reg_errcode_t err; err = extend_buffers (mctx); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } buf = (char *) re_string_get_buffer (mctx->input); if (strncmp (buf + str_idx, buf + bkref_str_idx, subexp_len) != 0) @@ -1835,17 +1919,20 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) if (lim_states == NULL) { lim_states = re_malloc (re_dfastate_t *, str_idx + 1); + if (BE (lim_states == NULL, 0)) + { + err = REG_ESPACE; + goto free_return; + } } if (local_sctx.sifted_states == NULL) { /* It hasn't been initialized yet, initialize it now. */ local_sctx = *sctx; - if (BE (lim_states == NULL, 0)) - return REG_ESPACE; err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } local_sctx.check_subexp = 0; local_sctx.last_node = node; @@ -1855,7 +1942,7 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) sizeof (re_dfastate_t*) * (str_idx + 1)); err = sift_states_backward (preg, mctx, &local_sctx); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; if (local_sctx.sifted_states[0] == NULL && local_sctx.limited_states[0] == NULL) { @@ -1869,10 +1956,10 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) err = match_ctx_add_entry (mctx, sctx->cur_bkref, bkref_str_idx, str_idx, sctx->cls_subexp_idx); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; err = clean_state_log_if_need (mctx, dest_str_idx); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; break; } } @@ -1882,18 +1969,19 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes) { err = sub_epsilon_src_nodes(dfa, node, dest_nodes, candidates); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; /* Update state_log. */ sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } - + err = REG_NOERROR; + free_return: if (local_sctx.sifted_states != NULL) re_node_set_free (&local_sctx.limits); if (lim_states != NULL) re_free (lim_states); - return REG_NOERROR; + return err; } static reg_errcode_t @@ -1958,11 +2046,11 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) cur_bkref_idx, entry->subexp_from, entry->subexp_to); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; err = clean_state_log_if_need (mctx, cur_bkref_idx + subexp_len); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } else { @@ -1984,24 +2072,27 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } local_sctx.last_node = node; local_sctx.last_str_idx = str_idx; err = re_node_set_insert (&local_sctx.limits, enabled_idx); if (BE (err < 0, 0)) - return REG_ESPACE; + { + err = REG_ESPACE; + goto free_return; + } cur_state = local_sctx.sifted_states[str_idx]; err = sift_states_backward (preg, mctx, &local_sctx); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; if (sctx->limited_states != NULL) { err = merge_state_array (dfa, sctx->limited_states, local_sctx.sifted_states, str_idx + 1); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } local_sctx.sifted_states[str_idx] = cur_state; re_node_set_remove (&local_sctx.limits, enabled_idx); @@ -2019,12 +2110,14 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes) } } } + err = REG_NOERROR; + free_return: if (local_sctx.sifted_states != NULL) { re_node_set_free (&local_sctx.limits); } - return REG_NOERROR; + return err; } @@ -2215,7 +2308,10 @@ transit_state_sb (err, preg, state, fl_search, mctx) *err = re_node_set_merge (&next_nodes, dfa->eclosures + dfa->nexts[cur_node]); if (BE (*err != REG_NOERROR, 0)) - return NULL; + { + re_node_set_free (&next_nodes); + return NULL; + } } } if (fl_search) @@ -2235,7 +2331,10 @@ transit_state_sb (err, preg, state, fl_search, mctx) *err = re_node_set_merge (&next_nodes, dfa->init_state->entrance_nodes); if (BE (*err != REG_NOERROR, 0)) - return NULL; + { + re_node_set_free (&next_nodes); + return NULL; + } } } context = re_string_context_at (mctx->input, cur_str_idx, mctx->eflags, @@ -2313,10 +2412,10 @@ transit_state_mb (preg, pstate, mctx) preg->newline_anchor); mctx->state_log[dest_idx] = re_acquire_state_context (&err, dfa, &dest_nodes, context); - if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) - return err; if (dest_state != NULL) re_node_set_free (&dest_nodes); + if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) + return err; } return REG_NOERROR; } @@ -2389,7 +2488,7 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx) match_ctx_clear_flag (mctx); err = sift_states_backward (preg, mctx, &sctx); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; /* And add the epsilon closures (which is `new_dest_nodes') of the backreference to appropriate state_log. */ @@ -2424,7 +2523,7 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx) context); if (BE (mctx->state_log[dest_str_idx] == NULL && err != REG_NOERROR, 0)) - return err; + goto free_return; } else { @@ -2433,13 +2532,16 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx) dest_state->entrance_nodes, new_dest_nodes); if (BE (err != REG_NOERROR, 0)) - return err; + { + re_node_set_free (&dest_nodes); + goto free_return; + } mctx->state_log[dest_str_idx] = re_acquire_state_context (&err, dfa, &dest_nodes, context); + re_node_set_free (&dest_nodes); if (BE (mctx->state_log[dest_str_idx] == NULL && err != REG_NOERROR, 0)) - return err; - re_node_set_free (&dest_nodes); + goto free_return; } /* We need to check recursively if the backreference can epsilon transit. */ @@ -2449,12 +2551,14 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx) err = transit_state_bkref_loop (preg, new_dest_nodes, work_state_log, mctx); if (BE (err != REG_NOERROR, 0)) - return err; + goto free_return; } } } + err = REG_NOERROR; + free_return: re_free (cur_regs); - return REG_NOERROR; + return err; } /* Build transition table for the state. @@ -2776,14 +2880,14 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) bitset_copy (dests_ch[j], intersec); err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); if (BE (err != REG_NOERROR, 0)) - return -1; + goto error_return; ++ndests; } /* Put the position in the current group. */ err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); if (BE (err < 0, 0)) - return -1; + goto error_return; /* If all characters are consumed, go to next node. */ if (!not_consumed) @@ -2795,12 +2899,16 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch) bitset_copy (dests_ch[ndests], accepts); err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); if (BE (err != REG_NOERROR, 0)) - return -1; + goto error_return; ++ndests; bitset_empty (accepts); } } return ndests; + error_return: + for (j = 0; j < ndests; ++j) + re_node_set_free (dests_node + j); + return -1; } #ifdef RE_ENABLE_I18N @@ -3099,10 +3207,12 @@ extend_buffers (mctx) if (mctx->state_log != NULL) { /* And double the length of state_log. */ - mctx->state_log = re_realloc (mctx->state_log, re_dfastate_t *, - pstr->bufs_len * 2); - if (BE (mctx->state_log == NULL, 0)) + re_dfastate_t **new_array; + new_array = re_realloc (mctx->state_log, re_dfastate_t *, + pstr->bufs_len * 2); + if (BE (new_array == NULL, 0)) return REG_ESPACE; + mctx->state_log = new_array; } /* Then reconstruct the buffers. */ @@ -3174,13 +3284,17 @@ match_ctx_add_entry (mctx, node, str_idx, from, to) { if (mctx->nbkref_ents >= mctx->abkref_ents) { - mctx->bkref_ents = re_realloc (mctx->bkref_ents, - struct re_backref_cache_entry, - mctx->abkref_ents * 2); - if (BE (mctx->bkref_ents == NULL, 0)) - return REG_ESPACE; + struct re_backref_cache_entry* new_entry; + new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, + mctx->abkref_ents * 2); + if (BE (new_entry == NULL, 0)) + { + re_free (mctx->bkref_ents); + return REG_ESPACE; + } + mctx->bkref_ents = new_entry; memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', - sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); + sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); mctx->abkref_ents *= 2; } mctx->bkref_ents[mctx->nbkref_ents].node = node; |