diff options
author | Ulrich Drepper <drepper@redhat.com> | 2005-09-07 01:15:33 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2005-09-07 01:15:33 +0000 |
commit | 01ed6ceb7c440f0695726463ee9ee307921ea97e (patch) | |
tree | eda9aef2d63fd0c0f39e51208e7bb9d463d65661 /posix/regex_internal.c | |
parent | 2d87db5b5341bd6b714f175c1c268b7136444a71 (diff) | |
download | glibc-01ed6ceb7c440f0695726463ee9ee307921ea97e.tar.gz glibc-01ed6ceb7c440f0695726463ee9ee307921ea97e.tar.xz glibc-01ed6ceb7c440f0695726463ee9ee307921ea97e.zip |
* posix/regex_internal.c (re_string_reconstruct): Avoid calling mbrtowc for very simple UTF-8 case.

2005-09-01 Paul Eggert <eggert@cs.ucla.edu>

* posix/regex_internal.c (build_wcs_upper_buffer): Fix portability bugs in int versus size_t comparisons.

2005-09-06 Ulrich Drepper <drepper@redhat.com>

* posix/regex_internal.c (re_acquire_state): Make DFA pointer arg a pointer-to-const.
(re_acquire_state_context): Likewise.
* posix/regex_internal.h: Adjust prototypes.

2005-08-31 Jim Meyering <jim@meyering.net>

* posix/regcomp.c (search_duplicated_node): Make first pointer arg a pointer-to-const.
* posix/regex_internal.c (create_ci_newstate, create_cd_newstate, register_state): Likewise.
* posix/regexec.c (search_cur_bkref_entry, check_dst_limits):
(check_dst_limits_calc_pos_1, check_dst_limits_calc_pos):
(group_nodes_into_DFAstates): Likewise.
* posix/regexec.c (re_search_internal): Simplify update of rm_so and rm_eo by replacing "if (A == B) A += C - B;" with the equivalent of "if (A == B) A = C;".

2005-09-06 Ulrich Drepper <drepper@redhat.com>

* posix/regcomp.c (re_compile_internal): Change third parameter type to size_t.
(init_dfa): Likewise. Make sure that arithmetic on pat_len doesn't overflow.
* posix/regex_internal.h (struct re_dfa_t): Change type of nodes_alloc and nodes_len to size_t.
* posix/regex_internal.c (re_dfa_add_node): Use size_t as type for new_nodes_alloc. Check for overflow.

2005-08-31 Paul Eggert <eggert@cs.ucla.edu>

* posix/regcomp.c (re_compile_fastmap_iter, init_dfa, init_word_char):
(optimize_subexps, lower_subexp): Don't assume 1<<31 has defined behavior on hosts with 32-bit int, since the signed shift might overflow. Use 1u<<31 instead.
* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain): Likewise.
* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise.
(check_subexp_matching_top): Likewise.
* posix/regcomp.c (optimize_subexps, lower_subexp): Use CHAR_BIT rather than 8, for clarity.
* posix/regexec.c (check_dst_limits_calc_pos_1):
(check_subexp_matching_top): Likewise.
* posix/regcomp.c (init_dfa): Make table_size unsigned, so that we don't have to worry about portability issues when shifting it left. Remove no-longer-needed test for table_size > 0.
* posix/regcomp.c (parse_sub_exp): Do not shift more bits than there are in a word, as the resulting behavior is undefined.
* posix/regexec.c (check_dst_limits_calc_pos_1): Likewise; in one case, a <= should have been an <, and in another case the whole test was missing.
* posix/regex_internal.h (BYTE_BITS): Remove. All uses changed to the standard name CHAR_BIT.
Diffstat (limited to 'posix/regex_internal.c')
-rw-r--r-- | posix/regex_internal.c | 120 |
1 files changed, 69 insertions, 51 deletions
diff --git a/posix/regex_internal.c b/posix/regex_internal.c index 821ed7f45f..240e8872b3 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -26,12 +26,13 @@ static void re_string_construct_common (const char *str, int len, static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc) internal_function; #endif /* RE_ENABLE_I18N */ -static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, +static reg_errcode_t register_state (const re_dfa_t *dfa, + re_dfastate_t *newstate, unsigned int hash) internal_function; -static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, +static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int hash) internal_function; -static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, +static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes, unsigned int context, unsigned int hash) internal_function; @@ -654,37 +655,50 @@ re_string_reconstruct (pstr, idx, eflags) byte other than 0x80 - 0xbf. */ raw = pstr->raw_mbs + pstr->raw_mbs_idx; end = raw + (offset - pstr->mb_cur_max); - for (p = raw + offset - 1; p >= end; --p) - if ((*p & 0xc0) != 0x80) - { - mbstate_t cur_state; - wchar_t wc2; - int mlen = raw + pstr->len - p; - unsigned char buf[6]; - - q = p; - if (BE (pstr->trans != NULL, 0)) - { - int i = mlen < 6 ? mlen : 6; - while (--i >= 0) - buf[i] = pstr->trans[p[i]]; - q = buf; - } - /* XXX Don't use mbrtowc, we know which conversion - to use (UTF-8 -> UCS4). */ - memset (&cur_state, 0, sizeof (cur_state)); - mlen = (mbrtowc (&wc2, (const char *) p, mlen, - &cur_state) - - (raw + offset - p)); - if (mlen >= 0) - { - memset (&pstr->cur_state, '\0', - sizeof (mbstate_t)); - pstr->valid_len = mlen; - wc = wc2; - } - break; - } + p = raw + offset - 1; +#ifdef _LIBC + /* We know the wchar_t encoding is UCS4, so for the simple + case, ASCII characters, skip the conversion step. 
*/ + if (isascii (*p) && BE (pstr->trans == NULL, 1)) + { + memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); + pstr->valid_len = 0; + wc = (wchar_t) *p; + } + else +#endif + for (; p >= end; --p) + if ((*p & 0xc0) != 0x80) + { + mbstate_t cur_state; + wchar_t wc2; + int mlen = raw + pstr->len - p; + unsigned char buf[6]; + size_t mbclen; + + q = p; + if (BE (pstr->trans != NULL, 0)) + { + int i = mlen < 6 ? mlen : 6; + while (--i >= 0) + buf[i] = pstr->trans[p[i]]; + q = buf; + } + /* XXX Don't use mbrtowc, we know which conversion + to use (UTF-8 -> UCS4). */ + memset (&cur_state, 0, sizeof (cur_state)); + mbclen = mbrtowc (&wc2, (const char *) p, mlen, + &cur_state); + if (raw + offset - p <= mbclen + && mbclen < (size_t) -2) + { + memset (&pstr->cur_state, '\0', + sizeof (mbstate_t)); + pstr->valid_len = mbclen - (raw + offset - p); + wc = wc2; + } + break; + } } if (wc == WEOF) @@ -738,15 +752,15 @@ re_string_reconstruct (pstr, idx, eflags) } else #endif /* RE_ENABLE_I18N */ - if (BE (pstr->mbs_allocated, 0)) - { - if (pstr->icase) - build_upper_buffer (pstr); - else if (pstr->trans != NULL) - re_string_translate_buffer (pstr); - } - else - pstr->valid_len = pstr->len; + if (BE (pstr->mbs_allocated, 0)) + { + if (pstr->icase) + build_upper_buffer (pstr); + else if (pstr->trans != NULL) + re_string_translate_buffer (pstr); + } + else + pstr->valid_len = pstr->len; pstr->cur_idx = 0; return REG_NOERROR; @@ -1345,12 +1359,16 @@ re_dfa_add_node (dfa, token) int type = token.type; if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) { - int new_nodes_alloc = dfa->nodes_alloc * 2; + size_t new_nodes_alloc = dfa->nodes_alloc * 2; int *new_nexts, *new_indices; re_node_set *new_edests, *new_eclosures; + re_token_t *new_nodes; + + /* Avoid overflows. 
*/ + if (BE (new_nodes_alloc < dfa->nodes_alloc, 0)) + return -1; - re_token_t *new_nodes = re_realloc (dfa->nodes, re_token_t, - new_nodes_alloc); + new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc); if (BE (new_nodes == NULL, 0)) return -1; dfa->nodes = new_nodes; @@ -1403,7 +1421,7 @@ calc_state_hash (nodes, context) static re_dfastate_t* re_acquire_state (err, dfa, nodes) reg_errcode_t *err; - re_dfa_t *dfa; + const re_dfa_t *dfa; const re_node_set *nodes; { unsigned int hash; @@ -1448,7 +1466,7 @@ re_acquire_state (err, dfa, nodes) static re_dfastate_t* re_acquire_state_context (err, dfa, nodes, context) reg_errcode_t *err; - re_dfa_t *dfa; + const re_dfa_t *dfa; const re_node_set *nodes; unsigned int context; { @@ -1486,7 +1504,7 @@ re_acquire_state_context (err, dfa, nodes, context) static reg_errcode_t register_state (dfa, newstate, hash) - re_dfa_t *dfa; + const re_dfa_t *dfa; re_dfastate_t *newstate; unsigned int hash; { @@ -1525,7 +1543,7 @@ register_state (dfa, newstate, hash) static re_dfastate_t * create_ci_newstate (dfa, nodes, hash) - re_dfa_t *dfa; + const re_dfa_t *dfa; const re_node_set *nodes; unsigned int hash; { @@ -1576,7 +1594,7 @@ create_ci_newstate (dfa, nodes, hash) static re_dfastate_t * create_cd_newstate (dfa, nodes, context, hash) - re_dfa_t *dfa; + const re_dfa_t *dfa; const re_node_set *nodes; unsigned int context, hash; { |