diff options
author | Ulrich Drepper <drepper@redhat.com> | 2005-09-28 17:33:18 +0000 |
---|---|---|
committer | Ulrich Drepper <drepper@redhat.com> | 2005-09-28 17:33:18 +0000 |
commit | 2c05d33f90861d074dc12808dafbde30f487b1a0 (patch) | |
tree | 654e4c8e7c500eca23285264cbb19a0945f05638 | |
parent | 1873e3cd1a7f5173d20d9060f3be825f31a53a39 (diff) | |
download | glibc-2c05d33f90861d074dc12808dafbde30f487b1a0.tar.gz glibc-2c05d33f90861d074dc12808dafbde30f487b1a0.tar.xz glibc-2c05d33f90861d074dc12808dafbde30f487b1a0.zip |
[BZ #1302]
2005-09-06 Paul Eggert <eggert@cs.ucla.edu> Ulrich Drepper <drepper@redhat.com> [BZ #1302] Change bitset word type from unsigned int to unsigned long int, as this has better performance on typical 64-bit hosts. Change bitset type name to bitset_t. * posix/regcomp.c (build_equiv_class, build_charclass): (build_range_exp, build_collating_symbol): Prefer bitset_t to re_bitset_ptr_t in prototypes, when the actual argument is a bitset. This is merely a style issue, but it makes it clearer that an entire array is expected. (re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps, lower_subexp): Adjust for new bitset_t definition. (lower_subexp, parse_bracket_exp, built_charclass_op): Likewise. * posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain, bitset_not, bitset_merge, bitset_set_all, bitset_mask): Likewise. * posix/regexec.c (check_dst_limits_calc_pos_1, check_subexp_matching_top, build_trtable, group_nodes_into_DFAstates): Likewise. * posix/regcomp.c (utf8_sb_map): Don't assume initializer == 0xffffffff. * posix/regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS. All uses changed. (BITSET_WORDS): Renamed from BITSET_UINTS. All uses changed. (bitset_word_t): New type, replacing 'unsigned int' for bitset uses. All uses changed. (BITSET_WORD_MAX): New macro. (bitset_set, bitset_clear, bitset_contain, bitset_empty, (bitset_set_all, bitset_copy): Adjust for bitset_t change. (bitset_empty, bitset_copy): Prefer sizeof (bitset_t) to multiplying it out ourselves. (bitset_not_merge): Remove; unused. (bitset_contain): Return bool, not unsigned int with one bit on. All callers changed. * posix/regexec.c (build_trtable): Don't assume bitset_t has no stricter alignment than re_node_set; do this by defining a new internal type struct dests_alloc and using it to allocate memory.
-rw-r--r-- | ChangeLog | 39 | ||||
-rw-r--r-- | posix/regcomp.c | 93 | ||||
-rw-r--r-- | posix/regex_internal.h | 74 | ||||
-rw-r--r-- | posix/regexec.c | 97 |
4 files changed, 173 insertions, 130 deletions
diff --git a/ChangeLog b/ChangeLog index a8e575ffdf..002323ef6e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,42 @@ +2005-09-06 Paul Eggert <eggert@cs.ucla.edu> + Ulrich Drepper <drepper@redhat.com> + + [BZ #1302] + Change bitset word type from unsigned int to unsigned long int, + as this has better performance on typical 64-bit hosts. Change + bitset type name to bitset_t. + * posix/regcomp.c (build_equiv_class, build_charclass): + (build_range_exp, build_collating_symbol): + Prefer bitset_t to re_bitset_ptr_t in prototypes, when the actual + argument is a bitset. This is merely a style issue, but it makes + it clearer that an entire array is expected. + (re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps, + lower_subexp): Adjust for new bitset_t definition. + (lower_subexp, parse_bracket_exp, built_charclass_op): Likewise. + * posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain, + bitset_not, bitset_merge, bitset_set_all, bitset_mask): Likewise. + * posix/regexec.c (check_dst_limits_calc_pos_1, + check_subexp_matching_top, build_trtable, group_nodes_into_DFAstates): + Likewise. + * posix/regcomp.c (utf8_sb_map): Don't assume initializer + == 0xffffffff. + * posix/regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS. + All uses changed. + (BITSET_WORDS): Renamed from BITSET_UINTS. All uses changed. + (bitset_word_t): New type, replacing 'unsigned int' for bitset uses. + All uses changed. + (BITSET_WORD_MAX): New macro. + (bitset_set, bitset_clear, bitset_contain, bitset_empty, + (bitset_set_all, bitset_copy): Adjust for bitset_t change. + (bitset_empty, bitset_copy): + Prefer sizeof (bitset_t) to multiplying it out ourselves. + (bitset_not_merge): Remove; unused. + (bitset_contain): Return bool, not unsigned int with one bit on. + All callers changed. + * posix/regexec.c (build_trtable): Don't assume bitset_t has no + stricter alignment than re_node_set; do this by defining a new + internal type struct dests_alloc and using it to allocate memory. + 2005-09-27 Ulrich Drepper <drepper@redhat.com> [BZ #1230] diff --git a/posix/regcomp.c b/posix/regcomp.c index bf374a8b61..fde262b83c 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -113,21 +113,21 @@ static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, # endif /* not RE_ENABLE_I18N */ #endif /* not _LIBC */ #ifdef RE_ENABLE_I18N -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, +static reg_errcode_t build_equiv_class (bitset_t sbcset, re_charset_t *mbcset, int *equiv_class_alloc, const unsigned char *name); static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, + bitset_t sbcset, re_charset_t *mbcset, int *char_class_alloc, const unsigned char *class_name, reg_syntax_t syntax); #else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, +static reg_errcode_t build_equiv_class (bitset_t sbcset, const unsigned char *name); static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, + bitset_t sbcset, const unsigned char *class_name, reg_syntax_t syntax); #endif /* not RE_ENABLE_I18N */ @@ -354,7 +354,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) && dfa->nodes[node].type == CHARACTER && dfa->nodes[node].mb_partial) *p++ = dfa->nodes[node].opr.c; - memset (&state, 0, sizeof (state)); + memset (&state, '\0', sizeof (state)); if (mbrtowc (&wc, (const char *) buf, p - buf, &state) == p - buf && (__wcrtomb ((char *) buf, towlower (wc), &state) @@ -365,11 +365,15 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) } else if (type == SIMPLE_BRACKET) { - int i, j, ch; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (dfa->nodes[node].opr.sbcset[i] & (1u << j)) - re_set_fastmap (fastmap, icase, ch); + int i, ch; + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + { + int j; + bitset_word_t w = dfa->nodes[node].opr.sbcset[i]; + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) + if (w & ((bitset_word_t) 1 << j)) + re_set_fastmap (fastmap, icase, ch); + } } #ifdef RE_ENABLE_I18N else if (type == COMPLEX_BRACKET) @@ -388,13 +392,11 @@ re_compile_fastmap_iter (bufp, init_state, fastmap) is a valid collation element, and don't catch 'b' since 'b' is the only collation element which starts from 'b'. */ - int j, ch; const int32_t *table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (table[ch] < 0) - re_set_fastmap (fastmap, icase, ch); + for (i = 0; i < SBC_MAX; ++i) + if (table[i] < 0) + re_set_fastmap (fastmap, icase, i); } # else if (dfa->mb_cur_max > 1) @@ -581,14 +583,10 @@ weak_alias (__regerror, regerror) UTF-8 is used. Otherwise we would allocate memory just to initialize it the same all the time. UTF-8 is the preferred encoding so this is a worthwhile optimization. */ -static const bitset utf8_sb_map = +static const bitset_t utf8_sb_map = { /* Set the first 128 bits. */ -# if UINT_MAX == 0xffffffff - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff -# else -# error "Add case for new unsigned int size" -# endif + [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX }; #endif @@ -908,20 +906,17 @@ init_dfa (dfa, pat_len) { int i, j, ch; - dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); + dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); if (BE (dfa->sb_char == NULL, 0)) return REG_ESPACE; - /* Clear all bits by, then set those corresponding to single - byte chars. */ - bitset_empty (dfa->sb_char); - - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) + /* Set the bits corresponding to single byte chars. */ + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) { wint_t wch = __btowc (ch); if (wch != WEOF) - dfa->sb_char[i] |= 1u << j; + dfa->sb_char[i] |= (bitset_word_t) 1 << j; # ifndef _LIBC if (isascii (ch) && wch != ch) dfa->map_notascii = 1; @@ -946,10 +941,10 @@ init_word_char (dfa) { int i, j, ch; dfa->word_ops_used = 1; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) + for (i = 0, ch = 0; i < BITSET_WORDS; ++i) + for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch) if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= 1u << j; + dfa->word_char[i] |= (bitset_word_t) 1 << j; } /* Free the work area which are only used while compiling. */ @@ -1096,8 +1091,9 @@ optimize_utf8 (dfa) case COMPLEX_BRACKET: return; case SIMPLE_BRACKET: - /* Just double check. */ - for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i) + /* Just double check. The non-ASCII range starts at 0x80. */ + assert (0x80 % BITSET_WORD_BITS == 0); + for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i) if (dfa->nodes[node].opr.sbcset[i]) return; break; @@ -1282,8 +1278,8 @@ optimize_subexps (extra, node) node->left->parent = node; dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; - if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map) - dfa->used_bkref_map &= ~(1u << other_idx); + if (other_idx < BITSET_WORD_BITS) + dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx); } return REG_NOERROR; @@ -1331,8 +1327,9 @@ lower_subexp (err, preg, node) very common, so we do not lose much. An example that triggers this case is the sed "script" /\(\)/x. */ && node->left != NULL - && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map - || !(dfa->used_bkref_map & (1u << node->token.opr.idx)))) + && (node->token.opr.idx >= BITSET_WORD_BITS + || !(dfa->used_bkref_map + & ((bitset_word_t) 1 << node->token.opr.idx)))) return node->left; /* Convert the SUBEXP node to the concatenation of an @@ -2666,7 +2663,7 @@ build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) # else /* not RE_ENABLE_I18N */ build_range_exp (sbcset, start_elem, end_elem) # endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; + bitset_t sbcset; bracket_elem_t *start_elem, *end_elem; { unsigned int start_ch, end_ch; @@ -2788,7 +2785,7 @@ build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) # else /* not RE_ENABLE_I18N */ build_collating_symbol (sbcset, name) # endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; + bitset_t sbcset; const unsigned char *name; { size_t name_len = strlen ((const char *) name); @@ -2931,7 +2928,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) re_charset_t *mbcset; int *range_alloc; - re_bitset_ptr_t sbcset; + bitset_t sbcset; bracket_elem_t *start_elem, *end_elem; { unsigned int ch; @@ -3014,7 +3011,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) re_charset_t *mbcset; int *coll_sym_alloc; - re_bitset_ptr_t sbcset; + bitset_t sbcset; const unsigned char *name; { int32_t elem, idx; @@ -3099,7 +3096,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) _NL_COLLATE_SYMB_EXTRAMB); } #endif - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); #ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); #endif /* RE_ENABLE_I18N */ @@ -3309,12 +3306,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err) mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); if (BE (mbc_tree == NULL, 0)) goto parse_bracket_exp_espace; - for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx) + for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx) if (sbcset[sbc_idx]) break; /* If there are no bits set in sbcset, there is no point of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ - if (sbc_idx < BITSET_UINTS) + if (sbc_idx < BITSET_WORDS) { /* Build a tree for simple bracket. */ br_token.type = SIMPLE_BRACKET; @@ -3464,7 +3461,7 @@ build_equiv_class (sbcset, mbcset, equiv_class_alloc, name) #else /* not RE_ENABLE_I18N */ build_equiv_class (sbcset, name) #endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; + bitset_t sbcset; const unsigned char *name; { #if defined _LIBC @@ -3560,7 +3557,7 @@ build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax) build_charclass (trans, sbcset, class_name, syntax) #endif /* not RE_ENABLE_I18N */ RE_TRANSLATE_TYPE trans; - re_bitset_ptr_t sbcset; + bitset_t sbcset; const unsigned char *class_name; reg_syntax_t syntax; { @@ -3649,7 +3646,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err) re_token_t br_token; bin_tree_t *tree; - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); + sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); #ifdef RE_ENABLE_I18N mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); #endif /* RE_ENABLE_I18N */ diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 0096bf7c91..681be1f52b 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -39,6 +39,9 @@ #if defined HAVE_WCTYPE_H || defined _LIBC # include <wctype.h> #endif /* HAVE_WCTYPE_H || _LIBC */ +#if defined HAVE_STDBOOL_H || defined _LIBC +# include <stdbool.h> +#endif /* HAVE_STDBOOL_H || _LIBC */ #if defined _LIBC # include <bits/libc-lock.h> #else @@ -120,26 +123,31 @@ extern const char __re_error_msgid[] attribute_hidden; extern const size_t __re_error_msgid_idx[] attribute_hidden; -/* Number of bits in an unsinged int. */ -#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT) -/* Number of unsigned int in an bit_set. */ -#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS) -typedef unsigned int bitset[BITSET_UINTS]; -typedef unsigned int *re_bitset_ptr_t; -typedef const unsigned int *re_const_bitset_ptr_t; - -#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS) -#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS)) -#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS)) -#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_set_all(set) \ - memset (set, 255, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_copy(dest,src) \ - memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS) -static inline void bitset_not (bitset set); -static inline void bitset_merge (bitset dest, const bitset src); -static inline void bitset_not_merge (bitset dest, const bitset src); -static inline void bitset_mask (bitset dest, const bitset src); +/* An integer used to represent a set of bits. It must be unsigned, + and must be at least as wide as unsigned int. */ +typedef unsigned long int bitset_word_t; +/* All bits set in a bitset_word_t. */ +#define BITSET_WORD_MAX ULONG_MAX +/* Number of bits in a bitset_word_t. */ +#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT) +/* Number of bitset_word_t in a bit_set. */ +#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS) +typedef bitset_word_t bitset_t[BITSET_WORDS]; +typedef bitset_word_t *re_bitset_ptr_t; +typedef const bitset_word_t *re_const_bitset_ptr_t; + +#define bitset_set(set,i) \ + (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS) +#define bitset_clear(set,i) \ + (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_contain(set,i) \ + (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS)) +#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t)) +#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t)) +#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t)) +static inline void bitset_not (bitset_t set); +static inline void bitset_merge (bitset_t dest, const bitset_t src); +static inline void bitset_mask (bitset_t dest, const bitset_t src); #define PREV_WORD_CONSTRAINT 0x0001 #define PREV_NOTWORD_CONSTRAINT 0x0002 @@ -648,8 +656,8 @@ struct re_dfa_t int nbackref; /* The number of backreference in this dfa. */ /* Bitmap expressing which backreference is used. */ - unsigned int used_bkref_map; - unsigned int completed_bkref_map; + bitset_word_t used_bkref_map; + bitset_word_t completed_bkref_map; unsigned int has_plural_match : 1; /* If this dfa has "multibyte node", which is a backreference or @@ -660,7 +668,7 @@ struct re_dfa_t unsigned int map_notascii : 1; unsigned int word_ops_used : 1; int mb_cur_max; - bitset word_char; + bitset_t word_char; reg_syntax_t syntax; int *subexp_map; #ifdef DEBUG @@ -734,34 +742,26 @@ typedef struct /* Inline functions for bitset operation. */ static inline void -bitset_not (bitset set) +bitset_not (bitset_t set) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) set[bitset_i] = ~set[bitset_i]; } static inline void -bitset_merge (bitset dest, const bitset src) +bitset_merge (bitset_t dest, const bitset_t src) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) dest[bitset_i] |= src[bitset_i]; } static inline void -bitset_not_merge (bitset dest, const bitset src) -{ - int i; - for (i = 0; i < BITSET_UINTS; ++i) - dest[i] |= ~src[i]; -} - -static inline void -bitset_mask (bitset dest, const bitset src) +bitset_mask (bitset_t dest, const bitset_t src) { int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) + for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) dest[bitset_i] &= src[bitset_i]; } diff --git a/posix/regexec.c b/posix/regexec.c index 9df5574cdb..03f705a3a3 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -213,7 +213,7 @@ static unsigned int find_collation_sequence_value (const unsigned char *mbs, static int group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, re_node_set *states_node, - bitset *states_ch) internal_function; + bitset_t *states_ch) internal_function; static int check_node_accept (const re_match_context_t *mctx, const re_token_t *node, int idx) internal_function; @@ -1980,9 +1980,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) if (ent->node != node) continue; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map - && !(ent->eps_reachable_subexps_map & (1u << subexp_idx))) + if (subexp_idx < BITSET_WORD_BITS + && !(ent->eps_reachable_subexps_map + & ((bitset_word_t) 1 << subexp_idx))) continue; /* Recurse trying to reach the OP_OPEN_SUBEXP and @@ -2008,9 +2008,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) if (cpos == 0 && (boundaries & 2)) return 0; - if (subexp_idx - < CHAR_BIT * sizeof ent->eps_reachable_subexps_map) - ent->eps_reachable_subexps_map &= ~(1u << subexp_idx); + if (subexp_idx < BITSET_WORD_BITS) + ent->eps_reachable_subexps_map + &= ~((bitset_word_t) 1 << subexp_idx); } while (ent++->more); } @@ -2477,8 +2477,9 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx) { int node = cur_nodes->elems[node_idx]; if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map - && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx)) + && dfa->nodes[node].opr.idx < BITSET_WORD_BITS + && (dfa->used_bkref_map + & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx))) { err = match_ctx_add_subtop (mctx, node, str_idx); if (BE (err != REG_NOERROR, 0)) @@ -3363,31 +3364,37 @@ build_trtable (dfa, state) { reg_errcode_t err; int i, j, ch, need_word_trtable = 0; - unsigned int elem, mask; - int dests_node_malloced = 0, dest_states_malloced = 0; + bitset_word_t elem, mask; + bool dests_node_malloced = false; + bool dest_states_malloced = false; int ndests; /* Number of the destination states from `state'. */ re_dfastate_t **trtable; re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; - bitset *dests_ch; - bitset acceptable; + bitset_t *dests_ch; + bitset_t acceptable; + + struct dests_alloc + { + re_node_set dests_node[SBC_MAX]; + bitset_t dests_ch[SBC_MAX]; + } *dests_alloc; /* We build DFA states which corresponds to the destination nodes from `state'. `dests_node[i]' represents the nodes which i-th destination state contains, and `dests_ch[i]' represents the characters which i-th destination state accepts. */ - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) - dests_node = (re_node_set *) - alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); + if (__libc_use_alloca (sizeof (struct dests_alloc))) + dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); else { - dests_node = (re_node_set *) - malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - if (BE (dests_node == NULL, 0)) + dests_alloc = re_malloc (struct dests_alloc, 1); + if (BE (dests_alloc == NULL, 0)) return 0; - dests_node_malloced = 1; + dests_node_malloced = true; } - dests_ch = (bitset *) (dests_node + SBC_MAX); + dests_node = dests_alloc->dests_node; + dests_ch = dests_alloc->dests_ch; /* Initialize transiton table. */ state->word_trtable = state->trtable = NULL; @@ -3398,7 +3405,7 @@ build_trtable (dfa, state) if (BE (ndests <= 0, 0)) { if (dests_node_malloced) - free (dests_node); + free (dests_alloc); /* Return 0 in case of an error, 1 otherwise. */ if (ndests == 0) { @@ -3413,7 +3420,7 @@ build_trtable (dfa, state) if (BE (err != REG_NOERROR, 0)) goto out_free; - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX + if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX + ndests * 3 * sizeof (re_dfastate_t *))) dest_states = (re_dfastate_t **) alloca (ndests * 3 * sizeof (re_dfastate_t *)); @@ -3430,10 +3437,10 @@ out_free: for (i = 0; i < ndests; ++i) re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); + free (dests_alloc); return 0; } - dest_states_malloced = 1; + dest_states_malloced = true; } dest_states_word = dest_states + ndests; dest_states_nl = dest_states_word + ndests; @@ -3495,8 +3502,8 @@ out_free: goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3526,8 +3533,8 @@ out_free: goto out_free; /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; + for (i = 0; i < BITSET_WORDS; ++i) + for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1; elem; mask <<= 1, elem >>= 1, ++ch) if (BE (elem & 1, 0)) @@ -3568,7 +3575,7 @@ out_free: re_node_set_free (dests_node + i); if (dests_node_malloced) - free (dests_node); + free (dests_alloc); return 1; } @@ -3583,13 +3590,13 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) const re_dfa_t *dfa; const re_dfastate_t *state; re_node_set *dests_node; - bitset *dests_ch; + bitset_t *dests_ch; { reg_errcode_t err; int result; int i, j, k; int ndests; /* Number of the destinations from `state'. */ - bitset accepts; /* Characters a node can accept. */ + bitset_t accepts; /* Characters a node can accept. */ const re_node_set *cur_nodes = &state->nodes; bitset_empty (accepts); ndests = 0; @@ -3624,7 +3631,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) #ifdef RE_ENABLE_I18N else if (type == OP_UTF8_PERIOD) { - memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2); + memset (accepts, '\xff', sizeof (bitset_t) / 2); if (!(dfa->syntax & RE_DOT_NEWLINE)) bitset_clear (accepts, '\n'); if (dfa->syntax & RE_DOT_NOT_NULL) @@ -3640,7 +3647,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) { if (constraint & NEXT_NEWLINE_CONSTRAINT) { - int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); + bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); bitset_empty (accepts); if (accepts_newline) bitset_set (accepts, NEWLINE_CHAR); @@ -3655,7 +3662,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) if (constraint & NEXT_WORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word_t any_set = 0; if (type == CHARACTER && !node->word_char) { bitset_empty (accepts); @@ -3663,18 +3670,18 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= dfa->word_char[j]); if (!any_set) continue; } if (constraint & NEXT_NOTWORD_CONSTRAINT) { - unsigned int any_set = 0; + bitset_word_t any_set = 0; if (type == CHARACTER && node->word_char) { bitset_empty (accepts); @@ -3682,11 +3689,11 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) } #ifdef RE_ENABLE_I18N if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); else #endif - for (j = 0; j < BITSET_UINTS; ++j) + for (j = 0; j < BITSET_WORDS; ++j) any_set |= (accepts[j] &= ~dfa->word_char[j]); if (!any_set) continue; @@ -3697,10 +3704,10 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) state. Above, we make sure that accepts is not empty. */ for (j = 0; j < ndests; ++j) { - bitset intersec; /* Intersection sets, see below. */ - bitset remains; + bitset_t intersec; /* Intersection sets, see below. */ + bitset_t remains; /* Flags, see below. */ - int has_intersec, not_subset, not_consumed; + bitset_word_t has_intersec, not_subset, not_consumed; /* Optimization, skip if this state doesn't accept the character. */ if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) @@ -3708,7 +3715,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) /* Enumerate the intersection set of this state and `accepts'. */ has_intersec = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; /* And skip if the intersection set is empty. */ if (!has_intersec) @@ -3716,7 +3723,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) /* Then check if this state is a subset of `accepts'. */ not_subset = not_consumed = 0; - for (k = 0; k < BITSET_UINTS; ++k) + for (k = 0; k < BITSET_WORDS; ++k) { not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; |