about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2005-09-28 17:33:18 +0000
committer Ulrich Drepper <drepper@redhat.com> 2005-09-28 17:33:18 +0000
commit 2c05d33f90861d074dc12808dafbde30f487b1a0 (patch)
tree 654e4c8e7c500eca23285264cbb19a0945f05638
parent 1873e3cd1a7f5173d20d9060f3be825f31a53a39 (diff)
download glibc-2c05d33f90861d074dc12808dafbde30f487b1a0.tar.gz
glibc-2c05d33f90861d074dc12808dafbde30f487b1a0.tar.xz
glibc-2c05d33f90861d074dc12808dafbde30f487b1a0.zip
[BZ #1302]
2005-09-06  Paul Eggert  <eggert@cs.ucla.edu>
            Ulrich Drepper  <drepper@redhat.com>

	[BZ #1302]
	Change bitset word type from unsigned int to unsigned long int,
	as this has better performance on typical 64-bit hosts.  Change
	bitset type name to bitset_t.
	* posix/regcomp.c (build_equiv_class, build_charclass)
	(build_range_exp, build_collating_symbol):
	Prefer bitset_t to re_bitset_ptr_t in prototypes, when the actual
	argument is a bitset.  This is merely a style issue, but it makes
	it clearer that an entire array is expected.
	(re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps,
	lower_subexp): Adjust for new bitset_t definition.
	(parse_bracket_exp, build_charclass_op): Likewise.
	* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain,
	bitset_not, bitset_merge, bitset_set_all, bitset_mask): Likewise.
	* posix/regexec.c (check_dst_limits_calc_pos_1,
	check_subexp_matching_top, build_trtable, group_nodes_into_DFAstates):
	Likewise.
	* posix/regcomp.c (utf8_sb_map): Don't assume initializer
	== 0xffffffff.
	* posix/regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS.
	All uses changed.
	(BITSET_WORDS): Renamed from BITSET_UINTS.  All uses changed.
	(bitset_word_t): New type, replacing 'unsigned int' for bitset uses.
	All uses changed.
	(BITSET_WORD_MAX): New macro.
	(bitset_set, bitset_clear, bitset_contain, bitset_empty,
	bitset_set_all, bitset_copy): Adjust for bitset_t change.
	(bitset_empty, bitset_copy):
	Prefer sizeof (bitset_t) to multiplying it out ourselves.
	(bitset_not_merge): Remove; unused.
	(bitset_contain): Return bool, not unsigned int with one bit on.
	All callers changed.
	* posix/regexec.c (build_trtable): Don't assume bitset_t has no
	stricter alignment than re_node_set; do this by defining a new
	internal type struct dests_alloc and using it to allocate memory.
-rw-r--r-- ChangeLog 39
-rw-r--r-- posix/regcomp.c 93
-rw-r--r-- posix/regex_internal.h 74
-rw-r--r-- posix/regexec.c 97
4 files changed, 173 insertions, 130 deletions
diff --git a/ChangeLog b/ChangeLog
index a8e575ffdf..002323ef6e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,42 @@
+2005-09-06  Paul Eggert  <eggert@cs.ucla.edu>
+            Ulrich Drepper  <drepper@redhat.com>
+
+	[BZ #1302]
+	Change bitset word type from unsigned int to unsigned long int,
+	as this has better performance on typical 64-bit hosts.  Change
+	bitset type name to bitset_t.
+	* posix/regcomp.c (build_equiv_class, build_charclass)
+	(build_range_exp, build_collating_symbol):
+	Prefer bitset_t to re_bitset_ptr_t in prototypes, when the actual
+	argument is a bitset.  This is merely a style issue, but it makes
+	it clearer that an entire array is expected.
+	(re_compile_fastmap_iter, init_dfa, init_word_char, optimize_subexps,
+	lower_subexp): Adjust for new bitset_t definition.
+	(parse_bracket_exp, build_charclass_op): Likewise.
+	* posix/regex_internal.h (bitset_set, bitset_clear, bitset_contain,
+	bitset_not, bitset_merge, bitset_set_all, bitset_mask): Likewise.
+	* posix/regexec.c (check_dst_limits_calc_pos_1,
+	check_subexp_matching_top, build_trtable, group_nodes_into_DFAstates):
+	Likewise.
+	* posix/regcomp.c (utf8_sb_map): Don't assume initializer
+	== 0xffffffff.
+	* posix/regex_internal.h (BITSET_WORD_BITS): Renamed from UINT_BITS.
+	All uses changed.
+	(BITSET_WORDS): Renamed from BITSET_UINTS.  All uses changed.
+	(bitset_word_t): New type, replacing 'unsigned int' for bitset uses.
+	All uses changed.
+	(BITSET_WORD_MAX): New macro.
+	(bitset_set, bitset_clear, bitset_contain, bitset_empty,
+	bitset_set_all, bitset_copy): Adjust for bitset_t change.
+	(bitset_empty, bitset_copy):
+	Prefer sizeof (bitset_t) to multiplying it out ourselves.
+	(bitset_not_merge): Remove; unused.
+	(bitset_contain): Return bool, not unsigned int with one bit on.
+	All callers changed.
+	* posix/regexec.c (build_trtable): Don't assume bitset_t has no
+	stricter alignment than re_node_set; do this by defining a new
+	internal type struct dests_alloc and using it to allocate memory.
+
 2005-09-27  Ulrich Drepper  <drepper@redhat.com>
 
 	[BZ #1230]
diff --git a/posix/regcomp.c b/posix/regcomp.c
index bf374a8b61..fde262b83c 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -113,21 +113,21 @@ static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset,
 # endif /* not RE_ENABLE_I18N */
 #endif /* not _LIBC */
 #ifdef RE_ENABLE_I18N
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
 					re_charset_t *mbcset,
 					int *equiv_class_alloc,
 					const unsigned char *name);
 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
-				      re_bitset_ptr_t sbcset,
+				      bitset_t sbcset,
 				      re_charset_t *mbcset,
 				      int *char_class_alloc,
 				      const unsigned char *class_name,
 				      reg_syntax_t syntax);
 #else  /* not RE_ENABLE_I18N */
-static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset,
+static reg_errcode_t build_equiv_class (bitset_t sbcset,
 					const unsigned char *name);
 static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
-				      re_bitset_ptr_t sbcset,
+				      bitset_t sbcset,
 				      const unsigned char *class_name,
 				      reg_syntax_t syntax);
 #endif /* not RE_ENABLE_I18N */
@@ -354,7 +354,7 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
 		     &&	dfa->nodes[node].type == CHARACTER
 		     && dfa->nodes[node].mb_partial)
 		*p++ = dfa->nodes[node].opr.c;
-	      memset (&state, 0, sizeof (state));
+	      memset (&state, '\0', sizeof (state));
 	      if (mbrtowc (&wc, (const char *) buf, p - buf,
 			   &state) == p - buf
 		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
@@ -365,11 +365,15 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
 	}
       else if (type == SIMPLE_BRACKET)
 	{
-	  int i, j, ch;
-	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
-	    for (j = 0; j < UINT_BITS; ++j, ++ch)
-	      if (dfa->nodes[node].opr.sbcset[i] & (1u << j))
-		re_set_fastmap (fastmap, icase, ch);
+	  int i, ch;
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    {
+	      int j;
+	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
+	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+		if (w & ((bitset_word_t) 1 << j))
+		  re_set_fastmap (fastmap, icase, ch);
+	    }
 	}
 #ifdef RE_ENABLE_I18N
       else if (type == COMPLEX_BRACKET)
@@ -388,13 +392,11 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
 			  is a valid collation element, and don't catch
 			  'b' since 'b' is the only collation element
 			  which starts from 'b'.  */
-		  int j, ch;
 		  const int32_t *table = (const int32_t *)
 		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-		  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
-		    for (j = 0; j < UINT_BITS; ++j, ++ch)
-		      if (table[ch] < 0)
-			re_set_fastmap (fastmap, icase, ch);
+		  for (i = 0; i < SBC_MAX; ++i)
+		    if (table[i] < 0)
+		      re_set_fastmap (fastmap, icase, i);
 		}
 # else
 	      if (dfa->mb_cur_max > 1)
@@ -581,14 +583,10 @@ weak_alias (__regerror, regerror)
    UTF-8 is used.  Otherwise we would allocate memory just to initialize
    it the same all the time.  UTF-8 is the preferred encoding so this is
    a worthwhile optimization.  */
-static const bitset utf8_sb_map =
+static const bitset_t utf8_sb_map =
 {
   /* Set the first 128 bits.  */
-# if UINT_MAX == 0xffffffff
-  0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
-# else
-#  error "Add case for new unsigned int size"
-# endif
+  [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
 };
 #endif
 
@@ -908,20 +906,17 @@ init_dfa (dfa, pat_len)
 	{
 	  int i, j, ch;
 
-	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+	  dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
 	  if (BE (dfa->sb_char == NULL, 0))
 	    return REG_ESPACE;
 
-	  /* Clear all bits by, then set those corresponding to single
-	     byte chars.  */
-	  bitset_empty (dfa->sb_char);
-
-	  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
-	    for (j = 0; j < UINT_BITS; ++j, ++ch)
+	  /* Set the bits corresponding to single byte chars.  */
+	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
 	      {
 		wint_t wch = __btowc (ch);
 		if (wch != WEOF)
-		  dfa->sb_char[i] |= 1u << j;
+		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;
 # ifndef _LIBC
 		if (isascii (ch) && wch != ch)
 		  dfa->map_notascii = 1;
@@ -946,10 +941,10 @@ init_word_char (dfa)
 {
   int i, j, ch;
   dfa->word_ops_used = 1;
-  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
-    for (j = 0; j < UINT_BITS; ++j, ++ch)
+  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
+    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
       if (isalnum (ch) || ch == '_')
-	dfa->word_char[i] |= 1u << j;
+	dfa->word_char[i] |= (bitset_word_t) 1 << j;
 }
 
 /* Free the work area which are only used while compiling.  */
@@ -1096,8 +1091,9 @@ optimize_utf8 (dfa)
       case COMPLEX_BRACKET:
 	return;
       case SIMPLE_BRACKET:
-	/* Just double check.  */
-        for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
+	/* Just double check.  The non-ASCII range starts at 0x80.  */
+	assert (0x80 % BITSET_WORD_BITS == 0);
+        for (i = 0x80 / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
 	  if (dfa->nodes[node].opr.sbcset[i])
 	    return;
 	break;
@@ -1282,8 +1278,8 @@ optimize_subexps (extra, node)
         node->left->parent = node;
 
       dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
-      if (other_idx < CHAR_BIT * sizeof dfa->used_bkref_map)
-	dfa->used_bkref_map &= ~(1u << other_idx);
+      if (other_idx < BITSET_WORD_BITS)
+	  dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
     }
 
   return REG_NOERROR;
@@ -1331,8 +1327,9 @@ lower_subexp (err, preg, node)
 	 very common, so we do not lose much.  An example that triggers
 	 this case is the sed "script" /\(\)/x.  */
       && node->left != NULL
-      && (node->token.opr.idx >= CHAR_BIT * sizeof dfa->used_bkref_map
-	  || !(dfa->used_bkref_map & (1u << node->token.opr.idx))))
+      && (node->token.opr.idx >= BITSET_WORD_BITS
+	  || !(dfa->used_bkref_map
+	       & ((bitset_word_t) 1 << node->token.opr.idx))))
     return node->left;
 
   /* Convert the SUBEXP node to the concatenation of an
@@ -2666,7 +2663,7 @@ build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
 # else /* not RE_ENABLE_I18N */
 build_range_exp (sbcset, start_elem, end_elem)
 # endif /* not RE_ENABLE_I18N */
-     re_bitset_ptr_t sbcset;
+     bitset_t sbcset;
      bracket_elem_t *start_elem, *end_elem;
 {
   unsigned int start_ch, end_ch;
@@ -2788,7 +2785,7 @@ build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
 # else /* not RE_ENABLE_I18N */
 build_collating_symbol (sbcset, name)
 # endif /* not RE_ENABLE_I18N */
-     re_bitset_ptr_t sbcset;
+     bitset_t sbcset;
      const unsigned char *name;
 {
   size_t name_len = strlen ((const char *) name);
@@ -2931,7 +2928,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
   build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
 	 re_charset_t *mbcset;
 	 int *range_alloc;
-	 re_bitset_ptr_t sbcset;
+	 bitset_t sbcset;
 	 bracket_elem_t *start_elem, *end_elem;
     {
       unsigned int ch;
@@ -3014,7 +3011,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
   build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
 	 re_charset_t *mbcset;
 	 int *coll_sym_alloc;
-	 re_bitset_ptr_t sbcset;
+	 bitset_t sbcset;
 	 const unsigned char *name;
     {
       int32_t elem, idx;
@@ -3099,7 +3096,7 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
 						   _NL_COLLATE_SYMB_EXTRAMB);
     }
 #endif
-  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
 #ifdef RE_ENABLE_I18N
   mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
 #endif /* RE_ENABLE_I18N */
@@ -3309,12 +3306,12 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
       mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
       if (BE (mbc_tree == NULL, 0))
 	goto parse_bracket_exp_espace;
-      for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
+      for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
 	if (sbcset[sbc_idx])
 	  break;
       /* If there are no bits set in sbcset, there is no point
 	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */
-      if (sbc_idx < BITSET_UINTS)
+      if (sbc_idx < BITSET_WORDS)
 	{
           /* Build a tree for simple bracket.  */
           br_token.type = SIMPLE_BRACKET;
@@ -3464,7 +3461,7 @@ build_equiv_class (sbcset, mbcset, equiv_class_alloc, name)
 #else /* not RE_ENABLE_I18N */
 build_equiv_class (sbcset, name)
 #endif /* not RE_ENABLE_I18N */
-     re_bitset_ptr_t sbcset;
+     bitset_t sbcset;
      const unsigned char *name;
 {
 #if defined _LIBC
@@ -3560,7 +3557,7 @@ build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax)
 build_charclass (trans, sbcset, class_name, syntax)
 #endif /* not RE_ENABLE_I18N */
      RE_TRANSLATE_TYPE trans;
-     re_bitset_ptr_t sbcset;
+     bitset_t sbcset;
      const unsigned char *class_name;
      reg_syntax_t syntax;
 {
@@ -3649,7 +3646,7 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
   re_token_t br_token;
   bin_tree_t *tree;
 
-  sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS);
+  sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
 #ifdef RE_ENABLE_I18N
   mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
 #endif /* RE_ENABLE_I18N */
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 0096bf7c91..681be1f52b 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -39,6 +39,9 @@
 #if defined HAVE_WCTYPE_H || defined _LIBC
 # include <wctype.h>
 #endif /* HAVE_WCTYPE_H || _LIBC */
+#if defined HAVE_STDBOOL_H || defined _LIBC
+# include <stdbool.h>
+#endif /* HAVE_STDBOOL_H || _LIBC */
 #if defined _LIBC
 # include <bits/libc-lock.h>
 #else
@@ -120,26 +123,31 @@
 extern const char __re_error_msgid[] attribute_hidden;
 extern const size_t __re_error_msgid_idx[] attribute_hidden;
 
-/* Number of bits in an unsinged int.  */
-#define UINT_BITS (sizeof (unsigned int) * CHAR_BIT)
-/* Number of unsigned int in an bit_set.  */
-#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
-typedef unsigned int bitset[BITSET_UINTS];
-typedef unsigned int *re_bitset_ptr_t;
-typedef const unsigned int *re_const_bitset_ptr_t;
-
-#define bitset_set(set,i) (set[i / UINT_BITS] |= 1u << i % UINT_BITS)
-#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1u << i % UINT_BITS))
-#define bitset_contain(set,i) (set[i / UINT_BITS] & (1u << i % UINT_BITS))
-#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_set_all(set) \
-  memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
-#define bitset_copy(dest,src) \
-  memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
-static inline void bitset_not (bitset set);
-static inline void bitset_merge (bitset dest, const bitset src);
-static inline void bitset_not_merge (bitset dest, const bitset src);
-static inline void bitset_mask (bitset dest, const bitset src);
+/* An integer used to represent a set of bits.  It must be unsigned,
+   and must be at least as wide as unsigned int.  */
+typedef unsigned long int bitset_word_t;
+/* All bits set in a bitset_word_t.  */
+#define BITSET_WORD_MAX ULONG_MAX
+/* Number of bits in a bitset_word_t.  */
+#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
+/* Number of bitset_word_t in a bit_set.  */
+#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
+typedef bitset_word_t bitset_t[BITSET_WORDS];
+typedef bitset_word_t *re_bitset_ptr_t;
+typedef const bitset_word_t *re_const_bitset_ptr_t;
+
+#define bitset_set(set,i) \
+  (set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)
+#define bitset_clear(set,i) \
+  (set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))
+#define bitset_contain(set,i) \
+  (set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))
+#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
+#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
+#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
+static inline void bitset_not (bitset_t set);
+static inline void bitset_merge (bitset_t dest, const bitset_t src);
+static inline void bitset_mask (bitset_t dest, const bitset_t src);
 
 #define PREV_WORD_CONSTRAINT 0x0001
 #define PREV_NOTWORD_CONSTRAINT 0x0002
@@ -648,8 +656,8 @@ struct re_dfa_t
   int nbackref; /* The number of backreference in this dfa.  */
 
   /* Bitmap expressing which backreference is used.  */
-  unsigned int used_bkref_map;
-  unsigned int completed_bkref_map;
+  bitset_word_t used_bkref_map;
+  bitset_word_t completed_bkref_map;
 
   unsigned int has_plural_match : 1;
   /* If this dfa has "multibyte node", which is a backreference or
@@ -660,7 +668,7 @@ struct re_dfa_t
   unsigned int map_notascii : 1;
   unsigned int word_ops_used : 1;
   int mb_cur_max;
-  bitset word_char;
+  bitset_t word_char;
   reg_syntax_t syntax;
   int *subexp_map;
 #ifdef DEBUG
@@ -734,34 +742,26 @@ typedef struct
 
 /* Inline functions for bitset operation.  */
 static inline void
-bitset_not (bitset set)
+bitset_not (bitset_t set)
 {
   int bitset_i;
-  for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
     set[bitset_i] = ~set[bitset_i];
 }
 
 static inline void
-bitset_merge (bitset dest, const bitset src)
+bitset_merge (bitset_t dest, const bitset_t src)
 {
   int bitset_i;
-  for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
     dest[bitset_i] |= src[bitset_i];
 }
 
 static inline void
-bitset_not_merge (bitset dest, const bitset src)
-{
-  int i;
-  for (i = 0; i < BITSET_UINTS; ++i)
-    dest[i] |= ~src[i];
-}
-
-static inline void
-bitset_mask (bitset dest, const bitset src)
+bitset_mask (bitset_t dest, const bitset_t src)
 {
   int bitset_i;
-  for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
+  for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
     dest[bitset_i] &= src[bitset_i];
 }
 
diff --git a/posix/regexec.c b/posix/regexec.c
index 9df5574cdb..03f705a3a3 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -213,7 +213,7 @@ static unsigned int find_collation_sequence_value (const unsigned char *mbs,
 static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
 				       const re_dfastate_t *state,
 				       re_node_set *states_node,
-				       bitset *states_ch) internal_function;
+				       bitset_t *states_ch) internal_function;
 static int check_node_accept (const re_match_context_t *mctx,
 			      const re_token_t *node, int idx)
      internal_function;
@@ -1980,9 +1980,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
 		  if (ent->node != node)
 		    continue;
 
-		  if (subexp_idx
-		      < CHAR_BIT * sizeof ent->eps_reachable_subexps_map
-		      && !(ent->eps_reachable_subexps_map & (1u << subexp_idx)))
+		  if (subexp_idx < BITSET_WORD_BITS
+		      && !(ent->eps_reachable_subexps_map
+			   & ((bitset_word_t) 1 << subexp_idx)))
 		    continue;
 
 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
@@ -2008,9 +2008,9 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
 		  if (cpos == 0 && (boundaries & 2))
 		    return 0;
 
-		  if (subexp_idx
-		      < CHAR_BIT * sizeof ent->eps_reachable_subexps_map)
-		    ent->eps_reachable_subexps_map &= ~(1u << subexp_idx);
+		  if (subexp_idx < BITSET_WORD_BITS)
+		    ent->eps_reachable_subexps_map
+		      &= ~((bitset_word_t) 1 << subexp_idx);
 	        }
 	      while (ent++->more);
 	    }
@@ -2477,8 +2477,9 @@ check_subexp_matching_top (mctx, cur_nodes, str_idx)
     {
       int node = cur_nodes->elems[node_idx];
       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
-	  && dfa->nodes[node].opr.idx < CHAR_BIT * sizeof dfa->used_bkref_map
-	  && dfa->used_bkref_map & (1u << dfa->nodes[node].opr.idx))
+	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
+	  && (dfa->used_bkref_map
+	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
 	{
 	  err = match_ctx_add_subtop (mctx, node, str_idx);
 	  if (BE (err != REG_NOERROR, 0))
@@ -3363,31 +3364,37 @@ build_trtable (dfa, state)
 {
   reg_errcode_t err;
   int i, j, ch, need_word_trtable = 0;
-  unsigned int elem, mask;
-  int dests_node_malloced = 0, dest_states_malloced = 0;
+  bitset_word_t elem, mask;
+  bool dests_node_malloced = false;
+  bool dest_states_malloced = false;
   int ndests; /* Number of the destination states from `state'.  */
   re_dfastate_t **trtable;
   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
   re_node_set follows, *dests_node;
-  bitset *dests_ch;
-  bitset acceptable;
+  bitset_t *dests_ch;
+  bitset_t acceptable;
+
+  struct dests_alloc
+  {
+    re_node_set dests_node[SBC_MAX];
+    bitset_t dests_ch[SBC_MAX];
+  } *dests_alloc;
 
   /* We build DFA states which corresponds to the destination nodes
      from `state'.  `dests_node[i]' represents the nodes which i-th
      destination state contains, and `dests_ch[i]' represents the
      characters which i-th destination state accepts.  */
-  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
-    dests_node = (re_node_set *)
-      alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+  if (__libc_use_alloca (sizeof (struct dests_alloc)))
+    dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
   else
     {
-      dests_node = (re_node_set *)
-	malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
-      if (BE (dests_node == NULL, 0))
+      dests_alloc = re_malloc (struct dests_alloc, 1);
+      if (BE (dests_alloc == NULL, 0))
 	return 0;
-      dests_node_malloced = 1;
+      dests_node_malloced = true;
     }
-  dests_ch = (bitset *) (dests_node + SBC_MAX);
+  dests_node = dests_alloc->dests_node;
+  dests_ch = dests_alloc->dests_ch;
 
   /* Initialize transiton table.  */
   state->word_trtable = state->trtable = NULL;
@@ -3398,7 +3405,7 @@ build_trtable (dfa, state)
   if (BE (ndests <= 0, 0))
     {
       if (dests_node_malloced)
-	free (dests_node);
+	free (dests_alloc);
       /* Return 0 in case of an error, 1 otherwise.  */
       if (ndests == 0)
 	{
@@ -3413,7 +3420,7 @@ build_trtable (dfa, state)
   if (BE (err != REG_NOERROR, 0))
     goto out_free;
 
-  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
+  if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
 			 + ndests * 3 * sizeof (re_dfastate_t *)))
     dest_states = (re_dfastate_t **)
       alloca (ndests * 3 * sizeof (re_dfastate_t *));
@@ -3430,10 +3437,10 @@ out_free:
 	  for (i = 0; i < ndests; ++i)
 	    re_node_set_free (dests_node + i);
 	  if (dests_node_malloced)
-	    free (dests_node);
+	    free (dests_alloc);
 	  return 0;
 	}
-      dest_states_malloced = 1;
+      dest_states_malloced = true;
     }
   dest_states_word = dest_states + ndests;
   dest_states_nl = dest_states_word + ndests;
@@ -3495,8 +3502,8 @@ out_free:
 	goto out_free;
 
       /* For all characters ch...:  */
-      for (i = 0; i < BITSET_UINTS; ++i)
-	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
 	     elem;
 	     mask <<= 1, elem >>= 1, ++ch)
 	  if (BE (elem & 1, 0))
@@ -3526,8 +3533,8 @@ out_free:
 	goto out_free;
 
       /* For all characters ch...:  */
-      for (i = 0; i < BITSET_UINTS; ++i)
-	for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1;
+      for (i = 0; i < BITSET_WORDS; ++i)
+	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
 	     elem;
 	     mask <<= 1, elem >>= 1, ++ch)
 	  if (BE (elem & 1, 0))
@@ -3568,7 +3575,7 @@ out_free:
     re_node_set_free (dests_node + i);
 
   if (dests_node_malloced)
-    free (dests_node);
+    free (dests_alloc);
 
   return 1;
 }
@@ -3583,13 +3590,13 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
      const re_dfa_t *dfa;
      const re_dfastate_t *state;
      re_node_set *dests_node;
-     bitset *dests_ch;
+     bitset_t *dests_ch;
 {
   reg_errcode_t err;
   int result;
   int i, j, k;
   int ndests; /* Number of the destinations from `state'.  */
-  bitset accepts; /* Characters a node can accept.  */
+  bitset_t accepts; /* Characters a node can accept.  */
   const re_node_set *cur_nodes = &state->nodes;
   bitset_empty (accepts);
   ndests = 0;
@@ -3624,7 +3631,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 #ifdef RE_ENABLE_I18N
       else if (type == OP_UTF8_PERIOD)
         {
-	  memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
+	  memset (accepts, '\xff', sizeof (bitset_t) / 2);
 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
 	    bitset_clear (accepts, '\n');
 	  if (dfa->syntax & RE_DOT_NOT_NULL)
@@ -3640,7 +3647,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 	{
 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
 	    {
-	      int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
+	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
 	      bitset_empty (accepts);
 	      if (accepts_newline)
 		bitset_set (accepts, NEWLINE_CHAR);
@@ -3655,7 +3662,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 
 	  if (constraint & NEXT_WORD_CONSTRAINT)
 	    {
-	      unsigned int any_set = 0;
+	      bitset_word_t any_set = 0;
 	      if (type == CHARACTER && !node->word_char)
 		{
 		  bitset_empty (accepts);
@@ -3663,18 +3670,18 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 		}
 #ifdef RE_ENABLE_I18N
 	      if (dfa->mb_cur_max > 1)
-		for (j = 0; j < BITSET_UINTS; ++j)
+		for (j = 0; j < BITSET_WORDS; ++j)
 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
 	      else
 #endif
-		for (j = 0; j < BITSET_UINTS; ++j)
+		for (j = 0; j < BITSET_WORDS; ++j)
 		  any_set |= (accepts[j] &= dfa->word_char[j]);
 	      if (!any_set)
 		continue;
 	    }
 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
 	    {
-	      unsigned int any_set = 0;
+	      bitset_word_t any_set = 0;
 	      if (type == CHARACTER && node->word_char)
 		{
 		  bitset_empty (accepts);
@@ -3682,11 +3689,11 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 		}
 #ifdef RE_ENABLE_I18N
 	      if (dfa->mb_cur_max > 1)
-		for (j = 0; j < BITSET_UINTS; ++j)
+		for (j = 0; j < BITSET_WORDS; ++j)
 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
 	      else
 #endif
-		for (j = 0; j < BITSET_UINTS; ++j)
+		for (j = 0; j < BITSET_WORDS; ++j)
 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
 	      if (!any_set)
 		continue;
@@ -3697,10 +3704,10 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 	 state.  Above, we make sure that accepts is not empty.  */
       for (j = 0; j < ndests; ++j)
 	{
-	  bitset intersec; /* Intersection sets, see below.  */
-	  bitset remains;
+	  bitset_t intersec; /* Intersection sets, see below.  */
+	  bitset_t remains;
 	  /* Flags, see below.  */
-	  int has_intersec, not_subset, not_consumed;
+	  bitset_word_t has_intersec, not_subset, not_consumed;
 
 	  /* Optimization, skip if this state doesn't accept the character.  */
 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
@@ -3708,7 +3715,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 
 	  /* Enumerate the intersection set of this state and `accepts'.  */
 	  has_intersec = 0;
-	  for (k = 0; k < BITSET_UINTS; ++k)
+	  for (k = 0; k < BITSET_WORDS; ++k)
 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
 	  /* And skip if the intersection set is empty.  */
 	  if (!has_intersec)
@@ -3716,7 +3723,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
 
 	  /* Then check if this state is a subset of `accepts'.  */
 	  not_subset = not_consumed = 0;
-	  for (k = 0; k < BITSET_UINTS; ++k)
+	  for (k = 0; k < BITSET_WORDS; ++k)
 	    {
 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];