summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 4
-rw-r--r-- posix/regcomp.c 36
2 files changed, 36 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 737c4ba196..73b1d419bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2012-01-03  Ulrich Drepper  <drepper@gmail.com>
+
+	* posix/regcomp.c (init_word_char): Optimize a bit for sane encodings.
+
 2012-01-01  Ulrich Drepper  <drepper@gmail.com>
 
 	* posix/getconf.c: Update copyright year.
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 34ee845081..6771dbb299 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1,5 +1,5 @@
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002-2007,2009,2010,2011 Free Software Foundation, Inc.
+   Copyright (C) 2002-2007,2009,2010,2011,2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
 
@@ -926,10 +926,38 @@ static void
 internal_function
 init_word_char (re_dfa_t *dfa)
 {
-  int i, j, ch;
   dfa->word_ops_used = 1;
-  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
-    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
+  int i = 0;
+  int ch = 0;
+  if (BE (dfa->map_notascii == 0, 1))
+    {
+      if (sizeof (dfa->word_char[0]) == 8)
+	{
+	  dfa->word_char[0] = UINT64_C (0x03ff000000000000);
+	  dfa->word_char[1] = UINT64_C (0x07fffffe87fffffe);
+	  i = 2;
+	}
+      else if (sizeof (dfa->word_char[0]) == 4)
+	{
+	  dfa->word_char[0] = UINT32_C (0x00000000);
+	  dfa->word_char[1] = UINT32_C (0x03ff0000);
+	  dfa->word_char[2] = UINT32_C (0x87fffffe);
+	  dfa->word_char[3] = UINT32_C (0x07fffffe);
+	  i = 4;
+	}
+      else
+	abort ();
+      ch = 128;
+
+      if (BE (dfa->is_utf8, 1))
+	{
+	  memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
+	  return;
+	}
+    }
+
+  for (; i < BITSET_WORDS; ++i)
+    for (int j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
       if (isalnum (ch) || ch == '_')
 	dfa->word_char[i] |= (bitset_word_t) 1 << j;
 }