summary refs log tree commit diff
path: root/posix
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2003-11-18 07:25:02 +0000
committerUlrich Drepper <drepper@redhat.com>2003-11-18 07:25:02 +0000
commit5f93cd52f626b77301c5e1db3d973eefc534323b (patch)
tree8bf877d6a8b552be0cd43b88c7540cdbc63397d6 /posix
parent5ddb5bf5fbceb6f44f04f7267eb214eee5fc2f90 (diff)
downloadglibc-5f93cd52f626b77301c5e1db3d973eefc534323b.tar.gz
glibc-5f93cd52f626b77301c5e1db3d973eefc534323b.tar.xz
glibc-5f93cd52f626b77301c5e1db3d973eefc534323b.zip
Update.
2003-11-17  Jakub Jelinek  <jakub@redhat.com>

	* posix/regcomp.c (optimize_utf8): Optimize multi-byte chars as
	well.
	* posix/bug-regex20.c (tests): Add new tests.  Multi-byte char
	followed by dup operator is expected to be optimized.

	* posix/regexec.c (check_node_accept_bytes): Move nrules and j
	variables to the block where they are only used, initialize
	nrules only immediately before using it.

2003-11-15  Andreas Jaeger  <aj@suse.de>

	* sysdeps/x86_64/fpu/s_scalbnl.S: New file.
	* sysdeps/x86_64/fpu/s_truncl.S: New file.
	* sysdeps/x86_64/fpu/s_nearbyintl.S: New file.
	* sysdeps/x86_64/fpu/s_floorl.S: New file.
	* sysdeps/x86_64/fpu/s_ilogbl.S: New file.
	* sysdeps/x86_64/fpu/e_remainderl.S: New file.

	* math/libm-test.inc (floor_test): Test also ±0.25.
	(ceil_test): Test -0.25.
Diffstat (limited to 'posix')
-rw-r--r--posix/bug-regex20.c22
-rw-r--r--posix/regcomp.c13
-rw-r--r--posix/regexec.c7
3 files changed, 31 insertions, 11 deletions
diff --git a/posix/bug-regex20.c b/posix/bug-regex20.c
index 11b9484faf..74662e6246 100644
--- a/posix/bug-regex20.c
+++ b/posix/bug-regex20.c
@@ -43,15 +43,35 @@ static struct
      \xe2\x80\x94	EM DASH  */
   /* Should be optimized.  */
   {RE_SYNTAX_POSIX_BASIC, "foo", "b\xc3\xa4rfoob\xc3\xa4z", 4, 1},
+  {RE_SYNTAX_POSIX_BASIC, "b\xc3\xa4z", "b\xc3\xa4rfoob\xc3\xa4z", 7, 1},
+  {RE_SYNTAX_POSIX_BASIC, "b\xc3\xa4*z", "b\xc3\xa4rfoob\xc3\xa4z", 7, 1},
+  {RE_SYNTAX_POSIX_BASIC, "b\xc3\xa4*z", "b\xc3\xa4rfoobz", 7, 1},
+  {RE_SYNTAX_POSIX_BASIC, "b\xc3\xa4\\+z",
+   "b\xc3\xa4rfoob\xc3\xa4\xc3\xa4z", 7, 1},
+  {RE_SYNTAX_POSIX_BASIC, "b\xc3\xa4\\?z", "b\xc3\xa4rfoob\xc3\xa4z", 7, 1},
+  {RE_SYNTAX_POSIX_BASIC, "b\xc3\xa4\\{1,2\\}z",
+   "b\xc3\xa4rfoob\xc3\xa4z", 7, 1},
   {RE_SYNTAX_POSIX_BASIC, "^x\\|xy*z$", "\xc3\xb6xyyz", 2, 1},
   {RE_SYNTAX_POSIX_BASIC, "^x\\\\y\\{6\\}z\\+", "x\\yyyyyyzz\xc3\xb6", 0, 1},
   {RE_SYNTAX_POSIX_BASIC, "^x\\\\y\\{2,36\\}z\\+", "x\\yzz\xc3\xb6", -1, 1},
   {RE_SYNTAX_POSIX_BASIC, "^x\\\\y\\{,3\\}z\\+", "x\\yyyzz\xc3\xb6", 0, 1},
+  {RE_SYNTAX_POSIX_BASIC, "^x\\|x\xc3\xa4*z$",
+   "\xc3\xb6x\xc3\xa4\xc3\xa4z", 2, 1},
+  {RE_SYNTAX_POSIX_BASIC, "^x\\\\\xc3\x84\\{6\\}z\\+",
+   "x\\\xc3\x84\xc3\x84\xc3\x84\xc3\x84\xc3\x84\xc3\x84zz\xc3\xb6", 0, 1},
+  {RE_SYNTAX_POSIX_BASIC, "^x\\\\\xc3\x84\\{2,36\\}z\\+",
+   "x\\\xc3\x84zz\xc3\xb6", -1, 1},
+  {RE_SYNTAX_POSIX_BASIC, "^x\\\\\xc3\x84\\{,3\\}z\\+",
+   "x\\\xc3\x84\xc3\x84\xc3\x84zz\xc3\xb6", 0, 1},
   {RE_SYNTAX_POSIX_BASIC, "x[C]y", "axCy", 1, 1},
   {RE_SYNTAX_POSIX_BASIC, "x[ABC]y", "axCy", 1, 1},
   {RE_SYNTAX_POSIX_BASIC, "\\`x\\|z\\'", "x\xe2\x80\x94", 0, 1},
   {RE_SYNTAX_POSIX_BASIC, "\\(xy\\)z\\1a\\1", "\xe2\x80\x94xyzxyaxy\xc3\x84", 3, 1},
   {RE_SYNTAX_POSIX_BASIC, "xy\\?z", "\xc3\x84xz\xc3\xb6", 2, 1},
+  {RE_SYNTAX_POSIX_BASIC, "\\`\xc3\x84\\|z\\'", "\xc3\x84\xe2\x80\x94", 0, 1},
+  {RE_SYNTAX_POSIX_BASIC, "\\(x\xc3\x84\\)z\\1\x61\\1",
+   "\xe2\x80\x94x\xc3\x84zx\xc3\x84\x61x\xc3\x84\xc3\x96", 3, 1},
+  {RE_SYNTAX_POSIX_BASIC, "x\xc3\x96\\?z", "\xc3\x84xz\xc3\xb6", 2, 1},
   {RE_SYNTAX_POSIX_EXTENDED, "foo", "b\xc3\xa4rfoob\xc3\xa4z", 4, 1},
   {RE_SYNTAX_POSIX_EXTENDED, "^x|xy*z$", "\xc3\xb6xyyz", 2, 1},
   {RE_SYNTAX_POSIX_EXTENDED, "^x\\\\y{6}z+", "x\\yyyyyyzz\xc3\xb6", 0, 1},
@@ -64,7 +84,6 @@ static struct
   {RE_SYNTAX_POSIX_EXTENDED, "xy?z", "\xc3\x84xz\xc3\xb6", 2, 1},
   /* Should not be optimized.  */
   {RE_SYNTAX_POSIX_BASIC, "x.y", "ax\xe2\x80\x94yz", 1, 0},
-  {RE_SYNTAX_POSIX_BASIC, "x\xc3\x96*y", "ax\xc3\x96\xc3\x96yz", 1, 0},
   {RE_SYNTAX_POSIX_BASIC, "x[\xc3\x84\xc3\xa4]y", "ax\xc3\xa4y", 1, 0},
   {RE_SYNTAX_POSIX_BASIC, "x[A-Z,]y", "axCy", 1, 0},
   {RE_SYNTAX_POSIX_BASIC, "x[^y]z", "ax\xe2\x80\x94z", 1, 0},
@@ -77,7 +96,6 @@ static struct
   {RE_SYNTAX_POSIX_BASIC, "a\\wz", "a\xc3\x84z", 0, 0},
   {RE_SYNTAX_POSIX_BASIC, "x\\Wz", "\xc3\x96x\xe2\x80\x94z", 2, 0},
   {RE_SYNTAX_POSIX_EXTENDED, "x.y", "ax\xe2\x80\x94yz", 1, 0},
-  {RE_SYNTAX_POSIX_EXTENDED, "x\xc3\x96*y", "ax\xc3\x96\xc3\x96yz", 1, 0},
   {RE_SYNTAX_POSIX_EXTENDED, "x[\xc3\x84\xc3\xa4]y", "ax\xc3\xa4y", 1, 0},
   {RE_SYNTAX_POSIX_EXTENDED, "x[A-Z,]y", "axCy", 1, 0},
   {RE_SYNTAX_POSIX_EXTENDED, "x[^y]z", "ax\xe2\x80\x94z", 1, 0},
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 68ce551c3a..b5f0c92a3a 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -965,17 +965,14 @@ static void
 optimize_utf8 (dfa)
      re_dfa_t *dfa;
 {
-  int node, i;
+  int node, i, mb_chars = 0;
 
   for (node = 0; node < dfa->nodes_len; ++node)
     switch (dfa->nodes[node].type)
       {
       case CHARACTER:
-        /* Chars >= 0x80 are optimizable in some cases (e.g. when not
-	   followed by DUP operator, not in bracket etc.).
-	   For now punt on them all.  */
 	if (dfa->nodes[node].opr.c >= 0x80)
-	  return;
+	  mb_chars = 1;
 	break;
       case ANCHOR:
 	switch (dfa->nodes[node].opr.idx)
@@ -1010,6 +1007,12 @@ optimize_utf8 (dfa)
 	return;
       }
 
+  if (mb_chars)
+    for (node = 0; node < dfa->nodes_len; ++node)
+      if (dfa->nodes[node].type == CHARACTER
+	  && dfa->nodes[node].opr.c >= 0x80)
+	dfa->nodes[node].mb_partial = 0;
+
   /* The search can be in single byte locale.  */
   dfa->mb_cur_max = 1;
   dfa->is_utf8 = 0;
diff --git a/posix/regexec.c b/posix/regexec.c
index 7470197506..09756b7691 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -3483,10 +3483,6 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
   int elem_len = re_string_elem_size_at (input, str_idx);
   int char_len = re_string_char_size_at (input, str_idx);
   int i;
-# ifdef _LIBC
-  int j;
-  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
-# endif /* _LIBC */
   if (elem_len <= 1 && char_len <= 1)
     return 0;
   if (node->type == OP_PERIOD)
@@ -3505,6 +3501,8 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
 # ifdef _LIBC
       const unsigned char *pin = ((char *) re_string_get_buffer (input)
 				  + str_idx);
+      int j;
+      uint32_t nrules;
 # endif /* _LIBC */
       int match_len = 0;
       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
@@ -3529,6 +3527,7 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
 	}
 
 # ifdef _LIBC
+      nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
       if (nrules != 0)
 	{
 	  unsigned int in_collseq = 0;