regexec: Fix off-by-one bug in weight comparison [BZ #23036]

Each weight is prefixed by its length, and that length counts only the weight data that follows, not the length prefix itself. This can be seen clearly from the find_idx function in string/strxfrm_l.c, for example. The old code behaved as if the length included the prefix, and therefore compared one additional byte past the end of the weight, leading to spurious comparison failures and incorrect further partitioning of character equivalence classes.
author: Florian Weimer <fweimer@redhat.com> 2018-07-10 11:18:26 +0200
committer: Florian Weimer <fweimer@redhat.com> 2018-07-10 11:18:26 +0200
commit: 7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969 (patch)
tree: 4e579c9471bc1d2d1a799ba005023fc7a7f4440b
parent: 4fa34da6793c442255fc003cf659c85b197ab29a (diff)
download: glibc-7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969.tar.gz
glibc-7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969.tar.xz
glibc-7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969.zip
2 files changed, 27 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index 8068171e2b..d18c24453f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2018-07-10  Florian Weimer  <fweimer@redhat.com>
 
+	[BZ #23036]
+	* posix/regexec.c (check_node_accept_bytes): When comparing
+	weights, do not compare an extra byte after the end of the
+	weights.
+
+2018-07-10  Florian Weimer  <fweimer@redhat.com>
+
 	* libio/readline.c: Fix copyright year.
 	* libio/tst-readline.c Likewise.
 	* nss/tst-nss-files-hosts-getent.c: Likewise.
diff --git a/posix/regexec.c b/posix/regexec.c
index 63aef97535..73644c2341 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -3878,30 +3878,27 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
 	      indirect = (const int32_t *)
 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
 	      int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
+	      int32_t rule = idx >> 24;
+	      idx &= 0xffffff;
 	      if (idx > 0)
-		for (i = 0; i < cset->nequiv_classes; ++i)
-		  {
-		    int32_t equiv_class_idx = cset->equiv_classes[i];
-		    size_t weight_len = weights[idx & 0xffffff];
-		    if (weight_len == weights[equiv_class_idx & 0xffffff]
-			&& (idx >> 24) == (equiv_class_idx >> 24))
-		      {
-			Idx cnt = 0;
-
-			idx &= 0xffffff;
-			equiv_class_idx &= 0xffffff;
-
-			while (cnt <= weight_len
-			       && (weights[equiv_class_idx + 1 + cnt]
-				   == weights[idx + 1 + cnt]))
-			  ++cnt;
-			if (cnt > weight_len)
-			  {
-			    match_len = elem_len;
-			    goto check_node_accept_bytes_match;
-			  }
-		      }
-		  }
+		{
+		  size_t weight_len = weights[idx];
+		  for (i = 0; i < cset->nequiv_classes; ++i)
+		    {
+		      int32_t equiv_class_idx = cset->equiv_classes[i];
+		      int32_t equiv_class_rule = equiv_class_idx >> 24;
+		      equiv_class_idx &= 0xffffff;
+		      if (weights[equiv_class_idx] == weight_len
+			  && equiv_class_rule == rule
+			  && memcmp (weights + idx + 1,
+				     weights + equiv_class_idx + 1,
+				     weight_len) == 0)
+			{
+			  match_len = elem_len;
+			  goto check_node_accept_bytes_match;
+			}
+		    }
+		}
 	    }
 	}
       else
author	Florian Weimer <fweimer@redhat.com>	2018-07-10 11:18:26 +0200
committer	Florian Weimer <fweimer@redhat.com>	2018-07-10 11:18:26 +0200
commit	7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969 (patch)
tree	4e579c9471bc1d2d1a799ba005023fc7a7f4440b
parent	4fa34da6793c442255fc003cf659c85b197ab29a (diff)
download	glibc-7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969.tar.gz glibc-7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969.tar.xz glibc-7b2f4cedf044ea83f53f6b43a5bf6871eb9ce969.zip