about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2016-10-06 12:15:47 -0400
committer Rich Felker <dalias@aerifal.cx> 2016-10-06 18:47:57 -0400
commit aee6abb2400b9a955c2b41166db1c22f63ad42ef (patch)
tree 09d65ed801ea04cb1c2251439b747be06ff944e2
parent c3edc06d1e1360f3570db9155d6b318ae0d0f0f7 (diff)
download musl-aee6abb2400b9a955c2b41166db1c22f63ad42ef.tar.gz
musl-aee6abb2400b9a955c2b41166db1c22f63ad42ef.tar.xz
musl-aee6abb2400b9a955c2b41166db1c22f63ad42ef.zip
fix regexec with haystack strings longer than INT_MAX
we inherited from TRE regexec code that's utterly wrong with respect
to the integer types it's using. while it doesn't appear that
compilers are producing unsafe output, signed integer overflows seem
to happen, and regexec fails to find matches past offset INT_MAX.

this patch fixes the type of all variables/fields used to store
offsets in the string from int to regoff_t. after the changes, basic
testing showed that regexec can now find matches past 2GB (INT_MAX)
and past 4GB on x86_64, and code generation is unchanged on i386.
-rw-r--r-- src/regex/regexec.c 54
1 files changed, 28 insertions, 26 deletions
diff --git a/src/regex/regexec.c b/src/regex/regexec.c
index dd523195..5c4cb922 100644
--- a/src/regex/regexec.c
+++ b/src/regex/regexec.c
@@ -44,7 +44,7 @@
 
 static void
 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
-		const tre_tnfa_t *tnfa, int *tags, int match_eo);
+		const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo);
 
 /***********************************************************************
  from tre-match-utils.h
@@ -97,7 +97,7 @@ tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
 static int
 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
-	      int *t1, int *t2)
+	      regoff_t *t1, regoff_t *t2)
 {
   int i;
   for (i = 0; i < num_tags; i++)
@@ -157,25 +157,25 @@ tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
 
 typedef struct {
   tre_tnfa_transition_t *state;
-  int *tags;
+  regoff_t *tags;
 } tre_tnfa_reach_t;
 
 typedef struct {
-  int pos;
-  int **tags;
+  regoff_t pos;
+  regoff_t **tags;
 } tre_reach_pos_t;
 
 
 static reg_errcode_t
 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
-		      int *match_tags, int eflags,
-		      int *match_end_ofs)
+		      regoff_t *match_tags, int eflags,
+		      regoff_t *match_end_ofs)
 {
   /* State variables required by GET_NEXT_WCHAR. */
   tre_char_t prev_c = 0, next_c = 0;
   const char *str_byte = string;
-  int pos = -1;
-  int pos_add_next = 1;
+  regoff_t pos = -1;
+  regoff_t pos_add_next = 1;
 #ifdef TRE_MBSTATE
   mbstate_t mbstate;
 #endif /* TRE_MBSTATE */
@@ -191,10 +191,10 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
   int *tag_i;
   int num_tags, i;
 
-  int match_eo = -1;	   /* end offset of match (-1 if no match found yet) */
+  regoff_t match_eo = -1;	   /* end offset of match (-1 if no match found yet) */
   int new_match = 0;
-  int *tmp_tags = NULL;
-  int *tmp_iptr;
+  regoff_t *tmp_tags = NULL;
+  regoff_t *tmp_iptr;
 
 #ifdef TRE_MBSTATE
   memset(&mbstate, '\0', sizeof(mbstate));
@@ -214,7 +214,7 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
 
     /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
      * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
-    if (num_tags > SIZE_MAX/(8 * sizeof(int) * tnfa->num_states))
+    if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states))
       goto error_exit;
 
     /* Likewise check rbytes. */
@@ -229,7 +229,7 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
     tbytes = sizeof(*tmp_tags) * num_tags;
     rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
     pbytes = sizeof(*reach_pos) * tnfa->num_states;
-    xbytes = sizeof(int) * num_tags;
+    xbytes = sizeof(regoff_t) * num_tags;
     total_bytes =
       (sizeof(long) - 1) * 4 /* for alignment paddings */
       + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
@@ -490,12 +490,12 @@ error_exit:
 */
 
 typedef struct {
-  int pos;
+  regoff_t pos;
   const char *str_byte;
   tre_tnfa_transition_t *state;
   int state_id;
   int next_c;
-  int *tags;
+  regoff_t *tags;
 #ifdef TRE_MBSTATE
   mbstate_t mbstate;
 #endif /* TRE_MBSTATE */
@@ -591,13 +591,13 @@ typedef struct tre_backtrack_struct {
 
 static reg_errcode_t
 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
-		       int *match_tags, int eflags, int *match_end_ofs)
+		       regoff_t *match_tags, int eflags, regoff_t *match_end_ofs)
 {
   /* State variables required by GET_NEXT_WCHAR. */
   tre_char_t prev_c = 0, next_c = 0;
   const char *str_byte = string;
-  int pos = 0;
-  int pos_add_next = 1;
+  regoff_t pos = 0;
+  regoff_t pos_add_next = 1;
 #ifdef TRE_MBSTATE
   mbstate_t mbstate;
 #endif /* TRE_MBSTATE */
@@ -610,15 +610,16 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
      started from. */
   int next_c_start;
   const char *str_byte_start;
-  int pos_start = -1;
+  regoff_t pos_start = -1;
 #ifdef TRE_MBSTATE
   mbstate_t mbstate_start;
 #endif /* TRE_MBSTATE */
 
   /* End offset of best match so far, or -1 if no match found yet. */
-  int match_eo = -1;
+  regoff_t match_eo = -1;
   /* Tag arrays. */
-  int *next_tags, *tags = NULL;
+  int *next_tags;
+  regoff_t *tags = NULL;
   /* Current TNFA state. */
   tre_tnfa_transition_t *state;
   int *states_seen = NULL;
@@ -768,8 +769,9 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
 	  /* This is a back reference state.  All transitions leaving from
 	     this state have the same back reference "assertion".  Instead
 	     of reading the next character, we match the back reference. */
-	  int so, eo, bt = trans_i->u.backref;
-	  int bt_len;
+	  regoff_t so, eo;
+	  int bt = trans_i->u.backref;
+	  regoff_t bt_len;
 	  int result;
 
 	  /* Get the substring we need to match against.  Remember to
@@ -926,7 +928,7 @@ tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
    endpoint values. */
 static void
 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
-		const tre_tnfa_t *tnfa, int *tags, int match_eo)
+		const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo)
 {
   tre_submatch_data_t *submatch_data;
   unsigned int i, j;
@@ -996,7 +998,7 @@ regexec(const regex_t *restrict preg, const char *restrict string,
 {
   tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
   reg_errcode_t status;
-  int *tags = NULL, eo;
+  regoff_t *tags = NULL, eo;
   if (tnfa->cflags & REG_NOSUB) nmatch = 0;
   if (tnfa->num_tags > 0 && nmatch > 0)
     {