 ChangeLog                           | 14 ++++++++++++++
 sysdeps/tile/tilegx/memchr.c        |  2 +-
 sysdeps/tile/tilegx/rawmemchr.c     |  2 +-
 sysdeps/tile/tilegx/strchr.c        |  6 +++---
 sysdeps/tile/tilegx/strchrnul.c     |  6 +++---
 sysdeps/tile/tilegx/string-endian.h | 25 +++++++++++++++++++++++++
 sysdeps/tile/tilegx/strlen.c        |  2 +-
 sysdeps/tile/tilegx/strnlen.c       |  2 +-
 sysdeps/tile/tilegx/strrchr.c       |  6 +++---
 sysdeps/tile/tilegx/strstr.c        | 24 ++++++++++++------------
 10 files changed, 64 insertions(+), 25 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 333012ba81..ded6845092 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2017-12-05  Chris Metcalf  <cmetcalf@mellanox.com>
+
+	* sysdeps/tile/tilegx/string-endian.h (VECOP): Provide working
+	replacements for __insn_xxx builtins for v1cmpeq, v1cmpltu,
+	v1cmpne, v1add, v1shru, v1shl (register and immediate versions).
+	* sysdeps/tile/tilegx/memchr.c (__memchr): Use VECOP function
+	instead of __insn_xxx.
+	* sysdeps/tile/tilegx/rawmemchr.c (__rawmemchr): Likewise.
+	* sysdeps/tile/tilegx/strstr.c (strcasechr): Likewise.
+	* sysdeps/tile/tilegx/strrchr.c (strrchr): Likewise.
+	* sysdeps/tile/tilegx/strlen.c (strlen): Likewise.
+	* sysdeps/tile/tilegx/strchrnul.c (__strchrnul): Likewise.
+	* sysdeps/tile/tilegx/strchr.c (strchr): Likewise.
+
 2017-12-05  Florian Weimer  <fweimer@redhat.com>
 
 	Linux: Implement interfaces for memory protection keys
diff --git a/sysdeps/tile/tilegx/memchr.c b/sysdeps/tile/tilegx/memchr.c
index 7da0f79da2..38c0da6737 100644
--- a/sysdeps/tile/tilegx/memchr.c
+++ b/sysdeps/tile/tilegx/memchr.c
@@ -58,7 +58,7 @@ __memchr (const void *s, int c, size_t n)
   /* Compute the address of the word containing the last byte. */
   last_word_ptr = (const uint64_t *) ((uintptr_t) last_byte_ptr & -8);
 
-  while ((bits = __insn_v1cmpeq (v, goal)) == 0)
+  while ((bits = v1cmpeq (v, goal)) == 0)
     {
       if (__builtin_expect (p == last_word_ptr, 0))
         {
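
The loop above relies on the per-byte semantics of v1cmpeq: it compares the
eight byte lanes of its two operands and writes 0x01 into each result lane
where the bytes are equal, 0x00 elsewhere.  A minimal portable sketch of that
semantics (the helper name is hypothetical, not part of glibc):

  #include <stdint.h>

  /* Portable sketch of tilegx v1cmpeq: result lane i is 0x01 where the
     corresponding bytes of A and B are equal, 0x00 otherwise.  Hypothetical
     name; glibc uses the inline-asm wrapper from string-endian.h below.  */
  static inline uint64_t
  portable_v1cmpeq (uint64_t a, uint64_t b)
  {
    uint64_t bits = 0;
    for (int i = 0; i < 64; i += 8)
      if (((a >> i) & 0xff) == ((b >> i) & 0xff))
        bits |= 1ULL << i;
    return bits;
  }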
diff --git a/sysdeps/tile/tilegx/rawmemchr.c b/sysdeps/tile/tilegx/rawmemchr.c
index 54b4a5c1b8..3f5044c83e 100644
--- a/sysdeps/tile/tilegx/rawmemchr.c
+++ b/sysdeps/tile/tilegx/rawmemchr.c
@@ -36,7 +36,7 @@ __rawmemchr (const void *s, int c)
   uint64_t v = (*p | before_mask) ^ (goal & before_mask);
 
   uint64_t bits;
-  while ((bits = __insn_v1cmpeq (v, goal)) == 0)
+  while ((bits = v1cmpeq (v, goal)) == 0)
     v = *++p;
 
   return ((char *) p) + (CFZ (bits) >> 3);
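
The return line converts the match mask into a byte offset: CFZ locates the
first set bit (string-endian.h maps it to a trailing- or leading-zero count
depending on endianness; that part of the header is not shown in this diff),
and shifting right by 3 divides the bit index by 8.  A worked example with
hypothetical values, assuming little-endian:

  uint64_t bits = 0x0000000000010000ULL;   /* goal found in byte lane 2 */
  int first_bit = __builtin_ctzll (bits);  /* 16, the little-endian CFZ */
  int byte_off  = first_bit >> 3;          /* 16 / 8 = 2 */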
diff --git a/sysdeps/tile/tilegx/strchr.c b/sysdeps/tile/tilegx/strchr.c
index 36dfd31391..1a5eb5c927 100644
--- a/sysdeps/tile/tilegx/strchr.c
+++ b/sysdeps/tile/tilegx/strchr.c
@@ -38,16 +38,16 @@ strchr (const char *s, int c)
      match neither zero nor goal (we make sure the high bit of each byte
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
-  uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+  uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'. */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte. */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
         break;
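
The before_mask arithmetic merits a worked example.  Assuming MASK (s_int)
yields 0xff in every byte lane that precedes the start of the string (the
macro itself is outside this diff), a string starting at byte 2 of its
aligned word gives, on little-endian, with goal = 'a' replicated into every
lane:

  uint64_t before_mask = 0x000000000000ffffULL;   /* lanes 0 and 1 */
  /* v1shrui shifts each lane right by 1, so 0xff becomes 0x7f.  */
  uint64_t low7_mask   = 0x0000000000007f7fULL;
  /* Each masked lane becomes (byte | 0xff) ^ ('a' & 0x7f)
     = 0xff ^ 0x61 = 0x9e: high bit set (so not '\0') and low 7 bits the
     complement of 'a' (so not the goal byte either).  */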
diff --git a/sysdeps/tile/tilegx/strchrnul.c b/sysdeps/tile/tilegx/strchrnul.c
index e0f13b684e..e3024dda57 100644
--- a/sysdeps/tile/tilegx/strchrnul.c
+++ b/sysdeps/tile/tilegx/strchrnul.c
@@ -36,16 +36,16 @@ __strchrnul (const char *s, int c)
      match neither zero nor goal (we make sure the high bit of each byte
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
-  uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+  uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'. */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte. */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
         break;
diff --git a/sysdeps/tile/tilegx/string-endian.h b/sysdeps/tile/tilegx/string-endian.h
index fe9b073efb..6a3f882e60 100644
--- a/sysdeps/tile/tilegx/string-endian.h
+++ b/sysdeps/tile/tilegx/string-endian.h
@@ -56,3 +56,28 @@ static inline uint64_t copy_byte(uint8_t byte)
 {
   return __insn_shufflebytes(byte, 0, 0);
 }
+
+/* Implement the byte vector instructions using extended assembly.
+   The __insn_OP() builtins are buggy in current compiler versions.  */
+
+#define VECOP(OP)                                                       \
+  static inline uint64_t OP (uint64_t a, uint64_t b)                    \
+  {                                                                     \
+    uint64_t result;                                                    \
+    asm volatile (#OP " %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));   \
+    return result;                                                      \
+  }                                                                     \
+                                                                        \
+  static inline uint64_t OP ## i (uint64_t a, uint64_t b)               \
+  {                                                                     \
+    uint64_t result;                                                    \
+    asm volatile (#OP "i %0, %1, %2" : "=r"(result) : "r"(a), "I"(b));  \
+    return result;                                                      \
+  }
+
+VECOP(v1cmpeq)
+VECOP(v1cmpltu)
+VECOP(v1cmpne)
+VECOP(v1add)
+VECOP(v1shru)
+VECOP(v1shl)
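
For reference, VECOP (v1cmpeq) above expands mechanically to the following
pair of inline functions; the "I" constraint limits the second operand of
the immediate form to a value the instruction can encode (a small signed
immediate):

  static inline uint64_t v1cmpeq (uint64_t a, uint64_t b)
  {
    uint64_t result;
    asm volatile ("v1cmpeq %0, %1, %2" : "=r"(result) : "r"(a), "r"(b));
    return result;
  }

  static inline uint64_t v1cmpeqi (uint64_t a, uint64_t b)
  {
    uint64_t result;
    asm volatile ("v1cmpeqi %0, %1, %2" : "=r"(result) : "r"(a), "I"(b));
    return result;
  }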
diff --git a/sysdeps/tile/tilegx/strlen.c b/sysdeps/tile/tilegx/strlen.c
index 5cd04acc59..cebdf22af5 100644
--- a/sysdeps/tile/tilegx/strlen.c
+++ b/sysdeps/tile/tilegx/strlen.c
@@ -31,7 +31,7 @@ strlen (const char *s)
   uint64_t v = *p | MASK (s_int);
 
   uint64_t bits;
-  while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+  while ((bits = v1cmpeqi (v, 0)) == 0)
     v = *++p;
 
   return ((const char *) p) + (CFZ (bits) >> 3) - s;
diff --git a/sysdeps/tile/tilegx/strnlen.c b/sysdeps/tile/tilegx/strnlen.c
index 5d73a14926..c3560d208d 100644
--- a/sysdeps/tile/tilegx/strnlen.c
+++ b/sysdeps/tile/tilegx/strnlen.c
@@ -37,7 +37,7 @@ __strnlen (const char *s, size_t maxlen)
   uint64_t v = *p | MASK (s_int);
 
   uint64_t bits;
-  while ((bits = __insn_v1cmpeqi (v, 0)) == 0)
+  while ((bits = v1cmpeqi (v, 0)) == 0)
     {
       if (bytes_read >= maxlen)
 	{
diff --git a/sysdeps/tile/tilegx/strrchr.c b/sysdeps/tile/tilegx/strrchr.c
index 5a9049e1b9..51a08b71bf 100644
--- a/sysdeps/tile/tilegx/strrchr.c
+++ b/sysdeps/tile/tilegx/strrchr.c
@@ -34,16 +34,16 @@ strrchr (const char *s, int c)
      match neither zero nor goal (we make sure the high bit of each byte
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
-  uint64_t v = (*p | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+  uint64_t v = (*p | before_mask) ^ (goal & v1shrui (before_mask, 1));
   const char *found = NULL;
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'. */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte. */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       /* If we found the goal, record the last offset. */
       if (__builtin_expect (goal_matches != 0, 0))
diff --git a/sysdeps/tile/tilegx/strstr.c b/sysdeps/tile/tilegx/strstr.c
index 548a92045d..f82936a3c2 100644
--- a/sysdeps/tile/tilegx/strstr.c
+++ b/sysdeps/tile/tilegx/strstr.c
@@ -57,10 +57,10 @@ static uint64_t
 vec_tolower (uint64_t cc)
 {
   /* For uppercase letters, add 32 to convert to lower case.  */
-  uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1);
-  uint64_t less_than_A =  __insn_v1cmpltui (cc, 'A');
-  uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A);
-  return __insn_v1add (cc,__insn_v1shli (is_upper, 5));
+  uint64_t less_than_eq_Z = v1cmpltui (cc, 'Z' + 1);
+  uint64_t less_than_A = v1cmpltui (cc, 'A');
+  uint64_t is_upper = v1cmpne (less_than_eq_Z, less_than_A);
+  return v1add (cc, v1shli (is_upper, 5));
 }
 
 /* There is no strcasechr() defined, but needed for 1 byte case
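
vec_tolower above is branch-free: the two unsigned per-lane compares flag
bytes at or below 'Z' and bytes strictly below 'A'; exactly one of them is
true precisely for the uppercase letters, and shifting that 0x01 flag left
by 5 yields the 0x20 that separates the two cases in ASCII.  A scalar sketch
of the same computation (function name hypothetical):

  /* Scalar restatement of vec_tolower for a single byte.  */
  static inline unsigned char
  scalar_tolower (unsigned char c)
  {
    /* Exactly one compare is true iff 'A' <= c <= 'Z'.  */
    unsigned char is_upper = (c < 'Z' + 1) != (c < 'A');
    return c + (is_upper << 5);   /* adds 32 only to uppercase */
  }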
@@ -85,16 +85,16 @@ strcasechr (const char *s, int c)
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
   uint64_t v =
-    (vec_tolower (*p) | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1));
+    (vec_tolower (*p) | before_mask) ^ (goal & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'.  */
-      zero_matches = __insn_v1cmpeqi (v, 0);
+      zero_matches = v1cmpeqi (v, 0);
 
       /* Look for the goal byte.  */
-      goal_matches = __insn_v1cmpeq (v, goal);
+      goal_matches = v1cmpeq (v, goal);
 
       if (__builtin_expect ((zero_matches | goal_matches) != 0, 0))
         break;
@@ -146,14 +146,14 @@ STRSTR2 (const char *haystack_start, const char *needle)
      is 1, and the low 7 bits are all the opposite of the goal byte).  */
   const uint64_t before_mask = MASK (s_int);
   uint64_t v =
-    (vec_load (p) | before_mask) ^ (byte1 & __insn_v1shrui (before_mask, 1));
+    (vec_load (p) | before_mask) ^ (byte1 & v1shrui (before_mask, 1));
 
   uint64_t zero_matches, goal_matches;
   while (1)
     {
       /* Look for a terminating '\0'.  */
-      zero_matches = __insn_v1cmpeqi (v, 0);
-      uint64_t byte1_matches = __insn_v1cmpeq (v, byte1);
+      zero_matches = v1cmpeqi (v, 0);
+      uint64_t byte1_matches = v1cmpeq (v, byte1);
       if (__builtin_expect (zero_matches != 0, 0))
 	{
 	  /* This is the last vector.  Don't worry about matches
@@ -161,7 +161,7 @@ STRSTR2 (const char *haystack_start, const char *needle)
 	     back 1 byte to align it with the first byte, then and to
 	     check for both matching.  Each vector has a 1 in the LSB
 	     of the byte if there was match.  */
-	  uint64_t byte2_matches = __insn_v1cmpeq (v, byte2);
+	  uint64_t byte2_matches = v1cmpeq (v, byte2);
 	  goal_matches = byte1_matches & STRSHIFT (byte2_matches, 8);
 	  break;
 	}
@@ -175,7 +175,7 @@ STRSTR2 (const char *haystack_start, const char *needle)
 	    {
 	      /* 8-bytes starting 1 byte into v.  */
 	      v = __insn_dblalign (v, v2, (void*)1);
-	      uint64_t byte2_matches_shifted = __insn_v1cmpeq (v, byte2);
+	      uint64_t byte2_matches_shifted = v1cmpeq (v, byte2);
 	      goal_matches = byte1_matches & byte2_matches_shifted;
 	      if (__builtin_expect (goal_matches != 0, 0))
 		break;
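
The two-byte match in STRSTR2 lines up the second-byte hits with the
first-byte hits: byte1_matches flags every lane holding the needle's first
byte, byte2_matches flags every lane holding its second, and shifting the
latter back one lane before the AND keeps only lanes where the two bytes are
adjacent.  A worked example with hypothetical values, taking STRSHIFT
(defined in string-endian.h, not shown here) to be a plain right shift on
little-endian:

  /* Needle "ab"; the haystack word holds "xab" in lanes 0-2.  */
  uint64_t byte1_matches = 0x0000000000000100ULL;  /* 'a' in lane 1 */
  uint64_t byte2_matches = 0x0000000000010000ULL;  /* 'b' in lane 2 */
  uint64_t goal_matches  = byte1_matches & (byte2_matches >> 8);
  /* goal_matches == 0x100: the needle starts at byte offset 1.  */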