about summary refs log tree commit diff
path: root/sysdeps/x86_64/multiarch/strspn-c.c
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/x86_64/multiarch/strspn-c.c')
-rw-r--r-- sysdeps/x86_64/multiarch/strspn-c.c 154
1 file changed, 8 insertions, 146 deletions
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index be9e8ac0a8..6faa259fd7 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -1,5 +1,5 @@
 /* strspn with SSE4.2 intrinsics
-   Copyright (C) 2009 Free Software Foundation, Inc.
+   Copyright (C) 2009, 2010 Free Software Foundation, Inc.
    Contributed by Intel Corporation.
    This file is part of the GNU C Library.
 
@@ -20,6 +20,7 @@
 
 #include <nmmintrin.h>
 #include <string.h>
+#include "varshift.h"
 
 /* We use 0x12:
 	_SIDD_SBYTE_OPS
@@ -71,54 +72,7 @@ __strspn_sse42 (const char *s, const char *a)
       aligned = (const char *) ((size_t) a & -16L);
       __m128i mask0 = _mm_load_si128 ((__m128i *) aligned);
 
-      switch (offset)
-	{
-	case 1:
-	  mask = _mm_srli_si128 (mask0, 1);
-	  break;
-	case 2:
-	  mask = _mm_srli_si128 (mask0, 2);
-	  break;
-	case 3:
-	  mask = _mm_srli_si128 (mask0, 3);
-	  break;
-	case 4:
-	  mask = _mm_srli_si128 (mask0, 4);
-	  break;
-	case 5:
-	  mask = _mm_srli_si128 (mask0, 5);
-	  break;
-	case 6:
-	  mask = _mm_srli_si128 (mask0, 6);
-	  break;
-	case 7:
-	  mask = _mm_srli_si128 (mask0, 7);
-	  break;
-	case 8:
-	  mask = _mm_srli_si128 (mask0, 8);
-	  break;
-	case 9:
-	  mask = _mm_srli_si128 (mask0, 9);
-	  break;
-	case 10:
-	  mask = _mm_srli_si128 (mask0, 10);
-	  break;
-	case 11:
-	  mask = _mm_srli_si128 (mask0, 11);
-	  break;
-	case 12:
-	  mask = _mm_srli_si128 (mask0, 12);
-	  break;
-	case 13:
-	  mask = _mm_srli_si128 (mask0, 13);
-	  break;
-	case 14:
-	  mask = _mm_srli_si128 (mask0, 14);
-	  break;
-	case 15:
-	  mask = _mm_srli_si128 (mask0, 15);
-	  break;
-	}
+      mask = __m128i_shift_right (mask0, offset);
 
       /* Find where the NULL terminator is.  */
       int length = _mm_cmpistri (mask, mask, 0x3a);
@@ -135,55 +89,10 @@ __strspn_sse42 (const char *s, const char *a)
 
 	  if (index != 0)
 	    {
-	      /* Combine mask0 and mask1.  */
-	      switch (offset)
-		{
-		case 1:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 1);
-		  break;
-		case 2:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 2);
-		  break;
-		case 3:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 3);
-		  break;
-		case 4:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 4);
-		  break;
-		case 5:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 5);
-		  break;
-		case 6:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 6);
-		  break;
-		case 7:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 7);
-		  break;
-		case 8:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 8);
-		  break;
-		case 9:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 9);
-		  break;
-		case 10:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 10);
-		  break;
-		case 11:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 11);
-		  break;
-		case 12:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 12);
-		  break;
-		case 13:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 13);
-		  break;
-		case 14:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 14);
-		  break;
-		case 15:
-		  mask = _mm_alignr_epi8 (mask1, mask0, 15);
-		  break;
-		}
+	      /* Combine mask0 and mask1.  We could play games with
+		 palignr, but frankly this data should be in L1 now
+		 so do the merge via an unaligned load.  */
+	      mask = _mm_loadu_si128 ((__m128i *) a);
 	    }
 	}
     }
@@ -210,54 +119,7 @@ __strspn_sse42 (const char *s, const char *a)
       aligned = (const char *) ((size_t) s & -16L);
       __m128i value = _mm_load_si128 ((__m128i *) aligned);
 
-      switch (offset)
-	{
-	case 1:
-	  value = _mm_srli_si128 (value, 1);
-	  break;
-	case 2:
-	  value = _mm_srli_si128 (value, 2);
-	  break;
-	case 3:
-	  value = _mm_srli_si128 (value, 3);
-	  break;
-	case 4:
-	  value = _mm_srli_si128 (value, 4);
-	  break;
-	case 5:
-	  value = _mm_srli_si128 (value, 5);
-	  break;
-	case 6:
-	  value = _mm_srli_si128 (value, 6);
-	  break;
-	case 7:
-	  value = _mm_srli_si128 (value, 7);
-	  break;
-	case 8:
-	  value = _mm_srli_si128 (value, 8);
-	  break;
-	case 9:
-	  value = _mm_srli_si128 (value, 9);
-	  break;
-	case 10:
-	  value = _mm_srli_si128 (value, 10);
-	  break;
-	case 11:
-	  value = _mm_srli_si128 (value, 11);
-	  break;
-	case 12:
-	  value = _mm_srli_si128 (value, 12);
-	  break;
-	case 13:
-	  value = _mm_srli_si128 (value, 13);
-	  break;
-	case 14:
-	  value = _mm_srli_si128 (value, 14);
-	  break;
-	case 15:
-	  value = _mm_srli_si128 (value, 15);
-	  break;
-	}
+      value = __m128i_shift_right (value, offset);
 
       int length = _mm_cmpistri (mask, value, 0x12);
       /* No need to check CFlag since it is always 1.  */