Diffstat (limited to 'sysdeps')
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/memchr.S  | 31
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/memcmp.S  | 21
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/memrchr.S | 30
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/memset.S  | 10
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strchr.S  | 30
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strlen.S  | 23
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strncmp.S |  2
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strncpy.S |  2
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strnlen.S | 51
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strrchr.S | 39
-rw-r--r-- | sysdeps/powerpc/powerpc64/power8/strspn.S  | 35
11 files changed, 87 insertions, 187 deletions
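
Background for the macro removals below: the old code hand-assembled ISA 2.07 instructions as raw `.long` words so that these files still built with pre-POWER8 binutils; each register number is OR'd into the base opcode at its big-endian field position. The following standalone C sketch (our illustration, not part of the commit) reproduces the encoding arithmetic of the deleted MFVRD and VBPERMQ macros:

#include <stdio.h>
#include <stdint.h>

/* Same arithmetic as the removed glibc macros: a base opcode OR'd with
   register numbers shifted to their big-endian field positions.  TX/SX
   is already set in the base opcodes because VMX registers map to the
   high 32 VSX registers (see the comment removed from strspn.S).  */
static uint32_t
mfvrd (uint32_t r, uint32_t v)  /* Move From VR Doubleword.  */
{
  return 0x7c000067u | (v << (32 - 11)) | (r << (32 - 16));
}

static uint32_t
vbpermq (uint32_t t, uint32_t a, uint32_t b)  /* Vector Bit Permute Quadword.  */
{
  return 0x1000054cu | (t << (32 - 11)) | (a << (32 - 16)) | (b << (32 - 21));
}

int
main (void)
{
  /* Print the words the old ".long" directives emitted for two
     instructions that appear in memchr.S.  */
  printf ("mfvrd   r5, v4      -> .long 0x%08x\n", mfvrd (5, 4));
  printf ("vbpermq v6, v6, v10 -> .long 0x%08x\n", vbpermq (6, 6, 10));
  return 0;
}

With `.machine power8` in effect, a POWER8-aware assembler should produce these same instruction words directly from the `mfvrd` and `vbpermq` mnemonics, which is what makes the straight substitution in this patch safe.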
diff --git a/sysdeps/powerpc/powerpc64/power8/memchr.S b/sysdeps/powerpc/powerpc64/power8/memchr.S
index 422002f256..c4a11278d6 100644
--- a/sysdeps/powerpc/powerpc64/power8/memchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/memchr.S
@@ -20,21 +20,10 @@
 /* void *[r3] memchr (const void *s [r3], int c [r4], size_t n [r5])  */
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MTVRD(v, r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r, v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t, a, b) .long (0x1000054c \
-                                | ((t)<<(32-11)) \
-                                | ((a)<<(32-16)) \
-                                | ((b)<<(32-21)) )
-
 #ifndef MEMCHR
 # define MEMCHR __memchr
 #endif
-/* TODO: change this to .machine power8 when the minimum required binutils
-   allows it.  */
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (MEMCHR)
         CALL_MCOUNT 3
         dcbt    0, r3
@@ -97,7 +86,7 @@ L(align_qw):
         li      r0, 0
         lvsl    v11, r0, r0
         vslb    v10, v11, v10
-        MTVRD(v1, r4)
+        mtvrd   v1, r4
         vspltb  v1, v1, 7
         cmpldi  r5, 64
         ble     L(tail64)
@@ -210,10 +199,10 @@ L(tail64):
         .align  4
 L(found):
         /* Permute the first bit of each byte into bits 48-63.  */
-        VBPERMQ(v6, v6, v10)
-        VBPERMQ(v7, v7, v10)
-        VBPERMQ(v8, v8, v10)
-        VBPERMQ(v9, v9, v10)
+        vbpermq v6, v6, v10
+        vbpermq v7, v7, v10
+        vbpermq v8, v8, v10
+        vbpermq v9, v9, v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
         vsldoi  v7, v7, v7, 2
@@ -228,7 +217,7 @@ L(found):
         vor     v11, v6, v7
         vor     v4, v9, v8
         vor     v4, v11, v4
-        MFVRD(r5, v4)
+        mfvrd   r5, v4
 #ifdef __LITTLE_ENDIAN__
         addi    r6, r5, -1
         andc    r6, r6, r5
@@ -243,16 +232,16 @@ L(found):
         .align  4
 L(found_16B):
         /* Permute the first bit of each byte into bits 48-63.  */
-        VBPERMQ(v6, v6, v10)
+        vbpermq v6, v6, v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
-        MFVRD(r7, v6)
+        mfvrd   r7, v6
         addi    r6, r7, -1
         andc    r6, r6, r7
         popcntd r6, r6
 #else
         vsldoi  v6, v6, v6, 6
-        MFVRD(r7, v6)
+        mfvrd   r7, v6
         cntlzd  r6, r7  /* Count leading zeros before the match.  */
 #endif
         add     r3, r8, r6      /* Compute final length.  */
diff --git a/sysdeps/powerpc/powerpc64/power8/memcmp.S b/sysdeps/powerpc/powerpc64/power8/memcmp.S
index db824d6494..8617d5950b 100644
--- a/sysdeps/powerpc/powerpc64/power8/memcmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/memcmp.S
@@ -22,13 +22,10 @@
                     const char *s2 [r4],
                     size_t size [r5])  */
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
 #ifndef MEMCMP
 # define MEMCMP memcmp
 #endif
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (MEMCMP, 4)
         CALL_MCOUNT 3
@@ -231,15 +228,15 @@ L(different2):
         vperm   v4, v4, v0, v9
         vperm   v5, v5, v0, v9
 #endif
-        MFVRD(r7, v4)
-        MFVRD(r9, v5)
+        mfvrd   r7, v4
+        mfvrd   r9, v5
         cmpld   cr6, r7, r9
         bne     cr6, L(ret_diff)
         /* Difference in second DW.  */
         vsldoi  v4, v4, v4, 8
         vsldoi  v5, v5, v5, 8
-        MFVRD(r7, v4)
-        MFVRD(r9, v5)
+        mfvrd   r7, v4
+        mfvrd   r9, v5
         cmpld   cr6, r7, r9
 L(ret_diff):
         li      rRTN, 1
@@ -256,15 +253,15 @@ L(different3):
         vperm   v6, v6, v0, v9
         vperm   v8, v8, v0, v9
 #endif
-        MFVRD(r7, v6)
-        MFVRD(r9, v8)
+        mfvrd   r7, v6
+        mfvrd   r9, v8
         cmpld   cr6, r7, r9
         bne     cr6, L(ret_diff)
         /* Difference in second DW.  */
         vsldoi  v6, v6, v6, 8
         vsldoi  v8, v8, v8, 8
-        MFVRD(r7, v6)
-        MFVRD(r9, v8)
+        mfvrd   r7, v6
+        mfvrd   r9, v8
         cmpld   cr6, r7, r9
         li      rRTN, 1
         bgtlr   cr6
diff --git a/sysdeps/powerpc/powerpc64/power8/memrchr.S b/sysdeps/powerpc/powerpc64/power8/memrchr.S
index 51c6937c5f..77e5b4cc9d 100644
--- a/sysdeps/powerpc/powerpc64/power8/memrchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/memrchr.S
@@ -21,18 +21,10 @@
 /* int [r3] memrchr (char *s [r3], int byte [r4], int size [r5])  */
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MTVRD(v, r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r, v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t, a, b) .long (0x1000054c \
-                                | ((t)<<(32-11)) \
-                                | ((a)<<(32-16)) \
-                                | ((b)<<(32-21)) )
 #ifndef MEMRCHR
 # define MEMRCHR __memrchr
 #endif
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (MEMRCHR)
         CALL_MCOUNT 3
         add     r7, r3, r5      /* Calculate the last acceptable address.  */
@@ -92,7 +84,7 @@ L(align_qw):
         li      r0, 0
         lvsl    v11, r0, r0
         vslb    v10, v11, v10
-        MTVRD(v1, r4)
+        mtvrd   v1, r4
         vspltb  v1, v1, 7
         cmpldi  r5, 64
         ble     L(tail64)
@@ -205,10 +197,10 @@ L(tail64):
         .align  4
 L(found):
         /* Permute the first bit of each byte into bits 48-63.  */
-        VBPERMQ(v6, v6, v10)
-        VBPERMQ(v7, v7, v10)
-        VBPERMQ(v8, v8, v10)
-        VBPERMQ(v9, v9, v10)
+        vbpermq v6, v6, v10
+        vbpermq v7, v7, v10
+        vbpermq v8, v8, v10
+        vbpermq v9, v9, v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
         vsldoi  v7, v7, v7, 2
@@ -223,7 +215,7 @@ L(found):
         vor     v11, v6, v7
         vor     v4, v9, v8
         vor     v4, v11, v4
-        MFVRD(r5, v4)
+        mfvrd   r5, v4
 #ifdef __LITTLE_ENDIAN__
         cntlzd  r6, r5  /* Count leading zeros before the match.  */
 #else
@@ -245,7 +237,7 @@ L(found_16B):
         bge     L(last)
         /* Now discard bytes before starting address.  */
         sub     r9, r10, r8
-        MTVRD(v9, r9)
+        mtvrd   v9, r9
         vspltisb        v8, 3
         /* Mask unwanted bytes.  */
 #ifdef __LITTLE_ENDIAN__
@@ -263,14 +255,14 @@ L(found_16B):
 #endif
 L(last):
         /* Permute the first bit of each byte into bits 48-63.  */
-        VBPERMQ(v6, v6, v10)
+        vbpermq v6, v6, v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
         vsldoi  v6, v6, v6, 6
-        MFVRD(r7, v6)
+        mfvrd   r7, v6
         cntlzd  r6, r7  /* Count leading zeros before the match.  */
 #else
-        MFVRD(r7, v6)
+        mfvrd   r7, v6
         addi    r6, r7, -1
         andc    r6, r6, r7
         popcntd r6, r6
diff --git a/sysdeps/powerpc/powerpc64/power8/memset.S b/sysdeps/powerpc/powerpc64/power8/memset.S
index 436daf40be..3dba1f34d6 100644
--- a/sysdeps/powerpc/powerpc64/power8/memset.S
+++ b/sysdeps/powerpc/powerpc64/power8/memset.S
@@ -18,19 +18,13 @@
 #include <sysdep.h>
-#define MTVSRD_V1_R4 .long 0x7c240166   /* mtvsrd v1,r4  */
-
 /* void * [r3] memset (void *s [r3], int c [r4], size_t n [r5]));
    Returns 's'.  */
 #ifndef MEMSET
 # define MEMSET memset
 #endif
-
-        /* No need to use .machine power8 since mtvsrd is already
-           handled by the define.  It avoid breakage on binutils
-           that does not support this machine specifier.  */
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (MEMSET, 5)
         CALL_MCOUNT 3
@@ -151,7 +145,7 @@ L(tail_bytes):
    vector instruction to achieve best throughput.  */
 L(huge_vector):
         /* Replicate set byte to quadword in VMX register.  */
-        MTVSRD_V1_R4
+        mtvsrd  v1,r4
         xxpermdi 32,v0,v1,0
         vspltb  v2,v0,15
diff --git a/sysdeps/powerpc/powerpc64/power8/strchr.S b/sysdeps/powerpc/powerpc64/power8/strchr.S
index a81ecbbd31..0e7754ebfb 100644
--- a/sysdeps/powerpc/powerpc64/power8/strchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/strchr.S
@@ -33,17 +33,7 @@
 #endif /* !USE_AS_STRCHRNUL  */
 /* int [r3] strchr (char *s [r3], int c [r4])  */
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b) .long (0x1000054c \
-                              | ((t)<<(32-11)) \
-                              | ((a)<<(32-16)) \
-                              | ((b)<<(32-21)) )
-/* TODO: change this to .machine power8 when the minimum required binutils
-   allows it.  */
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (FUNC_NAME)
         CALL_MCOUNT 2
         dcbt    0,r3
@@ -178,7 +168,7 @@ L(vector):
         vspltisb        v10, 3
         lvsl    v11, r0, r0
         vslb    v10, v11, v10
-        MTVRD(v1,r4)
+        mtvrd   v1, r4
         li      r5, 16
         vspltb  v1, v1, 7
         /* Compare 32 bytes in each loop.  */
@@ -202,10 +192,10 @@ L(continue):
         blt     cr6, L(no_match)
 #endif
         /* Permute the first bit of each byte into bits 48-63.  */
-        VBPERMQ(v2, v2, v10)
-        VBPERMQ(v3, v3, v10)
-        VBPERMQ(v6, v6, v10)
-        VBPERMQ(v7, v7, v10)
+        vbpermq v2, v2, v10
+        vbpermq v3, v3, v10
+        vbpermq v6, v6, v10
+        vbpermq v7, v7, v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
         vsldoi  v3, v3, v3, 2
@@ -221,7 +211,7 @@ L(continue):
         vor     v1, v3, v2
         vor     v2, v6, v7
         vor     v4, v1, v2
-        MFVRD(r5, v4)
+        mfvrd   r5, v4
 #ifdef __LITTLE_ENDIAN__
         addi    r6, r5, -1
         andc    r6, r6, r5
@@ -347,8 +337,8 @@ L(continue1):
         blt     cr6, L(continue1)
         addi    r3, r3, -32
 L(end1):
-        VBPERMQ(v2, v2, v10)
-        VBPERMQ(v3, v3, v10)
+        vbpermq v2, v2, v10
+        vbpermq v3, v3, v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
         vsldoi  v3, v3, v3, 2
@@ -359,7 +349,7 @@ L(end1):
         /* Merge the results and move to a GPR.  */
         vor     v4, v3, v2
-        MFVRD(r5, v4)
+        mfvrd   r5, v4
 #ifdef __LITTLE_ENDIAN__
         addi    r6, r5, -1
         andc    r6, r6, r5
diff --git a/sysdeps/powerpc/powerpc64/power8/strlen.S b/sysdeps/powerpc/powerpc64/power8/strlen.S
index 6436252f82..b73e014272 100644
--- a/sysdeps/powerpc/powerpc64/power8/strlen.S
+++ b/sysdeps/powerpc/powerpc64/power8/strlen.S
@@ -19,23 +19,12 @@
 #include <sysdep.h>
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b) .long (0x1000054c \
-                              | ((t)<<(32-11)) \
-                              | ((a)<<(32-16)) \
-                              | ((b)<<(32-21)) )
-
 /* int [r3] strlen (char *s [r3])  */
 #ifndef STRLEN
 # define STRLEN strlen
 #endif
-
-/* TODO: change this to .machine power8 when the minimum required binutils
-   allows it.  */
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (STRLEN, 4)
         CALL_MCOUNT 1
         dcbt    0,r3
@@ -250,10 +239,10 @@ L(vmx_zero):
         vslb    v10,v11,v10
         /* Permute the first bit of each byte into bits 48-63.  */
-        VBPERMQ(v1,v1,v10)
-        VBPERMQ(v2,v2,v10)
-        VBPERMQ(v3,v3,v10)
-        VBPERMQ(v4,v4,v10)
+        vbpermq v1,v1,v10
+        vbpermq v2,v2,v10
+        vbpermq v3,v3,v10
+        vbpermq v4,v4,v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
@@ -270,7 +259,7 @@ L(vmx_zero):
         vor     v1,v2,v1
         vor     v2,v3,v4
         vor     v4,v1,v2
-        MFVRD(r10,v4)
+        mfvrd   r10,v4
         /* Adjust address to the begninning of the current 64-byte block.  */
         addi    r4,r4,-64
diff --git a/sysdeps/powerpc/powerpc64/power8/strncmp.S b/sysdeps/powerpc/powerpc64/power8/strncmp.S
index 40b5324a49..4a76bc516b 100644
--- a/sysdeps/powerpc/powerpc64/power8/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/power8/strncmp.S
@@ -31,7 +31,7 @@
    64K as default, the page cross handling assumes minimum page size of
    4k.  */
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (STRNCMP, 4)
         /* Check if size is 0.  */
         mr.     r10,r5
diff --git a/sysdeps/powerpc/powerpc64/power8/strncpy.S b/sysdeps/powerpc/powerpc64/power8/strncpy.S
index 01e9f70250..cb6b7c1a61 100644
--- a/sysdeps/powerpc/powerpc64/power8/strncpy.S
+++ b/sysdeps/powerpc/powerpc64/power8/strncpy.S
@@ -60,7 +60,7 @@
    64K as default, the page cross handling assumes minimum page size of
    4k.  */
-        .machine power7
+        .machine power8
 #ifdef MEMSET_is_local
 ENTRY_TOCLESS (FUNC_NAME, 4)
 #else
diff --git a/sysdeps/powerpc/powerpc64/power8/strnlen.S b/sysdeps/powerpc/powerpc64/power8/strnlen.S
index 0a5761bfe7..a85f56cb4e 100644
--- a/sysdeps/powerpc/powerpc64/power8/strnlen.S
+++ b/sysdeps/powerpc/powerpc64/power8/strnlen.S
@@ -33,32 +33,9 @@
 /* Define default page size to 4KB.  */
 #define PAGE_SIZE 4096
-/* The following macros implement Power ISA v2.07 opcodes
-   that could not be used directly into this code to the keep
-   compatibility with older binutils versions.  */
-
-/* Move from vector register doubleword.  */
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-/* Move to vector register doubleword.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-/* Vector Bit Permute Quadword.  */
-#define VBPERMQ(t,a,b) .long (0x1000054c \
-                              | ((t)<<(32-11)) \
-                              | ((a)<<(32-16)) \
-                              | ((b)<<(32-21)) )
-
-/* Vector Population Count Halfword.  */
-#define VPOPCNTH(t,b) .long (0x10000743 | ((t)<<(32-11)) | ((b)<<(32-21)))
-
-/* Vector Count Leading Zeros Halfword.  */
-#define VCLZH(t,b) .long (0x10000742 | ((t)<<(32-11)) | ((b)<<(32-21)))
-
 /* int [r3] strnlen (char *s [r3], size_t maxlen [r4])  */
-/* TODO: change to power8 when minimum required binutils allows it.  */
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (__strnlen)
         CALL_MCOUNT 2
         dcbt    0,r3
@@ -207,7 +184,7 @@ L(smaller):
         /* Place rounded up number of qw's to check into a vmx
            register, and use some vector tricks to minimize
            branching.  */
-        MTVRD(v7,r4)    /* Copy maxlen from GPR to vector register.  */
+        mtvrd   v7,r4   /* copy maxlen from gpr to vector register.  */
         vspltisb        v5,1
         vspltisb        v6,15
         vspltb  v2,v7,7
@@ -229,20 +206,20 @@ L(loop_16B):
         beq     cr6,L(loop_16B) /* If null bytes not found.  */
         vcmpequb        v1,v1,v0
-        VBPERMQ(v1,v1,v10)
+        vbpermq v1,v1,v10
 #ifdef __LITTLE_ENDIAN__
         vsubuhm v2,v1,v5        /* Form a mask of trailing zeros.  */
         vandc   v2,v2,v1
-        VPOPCNTH(v1,v2) /* Count of trailing zeros, 16 if none.  */
+        vpopcnth        v1,v2   /* count of trailing zeros, 16 if none.  */
 #else
-        VCLZH(v1,v1)    /* Count the leading zeros, 16 if none.  */
+        vclzh   v1,v1   /* count the leading zeros, 16 if none.  */
 #endif
         /* Truncate to maximum allowable offset.  */
         vcmpgtub        v2,v1,v7        /* Compare and truncate for matches
                                            beyond maxlen.  */
         vsel    v1,v1,v7,v2     /* 0-16 is now in byte 7.  */
-        MFVRD(r0,v1)
+        mfvrd   r0,v1
         addi    r5,r5,-16       /* Undo speculative bump.  */
         extsb   r0,r0   /* Clear whatever gunk is in the high 56b.  */
         add     r5,r5,r0        /* Add the offset of whatever was found.  */
@@ -262,10 +239,10 @@ L(found_64B):
         vcmpequb        v4,v4,v0
         /* Permute the first bit of each byte into bits 48-63.  */
-        VBPERMQ(v1,v1,v10)
-        VBPERMQ(v2,v2,v10)
-        VBPERMQ(v3,v3,v10)
-        VBPERMQ(v4,v4,v10)
+        vbpermq v1,v1,v10
+        vbpermq v2,v2,v10
+        vbpermq v3,v3,v10
+        vbpermq v4,v4,v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
@@ -286,7 +263,7 @@ L(found_64B):
         /* Adjust address to the start of the current 64B block.  */
         addi    r5,r5,-64
-        MFVRD(r10,v4)
+        mfvrd   r10,v4
 #ifdef __LITTLE_ENDIAN__
         addi    r9,r10,-1       /* Form a mask from trailing zeros.  */
         andc    r9,r9,r10
@@ -302,15 +279,15 @@ L(found_64B):
    as a preparation for the 64B loop.  */
         .p2align 4
 L(found_aligning64B):
-        VBPERMQ(v1,v1,v10)
+        vbpermq v1,v1,v10
 #ifdef __LITTLE_ENDIAN__
-        MFVRD(r10,v1)
+        mfvrd   r10,v1
         addi    r9,r10,-1       /* Form a mask from trailing zeros.  */
         andc    r9,r9,r10
         popcntd r0,r9   /* Count the bits in the mask.  */
 #else
         vsldoi  v1,v1,v1,6
-        MFVRD(r10,v1)
+        mfvrd   r10,v1
         cntlzd  r0,r10  /* Count leading zeros before the match.  */
 #endif
         addi    r5,r5,-16       /* Adjust address to offset of last 16 bytes
diff --git a/sysdeps/powerpc/powerpc64/power8/strrchr.S b/sysdeps/powerpc/powerpc64/power8/strrchr.S
index 988602d8d0..09d9f518ff 100644
--- a/sysdeps/powerpc/powerpc64/power8/strrchr.S
+++ b/sysdeps/powerpc/powerpc64/power8/strrchr.S
@@ -19,26 +19,13 @@
 #include <sysdep.h>
 /* char *[r3] strrchr (char *s [r3], int c [r4])  */
-/* TODO: change these to the actual instructions when the minimum required
-   binutils allows it.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define VBPERMQ(t,a,b) .long (0x1000054c \
-                              | ((t)<<(32-11)) \
-                              | ((a)<<(32-16)) \
-                              | ((b)<<(32-21)) )
-#define VCLZD(r,v) .long (0x100007c2 | ((r)<<(32-11)) | ((v)<<(32-21)))
-#define VPOPCNTD(r,v) .long (0x100007c3 | ((r)<<(32-11)) | ((v)<<(32-21)))
-#define VADDUQM(t,a,b) .long (0x10000100 \
-                              | ((t)<<(32-11)) \
-                              | ((a)<<(32-16)) \
-                              | ((b)<<(32-21)) )
+
 #ifdef __LITTLE_ENDIAN__
 /* Find the match position from v6 and place result in r6.  */
 # define CALCULATE_MATCH() \
-        VBPERMQ(v6, v6, v10); \
+        vbpermq v6, v6, v10; \
         vsldoi  v6, v6, v6, 6; \
-        MFVRD(r7, v6); \
+        mfvrd   r7, v6; \
         cntlzd  r6, r7; \
         subfic  r6, r6, 15;
 /*
@@ -48,9 +35,9 @@
  */
 # define FIND_NULL_POS(reg) \
         vspltisb        v11, -1; \
-        VADDUQM(v11, reg, v11); \
+        vadduqm v11, reg, v11; \
         vandc   v11, v11, reg; \
-        VPOPCNTD(v2, v11); \
+        vpopcntd        v2, v11; \
         vspltb  v11, v2, 15; \
         vcmpequb.       v11, v11, v9; \
         blt     cr6, 1f; \
@@ -60,14 +47,14 @@
         vsumsws v2, v2, v0;
 #else
 # define CALCULATE_MATCH() \
-        VBPERMQ(v6, v6, v10); \
-        MFVRD(r7, v6); \
+        vbpermq v6, v6, v10; \
+        mfvrd   r7, v6; \
         addi    r6, r7, -1; \
         andc    r6, r6, r7; \
         popcntd r6, r6; \
         subfic  r6, r6, 15;
 # define FIND_NULL_POS(reg) \
-        VCLZD(v2, reg); \
+        vclzd   v2, reg; \
         vspltb  v11, v2, 7; \
         vcmpequb.       v11, v11, v9; \
         blt     cr6, 1f; \
@@ -80,7 +67,7 @@
 #ifndef STRRCHR
 # define STRRCHR strrchr
 #endif
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (STRRCHR)
         CALL_MCOUNT 2
         dcbt    0,r3
@@ -243,7 +230,7 @@ L(vector):
         vspltisb        v10, 3
         lvsl    v11, r0, r0
         vslb    v10, v11, v10
-        MTVRD(v1, r4)
+        mtvrd   v1, r4
         li      r5, 16
         vspltb  v1, v1, 7
         /* Compare 32 bytes in each loop.  */
@@ -442,8 +429,8 @@ L(continue1):
         addi    r3, r3, 32
         blt     cr6, L(continue1)
         addi    r3, r3, -32
-        VBPERMQ(v2, v2, v10)
-        VBPERMQ(v3, v3, v10)
+        vbpermq v2, v2, v10
+        vbpermq v3, v3, v10
         /* Shift each component into its correct position for merging.  */
 #ifdef __LITTLE_ENDIAN__
         vsldoi  v3, v3, v3, 2
@@ -453,7 +440,7 @@ L(continue1):
 #endif
         /* Merge the results and move to a GPR.  */
         vor     v4, v3, v2
-        MFVRD(r5, v4)
+        mfvrd   r5, v4
 #ifdef __LITTLE_ENDIAN__
         addi    r6, r5, -1
         andc    r6, r6, r5
diff --git a/sysdeps/powerpc/powerpc64/power8/strspn.S b/sysdeps/powerpc/powerpc64/power8/strspn.S
index 02bac4113e..632db2540b 100644
--- a/sysdeps/powerpc/powerpc64/power8/strspn.S
+++ b/sysdeps/powerpc/powerpc64/power8/strspn.S
@@ -52,22 +52,7 @@
 #define XXVR(insn, vrt, vra, vrb) \
         insn 32+vrt, 32+vra, 32+vrb
-/* ISA 2.07B instructions are not all defined for older binutils.
-   Macros are defined below for these newer instructions in order
-   to maintain compatibility.  */
-
-/* Note, TX/SX is always set as VMX regs are the high 32 VSX regs.  */
-#define MTVRD(v,r) .long (0x7c000167 | ((v)<<(32-11)) | ((r)<<(32-16)))
-#define MFVRD(r,v) .long (0x7c000067 | ((v)<<(32-11)) | ((r)<<(32-16)))
-
-#define VBPERMQ(t,a,b) .long (0x1000054c \
-                              | ((t)<<(32-11)) \
-                              | ((a)<<(32-16)) \
-                              | ((b)<<(32-21)) )
-
-        /* This can be updated to power8 once the minimum version of
-           binutils supports power8 and the above instructions.  */
-        .machine power7
+        .machine power8
 ENTRY_TOCLESS (STRSPN, 4)
         CALL_MCOUNT 2
@@ -135,10 +120,10 @@ L(needle_gt128):
 L(start_cmp):
         /* Move and merge bitmap into 2 VRs.  bpermd is slower on P8.  */
         mr      r0, r3  /* Save r3 for final length computation.  */
-        MTVRD (v5, r5)
-        MTVRD (v6, r6)
-        MTVRD (v7, r7)
-        MTVRD (v8, r8)
+        mtvrd   v5, r5
+        mtvrd   v6, r6
+        mtvrd   v7, r7
+        mtvrd   v8, r8
         /* Continue interleaved mask generation.  */
 #ifdef __LITTLE_ENDIAN__
@@ -160,8 +145,8 @@ L(start_cmp):
         /* Compare the first 1-16B, while masking unwanted bytes.  */
         clrrdi  r3, r3, 4       /* Note, counts from qw boundaries.  */
         vxor    v9, v0, v1      /* Swap high bit.  */
-        VBPERMQ (v8, v5, v0)
-        VBPERMQ (v7, v6, v9)
+        vbpermq v8, v5, v0
+        vbpermq v7, v6, v9
         vor     v7, v7, v8
         vor     v7, v7, v11     /* Ignore non-participating bytes.  */
         vcmpequh.       v8, v7, v4
@@ -174,8 +159,8 @@ L(vec):
         lvx     v0, 0, r3
         addi    r3, r3, 16
         vxor    v9, v0, v1      /* Swap high bit.  */
-        VBPERMQ (v8, v5, v0)
-        VBPERMQ (v7, v6, v9)
+        vbpermq v8, v5, v0
+        vbpermq v7, v6, v9
         vor     v7, v7, v8
         vcmpequh.       v8, v7, v4
         blt     cr6, L(vec)
@@ -183,7 +168,7 @@ L(vec):
         addi    r3, r3, -16
 L(done):
         subf    r3, r0, r3
-        MFVRD (r10, v7)
+        mfvrd   r10, v7
 #ifdef __LITTLE_ENDIAN__
         addi    r0, r10, 1      /* Count the trailing 1's.  */
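
The sequence that recurs throughout this patch (vcmpequb, then vbpermq against the v10 selector built with lvsl/vslb, then mfvrd followed by cntlzd or popcntd) reduces a 16-byte compare result to a byte index. Here is an illustrative C model of the big-endian path (our sketch; first_match_index is a made-up name, not a glibc function):

#include <stdint.h>
#include <stdio.h>

/* Model of the asm pattern: vcmpequb yields 0x00 or 0xff per byte;
   vbpermq with the {0,8,...,120} selector gathers one bit per byte
   into bits 48-63 of the result; mfvrd moves that doubleword to a
   GPR; cntlzd turns the mask into the index of the first match.  */
static unsigned
first_match_index (const uint8_t cmp[16])
{
  uint64_t mask = 0;
  for (int i = 0; i < 16; i++)          /* vbpermq bit gather */
    mask = (mask << 1) | (cmp[i] & 1);  /* bit 0 of each 0x00/0xff byte */
  /* Mask now occupies bits 48-63 of the doubleword, as after mfvrd;
     count leading zeros and rebase to a byte index, as cntlzd does.  */
  return mask ? __builtin_clzll (mask) - 48 : 16;
}

int
main (void)
{
  uint8_t cmp[16] = { 0 };
  cmp[5] = 0xff;                        /* pretend byte 5 compared equal */
  printf ("first match at byte %u\n", first_match_index (cmp));
  return 0;
}

The little-endian paths in the diff instead isolate the lowest set bit (addi/andc) and count it with popcntd, since on LE the first matching byte corresponds to the least significant end of the gathered mask.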