From 1ef83476d0aacae4ee0d2fd07aca2868cb1cdbcb Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella
Date: Thu, 28 Mar 2013 14:09:03 -0500
Subject: PowerPC: strnlen ifunc for PPC32

---
 sysdeps/powerpc/powerpc32/multiarch/Makefile       |   3 +-
 .../powerpc/powerpc32/multiarch/ifunc-impl-list.c  |   7 +
 sysdeps/powerpc/powerpc32/multiarch/rtld-strnlen.c |   1 +
 sysdeps/powerpc/powerpc32/multiarch/strnlen-c.c    |   8 +
 .../powerpc/powerpc32/multiarch/strnlen-power7.S   | 167 ++++++++++++++++++++
 sysdeps/powerpc/powerpc32/multiarch/strnlen.S      |  70 +++++++++
 sysdeps/powerpc/powerpc32/power7/strnlen.S         | 169 ---------------------
 7 files changed, 255 insertions(+), 170 deletions(-)
 create mode 100644 sysdeps/powerpc/powerpc32/multiarch/rtld-strnlen.c
 create mode 100644 sysdeps/powerpc/powerpc32/multiarch/strnlen-c.c
 create mode 100644 sysdeps/powerpc/powerpc32/multiarch/strnlen-power7.S
 create mode 100644 sysdeps/powerpc/powerpc32/multiarch/strnlen.S
 delete mode 100644 sysdeps/powerpc/powerpc32/power7/strnlen.S

diff --git a/sysdeps/powerpc/powerpc32/multiarch/Makefile b/sysdeps/powerpc/powerpc32/multiarch/Makefile
index 22deb1759f..e5981f404c 100644
--- a/sysdeps/powerpc/powerpc32/multiarch/Makefile
+++ b/sysdeps/powerpc/powerpc32/multiarch/Makefile
@@ -3,5 +3,6 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \
 		   memcmp-power7 memset-power4 memset-power6 memset-power7 \
 		   bzero-power4 bzero-power6 bzero-power7 \
 		   strncmp-power7 strncmp-power4 strlen-power7 \
-		   strcasecmp-power7 strcasecmp_l-power7
+		   strcasecmp-power7 strcasecmp_l-power7 \
+		   strnlen-power7 strnlen-c
 endif
diff --git a/sysdeps/powerpc/powerpc32/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc32/multiarch/ifunc-impl-list.c
index 7ca97acdf7..7975c00bba 100644
--- a/sysdeps/powerpc/powerpc32/multiarch/ifunc-impl-list.c
+++ b/sysdeps/powerpc/powerpc32/multiarch/ifunc-impl-list.c
@@ -106,6 +106,13 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __strcasecmp_l_power7)
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l, 1,
 			      __strcasecmp_l_ppc32))
+
+  IFUNC_IMPL (i, name, strnlen,
+	      IFUNC_IMPL_ADD (array, i, strnlen,
+			      hwcap & PPC_FEATURE_HAS_VSX,
+			      __strnlen_power7)
+	      IFUNC_IMPL_ADD (array, i, strnlen, 1,
+			      __strnlen_ppc32))
 #endif
 
   return i;
diff --git a/sysdeps/powerpc/powerpc32/multiarch/rtld-strnlen.c b/sysdeps/powerpc/powerpc32/multiarch/rtld-strnlen.c
new file mode 100644
index 0000000000..1aa5440644
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/multiarch/rtld-strnlen.c
@@ -0,0 +1 @@
+#include <string/strnlen.c>
diff --git a/sysdeps/powerpc/powerpc32/multiarch/strnlen-c.c b/sysdeps/powerpc/powerpc32/multiarch/strnlen-c.c
new file mode 100644
index 0000000000..cc27cc1039
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/multiarch/strnlen-c.c
@@ -0,0 +1,8 @@
+#define STRNLEN  __strnlen_ppc32
+#ifdef SHARED
+# undef libc_hidden_def
+# define libc_hidden_def(name)  \
+  __hidden_ver1 (__strnlen_ppc32, __GI_strnlen, __strnlen_ppc32);
+#endif
+
+#include "string/strnlen.c"
diff --git a/sysdeps/powerpc/powerpc32/multiarch/strnlen-power7.S b/sysdeps/powerpc/powerpc32/multiarch/strnlen-power7.S
new file mode 100644
index 0000000000..63ac8215b6
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/multiarch/strnlen-power7.S
@@ -0,0 +1,167 @@
+/* Optimized strnlen implementation for PowerPC32/POWER7 using cmpb insn.
+   Copyright (C) 2010-2013 Free Software Foundation, Inc.
+   Contributed by Luis Machado <luisgpm@br.ibm.com>.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+
+/* size_t [r3] strnlen (const char *s [r3], size_t size [r4])  */
+	.machine  power7
+ENTRY (__strnlen_power7)
+	CALL_MCOUNT
+	dcbt	0,r3
+	clrrwi	r8,r3,2	      /* Align the address to word boundary.  */
+	add	r7,r3,r4      /* Calculate the end of the range (s + size).  */
+	cmplwi	r4,16
+	li	r0,0	      /* Word with null chars.  */
+	ble	L(small_range)
+
+	cmplw	cr7,r3,r7     /* Did s + size wrap around the address space?
+				 If r3 <= r7 it did not, so the computed end
+				 in r7 can be used as is.  */
+	ble	cr7,L(proceed)
+
+	li	r7,-1	      /* It wrapped: clamp the end to 0xffffffff.  */
+L(proceed):
+	rlwinm	r6,r3,3,27,28 /* Calculate padding.  */
+	lwz	r12,0(r8)     /* Load word from memory.  */
+	cmpb	r10,r12,r0    /* Check for null bytes in WORD1.  */
+	slw	r10,r10,r6
+	srw	r10,r10,r6
+	cmplwi	cr7,r10,0     /* If r10 == 0, no null's have been found.  */
+	bne	cr7,L(done)
+
+	/* Are we done already?  */
+	addi	r9,r8,4
+	cmplw	cr6,r9,r7
+	bge	cr6,L(end_max)
+
+	mtcrf   0x01,r8
+	/* If r8 is the odd word of a doubleword, the next word is doubleword
+	   aligned: skip to the main loop.  Otherwise check one more word.  */
+
+	bt	29,L(loop_setup)
+
+	/* Handle WORD2 of pair.  */
+	lwzu	r12,4(r8)
+	cmpb	r10,r12,r0
+	cmplwi	cr7,r10,0
+	bne	cr7,L(done)
+
+	/* Are we done already?  */
+	addi	r9,r8,4
+	cmplw	cr6,r9,r7
+	bge	cr6,L(end_max)
+
+L(loop_setup):
+	sub	r5,r7,r9
+	srwi	r6,r5,3	      /* Number of loop iterations.  */
+	mtctr	r6	      /* Setup the counter.  */
+	b	L(loop)
+	/* Main loop to look for the null byte in the string.  Since
+	   it's a small loop (8 instructions), align it to 32-bytes.  */
+	.p2align  5
+L(loop):
+	/* Load two words, compare and merge in a
+	   single register for speed.  This is an attempt
+	   to speed up the null-checking process for bigger strings.  */
+
+	lwz	r12,4(r8)
+	lwzu	r11,8(r8)
+	cmpb	r10,r12,r0
+	cmpb	r9,r11,r0
+	or	r5,r9,r10     /* Merge everything in one word.  */
+	cmplwi	cr7,r5,0
+	bne	cr7,L(found)
+	bdnz	L(loop)
+	/* The counter reached 0, so the doubleword loop found no null.  If
+	   a word still starts before the end of the range, scan the tail
+	   word by word; otherwise return the original size.  */
+	addi	r9,r8,4
+	cmplw	cr6,r9,r7
+	blt	cr6,L(loop_small)
+
+L(end_max):
+	sub	r3,r7,r3
+	blr
+
+	/* OK, one (or both) of the words contains a null byte.  Check
+	   the first word and decrement the address in case the first
+	   word really contains a null byte.  */
+	.align	4
+L(found):
+	cmplwi	cr6,r10,0
+	addi	r8,r8,-4
+	bne	cr6,L(done)
+
+	/* The null byte must be in the second word.  Adjust the address
+	   again and move the result of cmpb to r10 so we can calculate the
+	   length.  */
+
+	mr	r10,r9
+	addi	r8,r8,4
+
+	/* r10 has the output of the cmpb instruction, that is, it contains
+	   0xff in the same position as the null byte in the original
+	   word from the string.  Use that to calculate the length.
+	   We need to make sure the null char is *before* the end of the
+	   range.  */
+L(done):
+	cntlzw	r0,r10	      /* Count leading zeroes before the match.  */
+	srwi	r0,r0,3	      /* Convert leading zeroes to bytes.  */
+	add	r9,r8,r0
+	sub	r6,r9,r3      /* Length until the match.  */
+	cmplw	r9,r7
+	bgt	L(end_max)
+	mr	r3,r6
+	blr
+
+	.align	4
+L(zero):
+	li	r3,0
+	blr
+
+/* Deals with size <= 16.  */
+	.align	4
+L(small_range):
+	cmplwi	r4,0
+	beq	L(zero)
+
+	rlwinm	r6,r3,3,27,28 /* Calculate padding.  */
+	lwz	r12,0(r8)     /* Load word from memory.  */
+	cmpb	r10,r12,r0    /* Check for null bytes in WORD1.  */
+	slw	r10,r10,r6
+	srw	r10,r10,r6
+	cmplwi	cr7,r10,0
+	bne	cr7,L(done)
+
+	addi    r9,r8,4
+	cmplw	r9,r7
+	bge	L(end_max)
+	b	L(loop_small)
+
+	.p2align  5
+L(loop_small):
+	lwzu	r12,4(r8)
+	cmpb	r10,r12,r0
+	addi	r9,r8,4
+	cmplwi	cr6,r10,0
+	bne	cr6,L(done)
+	cmplw	r9,r7
+	bge	L(end_max)
+	b	L(loop_small)
+END (__strnlen_power7)
diff --git a/sysdeps/powerpc/powerpc32/multiarch/strnlen.S b/sysdeps/powerpc/powerpc32/multiarch/strnlen.S
new file mode 100644
index 0000000000..1b3f46eaf1
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/multiarch/strnlen.S
@@ -0,0 +1,70 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <rtld-global-offsets.h>
+
+/* Define multiple versions only for the definition in libc.  */
+#ifndef NOT_IN_libc
+	.text
+ENTRY(__strnlen)
+	.type	__strnlen, @gnu_indirect_function
+# ifdef PIC
+	mflr	r6
+	cfi_register (lr,r6)
+	SETUP_GOT_ACCESS (r5,got_label)
+	addis	r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@ha
+	addi	r5,r5,_GLOBAL_OFFSET_TABLE_-got_label@l
+	mtlr	r6
+	cfi_same_value (lr)
+#  ifdef SHARED
+	lwz	r6,_rtld_global_ro@got(r5)
+	/* If _rtld_global_ro is not initialized use the default ppc32
+	   implementation.  */
+	cmplwi	r6,0
+	beq	L(ppc32)
+	lwz	r6,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET+4(r6)
+#  else
+	lwz	r6,_dl_hwcap@got(r5)
+	lwz	r6,4(r6)
+#  endif
+# else /* PIC  */
+	lis	r6,(_dl_hwcap+4)@ha
+	lwz	r6,(_dl_hwcap+4)@l(r6)
+# endif
+	andi.	r7,r6,PPC_FEATURE_HAS_VSX  /* VSX selects the POWER7 version.  */
+	bne	L(power7)
+L(ppc32):
+# ifdef PIC
+	lwz	r3,__strnlen_ppc32@got(r5)
+# else
+	lis	r3,__strnlen_ppc32@ha
+	addi	r3,r3,__strnlen_ppc32@l	   /* Return the address, not a load.  */
+# endif
+	blr
+L(power7):
+# ifdef PIC
+	lwz	r3,__strnlen_power7@got(r5)
+# else
+	lis	r3,__strnlen_power7@ha
+	addi	r3,r3,__strnlen_power7@l   /* Return the address, not a load.  */
+# endif
+	blr
+END(__strnlen)
+
+weak_alias (__strnlen, strnlen)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/strnlen.S b/sysdeps/powerpc/powerpc32/power7/strnlen.S
deleted file mode 100644
index ed088366a2..0000000000
--- a/sysdeps/powerpc/powerpc32/power7/strnlen.S
+++ /dev/null
@@ -1,169 +0,0 @@
-/* Optimized strnlen implementation for PowerPC32/POWER7 using cmpb insn.
-   Copyright (C) 2010-2013 Free Software Foundation, Inc.
-   Contributed by Luis Machado <luisgpm@br.ibm.com>.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-/* int [r3] strnlen (char *s [r3], int size [r4])  */
-	.machine  power7
-ENTRY (__strnlen)
-	CALL_MCOUNT
-	dcbt	0,r3
-	clrrwi	r8,r3,2	      /* Align the address to word boundary.  */
-	add	r7,r3,r4      /* Calculate the last acceptable address.  */
-	cmplwi	r4,16
-	li	r0,0	      /* Word with null chars.  */
-	ble	L(small_range)
-
-	cmplw	cr7,r3,r7     /* Is the address equal or less than r3?  If
-				 it's equal or less, it means size is either 0
-				 or a negative number.  */
-	ble	cr7,L(proceed)
-
-	li	r7,-1	      /* Make r11 the biggest if r4 <= 0.  */
-L(proceed):
-	rlwinm	r6,r3,3,27,28 /* Calculate padding.  */
-	lwz	r12,0(r8)     /* Load word from memory.  */
-	cmpb	r10,r12,r0    /* Check for null bytes in DWORD1.  */
-	slw	r10,r10,r6
-	srw	r10,r10,r6
-	cmplwi	cr7,r10,0     /* If r10 == 0, no null's have been found.  */
-	bne	cr7,L(done)
-
-	/* Are we done already?  */
-	addi	r9,r8,4
-	cmplw	cr6,r9,r7
-	bge	cr6,L(end_max)
-
-	mtcrf   0x01,r8
-	/* Are we now aligned to a doubleword boundary?  If so, skip to
-	   the main loop.  Otherwise, go through the alignment code.  */
-
-	bt	29,L(loop_setup)
-
-	/* Handle DWORD2 of pair.  */
-	lwzu	r12,4(r8)
-	cmpb	r10,r12,r0
-	cmplwi	cr7,r10,0
-	bne	cr7,L(done)
-
-	/* Are we done already?  */
-	addi	r9,r8,4
-	cmplw	cr6,r9,r7
-	bge	cr6,L(end_max)
-
-L(loop_setup):
-	sub	r5,r7,r9
-	srwi	r6,r5,3	      /* Number of loop iterations.  */
-	mtctr	r6	      /* Setup the counter.  */
-	b	L(loop)
-	/* Main loop to look for the null byte backwards in the string.  Since
-	   it's a small loop (< 8 instructions), align it to 32-bytes.  */
-	.p2align  5
-L(loop):
-	/* Load two words, compare and merge in a
-	   single register for speed.  This is an attempt
-	   to speed up the null-checking process for bigger strings.  */
-
-	lwz	r12,4(r8)
-	lwzu	r11,8(r8)
-	cmpb	r10,r12,r0
-	cmpb	r9,r11,r0
-	or	r5,r9,r10     /* Merge everything in one word.  */
-	cmplwi	cr7,r5,0
-	bne	cr7,L(found)
-	bdnz	L(loop)
-	/* We're here because the counter reached 0, and that means we
-	   didn't have any matches for null in the whole range.  Just return
-	   the original size.  */
-	addi	r9,r8,4
-	cmplw	cr6,r9,r7
-	blt	cr6,L(loop_small)
-
-L(end_max):
-	sub	r3,r7,r3
-	blr
-
-	/* OK, one (or both) of the words contains a null byte.  Check
-	   the first word and decrement the address in case the first
-	   word really contains a null byte.  */
-	.align	4
-L(found):
-	cmplwi	cr6,r10,0
-	addi	r8,r8,-4
-	bne	cr6,L(done)
-
-	/* The null byte must be in the second word.  Adjust the address
-	   again and move the result of cmpb to r10 so we can calculate the
-	   length.  */
-
-	mr	r10,r9
-	addi	r8,r8,4
-
-	/* r10 has the output of the cmpb instruction, that is, it contains
-	   0xff in the same position as the null byte in the original
-	   word from the string.  Use that to calculate the length.
-	   We need to make sure the null char is *before* the end of the
-	   range.  */
-L(done):
-	cntlzw	r0,r10	      /* Count leading zeroes before the match.  */
-	srwi	r0,r0,3	      /* Convert leading zeroes to bytes.  */
-	add	r9,r8,r0
-	sub	r6,r9,r3      /* Length until the match.  */
-	cmplw	r9,r7
-	bgt	L(end_max)
-	mr	r3,r6
-	blr
-
-	.align	4
-L(zero):
-	li	r3,0
-	blr
-
-/* Deals with size <= 32.  */
-	.align	4
-L(small_range):
-	cmplwi	r4,0
-	beq	L(zero)
-
-	rlwinm	r6,r3,3,27,28 /* Calculate padding.  */
-	lwz	r12,0(r8)     /* Load word from memory.  */
-	cmpb	r10,r12,r0    /* Check for null bytes in WORD1.  */
-	slw	r10,r10,r6
-	srw	r10,r10,r6
-	cmplwi	cr7,r10,0
-	bne	cr7,L(done)
-
-	addi    r9,r8,4
-	cmplw	r9,r7
-	bge	L(end_max)
-	b	L(loop_small)
-
-	.p2align  5
-L(loop_small):
-	lwzu	r12,4(r8)
-	cmpb	r10,r12,r0
-	addi	r9,r8,4
-	cmplwi	cr6,r10,0
-	bne	cr6,L(done)
-	cmplw	r9,r7
-	bge	L(end_max)
-	b	L(loop_small)
-END (__strnlen)
-weak_alias (__strnlen, strnlen)
-libc_hidden_builtin_def (strnlen)
-- 
cgit 1.4.1