From 572cca93fafa59d641c11372a9556722d95b038c Mon Sep 17 00:00:00 2001 From: Stefan Liebler Date: Tue, 18 Dec 2018 13:57:15 +0100 Subject: S390: Refactor strpbrk ifunc handling. The ifunc handling for strpbrk is adjusted in order to omit ifunc variants if those will never be used as the minimum architecture level already supports newer CPUs by default. Glibc internal calls will then also use the "newer" ifunc variant. ChangeLog: * sysdeps/s390/multiarch/Makefile (sysdep_routines): Remove strpbrk variants. * sysdeps/s390/Makefile (sysdep_routines): Add strpbrk variants. * sysdeps/s390/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Refactor ifunc handling for strpbrk. * sysdeps/s390/multiarch/strpbrk-c.c: Move to ... * sysdeps/s390/strpbrk-c.c: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/strpbrk-vx.S: Move to ... * sysdeps/s390/strpbrk-vx.S: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/strpbrk.c: Move to ... * sysdeps/s390/strpbrk.c: ... here and adjust ifunc handling. * sysdeps/s390/ifunc-strpbrk.h: New file. 
--- ChangeLog | 15 ++ sysdeps/s390/Makefile | 3 +- sysdeps/s390/ifunc-strpbrk.h | 52 +++++ sysdeps/s390/multiarch/Makefile | 3 +- sysdeps/s390/multiarch/ifunc-impl-list.c | 14 +- sysdeps/s390/multiarch/strpbrk-c.c | 28 --- sysdeps/s390/multiarch/strpbrk-vx.S | 302 ----------------------------- sysdeps/s390/multiarch/strpbrk.c | 31 --- sysdeps/s390/strpbrk-c.c | 32 ++++ sysdeps/s390/strpbrk-vx.S | 313 +++++++++++++++++++++++++++++++ sysdeps/s390/strpbrk.c | 42 +++++ 11 files changed, 470 insertions(+), 365 deletions(-) create mode 100644 sysdeps/s390/ifunc-strpbrk.h delete mode 100644 sysdeps/s390/multiarch/strpbrk-c.c delete mode 100644 sysdeps/s390/multiarch/strpbrk-vx.S delete mode 100644 sysdeps/s390/multiarch/strpbrk.c create mode 100644 sysdeps/s390/strpbrk-c.c create mode 100644 sysdeps/s390/strpbrk-vx.S create mode 100644 sysdeps/s390/strpbrk.c diff --git a/ChangeLog b/ChangeLog index 295ad8c7a3..66c5fc8656 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2018-12-18 Stefan Liebler + + * sysdeps/s390/multiarch/Makefile + (sysdep_routines): Remove strpbrk variants. + * sysdeps/s390/Makefile (sysdep_routines): Add strpbrk variants. + * sysdeps/s390/multiarch/ifunc-impl-list.c + (__libc_ifunc_impl_list): Refactor ifunc handling for strpbrk. + * sysdeps/s390/multiarch/strpbrk-c.c: Move to ... + * sysdeps/s390/strpbrk-c.c: ... here and adjust ifunc handling. + * sysdeps/s390/multiarch/strpbrk-vx.S: Move to ... + * sysdeps/s390/strpbrk-vx.S: ... here and adjust ifunc handling. + * sysdeps/s390/multiarch/strpbrk.c: Move to ... + * sysdeps/s390/strpbrk.c: ... here and adjust ifunc handling. + * sysdeps/s390/ifunc-strpbrk.h: New file. 
+ 2018-12-18 Stefan Liebler * sysdeps/s390/multiarch/Makefile diff --git a/sysdeps/s390/Makefile b/sysdeps/s390/Makefile index 604ca68ef1..df1f858e28 100644 --- a/sysdeps/s390/Makefile +++ b/sysdeps/s390/Makefile @@ -73,5 +73,6 @@ sysdep_routines += bzero memset memset-z900 \ strchr strchr-vx strchr-c \ strchrnul strchrnul-vx strchrnul-c \ strrchr strrchr-vx strrchr-c \ - strspn strspn-vx strspn-c + strspn strspn-vx strspn-c \ + strpbrk strpbrk-vx strpbrk-c endif diff --git a/sysdeps/s390/ifunc-strpbrk.h b/sysdeps/s390/ifunc-strpbrk.h new file mode 100644 index 0000000000..4a3138c6bf --- /dev/null +++ b/sysdeps/s390/ifunc-strpbrk.h @@ -0,0 +1,52 @@ +/* strpbrk variant information on S/390 version. + Copyright (C) 2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined USE_MULTIARCH && IS_IN (libc) \ + && ! 
defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT +# define HAVE_STRPBRK_IFUNC 1 +#else +# define HAVE_STRPBRK_IFUNC 0 +#endif + +#ifdef HAVE_S390_VX_ASM_SUPPORT +# define HAVE_STRPBRK_IFUNC_AND_VX_SUPPORT HAVE_STRPBRK_IFUNC +#else +# define HAVE_STRPBRK_IFUNC_AND_VX_SUPPORT 0 +#endif + +#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT +# define STRPBRK_DEFAULT STRPBRK_Z13 +# define HAVE_STRPBRK_C 0 +# define HAVE_STRPBRK_Z13 1 +#else +# define STRPBRK_DEFAULT STRPBRK_C +# define HAVE_STRPBRK_C 1 +# define HAVE_STRPBRK_Z13 HAVE_STRPBRK_IFUNC_AND_VX_SUPPORT +#endif + +#if HAVE_STRPBRK_C +# define STRPBRK_C __strpbrk_c +#else +# define STRPBRK_C NULL +#endif + +#if HAVE_STRPBRK_Z13 +# define STRPBRK_Z13 __strpbrk_vx +#else +# define STRPBRK_Z13 NULL +#endif diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index 9b141e338c..1a3fed9fc8 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -1,6 +1,5 @@ ifeq ($(subdir),string) -sysdep_routines += strpbrk strpbrk-vx strpbrk-c \ - strcspn strcspn-vx strcspn-c \ +sysdep_routines += strcspn strcspn-vx strcspn-c \ memchr memchr-vx \ rawmemchr rawmemchr-vx rawmemchr-c \ memccpy memccpy-vx memccpy-c \ diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index c39e1f793a..8e23416730 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -40,6 +40,7 @@ #include <ifunc-strchrnul.h> #include <ifunc-strrchr.h> #include <ifunc-strspn.h> +#include <ifunc-strpbrk.h> /* Maximum number of IFUNC implementations. 
*/ #define MAX_IFUNC 3 @@ -346,6 +347,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ) #endif /* HAVE_STRSPN_IFUNC */ +#if HAVE_STRPBRK_IFUNC + IFUNC_IMPL (i, name, strpbrk, +# if HAVE_STRPBRK_Z13 + IFUNC_IMPL_ADD (array, i, strpbrk, + dl_hwcap & HWCAP_S390_VX, STRPBRK_Z13) +# endif +# if HAVE_STRPBRK_C + IFUNC_IMPL_ADD (array, i, strpbrk, 1, STRPBRK_C) +# endif + ) +#endif /* HAVE_STRPBRK_IFUNC */ + #ifdef HAVE_S390_VX_ASM_SUPPORT # define IFUNC_VX_IMPL(FUNC) \ @@ -382,7 +395,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, IFUNC_VX_IMPL (wcsspn); - IFUNC_VX_IMPL (strpbrk); IFUNC_VX_IMPL (wcspbrk); IFUNC_VX_IMPL (strcspn); diff --git a/sysdeps/s390/multiarch/strpbrk-c.c b/sysdeps/s390/multiarch/strpbrk-c.c deleted file mode 100644 index 2c0517aeb5..0000000000 --- a/sysdeps/s390/multiarch/strpbrk-c.c +++ /dev/null @@ -1,28 +0,0 @@ -/* Default strpbrk implementation for S/390. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. 
*/ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# define STRPBRK __strpbrk_c -# ifdef SHARED -# undef libc_hidden_builtin_def -# define libc_hidden_builtin_def(name) \ - __hidden_ver1 (__strpbrk_c, __GI_strpbrk, __strpbrk_c); -# endif /* SHARED */ - -# include <string/strpbrk.c> -#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strpbrk-vx.S b/sysdeps/s390/multiarch/strpbrk-vx.S deleted file mode 100644 index e19c550ed4..0000000000 --- a/sysdeps/s390/multiarch/strpbrk-vx.S +++ /dev/null @@ -1,302 +0,0 @@ -/* Vector optimized 32/64 bit S/390 version of strpbrk. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) - -# include "sysdep.h" -# include "asm-syntax.h" - - .text - -/* char *strpbrk (const char *s, const char * accept) - The strpbrk() function locates the first occurrence in the string s - of any of the characters in the string accept and returns a pointer - to that character or NULL if not found. - - This method checks the length of accept string. If it fits entirely - in one vector register, a fast algorithm is used, which does not need - to check multiple parts of accept-string. Otherwise a slower full - check of accept-string is used. 
- - register overview: - r3: pointer to start of accept-string - r2: pointer to start of search-string - r0: loaded byte count of vlbb search-string (32bit unsigned) - r4: found byte index (32bit unsigned) - r1: current return len (64bit unsigned) - v16: search-string - v17: accept-string - v18: temp-vreg - - ONLY FOR SLOW: - v19: first accept-string - v20: zero for preparing acc-vector - v21: global mask; 1 indicates a match between - search-string-vreg and any accept-character - v22: current mask; 1 indicates a match between - search-string-vreg and any accept-character in current acc-vreg - v24: one for result-checking of former string-part - v30, v31: for re-/storing registers r6, r8, r9 - r5: current len of accept-string - r6: zero-index in search-string or 16 if no zero - or min(zero-index, loaded byte count) - r8: >0, if former accept-string-part contains a zero, - otherwise =0; - r9: loaded byte count of vlbb accept-string -*/ -ENTRY(__strpbrk_vx) - .machine "z13" - .machinemode "zarch_nohighgprs" - - /* - Check if accept-string fits in one vreg: - ---------------------------------------- - */ - vlbb %v17,0(%r3),6 /* Load accept. */ - lghi %r1,0 /* Zero out current len. */ - vlgvb %r0,%v17,0 /* Get first element. */ - clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ - lcbb %r0,0(%r3),6 - jo .Lcheck_onbb /* Special case if accept lays - on block-boundary. */ -.Lcheck_notonbb: - vistrbs %v17,%v17 /* Fill with zeros after first zero. */ - je .Lfast /* Zero found -> accept fits in one vreg. */ - j .Lslow /* No zero -> accept exceeds one vreg */ - - -.Lcheck_onbb: - /* Accept lays on block-boundary. */ - vfenezb %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ - vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ - clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> - Accept fits in one vreg; - Fill with zeros and proceed - with FAST. */ - vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. 
*/ - j .Lcheck_notonbb /* Check if accept fits in one vreg. */ - - - /* - Search s for accept in one vreg - ------------------------------- - */ -.Lfast: - /* Complete accept-string in v17 and remaining bytes are zero. */ - - vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ - lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ - - vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 unequal to any - in v17 or first zero element. */ - - vlgvb %r4,%v18,7 /* Load byte index of found element. */ - /* If found index is within loaded bytes, return with found - element index (=equal count). */ - clrjl %r4,%r0,.Lfast_loop_found2 - - /* Align s to 16 byte. */ - risbgn %r4,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ - lghi %r1,16 /* current_len = 16. */ - slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ - - /* Process s in 16byte aligned loop. */ -.Lfast_loop: - vl %v16,0(%r1,%r2) /* Load search-string. */ - vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any - in v17 or first zero element. */ - jno .Lfast_loop_found - - vl %v16,16(%r1,%r2) - vfaezbs %v18,%v16,%v17,0 - jno .Lfast_loop_found16 - - vl %v16,32(%r1,%r2) - vfaezbs %v18,%v16,%v17,0 - jno .Lfast_loop_found32 - - vl %v16,48(%r1,%r2) - vfaezbs %v18,%v16,%v17,0 - jno .Lfast_loop_found48 - - aghi %r1,64 - j .Lfast_loop /* Loop if no element was unequal to accept - and not zero. */ - - /* Found equal or zero element. */ -.Lfast_loop_found48: - aghi %r1,16 -.Lfast_loop_found32: - aghi %r1,16 -.Lfast_loop_found16: - aghi %r1,16 -.Lfast_loop_found: - vlgvb %r4,%v18,7 /* Load byte index of found element. */ -.Lfast_loop_found2: - vlgvb %r0,%v16,0(%r4) /* Get found element. */ - clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ - algfr %r1,%r4 /* Add found index of char to current len. */ - la %r2,0(%r1,%r2) /* And return pointer to first equal char. */ - br %r14 - -.Lfast_end_null: - lghi %r2,0 /* Return null if no character is equal. 
*/ - br %r14 - - - - - /* - Search s for accept in multiple vregs - ------------------------------------- - */ -.Lslow: - /* Save registers. */ - vlvgg %v30,%r6,0 - vlvgp %v31,%r8,%r9 - - /* accept in v17 without zero. */ - vlr %v19,%v17 /* Save first acc-part for a fast reload. */ - vzero %v20 /* Zero for preparing acc-vector. */ - vone %v24 /* One for checking result of former string. */ - - /* Align s to 16 byte. */ - risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and - %r4 = bits 60-63 'and' 15. */ - je .Lslow_loop_str /* If s is aligned, loop aligned. */ - lghi %r0,15 - slr %r0,%r4 /* Compute highest index to load (15-x). */ - vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs - highest index, remaining bytes are 0). */ - ahi %r0,1 /* Work with loaded byte count. */ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of accept-string to zero. */ - vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ - lghi %r8,0 /* There is no zero in first accept-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end_null /* If first element is zero - (end of string) -> return null */ - clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ - locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ - j .Lslow_loop_acc - - - /* Process s in 16byte aligned loop. */ -.Lslow_next_str: - /* Check results of former processed str-part. */ - vfeeb %v18,%v21,%v24 /* Find first equal match in global mask - (ones in element). */ - vlgvb %r4,%v18,7 /* Get index of first one (=equal) - or 16 if no match. */ - /* Equal-index < min(zero-index, loaded byte count) - -> return pointer to equal element. */ - clrjl %r4,%r6,.Lslow_index_found - /* Zero-index < loaded byte count - -> former str-part was last str-part - -> return null */ - clrjl %r6,%r0,.Lslow_end_null - /* All elements are zero (=no match) -> proceed with next str-part. */ - - vlr %v17,%v19 /* Load first part of accept (no zero). 
*/ - algfr %r1,%r0 /* Add loaded byte count to current len. */ - -.Lslow_loop_str: - vl %v16,0(%r1,%r2) /* Load search-string */ - lghi %r0,16 /* Loaded byte count is 16. */ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of accept to zero. */ - vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ - lghi %r8,0 /* There is no zero in first accept-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end_null /* If first element is zero - (end of string) -> return null. */ - -.Lslow_loop_acc: - vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> - Character matches any accepted character in - this accept-string-part) IN=0, RT=1. */ - vlgvb %r4,%v22,0 /* Get result of first element. */ - /* First element is equal to any accepted characters - (all other parts of accept cannot lead to a match before this one) - -> current len is pointing to first element - -> return found */ - clijh %r4,0,.Lslow_end_found - vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ - /* Proceed with next acc until end of acc is reached. */ - - -.Lslow_next_acc: - clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part - -> add index to current_len and - end. */ - vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ - aghi %r5,16 /* Increment current len of accept-string. */ - lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ - jo .Lslow_next_acc_onbb /* Jump away ifaccept-string is - on block-boundary. */ -.Lslow_next_acc_notonbb: - vistrbs %v17,%v17 /* Fill with zeros after first zero. */ - jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ - -.Lslow_next_acc_prepare_zero: - /* Zero in accept-part: fill zeros with first-accept-character. */ - vlgvb %r8,%v17,0 /* Load first element of acc-part. */ - clije %r8,0,.Lslow_next_str /* Proceed with next string-part, - if first char in this part of accept - is a zero. */ - /* r8>0 -> zero found in this acc-part. 
*/ - vrepb %v18,%v17,0 /* Replicate first char accross all chars. */ - vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars - by comparing with 0 (v20). */ - vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ - j .Lslow_loop_acc /* Accept part is prepared -> process. */ - -.Lslow_next_acc_onbb: - vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ - vlgvb %r8,%v18,7 /* Load byte index of zero. */ - clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes - -> Prepare vreg. */ - vl %v17,0(%r5,%r3) /* Load over boundary ... */ - lghi %r8,0 /* r8=0 -> no zero in this part of acc, - check for zero is in jump-target. */ - j .Lslow_next_acc_notonbb /* ... and search for zero in - fully loaded vreg again. */ - -.Lslow_end_null: - lghi %r1,0 /* Return null if no character is equal. */ - j .Lslow_end - -.Lslow_loop_found: - vlgvb %r4,%v18,7 /* Load byte index of found element. */ - vlgvb %r0,%v16,0(%r4) /* Get found element. */ - clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ - -.Lslow_index_found: - algfr %r1,%r4 /* Add found index of char to current len. */ -.Lslow_end_found: - la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ - -.Lslow_end: - /* Restore registers. */ - vlgvg %r6,%v30,0 - vlgvg %r8,%v31,0 - vlgvg %r9,%v31,1 - lgr %r2,%r1 - br %r14 -END(__strpbrk_vx) -#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/strpbrk.c b/sysdeps/s390/multiarch/strpbrk.c deleted file mode 100644 index 11afc268f7..0000000000 --- a/sysdeps/s390/multiarch/strpbrk.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Multiple versions of strpbrk. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# define strpbrk __redirect_strpbrk -/* Omit the strpbrk inline definitions because it would redefine strpbrk. */ -# define __NO_STRING_INLINES -# include <string.h> -# undef strpbrk -# include <ifunc-resolve.h> - -s390_vx_libc_ifunc2_redirected (__redirect_strpbrk, __strpbrk, strpbrk) - -#else -# include <string/strpbrk.c> -#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/sysdeps/s390/strpbrk-c.c b/sysdeps/s390/strpbrk-c.c new file mode 100644 index 0000000000..70cc5db672 --- /dev/null +++ b/sysdeps/s390/strpbrk-c.c @@ -0,0 +1,32 @@ +/* Default strpbrk implementation for S/390. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ifunc-strpbrk.h> + +#if HAVE_STRPBRK_C +# if HAVE_STRPBRK_IFUNC +# define STRPBRK STRPBRK_C +# if defined SHARED && IS_IN (libc) +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strpbrk_c, __GI_strpbrk, __strpbrk_c); +# endif +# endif + +# include <string/strpbrk.c> +#endif diff --git a/sysdeps/s390/strpbrk-vx.S b/sysdeps/s390/strpbrk-vx.S new file mode 100644 index 0000000000..0fc7dc1433 --- /dev/null +++ b/sysdeps/s390/strpbrk-vx.S @@ -0,0 +1,313 @@ +/* Vector optimized 32/64 bit S/390 version of strpbrk. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ifunc-strpbrk.h> + +#if HAVE_STRPBRK_Z13 + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strpbrk (const char *s, const char * accept) + The strpbrk() function locates the first occurrence in the string s + of any of the characters in the string accept and returns a pointer + to that character or NULL if not found. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. 
+ + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string (32bit unsigned) + r4: found byte index (32bit unsigned) + r1: current return len (64bit unsigned) + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(STRPBRK_Z13) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lghi %r1,0 /* Zero out current len. */ + vlgvb %r0,%v17,0 /* Get first element. */ + clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lays + on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg */ + + +.Lcheck_onbb: + /* Accept lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. 
*/ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 unequal to any + in v17 or first zero element. */ + + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes, return with found + element index (=equal count). */ + clrjl %r4,%r0,.Lfast_loop_found2 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was unequal to accept + and not zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ +.Lfast_loop_found2: + vlgvb %r0,%v16,0(%r4) /* Get found element. */ + clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ + algfr %r1,%r4 /* Add found index of char to current len. */ + la %r2,0(%r1,%r2) /* And return pointer to first equal char. */ + br %r14 + +.Lfast_end_null: + lghi %r2,0 /* Return null if no character is equal. 
*/ + br %r14 + + + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former string. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeeb %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) + or 16 if no match. */ + /* Equal-index < min(zero-index, loaded byte count) + -> return pointer to equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> former str-part was last str-part + -> return null */ + clrjl %r6,%r0,.Lslow_end_null + /* All elements are zero (=no match) -> proceed with next str-part. */ + + vlr %v17,%v19 /* Load first part of accept (no zero). 
*/ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vlgvb %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any accepted characters + (all other parts of accept cannot lead to a match before this one) + -> current len is pointing to first element + -> return found */ + clijh %r4,0,.Lslow_end_found + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part + -> add index to current_len and + end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away ifaccept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvb %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_next_str /* Proceed with next string-part, + if first char in this part of accept + is a zero. */ + /* r8>0 -> zero found in this acc-part. 
*/ + vrepb %v18,%v17,0 /* Replicate first char accross all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_null: + lghi %r1,0 /* Return null if no character is equal. */ + j .Lslow_end + +.Lslow_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + vlgvb %r0,%v16,0(%r4) /* Get found element. */ + clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ + +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end_found: + la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ + +.Lslow_end: + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + lgr %r2,%r1 + br %r14 +END(STRPBRK_Z13) + +# if ! HAVE_STRPBRK_IFUNC +strong_alias (STRPBRK_Z13, strpbrk) +# endif + +# if ! HAVE_STRPBRK_C && defined SHARED && IS_IN (libc) +strong_alias (STRPBRK_Z13, __GI_strpbrk) +# endif + +#endif /* HAVE_STRPBRK_Z13 */ diff --git a/sysdeps/s390/strpbrk.c b/sysdeps/s390/strpbrk.c new file mode 100644 index 0000000000..41ce00a1ae --- /dev/null +++ b/sysdeps/s390/strpbrk.c @@ -0,0 +1,42 @@ +/* Multiple versions of strpbrk. + Copyright (C) 2015-2018 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <ifunc-strpbrk.h> + +#if HAVE_STRPBRK_IFUNC +# define strpbrk __redirect_strpbrk +/* Omit the strpbrk inline definitions because it would redefine strpbrk. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strpbrk +# include <ifunc-resolve.h> + +# if HAVE_STRPBRK_C +extern __typeof (__redirect_strpbrk) STRPBRK_C attribute_hidden; +# endif + +# if HAVE_STRPBRK_Z13 +extern __typeof (__redirect_strpbrk) STRPBRK_Z13 attribute_hidden; +# endif + +s390_libc_ifunc_expr (__redirect_strpbrk, strpbrk, + (HAVE_STRPBRK_Z13 && (hwcap & HWCAP_S390_VX)) + ? STRPBRK_Z13 + : STRPBRK_DEFAULT + ) +#endif /* HAVE_STRPBRK_IFUNC */ -- cgit 1.4.1