diff options
author | Stefan Liebler <stli@linux.ibm.com> | 2018-12-18 13:57:23 +0100 |
---|---|---|
committer | Stefan Liebler <stli@linux.ibm.com> | 2018-12-18 13:57:23 +0100 |
commit | 8e87c1f6d45a63dc2175825c2dc5d66192a13aab (patch) | |
tree | db2faa21872a7ff359be90777b0ccb388fbcb2f0 /sysdeps/s390/multiarch | |
parent | 8507e831907ff46d06382fe453c6832db2594e0b (diff) | |
download | glibc-8e87c1f6d45a63dc2175825c2dc5d66192a13aab.tar.gz glibc-8e87c1f6d45a63dc2175825c2dc5d66192a13aab.tar.xz glibc-8e87c1f6d45a63dc2175825c2dc5d66192a13aab.zip |
S390: Refactor wcspbrk ifunc handling.
The ifunc handling for wcspbrk is adjusted in order to omit ifunc if the minimum architecture level already supports newer CPUs by default. Unfortunately the c ifunc variant can't be omitted at all as it is used by the z13 ifunc variant as fallback if the pointers are not 4-byte aligned. Glibc internal calls will use the "newer" ifunc variant. ChangeLog: * sysdeps/s390/multiarch/Makefile (sysdep_routines): Remove wcspbrk variants. * sysdeps/s390/Makefile (sysdep_routines): Add wcspbrk variants. * sysdeps/s390/multiarch/ifunc-impl-list.c (__libc_ifunc_impl_list): Refactor ifunc handling for wcspbrk. * sysdeps/s390/multiarch/wcspbrk-c.c: Move to ... * sysdeps/s390/wcspbrk-c.c: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/wcspbrk-vx.S: Move to ... * sysdeps/s390/wcspbrk-vx.S: ... here and adjust ifunc handling. * sysdeps/s390/multiarch/wcspbrk.c: Move to ... * sysdeps/s390/wcspbrk.c: ... here and adjust ifunc handling. * sysdeps/s390/ifunc-wcspbrk.h: New file.
Diffstat (limited to 'sysdeps/s390/multiarch')
-rw-r--r-- | sysdeps/s390/multiarch/Makefile | 3 | ||||
-rw-r--r-- | sysdeps/s390/multiarch/ifunc-impl-list.c | 15 | ||||
-rw-r--r-- | sysdeps/s390/multiarch/wcspbrk-c.c | 31 | ||||
-rw-r--r-- | sysdeps/s390/multiarch/wcspbrk-vx.S | 315 | ||||
-rw-r--r-- | sysdeps/s390/multiarch/wcspbrk.c | 29 |
5 files changed, 14 insertions, 379 deletions
diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile index 091f515084..e1e2d9dc74 100644 --- a/sysdeps/s390/multiarch/Makefile +++ b/sysdeps/s390/multiarch/Makefile @@ -1,6 +1,5 @@ ifeq ($(subdir),wcsmbs) -sysdep_routines += wcspbrk wcspbrk-vx wcspbrk-c \ - wcscspn wcscspn-vx wcscspn-c \ +sysdep_routines += wcscspn wcscspn-vx wcscspn-c \ wmemchr wmemchr-vx wmemchr-c \ wmemset wmemset-vx wmemset-c \ wmemcmp wmemcmp-vx wmemcmp-c diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c index 6f4de2845b..89d6e8ad7e 100644 --- a/sysdeps/s390/multiarch/ifunc-impl-list.c +++ b/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -60,6 +60,7 @@ #include <ifunc-wcschrnul.h> #include <ifunc-wcsrchr.h> #include <ifunc-wcsspn.h> +#include <ifunc-wcspbrk.h> /* Maximum number of IFUNC implementations. */ #define MAX_IFUNC 3 @@ -606,6 +607,18 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, ) #endif /* HAVE_WCSSPN_IFUNC */ +#if HAVE_WCSPBRK_IFUNC + IFUNC_IMPL (i, name, wcspbrk, +# if HAVE_WCSPBRK_Z13 + IFUNC_IMPL_ADD (array, i, wcspbrk, + dl_hwcap & HWCAP_S390_VX, WCSPBRK_Z13) +# endif +# if HAVE_WCSPBRK_C + IFUNC_IMPL_ADD (array, i, wcspbrk, 1, WCSPBRK_C) +# endif + ) +#endif /* HAVE_WCSPBRK_IFUNC */ + #ifdef HAVE_S390_VX_ASM_SUPPORT # define IFUNC_VX_IMPL(FUNC) \ @@ -614,8 +627,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, __##FUNC##_vx) \ IFUNC_IMPL_ADD (array, i, FUNC, 1, __##FUNC##_c)) - IFUNC_VX_IMPL (wcspbrk); - IFUNC_VX_IMPL (wcscspn); IFUNC_VX_IMPL (wmemchr); diff --git a/sysdeps/s390/multiarch/wcspbrk-c.c b/sysdeps/s390/multiarch/wcspbrk-c.c deleted file mode 100644 index 6b6e7aade4..0000000000 --- a/sysdeps/s390/multiarch/wcspbrk-c.c +++ /dev/null @@ -1,31 +0,0 @@ -/* Default wcspbrk implementation for S/390. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# define WCSPBRK __wcspbrk_c - -# include <wchar.h> -extern __typeof (wcspbrk) __wcspbrk_c; -# ifdef SHARED -# undef libc_hidden_def -# define libc_hidden_def(name) \ - __hidden_ver1 (__wcspbrk_c, __GI_wcspbrk, __wcspbrk_c); -# endif /* SHARED */ - -# include <wcsmbs/wcspbrk.c> -#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcspbrk-vx.S b/sysdeps/s390/multiarch/wcspbrk-vx.S deleted file mode 100644 index 5c89ec5d33..0000000000 --- a/sysdeps/s390/multiarch/wcspbrk-vx.S +++ /dev/null @@ -1,315 +0,0 @@ -/* Vector optimized 32/64 bit S/390 version of wcspbrk. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) - -# include "sysdep.h" -# include "asm-syntax.h" - - .text - -/* wchar_t *wcspbrk (const wchar_t *s, const wchar_t * accept) - The wcspbrk() function locates the first occurrence in the string s - of any of the characters in the string accept and returns a pointer - to that character or NULL if not found. - - This method checks the length of accept string. If it fits entirely - in one vector register, a fast algorithm is used, which does not need - to check multiple parts of accept-string. Otherwise a slower full - check of accept-string is used. - - register overview: - r3: pointer to start of accept-string - r2: pointer to start of search-string - r0: loaded byte count of vlbb search-string (32bit unsigned) - r4: found byte index (32bit unsigned) - r1: current return len (64bit unsigned) - v16: search-string - v17: accept-string - v18: temp-vreg - - ONLY FOR SLOW: - v19: first accept-string - v20: zero for preparing acc-vector - v21: global mask; 1 indicates a match between - search-string-vreg and any accept-character - v22: current mask; 1 indicates a match between - search-string-vreg and any accept-character in current acc-vreg - v24: one for result-checking of former string-part - v30, v31: for re-/storing registers r6, r8, r9 - r5: current len of accept-string - r6: zero-index in search-string or 16 if no zero - or min(zero-index, loaded byte count) - r8: >0, if former accept-string-part contains a zero, - otherwise =0; - r9: loaded byte count of vlbb accept-string -*/ -ENTRY(__wcspbrk_vx) - .machine "z13" - .machinemode "zarch_nohighgprs" - - tmll %r2,3 /* Test if s is 4-byte aligned? */ - jne .Lfallback /* And use common-code variant if not. */ - - /* - Check if accept-string fits in one vreg: - ---------------------------------------- - */ - vlbb %v17,0(%r3),6 /* Load accept. */ - lcbb %r0,0(%r3),6 - jo .Lcheck_onbb /* Special case if accept lays - on block-boundary. */ - -.Lcheck_notonbb: - lghi %r1,0 /* Zero out current len. */ - vlgvf %r0,%v17,0 /* Get first element. */ - clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ - - vistrfs %v17,%v17 /* Fill with zeros after first zero. */ - je .Lfast /* Zero found -> accept fits in one vreg. */ - j .Lslow /* No zero -> accept exceeds one vreg */ - - -.Lcheck_onbb: - /* Accept lays on block-boundary. */ - nill %r0,65532 /* Recognize only fully loaded characters. */ - je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ - vfenezf %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ - vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ - clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> - accept fits in one vreg; - Fill with zeros and proceed - with FAST. */ -.Lcheck_onbb2: - vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ - j .Lcheck_notonbb /* Check if accept fits in one vreg. */ - - - /* - Search s for accept in one vreg - ------------------------------- - */ -.Lfast: - /* Complete accept-string in v17 and remaining bytes are zero. */ - - vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ - lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ - - vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 unequal to any - in v17 or first zero element. */ - vlgvb %r4,%v18,7 /* Load byte index of found element. */ - /* If found index is within loaded bytes, return with found - element index (=equal count). */ - clrjl %r4,%r0,.Lfast_loop_found2 - - /* Align s to 16 byte. */ - risbgn %r4,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ - lghi %r1,16 /* current_len = 16. */ - slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ - -.Lfast_loop: - vl %v16,0(%r1,%r2) /* Load search-string. */ - vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any - in v17 or first zero element. */ - jno .Lfast_loop_found - - vl %v16,16(%r1,%r2) - vfaezfs %v18,%v16,%v17,0 - jno .Lfast_loop_found16 - - vl %v16,32(%r1,%r2) - vfaezfs %v18,%v16,%v17,0 - jno .Lfast_loop_found32 - - vl %v16,48(%r1,%r2) - vfaezfs %v18,%v16,%v17,0 - jno .Lfast_loop_found48 - - aghi %r1,64 - j .Lfast_loop /* Loop if no element was unequal to accept - and not zero. */ - - /* Found equal or zero element. */ -.Lfast_loop_found48: - aghi %r1,16 -.Lfast_loop_found32: - aghi %r1,16 -.Lfast_loop_found16: - aghi %r1,16 -.Lfast_loop_found: - vlgvb %r4,%v18,7 /* Load byte index of found element. */ -.Lfast_loop_found2: - srlg %r5,%r4,2 /* Convert byte-index to character-index. */ - vlgvf %r0,%v16,0(%r5) /* Get found element. */ - clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ - algfr %r1,%r4 /* Add found index of char to current len. */ - la %r2,0(%r1,%r2) /* And return pointer to first equal char. */ - br %r14 - -.Lfast_end_null: - lghi %r2,0 /* Return null if no character is equal. */ - br %r14 - - - - - /* - Search s for accept in multiple vregs - ------------------------------------- - */ -.Lslow: - /* Save registers. */ - vlvgg %v30,%r6,0 - vlvgp %v31,%r8,%r9 - - /* Accept in v17 without zero */ - vlr %v19,%v17 /* Save first acc-part for a fast reload. */ - vzero %v20 /* Zero for preparing acc-vector. */ - vone %v24 /* One for checking result of former string. */ - - /* Align s to 16 byte. */ - risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and - %r4 = bits 60-63 'and' 15. */ - je .Lslow_loop_str /* If s is aligned, loop aligned. */ - lghi %r0,15 - slr %r0,%r4 /* Compute highest index to load (15-x). */ - vll %v16,%r0,0(%r2) /* Load up to 16byte boundary; - needs highest index, left bytes are 0. */ - ahi %r0,1 /* Work with loaded byte count. */ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of accept-string to zero. */ - vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ - lghi %r8,0 /* There is no zero in first accept-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end_null /* If first element is zero - (end of string) -> return null */ - clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ - locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ - j .Lslow_loop_acc - - - /* Process s in 16byte aligned loop. */ -.Lslow_next_str: - /* Check results of former processed str-part. */ - vfeef %v18,%v21,%v24 /* Find first equal match in global mask - (ones in element). */ - vlgvb %r4,%v18,7 /* Get index of first one (=equal) - or 16 if no match. */ - /* Equal-index < min(zero-index, loaded byte count) - -> return pointer to equal element. */ - clrjl %r4,%r6,.Lslow_index_found - /* Zero-index < loaded byte count - -> former str-part was last str-part - -> return null */ - clrjl %r6,%r0,.Lslow_end_null - /* All elements are zero (=no match) -> proceed with next str-part. */ - - vlr %v17,%v19 /* Load first part of accept (no zero). */ - algfr %r1,%r0 /* Add loaded byte count to current len. */ - -.Lslow_loop_str: - vl %v16,0(%r1,%r2) /* Load search-string */ - lghi %r0,16 /* Loaded byte count is 16. */ - vzero %v21 /* Zero out global mask. */ - lghi %r5,0 /* Set current len of accept to zero. */ - vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ - lghi %r8,0 /* There is no zero in first accept-part. */ - vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ - clije %r6,0,.Lslow_end_null /* If first element is zero - (end of string) -> return null. */ - -.Lslow_loop_acc: - vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> - Character matches any accepted character in - this accept-string-part) IN=0, RT=1. */ - vlgvf %r4,%v22,0 /* Get result of first element. */ - /* First element is equal to any accepted characters - (all other parts of accept cannot lead to a match before this one) - -> current len is pointing to first element - -> return found */ - clijh %r4,0,.Lslow_end_found - vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ - /* Proceed with next acc until end of acc is reached. */ - - -.Lslow_next_acc: - clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part - -> add index to current len and - end. */ - vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ - aghi %r5,16 /* Increment current len of accept-string. */ - lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ - jo .Lslow_next_acc_onbb /* Jump away ifaccept-string is - on block-boundary. */ -.Lslow_next_acc_notonbb: - vistrfs %v17,%v17 /* Fill with zeros after first zero. */ - jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ - -.Lslow_next_acc_prepare_zero: - /* Zero in accept-part: fill zeros with first-accept-character. */ - vlgvf %r8,%v17,0 /* Load first element of acc-part. */ - clije %r8,0,.Lslow_next_str /* Proceed with next string-part, - If first char in this part of accept - is a zero. */ - /* r8>0 -> zero found in this acc-part. */ - vrepf %v18,%v17,0 /* Replicate first char accross all chars. */ - vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars - by comparing with 0 (v20). */ - vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ - j .Lslow_loop_acc /* Accept part is prepared -> process. */ - -.Lslow_next_acc_onbb: - nill %r9,65532 /* Recognize only fully loaded characters. */ - je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t. */ - vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ - vlgvb %r8,%v18,7 /* Load byte index of zero. */ - clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes - -> Prepare vreg. */ -.Lslow_next_acc_onbb2: - vl %v17,0(%r5,%r3) /* Load over boundary ... */ - lghi %r8,0 /* r8=0 -> no zero in this part of acc, - check for zero is in jump-target. */ - j .Lslow_next_acc_notonbb /* ... and search for zero in - fully loaded vreg again. */ - -.Lslow_end_null: - lghi %r1,0 /* Return null if no character is equal. */ - j .Lslow_end - -.Lslow_loop_found: - vlgvb %r4,%v18,7 /* Load byte index of found element. */ - srlg %r5,%r4,2 /* Convert byte-index to character-index. */ - vlgvf %r0,%v16,0(%r5) /* Get found element. */ - clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ - -.Lslow_index_found: - algfr %r1,%r4 /* Add found index of char to current len. */ -.Lslow_end_found: - la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ - -.Lslow_end: - /* Restore registers. */ - vlgvg %r6,%v30,0 - vlgvg %r8,%v31,0 - vlgvg %r9,%v31,1 - lgr %r2,%r1 - br %r14 -.Lfallback: - jg __wcspbrk_c -END(__wcspbrk_vx) -#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/sysdeps/s390/multiarch/wcspbrk.c b/sysdeps/s390/multiarch/wcspbrk.c deleted file mode 100644 index 97876328b5..0000000000 --- a/sysdeps/s390/multiarch/wcspbrk.c +++ /dev/null @@ -1,29 +0,0 @@ -/* Multiple versions of wcspbrk. - Copyright (C) 2015-2018 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) -# define wcspbrk __redirect_wcspbrk -# include <wchar.h> -# undef wcspbrk -# include <ifunc-resolve.h> - -s390_vx_libc_ifunc2_redirected (__redirect_wcspbrk, __wcspbrk, wcspbrk) - -#else -# include <wcsmbs/wcspbrk.c> -#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ |