diff options
Diffstat (limited to 'REORG.TODO/sysdeps/s390/multiarch')
124 files changed, 10942 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/s390/multiarch/8bit-generic.c b/REORG.TODO/sysdeps/s390/multiarch/8bit-generic.c new file mode 100644 index 0000000000..49f0ed8ae5 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/8bit-generic.c @@ -0,0 +1,398 @@ +/* Generic conversion to and from 8bit charsets - S390 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT + +# if defined HAVE_S390_VX_GCC_SUPPORT +# define ASM_CLOBBER_VR(NR) , NR +# else +# define ASM_CLOBBER_VR(NR) +# endif + +/* Generate the conversion loop routines without vector instructions as + fallback, if vector instructions aren't available at runtime. */ +# define IGNORE_ICONV_SKELETON +# define from_generic __from_generic_c +# define to_generic __to_generic_c +# include "iconvdata/8bit-generic.c" +# undef IGNORE_ICONV_SKELETON +# undef from_generic +# undef to_generic + +/* Generate the converion routines with vector instructions. The vector + routines can only be used with charsets where the maximum UCS4 value + fits in 1 byte size. Then the hardware translate-instruction is used + to translate between multiple generic characters and "1 byte UCS4" + characters at once. 
The vector instructions are used to convert between + the "1 byte UCS4" and UCS4. */ +# include <ifunc-resolve.h> + +# undef FROM_LOOP +# undef TO_LOOP +# define FROM_LOOP __from_generic_vx +# define TO_LOOP __to_generic_vx + +# define MIN_NEEDED_FROM 1 +# define MIN_NEEDED_TO 4 +# define ONE_DIRECTION 0 + +/* First define the conversion function from the 8bit charset to UCS4. */ +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +# define LOOPFCT FROM_LOOP +# define BODY_FROM_ORIG \ + { \ + uint32_t ch = to_ucs4[*inptr]; \ + \ + if (HAS_HOLES && __builtin_expect (ch == L'\0', 0) && *inptr != '\0') \ + { \ + /* This is an illegal character. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + } \ + \ + put32 (outptr, ch); \ + outptr += 4; \ + ++inptr; \ + } + +# define BODY \ + { \ + if (__builtin_expect (inend - inptr < 16, 1) \ + || outend - outptr < 64) \ + /* Convert remaining bytes with c code. */ \ + BODY_FROM_ORIG \ + else \ + { \ + /* Convert 16 ... 256 bytes at once with tr-instruction. */ \ + size_t index; \ + char buf[256]; \ + size_t loop_count = (inend - inptr) / 16; \ + if (loop_count > (outend - outptr) / 64) \ + loop_count = (outend - outptr) / 64; \ + if (loop_count > 16) \ + loop_count = 16; \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + " sllk %[R_I],%[R_LI],4\n\t" \ + " ahi %[R_I],-1\n\t" \ + /* Execute mvc and tr with correct len. */ \ + " exrl %[R_I],21f\n\t" \ + " exrl %[R_I],22f\n\t" \ + /* Post-processing. */ \ + " lghi %[R_I],0\n\t" \ + " vzero %%v0\n\t" \ + "0: \n\t" \ + /* Find invalid character - value is zero. */ \ + " vl %%v16,0(%[R_I],%[R_BUF])\n\t" \ + " vceqbs %%v23,%%v0,%%v16\n\t" \ + " jle 10f\n\t" \ + "1: \n\t" \ + /* Enlarge to UCS4. 
*/ \ + " vuplhb %%v17,%%v16\n\t" \ + " vupllb %%v18,%%v16\n\t" \ + " vuplhh %%v19,%%v17\n\t" \ + " vupllh %%v20,%%v17\n\t" \ + " vuplhh %%v21,%%v18\n\t" \ + " vupllh %%v22,%%v18\n\t" \ + /* Store 64bytes to buf_out. */ \ + " vstm %%v19,%%v22,0(%[R_OUT])\n\t" \ + " aghi %[R_I],16\n\t" \ + " la %[R_OUT],64(%[R_OUT])\n\t" \ + " brct %[R_LI],0b\n\t" \ + " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ + " j 20f\n\t" \ + "21: mvc 0(1,%[R_BUF]),0(%[R_IN])\n\t" \ + "22: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t" \ + /* Possibly invalid character found. */ \ + "10: \n\t" \ + /* Test if input was zero, too. */ \ + " vl %%v24,0(%[R_I],%[R_IN])\n\t" \ + " vceqb %%v24,%%v0,%%v24\n\t" \ + /* Zeros in buf (v23) and inptr (v24) are marked \ + with one bits. After xor, invalid characters \ + are marked as one bits. Proceed, if no \ + invalid characters are found. */ \ + " vx %%v24,%%v23,%%v24\n\t" \ + " vfenebs %%v24,%%v24,%%v0\n\t" \ + " jo 1b\n\t" \ + /* Found an invalid translation. \ + Store the preceding chars. */ \ + " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ + " vlgvb %[R_I],%%v24,7\n\t" \ + " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \ + " sll %[R_I],2\n\t" \ + " ahi %[R_I],-1\n\t" \ + " jl 20f\n\t" \ + " lgr %[R_LI],%[R_I]\n\t" \ + " vuplhb %%v17,%%v16\n\t" \ + " vuplhh %%v19,%%v17\n\t" \ + " vstl %%v19,%[R_I],0(%[R_OUT])\n\t" \ + " ahi %[R_I],-16\n\t" \ + " jl 11f\n\t" \ + " vupllh %%v20,%%v17\n\t" \ + " vstl %%v20,%[R_I],16(%[R_OUT])\n\t" \ + " ahi %[R_I],-16\n\t" \ + " jl 11f\n\t" \ + " vupllb %%v18,%%v16\n\t" \ + " vuplhh %%v21,%%v18\n\t" \ + " vstl %%v21,%[R_I],32(%[R_OUT])\n\t" \ + " ahi %[R_I],-16\n\t" \ + " jl 11f\n\t" \ + " vupllh %%v22,%%v18\n\t" \ + " vstl %%v22,%[R_I],48(%[R_OUT])\n\t" \ + "11: \n\t" \ + " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t" \ + "20: \n\t" \ + ".machine pop" \ + : /* outputs */ [R_IN] "+a" (inptr) \ + , [R_OUT] "+a" (outptr), [R_I] "=&a" (index) \ + , [R_LI] "+a" (loop_count) \ + : /* inputs */ [R_BUF] "a" (buf) \ + , [R_TBL] "a" (to_ucs1) \ + : /* clobber list*/ "memory", 
"cc" \ + ASM_CLOBBER_VR ("v0") ASM_CLOBBER_VR ("v16") \ + ASM_CLOBBER_VR ("v17") ASM_CLOBBER_VR ("v18") \ + ASM_CLOBBER_VR ("v19") ASM_CLOBBER_VR ("v20") \ + ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") \ + ASM_CLOBBER_VR ("v23") ASM_CLOBBER_VR ("v24") \ + ); \ + /* Error occured? */ \ + if (loop_count != 0) \ + { \ + /* Found an invalid character! */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); \ + } \ + } \ + } + +# define LOOP_NEED_FLAGS +# include <iconv/loop.c> + +/* Next, define the other direction - from UCS4 to 8bit charset. */ +# define MIN_NEEDED_INPUT MIN_NEEDED_TO +# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM +# define LOOPFCT TO_LOOP +# define BODY_TO_ORIG \ + { \ + uint32_t ch = get32 (inptr); \ + \ + if (__builtin_expect (ch >= sizeof (from_ucs4) / sizeof (from_ucs4[0]), 0)\ + || (__builtin_expect (from_ucs4[ch], '\1') == '\0' && ch != 0)) \ + { \ + UNICODE_TAG_HANDLER (ch, 4); \ + \ + /* This is an illegal character. */ \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + \ + *outptr++ = from_ucs4[ch]; \ + inptr += 4; \ + } +# define BODY \ + { \ + if (__builtin_expect (inend - inptr < 64, 1) \ + || outend - outptr < 16) \ + /* Convert remaining bytes with c code. */ \ + BODY_TO_ORIG \ + else \ + { \ + /* Convert 64 ... 1024 bytes at once with tr-instruction. */ \ + size_t index, tmp; \ + char buf[256]; \ + size_t loop_count = (inend - inptr) / 64; \ + uint32_t max = sizeof (from_ucs4) / sizeof (from_ucs4[0]); \ + if (loop_count > (outend - outptr) / 16) \ + loop_count = (outend - outptr) / 16; \ + if (loop_count > 16) \ + loop_count = 16; \ + size_t remaining_loop_count = loop_count; \ + /* Step 1: Check for ch>=max, ch == 0 and shorten to bytes. \ + (ch == 0 is no error, but is handled differently) */ \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + /* Setup to check for ch >= max. 
*/ \ + " vzero %%v21\n\t" \ + " vleih %%v21,-24576,0\n\t" /* element 0: > */ \ + " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ + " vlvgf %%v20,%[R_MAX],0\n\t" /* element 0: val */ \ + /* Process in 64byte - 16 characters blocks. */ \ + " lghi %[R_I],0\n\t" \ + " lghi %[R_TMP],0\n\t" \ + "0: \n\t" \ + " vlm %%v16,%%v19,0(%[R_IN])\n\t" \ + /* Test for ch >= max and ch == 0. */ \ + " vstrczfs %%v22,%%v16,%%v20,%%v21\n\t" \ + " jno 10f\n\t" \ + " vstrczfs %%v22,%%v17,%%v20,%%v21\n\t" \ + " jno 11f\n\t" \ + " vstrczfs %%v22,%%v18,%%v20,%%v21\n\t" \ + " jno 12f\n\t" \ + " vstrczfs %%v22,%%v19,%%v20,%%v21\n\t" \ + " jno 13f\n\t" \ + /* Shorten to byte values. */ \ + " vpkf %%v16,%%v16,%%v17\n\t" \ + " vpkf %%v18,%%v18,%%v19\n\t" \ + " vpkh %%v16,%%v16,%%v18\n\t" \ + /* Store 16bytes to buf. */ \ + " vst %%v16,0(%[R_I],%[R_BUF])\n\t" \ + /* Loop until all blocks are processed. */ \ + " la %[R_IN],64(%[R_IN])\n\t" \ + " aghi %[R_I],16\n\t" \ + " brct %[R_LI],0b\n\t" \ + " j 20f\n\t" \ + /* Found error ch >= max or ch == 0. */ \ + "13: aghi %[R_TMP],4\n\t" \ + "12: aghi %[R_TMP],4\n\t" \ + "11: aghi %[R_TMP],4\n\t" \ + "10: vlgvb %[R_I],%%v22,7\n\t" \ + " srlg %[R_I],%[R_I],2\n\t" \ + " agr %[R_I],%[R_TMP]\n\t" \ + "20: \n\t" \ + ".machine pop" \ + : /* outputs */ [R_IN] "+a" (inptr) \ + , [R_I] "=&a" (index) \ + , [R_TMP] "=d" (tmp) \ + , [R_LI] "+d" (remaining_loop_count) \ + : /* inputs */ [R_BUF] "a" (buf) \ + , [R_MAX] "d" (max) \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ + ASM_CLOBBER_VR ("v22") \ + ); \ + /* Error occured in step 1? An error (ch >= max || ch == 0) \ + occured, if remaining_loop_count > 0. The error occured \ + at character-index (index) after already processed blocks. 
*/ \ + loop_count -= remaining_loop_count; \ + if (loop_count > 0) \ + { \ + /* Step 2: Translate already processed blocks in buf and \ + check for errors (from_ucs4[ch] == 0). */ \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + " sllk %[R_I],%[R_LI],4\n\t" \ + " ahi %[R_I],-1\n\t" \ + /* Execute tr with correct len. */ \ + " exrl %[R_I],21f\n\t" \ + /* Post-processing. */ \ + " lghi %[R_I],0\n\t" \ + "0: \n\t" \ + /* Find invalid character - value == 0. */ \ + " vl %%v16,0(%[R_I],%[R_BUF])\n\t" \ + " vfenezbs %%v17,%%v16,%%v16\n\t" \ + " je 10f\n\t" \ + /* Store 16bytes to buf_out. */ \ + " vst %%v16,0(%[R_I],%[R_OUT])\n\t" \ + " aghi %[R_I],16\n\t" \ + " brct %[R_LI],0b\n\t" \ + " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ + " j 20f\n\t" \ + "21: tr 0(1,%[R_BUF]),0(%[R_TBL])\n\t" \ + /* Found an error: from_ucs4[ch] == 0. */ \ + "10: la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \ + " vlgvb %[R_I],%%v17,7\n\t" \ + "20: \n\t" \ + ".machine pop" \ + : /* outputs */ [R_OUT] "+a" (outptr) \ + , [R_I] "=&a" (tmp) \ + , [R_LI] "+d" (loop_count) \ + : /* inputs */ [R_BUF] "a" (buf) \ + , [R_TBL] "a" (from_ucs4) \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") \ + ASM_CLOBBER_VR ("v17") \ + ); \ + /* Error occurred in processed bytes of step 2? \ + Thus possible error in step 1 is obsolete. */ \ + if (tmp < 16) \ + { \ + index = tmp; \ + inptr -= loop_count * 64; /* Rewind inptr by the blocks step 2 did not complete. */ \ + } \ + } \ + /* Error occurred in step 1/2? */ \ + if (index < 16) \ + { \ + /* Found an invalid character (see step 2) or zero \ + (see step 1) at index! Convert the chars before index \ + manually. If there is a zero at index detected by step 1, \ + there could be invalid characters before this zero.
*/ \ + int i; \ + uint32_t ch; \ + for (i = 0; i < index; i++) \ + { \ + ch = get32 (inptr); \ + if (__builtin_expect (from_ucs4[ch], '\1') == '\0') \ + break; \ + *outptr++ = from_ucs4[ch]; \ + inptr += 4; \ + } \ + if (i == index) \ + { \ + ch = get32 (inptr); \ + if (ch == 0) \ + { \ + /* This is no error, but handled differently. */ \ + *outptr++ = from_ucs4[ch]; \ + inptr += 4; \ + continue; \ + } \ + } \ + \ + UNICODE_TAG_HANDLER (ch, 4); \ + \ + /* This is an illegal character. */ \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + } \ + } + +# define LOOP_NEED_FLAGS +# include <iconv/loop.c> + + +/* Generate ifunc'ed loop function. */ +s390_libc_ifunc_expr (__from_generic_c, __from_generic, + (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256 + && hwcap & HWCAP_S390_VX) + ? __from_generic_vx + : __from_generic_c); + +s390_libc_ifunc_expr (__to_generic_c, __to_generic, + (sizeof (from_ucs4) / sizeof (from_ucs4[0]) <= 256 + && hwcap & HWCAP_S390_VX) + ? __to_generic_vx + : __to_generic_c); + +strong_alias (__to_generic_c_single, __to_generic_single) + +# undef FROM_LOOP +# undef TO_LOOP +# define FROM_LOOP __from_generic +# define TO_LOOP __to_generic +# include <iconv/skeleton.c> + +#else +/* Generate this module without ifunc if build environment lacks vector + support. Instead the common 8bit-generic.c is used. 
*/ +# include "iconvdata/8bit-generic.c" +#endif /* !defined HAVE_S390_VX_ASM_SUPPORT */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/Makefile b/REORG.TODO/sysdeps/s390/multiarch/Makefile new file mode 100644 index 0000000000..c893ebc565 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/Makefile @@ -0,0 +1,59 @@ +ifeq ($(subdir),string) +sysdep_routines += strlen strlen-vx strlen-c \ + strnlen strnlen-vx strnlen-c \ + strcpy strcpy-vx \ + stpcpy stpcpy-vx stpcpy-c \ + strncpy strncpy-vx \ + stpncpy stpncpy-vx stpncpy-c \ + strcat strcat-vx strcat-c \ + strncat strncat-vx strncat-c \ + strcmp strcmp-vx \ + strncmp strncmp-vx strncmp-c \ + strchr strchr-vx strchr-c \ + strchrnul strchrnul-vx strchrnul-c \ + strrchr strrchr-vx strrchr-c \ + strspn strspn-vx strspn-c \ + strpbrk strpbrk-vx strpbrk-c \ + strcspn strcspn-vx strcspn-c \ + memchr memchr-vx \ + rawmemchr rawmemchr-vx rawmemchr-c \ + memccpy memccpy-vx memccpy-c \ + memrchr memrchr-vx memrchr-c \ + mempcpy +endif + +ifeq ($(subdir),wcsmbs) +sysdep_routines += wcslen wcslen-vx wcslen-c \ + wcsnlen wcsnlen-vx wcsnlen-c \ + wcscpy wcscpy-vx wcscpy-c \ + wcpcpy wcpcpy-vx wcpcpy-c \ + wcsncpy wcsncpy-vx wcsncpy-c \ + wcpncpy wcpncpy-vx wcpncpy-c \ + wcscat wcscat-vx wcscat-c \ + wcsncat wcsncat-vx wcsncat-c \ + wcscmp wcscmp-vx wcscmp-c \ + wcsncmp wcsncmp-vx wcsncmp-c \ + wcschr wcschr-vx wcschr-c \ + wcschrnul wcschrnul-vx wcschrnul-c \ + wcsrchr wcsrchr-vx wcsrchr-c \ + wcsspn wcsspn-vx wcsspn-c \ + wcspbrk wcspbrk-vx wcspbrk-c \ + wcscspn wcscspn-vx wcscspn-c \ + wmemchr wmemchr-vx wmemchr-c \ + wmemset wmemset-vx wmemset-c \ + wmemcmp wmemcmp-vx wmemcmp-c +endif + +ifeq ($(subdir),iconvdata) +override define generate-8bit-table +$(make-target-directory) +LC_ALL=C $(SHELL) ./gen-8bit.sh $< > $(@:stmp=T) +LC_ALL=C $(SHELL) ../sysdeps/s390/multiarch/gen-8bit.sh $< >> $(@:stmp=T) +$(move-if-change) $(@:stmp=T) $(@:stmp=h) +touch $@ +endef +endif + +ifeq ($(subdir),iconv) +sysdep_routines += gconv_simple 
+endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/gconv_simple.c b/REORG.TODO/sysdeps/s390/multiarch/gconv_simple.c new file mode 100644 index 0000000000..75632873e3 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/gconv_simple.c @@ -0,0 +1,1266 @@ +/* Simple transformations functions - s390 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT +# include <ifunc-resolve.h> + +# if defined HAVE_S390_VX_GCC_SUPPORT +# define ASM_CLOBBER_VR(NR) , NR +# else +# define ASM_CLOBBER_VR(NR) +# endif + +# define ICONV_C_NAME(NAME) __##NAME##_c +# define ICONV_VX_NAME(NAME) __##NAME##_vx +# define ICONV_VX_IFUNC(FUNC) \ + extern __typeof (ICONV_C_NAME (FUNC)) __##FUNC; \ + s390_vx_libc_ifunc (__##FUNC) \ + int FUNC (struct __gconv_step *step, struct __gconv_step_data *data, \ + const unsigned char **inptrp, const unsigned char *inend, \ + unsigned char **outbufstart, size_t *irreversible, \ + int do_flush, int consume_incomplete) \ + { \ + return __##FUNC (step, data, inptrp, inend,outbufstart, \ + irreversible, do_flush, consume_incomplete); \ + } +# define ICONV_VX_SINGLE(NAME) \ + static __typeof (NAME##_single) __##NAME##_vx_single __attribute__((alias(#NAME "_single"))); + +/* Generate the transformations which are used, if the target machine does not + support vector instructions. */ +# define __gconv_transform_ascii_internal \ + ICONV_C_NAME (__gconv_transform_ascii_internal) +# define __gconv_transform_internal_ascii \ + ICONV_C_NAME (__gconv_transform_internal_ascii) +# define __gconv_transform_internal_ucs4le \ + ICONV_C_NAME (__gconv_transform_internal_ucs4le) +# define __gconv_transform_ucs4_internal \ + ICONV_C_NAME (__gconv_transform_ucs4_internal) +# define __gconv_transform_ucs4le_internal \ + ICONV_C_NAME (__gconv_transform_ucs4le_internal) +# define __gconv_transform_ucs2_internal \ + ICONV_C_NAME (__gconv_transform_ucs2_internal) +# define __gconv_transform_ucs2reverse_internal \ + ICONV_C_NAME (__gconv_transform_ucs2reverse_internal) +# define __gconv_transform_internal_ucs2 \ + ICONV_C_NAME (__gconv_transform_internal_ucs2) +# define __gconv_transform_internal_ucs2reverse \ + ICONV_C_NAME (__gconv_transform_internal_ucs2reverse) + + +# include <iconv/gconv_simple.c> + +# undef __gconv_transform_ascii_internal +# undef __gconv_transform_internal_ascii 
+# undef __gconv_transform_internal_ucs4le +# undef __gconv_transform_ucs4_internal +# undef __gconv_transform_ucs4le_internal +# undef __gconv_transform_ucs2_internal +# undef __gconv_transform_ucs2reverse_internal +# undef __gconv_transform_internal_ucs2 +# undef __gconv_transform_internal_ucs2reverse + +/* Now define the functions with vector support. */ +# if defined __s390x__ +# define CONVERT_32BIT_SIZE_T(REG) +# else +# define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t" +# endif + +/* Convert from ISO 646-IRV to the internal (UCS4-like) format. */ +# define DEFINE_INIT 0 +# define DEFINE_FINI 0 +# define MIN_NEEDED_FROM 1 +# define MIN_NEEDED_TO 4 +# define FROM_DIRECTION 1 +# define FROM_LOOP ICONV_VX_NAME (ascii_internal_loop) +# define TO_LOOP ICONV_VX_NAME (ascii_internal_loop) /* This is not used. */ +# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ascii_internal) +# define ONE_DIRECTION 1 + +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +# define LOOPFCT FROM_LOOP +# define BODY_ORIG_ERROR \ + /* The value is too large. We don't try transliteration here since \ + this is not an error because of the lack of possibilities to \ + represent the result. This is a genuine bug in the input since \ + ASCII does not allow such values. */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (1); + +# define BODY_ORIG \ + { \ + if (__glibc_unlikely (*inptr > '\x7f')) \ + { \ + BODY_ORIG_ERROR \ + } \ + else \ + { \ + /* It's an one byte sequence. */ \ + *((uint32_t *) outptr) = *inptr++; \ + outptr += sizeof (uint32_t); \ + } \ + } +# define BODY \ + { \ + size_t len = inend - inptr; \ + if (len > (outend - outptr) / 4) \ + len = (outend - outptr) / 4; \ + size_t loop_count, tmp; \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + CONVERT_32BIT_SIZE_T ([R_LEN]) \ + " vrepib %%v30,0x7f\n\t" /* For compare > 0x7f. 
*/ \ + " srlg %[R_LI],%[R_LEN],4\n\t" \ + " vrepib %%v31,0x20\n\t" \ + " clgije %[R_LI],0,1f\n\t" \ + "0: \n\t" /* Handle 16-byte blocks. */ \ + " vl %%v16,0(%[R_IN])\n\t" \ + /* Checking for values > 0x7f. */ \ + " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ + " jno 10f\n\t" \ + /* Enlarge to UCS4. */ \ + " vuplhb %%v17,%%v16\n\t" \ + " vupllb %%v18,%%v16\n\t" \ + " vuplhh %%v19,%%v17\n\t" \ + " vupllh %%v20,%%v17\n\t" \ + " vuplhh %%v21,%%v18\n\t" \ + " vupllh %%v22,%%v18\n\t" \ + /* Store 64bytes to buf_out. */ \ + " vstm %%v19,%%v22,0(%[R_OUT])\n\t" \ + " la %[R_IN],16(%[R_IN])\n\t" \ + " la %[R_OUT],64(%[R_OUT])\n\t" \ + " brctg %[R_LI],0b\n\t" \ + " lghi %[R_LI],15\n\t" \ + " ngr %[R_LEN],%[R_LI]\n\t" \ + " je 20f\n\t" /* Jump away if no remaining bytes. */ \ + /* Handle remaining bytes. */ \ + "1: aghik %[R_LI],%[R_LEN],-1\n\t" \ + " jl 20f\n\t" /* Jump away if no remaining bytes. */ \ + " vll %%v16,%[R_LI],0(%[R_IN])\n\t" \ + /* Checking for values > 0x7f. */ \ + " vstrcbs %%v17,%%v16,%%v30,%%v31\n\t" \ + " vlgvb %[R_TMP],%%v17,7\n\t" \ + " clr %[R_TMP],%[R_LI]\n\t" \ + " locrh %[R_TMP],%[R_LEN]\n\t" \ + " locghih %[R_LEN],0\n\t" \ + " j 12f\n\t" \ + "10:\n\t" \ + /* Found a value > 0x7f. \ + Store the preceding chars. 
*/ \ + " vlgvb %[R_TMP],%%v17,7\n\t" \ + "12: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" \ + " sllk %[R_TMP],%[R_TMP],2\n\t" \ + " ahi %[R_TMP],-1\n\t" \ + " jl 20f\n\t" \ + " lgr %[R_LI],%[R_TMP]\n\t" \ + " vuplhb %%v17,%%v16\n\t" \ + " vuplhh %%v19,%%v17\n\t" \ + " vstl %%v19,%[R_LI],0(%[R_OUT])\n\t" \ + " ahi %[R_LI],-16\n\t" \ + " jl 11f\n\t" \ + " vupllh %%v20,%%v17\n\t" \ + " vstl %%v20,%[R_LI],16(%[R_OUT])\n\t" \ + " ahi %[R_LI],-16\n\t" \ + " jl 11f\n\t" \ + " vupllb %%v18,%%v16\n\t" \ + " vuplhh %%v21,%%v18\n\t" \ + " vstl %%v21,%[R_LI],32(%[R_OUT])\n\t" \ + " ahi %[R_LI],-16\n\t" \ + " jl 11f\n\t" \ + " vupllh %%v22,%%v18\n\t" \ + " vstl %%v22,%[R_LI],48(%[R_OUT])\n\t" \ + "11:\n\t" \ + " la %[R_OUT],1(%[R_TMP],%[R_OUT])\n\t" \ + "20:\n\t" \ + ".machine pop" \ + : /* outputs */ [R_OUT] "+a" (outptr) \ + , [R_IN] "+a" (inptr) \ + , [R_LEN] "+d" (len) \ + , [R_LI] "=d" (loop_count) \ + , [R_TMP] "=a" (tmp) \ + : /* inputs */ \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30") \ + ASM_CLOBBER_VR ("v31") \ + ); \ + if (len > 0) \ + { \ + /* Found an invalid character at the next input byte. */ \ + BODY_ORIG_ERROR \ + } \ + } + +# define LOOP_NEED_FLAGS +# include <iconv/loop.c> +# include <iconv/skeleton.c> +# undef BODY_ORIG +# undef BODY_ORIG_ERROR +ICONV_VX_IFUNC (__gconv_transform_ascii_internal) + +/* Convert from the internal (UCS4-like) format to ISO 646-IRV. */ +# define DEFINE_INIT 0 +# define DEFINE_FINI 0 +# define MIN_NEEDED_FROM 4 +# define MIN_NEEDED_TO 1 +# define FROM_DIRECTION 1 +# define FROM_LOOP ICONV_VX_NAME (internal_ascii_loop) +# define TO_LOOP ICONV_VX_NAME (internal_ascii_loop) /* This is not used. 
*/ +# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ascii) +# define ONE_DIRECTION 1 + +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +# define LOOPFCT FROM_LOOP +# define BODY_ORIG_ERROR \ + UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); + +# define BODY_ORIG \ + { \ + if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f)) \ + { \ + BODY_ORIG_ERROR \ + } \ + else \ + { \ + /* It's an one byte sequence. */ \ + *outptr++ = *((const uint32_t *) inptr); \ + inptr += sizeof (uint32_t); \ + } \ + } +# define BODY \ + { \ + size_t len = (inend - inptr) / 4; \ + if (len > outend - outptr) \ + len = outend - outptr; \ + size_t loop_count, tmp, tmp2; \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + CONVERT_32BIT_SIZE_T ([R_LEN]) \ + /* Setup to check for ch > 0x7f. */ \ + " vzero %%v21\n\t" \ + " srlg %[R_LI],%[R_LEN],4\n\t" \ + " vleih %%v21,8192,0\n\t" /* element 0: > */ \ + " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \ + " vleif %%v20,127,0\n\t" /* element 0: 127 */ \ + " lghi %[R_TMP],0\n\t" \ + " clgije %[R_LI],0,1f\n\t" \ + "0:\n\t" \ + " vlm %%v16,%%v19,0(%[R_IN])\n\t" \ + /* Shorten to byte values. */ \ + " vpkf %%v23,%%v16,%%v17\n\t" \ + " vpkf %%v24,%%v18,%%v19\n\t" \ + " vpkh %%v23,%%v23,%%v24\n\t" \ + /* Checking for values > 0x7f. */ \ + " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ + " jno 10f\n\t" \ + " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ + " jno 11f\n\t" \ + " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \ + " jno 12f\n\t" \ + " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \ + " jno 13f\n\t" \ + /* Store 16bytes to outptr. */ \ + " vst %%v23,0(%[R_OUT])\n\t" \ + " la %[R_IN],64(%[R_IN])\n\t" \ + " la %[R_OUT],16(%[R_OUT])\n\t" \ + " brctg %[R_LI],0b\n\t" \ + " lghi %[R_LI],15\n\t" \ + " ngr %[R_LEN],%[R_LI]\n\t" \ + " je 20f\n\t" /* Jump away if no remaining bytes. */ \ + /* Handle remaining bytes. 
*/ \ + "1: sllg %[R_LI],%[R_LEN],2\n\t" \ + " aghi %[R_LI],-1\n\t" \ + " jl 20f\n\t" /* Jump away if no remaining bytes. */ \ + /* Load remaining 1...63 bytes. */ \ + " vll %%v16,%[R_LI],0(%[R_IN])\n\t" \ + " ahi %[R_LI],-16\n\t" \ + " jl 2f\n\t" \ + " vll %%v17,%[R_LI],16(%[R_IN])\n\t" \ + " ahi %[R_LI],-16\n\t" \ + " jl 2f\n\t" \ + " vll %%v18,%[R_LI],32(%[R_IN])\n\t" \ + " ahi %[R_LI],-16\n\t" \ + " jl 2f\n\t" \ + " vll %%v19,%[R_LI],48(%[R_IN])\n\t" \ + "2:\n\t" \ + /* Shorten to byte values. */ \ + " vpkf %%v23,%%v16,%%v17\n\t" \ + " vpkf %%v24,%%v18,%%v19\n\t" \ + " vpkh %%v23,%%v23,%%v24\n\t" \ + " sllg %[R_LI],%[R_LEN],2\n\t" \ + " aghi %[R_LI],-16\n\t" \ + " jl 3f\n\t" /* v16 is not fully loaded. */ \ + " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ + " jno 10f\n\t" \ + " aghi %[R_LI],-16\n\t" \ + " jl 4f\n\t" /* v17 is not fully loaded. */ \ + " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ + " jno 11f\n\t" \ + " aghi %[R_LI],-16\n\t" \ + " jl 5f\n\t" /* v18 is not fully loaded. */ \ + " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \ + " jno 12f\n\t" \ + " aghi %[R_LI],-16\n\t" \ + /* v19 is not fully loaded. */ \ + " lghi %[R_TMP],12\n\t" \ + " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \ + "6: vlgvb %[R_I],%%v22,7\n\t" \ + " aghi %[R_LI],16\n\t" \ + " clrjl %[R_I],%[R_LI],14f\n\t" \ + " lgr %[R_I],%[R_LEN]\n\t" \ + " lghi %[R_LEN],0\n\t" \ + " j 15f\n\t" \ + "3: vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \ + " j 6b\n\t" \ + "4: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ + " lghi %[R_TMP],4\n\t" \ + " j 6b\n\t" \ + "5: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \ + " lghi %[R_TMP],8\n\t" \ + " j 6b\n\t" \ + /* Found a value > 0x7f. */ \ + "13: ahi %[R_TMP],4\n\t" \ + "12: ahi %[R_TMP],4\n\t" \ + "11: ahi %[R_TMP],4\n\t" \ + "10: vlgvb %[R_I],%%v22,7\n\t" \ + "14: srlg %[R_I],%[R_I],2\n\t" \ + " agr %[R_I],%[R_TMP]\n\t" \ + " je 20f\n\t" \ + /* Store characters before invalid one... */ \ + "15: aghi %[R_I],-1\n\t" \ + " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \ + /* ... and update pointers. 
*/ \ + " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \ + " sllg %[R_I],%[R_I],2\n\t" \ + " la %[R_IN],4(%[R_I],%[R_IN])\n\t" \ + "20:\n\t" \ + ".machine pop" \ + : /* outputs */ [R_OUT] "+a" (outptr) \ + , [R_IN] "+a" (inptr) \ + , [R_LEN] "+d" (len) \ + , [R_LI] "=d" (loop_count) \ + , [R_I] "=a" (tmp2) \ + , [R_TMP] "=d" (tmp) \ + : /* inputs */ \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ + ASM_CLOBBER_VR ("v24") \ + ); \ + if (len > 0) \ + { \ + /* Found an invalid character > 0x7f at next character. */ \ + BODY_ORIG_ERROR \ + } \ + } +# define LOOP_NEED_FLAGS +# include <iconv/loop.c> +# include <iconv/skeleton.c> +# undef BODY_ORIG +# undef BODY_ORIG_ERROR +ICONV_VX_IFUNC (__gconv_transform_internal_ascii) + + +/* Convert from internal UCS4 to UCS4 little endian form. */ +# define DEFINE_INIT 0 +# define DEFINE_FINI 0 +# define MIN_NEEDED_FROM 4 +# define MIN_NEEDED_TO 4 +# define FROM_DIRECTION 1 +# define FROM_LOOP ICONV_VX_NAME (internal_ucs4le_loop) +# define TO_LOOP ICONV_VX_NAME (internal_ucs4le_loop) /* This is not used. 
*/ +# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs4le) +# define ONE_DIRECTION 0 + +static inline int +__attribute ((always_inline)) +ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, + unsigned char *outend, + size_t *irreversible) +{ + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + int result; + size_t len = MIN (inend - inptr, outend - outptr) / 4; + size_t loop_count; + __asm__ volatile (".machine push\n\t" + ".machine \"z13\"\n\t" + ".machinemode \"zarch_nohighgprs\"\n\t" + CONVERT_32BIT_SIZE_T ([R_LEN]) + " bras %[R_LI],1f\n\t" + /* Vector permute mask: */ + " .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t" + "1: vl %%v20,0(%[R_LI])\n\t" + /* Process 64byte (16char) blocks. */ + " srlg %[R_LI],%[R_LEN],4\n\t" + " clgije %[R_LI],0,10f\n\t" + "0: vlm %%v16,%%v19,0(%[R_IN])\n\t" + " vperm %%v16,%%v16,%%v16,%%v20\n\t" + " vperm %%v17,%%v17,%%v17,%%v20\n\t" + " vperm %%v18,%%v18,%%v18,%%v20\n\t" + " vperm %%v19,%%v19,%%v19,%%v20\n\t" + " vstm %%v16,%%v19,0(%[R_OUT])\n\t" + " la %[R_IN],64(%[R_IN])\n\t" + " la %[R_OUT],64(%[R_OUT])\n\t" + " brctg %[R_LI],0b\n\t" + " llgfr %[R_LEN],%[R_LEN]\n\t" + " nilf %[R_LEN],15\n\t" + /* Process 16byte (4char) blocks. */ + "10: srlg %[R_LI],%[R_LEN],2\n\t" + " clgije %[R_LI],0,20f\n\t" + "11: vl %%v16,0(%[R_IN])\n\t" + " vperm %%v16,%%v16,%%v16,%%v20\n\t" + " vst %%v16,0(%[R_OUT])\n\t" + " la %[R_IN],16(%[R_IN])\n\t" + " la %[R_OUT],16(%[R_OUT])\n\t" + " brctg %[R_LI],11b\n\t" + " nill %[R_LEN],3\n\t" + /* Process <16bytes. 
*/ + "20: sll %[R_LEN],2\n\t" + " ahi %[R_LEN],-1\n\t" + " jl 30f\n\t" + " vll %%v16,%[R_LEN],0(%[R_IN])\n\t" + " vperm %%v16,%%v16,%%v16,%%v20\n\t" + " vstl %%v16,%[R_LEN],0(%[R_OUT])\n\t" + " la %[R_IN],1(%[R_LEN],%[R_IN])\n\t" + " la %[R_OUT],1(%[R_LEN],%[R_OUT])\n\t" + "30: \n\t" + ".machine pop" + : /* outputs */ [R_OUT] "+a" (outptr) + , [R_IN] "+a" (inptr) + , [R_LI] "=a" (loop_count) + , [R_LEN] "+a" (len) + : /* inputs */ + : /* clobber list*/ "memory", "cc" + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") + ASM_CLOBBER_VR ("v20") + ); + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +ICONV_VX_SINGLE (internal_ucs4le_loop) +# include <iconv/skeleton.c> +ICONV_VX_IFUNC (__gconv_transform_internal_ucs4le) + + +/* Transform from UCS4 to the internal, UCS4-like format. Unlike + for the other direction we have to check for correct values here. */ +# define DEFINE_INIT 0 +# define DEFINE_FINI 0 +# define MIN_NEEDED_FROM 4 +# define MIN_NEEDED_TO 4 +# define FROM_DIRECTION 1 +# define FROM_LOOP ICONV_VX_NAME (ucs4_internal_loop) +# define TO_LOOP ICONV_VX_NAME (ucs4_internal_loop) /* This is not used. 
*/ +# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs4_internal) +# define ONE_DIRECTION 0 + + +static inline int +__attribute ((always_inline)) +ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, + unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + int result; + size_t len, loop_count; + do + { + len = MIN (inend - inptr, outend - outptr) / 4; + __asm__ volatile (".machine push\n\t" + ".machine \"z13\"\n\t" + ".machinemode \"zarch_nohighgprs\"\n\t" + CONVERT_32BIT_SIZE_T ([R_LEN]) + /* Setup to check for ch > 0x7fffffff. */ + " larl %[R_LI],9f\n\t" + " vlm %%v20,%%v21,0(%[R_LI])\n\t" + " srlg %[R_LI],%[R_LEN],2\n\t" + " clgije %[R_LI],0,1f\n\t" + /* Process 16byte (4char) blocks. */ + "0: vl %%v16,0(%[R_IN])\n\t" + " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" + " jno 10f\n\t" + " vst %%v16,0(%[R_OUT])\n\t" + " la %[R_IN],16(%[R_IN])\n\t" + " la %[R_OUT],16(%[R_OUT])\n\t" + " brctg %[R_LI],0b\n\t" + " llgfr %[R_LEN],%[R_LEN]\n\t" + " nilf %[R_LEN],3\n\t" + /* Process <16bytes. */ + "1: sll %[R_LEN],2\n\t" + " ahik %[R_LI],%[R_LEN],-1\n\t" + " jl 20f\n\t" /* No further bytes available. */ + " vll %%v16,%[R_LI],0(%[R_IN])\n\t" + " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" + " vlgvb %[R_LI],%%v22,7\n\t" + " clr %[R_LI],%[R_LEN]\n\t" + " locgrhe %[R_LI],%[R_LEN]\n\t" + " locghihe %[R_LEN],0\n\t" + " j 11f\n\t" + /* v20: Vector string range compare values. */ + "9: .long 0x7fffffff,0x0,0x0,0x0\n\t" + /* v21: Vector string range compare control-bits. + element 0: >; element 1: =<> (always true) */ + " .long 0x20000000,0xE0000000,0x0,0x0\n\t" + /* Found a value > 0x7fffffff. */ + "10: vlgvb %[R_LI],%%v22,7\n\t" + /* Store characters before invalid one. 
*/ + "11: aghi %[R_LI],-1\n\t" + " jl 20f\n\t" + " vstl %%v16,%[R_LI],0(%[R_OUT])\n\t" + " la %[R_IN],1(%[R_LI],%[R_IN])\n\t" + " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t" + "20:\n\t" + ".machine pop" + : /* outputs */ [R_OUT] "+a" (outptr) + , [R_IN] "+a" (inptr) + , [R_LI] "=a" (loop_count) + , [R_LEN] "+d" (len) + : /* inputs */ + : /* clobber list*/ "memory", "cc" + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20") + ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") + ); + if (len > 0) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + inptr += 4; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + } + while (len > 0); + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*outptrp + 4 > outend) + result = __GCONV_FULL_OUTPUT; + else + result = __GCONV_INCOMPLETE_INPUT; + + return result; +} + +ICONV_VX_SINGLE (ucs4_internal_loop) +# include <iconv/skeleton.c> +ICONV_VX_IFUNC (__gconv_transform_ucs4_internal) + + +/* Transform from UCS4-LE to the internal encoding. */ +# define DEFINE_INIT 0 +# define DEFINE_FINI 0 +# define MIN_NEEDED_FROM 4 +# define MIN_NEEDED_TO 4 +# define FROM_DIRECTION 1 +# define FROM_LOOP ICONV_VX_NAME (ucs4le_internal_loop) +# define TO_LOOP ICONV_VX_NAME (ucs4le_internal_loop) /* This is not used. 
*/ +# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs4le_internal) +# define ONE_DIRECTION 0 + +static inline int +__attribute ((always_inline)) +ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step, + struct __gconv_step_data *step_data, + const unsigned char **inptrp, + const unsigned char *inend, + unsigned char **outptrp, + unsigned char *outend, + size_t *irreversible) +{ + int flags = step_data->__flags; + const unsigned char *inptr = *inptrp; + unsigned char *outptr = *outptrp; + int result; + size_t len, loop_count; + do + { + len = MIN (inend - inptr, outend - outptr) / 4; + __asm__ volatile (".machine push\n\t" + ".machine \"z13\"\n\t" + ".machinemode \"zarch_nohighgprs\"\n\t" + CONVERT_32BIT_SIZE_T ([R_LEN]) + /* Setup to check for ch > 0x7fffffff. */ + " larl %[R_LI],9f\n\t" + " vlm %%v20,%%v22,0(%[R_LI])\n\t" + " srlg %[R_LI],%[R_LEN],2\n\t" + " clgije %[R_LI],0,1f\n\t" + /* Process 16byte (4char) blocks. */ + "0: vl %%v16,0(%[R_IN])\n\t" + " vperm %%v16,%%v16,%%v16,%%v22\n\t" + " vstrcfs %%v23,%%v16,%%v20,%%v21\n\t" + " jno 10f\n\t" + " vst %%v16,0(%[R_OUT])\n\t" + " la %[R_IN],16(%[R_IN])\n\t" + " la %[R_OUT],16(%[R_OUT])\n\t" + " brctg %[R_LI],0b\n\t" + " llgfr %[R_LEN],%[R_LEN]\n\t" + " nilf %[R_LEN],3\n\t" + /* Process <16bytes. */ + "1: sll %[R_LEN],2\n\t" + " ahik %[R_LI],%[R_LEN],-1\n\t" + " jl 20f\n\t" /* No further bytes available. */ + " vll %%v16,%[R_LI],0(%[R_IN])\n\t" + " vperm %%v16,%%v16,%%v16,%%v22\n\t" + " vstrcfs %%v23,%%v16,%%v20,%%v21\n\t" + " vlgvb %[R_LI],%%v23,7\n\t" + " clr %[R_LI],%[R_LEN]\n\t" + " locgrhe %[R_LI],%[R_LEN]\n\t" + " locghihe %[R_LEN],0\n\t" + " j 11f\n\t" + /* v20: Vector string range compare values. */ + "9: .long 0x7fffffff,0x0,0x0,0x0\n\t" + /* v21: Vector string range compare control-bits. + element 0: >; element 1: =<> (always true) */ + " .long 0x20000000,0xE0000000,0x0,0x0\n\t" + /* v22: Vector permute mask. 
*/ + " .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t" + /* Found a value > 0x7fffffff. */ + "10: vlgvb %[R_LI],%%v23,7\n\t" + /* Store characters before invalid one. */ + "11: aghi %[R_LI],-1\n\t" + " jl 20f\n\t" + " vstl %%v16,%[R_LI],0(%[R_OUT])\n\t" + " la %[R_IN],1(%[R_LI],%[R_IN])\n\t" + " la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t" + "20:\n\t" + ".machine pop" + : /* outputs */ [R_OUT] "+a" (outptr) + , [R_IN] "+a" (inptr) + , [R_LI] "=a" (loop_count) + , [R_LEN] "+d" (len) + : /* inputs */ + : /* clobber list*/ "memory", "cc" + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20") + ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22") + ASM_CLOBBER_VR ("v23") + ); + if (len > 0) + { + /* The value is too large. We don't try transliteration here since + this is not an error because of the lack of possibilities to + represent the result. This is a genuine bug in the input since + UCS4 does not allow such values. */ + if (irreversible == NULL) + /* We are transliterating, don't try to correct anything. */ + return __GCONV_ILLEGAL_INPUT; + + if (flags & __GCONV_IGNORE_ERRORS) + { + /* Just ignore this character. */ + ++*irreversible; + inptr += 4; + continue; + } + + *inptrp = inptr; + *outptrp = outptr; + return __GCONV_ILLEGAL_INPUT; + } + } + while (len > 0); + + *inptrp = inptr; + *outptrp = outptr; + + /* Determine the status. */ + if (*inptrp == inend) + result = __GCONV_EMPTY_INPUT; + else if (*inptrp + 4 > inend) + result = __GCONV_INCOMPLETE_INPUT; + else + { + assert (*outptrp + 4 > outend); + result = __GCONV_FULL_OUTPUT; + } + + return result; +} +ICONV_VX_SINGLE (ucs4le_internal_loop) +# include <iconv/skeleton.c> +ICONV_VX_IFUNC (__gconv_transform_ucs4le_internal) + +/* Convert from UCS2 to the internal (UCS4-like) format. 
*/ +# define DEFINE_INIT 0 +# define DEFINE_FINI 0 +# define MIN_NEEDED_FROM 2 /* One UCS2 input character is 2 bytes. */ +# define MIN_NEEDED_TO 4 /* One internal (UCS4-like) output character is 4 bytes. */ +# define FROM_DIRECTION 1 +# define FROM_LOOP ICONV_VX_NAME (ucs2_internal_loop) +# define TO_LOOP ICONV_VX_NAME (ucs2_internal_loop) /* This is not used. */ +# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs2_internal) +# define ONE_DIRECTION 1 + +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +# define LOOPFCT FROM_LOOP +# define BODY_ORIG_ERROR \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + STANDARD_FROM_LOOP_ERR_HANDLER (2); /* Scalar conversion of one character. BODY below does not expand it and only reuses BODY_ORIG_ERROR. */ +# define BODY_ORIG \ + { \ + uint16_t u1 = get16 (inptr); \ + \ + if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ + { \ + BODY_ORIG_ERROR \ + } \ + \ + *((uint32_t *) outptr) = u1; \ + outptr += sizeof (uint32_t); \ + inptr += 2; \ + } /* Vector conversion: widens up to 8 characters per step and stops in front of the first surrogate. */ +# define BODY \ + { \ + size_t len, tmp, tmp2; /* tmp and tmp2 are scratch registers for the asm. */ \ + len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); /* Characters that fit into both the remaining input and the remaining output. */ \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + CONVERT_32BIT_SIZE_T ([R_LEN]) \ + /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \ + " larl %[R_TMP],9f\n\t" \ + " vlm %%v20,%%v21,0(%[R_TMP])\n\t" /* Load v20 and v21 from the constants at label 9. */ \ + " srlg %[R_TMP],%[R_LEN],3\n\t" /* R_TMP = number of full 8 character blocks. */ \ + " clgije %[R_TMP],0,1f\n\t" \ + /* Process 16byte (8char) blocks. */ \ + "0: vl %%v16,0(%[R_IN])\n\t" \ + " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ + /* Enlarge UCS2 to UCS4. */ \ + " vuplhh %%v17,%%v16\n\t" \ + " vupllh %%v18,%%v16\n\t" \ + " jno 10f\n\t" \ + /* Store 32bytes to buf_out. */ \ + " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ + " la %[R_IN],16(%[R_IN])\n\t" \ + " la %[R_OUT],32(%[R_OUT])\n\t" \ + " brctg %[R_TMP],0b\n\t" \ + " llgfr %[R_LEN],%[R_LEN]\n\t" \ + " nilf %[R_LEN],7\n\t" \ + /* Process <16bytes. */ \ + "1: sll %[R_LEN],1\n\t" \ + " ahik %[R_TMP],%[R_LEN],-1\n\t" \ + " jl 20f\n\t" /* No further bytes available. 
*/ \ + " vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \ + " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ + /* Enlarge UCS2 to UCS4. */ \ + " vuplhh %%v17,%%v16\n\t" \ + " vupllh %%v18,%%v16\n\t" \ + " vlgvb %[R_TMP],%%v19,7\n\t" \ + " clr %[R_TMP],%[R_LEN]\n\t" /* Cap the index from v19 at the number of remaining input bytes; if it was capped, R_LEN is cleared. */ \ + " locgrhe %[R_TMP],%[R_LEN]\n\t" \ + " locghihe %[R_LEN],0\n\t" \ + " j 11f\n\t" \ + /* v20: Vector string range compare values. */ \ + "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ + /* v21: Vector string range compare control-bits. \ + element 0: =>; element 1: < */ \ + " .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ + /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \ + "10: vlgvb %[R_TMP],%%v19,7\n\t" \ + "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" /* Consume R_TMP input bytes; twice as many output bytes are stored below and added to R_OUT at label 19. */ \ + " sll %[R_TMP],1\n\t" \ + " lgr %[R_TMP2],%[R_TMP]\n\t" \ + " ahi %[R_TMP],-1\n\t" \ + " jl 20f\n\t" \ + " vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" \ + " ahi %[R_TMP],-16\n\t" \ + " jl 19f\n\t" \ + " vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \ + "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" \ + "20: \n\t" \ + ".machine pop" \ + : /* outputs */ [R_OUT] "+a" (outptr) \ + , [R_IN] "+a" (inptr) \ + , [R_TMP] "=a" (tmp) \ + , [R_TMP2] "=a" (tmp2) \ + , [R_LEN] "+d" (len) \ + : /* inputs */ \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ + ); /* len is still nonzero only if the asm stopped in front of an invalid character. */ \ + if (len > 0) \ + { \ + /* Found an invalid character at next input-char. */ \ + BODY_ORIG_ERROR \ + } \ + } + +# define LOOP_NEED_FLAGS +# include <iconv/loop.c> +# include <iconv/skeleton.c> +# undef BODY_ORIG +# undef BODY_ORIG_ERROR +ICONV_VX_IFUNC (__gconv_transform_ucs2_internal) + +/* Convert from UCS2 in other endianness to the internal (UCS4-like) format. 
*/ +# define DEFINE_INIT 0 +# define DEFINE_FINI 0 +# define MIN_NEEDED_FROM 2 +# define MIN_NEEDED_TO 4 +# define FROM_DIRECTION 1 +# define FROM_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop) +# define TO_LOOP ICONV_VX_NAME (ucs2reverse_internal_loop) /* This is not used. */ +# define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_ucs2reverse_internal) +# define ONE_DIRECTION 1 + +# define MIN_NEEDED_INPUT MIN_NEEDED_FROM +# define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +# define LOOPFCT FROM_LOOP +# define BODY_ORIG_ERROR \ + /* Surrogate characters in UCS-2 input are not valid. Reject \ + them. (Catching this here is not security relevant.) */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 2; \ + ++*irreversible; \ + continue; /* With errors ignored: skip the 2 byte character, count it in *irreversible and continue the loop. */ + +# define BODY_ORIG \ + { \ + uint16_t u1 = bswap_16 (get16 (inptr)); \ + \ + if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000)) \ + { \ + BODY_ORIG_ERROR \ + } \ + \ + *((uint32_t *) outptr) = u1; \ + outptr += sizeof (uint32_t); \ + inptr += 2; \ + } /* Vector conversion: byte-swaps and widens up to 8 characters per step and stops in front of the first surrogate. */ +# define BODY \ + { \ + size_t len, tmp, tmp2; /* tmp and tmp2 are scratch registers for the asm. */ \ + len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); /* Characters that fit into both the remaining input and the remaining output. */ \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + CONVERT_32BIT_SIZE_T ([R_LEN]) \ + /* Setup to check for ch >= 0xd800 && ch < 0xe000. */ \ + " larl %[R_TMP],9f\n\t" \ + " vlm %%v20,%%v22,0(%[R_TMP])\n\t" /* Load v20-v22 from the constants at label 9. */ \ + " srlg %[R_TMP],%[R_LEN],3\n\t" /* R_TMP = number of full 8 character blocks. */ \ + " clgije %[R_TMP],0,1f\n\t" \ + /* Process 16byte (8char) blocks. */ \ + "0: vl %%v16,0(%[R_IN])\n\t" \ + " vperm %%v16,%%v16,%%v16,%%v22\n\t" /* Swap the two bytes of every halfword (mask in v22). */ \ + " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ + /* Enlarge UCS2 to UCS4. */ \ + " vuplhh %%v17,%%v16\n\t" \ + " vupllh %%v18,%%v16\n\t" \ + " jno 10f\n\t" \ + /* Store 32bytes to buf_out. 
*/ \ + " vstm %%v17,%%v18,0(%[R_OUT])\n\t" \ + " la %[R_IN],16(%[R_IN])\n\t" \ + " la %[R_OUT],32(%[R_OUT])\n\t" \ + " brctg %[R_TMP],0b\n\t" \ + " llgfr %[R_LEN],%[R_LEN]\n\t" \ + " nilf %[R_LEN],7\n\t" \ + /* Process <16bytes. */ \ + "1: sll %[R_LEN],1\n\t" \ + " ahik %[R_TMP],%[R_LEN],-1\n\t" \ + " jl 20f\n\t" /* No further bytes available. */ \ + " vll %%v16,%[R_TMP],0(%[R_IN])\n\t" \ + " vperm %%v16,%%v16,%%v16,%%v22\n\t" \ + " vstrchs %%v19,%%v16,%%v20,%%v21\n\t" \ + /* Enlarge UCS2 to UCS4. */ \ + " vuplhh %%v17,%%v16\n\t" \ + " vupllh %%v18,%%v16\n\t" \ + " vlgvb %[R_TMP],%%v19,7\n\t" \ + " clr %[R_TMP],%[R_LEN]\n\t" /* Cap the index from v19 at the number of remaining input bytes; if it was capped, R_LEN is cleared. */ \ + " locgrhe %[R_TMP],%[R_LEN]\n\t" \ + " locghihe %[R_LEN],0\n\t" \ + " j 11f\n\t" \ + /* v20: Vector string range compare values. */ \ + "9: .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ + /* v21: Vector string range compare control-bits. \ + element 0: =>; element 1: < */ \ + " .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \ + /* v22: Vector permute mask. 
*/ \ + " .short 0x0100,0x0302,0x0504,0x0706\n\t" \ + " .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \ + /* Found an element: ch >= 0xd800 && ch < 0xe000 */ \ + "10: vlgvb %[R_TMP],%%v19,7\n\t" \ + "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" /* Consume R_TMP input bytes; twice as many output bytes are stored below and added to R_OUT at label 19. */ \ + " sll %[R_TMP],1\n\t" \ + " lgr %[R_TMP2],%[R_TMP]\n\t" \ + " ahi %[R_TMP],-1\n\t" \ + " jl 20f\n\t" \ + " vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t" \ + " ahi %[R_TMP],-16\n\t" \ + " jl 19f\n\t" \ + " vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t" \ + "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" \ + "20: \n\t" \ + ".machine pop" \ + : /* outputs */ [R_OUT] "+a" (outptr) \ + , [R_IN] "+a" (inptr) \ + , [R_TMP] "=a" (tmp) \ + , [R_TMP2] "=a" (tmp2) \ + , [R_LEN] "+d" (len) \ + : /* inputs */ \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ + ASM_CLOBBER_VR ("v22") \ + ); /* len is still nonzero only if the asm stopped in front of an invalid character. */ \ + if (len > 0) \ + { \ + /* Found an invalid character at next input-char. */ \ + BODY_ORIG_ERROR \ + } \ + } +# define LOOP_NEED_FLAGS +# include <iconv/loop.c> +# include <iconv/skeleton.c> +# undef BODY_ORIG +# undef BODY_ORIG_ERROR +ICONV_VX_IFUNC (__gconv_transform_ucs2reverse_internal) + +/* Convert from the internal (UCS4-like) format to UCS2. */ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP ICONV_VX_NAME (internal_ucs2_loop) +#define TO_LOOP ICONV_VX_NAME (internal_ucs2_loop) /* This is not used. 
*/ +#define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs2) +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP /* Scalar conversion of one character; also used by BODY for short remainders and for the character the vector code stopped at. */ +#define BODY_ORIG \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + \ + if (__glibc_unlikely (val >= 0x10000)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + result = __GCONV_ILLEGAL_INPUT; /* NOTE(review): result is assigned before the ignore_errors_p () check, so it is still __GCONV_ILLEGAL_INPUT when the character is skipped; the internal_ucs2reverse variant assigns it only when errors are not ignored - confirm which behaviour is intended. */ \ + if (! ignore_errors_p ()) \ + break; \ + inptr += 4; \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + put16 (outptr, val); \ + outptr += sizeof (uint16_t); \ + inptr += 4; \ + } \ + } +# define BODY \ + { \ + if (__builtin_expect (inend - inptr < 32, 1) \ + || outend - outptr < 16) \ + /* Convert remaining bytes with c code. */ \ + BODY_ORIG \ + else \ + { \ + /* Convert in 32 byte blocks. */ \ + size_t loop_count = (inend - inptr) / 32; \ + size_t tmp, tmp2; \ + if (loop_count > (outend - outptr) / 16) /* Every block turns 32 input bytes into 16 output bytes; cap by the output room. */ \ + loop_count = (outend - outptr) / 16; \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + CONVERT_32BIT_SIZE_T ([R_LI]) \ + " larl %[R_I],3f\n\t" \ + " vlm %%v20,%%v23,0(%[R_I])\n\t" /* Load v20-v23 from the constants at label 3. */ \ + "0: \n\t" \ + " vlm %%v16,%%v17,0(%[R_IN])\n\t" \ + /* Shorten UCS4 to UCS2. */ \ + " vpkf %%v18,%%v16,%%v17\n\t" \ + " vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" \ + " jno 11f\n\t" \ + "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" \ + " jno 10f\n\t" \ + /* Store 16bytes to buf_out. 
*/ \ + "2: vst %%v18,0(%[R_OUT])\n\t" \ + " la %[R_IN],32(%[R_IN])\n\t" \ + " la %[R_OUT],16(%[R_OUT])\n\t" \ + " brctg %[R_LI],0b\n\t" \ + " j 20f\n\t" \ + /* Setup to check for ch >= 0xd800. (v20, v21) */ \ + "3: .long 0xd800,0xd800,0x0,0x0\n\t" \ + " .long 0xa0000000,0xa0000000,0x0,0x0\n\t" \ + /* Setup to check for ch >= 0xe000 \ + && ch < 0x10000. (v22,v23) */ \ + " .long 0xe000,0x10000,0x0,0x0\n\t" \ + " .long 0xa0000000,0x40000000,0x0,0x0\n\t" \ + /* v16 contains only valid chars. Check in v17: \ + ch >= 0xe000 && ch <= 0xffff. */ \ + "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" \ + " jo 2b\n\t" /* All ch's in this range, proceed. */ \ + " lghi %[R_TMP],16\n\t" \ + " j 12f\n\t" \ + /* Maybe v16 contains invalid chars. \ + Check ch >= 0xe000 && ch <= 0xffff. */ \ + "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \ + " jo 1b\n\t" /* All ch's in this range, proceed. */ \ + " lghi %[R_TMP],0\n\t" \ + "12: vlgvb %[R_I],%%v19,7\n\t" /* R_TMP is the byte offset of the checked vector inside the 32 byte block: 16 for v17, 0 for v16. */ \ + " agr %[R_I],%[R_TMP]\n\t" \ + " la %[R_IN],0(%[R_I],%[R_IN])\n\t" /* Consume the input bytes in front of the offending character. */ \ + " srl %[R_I],1\n\t" /* Half as many output bytes are stored. */ \ + " ahi %[R_I],-1\n\t" \ + " jl 20f\n\t" \ + " vstl %%v18,%[R_I],0(%[R_OUT])\n\t" \ + " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \ + "20:\n\t" \ + ".machine pop" \ + : /* outputs */ [R_OUT] "+a" (outptr) \ + , [R_IN] "+a" (inptr) \ + , [R_LI] "+d" (loop_count) \ + , [R_I] "=a" (tmp2) \ + , [R_TMP] "=d" (tmp) \ + : /* inputs */ \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ + ); /* loop_count is nonzero only if the asm left its loop early; the scalar code then handles the offending character. */ \ + if (loop_count > 0) \ + { \ + /* Found an invalid character at next character. */ \ + BODY_ORIG \ + } \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> +# undef BODY_ORIG +ICONV_VX_IFUNC (__gconv_transform_internal_ucs2) + +/* Convert from the internal (UCS4-like) format to UCS2 in other endianness. 
*/ +#define DEFINE_INIT 0 +#define DEFINE_FINI 0 +#define MIN_NEEDED_FROM 4 +#define MIN_NEEDED_TO 2 +#define FROM_DIRECTION 1 +#define FROM_LOOP ICONV_VX_NAME (internal_ucs2reverse_loop) +#define TO_LOOP ICONV_VX_NAME (internal_ucs2reverse_loop) /* This is not used. */ +#define FUNCTION_NAME ICONV_VX_NAME (__gconv_transform_internal_ucs2reverse) +#define ONE_DIRECTION 1 + +#define MIN_NEEDED_INPUT MIN_NEEDED_FROM +#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO +#define LOOPFCT FROM_LOOP /* Scalar conversion of one character; also used by BODY for short remainders and for the character the vector code stopped at. */ +#define BODY_ORIG \ + { \ + uint32_t val = *((const uint32_t *) inptr); \ + if (__glibc_unlikely (val >= 0x10000)) \ + { \ + UNICODE_TAG_HANDLER (val, 4); \ + STANDARD_TO_LOOP_ERR_HANDLER (4); \ + } \ + else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000)) \ + { \ + /* Surrogate characters in UCS-4 input are not valid. \ + We must catch this, because the UCS-2 output might be \ + interpreted as UTF-16 by other programs. If we let \ + surrogates pass through, attackers could make a security \ + hole exploit by synthesizing any desired plane 1-16 \ + character. */ \ + if (! ignore_errors_p ()) \ + { \ + result = __GCONV_ILLEGAL_INPUT; \ + break; \ + } \ + inptr += 4; /* Errors are ignored: skip the character and count it in *irreversible. */ \ + ++*irreversible; \ + continue; \ + } \ + else \ + { \ + put16 (outptr, bswap_16 (val)); \ + outptr += sizeof (uint16_t); \ + inptr += 4; \ + } \ + } +# define BODY \ + { \ + if (__builtin_expect (inend - inptr < 32, 1) \ + || outend - outptr < 16) \ + /* Convert remaining bytes with c code. */ \ + BODY_ORIG \ + else \ + { \ + /* Convert in 32 byte blocks. */ \ + size_t loop_count = (inend - inptr) / 32; \ + size_t tmp, tmp2; \ + if (loop_count > (outend - outptr) / 16) /* Every block turns 32 input bytes into 16 output bytes; cap by the output room. */ \ + loop_count = (outend - outptr) / 16; \ + __asm__ volatile (".machine push\n\t" \ + ".machine \"z13\"\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + CONVERT_32BIT_SIZE_T ([R_LI]) \ + " larl %[R_I],3f\n\t" \ + " vlm %%v20,%%v24,0(%[R_I])\n\t" /* Load v20-v24 from the constants at label 3. */ \ + "0: \n\t" \ + " vlm %%v16,%%v17,0(%[R_IN])\n\t" \ + /* Shorten UCS4 to UCS2 and byteswap. 
*/ \ + " vpkf %%v18,%%v16,%%v17\n\t" \ + " vperm %%v18,%%v18,%%v18,%%v24\n\t" /* Swap the two bytes of every packed halfword (mask in v24). */ \ + " vstrcfs %%v19,%%v16,%%v20,%%v21\n\t" \ + " jno 11f\n\t" \ + "1: vstrcfs %%v19,%%v17,%%v20,%%v21\n\t" \ + " jno 10f\n\t" \ + /* Store 16bytes to buf_out. */ \ + "2: vst %%v18,0(%[R_OUT])\n\t" \ + " la %[R_IN],32(%[R_IN])\n\t" \ + " la %[R_OUT],16(%[R_OUT])\n\t" \ + " brctg %[R_LI],0b\n\t" \ + " j 20f\n\t" \ + /* Setup to check for ch >= 0xd800. (v20, v21) */ \ + "3: .long 0xd800,0xd800,0x0,0x0\n\t" \ + " .long 0xa0000000,0xa0000000,0x0,0x0\n\t" \ + /* Setup to check for ch >= 0xe000 \ + && ch < 0x10000. (v22,v23) */ \ + " .long 0xe000,0x10000,0x0,0x0\n\t" \ + " .long 0xa0000000,0x40000000,0x0,0x0\n\t" \ + /* Vector permute mask (v24) */ \ + " .short 0x0100,0x0302,0x0504,0x0706\n\t" \ + " .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t" \ + /* v16 contains only valid chars. Check in v17: \ + ch >= 0xe000 && ch <= 0xffff. */ \ + "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t" \ + " jo 2b\n\t" /* All ch's in this range, proceed. */ \ + " lghi %[R_TMP],16\n\t" \ + " j 12f\n\t" \ + /* Maybe v16 contains invalid chars. \ + Check ch >= 0xe000 && ch <= 0xffff. */ \ + "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t" \ + " jo 1b\n\t" /* All ch's in this range, proceed. 
*/ \ + " lghi %[R_TMP],0\n\t" \ + "12: vlgvb %[R_I],%%v19,7\n\t" /* R_TMP is the byte offset of the checked vector inside the 32 byte block: 16 for v17, 0 for v16. */ \ + " agr %[R_I],%[R_TMP]\n\t" \ + " la %[R_IN],0(%[R_I],%[R_IN])\n\t" /* Consume the input bytes in front of the offending character. */ \ + " srl %[R_I],1\n\t" /* Half as many output bytes are stored. */ \ + " ahi %[R_I],-1\n\t" \ + " jl 20f\n\t" \ + " vstl %%v18,%[R_I],0(%[R_OUT])\n\t" \ + " la %[R_OUT],1(%[R_I],%[R_OUT])\n\t" \ + "20:\n\t" \ + ".machine pop" \ + : /* outputs */ [R_OUT] "+a" (outptr) \ + , [R_IN] "+a" (inptr) \ + , [R_LI] "+d" (loop_count) \ + , [R_I] "=a" (tmp2) \ + , [R_TMP] "=d" (tmp) \ + : /* inputs */ \ + : /* clobber list*/ "memory", "cc" \ + ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \ + ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \ + ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \ + ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \ + ASM_CLOBBER_VR ("v24") \ + ); /* loop_count is nonzero only if the asm left its loop early; the scalar code then handles the offending character. */ \ + if (loop_count > 0) \ + { \ + /* Found an invalid character at next character. */ \ + BODY_ORIG \ + } \ + } \ + } +#define LOOP_NEED_FLAGS +#include <iconv/loop.c> +#include <iconv/skeleton.c> +# undef BODY_ORIG +ICONV_VX_IFUNC (__gconv_transform_internal_ucs2reverse) + + +#else +/* Generate the internal transformations without ifunc if build environment + lacks vector support. Instead simply include the common version. 
*/ +# include <iconv/gconv_simple.c> +#endif /* !defined HAVE_S390_VX_ASM_SUPPORT */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/gen-8bit.sh b/REORG.TODO/sysdeps/s390/multiarch/gen-8bit.sh new file mode 100644 index 0000000000..6f88c4bd9d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/gen-8bit.sh @@ -0,0 +1,6 @@ +#!/bin/sh +echo "static const uint8_t to_ucs1[256] = {" +sed -ne '/^[^[:space:]]*[[:space:]]*.x00/d;/^END/q' \ + -e 's/^<U00\(..\)>[[:space:]]*.x\(..\).*/ [0x\2] = 0x\1,/p' \ + "$@" | sort -u +echo "};" diff --git a/REORG.TODO/sysdeps/s390/multiarch/iconv/skeleton.c b/REORG.TODO/sysdeps/s390/multiarch/iconv/skeleton.c new file mode 100644 index 0000000000..ca9ad1206f --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/iconv/skeleton.c @@ -0,0 +1,21 @@ +/* Skeleton for a conversion module - S390 version. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + /* Pull in the next iconv/skeleton.c on the include path unless the includer defined IGNORE_ICONV_SKELETON. */ +#ifndef IGNORE_ICONV_SKELETON +# include_next <iconv/skeleton.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/ifunc-impl-list.c b/REORG.TODO/sysdeps/s390/multiarch/ifunc-impl-list.c new file mode 100644 index 0000000000..6210f50acb --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/ifunc-impl-list.c @@ -0,0 +1,152 @@ +/* Enumerate available IFUNC implementations of a function. s390/s390x version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <assert.h> +#include <string.h> +#include <wchar.h> +#include <ifunc-impl-list.h> +#include <ifunc-resolve.h> + +/* Maximum number of IFUNC implementations. */ +#define MAX_IFUNC 3 + +/* Fill ARRAY of MAX elements with IFUNC implementations for function + NAME supported on target machine and return the number of valid + entries. */ +size_t +__libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, + size_t max) +{ + assert (max >= MAX_IFUNC); /* No function below registers more than MAX_IFUNC variants. */ + + size_t i = 0; + + /* Get hardware information. 
*/ + unsigned long int dl_hwcap = GLRO (dl_hwcap); + unsigned long long stfle_bits = 0ULL; /* Stays 0 unless STFLE, z/Architecture mode and high GPRs are all reported in dl_hwcap. */ + if ((dl_hwcap & HWCAP_S390_STFLE) + && (dl_hwcap & HWCAP_S390_ZARCH) + && (dl_hwcap & HWCAP_S390_HIGH_GPRS)) + { + S390_STORE_STFLE (stfle_bits); + } + /* Memory functions: z196, z10 and default variants, conditioned on the facility bits. */ + IFUNC_IMPL (i, name, memset, + IFUNC_IMPL_ADD (array, i, memset, + S390_IS_Z196 (stfle_bits), __memset_z196) + IFUNC_IMPL_ADD (array, i, memset, + S390_IS_Z10 (stfle_bits), __memset_z10) + IFUNC_IMPL_ADD (array, i, memset, 1, __memset_default)) + + IFUNC_IMPL (i, name, memcmp, + IFUNC_IMPL_ADD (array, i, memcmp, + S390_IS_Z196 (stfle_bits), __memcmp_z196) + IFUNC_IMPL_ADD (array, i, memcmp, + S390_IS_Z10 (stfle_bits), __memcmp_z10) + IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_default)) + +#ifdef SHARED + + IFUNC_IMPL (i, name, memcpy, + IFUNC_IMPL_ADD (array, i, memcpy, + S390_IS_Z196 (stfle_bits), __memcpy_z196) + IFUNC_IMPL_ADD (array, i, memcpy, + S390_IS_Z10 (stfle_bits), __memcpy_z10) + IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_default)) + + IFUNC_IMPL (i, name, mempcpy, + IFUNC_IMPL_ADD (array, i, mempcpy, + S390_IS_Z196 (stfle_bits), ____mempcpy_z196) + IFUNC_IMPL_ADD (array, i, mempcpy, + S390_IS_Z10 (stfle_bits), ____mempcpy_z10) + IFUNC_IMPL_ADD (array, i, mempcpy, 1, ____mempcpy_default)) + +#endif /* SHARED */ + +#ifdef HAVE_S390_VX_ASM_SUPPORT + /* Registers two variants of FUNC: __FUNC_vx with the condition dl_hwcap & HWCAP_S390_VX and __FUNC_c with the condition 1. */ +# define IFUNC_VX_IMPL(FUNC) \ + IFUNC_IMPL (i, name, FUNC, \ + IFUNC_IMPL_ADD (array, i, FUNC, dl_hwcap & HWCAP_S390_VX, \ + __##FUNC##_vx) \ + IFUNC_IMPL_ADD (array, i, FUNC, 1, __##FUNC##_c)) + + IFUNC_VX_IMPL (strlen); + IFUNC_VX_IMPL (wcslen); + + IFUNC_VX_IMPL (strnlen); + IFUNC_VX_IMPL (wcsnlen); + + IFUNC_VX_IMPL (strcpy); + IFUNC_VX_IMPL (wcscpy); + + IFUNC_VX_IMPL (stpcpy); + IFUNC_VX_IMPL (wcpcpy); + + IFUNC_VX_IMPL (strncpy); + IFUNC_VX_IMPL (wcsncpy); + + IFUNC_VX_IMPL (stpncpy); + IFUNC_VX_IMPL (wcpncpy); + + IFUNC_VX_IMPL (strcat); + IFUNC_VX_IMPL (wcscat); + + IFUNC_VX_IMPL (strncat); + IFUNC_VX_IMPL (wcsncat); + + IFUNC_VX_IMPL (strcmp); + 
IFUNC_VX_IMPL (wcscmp); + + IFUNC_VX_IMPL (strncmp); + IFUNC_VX_IMPL (wcsncmp); + + IFUNC_VX_IMPL (strchr); + IFUNC_VX_IMPL (wcschr); + + IFUNC_VX_IMPL (strchrnul); + IFUNC_VX_IMPL (wcschrnul); + + IFUNC_VX_IMPL (strrchr); + IFUNC_VX_IMPL (wcsrchr); + + IFUNC_VX_IMPL (strspn); + IFUNC_VX_IMPL (wcsspn); + + IFUNC_VX_IMPL (strpbrk); + IFUNC_VX_IMPL (wcspbrk); + + IFUNC_VX_IMPL (strcspn); + IFUNC_VX_IMPL (wcscspn); + + IFUNC_VX_IMPL (memchr); + IFUNC_VX_IMPL (wmemchr); + IFUNC_VX_IMPL (rawmemchr); + + IFUNC_VX_IMPL (memccpy); + + IFUNC_VX_IMPL (wmemset); + + IFUNC_VX_IMPL (wmemcmp); + + IFUNC_VX_IMPL (memrchr); + +#endif /* HAVE_S390_VX_ASM_SUPPORT */ + + return i; +} diff --git a/REORG.TODO/sysdeps/s390/multiarch/ifunc-resolve.h b/REORG.TODO/sysdeps/s390/multiarch/ifunc-resolve.h new file mode 100644 index 0000000000..adbd08a6ee --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/ifunc-resolve.h @@ -0,0 +1,90 @@ +/* IFUNC resolver function for CPU specific functions. + 32/64 bit S/390 version. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <unistd.h> +#include <dl-procinfo.h> + +#define S390_STFLE_BITS_Z10 34 /* General instructions extension */ +#define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... 
*/ + /* Facility tests on the doubleword stored by STFLE: facility number N is tested as bit (63 - N), i.e. counted from the most significant bit. */ +#define S390_IS_Z196(STFLE_BITS) \ + ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0) + +#define S390_IS_Z10(STFLE_BITS) \ + ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z10))) != 0) + /* Expands to a declaration plus an asm statement, so it must be used where a declaration is allowed; STFLE_BITS receives the stored facility bits. */ +#define S390_STORE_STFLE(STFLE_BITS) \ + /* We want just 1 double word to be returned. */ \ + register unsigned long reg0 __asm__("0") = 0; \ + \ + __asm__ __volatile__(".machine push" "\n\t" \ + ".machine \"z9-109\"" "\n\t" \ + ".machinemode \"zarch_nohighgprs\"\n\t" \ + "stfle %0" "\n\t" \ + ".machine pop" "\n" \ + : "=QS" (STFLE_BITS), "+d" (reg0) \ + : : "cc"); /* Resolver prologue: declares stfle_bits and fills it only when dl_hwcap reports STFLE, z/Architecture mode and high GPRs. */ +#define s390_libc_ifunc_init() \ + unsigned long long stfle_bits = 0ULL; \ + if (__glibc_likely((dl_hwcap & HWCAP_S390_STFLE) \ + && (dl_hwcap & HWCAP_S390_ZARCH) \ + && (dl_hwcap & HWCAP_S390_HIGH_GPRS))) \ + { \ + S390_STORE_STFLE (stfle_bits); \ + } + /* Define FUNC as an ifunc resolving to RESOLVERFUNC_z196, RESOLVERFUNC_z10 or RESOLVERFUNC_default, tested in that order. */ +#define s390_libc_ifunc(TYPE_FUNC, RESOLVERFUNC, FUNC) \ + /* Make the declarations of the optimized functions hidden in order + to prevent GOT slots being generated for them. */ \ + extern __typeof (TYPE_FUNC) RESOLVERFUNC##_z196 attribute_hidden; \ + extern __typeof (TYPE_FUNC) RESOLVERFUNC##_z10 attribute_hidden; \ + extern __typeof (TYPE_FUNC) RESOLVERFUNC##_default attribute_hidden; \ + __ifunc (TYPE_FUNC, FUNC, \ + __glibc_likely (S390_IS_Z196 (stfle_bits)) \ + ? RESOLVERFUNC##_z196 \ + : __glibc_likely (S390_IS_Z10 (stfle_bits)) \ + ? 
RESOLVERFUNC##_z10 \ + : RESOLVERFUNC##_default, \ + unsigned long int dl_hwcap, s390_libc_ifunc_init); + /* Convenience wrappers around s390_vx_libc_ifunc2_redirected that reuse FUNC for the arguments they omit. */ +#define s390_vx_libc_ifunc(FUNC) \ + s390_vx_libc_ifunc2_redirected(FUNC, FUNC, FUNC) + +#define s390_vx_libc_ifunc_redirected(TYPE_FUNC, FUNC) \ + s390_vx_libc_ifunc2_redirected(TYPE_FUNC, FUNC, FUNC) + +#define s390_vx_libc_ifunc2(RESOLVERFUNC, FUNC) \ + s390_vx_libc_ifunc2_redirected(FUNC, RESOLVERFUNC, FUNC) + /* Empty prologue: the vector resolver only tests dl_hwcap. */ +#define s390_vx_libc_ifunc_init() /* Define FUNC as an ifunc resolving to RESOLVERFUNC_vx when HWCAP_S390_VX is set in dl_hwcap and to RESOLVERFUNC_c otherwise. */ +#define s390_vx_libc_ifunc2_redirected(TYPE_FUNC, RESOLVERFUNC, FUNC) \ + /* Make the declarations of the optimized functions hidden in order + to prevent GOT slots being generated for them. */ \ + extern __typeof (TYPE_FUNC) RESOLVERFUNC##_vx attribute_hidden; \ + extern __typeof (TYPE_FUNC) RESOLVERFUNC##_c attribute_hidden; \ + __ifunc (TYPE_FUNC, FUNC, \ + (dl_hwcap & HWCAP_S390_VX) \ + ? RESOLVERFUNC##_vx \ + : RESOLVERFUNC##_c, \ + unsigned long int dl_hwcap, s390_vx_libc_ifunc_init); + /* Generic form: EXPR selects the implementation. Note that the resolver argument is named hwcap here, not dl_hwcap. */ +#define s390_libc_ifunc_expr_init() +#define s390_libc_ifunc_expr(TYPE_FUNC, FUNC, EXPR) \ + __ifunc (TYPE_FUNC, FUNC, EXPR, unsigned long int hwcap, \ + s390_libc_ifunc_expr_init); diff --git a/REORG.TODO/sysdeps/s390/multiarch/memccpy-c.c b/REORG.TODO/sysdeps/s390/multiarch/memccpy-c.c new file mode 100644 index 0000000000..73069ae502 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memccpy-c.c @@ -0,0 +1,25 @@ +/* Default memccpy implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define MEMCCPY __memccpy_c + +# include <string.h> +extern __typeof (__memccpy) __memccpy_c; +# include <string/memccpy.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/memccpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/memccpy-vx.S new file mode 100644 index 0000000000..f60aecef77 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memccpy-vx.S @@ -0,0 +1,156 @@ +/* Vector optimized 32/64 bit S/390 version of memccpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *memccpy (void * dest, const void *src, int c, size_t n) + Copies no more than n bytes from src to dest, + stopping when the character c is found + and returns pointer next to c in dest or null if c not found.
+ + Register usage: + -r0=tmp + -r1=tmp + -r2=dest + -r3=src + -r4=c + -r5=n + -r6=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated + -v19=part #2 of s + -v31=save area for r6 and r7 +*/ +ENTRY(__memccpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r5,%r5 +# endif /* !defined __s390x__ */ + + vlvgp %v31,%r6,%r7 /* Save registers. */ + clgije %r5,0,.Lnf_end /* If len == 0 then exit. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r0,%r0 /* Convert 32bit to 64bit. */ + + vlvgb %v18,%r4,0 /* Generate vector which elements are all c. + if c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + lghi %r6,0 /* current_len = 0. */ + + clgrjle %r5,%r0,.Lremaining_v16 /* If maxlen <= loaded-bytes + -> Process remaining. */ + + vfeebs %v17,%v16,%v18 /* Find c. */ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + clgrjl %r1,%r0,.Lfound_v16 /* Found c is within loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r3,60,128+63,0 /* %r1 = bits 60-63 of %r3 'and' 15. */ + lghi %r6,15 /* current_len = 15. */ + slr %r6,%r1 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r6,0(%r2) /* Store processed bytes. */ + ahi %r6,1 + +.Lpreloop1: + /* Now we are 16byte aligned, so we can load + a full vreg without page fault. */ + vl %v16,0(%r6,%r3) /* Load s. */ + clgijl %r5,17,.Lremaining_v16 /* If n <= 16, + process remaining bytes. */ + lgr %r7,%r5 + slgfi %r7,16 /* border_len = n - 16. */ + j .Lloop1 + +.Lloop2: + vl %v16,16(%r6,%r3) + vst %v19,0(%r6,%r2) + aghi %r6,16 + +.Lloop1: + clgrjhe %r6,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfeebs %v17,%v16,%v18 /* Find c. */ + jl .Lfound_v16 /* Jump away if c was found. */ + vl %v19,16(%r6,%r3) /* Load next s part. */ + vst %v16,0(%r6,%r2) /* Store previous part without c.
*/ + aghi %r6,16 + + clgrjhe %r6,%r7,.Lremaining_v19 + vfeebs %v17,%v19,%v18 + jl .Lfound_v19 + vl %v16,16(%r6,%r3) + vst %v19,0(%r6,%r2) + aghi %r6,16 + + clgrjhe %r6,%r7,.Lremaining_v16 + vfeebs %v17,%v16,%v18 + jl .Lfound_v16 + vl %v19,16(%r6,%r3) + vst %v16,0(%r6,%r2) + aghi %r6,16 + + clgrjhe %r6,%r7,.Lremaining_v19 + vfeebs %v17,%v19,%v18 + jo .Lloop2 + +.Lfound_v19: + vlr %v16,%v19 +.Lfound_v16: + /* v16 contains c. Store remaining bytes to c. currlen hasn't + reached border, thus checking for maxlen is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + la %r2,0(%r6,%r2) /* vstl has no support for index-register. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy bytes including c. */ + la %r2,1(%r1,%r2) /* Return pointer next to c in dest. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + br %r14 + +.Lremaining_v19: + vlr %v16,%v19 +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Check and store remaining bytes. */ + vfeebs %v17,%v16,%v18 + slgrk %r7,%r5,%r6 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r6,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load index of c or 16 if not found. */ + /* c in remaining bytes? -> Jump away (c-index <= max-index) */ + clrjle %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes. */ + +.Lnf_end: + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lghi %r2,0 /* Return null. */ + br %r14 +END(__memccpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/memccpy.c b/REORG.TODO/sysdeps/s390/multiarch/memccpy.c new file mode 100644 index 0000000000..5adc1a8063 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memccpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of memccpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__memccpy) +weak_alias (__memccpy, memccpy) + +#else +# include <string/memccpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/memchr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/memchr-vx.S new file mode 100644 index 0000000000..e9cb06f948 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memchr-vx.S @@ -0,0 +1,159 @@ +/* Vector optimized 32/64 bit S/390 version of memchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *memchr (const void *s, int c, size_t n) + Scans memory for character c + and returns pointer to first c. + + Register usage: + -r0=tmp + -r1=tmp + -r2=s + -r3=c + -r4=n + -r5=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated +*/ +ENTRY(__memchr_vx) + + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lnf_end /* If len == 0 then exit. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r0,%r0 /* Convert 32bit to 64bit. */ + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + if c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + lghi %r5,16 /* current_len = 16. */ + + clgrjhe %r0,%r4,.Llastcmp /* If (bytes to boundary) >= n, + jump to lastcmp. */ + + vfeebs %v17,%v16,%v18 /* Find c. */ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + clgrjl %r1,%r0,.Lfound2 /* Found c is within loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64. */ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 +.Llt64: + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp /* Do last compare if curr-len >= n. */ + vfeebs %v17,%v16,%v18 /* Find c. */ + jl .Lfound /* Jump away if c was found.
*/ + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeebs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeebs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 + +.Llastcmp: + /* Use comparison result only if located within first n characters. + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + first ignored match index: vr-width - (current_len - n) ]0...16] + */ + vfeebs %v17,%v16,%v18 /* Find c. */ + slgrk %r4,%r5,%r4 /* %r4 = current_len - n. */ + lghi %r0,16 /* Register width = 16. */ + vlgvb %r1,%v17,7 /* Extract found index or 16 if not found. */ + slr %r0,%r4 /* %r0 = first ignored match index. */ + clrjl %r1,%r0,.Lfound2 /* Go away if match is below n bytes. */ + /* c not found within n-bytes. */ +.Lnf_end: + lghi %r2,0 /* Return null. */ + br %r14 + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound0: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v17,7 /* Load byte index of c. */ +.Lfound2: + slgfi %r5,16 /* current_len -=16 */ + algr %r5,%r1 /* Byte index of c is added to current len. */ + la %r2,0(%r5,%r2) /* Return pointer to c. */ + br %r14 + + +.Lloop64: + vl %v16,0(%r5,%r2) + vfeebs %v17,%v16,%v18 /* Find c. */ + jl .Lfound0 /* Jump away if c was found. */ + vl %v16,16(%r5,%r2) + vfeebs %v17,%v16,%v18 + jl .Lfound16 + vl %v16,32(%r5,%r2) + vfeebs %v17,%v16,%v18 + jl .Lfound32 + vl %v16,48(%r5,%r2) + vfeebs %v17,%v16,%v18 + jl .Lfound48 + + aghi %r5,64 + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64.
*/ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 + + j .Llt64 +END(__memchr_vx) + +# define memchr __memchr_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__memchr_c, __GI_memchr) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +#include <memchr.S> diff --git a/REORG.TODO/sysdeps/s390/multiarch/memchr.c b/REORG.TODO/sysdeps/s390/multiarch/memchr.c new file mode 100644 index 0000000000..f0bf41f8e4 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memchr.c @@ -0,0 +1,27 @@ +/* Multiple versions of memchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define memchr __redirect_memchr +# include <string.h> +# undef memchr +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_memchr, __memchr, memchr) + +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/mempcpy.c b/REORG.TODO/sysdeps/s390/multiarch/mempcpy.c new file mode 100644 index 0000000000..bf111a3aea --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/mempcpy.c @@ -0,0 +1,32 @@ +/* Multiple versions of mempcpy. + Copyright (C) 2016-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + + +#if defined SHARED && IS_IN (libc) +# define mempcpy __redirect_mempcpy +# define __mempcpy __redirect___mempcpy +/* Omit the mempcpy inline definitions because it would redefine mempcpy. */ +# define _HAVE_STRING_ARCH_mempcpy 1 +# include <string.h> +# undef mempcpy +# undef __mempcpy +# include <ifunc-resolve.h> + +s390_libc_ifunc (__redirect___mempcpy, ____mempcpy, __mempcpy) +weak_alias (__mempcpy, mempcpy); +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/memrchr-c.c b/REORG.TODO/sysdeps/s390/multiarch/memrchr-c.c new file mode 100644 index 0000000000..7382b7ff05 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memrchr-c.c @@ -0,0 +1,25 @@ +/* Default memrchr implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define MEMRCHR __memrchr_c + +# include <string.h> +extern __typeof (__memrchr) __memrchr_c; +# include <string/memrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/memrchr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/memrchr-vx.S new file mode 100644 index 0000000000..c6b0d34692 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memrchr-vx.S @@ -0,0 +1,160 @@ +/* Vector optimized 32/64 bit S/390 version of memrchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *memrchr (const void *s, int c, size_t n) + Scans memory for character c backwards + and returns pointer to first c. 
+ + Register usage: + -r0=tmp + -r1=tmp + -r2=s + -r3=c + -r4=n + -r5=s in loop + + -v16=part of s + -v17=index of found c + -v18=c replicated + -v20=permute pattern +*/ +ENTRY(__memrchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + clgije %r4,0,.Lnot_found + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + llcr %r3,%r3 /* char c_char = (char) c. */ + + /* check byte n - 1. */ + llc %r0,-1(%r4,%r2) + slgfi %r4,1 + clrje %r0,%r3,.Lfound_end + jh .Lnot_found /* Return NULL if n is now 0. */ + + larl %r1,.Lpermute_mask /* Load permute mask. */ + vl %v20,0(%r1) + + /* check byte n - 2. */ + llc %r0,-1(%r4,%r2) + slgfi %r4,1 + clrje %r0,%r3,.Lfound_end + jh .Lnot_found /* Return NULL if n is now 0. */ + + clgijhe %r4,64,.Lloop64 /* If n >= 64 -> loop64. */ + +.Llt64: + /* Process n < 64 bytes. */ + clgijl %r4,16,.Llt16 /* Jump away if n < 16. */ + aghi %r4,-16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 + clgijl %r4,16,.Llt16 + aghi %r4,-16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 + clgijl %r4,16,.Llt16 + aghi %r4,-16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 +.Llt16: + clgfi %r4,0 /* if remaining bytes == 0, return NULL. */ + locghie %r2,0 + ber %r14 + + aghi %r4,-1 /* vll needs highest index. */ + vll %v16,%r4,0(%r2) /* Load remaining bytes. */ + + /* Right-shift of v16 to mask bytes after highest index. */ + lhi %r0,15 + slr %r0,%r4 /* Compute byte count for vector shift right. */ + sll %r0,3 /* Convert to bit count. */ + vlvgb %v17,%r0,7 + vsrlb %v16,%v16,%v17 /* Vector shift right by byte by number of bytes + specified in bits 1-4 of byte 7 in v17. */ + j .Lfound_permute + +.Lfound48: + aghi %r4,16 +.Lfound32: + aghi %r4,16 +.Lfound16: + aghi %r4,16 +.Lfound0: + la %r2,0(%r4,%r2) /* Set pointer to start of v16. 
*/ + lghi %r4,15 /* Set highest index in v16 to last index. */ +.Lfound_permute: + /* Search for a c in v16 in reversed byte order. v16 contains %r4 + 1 + bytes. If v16 was not fully loaded, the bytes are already + right shifted, so that the bytes in v16 can simply be reversed. */ + vperm %v16,%v16,%v16,%v20 /* Permute v16 to reversed order. */ + vfeeb %v16,%v16,%v18 /* Find c in reversed v16. */ + vlgvb %r1,%v16,7 /* Index of c or 16 if not found. */ + + /* Return NULL if there is no c in loaded bytes. */ + clrjh %r1,%r4,.Lnot_found + + slgr %r4,%r1 +.Lfound_end: + la %r2,0(%r4,%r2) /* Return pointer to c. */ + br %r14 + +.Lnot_found: + lghi %r2,0 + br %r14 + +.Lpermute_mask: + .byte 0x0F,0x0E,0x0D,0x0C,0x0B,0x0A,0x09,0x08 + .byte 0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00 + +.Lloop64: + aghi %r4,-64 + vl %v16,48(%r4,%r2) /* Load 16bytes of memory area. */ + vfeebs %v17,%v16,%v18 /* Find c. */ + jno .Lfound48 /* Jump away if c was found. */ + vl %v16,32(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound32 + vl %v16,16(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound16 + vl %v16,0(%r4,%r2) + vfeebs %v17,%v16,%v18 + jno .Lfound0 + + clgijhe %r4,64,.Lloop64 /* If n >= 64 -> loop64. */ + j .Llt64 +END(__memrchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/memrchr.c b/REORG.TODO/sysdeps/s390/multiarch/memrchr.c new file mode 100644 index 0000000000..3a76c62376 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/memrchr.c @@ -0,0 +1,28 @@ +/* Multiple versions of memrchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__memrchr) +weak_alias (__memrchr, memrchr) + +#else +# include <string/memrchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/rawmemchr-c.c b/REORG.TODO/sysdeps/s390/multiarch/rawmemchr-c.c new file mode 100644 index 0000000000..12a64d338d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/rawmemchr-c.c @@ -0,0 +1,34 @@ +/* Default rawmemchr implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> + +# define RAWMEMCHR __rawmemchr_c +# undef weak_alias +# define weak_alias(a, b) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__rawmemchr_c, __GI___rawmemchr, __rawmemchr_c); +# endif /* SHARED */ + +extern __typeof (rawmemchr) __rawmemchr_c attribute_hidden; + +# include <string/rawmemchr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/rawmemchr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/rawmemchr-vx.S new file mode 100644 index 0000000000..e2ac9a88e1 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/rawmemchr-vx.S @@ -0,0 +1,92 @@ +/* Vector optimized 32/64 bit S/390 version of rawmemchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* void *rawmemchr (const void *s, int c) + Scans memory for character c + and returns pointer to first c.
+ + Register usage: + -r1=tmp + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated +*/ +ENTRY(__rawmemchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vlvgb %v18,%r3,0 /* Generate vector which elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + vfeeb %v17,%v16,%v18 /* Vector find element equal. */ + vlgvb %r5,%v17,7 /* Load byte index of character or 16 if not found. */ + clrjl %r5,%r1,.Lend_found /* If found c is in loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find c in a 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeebs %v17,%v16,%v18 /* Vector find element equal. */ + jno .Lcharacter /* Jump away if element found. */ + vl %v16,16(%r5,%r2) + vfeebs %v17,%v16,%v18 + jno .Lcharacter16 + vl %v16,32(%r5,%r2) + vfeebs %v17,%v16,%v18 + jno .Lcharacter32 + vl %v16,48(%r5,%r2) + vfeebs %v17,%v16,%v18 + jno .Lcharacter48 + + aghi %r5,64 + j .Lloop /* No character found -> loop. */ + + /* Found character. */ +.Lcharacter48: + aghi %r5,16 +.Lcharacter32: + aghi %r5,16 +.Lcharacter16: + aghi %r5,16 +.Lcharacter: + vlgvb %r1,%v17,7 /* Load byte index of character. */ + algr %r5,%r1 +.Lend_found: + la %r2,0(%r5,%r2) /* Return pointer to character. */ + br %r14 +END(__rawmemchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/rawmemchr.c b/REORG.TODO/sysdeps/s390/multiarch/rawmemchr.c new file mode 100644 index 0000000000..74b0accb33 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/rawmemchr.c @@ -0,0 +1,31 @@ +/* Multiple versions of rawmemchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc.
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define __rawmemchr __redirect___rawmemchr +# include <string.h> +# undef __rawmemchr +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect___rawmemchr, __rawmemchr + , __rawmemchr) +weak_alias (__rawmemchr, rawmemchr) + +#else +# include <string/rawmemchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/stpcpy-c.c b/REORG.TODO/sysdeps/s390/multiarch/stpcpy-c.c new file mode 100644 index 0000000000..3818cfc24a --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/stpcpy-c.c @@ -0,0 +1,35 @@ +/* Default stpcpy implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STPCPY __stpcpy_c +# undef weak_alias +# define weak_alias(a, b) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__stpcpy_c, __GI___stpcpy, __stpcpy_c); +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + strong_alias (__stpcpy_c, __stpcpy_c_1); \ + __hidden_ver1 (__stpcpy_c_1, __GI_stpcpy, __stpcpy_c_1); +# endif /* SHARED */ + + +# include <string/stpcpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/stpcpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/stpcpy-vx.S new file mode 100644 index 0000000000..a9e5be2e68 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/stpcpy-vx.S @@ -0,0 +1,104 @@ +/* Vector optimized 32/64 bit S/390 version of stpcpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * stpcpy (char *dest, const char *src) + Copy string src to dest returning a pointer to its end. + + Register usage: + -r1=tmp + -r2=dest and return value + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__stpcpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + +.Lfound_v16_32: + aghi %r5,32 +.Lfound_v16_0: + la %r3,0(%r5,%r2) + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v16,%r1,0(%r3) /* Copy characters including zero.
*/ + la %r2,0(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + la %r3,16(%r5,%r2) + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v18,%r1,0(%r3) /* Copy characters including zero. */ + la %r2,0(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_align: + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + la %r2,0(%r5,%r2) /* Return pointer to zero. */ + br %r14 +END(__stpcpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/stpcpy.c b/REORG.TODO/sysdeps/s390/multiarch/stpcpy.c new file mode 100644 index 0000000000..611cf0765e --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/stpcpy.c @@ -0,0 +1,35 @@ +/* Multiple versions of stpcpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define stpcpy __redirect_stpcpy +# define __stpcpy __redirect___stpcpy +/* Omit the stpcpy inline definitions because it would redefine stpcpy. 
*/ +# define __NO_STRING_INLINES +# define NO_MEMPCPY_STPCPY_REDIRECT +# include <string.h> +# undef stpcpy +# undef __stpcpy +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc_redirected (__redirect___stpcpy, __stpcpy); +weak_alias (__stpcpy, stpcpy) + +#else +# include <string/stpcpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/stpncpy-c.c b/REORG.TODO/sysdeps/s390/multiarch/stpncpy-c.c new file mode 100644 index 0000000000..95efa38895 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/stpncpy-c.c @@ -0,0 +1,28 @@ +/* Default stpncpy implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STPNCPY __stpncpy_c +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__stpncpy_c, __GI___stpncpy, __stpncpy_c); +# endif /* SHARED */ + +# include <string/stpncpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/stpncpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/stpncpy-vx.S new file mode 100644 index 0000000000..869e8f7aef --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/stpncpy-vx.S @@ -0,0 +1,200 @@ +/* Vector optimized 32/64 bit S/390 version of stpncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * stpncpy (char *dest, const char *src, size_t n) + Copies at most n characters of string src to dest + returning a pointer to its end or dest+n + if src is smaller than n. 
+ + Register usage: + -%r0 = return value + -%r1 = zero byte index + -%r2 = curr dst pointer + -%r3 = curr src pointer + -%r4 = n + -%r5 = current_len + -%r6 = loaded bytes + -%r7 = border, tmp +*/ +ENTRY(__stpncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. */ + + la %r0,0(%r4,%r2) /* Save destination pointer + n for return. */ + vlvgp %v31,%r6,%r7 /* Save registers. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3, i.e. src 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load a full vreg + without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lremaining_v16 /* If n <= 16, process remaining + bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. 
*/ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezbs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezbs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + /* Zero in remaining bytes? -> jump away (zero-index <= max-index). */ + clrjle %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination! */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load return value saved in %r0. */ + br %r14 + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ... */ + slgr %r4,%r5 /* = maxlen - (currlen + zero_index + 1) */ + la %r2,0(%r1,%r2) /* Pointer to zero. start filling beyond. 
*/ + lgr %r0,%r2 /* Save return-pointer to found zero. */ + clgije %r4,1,.Lend /* Skip zero-filling, if found zero is last + possible character. + (1 is subtracted from r4 below!). */ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) /* Target of the exrl above; not reached by fall-through. */ + +.Lloop64: + vl %v16,0(%r5,%r3) + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Llt64 +END(__stpncpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/stpncpy.c b/REORG.TODO/sysdeps/s390/multiarch/stpncpy.c new file mode 100644 index 0000000000..74f5ddc25e --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/stpncpy.c @@ -0,0 +1,32 @@ +/* Multiple versions of stpncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define stpncpy __redirect_stpncpy +# define __stpncpy __redirect___stpncpy +# include <string.h> +# undef stpncpy +# undef __stpncpy +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc_redirected (__redirect___stpncpy, __stpncpy) +weak_alias (__stpncpy, stpncpy) + +#else +# include <string/stpncpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcat-c.c b/REORG.TODO/sysdeps/s390/multiarch/strcat-c.c new file mode 100644 index 0000000000..0b88c1fc28 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcat-c.c @@ -0,0 +1,28 @@ +/* Default strcat implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCAT __strcat_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcat_c, __GI_strcat, __strcat_c); +# endif /* SHARED */ + +# include <string/strcat.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcat-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strcat-vx.S new file mode 100644 index 0000000000..65c19c21c8 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcat-vx.S @@ -0,0 +1,161 @@ +/* Vector optimized 32/64 bit S/390 version of strcat. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strcat (char *dest, const char *src) + Concatenate two strings. + + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__strcat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + lgr %r0,%r2 /* Save destination pointer for return. 
*/ + + /* STRLEN + r1 = loaded bytes (tmp) + r4 = zero byte index (tmp) + r2 = dst + */ + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2, i.e. dst 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 + j .Llen_loop /* No zero -> loop. */ + +.Llen_found48: + aghi %r5,16 +.Llen_found32: + aghi %r5,16 +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r4,%v16,7 /* Load byte index of zero. */ + algr %r5,%r4 + +.Llen_end: + /* STRCPY + %r1 = loaded bytes (tmp) + %r4 = zero byte index (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + */ + la %r2,0(%r5,%r2) /* strcpy at end of dst-string. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lcpy_found_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3, i.e. src 'and' 15. */ + lghi %r5,15 /* current_len = 15. 
*/ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lcpy_loop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3)/* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lcpy_loop /* No zero -> loop. */ + +.Lcpy_found_v16_32: + aghi %r5,32 +.Lcpy_found_v16_0: + la %r4,0(%r5,%r2) /* %r4 = dst + current_len. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v16,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v18_48: + aghi %r5,32 /* 32 here plus the 16 in the la below = offset 48. */ +.Lcpy_found_v18_16: + la %r4,16(%r5,%r2) /* %r4 = dst + current_len + 16. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + vstl %v18,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_align: + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 +END(__strcat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcat.c b/REORG.TODO/sysdeps/s390/multiarch/strcat.c new file mode 100644 index 0000000000..d1c5d4c55e --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcat.c @@ -0,0 +1,29 @@ +/* Multiple versions of strcat. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strcat __redirect_strcat +# include <string.h> +# undef strcat +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strcat, __strcat, strcat) + +#else +# include <string/strcat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strchr-c.c b/REORG.TODO/sysdeps/s390/multiarch/strchr-c.c new file mode 100644 index 0000000000..636d8dd016 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strchr-c.c @@ -0,0 +1,29 @@ +/* Default strchr implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCHR __strchr_c +# undef weak_alias +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strchr_c, __GI_strchr, __strchr_c); +# endif /* SHARED */ + +# include <string/strchr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strchr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strchr-vx.S new file mode 100644 index 0000000000..bd2109bc05 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strchr-vx.S @@ -0,0 +1,100 @@ +/* Vector optimized 32/64 bit S/390 version of strchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strchr (const char *s, int c) + Locate character in string. 
+ + Register usage: + -r1=tmp + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=index of unequal + -v18=replicated c +*/ +ENTRY(__strchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + lghi %r5,0 /* current_len = 0. */ + + vlvgb %v18,%r3,0 /* Generate vector whose elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2, i.e. s 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Find c/zero in 16 byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc. */ +.Lfound32: + la %r5,16(%r5) +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* Found zero, but no c before that zero. */ + +.Lcharacter: + vlgvb %r4,%v16,7 /* Load byte index of character. */ + algr %r5,%r4 + la %r2,0(%r5,%r2) /* Return pointer to character. */ + br %r14 + +.Lzero: + llgcr %r3,%r3 /* char c_char = (char) c. */ + clije %r3,0,.Lcharacter /* Found zero and c is zero. */ + lghi %r2,0 /* Return null if character not found. 
*/ + br %r14 +END(__strchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strchr.c b/REORG.TODO/sysdeps/s390/multiarch/strchr.c new file mode 100644 index 0000000000..3df90aff14 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strchr.c @@ -0,0 +1,32 @@ +/* Multiple versions of strchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strchr __redirect_strchr +/* Omit the strchr inline definitions because it would redefine strchr. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strchr +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strchr, __strchr, strchr) +weak_alias (strchr, index) + +#else +# include <string/strchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strchrnul-c.c b/REORG.TODO/sysdeps/s390/multiarch/strchrnul-c.c new file mode 100644 index 0000000000..cd43d3a450 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strchrnul-c.c @@ -0,0 +1,26 @@ +/* Default strchrnul implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCHRNUL __strchrnul_c +# define __strchrnul STRCHRNUL +# undef weak_alias +# define weak_alias(name, alias) + +# include <string/strchrnul.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/strchrnul-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strchrnul-vx.S new file mode 100644 index 0000000000..cfc8cbf6e9 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strchrnul-vx.S @@ -0,0 +1,93 @@ +/* Vector optimized 32/64 bit S/390 version of strchrnul. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strchrnul (const char *s, int c) + Returns pointer to first c or to \0 if c not found. + + Register usage: + -r1=tmp + -r2=s and return pointer + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v18=vector with c replicated in every byte +*/ +ENTRY(__strchrnul_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + lghi %r5,0 /* current_len = 0. */ + + vlvgb %v18,%r3,0 /* Generate vector whose elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2, i.e. s 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Find c/zero in 16byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezbs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezbs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + + /* Found character or zero */ +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v16,7 /* Load byte index of character or zero. */ + algr %r5,%r1 + la %r2,0(%r5,%r2) /* Return pointer to character. 
*/ + +.Lend: + br %r14 +END(__strchrnul_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strchrnul.c b/REORG.TODO/sysdeps/s390/multiarch/strchrnul.c new file mode 100644 index 0000000000..9447523f25 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strchrnul.c @@ -0,0 +1,28 @@ +/* Multiple versions of strchrnul. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__strchrnul) +weak_alias (__strchrnul, strchrnul) + +#else +# include <string/strchrnul.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcmp-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strcmp-vx.S new file mode 100644 index 0000000000..640e4d6b4c --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcmp-vx.S @@ -0,0 +1,116 @@ +/* Vector optimized 32/64 bit S/390 version of strcmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int strcmp (const char *s1, const char *s2) + Compare two strings. + + Register usage: + -r1=loaded byte count s1 + -r2=s1 + -r3=s2 + -r4=loaded byte count s2, tmp + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__strcmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + lghi %r5,0 /* current_len = 0. */ + +.Lloop: + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */ + lcbb %r1,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vr is not fully loaded. */ + lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vr is not fully loaded. */ + /* Both vrs are fully loaded. */ + aghi %r5,16 + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. 
*/ + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezbs %v18,%v16,%v17 + jno .Lfound + j .Lloop + +.Llt16_1: + lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r1,%r4 + locrh %r1,%r4 /* Get minimum of bytes loaded in s1/2. */ + algfr %r5,%r1 /* Add smallest loaded bytes to current_len. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */ + vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r4,%r1,.Lfound /* Jump away if miscompare is within loaded + bytes. */ + j .Lloop + +.Lfound: + je .Lend_equal /* cc=0 -> return 0. */ + lghi %r2,1 /* Default result is 1. */ + lghi %r1,-1 + locgrl %r2,%r1 /* Result becomes -1 on the 'low' condition. */ + br %r14 +.Lend_equal: + lghi %r2,0 /* Return 0. */ + br %r14 +END(__strcmp_vx) + +# define strcmp __strcmp_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__strcmp_c, __GI_strcmp) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +#include <strcmp.S> diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcmp.c b/REORG.TODO/sysdeps/s390/multiarch/strcmp.c new file mode 100644 index 0000000000..19e6d02597 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcmp.c @@ -0,0 +1,29 @@ +/* Multiple versions of strcmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strcmp __redirect_strcmp +/* Omit the strcmp inline definitions because it would redefine strcmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# include <ifunc-resolve.h> +# undef strcmp + +s390_vx_libc_ifunc2_redirected (__redirect_strcmp, __strcmp, strcmp) + +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strcpy-vx.S new file mode 100644 index 0000000000..1ae9860a01 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcpy-vx.S @@ -0,0 +1,109 @@ +/* Vector optimized 32/64 bit S/390 version of strcpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strcpy (char *dest, const char *src) + Copy string src to dest. + + Register usage: + -r1=tmp + -r2=dest and return_value + -r3=src + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__strcpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3)/* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lloop /* No zero found -> loop. 
*/ + +.Lfound_v16_32: + aghi %r5,32 +.Lfound_v16_0: + la %r3,0(%r5,%r2) + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + vstl %v16,%r4,0(%r3) /* Store characters including zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + la %r3,16(%r5,%r2) + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + vstl %v18,%r4,0(%r3) /* Store characters including zero. */ + br %r14 + +.Lfound_align: + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + br %r14 +END(__strcpy_vx) + +/* Use mvst-strcpy-implementation as default implementation. */ +# define strcpy __strcpy_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__strcpy_c, __GI_strcpy) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +/* Include mvst-strcpy-implementation in s390-32/s390-64 subdirectory. */ +#include <strcpy.S> diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcpy.c b/REORG.TODO/sysdeps/s390/multiarch/strcpy.c new file mode 100644 index 0000000000..5a5107690b --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcpy.c @@ -0,0 +1,27 @@ +/* Multiple versions of strcpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strcpy __redirect_strcpy +# include <string.h> +# undef strcpy +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strcpy, __strcpy, strcpy) + +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcspn-c.c b/REORG.TODO/sysdeps/s390/multiarch/strcspn-c.c new file mode 100644 index 0000000000..f67e30d031 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcspn-c.c @@ -0,0 +1,28 @@ +/* Default strcspn implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRCSPN __strcspn_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strcspn_c, __GI_strcspn, __strcspn_c); +# endif /* SHARED */ + +# include <string/strcspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcspn-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strcspn-vx.S new file mode 100644 index 0000000000..840d94ab10 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcspn-vx.S @@ -0,0 +1,281 @@ +/* Vector optimized 32/64 bit S/390 version of strcspn. 
+ Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strcspn (const char *s, const char * reject) + The strcspn() function calculates the length of the initial segment + of s which consists entirely of characters not in reject. + + This method checks the length of reject string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of reject-string. Otherwise a slower full + check of reject-string is used. 
+ + register overview: + r3: pointer to start of reject-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string + r4: found byte index + r1: current return len + v16: search-string + v17: reject-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first reject-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any reject-character + v22: current mask; 1 indicates a match between + search-string-vreg and any reject-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of reject-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former reject-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb reject-string +*/ +ENTRY(__strcspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if reject-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load reject. */ + lghi %r1,0 /* Zero out current len. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if reject + lays on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> reject fits in one vreg. */ + j .Lslow /* No zero -> reject exceeds one vreg. */ + + +.Lcheck_onbb: + /* Reject lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Reject fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if reject fits in one vreg. 
*/ + + + /* + Search s for reject in one vreg + ------------------------------- + */ +.Lfast: + /* Complete reject-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 + equal to any in v17 + or first zero element. */ + + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded + bytes, return with found element + index (=length of initial segment). */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to reject + and no zero was found. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ +.Lfast_loop_found2: + algrk %r2,%r1,%r4 /* Add found index to current len. */ + br %r14 + + + + /* + Search s for reject in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Reject in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. 
*/ + vone %v24 /* One for checking result of former + string-part. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeeb %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ + /* Equal-index < min(zero-index, loaded byte count) + -> Return pointer to equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> Former str-part was last str-part + -> Return null */ + clrjl %r6,%r0,.Lslow_end_not_found + + /* All elements are zero (=no match) -> Proceed with next str-part. */ + vlr %v17,%v19 /* Load first part of reject (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. 
*/ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero (end of string) + -> Return current length. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any rejected character in + this reject-string-part) IN=0, RT=1. */ + vlgvb %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any rejected characters? + (all other parts of reject cannot lead to a match before this one) + -> Return current len, which is pointing to this element. */ + clijh %r4,0,.Lslow_end + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part + -> Add found index to current len + and end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ + aghi %r5,16 /* Increment current len of reject-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ + jo .Lslow_next_acc_onbb /* Jump away if reject-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in reject-part: fill zeros with first-reject-character. */ + vlgvb %r8,%v17,0 /* Load first element of reject-part. */ + clije %r8,0,.Lslow_next_str /* Process next str-part if first + character in this part of reject + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char across all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Reject-string part is prepared. 
*/ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_not_found: + algfr %r1,%r6 /* Add zero-index to current len. */ + j .Lslow_end +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end: + lgr %r2,%r1 + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 +END(__strcspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strcspn.c b/REORG.TODO/sysdeps/s390/multiarch/strcspn.c new file mode 100644 index 0000000000..414f313c75 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strcspn.c @@ -0,0 +1,31 @@ +/* Multiple versions of strcspn. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strcspn __redirect_strcspn +/* Omit the strcspn inline definitions because it would redefine strcspn. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strcspn +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strcspn, __strcspn, strcspn) + +#else +# include <string/strcspn.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strlen-c.c b/REORG.TODO/sysdeps/s390/multiarch/strlen-c.c new file mode 100644 index 0000000000..b72366920c --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strlen-c.c @@ -0,0 +1,28 @@ +/* Default strlen implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRLEN __strlen_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strlen_c, __GI_strlen, __strlen_c); +# endif /* SHARED */ + +# include <string/strlen.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strlen-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strlen-vx.S new file mode 100644 index 0000000000..a779d6521a --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strlen-vx.S @@ -0,0 +1,84 @@ +/* Vector optimized 32/64 bit S/390 version of strlen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strlen (const char *s) + Returns length of string s. + + Register usage: + -r1=bytes to 4k-byte boundary + -r2=s + -r3=tmp + -r4=tmp + -r5=current_len and return_value + -v16=part of s +*/ +ENTRY(__strlen_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. 
*/ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r4,%v16,7 /* Load zero index or 16 if not found. */ + clr %r4,%r1 /* If found zero within loaded bytes? */ + locgrl %r2,%r4 /* Then copy return value. */ + blr %r14 /* And return. */ + + /* Align s to 16 byte. */ + risbgn %r3,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r3 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16 byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r2,%v16,7 /* Load byte index of zero. */ + algr %r2,%r5 + br %r14 +END(__strlen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strlen.c b/REORG.TODO/sysdeps/s390/multiarch/strlen.c new file mode 100644 index 0000000000..615274e4b8 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strlen.c @@ -0,0 +1,29 @@ +/* Multiple versions of strlen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strlen __redirect_strlen +# include <string.h> +# include <ifunc-resolve.h> +# undef strlen + +s390_vx_libc_ifunc2_redirected (__redirect_strlen, __strlen, strlen) + +#else +# include <string/strlen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncat-c.c b/REORG.TODO/sysdeps/s390/multiarch/strncat-c.c new file mode 100644 index 0000000000..e72d778fb4 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncat-c.c @@ -0,0 +1,23 @@ +/* Default strncat implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRNCAT __strncat_c + +# include <string/strncat.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncat-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strncat-vx.S new file mode 100644 index 0000000000..c41509ceb5 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncat-vx.S @@ -0,0 +1,239 @@ +/* Vector optimized 32/64 bit S/390 version of strncat. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strncat (char *dest, const char *src, size_t n) + Concatenate two strings - at most n characters of src. + + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp + -r7=tmp + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__strncat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Convert 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. */ + lgr %r0,%r2 /* Save destination pointer for return. */ + vlvgp %v31,%r6,%r7 /* Save registers. 
*/ + + /* STRLEN + %r1 = loaded bytes (tmp) + %r5 = zero byte index / current_len + %r2 = dst + */ + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 + j .Llen_loop /* No zero -> loop. */ + +.Llen_found48: + aghi %r5,16 +.Llen_found32: + aghi %r5,16 +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r1,%v16,7 /* Load byte index of zero. */ + algr %r5,%r1 + +.Llen_end: + /* STRCPY + %r1 = zero byte index (tmp) + %r6 = loaded bytes (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + %r7 = border, tmp + */ + la %r2,0(%r5,%r2) /* strcpy at end of dst-string. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + clgrjle %r4,%r6,.Lcpy_remaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. 
*/ + clrjl %r1,%r6,.Lcpy_found_v16_store /* Found zero within loaded + bytes, copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* + Now we are 16byte aligned, so we can load a full vreg + without page fault. + */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lcpy_remaining_v16 /* If n <=16, + process remaining bytes. */ +.Lcpy_lt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + /* If current_len >= border then process remaining bytes. */ + clgrjhe %r5,%r7,.Lcpy_remaining_v16 + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v18 + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v16 + vfenezbs %v17,%v16,%v16 + je .Lcpy_found_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lcpy_remaining_v18: + vlr %v16,%v18 +.Lcpy_remaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + la %r2,0(%r5,%r2) /* vstl has no index register. 
*/ + /* Zero-index within remaining-bytes, store up to zero and end. */ + clgrjle %r1,%r7,.Lcpy_found_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes. */ + lghi %r1,0 + stc %r1,1(%r7,%r2) /* Store string-null-termination beyond n. */ +.Lcpy_end: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v16_32: + aghi %r5,32 + j .Lcpy_found_v16 +.Lcpy_found_v18_48: + aghi %r5,32 +.Lcpy_found_v18_16: + aghi %r5,16 +.Lcpy_found_v18: + vlr %v16,%v18 +.Lcpy_found_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) +.Lcpy_found_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + j .Lcpy_end + + /* Find zero in 16byte aligned loop. */ +.Lcpy_loop64: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Lcpy_lt64 +END(__strncat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncat.c b/REORG.TODO/sysdeps/s390/multiarch/strncat.c new file mode 100644 index 0000000000..d931ca6c85 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncat.c @@ -0,0 +1,27 @@ +/* Multiple versions of strncat. 
+ Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <string.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__strncat, strncat) + +#else +# include <string/strncat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncmp-c.c b/REORG.TODO/sysdeps/s390/multiarch/strncmp-c.c new file mode 100644 index 0000000000..fddd5ed97e --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncmp-c.c @@ -0,0 +1,28 @@ +/* Default strncmp implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRNCMP __strncmp_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strncmp_c, __GI_strncmp, __strncmp_c); +# endif /* SHARED */ + +# include <string/strncmp.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncmp-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strncmp-vx.S new file mode 100644 index 0000000000..ee35d2debf --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncmp-vx.S @@ -0,0 +1,137 @@ +/* Vector optimized 32/64 bit S/390 version of strncmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int strncmp (const char *s1, const char *s2, size_t n) + Compare at most n characters of two strings. 
+ + Register usage: + -r0=tmp + -r1=tmp + -r2=s1 + -r3=s2 + -r4=n + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__strncmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */ + lghi %r5,0 /* current_len = 0. */ + +.Lloop: + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */ + lcbb %r0,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vr is not fully loaded. */ + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vr is not fully loaded. */ + aghi %r5,16 /* Both vrs are fully loaded. */ + clgrjhe %r5,%r4,.Llastcmp /* If current_len >= n ->last compare. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */ + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfenezbs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfenezbs %v18,%v16,%v17 + jno .Lfound + j .Lloop + +.Llt16_1: + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r0,%r1 /* Compare logical. */ + locrh %r0,%r1 /* Compute minimum of bytes loaded. */ + algfr %r5,%r0 /* Add smallest loaded bytes to current_len. */ + clgrj %r5,%r4,10,.Llastcmp /* If current_len >= n ->last compare. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search.
*/ + vlgvb %r1,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r1,%r0,.Lfound /* Jump away if miscompare is within + loaded bytes (index < loaded-bytes) */ + j .Lloop + +.Llastcmp: + /* Use comparison result only if located within first n characters. + %r0: loaded byte count in vreg; + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + First ignored match index: loaded bytes - (current_len-n): ]0...16] + */ + slgr %r5,%r4 /* %r5 = current_len - n. */ + slr %r0,%r5 /* %r0 = first ignored match index. */ + vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */ + vlgvb %r1,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r1,%r0,.Lfound /* Jump away if miscompare is within + loaded bytes and below n bytes. */ + j .Lend_equal /* Miscompare after n-bytes -> end equal. */ + +.Lfound: + /* Difference or end of string. */ + je .Lend_equal /* Equal condition -> return 0. */ + lghi %r2,1 /* Preset return value 1. */ + lghi %r1,-1 + locgrl %r2,%r1 /* Low condition -> return -1 instead. */ + br %r14 +.Lend_equal: + lghi %r2,0 + br %r14 +END(__strncmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncmp.c b/REORG.TODO/sysdeps/s390/multiarch/strncmp.c new file mode 100644 index 0000000000..c97753cc70 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncmp.c @@ -0,0 +1,31 @@ +/* Multiple versions of strncmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strncmp __redirect_strncmp +/* Omit the strncmp inline definitions because it would redefine strncmp. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strncmp +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strncmp, __strncmp, strncmp) + +#else +# include <string/strncmp.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strncpy-vx.S new file mode 100644 index 0000000000..4f58977b9b --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncpy-vx.S @@ -0,0 +1,207 @@ +/* Vector optimized 32/64 bit S/390 version of strncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char * strncpy (char *dest, const char *src, size_t n) + Copy at most n characters of string src to dest.
+ + Register usage: + -r0=dest pointer for return + -r1=tmp, zero byte index + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp, loaded bytes + -r7=tmp, border + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__strncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. */ + lgr %r0,%r2 /* Save destination pointer for return. */ + vlvgp %v31,%r6,%r7 /* Save registers. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load + a full vreg without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lremaining_v16 /* If n <= 16, process remaining + bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes.
*/ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezbs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezbs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes; no string-termination is appended. */ + vfenezb %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + /* Zero in remaining bytes? -> jump away (zero-index < max-index) + Do not jump away if zero-index == max-index, + but simply copy zero with vstl below. */ + clrjl %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination!. */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ...
*/ + slgr %r4,%r5 /* = n - (current_len + zero_index + 1). */ + la %r2,0(%r1,%r2) /* Pointer to zero. start filling beyond. */ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) /* Target of exrl above; follows j .Lend. */ + +.Lloop64: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezbs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezbs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezbs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Llt64 +END(__strncpy_vx) + +# define strncpy __strncpy_c +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) strong_alias(__strncpy_c, __GI_strncpy) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ + +/* Include strncpy-implementation in s390-32/s390-64 subdirectory. */ +#include <strncpy.S> diff --git a/REORG.TODO/sysdeps/s390/multiarch/strncpy.c b/REORG.TODO/sysdeps/s390/multiarch/strncpy.c new file mode 100644 index 0000000000..d44b459093 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strncpy.c @@ -0,0 +1,29 @@ +/* Multiple versions of strncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library.
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strncpy __redirect_strncpy +/* Omit the strncpy inline definitions because it would redefine strncpy. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strncpy +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strncpy, __strncpy, strncpy) + +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strnlen-c.c b/REORG.TODO/sysdeps/s390/multiarch/strnlen-c.c new file mode 100644 index 0000000000..46e2cac342 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strnlen-c.c @@ -0,0 +1,30 @@ +/* Default strnlen implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRNLEN __strnlen_c +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__strnlen_c, __GI_strnlen, __strnlen_c); \ + strong_alias (__strnlen_c, __strnlen_c_1); \ + __hidden_ver1 (__strnlen_c_1, __GI___strnlen, __strnlen_c_1); +# endif /* SHARED */ + +# include <string/strnlen.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strnlen-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strnlen-vx.S new file mode 100644 index 0000000000..da83a0d4f0 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strnlen-vx.S @@ -0,0 +1,134 @@ +/* Vector optimized 32/64 bit S/390 version of strnlen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strnlen (const char *s, size_t maxlen) + Returns the number of characters in s or at most maxlen. 
+ + Register usage: + -r1=tmp + -r2=address of string + -r3=maxlen (number of characters to be read) + -r4=tmp + -r5=current_len and return_value + -v16=part of s +*/ +ENTRY(__strnlen_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r3,%r3 +# endif /* !defined __s390x__ */ + + clgfi %r3,0 /* if maxlen == 0, return 0. */ + locgre %r2,%r3 + ber %r14 + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r1,%r1 /* Convert 32bit to 64bit. */ + + vfenezb %v16,%v16,%v16 /* Find element not equal with zero search. */ + clgr %r1,%r3 + locgrh %r1,%r3 /* loaded_byte_count + = min (loaded_byte_count, maxlen) */ + + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clr %r5,%r1 /* If found zero within loaded bytes? */ + locgrl %r2,%r5 /* Then copy return value. */ + blr %r14 /* And return. */ + + clgr %r1,%r3 /* If loaded_byte_count == maxlen? */ + locgre %r2,%r3 /* Then copy return value. */ + ber %r14 /* And return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + /* Find zero in max 64byte with aligned s. */ +.Llt64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound /* current_len >= maxlen -> end.
*/ + vl %v16,0(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) + vfenezbs %v16,%v16,%v16 + j .Lfound /* Unconditional: .Lfound adds the index (16 if no zero) and limits the result to maxlen. */ + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r4,%v16,7 /* Load byte index of zero or 16 if no zero. */ + algr %r5,%r4 /* current_len += index. */ + + clgr %r5,%r3 + locgrh %r5,%r3 /* Return min (current_len, maxlen). */ + lgr %r2,%r5 + br %r14 + + /* Find zero in 16 byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezbs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezbs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + j .Llt64 +END(__strnlen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strnlen.c b/REORG.TODO/sysdeps/s390/multiarch/strnlen.c new file mode 100644 index 0000000000..6ebcafbd16 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strnlen.c @@ -0,0 +1,32 @@ +/* Multiple versions of strnlen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strnlen __redirect_strnlen +# define __strnlen __redirect___strnlen +# include <string.h> +# undef strnlen +# undef __strnlen +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc_redirected (__redirect___strnlen, __strnlen) +weak_alias (__strnlen, strnlen) + +#else +# include <string/strnlen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strpbrk-c.c b/REORG.TODO/sysdeps/s390/multiarch/strpbrk-c.c new file mode 100644 index 0000000000..f099c0607f --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strpbrk-c.c @@ -0,0 +1,28 @@ +/* Default strpbrk implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRPBRK __strpbrk_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strpbrk_c, __GI_strpbrk, __strpbrk_c); +# endif /* SHARED */ + +# include <string/strpbrk.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strpbrk-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strpbrk-vx.S new file mode 100644 index 0000000000..dd24d5e9db --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strpbrk-vx.S @@ -0,0 +1,302 @@ +/* Vector optimized 32/64 bit S/390 version of strpbrk. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strpbrk (const char *s, const char * accept) + The strpbrk() function locates the first occurrence in the string s + of any of the characters in the string accept and returns a pointer + to that character or NULL if not found. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. 
Otherwise a slower full + check of accept-string is used. + + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string (32bit unsigned) + r4: found byte index (32bit unsigned) + r1: current return len (64bit unsigned) + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__strpbrk_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lghi %r1,0 /* Zero out current len. */ + vlgvb %r0,%v17,0 /* Get first element. */ + clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lays + on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg */ + + +.Lcheck_onbb: + /* Accept lays on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. 
*/ + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 unequal to any + in v17 or first zero element. */ + + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes, return with found + element index (=equal count). */ + clrjl %r4,%r0,.Lfast_loop_found2 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezbs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was unequal to accept + and not zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ +.Lfast_loop_found2: + vlgvb %r0,%v16,0(%r4) /* Get found element. */ + clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ + algfr %r1,%r4 /* Add found index of char to current len. */ + la %r2,0(%r1,%r2) /* And return pointer to first equal char. 
*/ + br %r14 + +.Lfast_end_null: + lghi %r2,0 /* Return null if no character is equal. */ + br %r14 + + + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former string. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16 byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeeb %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) + or 16 if no match. */ + /* Equal-index < min(zero-index, loaded byte count) + -> return pointer to equal element. 
*/ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> former str-part was last str-part + -> return null */ + clrjl %r6,%r0,.Lslow_end_null + /* All elements are zero (=no match) -> proceed with next str-part. */ + + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vlgvb %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any accepted characters + (all other parts of accept cannot lead to a match before this one) + -> current len is pointing to first element + -> return found */ + clijh %r4,0,.Lslow_end_found + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part + -> add index to current_len and + end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away ifaccept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. 
*/ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvb %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_next_str /* Proceed with next string-part, + if first char in this part of accept + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char accross all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_null: + lghi %r1,0 /* Return null if no character is equal. */ + j .Lslow_end + +.Lslow_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + vlgvb %r0,%v16,0(%r4) /* Get found element. */ + clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ + +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end_found: + la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ + +.Lslow_end: + /* Restore registers. 
*/ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + lgr %r2,%r1 + br %r14 +END(__strpbrk_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strpbrk.c b/REORG.TODO/sysdeps/s390/multiarch/strpbrk.c new file mode 100644 index 0000000000..89bdb2b26c --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strpbrk.c @@ -0,0 +1,31 @@ +/* Multiple versions of strpbrk. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strpbrk __redirect_strpbrk +/* Omit the strpbrk inline definitions because it would redefine strpbrk. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strpbrk +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strpbrk, __strpbrk, strpbrk) + +#else +# include <string/strpbrk.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strrchr-c.c b/REORG.TODO/sysdeps/s390/multiarch/strrchr-c.c new file mode 100644 index 0000000000..d1a8c4bd31 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strrchr-c.c @@ -0,0 +1,29 @@ +/* Default strrchr implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRRCHR __strrchr_c +# undef weak_alias +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strrchr_c, __GI_strrchr, __strrchr_c); +# endif /* SHARED */ + +# include <string/strrchr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strrchr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strrchr-vx.S new file mode 100644 index 0000000000..89a8f5f14c --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strrchr-vx.S @@ -0,0 +1,180 @@ +/* Vector optimized 32/64 bit S/390 version of strrchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* char *strrchr (const char *s, int c) + Locate the last character c in string. + + Register usage: + -r0=loaded bytes in first part of s. + -r1=offset of the part of s with the last occurrence of c or -1 if not found. + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=index of found element + -v18=replicated c + -v19=part of s with last occurrence of c. + -v20=permute pattern +*/ +ENTRY(__strrchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vlvgb %v18,%r3,0 /* Generate vector whose elements are all c. + If c > 255, c will be truncated. */ + vrepb %v18,%v18,0 + + lghi %r1,-1 /* Currently no c found. */ + lghi %r5,0 /* current_len = 0. */ + + vfeezbs %v17,%v16,%v18 /* Find element equal or zero. */ + vlgvb %r4,%v17,7 /* Load byte index of c/zero or 16. */ + clrjl %r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes. */ +.Lalign: + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezbs %v17,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezbs %v17,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezbs %v17,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezbs %v17,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc.
*/ +.Lfound32: + la %r5,16(%r5) +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* Found zero, but no c before that zero. */ + /* Save this part of s to check for further matches after reaching + the end of the complete string. */ + vlr %v19,%v16 + lgr %r1,%r5 + + jh .Lzero /* Found a zero after the found c. */ + aghi %r5,16 /* Start search of next part of s. */ + j .Lloop + +.Lfound_first_part: + /* This code is only executed if the found c/zero is within loaded + bytes. If no c/zero was found (cc==3) the found index = 16, thus + this code is not called. + Resulting condition code of vector find element equal: + cc==0: no c, found zero + cc==1: c found, no zero + cc==2: c found, found zero after c + cc==3: no c, no zero (this case can be ignored). */ + je .Lzero /* Found zero, but no c before that zero. */ + + locgrne %r1,%r5 /* Mark c as found in first part of s. */ + vlr %v19,%v16 + + jl .Lalign /* No zero (e.g. if vr was fully loaded) + -> Align and loop afterwards. */ + + /* Found a zero in vr. If vr was not fully loaded due to block + boundary, the remaining bytes are filled with zero and we can't + rely on zero indication of condition code here! */ + + vfenezb %v17,%v16,%v16 /* Find zero. */ + vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */ + clrjl %r4,%r0,.Lzero /* Zero within loaded bytes -> end. */ + j .Lalign /* Align and loop afterwards. */ + +.Lend_searched_zero: + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + algr %r5,%r4 + la %r2,0(%r5,%r2) /* Return pointer to zero. */ + br %r14 + +.Lzero: + /* Reached end of string. Check if one c was found before. */ + clije %r3,0,.Lend_searched_zero /* Found zero and c is zero. */ + + cgfi %r1,-1 /* No c found -> return NULL. */ + locghie %r2,0 + ber %r14 + + larl %r3,.Lpermute_mask /* Load permute mask. */ + vl %v20,0(%r3) + + /* c was found and is part of v19. */ + vfenezb %v17,%v19,%v19 /* Find zero. */ + vlgvb %r4,%v17,7 /* Load byte index of zero or 16.
*/ + + clgfi %r5,0 /* Loaded byte count in v19 is 16, ... */ + lochine %r0,16 /* ... if v19 is not the first part of s. */ + ahi %r0,-1 /* Convert byte count to highest index. */ + + clr %r0,%r4 + locrl %r4,%r0 /* r4 = min (zero-index, highest-index). */ + + /* Right-shift of v19 to mask bytes after zero. */ + clije %r4,15,.Lzero_permute /* No shift is needed if highest index + in vr is 15. */ + lhi %r0,15 + slr %r0,%r4 /* Compute byte count for vector shift right. */ + sll %r0,3 /* Convert to bit count. */ + vlvgb %v17,%r0,7 + vsrlb %v19,%v19,%v17 /* Vector shift right by byte by number of bytes + specified in bits 1-4 of byte 7 in v17. */ + + /* Reverse bytes in v19. */ +.Lzero_permute: + vperm %v19,%v19,%v19,%v20 /* Permute v19 to reversed order. */ + + /* Find c in reversed v19. */ + vfeeb %v19,%v19,%v18 /* Find c. */ + la %r2,0(%r1,%r2) + vlgvb %r3,%v19,7 /* Load byte index of c. */ + + /* Compute index in real s and return. */ + slgr %r4,%r3 + la %r2,0(%r4,%r2) /* Return pointer to last c. */ + br %r14 +.Lpermute_mask: + .byte 0x0F,0x0E,0x0D,0x0C,0x0B,0x0A,0x09,0x08 + .byte 0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00 +END(__strrchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strrchr.c b/REORG.TODO/sysdeps/s390/multiarch/strrchr.c new file mode 100644 index 0000000000..edb8ce6caf --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strrchr.c @@ -0,0 +1,30 @@ +/* Multiple versions of strrchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version.
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strrchr __redirect_strrchr +# include <string.h> +# undef strrchr +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strrchr, __strrchr, strrchr) +weak_alias (strrchr, rindex); + +#else +# include <string/strrchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strspn-c.c b/REORG.TODO/sysdeps/s390/multiarch/strspn-c.c new file mode 100644 index 0000000000..99e1f20da4 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strspn-c.c @@ -0,0 +1,28 @@ +/* Default strspn implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define STRSPN __strspn_c +# ifdef SHARED +# undef libc_hidden_builtin_def +# define libc_hidden_builtin_def(name) \ + __hidden_ver1 (__strspn_c, __GI_strspn, __strspn_c); +# endif /* SHARED */ + +# include <string/strspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strspn-vx.S b/REORG.TODO/sysdeps/s390/multiarch/strspn-vx.S new file mode 100644 index 0000000000..1cb3d0d598 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strspn-vx.S @@ -0,0 +1,256 @@ +/* Vector optimized 32/64 bit S/390 version of strspn. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t strspn (const char *s, const char * accept) + The strspn() function calculates the length of the initial segment + of s which consists entirely of characters in accept. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. 
+ + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r4: loaded byte count of vl search-string + r0: found byte index + r1: current return len of s + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__strspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lcbb %r4,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lies + on block-boundary. */ +.Lcheck_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg. */ + +.Lcheck_onbb: + /* Accept lies on block-boundary. */ + vfenezb %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r0,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r0,%r4,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string is in v17 and remaining bytes are zero.
*/ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezbs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes (%r0 < %r1), + return with found element index (=equal count). */ + clr %r0,%r1 + locgrl %r2,%r0 + blr %r14 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezbs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + jno .Lfast_loop_found + vl %v16,16(%r1,%r2) + vfaezbs %v16,%v16,%v17,8 + jno .Lfast_loop_found16 + vl %v16,32(%r1,%r2) + vfaezbs %v16,%v16,%v17,8 + jno .Lfast_loop_found32 + vl %v16,48(%r1,%r2) + vfaezbs %v16,%v16,%v17,8 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was unequal to accept + and not zero. */ + + /* Found unequal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + algrk %r2,%r1,%r0 /* And add it to current len. */ + br %r14 + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + lghi %r1,0 /* current_len = 0. */ + + /* Accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + + /* Align s to 16 byte.
*/ + risbg %r0,%r2,60,128+63,0 /* Test if s is aligned and + %r0 = bits 60-63 'and' 15 */ + je .Lslow_loop_str /* If s is aligned, loop aligned */ + lghi %r4,15 + slr %r4,%r0 /* Compute highest index to load (15-x). */ + vll %v16,%r4,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r4,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 + if there is no zero. */ + clr %r4,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r4 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r4 /* Add loaded byte count to current len. */ +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r4,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezb %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + +.Lslow_loop_acc: + vfaeb %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vo %v21,%v21,%v22 /* global-mask = global- | matching-mask. */ + vfenezb %v18,%v21,%v21 /* Find first zero in global-mask. */ + vlgvb %r0,%v18,7 /* Get first found zero-index + (= first mismatch). */ + clrjl %r0,%r6,.Lslow_next_acc /* Mismatch-index < min(lbc,zero-index) + -> Process this string-part + with next acc-part.
*/ + clrjhe %r0,%r4,.Lslow_next_str /* Found-index >= loaded byte count + -> All loaded bytes are matching + any accept-character + and are not zero. */ + /* All bytes are matching any characters in accept-string + and search-string is fully processed (found-index == zero-index) */ +.Lslow_add_lbc_end: + algrk %r2,%r1,%r0 /* Add matching characters to current_len. */ + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 + + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_add_lbc_end /* There was a zero in last acc-part + -> Add found index to current len + and end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrbs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvb %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_add_lbc_end /* End if zero is first character + in this part of accept-string. */ + /* r8>0 -> zero found in this acc-part. */ + vrepb %v18,%v17,0 /* Replicate first char across all chars. */ + vceqb %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + vfenezb %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vr. */ + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + Check for zero is in jump-target.
*/ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ +END(__strspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/strspn.c b/REORG.TODO/sysdeps/s390/multiarch/strspn.c new file mode 100644 index 0000000000..b9cf295fb3 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/strspn.c @@ -0,0 +1,31 @@ +/* Multiple versions of strspn. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define strspn __redirect_strspn +/* Omit the strspn inline definitions because they would redefine strspn. */ +# define __NO_STRING_INLINES +# include <string.h> +# undef strspn +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_strspn, __strspn, strspn) + +#else +# include <string/strspn.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/utf16-utf32-z9.c b/REORG.TODO/sysdeps/s390/multiarch/utf16-utf32-z9.c new file mode 100644 index 0000000000..6e64169835 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/utf16-utf32-z9.c @@ -0,0 +1,44 @@ +/* Conversion between UTF-16 and UTF-32 BE/internal - multiarch s390 version.
+ + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/s390/utf16-utf32-z9.c> +#include <ifunc-resolve.h> + +#undef FROM_LOOP +#define FROM_LOOP __from_utf16_loop +#undef TO_LOOP +#define TO_LOOP __to_utf16_loop + +#define _SINGLE_NAME(NAME) NAME##_single +#define SINGLE_NAME(NAME) _SINGLE_NAME(NAME) +strong_alias (SINGLE_NAME (FROM_LOOP_DEFAULT), SINGLE_NAME (FROM_LOOP)) +strong_alias (SINGLE_NAME (TO_LOOP_DEFAULT), SINGLE_NAME (TO_LOOP)) + +/* Generate ifunc'ed loop functions for FROM/TO_LOOP. */ +s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP, + (HAVE_FROM_VX && (hwcap & HWCAP_S390_VX)) + ? FROM_LOOP_VX + : FROM_LOOP_DEFAULT); + +s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP, + (HAVE_TO_VX && (hwcap & HWCAP_S390_VX)) + ? TO_LOOP_VX + : TO_LOOP_DEFAULT); + +#include <iconv/skeleton.c> diff --git a/REORG.TODO/sysdeps/s390/multiarch/utf8-utf16-z9.c b/REORG.TODO/sysdeps/s390/multiarch/utf8-utf16-z9.c new file mode 100644 index 0000000000..b55ef1aaec --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/utf8-utf16-z9.c @@ -0,0 +1,48 @@ +/* Conversion between UTF-8 and UTF-16 - multiarch s390 version. + + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/s390/utf8-utf16-z9.c> +#include <ifunc-resolve.h> + +#undef FROM_LOOP +#define FROM_LOOP __from_utf8_loop +#undef TO_LOOP +#define TO_LOOP __to_utf8_loop + +#define _SINGLE_NAME(NAME) NAME##_single +#define SINGLE_NAME(NAME) _SINGLE_NAME(NAME) +strong_alias (SINGLE_NAME (FROM_LOOP_DEFAULT), SINGLE_NAME (FROM_LOOP)) +strong_alias (SINGLE_NAME (TO_LOOP_DEFAULT), SINGLE_NAME (TO_LOOP)) + +/* Generate ifunc'ed loop functions for FROM/TO_LOOP. */ +s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP, + (HAVE_FROM_VX && (hwcap & HWCAP_S390_VX)) + ? FROM_LOOP_VX + : (HAVE_FROM_CU && (hwcap & HWCAP_S390_ZARCH + && hwcap & HWCAP_S390_HIGH_GPRS + && hwcap & HWCAP_S390_ETF3EH)) + ? FROM_LOOP_CU + : FROM_LOOP_DEFAULT); + +s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP, + (HAVE_TO_VX && (hwcap & HWCAP_S390_VX)) + ? TO_LOOP_VX + : TO_LOOP_DEFAULT); + +#include <iconv/skeleton.c> diff --git a/REORG.TODO/sysdeps/s390/multiarch/utf8-utf32-z9.c b/REORG.TODO/sysdeps/s390/multiarch/utf8-utf32-z9.c new file mode 100644 index 0000000000..faf1f46aa9 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/utf8-utf32-z9.c @@ -0,0 +1,48 @@ +/* Conversion between UTF-8 and UTF-32 - multiarch s390 version. + + Copyright (C) 2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#include <sysdeps/s390/utf8-utf32-z9.c> +#include <ifunc-resolve.h> + +#undef FROM_LOOP +#define FROM_LOOP __from_utf8_loop +#undef TO_LOOP +#define TO_LOOP __to_utf8_loop + +#define _SINGLE_NAME(NAME) NAME##_single +#define SINGLE_NAME(NAME) _SINGLE_NAME(NAME) +strong_alias (SINGLE_NAME (FROM_LOOP_DEFAULT), SINGLE_NAME (FROM_LOOP)) +strong_alias (SINGLE_NAME (TO_LOOP_DEFAULT), SINGLE_NAME (TO_LOOP)) + +/* Generate ifunc'ed loop functions for FROM/TO_LOOP. */ +s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP, + (HAVE_FROM_VX && (hwcap & HWCAP_S390_VX)) + ? FROM_LOOP_VX + : (HAVE_FROM_CU && (hwcap & HWCAP_S390_ZARCH + && hwcap & HWCAP_S390_HIGH_GPRS + && hwcap & HWCAP_S390_ETF3EH)) + ? FROM_LOOP_CU + : FROM_LOOP_DEFAULT); + +s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP, + (HAVE_TO_VX && (hwcap & HWCAP_S390_VX)) + ? TO_LOOP_VX + : TO_LOOP_DEFAULT); + +#include <iconv/skeleton.c> diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcpcpy-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcpcpy-c.c new file mode 100644 index 0000000000..6eadde5441 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcpcpy-c.c @@ -0,0 +1,25 @@ +/* Default wcslen implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +/* Compile the generic C code from wcsmbs/wcpcpy.c under the name __wcpcpy_c. NOTE(review): this assumes the included file names its function through the WCPCPY macro - confirm. __wcpcpy_vx branches to __wcpcpy_c if src is not 4-byte aligned. */ # define WCPCPY __wcpcpy_c + +# include <wchar.h> +extern __typeof (__wcpcpy) __wcpcpy_c; +# include <wcsmbs/wcpcpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcpcpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcpcpy-vx.S new file mode 100644 index 0000000000..73b2670786 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcpcpy-vx.S @@ -0,0 +1,114 @@ +/* Vector optimized 32/64 bit S/390 version of wcpcpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcpcpy (wchar_t *dest, const wchar_t *src) + Copy string src to dest returning a pointer to the terminating zero written to dest. + If src is not 4-byte aligned, the work is handed over to __wcpcpy_c. + + Register usage: + -r0=unused + -r1=tmp + -r2=dest and return value + -r3=src (reused as store address once the zero was found) + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__wcpcpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r3,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3, i.e. %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. Each iteration handles four 16-byte parts; a part is stored to dst only after it was checked to contain no zero. */ +.Lloop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lloop /* No zero found -> loop. 
*/ + +.Lfound_v16_32: + aghi %r5,32 /* Zero is in the part at offset 32. */ +.Lfound_v16_0: + la %r3,0(%r5,%r2) /* %r3 = dest + current_len; src is not needed any more. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r3) /* Copy characters including zero. */ + lay %r2,-3(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 /* Offset 48 = 32 added here + 16 in the la below. */ +.Lfound_v18_16: + la %r3,16(%r5,%r2) /* %r3 = dest + current_len + 16. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v18,%r1,0(%r3) /* Copy characters including zero. */ + lay %r2,-3(%r1,%r3) /* Return pointer to zero. */ + br %r14 + +.Lfound_align: + aghi %r5,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + lay %r2,-3(%r5,%r2) /* Return pointer to zero. */ + br %r14 + +.Lfallback: + jg __wcpcpy_c /* Branch to the common-code variant; it returns to our caller. */ +END(__wcpcpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcpcpy.c b/REORG.TODO/sysdeps/s390/multiarch/wcpcpy.c new file mode 100644 index 0000000000..79d1641606 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcpcpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcpcpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +/* Define __wcpcpy through the ifunc helper and export wcpcpy as weak alias. NOTE(review): s390_vx_libc_ifunc presumably resolves to __wcpcpy_vx if the VX hwcap bit is set and to __wcpcpy_c otherwise - confirm in ifunc-resolve.h. */ s390_vx_libc_ifunc (__wcpcpy) +weak_alias (__wcpcpy, wcpcpy) + +#else +/* No VX assembler support or not in libc: use the generic C implementation directly. */ # include <wcsmbs/wcpcpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcpncpy-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcpncpy-c.c new file mode 100644 index 0000000000..7cb834bba8 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcpncpy-c.c @@ -0,0 +1,25 @@ +/* Default wcpncpy implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +/* Compile the generic C code from wcsmbs/wcpncpy.c under the name __wcpncpy_c. NOTE(review): this assumes the included file names its function through the WCPNCPY macro - confirm. __wcpncpy_vx branches to __wcpncpy_c if src is not 4-byte aligned. */ # define WCPNCPY __wcpncpy_c + +# include <wchar.h> +extern __typeof (__wcpncpy) __wcpncpy_c; +# include <wcsmbs/wcpncpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcpncpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcpncpy-vx.S new file mode 100644 index 0000000000..6a1b587f0e --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcpncpy-vx.S @@ -0,0 +1,222 @@ +/* Vector optimized 32/64 bit S/390 version of wcpncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcpncpy (wchar_t *dest, const wchar_t *src, size_t n) + Copies at most n characters of string src to dest. + If a zero is found within these n characters, the rest of dest + up to n characters is filled with zeros and a pointer to the + first zero written to dest is returned. Otherwise dest is not + zero-terminated and dest+n is returned. + If src is not 4-byte aligned, the work is handed over to __wcpncpy_c. + + Register usage: + -%r0 = return value + -%r1 = zero byte index + -%r2 = curr dst pointer + -%r3 = curr src pointer + -%r4 = n (as byte-count after the range check) + -%r5 = current_len + -%r6 = loaded bytes + -%r7 = border, tmp + -%v31 = save area for %r6 and %r7 +*/ +ENTRY(__wcpncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Zero-extend 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + + tmll %r3,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vlvgp %v31,%r6,%r7 /* Save %r6 and %r7 in %v31; restored at .Lend. */ + lghi %r5,0 /* current_len = 0. */ + + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. 
*/ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + la %r0,0(%r4,%r2) /* Save destination pointer + n for return. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3, i.e. %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load a full vreg + without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lremaining_v16 /* If n <=16, + process remaining bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. 
*/ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezfs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezfs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 /* Continue with the part held in %v18. */ +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store the remaining bytes. If they contain a zero, continue at + .Lfound_v16_store; otherwise dest is not zero-terminated. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + /* Zero in remaining bytes? -> jump away (zero-index <= max-index). */ + clrjle %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination! */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load return value: dest + n or pointer to the found zero. */ + br %r14 + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 /* Continue with the part held in %v18. */ +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining byte count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ... */ + slgr %r4,%r5 /* = n - (currlen + zero_index + 1) */ + la %r2,0(%r1,%r2) /* Pointer to zero. start filling beyond. */ + lay %r0,-3(%r2) /* Save return-pointer to found zero. 
*/ + clgije %r4,1,.Lend /* Skip zero-filling, if found-zero is last + possible character. + (1 is subtracted from r4 below!). */ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 /* Execute the mvc at .Lmvc_lt256 with its length taken from %r4. */ + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) /* Only executed via exrl: propagate the zero byte at 0(%r2) forward. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r3) + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Llt64 + +.Lfallback: + jg __wcpncpy_c /* Branch to the common-code variant; it returns to our caller. */ +END(__wcpncpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcpncpy.c b/REORG.TODO/sysdeps/s390/multiarch/wcpncpy.c new file mode 100644 index 0000000000..1a19a9939a --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcpncpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcpncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +/* Define __wcpncpy through the ifunc helper and export wcpncpy as weak alias. NOTE(review): s390_vx_libc_ifunc presumably resolves to __wcpncpy_vx if the VX hwcap bit is set and to __wcpncpy_c otherwise - confirm in ifunc-resolve.h. */ s390_vx_libc_ifunc (__wcpncpy) +weak_alias (__wcpncpy, wcpncpy) + +#else +/* No VX assembler support or not in libc: use the generic C implementation directly. */ # include <wcsmbs/wcpncpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscat-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcscat-c.c new file mode 100644 index 0000000000..ef7c000455 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscat-c.c @@ -0,0 +1,25 @@ +/* Default wcscat implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +/* Compile the generic C code from wcsmbs/wcscat.c under the name __wcscat_c. NOTE(review): this assumes the included file names its function through the WCSCAT macro - confirm. __wcscat_vx branches to __wcscat_c if dest or src is not 4-byte aligned. */ # define WCSCAT __wcscat_c + +# include <wchar.h> +extern __typeof (__wcscat) __wcscat_c; +# include <wcsmbs/wcscat.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscat-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcscat-vx.S new file mode 100644 index 0000000000..604a849906 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscat-vx.S @@ -0,0 +1,175 @@ +/* Vector optimized 32/64 bit S/390 version of wcscat. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcscat (wchar_t *dest, const wchar_t *src) + Append string src (including its terminating zero) to the end of + string dest and return dest. + The routine first searches the zero of dest (WCSLEN part) and then + copies src to that position (WCSCPY part). + + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=tmp + -r5=current_len + -v16=part of dest (WCSLEN part) / part of src (WCSCPY part) + -v17=index of zero + -v18=part of src +*/ +ENTRY(__wcscat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. 
*/ + + /* __wcslen_c can handle non 4byte aligned pointers, + but __wcscpy_c not. Thus if either src or dest is + not 4byte aligned, use __wcscat_c. */ + tmll %r2,3 /* Test if dest is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + tmll %r3,3 /* Test if src is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lgr %r0,%r2 /* Save destination pointer for return. */ + + /* WCSLEN + r1 = loaded bytes (tmp) + r4 = zero byte index (tmp) + r2 = dst + */ + + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2, i.e. %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 + j .Llen_loop /* No zero -> loop. */ + +.Llen_found48: + aghi %r5,16 +.Llen_found32: + aghi %r5,16 +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r4,%v16,7 /* Load byte index of zero. */ + algr %r5,%r4 /* current_len = byte offset of the zero in dest. */ + +.Llen_end: + /* WCSCPY + %r1 = loaded bytes (tmp) + %r4 = zero byte index (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + */ + la %r2,0(%r5,%r2) /* wcscpy at end of dst-string. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. 
*/ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lcpy_found_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3, i.e. %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Find zero in 16byte aligned loop. Each iteration handles four 16-byte parts; a part is stored to dst only after it was checked to contain no zero. */ +.Lcpy_loop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lcpy_loop /* No zero -> loop. */ + +.Lcpy_found_v16_32: + aghi %r5,32 /* Zero is in the part at offset 32. */ +.Lcpy_found_v16_0: + la %r4,0(%r5,%r2) /* %r4 = curr dst pointer + current_len. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v18_48: + aghi %r5,32 /* Offset 48 = 32 added here + 16 in the la below. */ +.Lcpy_found_v18_16: + la %r4,16(%r5,%r2) /* %r4 = curr dst pointer + current_len + 16. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v18,%r1,0(%r4) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_align: + aghi %r5,3 /* Also copy remaining bytes of found zero. */ + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + lgr %r2,%r0 /* Load saved dest-ptr. 
*/ + br %r14 +.Lfallback: + jg __wcscat_c /* Branch to the common-code variant; it returns to our caller. */ +END(__wcscat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscat.c b/REORG.TODO/sysdeps/s390/multiarch/wcscat.c new file mode 100644 index 0000000000..2e7fdedf22 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscat.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcscat. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +/* Define __wcscat through the ifunc helper and export wcscat as weak alias. NOTE(review): s390_vx_libc_ifunc presumably resolves to __wcscat_vx if the VX hwcap bit is set and to __wcscat_c otherwise - confirm in ifunc-resolve.h. */ s390_vx_libc_ifunc (__wcscat) +weak_alias (__wcscat, wcscat) + +#else +/* No VX assembler support or not in libc: use the generic C implementation directly. */ # include <wcsmbs/wcscat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcschr-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcschr-c.c new file mode 100644 index 0000000000..b084983dd8 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcschr-c.c @@ -0,0 +1,37 @@ +/* Default wcschr implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +/* Compile the generic C code from wcsmbs/wcschr.c under the name __wcschr_c. NOTE(review): this assumes the included file names its function through the WCSCHR macro - confirm. __wcschr_vx branches to __wcschr_c if s is not 4-byte aligned. */ # define WCSCHR __wcschr_c + +# include <wchar.h> +extern __typeof (__wcschr) __wcschr_c; +/* Let weak_alias expand to nothing while the generic file is included; the public wcschr alias is created in the multiarch wcschr.c instead. */ # undef weak_alias +# define weak_alias(name, alias) +/* For the shared library, redefine libc_hidden_def so that both __GI_wcschr and __GI___wcschr refer to __wcschr_c (the second one through the extra alias __wcschr_c_1), and let libc_hidden_weak expand to nothing. NOTE(review): the exact effect of __hidden_ver1 is not visible here - confirm in libc-symbols.h. */ # ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcschr_c, __GI_wcschr, __wcschr_c); \ + strong_alias (__wcschr_c, __wcschr_c_1); \ + __hidden_ver1 (__wcschr_c_1, __GI___wcschr, __wcschr_c_1); +# undef libc_hidden_weak +# define libc_hidden_weak(name) +# endif /* SHARED */ + +# include <wcsmbs/wcschr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcschr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcschr-vx.S new file mode 100644 index 0000000000..5a04d44dde --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcschr-vx.S @@ -0,0 +1,103 @@ +/* Vector optimized 32/64 bit S/390 version of wcschr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcschr (const wchar_t *s, wchar_t c) + Locate the first occurrence of character c in string s. + Returns a pointer to it, or null if c does not occur before the + terminating zero. If c is zero, a pointer to the terminating zero + is returned. + If s is not 4-byte aligned, the work is handed over to __wcschr_c. + + Register usage: + -r1=tmp + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s, then byte index of the found c/zero + -v17=unused + -v18=replicated c +*/ +ENTRY(__wcschr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lghi %r5,0 /* current_len = 0. */ + + vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */ + vrepf %v18,%v18,0 + + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. + .Lfound evaluates the cc set by vfeezfs. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2, i.e. %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Find c/zero in 16byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). 
*/ + vl %v16,16(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc. */ +.Lfound32: + la %r5,16(%r5) /* Fall through: 16 is added once per remaining label. */ +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* cc=0: Found zero, but no c before that zero. */ + +.Lcharacter: + vlgvb %r4,%v16,7 /* Load byte index of character. */ + algr %r5,%r4 /* current_len = byte offset of the character in s. */ + la %r2,0(%r5,%r2) /* Return pointer to character. */ + br %r14 + +.Lzero: + clije %r3,0,.Lcharacter /* Found zero and c is zero. */ + lghi %r2,0 /* Return null if character not found. */ + br %r14 +.Lfallback: + jg __wcschr_c /* Branch to the common-code variant; it returns to our caller. */ +END(__wcschr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcschr.c b/REORG.TODO/sysdeps/s390/multiarch/wcschr.c new file mode 100644 index 0000000000..59e5df5706 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcschr.c @@ -0,0 +1,32 @@ +/* Multiple versions of wcschr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +/* Rename wcschr and __wcschr while wchar.h is included, so that its declarations get the __redirect_ names and the real names stay free for the ifunc definition below. */ # define wcschr __redirect_wcschr +# define __wcschr __redirect___wcschr +# include <wchar.h> +# undef wcschr +# undef __wcschr +# include <ifunc-resolve.h> + +/* Define __wcschr as ifunc with the type of __redirect___wcschr and export wcschr as weak alias. NOTE(review): s390_vx_libc_ifunc_redirected presumably resolves to __wcschr_vx if the VX hwcap bit is set and to __wcschr_c otherwise - confirm in ifunc-resolve.h. */ s390_vx_libc_ifunc_redirected (__redirect___wcschr, __wcschr) +weak_alias (__wcschr, wcschr) + +#else +/* No VX assembler support or not in libc: use the generic C implementation directly. */ # include <wcsmbs/wcschr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcschrnul-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcschrnul-c.c new file mode 100644 index 0000000000..095ea1202c --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcschrnul-c.c @@ -0,0 +1,25 @@ +/* Default wcschrnul implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +/* Compile the generic C code from wcsmbs/wcschrnul.c under the name __wcschrnul_c. NOTE(review): this assumes the included file names its function through the WCSCHRNUL macro - confirm. __wcschrnul_vx branches to __wcschrnul_c if s is not 4-byte aligned. */ # define WCSCHRNUL __wcschrnul_c + +# include <wchar.h> +extern __typeof (__wcschrnul) __wcschrnul_c; +# include <wcsmbs/wcschrnul.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcschrnul-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcschrnul-vx.S new file mode 100644 index 0000000000..6aa94823ab --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcschrnul-vx.S @@ -0,0 +1,97 @@ +/* Vector optimized 32/64 bit S/390 version of wcschrnul. 
+ Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t* wcschrnul (const wchar_t *s, wchar_t c) + Returns pointer to first c or to \0 if c not found. + + Register usage: + -r1=bytes to 4k-byte boundary, tmp + -r2=s and return pointer + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v18=vector with c replicated in every element +*/ +ENTRY(__wcschrnul_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lghi %r5,0 /* current_len = 0. */ + + vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */ + vrepf %v18,%v18,0 + + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + vlgvb %r4,%v16,7 /* Load byte index of character or zero. */ + clrjl %r4,%r1,.Lfound /* Return if c/zero is in loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. 
*/ + + /* Find c/zero in 16byte aligned loop */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezfs %v16,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezfs %v16,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. */ + + /* Found character or zero */ + /* The labels below fall through; each step adds 16 to current_len. */ +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v16,7 /* Load byte index of character or zero. */ + algr %r5,%r1 + la %r2,0(%r5,%r2) /* Return pointer to character or zero. */ + +.Lend: + br %r14 +.Lfallback: + jg __wcschrnul_c /* Continue in the common-code variant. */ +END(__wcschrnul_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcschrnul.c b/REORG.TODO/sysdeps/s390/multiarch/wcschrnul.c new file mode 100644 index 0000000000..f01ea9f9ac --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcschrnul.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcschrnul. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +/* NOTE(review): s390_vx_libc_ifunc comes from <ifunc-resolve.h>, which is + not visible here; presumably it defines __wcschrnul as an ifunc choosing + between __wcschrnul_vx and __wcschrnul_c -- confirm in that header. */ +s390_vx_libc_ifunc (__wcschrnul) +weak_alias (__wcschrnul, wcschrnul) + +#else +/* Without vector asm support (or outside libc) use the common code. */ +# include <wcsmbs/wcschrnul.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscmp-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcscmp-c.c new file mode 100644 index 0000000000..7a148f41bb --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscmp-c.c @@ -0,0 +1,32 @@ +/* Default wcscmp implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCMP __wcscmp_c + +# include <wchar.h> +extern __typeof (wcscmp) __wcscmp_c; +/* Make weak_alias expand to nothing for the common code included below; + in SHARED builds libc_hidden_def is replaced by a __hidden_ver1 + declaration for __wcscmp_c / __GI___wcscmp. */ +# undef weak_alias +# define weak_alias(name, alias) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcscmp_c, __GI___wcscmp, __wcscmp_c); +# endif /* SHARED */ +# include <wcsmbs/wcscmp.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscmp-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcscmp-vx.S new file mode 100644 index 0000000000..3b92bbd515 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscmp-vx.S @@ -0,0 +1,131 @@ +/* Vector optimized 32/64 bit S/390 version of wcscmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wcscmp (const wchar_t *s1, const wchar_t *s2) + Compare two strings + + Register usage: + -r1=loaded byte count s1, tmp + -r2=s1 and return value + -r3=s2, tmp + -r4=loaded byte count s2, tmp + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wcscmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + lghi %r5,0 /* current_len = 0. 
*/ + +.Lloop: + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */ + lcbb %r1,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vr is not fully loaded. */ + lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vr is not fully loaded. */ + /* Both vrs are fully loaded. */ + aghi %r5,16 + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + jno .Lfound /* Found unequal element or zero. */ + + /* The next three groups repeat the steps above (unrolled loop). */ + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + jno .Lfound + + vlbb %v16,0(%r5,%r2),6 + vlbb %v17,0(%r5,%r3),6 + lcbb %r1,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r4,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + jno .Lfound + j .Lloop + +.Lcmp_one_char: + /* At least one of both strings is not 4-byte aligned + and there is no full character before next block-boundary. + Compare one character to get over the boundary and + proceed with normal loop! */ + vlef %v16,0(%r5,%r2),0 /* Load one character. */ + vlef %v17,0(%r5,%r3),0 + lghi %r1,4 /* Loaded byte count is 4. */ + j .Llt_cmp /* Proceed with comparison. */ + +.Llt16_1: + lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r1,%r4 + locrh %r1,%r4 /* Get minimum of bytes loaded in s1/2. */ + nill %r1,65532 /* Align bytes loaded to full characters. */ + jz .Lcmp_one_char /* Jump away if no full char is available. */ +.Llt_cmp: + algfr %r5,%r1 /* Add smallest loaded bytes to current_len. */ + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. 
*/ + vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r4,%r1,.Lfound /* Jump away if miscompare is within loaded + bytes. 
*/ + j .Lloop + +.Lfound: + /* vfenezf found an unequal element or zero. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r4,%v18,7 /* Extract not equal byte-index, */ + srl %r4,2 /* Convert it to character-index. */ + vlgvf %r3,%v16,0(%r4) /* Load character-values. */ + vlgvf %r4,%v17,0(%r4) + cr %r3,%r4 /* Signed compare of both characters. */ + je .Lend_equal /* Equal -> the found element was the zero. */ + lghi %r2,1 /* Return 1 ... */ + lghi %r1,-1 + locgrl %r2,%r1 /* ... or -1 if the s1 character is lower. */ + br %r14 +.Lend_equal: + lghi %r2,0 /* Strings are equal: return 0. */ + br %r14 +END(__wcscmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscmp.c b/REORG.TODO/sysdeps/s390/multiarch/wcscmp.c new file mode 100644 index 0000000000..a5c2b69a29 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscmp.c @@ -0,0 +1,30 @@ +/* Multiple versions of wcscmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +/* __wcscmp is renamed to __redirect___wcscmp while <wchar.h> is included; + the name is restored before the ifunc macro below. */ +# define __wcscmp __redirect___wcscmp +# include <wchar.h> +# undef __wcscmp +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc_redirected (__redirect___wcscmp, __wcscmp) +weak_alias (__wcscmp, wcscmp) + +#else +# include <wcsmbs/wcscmp.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscpy-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcscpy-c.c new file mode 100644 index 0000000000..342c247a8f --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscpy-c.c @@ -0,0 +1,25 @@ +/* Default wcscpy implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCPY __wcscpy_c + +# include <wchar.h> +extern __typeof (wcscpy) __wcscpy_c; +# include <wcsmbs/wcscpy.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcscpy-vx.S new file mode 100644 index 0000000000..a8adc20dea --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscpy-vx.S @@ -0,0 +1,111 @@ +/* Vector optimized 32/64 bit S/390 version of wcscpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcscpy (wchar_t *dest, const wchar_t *src) + Copy string src to dest. + + Register usage: + -r0=not used by this routine + -r1=tmp + -r2=dest and return value + -r3=src; reused as store address once the zero is found + -r4=tmp + -r5=current_len + -v16=part of src + -v17=index of zero + -v18=part of src +*/ +ENTRY(__wcscpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r3,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lfound_align /* If found zero within loaded bytes, + copy bytes before and return. */ + + /* Align s to 16 byte. */ + risbgn %r4,%r3,60,128+63,0 /* %r4 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r4 /* Compute highest index to 16byte boundary. */ + + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. 
*/ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16_0 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + +.Lfound_v16_32: + aghi %r5,32 /* Zero is in the part loaded from offset 32. */ +.Lfound_v16_0: + la %r3,0(%r5,%r2) /* %r3 = dest + current_len. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r3) /* Copy characters including zero. */ + br %r14 + +.Lfound_v18_48: + aghi %r5,32 /* Zero is in the part loaded from offset 48. */ +.Lfound_v18_16: + la %r3,16(%r5,%r2) /* %r3 = dest + current_len + 16. */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v18,%r1,0(%r3) /* Copy characters including zero. */ + br %r14 + +.Lfound_align: + aghi %r5,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r5,0(%r2) /* Copy characters including zero. */ + br %r14 + +.Lfallback: + jg __wcscpy_c /* Continue in the common-code variant. */ +END(__wcscpy_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscpy.c b/REORG.TODO/sysdeps/s390/multiarch/wcscpy.c new file mode 100644 index 0000000000..bd68b1725d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscpy.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcscpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +/* NOTE(review): s390_vx_libc_ifunc2 comes from <ifunc-resolve.h>, which is + not visible here; presumably it defines wcscpy as an ifunc choosing + between __wcscpy_vx and __wcscpy_c -- confirm in that header. */ +s390_vx_libc_ifunc2 (__wcscpy, wcscpy) + +#else +# include <wcsmbs/wcscpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscspn-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcscspn-c.c new file mode 100644 index 0000000000..937e1d1963 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscspn-c.c @@ -0,0 +1,26 @@ +/* Default wcscspn implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSCSPN __wcscspn_c + +# include <wchar.h> +extern __typeof (wcscspn) __wcscspn_c; + +# include <wcsmbs/wcscspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscspn-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcscspn-vx.S new file mode 100644 index 0000000000..cd528907d6 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscspn-vx.S @@ -0,0 +1,293 @@ +/* Vector optimized 32/64 bit S/390 version of wcscspn. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcscspn (const wchar_t *s, const wchar_t * reject) + The wcscspn() function calculates the length of the initial segment + of s which consists entirely of characters not in reject. + + This method checks the length of reject string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of reject-string. Otherwise a slower full + check of reject-string is used. 
+ + register overview: + r3: pointer to start of reject-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string + r4: found byte index + r1: current return len + v16: search-string + v17: reject-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first reject-string + v20: zero for preparing reject-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any reject-character + v22: current mask; 1 indicates a match between + search-string-vreg and any reject-character in current reject-vreg + v24: all ones; compared with the global mask + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of reject-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former reject-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb reject-string +*/ +ENTRY(__wcscspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* + Check if reject-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),0 /* Load reject. */ + lcbb %r0,0(%r3),0 /* Get loaded byte count of reject. */ + jo .Lcheck_onbb /* Special case if reject + lies on block-boundary. */ + +.Lcheck_notonbb: + lghi %r1,0 /* Zero out current len. */ + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> reject fits in one vreg. */ + j .Lslow /* No zero -> reject exceeds one vreg. */ + + +.Lcheck_onbb: + /* Reject lies on block-boundary. */ + nill %r0,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded reject bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Reject fits in one vreg; + Fill with zeros and proceed + with FAST. 
*/ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load reject, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if reject fits in one vreg. */ + + + /* + Search s for reject in one vreg + ------------------------------- + */ +.Lfast: + /* Complete reject-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 + equal to any in v17 + or first zero element. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + clrjl %r4,%r0,.Lfast_loop_found2 /* If found index is within loaded + bytes, return with found element + index (=return length). */ + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + + /* Process s in 16byte aligned loop. */ +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to any reject + character and none was zero. */ + + /* Found equal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element or zero. */ +.Lfast_loop_found2: + algrk %r2,%r1,%r4 /* Add found index to current len. */ + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + br %r14 + + + + /* + Search s for reject in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. 
*/ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Reject in v17 without zero. */ + vlr %v19,%v17 /* Save first reject-part for a fast reload. */ + vzero %v20 /* Zero for preparing reject-vector. */ + vone %v24 /* One for checking result of former + string-part. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, remaining bytes are 0). */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero -> return 0. */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeef %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) or 16. */ + /* Equal-index < min(zero-index, loaded byte count) + -> Return length up to the equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> Former str-part was last str-part + -> Return length up to the zero. */ + clrjl %r6,%r0,.Lslow_end_not_found + + /* All elements are zero (=no match) -> proceed with next str-part. */ + vlr %v17,%v19 /* Load first part of reject (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. 
*/ + lghi %r0,16 /* Loaded byte count is 16. 
*/ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of reject to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first reject-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end /* If first element is zero (end of string) + -> Return current length. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any rejected character in + this reject-string-part) IN=0, RT=1. */ + vlgvf %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any rejected characters? + (All other parts of reject cannot lead to a match before this one) + -> Return current len, which is pointing to this element. */ + clijh %r4,0,.Lslow_end + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next reject-part until end of reject is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in last reject-part + -> All reject-parts are processed; + check results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next reject part. */ + aghi %r5,16 /* Increment current len of reject-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of reject-string. */ + jo .Lslow_next_acc_onbb /* Jump away if reject-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in reject-part: fill zeros with first-reject-character. */ + vlgvf %r8,%v17,0 /* Load first element of reject-part. */ + clije %r8,0,.Lslow_next_str /* Process next str-part if first + character in this part of reject + is a zero. */ + /* r8>0 -> zero found in this reject-part. */ + vrepf %v18,%v17,0 /* Replicate first char across all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). 
*/ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Reject-string part is prepared. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. */ + je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t + loaded. */ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of reject part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of reject, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_not_found: + algfr %r1,%r6 /* Add zero-index to current len. */ + j .Lslow_end +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. */ +.Lslow_end: + srlg %r2,%r1,2 /* Convert byte-count to character-count. */ + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 +.Lfallback: + jg __wcscspn_c /* Continue in the common-code variant. */ +END(__wcscspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcscspn.c b/REORG.TODO/sysdeps/s390/multiarch/wcscspn.c new file mode 100644 index 0000000000..f38ed9c7f4 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcscspn.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcscspn. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +/* NOTE(review): s390_vx_libc_ifunc2 comes from <ifunc-resolve.h>, which is + not visible here; presumably it defines wcscspn as an ifunc choosing + between __wcscspn_vx and __wcscspn_c -- confirm in that header. */ +s390_vx_libc_ifunc2 (__wcscspn, wcscspn) + +#else +# include <wcsmbs/wcscspn.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcslen-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcslen-c.c new file mode 100644 index 0000000000..9626a76da5 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcslen-c.c @@ -0,0 +1,25 @@ +/* Default wcslen implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSLEN __wcslen_c + +# include <wchar.h> +extern __typeof (__wcslen) __wcslen_c; +# include <wcsmbs/wcslen.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcslen-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcslen-vx.S new file mode 100644 index 0000000000..7539303fbd --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcslen-vx.S @@ -0,0 +1,91 @@ +/* Vector optimized 32/64 bit S/390 version of wcslen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcslen (const wchar_t *s) + Returns length of string s. + + Register usage: + -r1=bytes to 4k-byte boundary + -r2=s and return value + -r3=tmp + -r4=tmp + -r5=current_len + -v16=part of s +*/ +ENTRY(__wcslen_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. 
*/ + vlgvb %r4,%v16,7 /* Load zero index or 16 if not found. */ + clr %r4,%r1 /* If found zero within loaded bytes? */ + locgrl %r2,%r4 /* Then copy return value. */ + jl .Lend /* And return. */ + + /* Align s to 16 byte. */ + risbgn %r3,%r2,60,128+63,0 /* %r3 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r3 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + j .Lloop /* No zero found -> loop. */ + + /* The labels below fall through; each step adds 16 to current_len. */ +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r2,%v16,7 /* Load byte index of zero. */ + algr %r2,%r5 /* Add current_len to get the byte length. */ +.Lend: + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + br %r14 +.Lfallback: + jg __wcslen_c /* Continue in the common-code variant. */ +END(__wcslen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcslen.c b/REORG.TODO/sysdeps/s390/multiarch/wcslen.c new file mode 100644 index 0000000000..bfb577797e --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcslen.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcslen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcslen) +weak_alias (__wcslen, wcslen) + +#else +# include <wcsmbs/wcslen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncat-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcsncat-c.c new file mode 100644 index 0000000000..84b3eb1124 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncat-c.c @@ -0,0 +1,25 @@ +/* Default wcsncat implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNCAT __wcsncat_c + +# include <wchar.h> +extern __typeof (wcsncat) __wcsncat_c; +# include <wcsmbs/wcsncat.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncat-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcsncat-vx.S new file mode 100644 index 0000000000..5c49a1e499 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncat-vx.S @@ -0,0 +1,265 @@ +/* Vector optimized 32/64 bit S/390 version of wcsncat. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t * wcsncat (wchar_t *dest, const wchar_t *src, size_t n) + Concatenate two strings - at most n characters of src. + + Register usage: + -r0=saved dest pointer for return + -r1=tmp + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp + -r7=tmp + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__wcsncat_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. 
*/ + + vlbb %v16,0(%r2),6 /* Load dest until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + /* If either src or dest is not 4byte aligned, use __wcsncat_c. */ + tmll %r2,3 /* Test if dest is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + tmll %r3,3 /* Test if src is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + lgr %r0,%r2 /* Save destination pointer for return. */ + vlvgp %v31,%r6,%r7 /* Save registers. */ + + /* WCSLEN + %r1 = loaded bytes (tmp) + %r5 = zero byte index, then current_len + %r2 = dst + */ + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Llen_end /* Found zero within loaded bytes, end. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + /* Find zero in 16byte aligned loop. */ +.Llen_loop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Llen_found /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Llen_found48 + + aghi %r5,64 + j .Llen_loop /* No zero -> loop. */ + +.Llen_found48: + aghi %r5,16 +.Llen_found32: + aghi %r5,16 +.Llen_found16: + aghi %r5,16 +.Llen_found: + vlgvb %r1,%v16,7 /* Load byte index of zero. */ + algr %r5,%r1 + +.Llen_end: + /* WCSNCPY + %r1 = zero byte index (tmp) + %r6 = loaded bytes (tmp) + %r3 = curr src pointer + %r2 = curr dst pointer + %r7 = border, tmp + */ + la %r2,0(%r5,%r2) /* strcpy at end of dst-string. */ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. 
*/ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + clgrjle %r4,%r6,.Lcpy_remaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + clrjl %r1,%r6,.Lcpy_found_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r3,60,128+63,0 /* %r1 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r1 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and maxlen > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* + Now we are 16byte aligned, so we can load a full vreg + without page fault. + */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lcpy_remaining_v16 /* If n <=16, + process remaining bytes. */ +.Lcpy_lt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = n - 16. */ + + clgrjhe %r5,%r7,.Lcpy_remaining_v16 + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. 
*/ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v18 + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lcpy_remaining_v16 + vfenezfs %v17,%v16,%v16 + je .Lcpy_found_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lcpy_remaining_v18: + vlr %v16,%v18 +.Lcpy_remaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + /* Zero-index within remaining-bytes, store up to zero and end. */ + clgrjle %r1,%r7,.Lcpy_found_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes. */ + lghi %r1,0 + st %r1,1(%r7,%r2) /* Store string-null-termination beyond n. */ +.Lcpy_end: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lcpy_found_v16_32: + aghi %r5,32 + j .Lcpy_found_v16 +.Lcpy_found_v18_48: + aghi %r5,32 +.Lcpy_found_v18_16: + aghi %r5,16 +.Lcpy_found_v18: + vlr %v16,%v18 +.Lcpy_found_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) +.Lcpy_found_v16_store: + aghi %r1,3 /* Also copy remaining bytes of zero. */ + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + j .Lcpy_end + + /* Find zero in 16byte aligned loop. */ +.Lcpy_loop2: + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + +.Lcpy_loop64: + vl %v16,0(%r5,%r3) + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lcpy_found_v16 /* Jump away if zero was found. 
*/ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Save previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lcpy_found_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lcpy_found_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lcpy_loop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Lcpy_lt64 + +.Lfallback: + jg __wcsncat_c +END(__wcsncat_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncat.c b/REORG.TODO/sysdeps/s390/multiarch/wcsncat.c new file mode 100644 index 0000000000..be545d091d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncat.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcsncat. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcsncat, wcsncat) + +#else +# include <wcsmbs/wcsncat.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncmp-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcsncmp-c.c new file mode 100644 index 0000000000..72ee85ab7a --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncmp-c.c @@ -0,0 +1,25 @@ +/* Default wcsncmp implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNCMP __wcsncmp_c + +# include <wchar.h> +extern __typeof (wcsncmp) __wcsncmp_c; +# include <wcsmbs/wcsncmp.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncmp-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcsncmp-vx.S new file mode 100644 index 0000000000..e44c53a1a9 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncmp-vx.S @@ -0,0 +1,177 @@ +/* Vector optimized 32/64 bit S/390 version of wcsncmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wcsncmp (const wchar_t *s1, const wchar_t *s2, size_t n) + Compare at most n characters of two strings. + + Register usage: + -r0=tmp + -r1=tmp + -r2=s1 + -r3=s2 + -r4=n + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wcsncmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */ + + /* Check range of n and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + /* Check first character without vector load. */ + lghi %r5,4 /* current_len = 4 bytes. */ + /* Check s1/2[0]. */ + lt %r0,0(%r2) + l %r1,0(%r3) + je .Lend_cmp_one_char + crjne %r0,%r1,.Lend_cmp_one_char + +.Lloop: + vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. 
*/ + vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */ + lcbb %r0,0(%r5,%r2),6 /* Get loaded byte count of s1. */ + jo .Llt16_1 /* Jump away if vector not fully loaded. */ + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ + jo .Llt16_2 /* Jump away if vector not fully loaded. */ + aghi %r5,16 /* Both vectors are fully loaded. */ + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + clgrjhe %r5,%r4,.Llastcmp /* If current_len >= n ->last compare. */ + jno .Lfound + + vlbb %v17,0(%r5,%r3),6 + vlbb %v16,0(%r5,%r2),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + clgrjhe %r5,%r4,.Llastcmp + jno .Lfound + + vlbb %v17,0(%r5,%r3),6 + vlbb %v16,0(%r5,%r2),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + clgrjhe %r5,%r4,.Llastcmp + jno .Lfound + + vlbb %v17,0(%r5,%r3),6 + vlbb %v16,0(%r5,%r2),6 + lcbb %r0,0(%r5,%r2),6 + jo .Llt16_1 + lcbb %r1,0(%r5,%r3),6 + jo .Llt16_2 + aghi %r5,16 + vfenezfs %v18,%v16,%v17 + clgrjhe %r5,%r4,.Llastcmp + jno .Lfound + + j .Lloop + +.Llt16_1: + lcbb %r1,0(%r5,%r3),6 /* Get loaded byte count of s2. */ +.Llt16_2: + clr %r0,%r1 /* Compare logical. */ + locrh %r0,%r1 /* Compute minimum of bytes loaded. */ + nill %r0,65532 /* Align bytes loaded to full characters. */ + jz .Lcmp_one_char /* Jump away if no full char is available. */ +.Llt_cmp: + algfr %r5,%r0 /* Add smallest loaded bytes to current_len. */ + vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */ + clgrj %r5,%r4,10,.Llastcmp /* If current_len >= n -> last compare */ + vlgvb %r1,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r1,%r0,.Lfound /* Jump away if miscompare is within + loaded bytes; (index < loaded-bytes) */ + j .Lloop + +.Lcmp_one_char: + /* At least one of both strings is not 4-byte aligned + and there is no full character before next block-boundary. 
+ Compare one character to get over the boundary and + proceed with normal loop! */ + vlef %v16,0(%r5,%r2),0 /* Load one character. */ + lghi %r0,4 /* Loaded byte count is 4. */ + vlef %v17,0(%r5,%r3),0 + j .Llt_cmp /* Proceed with comparison. */ + +.Llastcmp: + /* Use comparison result only if located within first n characters. + %r0: loaded byte count in vreg; + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + First ignored match index: loaded bytes - (current_len-n): ]0...16] + */ + slgr %r5,%r4 /* %r5 = current_len - n. */ + slr %r0,%r5 /* %r0 = first ignored match index. */ + vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */ + clrjl %r4,%r0,.Lfound2 /* Jump away if miscompare is within + loaded bytes and below n bytes. */ +.Lend_equal: + lghi %r2,0 + br %r14 + +.Lfound: + /* Difference or end of string. */ + /* vfenezf found an unequal element or zero. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r4,%v18,7 /* Extract not equal byte-index. */ +.Lfound2: + srl %r4,2 /* And convert it to character-index. */ + vlgvf %r0,%v16,0(%r4) /* Load character-values. */ + vlgvf %r1,%v17,0(%r4) +.Lend_cmp_one_char: + cr %r0,%r1 + je .Lend_equal + lghi %r2,1 + lghi %r1,-1 + locgrl %r2,%r1 + br %r14 +END(__wcsncmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncmp.c b/REORG.TODO/sysdeps/s390/multiarch/wcsncmp.c new file mode 100644 index 0000000000..9b8e968d1f --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncmp.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcsncmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcsncmp, wcsncmp) + +#else +# include <wcsmbs/wcsncmp.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncpy-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcsncpy-c.c new file mode 100644 index 0000000000..32ec8ffe64 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncpy-c.c @@ -0,0 +1,25 @@ +/* Default wcsncpy implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNCPY __wcsncpy_c + +# include <wchar.h> +extern __typeof (__wcsncpy) __wcsncpy_c; +# include <wcsmbs/wcsncpy.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncpy-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcsncpy-vx.S new file mode 100644 index 0000000000..3d1a67e0d6 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncpy-vx.S @@ -0,0 +1,223 @@ +/* Vector optimized 32/64 bit S/390 version of wcsncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcsncpy (wchar_t *dest, const wchar_t *src, size_t n) + Copy at most n characters of string src to dest. + + Register usage: + -r0=dest pointer for return + -r1=tmp, zero byte index + -r2=dest + -r3=src + -r4=n + -r5=current_len + -r6=tmp, loaded bytes + -r7=tmp, border + -v16=part of src + -v17=index of zero + -v18=part of src + -v31=register save area for r6, r7 +*/ +ENTRY(__wcsncpy_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 +# endif /* !defined __s390x__ */ + + clgfi %r4,0 + ber %r14 /* Nothing to do, if n == 0. 
*/ + + vlbb %v16,0(%r3),6 /* Load s until next 4k-byte boundary. */ + + tmll %r3,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vlvgp %v31,%r6,%r7 /* Save registers. */ + lgr %r0,%r2 /* Save destination pointer for return. */ + + lcbb %r6,0(%r3),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r6,%r6 /* Convert 32bit to 64bit. */ + + lghi %r5,0 /* current_len = 0. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of n. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of n. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + clgrjle %r4,%r6,.Lremaining_v16 /* If n <= loaded-bytes + -> process remaining. */ + + /* n > loaded-byte-count. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + clrjl %r1,%r6,.Lfound_v16_store /* Found zero within loaded bytes, + copy and return. */ + + /* Align s to 16 byte. */ + risbgn %r7,%r3,60,128+63,0 /* %r7 = bits 60-63 of %r3 'and' 15. */ + lghi %r5,15 /* current_len = 15. */ + slr %r5,%r7 /* Compute highest index to 16byte boundary. */ + + /* Zero not found and n > loaded-byte-count. */ + vstl %v16,%r5,0(%r2) /* Copy loaded characters - no zero. */ + ahi %r5,1 /* Start loop at next character. */ + + /* Now we are 16byte aligned, so we can load + a full vreg without page fault. */ + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + clgijl %r4,17,.Lremaining_v16 /* If n <=16, process remaining + bytes. */ +.Llt64: + lgr %r7,%r4 + slgfi %r7,16 /* border_len = maxlen - 16. 
*/ + + clgrjhe %r5,%r7,.Lremaining_v16 /* If current_len >= border + then process remaining bytes. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v18 + vfenezfs %v17,%v18,%v18 + je .Lfound_v18 + vl %v16,16(%r5,%r3) + vst %v18,0(%r5,%r2) + aghi %r5,16 + + clgrjhe %r5,%r7,.Lremaining_v16 + vfenezfs %v17,%v16,%v16 + je .Lfound_v16 + vl %v18,16(%r5,%r3) + vst %v16,0(%r5,%r2) + aghi %r5,16 + +.Lremaining_v18: + vlr %v16,%v18 +.Lremaining_v16: + /* v16 contains the remaining bytes [1...16]. + Store remaining bytes and append string-termination. */ + vfenezf %v17,%v16,%v16 /* Find element not equal with zero search. */ + slgrk %r7,%r4,%r5 /* Remaining bytes = maxlen - current_len. */ + aghi %r7,-1 /* vstl needs highest index. */ + la %r2,0(%r5,%r2) /* vstl has no index register. */ + vlgvb %r1,%v17,7 /* Load zero index or 16 if not found. */ + aghi %r1,3 /* Also copy remaining bytes of zero. */ + /* Zero in remaining bytes? -> jump away (zero-index < max-index) + Do not jump away if zero-index == max-index, + but simply copy zero with vstl below. */ + clrjl %r1,%r7,.Lfound_v16_store + vstl %v16,%r7,0(%r2) /* Store remaining bytes without null + termination!. */ +.Lend: + /* Restore saved registers. */ + vlgvg %r6,%v31,0 + vlgvg %r7,%v31,1 + lgr %r2,%r0 /* Load saved dest-ptr. */ + br %r14 + +.Lfound_v16_32: + aghi %r5,32 + j .Lfound_v16 +.Lfound_v18_48: + aghi %r5,32 +.Lfound_v18_16: + aghi %r5,16 +.Lfound_v18: + vlr %v16,%v18 +.Lfound_v16: + /* v16 contains a zero. Store remaining bytes to zero. current_len + has not reached border, thus checking for n is not needed! */ + vlgvb %r1,%v17,7 /* Load byte index of zero. */ + la %r2,0(%r5,%r2) /* vstl has no support for index-register. 
*/ + aghi %r1,3 /* Also copy remaining bytes of zero. */ +.Lfound_v16_store: + vstl %v16,%r1,0(%r2) /* Copy characters including zero. */ + /* Fill remaining bytes with zero - remaining count always > 0. */ + algr %r5,%r1 /* Remaining bytes (=%r4) = ... */ + slgr %r4,%r5 /* = maxlen - (currlen + zero_index + 1). */ + la %r2,0(%r1,%r2) /* Pointer to zero. start filling beyond. */ + aghi %r4,-2 /* mvc with exrl needs count - 1. + (additional -1, see remaining bytes above) */ + srlg %r6,%r4,8 /* Split into 256 byte blocks. */ + ltgr %r6,%r6 + je .Lzero_lt256 +.Lzero_loop256: + mvc 1(256,%r2),0(%r2) /* Fill 256 zeros at once. */ + la %r2,256(%r2) + brctg %r6,.Lzero_loop256 /* Loop until all blocks are processed. */ +.Lzero_lt256: + exrl %r4,.Lmvc_lt256 + j .Lend +.Lmvc_lt256: + mvc 1(1,%r2),0(%r2) + + /* Find zero in 16byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r3) /* Load s. */ + vfenezfs %v17,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound_v16 /* Jump away if zero was found. */ + vl %v18,16(%r5,%r3) /* Load next part of s. */ + vst %v16,0(%r5,%r2) /* Store previous part without zero to dst. */ + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_16 + vl %v16,32(%r5,%r3) + vst %v18,16(%r5,%r2) + vfenezfs %v17,%v16,%v16 + je .Lfound_v16_32 + vl %v18,48(%r5,%r3) + vst %v16,32(%r5,%r2) + vfenezfs %v17,%v18,%v18 + je .Lfound_v18_48 + vst %v18,48(%r5,%r2) + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r4,.Lloop64 + + vl %v16,0(%r5,%r3) /* Load s. */ + j .Llt64 + +.Lfallback: + jg __wcsncpy_c +END(__wcsncpy_vx) + +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsncpy.c b/REORG.TODO/sysdeps/s390/multiarch/wcsncpy.c new file mode 100644 index 0000000000..6e1e8f01fb --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsncpy.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcsncpy. + Copyright (C) 2015-2017 Free Software Foundation, Inc. 
+ This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcsncpy) +weak_alias (__wcsncpy, wcsncpy) + +#else +# include <wcsmbs/wcsncpy.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsnlen-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcsnlen-c.c new file mode 100644 index 0000000000..e86ca654e0 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsnlen-c.c @@ -0,0 +1,25 @@ +/* Default wcsnlen implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSNLEN __wcsnlen_c + +# include <wchar.h> +extern __typeof (__wcsnlen) __wcsnlen_c; +# include <wcsmbs/wcsnlen.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsnlen-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcsnlen-vx.S new file mode 100644 index 0000000000..c678b94cac --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsnlen-vx.S @@ -0,0 +1,151 @@ +/* Vector optimized 32/64 bit S/390 version of wcsnlen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcsnlen (const wchar_t *s, size_t maxlen) + Returns the number of characters in s or at most maxlen. 
+ + Register usage: + -r1=tmp + -r2=address of string + -r3=maxlen (number of characters to be read) + -r4=tmp + -r5=current_len and return_value + -v16=part of s +*/ +ENTRY(__wcsnlen_vx) + + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r3,%r3 +# endif /* !defined __s390x__ */ + + clgfi %r3,0 /* if maxlen == 0, return 0. */ + locgre %r2,%r3 + ber %r14 + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r1,%r1 /* Convert 32bit to 64bit. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r3,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r4,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r3,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r4,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r3,%r3,2 /* Convert character-count to byte-count. */ + locgrne %r3,%r4 /* Use max byte-count, if bit 0/1 was one. */ + + vfenezf %v16,%v16,%v16 /* Find element not equal with zero search. */ + clgr %r1,%r3 + locgrh %r1,%r3 /* loaded_byte_count + = min (loaded_byte_count, maxlen) */ + + vlgvb %r5,%v16,7 /* Load zero index or 16 if not found. */ + clrjl %r5,%r1,.Lend /* Found zero within loaded bytes -> return. */ + + clgr %r1,%r3 /* If loaded_byte_count == maxlen -> end. */ + locgre %r5,%r3 + je .Lend + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + /* Find zero in max 64byte with aligned s. */ +.Llt64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. 
*/ + je .Lfound /* Jump away if zero was found. */ + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound /* If current_len >= maxlen -> end. */ + vl %v16,0(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound + aghi %r5,16 + clgrjhe %r5,%r3,.Lfound + vl %v16,0(%r5,%r2) + vfenezfs %v16,%v16,%v16 + j .Lfound + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound: + vlgvb %r4,%v16,7 /* Load byte index of zero or 16 if no zero. */ + algr %r5,%r4 + + clgr %r5,%r3 + locgrh %r5,%r3 /* Return min (current_len, maxlen). */ +.Lend: + srlg %r2,%r5,2 /* Convert byte-count to character-count. */ + br %r14 + + /* Find zero in 16byte aligned loop. */ +.Lloop64: + vl %v16,0(%r5,%r2) /* Load s. */ + vfenezfs %v16,%v16,%v16 /* Find element not equal with zero search. */ + je .Lfound /* Jump away if zero was found. */ + vl %v16,16(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound16 + vl %v16,32(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound32 + vl %v16,48(%r5,%r2) + vfenezfs %v16,%v16,%v16 + je .Lfound48 + + aghi %r5,64 + lgr %r1,%r5 /* If %r5 + 64 < maxlen? -> loop64. */ + aghi %r1,64 + clgrjl %r1,%r3,.Lloop64 + + j .Llt64 + +.Lfallback: + jg __wcsnlen_c +END(__wcsnlen_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsnlen.c b/REORG.TODO/sysdeps/s390/multiarch/wcsnlen.c new file mode 100644 index 0000000000..54486b9158 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsnlen.c @@ -0,0 +1,28 @@ +/* Multiple versions of wcsnlen. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc (__wcsnlen) +weak_alias (__wcsnlen, wcsnlen) + +#else +# include <wcsmbs/wcsnlen.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcspbrk-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcspbrk-c.c new file mode 100644 index 0000000000..0a2e4c8e4d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcspbrk-c.c @@ -0,0 +1,31 @@ +/* Default wcspbrk implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSPBRK __wcspbrk_c + +# include <wchar.h> +extern __typeof (wcspbrk) __wcspbrk_c; +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcspbrk_c, __GI_wcspbrk, __wcspbrk_c); +# endif /* SHARED */ + +# include <wcsmbs/wcspbrk.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcspbrk-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcspbrk-vx.S new file mode 100644 index 0000000000..267baf091d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcspbrk-vx.S @@ -0,0 +1,315 @@ +/* Vector optimized 32/64 bit S/390 version of wcspbrk. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcspbrk (const wchar_t *s, const wchar_t * accept) + The wcspbrk() function locates the first occurrence in the string s + of any of the characters in the string accept and returns a pointer + to that character or NULL if not found. + + This method checks the length of accept string. 
If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. Otherwise a slower full + check of accept-string is used. + + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r0: loaded byte count of vlbb search-string (32bit unsigned) + r4: found byte index (32bit unsigned) + r1: current return len (64bit unsigned) + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v24: one for result-checking of former string-part + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__wcspbrk_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lcbb %r0,0(%r3),6 + jo .Lcheck_onbb /* Special case if accept lies + on block-boundary. */ + +.Lcheck_notonbb: + lghi %r1,0 /* Zero out current len. */ + vlgvf %r0,%v17,0 /* Get first element. */ + clije %r0,0,.Lfast_end_null /* Return null if accept is empty. */ + + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg */ + + +.Lcheck_onbb: + /* Accept lies on block-boundary. 
*/ + nill %r0,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr, if we loaded no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r4,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r4,%r0,.Lcheck_notonbb /* Zero index < loaded bytes count -> + accept fits in one vreg; + Fill with zeros and proceed + with FAST. */ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes, evaluate the found + element (match or end of string). */ + clrjl %r4,%r0,.Lfast_loop_found2 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v18,%v16,%v17,0 /* Find first element in v16 equal to any + in v17 or first zero element. */ + jno .Lfast_loop_found + + vl %v16,16(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found16 + + vl %v16,32(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found32 + + vl %v16,48(%r1,%r2) + vfaezfs %v18,%v16,%v17,0 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was equal to accept + and none was zero. */ + + /* Found equal or zero element. 
*/ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ +.Lfast_loop_found2: + srlg %r5,%r4,2 /* Convert byte-index to character-index. */ + vlgvf %r0,%v16,0(%r5) /* Get found element. */ + clije %r0,0,.Lfast_end_null /* Return null if no accept-char found */ + algfr %r1,%r4 /* Add found index of char to current len. */ + la %r2,0(%r1,%r2) /* And return pointer to first equal char. */ + br %r14 + +.Lfast_end_null: + lghi %r2,0 /* Return null if no character is equal. */ + br %r14 + + + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. */ + vlvgg %v30,%r6,0 + vlvgp %v31,%r8,%r9 + + /* Accept in v17 without zero */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + vone %v24 /* One for checking result of former string. */ + + /* Align s to 16 byte. */ + risbg %r4,%r2,60,128+63,0 /* Test if s is aligned and + %r4 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned. */ + lghi %r0,15 + slr %r0,%r4 /* Compute highest index to load (15-x). */ + vll %v16,%r0,0(%r2) /* Load up to 16byte boundary; + needs highest index, left bytes are 0. */ + ahi %r0,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null */ + clr %r0,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r0 /* Load on cc==1; zero-index = lbc. */ + j .Lslow_loop_acc + + + /* Process s in 16byte aligned loop. 
*/ +.Lslow_next_str: + /* Check results of former processed str-part. */ + vfeef %v18,%v21,%v24 /* Find first equal match in global mask + (ones in element). */ + vlgvb %r4,%v18,7 /* Get index of first one (=equal) + or 16 if no match. */ + /* Equal-index < min(zero-index, loaded byte count) + -> return pointer to equal element. */ + clrjl %r4,%r6,.Lslow_index_found + /* Zero-index < loaded byte count + -> former str-part was last str-part + -> return null */ + clrjl %r6,%r0,.Lslow_end_null + /* All elements are zero (=no match) -> proceed with next str-part. */ + + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r0 /* Add loaded byte count to current len. */ + +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string */ + lghi %r0,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + clije %r6,0,.Lslow_end_null /* If first element is zero + (end of string) -> return null. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + Character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vlgvf %r4,%v22,0 /* Get result of first element. */ + /* First element is equal to any accepted characters + (all other parts of accept cannot lead to a match before this one) + -> current len is pointing to first element + -> return found */ + clijh %r4,0,.Lslow_end_found + vo %v21,%v21,%v22 /* Global-mask = global-|matching-mask. */ + /* Proceed with next acc until end of acc is reached. */ + + +.Lslow_next_acc: + clijh %r8,0,.Lslow_next_str /* There was a zero in the last acc-part + -> end of accept reached; check + results of this str-part. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. 
*/ + aghi %r5,16 /* Increment current len of accept-string. 
*/ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvf %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_next_str /* Proceed with next string-part, + if first char in this part of accept + is a zero. */ + /* r8>0 -> zero found in this acc-part. */ + vrepf %v18,%v17,0 /* Replicate first char across all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. */ + je .Lslow_next_acc_onbb2 /* Reload vr, if no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ + +.Lslow_end_null: + lghi %r1,0 /* Return null if no character is equal. */ + j .Lslow_end + +.Lslow_loop_found: + vlgvb %r4,%v18,7 /* Load byte index of found element. */ + srlg %r5,%r4,2 /* Convert byte-index to character-index. */ + vlgvf %r0,%v16,0(%r5) /* Get found element. */ + clije %r0,0,.Lslow_end_null /* Return null if no acc-char found. */ + +.Lslow_index_found: + algfr %r1,%r4 /* Add found index of char to current len. 
*/ +.Lslow_end_found: + la %r1,0(%r1,%r2) /* And return pointer to first equal char. */ + +.Lslow_end: + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + lgr %r2,%r1 + br %r14 +.Lfallback: + jg __wcspbrk_c +END(__wcspbrk_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcspbrk.c b/REORG.TODO/sysdeps/s390/multiarch/wcspbrk.c new file mode 100644 index 0000000000..9f14f9a4f2 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcspbrk.c @@ -0,0 +1,29 @@ +/* Multiple versions of wcspbrk. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define wcspbrk __redirect_wcspbrk +# include <wchar.h> +# undef wcspbrk +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_wcspbrk, __wcspbrk, wcspbrk) + +#else +# include <wcsmbs/wcspbrk.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsrchr-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcsrchr-c.c new file mode 100644 index 0000000000..bc462a7c71 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsrchr-c.c @@ -0,0 +1,25 @@ +/* Default wcsrchr implementation for S/390. 
+ Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSRCHR __wcsrchr_c + +# include <wchar.h> +extern __typeof (wcsrchr) __wcsrchr_c; +# include <wcsmbs/wcsrchr.c> +#endif diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsrchr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcsrchr-vx.S new file mode 100644 index 0000000000..ba605f579d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsrchr-vx.S @@ -0,0 +1,190 @@ +/* Vector optimized 32/64 bit S/390 version of wcsrchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wcsrchr (const wchar_t *s, wchar_t c) + Locate the last character c in string. + + Register usage: + -r0=loaded bytes in first part of s. + -r1=offset of the part of s with the last occurrence of c or -1 if not found. + -r2=s + -r3=c + -r4=tmp + -r5=current_len + -v16=part of s + -v17=index of found element + -v18=replicated c + -v19=part of s with last occurrence of c. + -v20=permute pattern +*/ +ENTRY(__wcsrchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + tmll %r2,3 /* Test if s is 4-byte aligned? */ + jne .Lfallback /* And use common-code variant if not. */ + + vlvgf %v18,%r3,0 /* Generate vector whose elements are all c. */ + vrepf %v18,%v18,0 + + lghi %r1,-1 /* Currently no c found. */ + lghi %r5,0 /* current_len = 0. */ + + vfeezfs %v17,%v16,%v18 /* Find element equal or zero. */ + vlgvb %r4,%v17,7 /* Load byte index of c/zero or 16. */ + clrjl %r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes. */ +.Lalign: + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r5,16 /* current_len = 16. */ + slr %r5,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lloop: + vl %v16,0(%r5,%r2) /* Load s. */ + vfeezfs %v17,%v16,%v18 /* Find element equal with zero search. */ + jno .Lfound /* Found c/zero (cc=0|1|2). */ + vl %v16,16(%r5,%r2) + vfeezfs %v17,%v16,%v18 + jno .Lfound16 + vl %v16,32(%r5,%r2) + vfeezfs %v17,%v16,%v18 + jno .Lfound32 + vl %v16,48(%r5,%r2) + vfeezfs %v17,%v16,%v18 + jno .Lfound48 + + aghi %r5,64 + j .Lloop /* No character and no zero -> loop. 
*/ + +.Lfound48: + la %r5,16(%r5) /* Use la since aghi would clobber cc. */ +.Lfound32: + la %r5,16(%r5) +.Lfound16: + la %r5,16(%r5) +.Lfound: + je .Lzero /* Found zero, but no c before that zero. */ + /* Save this part of s to check for further matches after reaching + the end of the complete string. */ + vlr %v19,%v16 + lgr %r1,%r5 + + jh .Lzero /* Found a zero after the found c. */ + aghi %r5,16 /* Start search of next part of s. */ + j .Lloop + +.Lfound_first_part: + /* This code is only executed if the found c/zero is within loaded + bytes. If no c/zero was found (cc==3) the found index = 16, thus + this code is not called. + Resulting condition code of vector find element equal: + cc==0: no c, found zero + cc==1: c found, no zero + cc==2: c found, found zero after c + cc==3: no c, no zero (this case can be ignored). */ + je .Lzero /* Found zero, but no c before that zero. */ + + locgrne %r1,%r5 /* Mark c as found in first part of s. */ + vlr %v19,%v16 + + jl .Lalign /* No zero (e.g. if vr was fully loaded) + -> Align and loop afterwards. */ + + /* Found a zero in vr. If vr was not fully loaded due to block + boundary, the remaining bytes are filled with zero and we can't + rely on zero indication of condition code here! */ + + vfenezf %v17,%v16,%v16 + vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */ + clrjl %r4,%r0,.Lzero /* Zero within loaded bytes -> end. */ + j .Lalign /* Align and loop afterwards. */ + +.Lend_searched_zero: + vlgvb %r4,%v17,7 /* Load byte index of zero. */ + algr %r5,%r4 + la %r2,0(%r5,%r2) /* Return pointer to zero. */ + br %r14 + +.Lzero: + /* Reached end of string. Check if one c was found before. */ + clije %r3,0,.Lend_searched_zero /* Found zero and c is zero. */ + + cgfi %r1,-1 /* No c found -> return NULL. */ + locghie %r2,0 + ber %r14 + + larl %r3,.Lpermute_mask /* Load permute mask. */ + vl %v20,0(%r3) + + /* c was found and is part of v19. */ + vfenezf %v17,%v19,%v19 /* Find zero. 
*/ + vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */ + ahi %r4,3 /* Found zero index is first byte, + thus highest byte index is last byte of + wchar_t zero. */ + + clgfi %r5,0 /* Loaded byte count in v19 is 16, ... */ + lochine %r0,16 /* ... if v19 is not the first part of s. */ + ahi %r0,-1 /* Convert byte count to highest index. */ + + clr %r0,%r4 + locrl %r4,%r0 /* r4 = min (zero-index, highest-index). */ + + /* Right-shift of v19 to mask bytes after zero. */ + clije %r4,15,.Lzero_permute /* No shift is needed if highest index + in vr is 15. */ + lhi %r0,15 + slr %r0,%r4 /* Compute byte count for vector shift right. */ + sll %r0,3 /* Convert to bit count. */ + vlvgb %v17,%r0,7 + vsrlb %v19,%v19,%v17 /* Vector shift right by byte by number of bytes + specified in bits 1-4 of byte 7 in v17. */ + + /* Reverse bytes in v19. */ +.Lzero_permute: + vperm %v19,%v19,%v19,%v20 /* Permute v19 to reversed order. */ + + /* Find c in reversed v19. */ + vfeef %v19,%v19,%v18 /* Find c. */ + la %r2,0(%r1,%r2) + vlgvb %r3,%v19,7 /* Load byte index of c. */ + + /* Compute index in real s and return. */ + slgr %r4,%r3 + lay %r2,-3(%r4,%r2) /* Return pointer to last c. -3 is needed, + because the found byte index is reversed in + vector-register. Thus point to first byte of + wchar_t. */ + br %r14 +.Lpermute_mask: + .byte 0x0C,0x0D,0x0E,0x0F,0x08,0x09,0x0A,0x0B + .byte 0x04,0x05,0x06,0x07,0x00,0x01,0x02,0x03 +.Lfallback: + jg __wcsrchr_c +END(__wcsrchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsrchr.c b/REORG.TODO/sysdeps/s390/multiarch/wcsrchr.c new file mode 100644 index 0000000000..cd3f695117 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsrchr.c @@ -0,0 +1,27 @@ +/* Multiple versions of wcsrchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wcsrchr, wcsrchr) + +#else +# include <wcsmbs/wcsrchr.c> +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsspn-c.c b/REORG.TODO/sysdeps/s390/multiarch/wcsspn-c.c new file mode 100644 index 0000000000..7790556b0d --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsspn-c.c @@ -0,0 +1,31 @@ +/* Default wcsspn implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WCSSPN __wcsspn_c + +# include <wchar.h> +extern __typeof (wcsspn) __wcsspn_c; +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wcsspn_c, __GI_wcsspn, __wcsspn_c); +# endif /* SHARED */ + +# include <wcsmbs/wcsspn.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsspn-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wcsspn-vx.S new file mode 100644 index 0000000000..77806c3a50 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsspn-vx.S @@ -0,0 +1,270 @@ +/* Vector optimized 32/64 bit S/390 version of wcsspn. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* size_t wcsspn (const wchar_t *s, const wchar_t * accept) + The wcsspn() function calculates the length of the initial segment + of s which consists entirely of characters in accept. + + This method checks the length of accept string. If it fits entirely + in one vector register, a fast algorithm is used, which does not need + to check multiple parts of accept-string. 
Otherwise a slower full + check of accept-string is used. + + register overview: + r3: pointer to start of accept-string + r2: pointer to start of search-string + r4: loaded byte count of vl search-string + r0: found byte index + r1: current return len of s + v16: search-string + v17: accept-string + v18: temp-vreg + + ONLY FOR SLOW: + v19: first accept-string + v20: zero for preparing acc-vector + v21: global mask; 1 indicates a match between + search-string-vreg and any accept-character + v22: current mask; 1 indicates a match between + search-string-vreg and any accept-character in current acc-vreg + v30, v31: for re-/storing registers r6, r8, r9 + r5: current len of accept-string + r6: zero-index in search-string or 16 if no zero + or min(zero-index, loaded byte count) + r8: >0, if former accept-string-part contains a zero, + otherwise =0; + r9: loaded byte count of vlbb accept-string +*/ +ENTRY(__wcsspn_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + + tmll %r2,3 /* Test if s is 4-byte aligned. */ + jne .Lfallback /* And use common-code variant if not. */ + + /* + Check if accept-string fits in one vreg: + ---------------------------------------- + */ + vlbb %v17,0(%r3),6 /* Load accept. */ + lcbb %r4,0(%r3),6 /* Get loaded byte count of accept. */ + jo .Lcheck_onbb /* Special case if accept lies + on block-boundary. */ +.Lcheck_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + je .Lfast /* Zero found -> accept fits in one vreg. */ + j .Lslow /* No zero -> accept exceeds one vreg. */ + +.Lcheck_onbb: + /* Accept lies on block-boundary. */ + nill %r4,65532 /* Recognize only fully loaded characters. */ + je .Lcheck_onbb2 /* Reload vr if no full wchar_t. */ + vfenezf %v18,%v17,%v17 /* Search zero in loaded accept bytes. */ + vlgvb %r0,%v18,7 /* Get index of zero or 16 if not found. */ + clrjl %r0,%r4,.Lcheck_notonbb /* Zero index < loaded bytes count -> + Accept fits in one vreg; + Fill with zeros and proceed + with FAST. 
*/ +.Lcheck_onbb2: + vl %v17,0(%r3) /* Load accept, which exceeds loaded bytes. */ + j .Lcheck_notonbb /* Check if accept fits in one vreg. */ + + + /* + Search s for accept in one vreg + ------------------------------- + */ +.Lfast: + /* Complete accept-string in v17 and remaining bytes are zero. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r1,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + + vfaezfs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + /* If found index is within loaded bytes (%r0 < %r1), + return with found element index (=equal count). */ + clr %r0,%r1 + srlg %r0,%r0,2 /* Convert byte-count to character-count. */ + locgrl %r2,%r0 + blr %r14 + + /* Align s to 16 byte. */ + risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */ + lghi %r1,16 /* current_len = 16. */ + slr %r1,%r4 /* Compute bytes to 16bytes boundary. */ + +.Lfast_loop: + vl %v16,0(%r1,%r2) /* Load search-string. */ + vfaezfs %v16,%v16,%v17,8 /* Find first element in v16 + unequal to any in v17 + or first zero element. */ + jno .Lfast_loop_found + vl %v16,16(%r1,%r2) + vfaezfs %v16,%v16,%v17,8 + jno .Lfast_loop_found16 + vl %v16,32(%r1,%r2) + vfaezfs %v16,%v16,%v17,8 + jno .Lfast_loop_found32 + vl %v16,48(%r1,%r2) + vfaezfs %v16,%v16,%v17,8 + jno .Lfast_loop_found48 + + aghi %r1,64 + j .Lfast_loop /* Loop if no element was unequal to accept + and not zero. */ + + /* Found unequal or zero element. */ +.Lfast_loop_found48: + aghi %r1,16 +.Lfast_loop_found32: + aghi %r1,16 +.Lfast_loop_found16: + aghi %r1,16 +.Lfast_loop_found: + vlgvb %r0,%v16,7 /* Load byte index of found element. */ + algrk %r2,%r1,%r0 /* And add it to current len. */ + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + br %r14 + + + /* + Search s for accept in multiple vregs + ------------------------------------- + */ +.Lslow: + /* Save registers. 
*/ + vlvgg %v30,%r6,0 /* r6 -> v30 element 0. */ + vlvgp %v31,%r8,%r9 /* r8, r9 -> v31 elements 0, 1. */ + lghi %r1,0 /* Zero out current len. */ + + /* accept in v17 without zero. */ + vlr %v19,%v17 /* Save first acc-part for a fast reload. */ + vzero %v20 /* Zero for preparing acc-vector. */ + + /* Align s to 16 byte. */ + risbg %r0,%r2,60,128+63,0 /* Test if s is aligned and + %r0 = bits 60-63 'and' 15. */ + je .Lslow_loop_str /* If s is aligned, loop aligned */ + lghi %r4,15 + slr %r4,%r0 /* Compute highest index to load (15-x). */ + vll %v16,%r4,0(%r2) /* Load up to 16byte boundary (vll needs + highest index, remaining bytes are 0). */ + aghi %r4,1 /* Work with loaded byte count. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 + if there is no zero. */ + clr %r4,%r6 /* cc==1 if loaded byte count < zero-index. */ + locrl %r6,%r4 /* Load on cc==1. */ + j .Lslow_loop_acc + + /* Process s in 16byte aligned loop. */ +.Lslow_next_str: + vlr %v17,%v19 /* Load first part of accept (no zero). */ + algfr %r1,%r4 /* Add loaded byte count to current len. */ +.Lslow_loop_str: + vl %v16,0(%r1,%r2) /* Load search-string. */ + lghi %r4,16 /* Loaded byte count is 16. */ + vzero %v21 /* Zero out global mask. */ + lghi %r5,0 /* Set current len of accept-string to zero. */ + vfenezf %v18,%v16,%v16 /* Find zero in current string-part. */ + lghi %r8,0 /* There is no zero in first accept-part. */ + vlgvb %r6,%v18,7 /* Load byte index of zero or 16 if no zero. */ + +.Lslow_loop_acc: + vfaef %v22,%v16,%v17,4 /* Create matching-mask (1 in mask -> + character matches any accepted character in + this accept-string-part) IN=0, RT=1. */ + vo %v21,%v21,%v22 /* global-mask = global- | matching-mask. */ + vfenezf %v18,%v21,%v21 /* Find first zero in global-mask. 
*/ + vlgvb %r0,%v18,7 /* Get first found zero-index + (= first mismatch). */ + clrjl %r0,%r6,.Lslow_next_acc /* Mismatch-index < min(lbc,zero-index) + -> Process this string-part + with next acc-part. */ + clrjhe %r0,%r4,.Lslow_next_str /* Found-index >= loaded byte count + -> All loaded bytes are matching + any accept-character + and are not zero. */ + /* All bytes are matching any characters in accept-string + and search-string is fully processed (found-index == zero-index). */ +.Lslow_add_lbc_end: + algrk %r2,%r1,%r0 /* Add matching characters to current len. */ + srlg %r2,%r2,2 /* Convert byte-count to character-count. */ + /* Restore registers. */ + vlgvg %r6,%v30,0 + vlgvg %r8,%v31,0 + vlgvg %r9,%v31,1 + br %r14 + +.Lslow_next_acc: + clijh %r8,0,.Lslow_add_lbc_end /* There was a zero in last acc-part + -> Add found index to current len + and end. */ + vlbb %v17,16(%r5,%r3),6 /* Load next accept part. */ + aghi %r5,16 /* Increment current len of accept-string. */ + lcbb %r9,0(%r5,%r3),6 /* Get loaded byte count of accept-string. */ + jo .Lslow_next_acc_onbb /* Jump away if accept-string is + on block-boundary. */ +.Lslow_next_acc_notonbb: + vistrfs %v17,%v17 /* Fill with zeros after first zero. */ + jo .Lslow_loop_acc /* No zero found -> no preparation needed. */ + +.Lslow_next_acc_prepare_zero: + /* Zero in accept-part: fill zeros with first-accept-character. */ + vlgvf %r8,%v17,0 /* Load first element of acc-part. */ + clije %r8,0,.Lslow_add_lbc_end /* End if zero is first character + in this part of accept-string. */ + /* r8>0 -> zero found in this acc-part. */ + vrepf %v18,%v17,0 /* Replicate first char across all chars. */ + vceqf %v22,%v20,%v17 /* Create a mask (v22) of null chars + by comparing with 0 (v20). */ + vsel %v17,%v18,%v17,%v22 /* Replace null chars with first char. */ + j .Lslow_loop_acc /* Accept part is prepared -> process. */ + +.Lslow_next_acc_onbb: + nill %r9,65532 /* Recognize only fully loaded characters. 
*/ + je .Lslow_next_acc_onbb2 /* Reload vr, if we loaded no full + wchar_t. */ + vfenezf %v18,%v17,%v17 /* Find zero in loaded bytes of accept part. */ + vlgvb %r8,%v18,7 /* Load byte index of zero. */ + clrjl %r8,%r9,.Lslow_next_acc_notonbb /* Found a zero in loaded bytes + -> Prepare vreg. */ +.Lslow_next_acc_onbb2: + vl %v17,0(%r5,%r3) /* Load over boundary ... */ + lghi %r8,0 /* r8=0 -> no zero in this part of acc, + check for zero is in jump-target. */ + j .Lslow_next_acc_notonbb /* ... and search for zero in + fully loaded vreg again. */ +.Lfallback: + jg __wcsspn_c /* s is not 4-byte aligned: jump to the common-code variant. */ +END(__wcsspn_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wcsspn.c b/REORG.TODO/sysdeps/s390/multiarch/wcsspn.c new file mode 100644 index 0000000000..6eaec7dd65 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wcsspn.c @@ -0,0 +1,29 @@ +/* Multiple versions of wcsspn. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define wcsspn __redirect_wcsspn +# include <wchar.h> +# undef wcsspn +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2_redirected (__redirect_wcsspn, __wcsspn, wcsspn) /* NOTE(review): macro comes from ifunc-resolve.h (not shown here); presumably it emits the IFUNC symbol wcsspn resolving to __wcsspn_vx or __wcsspn_c -- confirm. */ + +#else +# include <wcsmbs/wcsspn.c> /* Without vector asm support the common-code wcsspn is built directly. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemchr-c.c b/REORG.TODO/sysdeps/s390/multiarch/wmemchr-c.c new file mode 100644 index 0000000000..0d04494ac8 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemchr-c.c @@ -0,0 +1,37 @@ +/* Default wmemchr implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WMEMCHR __wmemchr_c + +# include <wchar.h> +extern __typeof (wmemchr) __wmemchr_c; +# undef weak_alias +# define weak_alias(name, alias) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wmemchr_c, __GI___wmemchr, __wmemchr_c); +# undef libc_hidden_weak +# define libc_hidden_weak(name) \ + strong_alias (__wmemchr_c, __wmemchr_c_1); \ + __hidden_ver1 (__wmemchr_c_1, __GI_wmemchr, __wmemchr_c_1); +# endif /* SHARED */ + +# include <wcsmbs/wmemchr.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemchr-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wmemchr-vx.S new file mode 100644 index 0000000000..f5ab76e5bc --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemchr-vx.S @@ -0,0 +1,166 @@ +/* Vector optimized 32/64 bit S/390 version of wmemchr. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wmemchr (const wchar_t *s, wchar_t c, size_t n) + Scans memory for character c + and returns pointer to first c. 
+ + Register usage: + -r0=tmp + -r1=tmp + -r2=s + -r3=c + -r4=n + -r5=current_len + -v16=part of s + -v17=index of found c + -v18=c replicated +*/ +ENTRY(__wmemchr_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Convert 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + + clgije %r4,0,.Lnf_end /* If len == 0 then exit. */ + + vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */ + lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */ + llgfr %r0,%r0 /* Convert 32bit to 64bit. */ + + tmll %r2,3 /* Test if s is 4-byte aligned. */ + jne .Lfallback /* And use common-code variant if not. */ + + vlvgf %v18,%r3,0 /* Generate vector which elements are all c. */ + vrepf %v18,%v18,0 + lghi %r5,16 /* current_len = 16. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + clgrjhe %r0,%r4,.Llastcmp /* If (bytes to boundary) >= n, + jump to lastcmp. */ + + vfeefs %v17,%v16,%v18 /* Find c. */ + vlgvb %r1,%v17,7 /* Load byte index of c. */ + clgrjl %r1,%r0,.Lfound2 /* Found c is within loaded bytes. */ + + /* Align s to 16 byte. */ + risbgn %r1,%r2,60,128+63,0 /* %r1 = bits 60-63 of %r2 'and' 15. */ + slr %r5,%r1 /* Compute bytes to 16bytes boundary. */ + + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64. */ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 +.Llt64: + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp /* Do last compare if curr-len >= n. */ + vfeefs %v17,%v16,%v18 /* Find c. */ + jl .Lfound /* Jump away if c was found. 
*/ + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeefs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 + clgrjhe %r5,%r4,.Llastcmp + vfeefs %v17,%v16,%v18 + jl .Lfound + + vl %v16,0(%r5,%r2) + aghi %r5,16 + +.Llastcmp: + /* Use comparison result only if located within first n characters. + %r5: current_len; + %r4: n; + (current_len - n): [0...16[ + first ignored match index = vr-width - (current_len - n) ]0...16] + */ + vfeefs %v17,%v16,%v18 /* Find c. */ + slgrk %r4,%r5,%r4 /* %r4 = current_len - n. */ + lghi %r0,16 /* Register width = 16. */ + vlgvb %r1,%v17,7 /* Extract found index or 16 if c was not found. */ + slr %r0,%r4 /* %r0 = first ignored match index. */ + clrjl %r1,%r0,.Lfound2 /* Go away if found c is below n bytes. */ + /* c not found within n-bytes. */ +.Lnf_end: + lghi %r2,0 /* Return null. */ + br %r14 + +.Lfound48: + aghi %r5,16 +.Lfound32: + aghi %r5,16 +.Lfound16: + aghi %r5,16 +.Lfound0: + aghi %r5,16 +.Lfound: + vlgvb %r1,%v17,7 /* Load byte index of c. */ +.Lfound2: + slgfi %r5,16 /* current_len -=16 */ + algr %r5,%r1 /* Byte index of c is added to current len. */ + la %r2,0(%r5,%r2) /* Return pointer to c. */ + br %r14 + +.Lloop64: + vl %v16,0(%r5,%r2) + vfeefs %v17,%v16,%v18 /* Find c. */ + jl .Lfound0 /* Jump away if c was found. */ + vl %v16,16(%r5,%r2) + vfeefs %v17,%v16,%v18 + jl .Lfound16 + vl %v16,32(%r5,%r2) + vfeefs %v17,%v16,%v18 + jl .Lfound32 + vl %v16,48(%r5,%r2) + vfeefs %v17,%v16,%v18 + jl .Lfound48 + + aghi %r5,64 + lgr %r0,%r5 /* If %r5 + 64 < n? -> loop64. */ + aghi %r0,64 + clgrjl %r0,%r4,.Lloop64 + + j .Llt64 +.Lfallback: + jg __wmemchr_c /* s is not 4-byte aligned: jump to the common-code variant. 
*/ +END(__wmemchr_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemchr.c b/REORG.TODO/sysdeps/s390/multiarch/wmemchr.c new file mode 100644 index 0000000000..efb4d64e2b --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemchr.c @@ -0,0 +1,32 @@ +/* Multiple versions of wmemchr. 
+ Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define wmemchr __redirect_wmemchr +# define __wmemchr __redirect___wmemchr +# include <wchar.h> +# undef wmemchr +# undef __wmemchr +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc_redirected (__redirect___wmemchr, __wmemchr) /* NOTE(review): macro comes from ifunc-resolve.h (not shown here); presumably it emits the IFUNC symbol __wmemchr resolving to __wmemchr_vx or __wmemchr_c -- confirm. */ +weak_alias (__wmemchr, wmemchr) + +#else +# include <wcsmbs/wmemchr.c> /* Without vector asm support the common-code wmemchr is built directly. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemcmp-c.c b/REORG.TODO/sysdeps/s390/multiarch/wmemcmp-c.c new file mode 100644 index 0000000000..bdd8995634 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemcmp-c.c @@ -0,0 +1,26 @@ +/* Default wmemcmp implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WMEMCMP __wmemcmp_c + +# include <wchar.h> +extern __typeof (wmemcmp) __wmemcmp_c; + +# include <wcsmbs/wmemcmp.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemcmp-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wmemcmp-vx.S new file mode 100644 index 0000000000..40e71e5a65 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemcmp-vx.S @@ -0,0 +1,149 @@ +/* Vector Optimized 32/64 bit S/390 version of wmemcmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* int wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n) + Compare at most n characters of two wchar_t-arrays. 
+ + Register usage: + -r0=tmp + -r1=number of blocks + -r2=s1 + -r3=s2 + -r4=n + -r5=current_len + -v16=part of s1 + -v17=part of s2 + -v18=index of unequal +*/ +ENTRY(__wmemcmp_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Convert 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + clgije %r4,0,.Lend_equal /* Nothing to do if n == 0. */ + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r1,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r1,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r1 /* Use max byte-count, if bit 0/1 was one. */ + + lghi %r5,0 /* current_len = 0. */ + + clgijh %r4,16,.Lgt16 + +.Lremaining: + aghi %r4,-1 /* vll needs highest index. */ + vll %v16,%r4,0(%r2) + vll %v17,%r4,0(%r3) + vfenef %v18,%v16,%v17 /* Compare not equal. */ + vlgvb %r1,%v18,7 /* Load unequal index or 16 if not found. */ + clrj %r1,%r4,12,.Lfound2 /* r1 <= r4 -> unequal within loaded + bytes. */ + +.Lend_equal: + lghi %r2,0 /* Return 0: no difference found. */ + br %r14 + +.Lfound: + /* vfenefs found an unequal element. + This instruction compares unsigned words, but wchar_t is signed. + Thus we have to compare the found element again. */ + vlgvb %r1,%v18,7 /* Extract not equal byte-index. */ +.Lfound2: + srl %r1,2 /* And convert it to character-index. */ + vlgvf %r0,%v16,0(%r1) /* Load character-values. */ + vlgvf %r1,%v17,0(%r1) + cr %r0,%r1 /* Compare the two characters again (see above). */ + je .Lend_equal + lghi %r2,1 /* Return 1 ... */ + lghi %r1,-1 + locgrl %r2,%r1 /* ... or -1 if the s1 character is lower. 
*/ + br %r14 + +.Lgt16: + clgijh %r4,64,.Lpreloop64 + +.Lpreloop16: + srlg %r1,%r4,4 /* Split into 16byte blocks */ +.Lloop16: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + aghi %r5,16 + vfenefs %v18,%v16,%v17 /* Compare not equal. */ + jno .Lfound + brctg %r1,.Lloop16 /* Loop until all blocks are processed. 
*/ + + llgfr %r4,%r4 + nilf %r4,15 /* Get remaining bytes */ + locgre %r2,%r4 /* No bytes remain: return 0 (equal). */ + ber %r14 + la %r2,0(%r5,%r2) /* Advance s1 and s2 by current_len. */ + la %r3,0(%r5,%r3) + j .Lremaining + +.Lpreloop64: + srlg %r1,%r4,6 /* Split into 64byte blocks */ +.Lloop64: + vl %v16,0(%r5,%r2) + vl %v17,0(%r5,%r3) + vfenefs %v18,%v16,%v17 /* Compare not equal. */ + jno .Lfound + + vl %v16,16(%r5,%r2) + vl %v17,16(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,32(%r5,%r2) + vl %v17,32(%r5,%r3) + vfenefs %v18,%v16,%v17 + jno .Lfound + + vl %v16,48(%r5,%r2) + vl %v17,48(%r5,%r3) + aghi %r5,64 + vfenefs %v18,%v16,%v17 + jno .Lfound + + brctg %r1,.Lloop64 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,63 /* Get remaining bytes */ + locgre %r2,%r4 /* No bytes remain: return 0 (equal). */ + ber %r14 + clgijh %r4,16,.Lpreloop16 + la %r2,0(%r5,%r2) /* Advance s1 and s2 by current_len. */ + la %r3,0(%r5,%r3) + j .Lremaining +END(__wmemcmp_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemcmp.c b/REORG.TODO/sysdeps/s390/multiarch/wmemcmp.c new file mode 100644 index 0000000000..59471a36b2 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemcmp.c @@ -0,0 +1,27 @@ +/* Multiple versions of wmemcmp. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# include <wchar.h> +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc2 (__wmemcmp, wmemcmp) /* NOTE(review): macro comes from ifunc-resolve.h (not shown here); presumably it emits the IFUNC symbol wmemcmp resolving to __wmemcmp_vx or __wmemcmp_c -- confirm. */ + +#else +# include <wcsmbs/wmemcmp.c> /* Without vector asm support the common-code wmemcmp is built directly. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemset-c.c b/REORG.TODO/sysdeps/s390/multiarch/wmemset-c.c new file mode 100644 index 0000000000..9d5a848ec8 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemset-c.c @@ -0,0 +1,37 @@ +/* Default wmemset implementation for S/390. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. 
*/ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define WMEMSET __wmemset_c + +# include <wchar.h> +extern __typeof (__wmemset) __wmemset_c; +# undef weak_alias +# define weak_alias(name, alias) +# ifdef SHARED +# undef libc_hidden_def +# define libc_hidden_def(name) \ + __hidden_ver1 (__wmemset_c, __GI___wmemset, __wmemset_c); +# undef libc_hidden_weak +# define libc_hidden_weak(name) \ + strong_alias (__wmemset_c, __wmemset_c_1); \ + __hidden_ver1 (__wmemset_c_1, __GI_wmemset, __wmemset_c_1); +# endif /* SHARED */ + +# include <wcsmbs/wmemset.c> +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemset-vx.S b/REORG.TODO/sysdeps/s390/multiarch/wmemset-vx.S new file mode 100644 index 0000000000..6b76dead18 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemset-vx.S @@ -0,0 +1,142 @@ +/* Vector Optimized 32/64 bit S/390 version of wmemset. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) + +# include "sysdep.h" +# include "asm-syntax.h" + + .text + +/* wchar_t *wmemset(wchar_t *dest, wchar_t wc, size_t n) + Fill an array of wide-characters with a constant wide character + and returns dest. 
+ + Register usage: + -r0=tmp + -r1=tmp + -r2=dest or current-pointer + -r3=wc + -r4=n + -r5=tmp + -v16=replicated wc + -v17,v18,v19=copy of v16 for vstm + -v31=saved dest for return +*/ +ENTRY(__wmemset_vx) + .machine "z13" + .machinemode "zarch_nohighgprs" + +# if !defined __s390x__ + llgfr %r4,%r4 /* Convert 32bit n to 64bit. */ +# endif /* !defined __s390x__ */ + + vlvgg %v31,%r2,0 /* Save destination pointer for return. */ + clgije %r4,0,.Lend + + vlvgf %v16,%r3,0 /* Generate vector with wchar_t wc. */ + vrepf %v16,%v16,0 + + /* Check range of maxlen and convert to byte-count. */ +# ifdef __s390x__ + tmhh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + lghi %r5,-4 /* Max byte-count is 18446744073709551612. */ +# else + tmlh %r4,49152 /* Test bit 0 or 1 of maxlen. */ + llilf %r5,4294967292 /* Max byte-count is 4294967292. */ +# endif /* !__s390x__ */ + sllg %r4,%r4,2 /* Convert character-count to byte-count. */ + locgrne %r4,%r5 /* Use max byte-count, if bit 0/1 was one. */ + + /* Align dest to 16 byte. */ + risbg %r0,%r2,60,128+63,0 /* Test if dest is aligned and + %r0 = bits 60-63 'and' 15. */ + je .Lpreloop /* If dest is aligned, loop aligned. */ + tmll %r2,3 /* Test if dest is 4-byte aligned. */ + jne .Lfallback /* And use common-code variant if not. */ + lghi %r1,16 + slr %r1,%r0 /* Compute byte count to store (16-x). */ + clgr %r1,%r4 + locgrh %r1,%r4 /* min (byte count, n) */ + aghik %r5,%r1,-1 /* vstl needs highest index. */ + vstl %v16,%r5,0(%r2) /* Store remaining bytes. */ + clgrje %r1,%r4,.Lend /* Return if n bytes were set. */ + slgr %r4,%r1 /* Compute remaining byte count. */ + la %r2,0(%r1,%r2) + +.Lpreloop: + /* Now we are 16-byte aligned. 
*/ + clgijl %r4,17,.Lremaining + srlg %r1,%r4,8 /* Split into 256byte blocks */ + clgije %r1,0,.Lpreloop64 + vlr %v17,%v16 + vlr %v18,%v16 + vlr %v19,%v16 + +.Lloop256: + vstm %v16,%v19,0(%r2) + vstm %v16,%v19,64(%r2) + vstm %v16,%v19,128(%r2) + vstm %v16,%v19,192(%r2) + la %r2,256(%r2) + brctg %r1,.Lloop256 /* Loop until all blocks are processed. 
*/ + + llgfr %r4,%r4 + nilf %r4,255 /* Get remaining bytes */ + je .Lend /* Skip store remaining bytes if zero. */ + +.Lpreloop64: /* Despite its name, this stage works on 32byte blocks. */ + clgijl %r4,17,.Lremaining + clgijl %r4,33,.Lpreloop16 + srlg %r1,%r4,5 /* Split into 32byte blocks */ + +.Lloop32: + vst %v16,0(%r2) + vst %v16,16(%r2) + la %r2,32(%r2) + brctg %r1,.Lloop32 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,31 /* Get remaining bytes */ + je .Lend /* Skip store remaining bytes if zero. */ + +.Lpreloop16: + clgijl %r4,17,.Lremaining + srlg %r1,%r4,4 /* Split into 16byte blocks */ + +.Lloop16: + vst %v16,0(%r2) + la %r2,16(%r2) + brctg %r1,.Lloop16 /* Loop until all blocks are processed. */ + + llgfr %r4,%r4 + nilf %r4,15 /* Get remaining bytes */ + je .Lend /* Skip store remaining bytes if zero. */ + +.Lremaining: + aghi %r4,-1 /* vstl needs highest index. */ + vstl %v16,%r4,0(%r2) + +.Lend: + vlgvg %r2,%v31,0 /* Load saved dest for return value. */ + br %r14 +.Lfallback: + srlg %r4,%r4,2 /* Convert byte-count to character-count. */ + jg __wmemset_c /* dest is not 4-byte aligned: jump to the common-code variant. */ +END(__wmemset_vx) +#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */ diff --git a/REORG.TODO/sysdeps/s390/multiarch/wmemset.c b/REORG.TODO/sysdeps/s390/multiarch/wmemset.c new file mode 100644 index 0000000000..d3eea1cee0 --- /dev/null +++ b/REORG.TODO/sysdeps/s390/multiarch/wmemset.c @@ -0,0 +1,32 @@ +/* Multiple versions of wmemset. + Copyright (C) 2015-2017 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ + +#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) +# define wmemset __redirect_wmemset +# define __wmemset __redirect___wmemset +# include <wchar.h> +# undef wmemset +# undef __wmemset +# include <ifunc-resolve.h> + +s390_vx_libc_ifunc_redirected (__redirect___wmemset, __wmemset) /* NOTE(review): macro comes from ifunc-resolve.h (not shown here); presumably it emits the IFUNC symbol __wmemset resolving to __wmemset_vx or __wmemset_c -- confirm. */ +weak_alias (__wmemset, wmemset) + +#else +# include <wcsmbs/wmemset.c> /* Without vector asm support the common-code wmemset is built directly. */ +#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */ |