about summary refs log tree commit diff
diff options
context:
space:
mode:
authorStefan Liebler <stli@linux.vnet.ibm.com>2017-06-27 17:09:43 +0200
committerStefan Liebler <stli@linux.vnet.ibm.com>2017-06-27 17:09:43 +0200
commit593e4da186906525e2a0bdc4d87601bd0c2625eb (patch)
treec66ef4eb008f4a5ba94ea7605827f348fdf44ea8
parent23ea69a9d6e9ab28c66a232b767a800b04eaa938 (diff)
downloadglibc-593e4da186906525e2a0bdc4d87601bd0c2625eb.tar.gz
glibc-593e4da186906525e2a0bdc4d87601bd0c2625eb.tar.xz
glibc-593e4da186906525e2a0bdc4d87601bd0c2625eb.zip
S390: Use cu42 instruction for converting from utf32 to utf16.
This patch adds an ifunc variant to use the cu instruction on arch12 CPUs.
This new ifunc variant can be built if binutils support z13 vector
instructions.  At runtime, HWCAP_S390_VXE decides if we can use the
cu42 instruction.

ChangeLog:

	* sysdeps/s390/utf16-utf32-z9.c (__to_utf16_loop_vx_cu):
	Use vector and cu42 instruction.
	* sysdeps/s390/multiarch/utf16-utf32-z9.c:
	Add __to_utf16_loop_vx_cu in ifunc resolver.
-rw-r--r--ChangeLog7
-rw-r--r--sysdeps/s390/multiarch/utf16-utf32-z9.c8
-rw-r--r--sysdeps/s390/utf16-utf32-z9.c107
3 files changed, 118 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index dedaa3c535..2cfd8249b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2017-06-27  Stefan Liebler  <stli@linux.vnet.ibm.com>
 
+	* sysdeps/s390/utf16-utf32-z9.c (__to_utf16_loop_vx_cu):
+	Use vector and cu42 instruction.
+	* sysdeps/s390/multiarch/utf16-utf32-z9.c:
+	Add __to_utf16_loop_vx_cu in ifunc resolver.
+
+2017-06-27  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
 	* sysdeps/s390/utf8-utf32-z9.c (__to_utf8_loop_vx_cu):
 	Use vector and cu41 instruction.
 	* sysdeps/s390/multiarch/utf8-utf32-z9.c: Add __to_utf8_loop_vx_cu
diff --git a/sysdeps/s390/multiarch/utf16-utf32-z9.c b/sysdeps/s390/multiarch/utf16-utf32-z9.c
index 6e64169835..ded3cc2ad9 100644
--- a/sysdeps/s390/multiarch/utf16-utf32-z9.c
+++ b/sysdeps/s390/multiarch/utf16-utf32-z9.c
@@ -37,8 +37,10 @@ s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP,
 		      : FROM_LOOP_DEFAULT);
 
 s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP,
-		      (HAVE_TO_VX && (hwcap & HWCAP_S390_VX))
-		      ? TO_LOOP_VX
-		      : TO_LOOP_DEFAULT);
+		      (HAVE_TO_VX_CU && (hwcap & HWCAP_S390_VXE))
+		      ? TO_LOOP_VX_CU
+		      : (HAVE_TO_VX && (hwcap & HWCAP_S390_VX))
+			? TO_LOOP_VX
+			: TO_LOOP_DEFAULT);
 
 #include <iconv/skeleton.c>
diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c
index 7a174ea0e5..45c9f2b80c 100644
--- a/sysdeps/s390/utf16-utf32-z9.c
+++ b/sysdeps/s390/utf16-utf32-z9.c
@@ -40,9 +40,11 @@
 #if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
 # define HAVE_FROM_VX		1
 # define HAVE_TO_VX		1
+# define HAVE_TO_VX_CU		1
 #else
 # define HAVE_FROM_VX		0
 # define HAVE_TO_VX		0
+# define HAVE_TO_VX_CU		0
 #endif
 
 #if defined HAVE_S390_VX_GCC_SUPPORT
@@ -471,7 +473,7 @@ gconv_end (struct __gconv_step *data)
 		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
 		  CONVERT_32BIT_SIZE_T ([R_INLEN])			\
 		  CONVERT_32BIT_SIZE_T ([R_OUTLEN])			\
-		  /* Loop which handles UTF-16 chars			\
+		  /* Loop which handles UTF-32 chars			\
 		     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */	\
 		  "0:  clgijl %[R_INLEN],32,2f\n\t"			\
 		  "    clgijl %[R_OUTLEN],16,2f\n\t"			\
@@ -595,6 +597,109 @@ gconv_end (struct __gconv_step *data)
 # define TO_LOOP_VX		NULL
 #endif /* HAVE_TO_VX != 1  */
 
+#if HAVE_TO_VX_CU == 1
+#define BODY_TO_VX_CU							\
+  {									\
+    register const unsigned char* pInput asm ("8") = inptr;		\
+    register size_t inlen asm ("9") = inend - inptr;			\
+    register unsigned char* pOutput asm ("10") = outptr;		\
+    register size_t outlen asm ("11") = outend - outptr;		\
+    unsigned long tmp, tmp2, tmp3;					\
+    asm volatile (".machine push\n\t"					\
+		  ".machine \"z13\"\n\t"				\
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
+		  /* Setup to check for surrogates.  */			\
+		  "    larl %[R_TMP],9f\n\t"				\
+		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
+		  CONVERT_32BIT_SIZE_T ([R_INLEN])			\
+		  CONVERT_32BIT_SIZE_T ([R_OUTLEN])			\
+		  /* Loop which handles UTF-32 chars			\
+		     ch < 0xd800 || (ch > 0xdfff && ch < 0x10000).  */	\
+		  "0:  clgijl %[R_INLEN],32,20f\n\t"			\
+		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
+		  "1:  vlm %%v16,%%v17,0(%[R_IN])\n\t"			\
+		  "    lghi %[R_TMP2],0\n\t"				\
+		  /* Shorten to UTF-16.  */				\
+		  "    vpkf %%v18,%%v16,%%v17\n\t"			\
+		  /* Check for surrogate chars.  */			\
+		  "    vstrcfs %%v19,%%v16,%%v30,%%v31\n\t"		\
+		  "    jno 10f\n\t"					\
+		  "    vstrcfs %%v19,%%v17,%%v30,%%v31\n\t"		\
+		  "    jno 11f\n\t"					\
+		  /* Store 16 bytes to buf_out.  */			\
+		  "    vst %%v18,0(%[R_OUT])\n\t"			\
+		  "    la %[R_IN],32(%[R_IN])\n\t"			\
+		  "    aghi %[R_INLEN],-32\n\t"				\
+		  "    aghi %[R_OUTLEN],-16\n\t"			\
+		  "    la %[R_OUT],16(%[R_OUT])\n\t"			\
+		  "    clgijl %[R_INLEN],32,20f\n\t"			\
+		  "    clgijl %[R_OUTLEN],16,20f\n\t"			\
+		  "    j 1b\n\t"					\
+		  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff	\
+		     and check for ch >= 0x10000. (v30, v31)  */	\
+		  "9:  .long 0xd800,0xdfff,0x10000,0x10000\n\t"		\
+		  "    .long 0xa0000000,0xc0000000, 0xa0000000,0xa0000000\n\t" \
+		  /* At least one UTF32 char is in range of surrogates.	\
+		     Store the preceding characters.  */		\
+		  "11: ahi %[R_TMP2],16\n\t"				\
+		  "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
+		  "    agr %[R_TMP],%[R_TMP2]\n\t"			\
+		  "    srlg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
+		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
+		  "    jl 20f\n\t"					\
+		  "    vstl %%v18,%[R_TMP2],0(%[R_OUT])\n\t"		\
+		  /* Update pointers.  */				\
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
+		  /* Handles UTF16 surrogates with convert instruction.  */ \
+		  "20: cu42 %[R_OUT],%[R_IN]\n\t"			\
+		  "    jo 0b\n\t" /* Try vector implemenation again.  */ \
+		  "    lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
+		  "    lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
+		  ".machine pop"					\
+		  : /* outputs */ [R_IN] "+a" (pInput)			\
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput)	\
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
+		    , [R_RES] "+d" (result)				\
+		  : /* inputs */					\
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
+		  : /* clobber list */ "memory", "cc"			\
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
+		  );							\
+    inptr = pInput;							\
+    outptr = pOutput;							\
+									\
+    if (__glibc_likely (inlen == 0)					\
+	|| result == __GCONV_FULL_OUTPUT)				\
+      break;								\
+    if (inlen < 4)							\
+      {									\
+	result = __GCONV_INCOMPLETE_INPUT;				\
+	break;								\
+      }									\
+									\
+    STANDARD_TO_LOOP_ERR_HANDLER (4);					\
+  }
+
+/* Generate loop-function with hardware vector and utf-convert instructions.  */
+# define MIN_NEEDED_INPUT	MIN_NEEDED_TO
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
+# define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
+# define TO_LOOP_VX_CU		__to_utf16_loop_vx_cu
+# define LOOPFCT		TO_LOOP_VX_CU
+# define LOOP_NEED_FLAGS
+# define BODY			BODY_TO_VX_CU
+# include <iconv/loop.c>
+#else
+# define TO_LOOP_VX_CU		NULL
+#endif /* HAVE_TO_VX_CU != 1  */
+
 /* This file also exists in sysdeps/s390/multiarch/ which
    generates ifunc resolvers for FROM/TO_LOOP functions
    and includes iconv/skeleton.c afterwards.  */