about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog5
-rw-r--r--sysdeps/s390/multiarch/utf16-utf32-z9.c8
-rw-r--r--sysdeps/s390/utf16-utf32-z9.c113
3 files changed, 122 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 2cfd8249b1..62aacea90f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2017-06-27  Stefan Liebler  <stli@linux.vnet.ibm.com>
 
+	* sysdeps/s390/utf16-utf32-z9.c (__from_utf16_loop_vx_cu):
+	Use vector and cu24 instruction.
+
+2017-06-27  Stefan Liebler  <stli@linux.vnet.ibm.com>
+
 	* sysdeps/s390/utf16-utf32-z9.c (__to_utf16_loop_vx_cu):
 	Use vector and cu42 instruction.
 	* sysdeps/s390/multiarch/utf16-utf32-z9.c:
diff --git a/sysdeps/s390/multiarch/utf16-utf32-z9.c b/sysdeps/s390/multiarch/utf16-utf32-z9.c
index ded3cc2ad9..917de5f5cc 100644
--- a/sysdeps/s390/multiarch/utf16-utf32-z9.c
+++ b/sysdeps/s390/multiarch/utf16-utf32-z9.c
@@ -32,9 +32,11 @@ strong_alias (SINGLE_NAME (TO_LOOP_DEFAULT), SINGLE_NAME (TO_LOOP))
 
 /* Generate ifunc'ed loop functions for FROM/TO_LOOP.  */
 s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP,
-		      (HAVE_FROM_VX && (hwcap & HWCAP_S390_VX))
-		      ? FROM_LOOP_VX
-		      : FROM_LOOP_DEFAULT);
+		      (HAVE_FROM_VX_CU && (hwcap & HWCAP_S390_VXE))
+		      ? FROM_LOOP_VX_CU
+		      : (HAVE_FROM_VX && (hwcap & HWCAP_S390_VX))
+			? FROM_LOOP_VX
+			: FROM_LOOP_DEFAULT);
 
 s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP,
 		      (HAVE_TO_VX_CU && (hwcap & HWCAP_S390_VXE))
diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c
index 45c9f2b80c..07cff21654 100644
--- a/sysdeps/s390/utf16-utf32-z9.c
+++ b/sysdeps/s390/utf16-utf32-z9.c
@@ -39,10 +39,12 @@
 
 #if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
 # define HAVE_FROM_VX		1
+# define HAVE_FROM_VX_CU	1
 # define HAVE_TO_VX		1
 # define HAVE_TO_VX_CU		1
 #else
 # define HAVE_FROM_VX		0
+# define HAVE_FROM_VX_CU	0
 # define HAVE_TO_VX		0
 # define HAVE_TO_VX_CU		0
 #endif
@@ -383,7 +385,6 @@ gconv_end (struct __gconv_step *data)
     STANDARD_FROM_LOOP_ERR_HANDLER (2);					\
   }
 
-
 /* Generate loop-function with hardware vector instructions.  */
 # define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
 # define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
@@ -397,6 +398,116 @@ gconv_end (struct __gconv_step *data)
 # define FROM_LOOP_VX		NULL
 #endif /* HAVE_FROM_VX != 1  */
 
+#if HAVE_FROM_VX_CU == 1
+#define BODY_FROM_VX_CU							\
+  {									\
+    register const unsigned char* pInput asm ("8") = inptr;		\
+    register size_t inlen asm ("9") = inend - inptr;			\
+    register unsigned char* pOutput asm ("10") = outptr;		\
+    register size_t outlen asm ("11") = outend - outptr;		\
+    unsigned long tmp, tmp2, tmp3;					\
+    asm volatile (".machine push\n\t"					\
+		  ".machine \"z13\"\n\t"				\
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
+		  /* Setup to check for surrogates.  */			\
+		  "    larl %[R_TMP],9f\n\t"				\
+		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
+		  CONVERT_32BIT_SIZE_T ([R_INLEN])			\
+		  CONVERT_32BIT_SIZE_T ([R_OUTLEN])			\
+		  /* Loop which handles UTF-16 chars <0xd800, >0xdfff.  */ \
+		  "0:  clgijl %[R_INLEN],16,20f\n\t"			\
+		  "    clgijl %[R_OUTLEN],32,20f\n\t"			\
+		  "1:  vl %%v16,0(%[R_IN])\n\t"				\
+		  /* Check for surrogate chars.  */			\
+		  "    vstrchs %%v19,%%v16,%%v30,%%v31\n\t"		\
+		  "    jno 10f\n\t"					\
+		  /* Enlarge to UTF-32.  */				\
+		  "    vuplhh %%v17,%%v16\n\t"				\
+		  "    la %[R_IN],16(%[R_IN])\n\t"			\
+		  "    vupllh %%v18,%%v16\n\t"				\
+		  "    aghi %[R_INLEN],-16\n\t"				\
+		  /* Store 32 bytes to buf_out.  */			\
+		  "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
+		  "    aghi %[R_OUTLEN],-32\n\t"			\
+		  "    la %[R_OUT],32(%[R_OUT])\n\t"			\
+		  "    clgijl %[R_INLEN],16,20f\n\t"			\
+		  "    clgijl %[R_OUTLEN],32,20f\n\t"			\
+		  "    j 1b\n\t"					\
+		  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. (v30, v31)  */ \
+		  "9:  .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
+		  "    .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
+		  /* At least one uint16_t is in range of surrogates.	\
+		     Store the preceding chars.  */			\
+		  "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
+		  "    vuplhh %%v17,%%v16\n\t"				\
+		  "    sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
+		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
+		  "    jl 20f\n\t"					\
+		  "    vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t"		\
+		  "    vupllh %%v18,%%v16\n\t"				\
+		  "    ahi %[R_TMP2],-16\n\t"				\
+		  "    jl 11f\n\t"					\
+		  "    vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t"		\
+		  "11: \n\t" /* Update pointers.  */			\
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
+		  /* Handles UTF16 surrogates with convert instruction.  */ \
+		  "20: cu24 %[R_OUT],%[R_IN],1\n\t"			\
+		  "    jo 0b\n\t" /* Try vector implemenation again.  */ \
+		  "    lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
+		  "    lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
+		  ".machine pop"					\
+		  : /* outputs */ [R_IN] "+a" (pInput)			\
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput)	\
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
+		    , [R_RES] "+d" (result)				\
+		  : /* inputs */					\
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
+		  : /* clobber list */ "memory", "cc"			\
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
+		  );							\
+    inptr = pInput;							\
+    outptr = pOutput;							\
+									\
+    if (__glibc_likely (inlen == 0)					\
+	|| result == __GCONV_FULL_OUTPUT)				\
+      break;								\
+    if (inlen == 1)							\
+      {									\
+	/* Input does not contain a complete utf16 character.  */	\
+	result = __GCONV_INCOMPLETE_INPUT;				\
+	break;								\
+      }									\
+    else if (result != __GCONV_ILLEGAL_INPUT)				\
+      {									\
+	/* Input is >= 2 and < 4 bytes (as cu24 would have processed	\
+	   a possible next utf16 character) and not illegal.		\
+	   => we have a single high surrogate at end of input.  */	\
+	result = __GCONV_INCOMPLETE_INPUT;				\
+	break;								\
+      }									\
+									\
+    STANDARD_FROM_LOOP_ERR_HANDLER (2);					\
+  }
+
+/* Generate loop-function with hardware vector and utf-convert instructions.  */
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
+# define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
+# define FROM_LOOP_VX_CU	__from_utf16_loop_vx_cu
+# define LOOPFCT		FROM_LOOP_VX_CU
+# define LOOP_NEED_FLAGS
+# define BODY			BODY_FROM_VX_CU
+# include <iconv/loop.c>
+#else
+# define FROM_LOOP_VX_CU	NULL
+#endif /* HAVE_FROM_VX_CU != 1  */
 
 /* Conversion from UTF-32 internal/BE to UTF-16.  */