about summary refs log tree commit diff
diff options
context:
space:
mode:
authorLexi Shao <shaolexi@huawei.com>2020-05-15 18:48:59 +0800
committerSzabolcs Nagy <szabolcs.nagy@arm.com>2020-05-15 12:15:56 +0100
commit59b64f9cbbf1e98c6d187873de6c363994aee19d (patch)
tree05ffddcb27240c8fcab594a2c5d97ccbb803229d
parent981e638d38947837fd36520c4cb051342348779d (diff)
downloadglibc-59b64f9cbbf1e98c6d187873de6c363994aee19d.tar.gz
glibc-59b64f9cbbf1e98c6d187873de6c363994aee19d.tar.xz
glibc-59b64f9cbbf1e98c6d187873de6c363994aee19d.zip
aarch64: fix strcpy and strnlen for big-endian [BZ #25824]
This patch fixes the optimized implementation of strcpy and strnlen
on a big-endian arm64 machine.

The optimized method uses neon, which can process 128bit with one
instruction. On a big-endian machine, the bit order should be reversed
for the whole 128-bits double word. But with instuction
	rev64	datav.16b, datav.16b
it reverses 64bits in the two halves rather than reversing 128bits.
There is no such instruction as rev128 to reverse the 128bits, but we
can fix this by loading the data registers accordingly.

Fixes 0237b61526e7("aarch64: Optimized implementation of strcpy") and
2911cb68ed3d("aarch64: Optimized implementation of strnlen").

Signed-off-by: Lexi Shao <shaolexi@huawei.com>
Reviewed-by: Szabolcs Nagy  <szabolcs.nagy@arm.com>
-rw-r--r--sysdeps/aarch64/strcpy.S5
-rw-r--r--sysdeps/aarch64/strnlen.S5
2 files changed, 10 insertions, 0 deletions
diff --git a/sysdeps/aarch64/strcpy.S b/sysdeps/aarch64/strcpy.S
index 548130e413..a8ff52c072 100644
--- a/sysdeps/aarch64/strcpy.S
+++ b/sysdeps/aarch64/strcpy.S
@@ -234,8 +234,13 @@ L(entry_no_page_cross):
 #endif
 	/* calculate the loc value */
 	cmeq	datav.16b, datav.16b, #0
+#ifdef __AARCH64EB__
+	mov	data1, datav.d[1]
+	mov	data2, datav.d[0]
+#else
 	mov	data1, datav.d[0]
 	mov	data2, datav.d[1]
+#endif
 	cmp	data1, 0
 	csel	data1, data1, data2, ne
 	mov	pos, 8
diff --git a/sysdeps/aarch64/strnlen.S b/sysdeps/aarch64/strnlen.S
index 5981247dd9..086a5c7e99 100644
--- a/sysdeps/aarch64/strnlen.S
+++ b/sysdeps/aarch64/strnlen.S
@@ -154,8 +154,13 @@ L(loop_end):
 	   byte.  */
 
 	cmeq	datav.16b, datav.16b, #0
+#ifdef __AARCH64EB__
+	mov	data1, datav.d[1]
+	mov	data2, datav.d[0]
+#else
 	mov	data1, datav.d[0]
 	mov	data2, datav.d[1]
+#endif
 	cmp	data1, 0
 	csel	data1, data1, data2, ne
 	sub	len, src, srcin