about summary refs log tree commit diff
path: root/sysdeps/aarch64/multiarch/memmove.c
diff options
context:
space:
mode:
author: Anton Youdkevitch <anton.youdkevitch@bell-sw.com>, 2019-05-03 11:01:34 -0700
committer: Steve Ellcey <sellcey@caviumnetworks.com>, 2019-05-03 11:01:34 -0700
commit: 32e902a94e24fc5a00168d0df3301098704c61fb (patch)
tree: 88bd3588b0e08141855220a21467f230366b42db /sysdeps/aarch64/multiarch/memmove.c
parent: ac3da35de5cf113edfd514c2fc8ccbaed4536aaf (diff)
download: glibc-32e902a94e24fc5a00168d0df3301098704c61fb.tar.gz
          glibc-32e902a94e24fc5a00168d0df3301098704c61fb.tar.xz
          glibc-32e902a94e24fc5a00168d0df3301098704c61fb.zip
aarch64: thunderx2 memmove performance improvements
The performance improvement is about 20%-30% for
larger cases and about 1%-5% for smaller cases.

Used SIMD load/store instead of GPR for large
overlapping forward moves.

Reused existing memcpy implementation for smaller
or overlapping backward moves.

Fixed the existing memcpy implementation to allow it
to deal with the overlapping case.

Simplified loop tails in the memcpy implementation -
use branchless overlapping sequence of fixed length
load/stores instead of branching depending on the
size.

As a cleanup/optimization, converted str instructions to stp instructions.

Added __memmove_thunderx2 to the list of the
available implementations.
Diffstat (limited to 'sysdeps/aarch64/multiarch/memmove.c')
-rw-r--r-- sysdeps/aarch64/multiarch/memmove.c | 5
1 file changed, 4 insertions, 1 deletion
diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c
index f58dde3122..f3d341bef6 100644
--- a/sysdeps/aarch64/multiarch/memmove.c
+++ b/sysdeps/aarch64/multiarch/memmove.c
@@ -30,6 +30,7 @@ extern __typeof (__redirect_memmove) __libc_memmove;
 
 extern __typeof (__redirect_memmove) __memmove_generic attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_thunderx attribute_hidden;
+extern __typeof (__redirect_memmove) __memmove_thunderx2 attribute_hidden;
 extern __typeof (__redirect_memmove) __memmove_falkor attribute_hidden;
 
 libc_ifunc (__libc_memmove,
@@ -37,7 +38,9 @@ libc_ifunc (__libc_memmove,
 	     ? __memmove_thunderx
 	     : (IS_FALKOR (midr) || IS_PHECDA (midr)
 		? __memmove_falkor
-		: __memmove_generic)));
+		: (IS_THUNDERX2 (midr) || IS_THUNDERX2PA (midr)
+		  ? __memmove_thunderx2
+		  : __memmove_generic))));
 
 # undef memmove
 strong_alias (__libc_memmove, memmove);