about summary refs log tree commit diff
path: root/sysdeps
diff options
context:
space:
mode:
author Feng Xue <feng.xue@amperecomputing.com> 2019-08-14 10:48:05 +0800
committer Feng Xue <feng.xue@amperecomputing.com> 2019-08-14 10:58:21 +0800
commit b68fabfbbc5a4178338e167f5517787b76eb5962 (patch)
tree e1b15af9e5b6397b16dc9021437d96ca5e6037f7 /sysdeps
parent c3ce62cc0bd6e8a33629e2aabb7783a322e9189c (diff)
download glibc-b68fabfbbc5a4178338e167f5517787b76eb5962.tar.gz
glibc-b68fabfbbc5a4178338e167f5517787b76eb5962.tar.xz
glibc-b68fabfbbc5a4178338e167f5517787b76eb5962.zip
aarch64: Disable using DC ZVA in emag memset
    * sysdeps/aarch64/multiarch/memset_base64.S (DC_ZVA_THRESHOLD):
    Disable DC ZVA code if this macro is defined as zero.
    * sysdeps/aarch64/multiarch/memset_emag.S (DC_ZVA_THRESHOLD):
    Change to zero to disable using DC ZVA.
Diffstat (limited to 'sysdeps')
-rw-r--r-- sysdeps/aarch64/multiarch/memset_base64.S 12
-rw-r--r-- sysdeps/aarch64/multiarch/memset_emag.S 12
2 files changed, 17 insertions, 7 deletions
diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S
index 9a623259b9..c0cccbac64 100644
--- a/sysdeps/aarch64/multiarch/memset_base64.S
+++ b/sysdeps/aarch64/multiarch/memset_base64.S
@@ -23,6 +23,7 @@
 # define MEMSET __memset_base64
 #endif
 
+/* To disable DC ZVA, set this threshold to 0. */
 #ifndef DC_ZVA_THRESHOLD
 # define DC_ZVA_THRESHOLD 512
 #endif
@@ -91,11 +92,12 @@ L(set96):
 	.p2align 4
 L(set_long):
 	stp	val, val, [dstin]
+	bic	dst, dstin, 15
+#if DC_ZVA_THRESHOLD
 	cmp	count, DC_ZVA_THRESHOLD
 	ccmp	val, 0, 0, cs
-	bic	dst, dstin, 15
 	b.eq	L(zva_64)
-
+#endif
 	/* Small-size or non-zero memset does not use DC ZVA. */
 	sub	count, dstend, dst
 
@@ -105,7 +107,11 @@ L(set_long):
 	 * count is less than 33 bytes, so as to bypass 2 unneccesary stps.
 	 */
 	sub	count, count, 64+16+1
+
+#if DC_ZVA_THRESHOLD
+	/* Align loop on 16-byte boundary, this might be friendly to i-cache. */
 	nop
+#endif
 
 1:	stp	val, val, [dst, 16]
 	stp	val, val, [dst, 32]
@@ -121,6 +127,7 @@ L(set_long):
 	stp	val, val, [dstend, -16]
 	ret
 
+#if DC_ZVA_THRESHOLD
 	.p2align 3
 L(zva_64):
 	stp	val, val, [dst, 16]
@@ -173,6 +180,7 @@ L(zva_64):
 1:	stp	val, val, [dstend, -32]
 	stp	val, val, [dstend, -16]
 	ret
+#endif
 
 END (MEMSET)
 libc_hidden_builtin_def (MEMSET)
diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
index 1c1fabc624..c2aed62fe5 100644
--- a/sysdeps/aarch64/multiarch/memset_emag.S
+++ b/sysdeps/aarch64/multiarch/memset_emag.S
@@ -21,12 +21,14 @@
 # define MEMSET __memset_emag
 
 /*
- * Using dc zva to zero memory does not produce better performance if
+ * Using DC ZVA to zero memory does not produce better performance if
  * memory size is not very large, especially when there are multiple
- * processes/threads contending memory/cache. Here we use a somewhat
- * large threshold to trigger usage of dc zva.
-*/
-# define DC_ZVA_THRESHOLD 1024
+ * processes/threads contending memory/cache. Here we set threshold to
+ * zero to disable using DC ZVA, which is good for multi-process/thread
+ * workloads.
+ */
+
+# define DC_ZVA_THRESHOLD 0
 
 # include "./memset_base64.S"
 #endif