author    Szabolcs Nagy <szabolcs.nagy@arm.com>    2021-02-09 17:56:02 +0000
committer Szabolcs Nagy <szabolcs.nagy@arm.com>    2021-03-26 11:03:06 +0000
commit    23fd760add29042fd6f8fead0f6a2f32631d41aa (patch)
tree      a7b15b0d3d507153ba3f14ab14c4ca3d7d5438fd /sysdeps
parent    383bc2402879080ed65b7f68789f5ebaf994f896 (diff)
aarch64: Optimize __libc_mtag_tag_region
This is a target hook for memory tagging; the original was a naive
implementation. The optimized version relies on "dc gva" to tag 64
bytes at a time for large allocations and optimizes the small cases
without adding too many branches. It has not been benchmarked on real
hardware, but it is expected to be faster than the naive implementation.
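For reference, the contract this hook implements can be modelled in C.
Below is a minimal sketch of the naive version being replaced: it tags
one 16-byte granule per iteration with the tag carried in the pointer,
which is what the old STG loop did. This is illustrative, not glibc
code; it assumes the ACLE MTE intrinsic __arm_mte_set_tag from
arm_acle.h (compile with -march=armv8.5-a+memtag), and
naive_mtag_tag_region is a hypothetical name.

#include <arm_acle.h>
#include <stddef.h>

void *
naive_mtag_tag_region (void *p, size_t size)
{
  /* Contract: p is 16-byte aligned, size is a multiple of 16, and
     the tag to store is taken from p's own top byte.  */
  for (size_t i = 0; i < size; i += 16)
    __arm_mte_set_tag ((char *) p + i);
  return p;
}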
Diffstat (limited to 'sysdeps')
 sysdeps/aarch64/__mtag_tag_region.S | 98 ++++++++++++++++++++++++++++--------
 1 file changed, 80 insertions(+), 18 deletions(-)
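The most compact trick in the diff below is the small-size path: counts
of 16, 32 or 48 bytes are covered by three possibly overlapping granule
stores, with the middle store address computed branchlessly as
dstin + (count >> 5 << 4). The following C sketch mirrors that dispatch
under the same assumptions as above; tag_granule and tag_small are
hypothetical helpers, and tag_granule models STG with distinct tag and
address registers.

#include <arm_acle.h>
#include <stddef.h>
#include <stdint.h>

/* Tag the 16-byte granule at addr with the tag carried in ptr
   (bits 59:56), as "stg ptr, [addr]" does.  */
static inline void
tag_granule (void *ptr, char *addr)
{
  uintptr_t tag_byte = (uintptr_t) ptr >> 56 << 56;
  __arm_mte_set_tag ((void *) (((uintptr_t) addr << 8 >> 8) | tag_byte));
}

/* Tag 0..96 bytes (count a multiple of 16), mirroring the diff's
   small-size path.  */
void *
tag_small (void *p, size_t count)
{
  char *dst = (char *) p;
  char *dstend = dst + count;

  if (count & 64)
    {
      /* 64..96 bytes: 64 from the start, 32 from the end; the stores
         overlap in the middle when count < 96 (the ST2G pattern).  */
      tag_granule (p, dst);
      tag_granule (p, dst + 16);
      tag_granule (p, dst + 32);
      tag_granule (p, dst + 48);
      tag_granule (p, dstend - 32);
      tag_granule (p, dstend - 16);
      return p;
    }
  if (count == 0)
    return p;
  /* 16, 32 or 48 bytes: stores at the start, the branchlessly
     computed middle, and the end cover every case, overlapping
     when count < 48.  */
  char *mid = dst + ((count >> 5) << 4);
  tag_granule (p, dst);
  tag_granule (p, mid);
  tag_granule (p, dstend - 16);
  return p;
}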
diff --git a/sysdeps/aarch64/__mtag_tag_region.S b/sysdeps/aarch64/__mtag_tag_region.S
index 9a8a3ffb60..cae0c8f121 100644
--- a/sysdeps/aarch64/__mtag_tag_region.S
+++ b/sysdeps/aarch64/__mtag_tag_region.S
@@ -20,32 +20,94 @@
 
 #ifdef USE_MTAG
 
-/* Use the same register names and assignments as memset.  */
-
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, MTE, LP64 ABI.
+ *
+ * Interface contract:
+ * Address is 16 byte aligned and size is multiple of 16.
+ * Returns the passed pointer.
+ * The memory region may remain untagged if tagging is not enabled.
+ */
 	.arch armv8.5-a
 	.arch_extension memtag
 
-/* NB, only supported on variants with 64-bit pointers.  */
+#define dstin	x0
+#define count	x1
+#define dst	x2
+#define dstend	x3
+#define tmp	x4
+#define zva_val	x4
+
+ENTRY (__libc_mtag_tag_region)
+	PTR_ARG (0)
+	SIZE_ARG (1)
+
+	add	dstend, dstin, count
 
-/* FIXME: This is a minimal implementation.  We could do better than
-   this for larger values of COUNT.  */
+	cmp	count, 96
+	b.hi	L(set_long)
 
-#define dstin x0
-#define count x1
-#define dst   x2
+	tbnz	count, 6, L(set96)
 
-ENTRY_ALIGN(__libc_mtag_tag_region, 6)
+	/* Set 0, 16, 32, or 48 bytes.  */
+	lsr	tmp, count, 5
+	add	tmp, dstin, tmp, lsl 4
+	cbz	count, L(end)
+	stg	dstin, [dstin]
+	stg	dstin, [tmp]
+	stg	dstin, [dstend, -16]
+L(end):
+	ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	st2g	dstin, [dstin]
+	st2g	dstin, [dstin, 32]
+	st2g	dstin, [dstend, -32]
+	ret
 
-	mov	dst, dstin
-L(loop):
-	stg	dst, [dst], #16
-	subs	count, count, 16
-	bne	L(loop)
-#if 0
-	/* This is not currently needed, since for now we are only called
-	   to tag memory that is taggable.  */
-	ldg	dstin, [dstin] // Recover the tag created (might be untagged).
+	.p2align 4
+	/* Size is > 96 bytes.  */
+L(set_long):
+	cmp	count, 160
+	b.lo	L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+	mrs	zva_val, dczid_el0
+	and	zva_val, zva_val, 31
+	cmp	zva_val, 4		/* ZVA size is 64 bytes.  */
+	b.ne	L(no_zva)
 #endif
+	st2g	dstin, [dstin]
+	st2g	dstin, [dstin, 32]
+	bic	dst, dstin, 63
+	sub	count, dstend, dst	/* Count is now 64 too large.  */
+	sub	count, count, 128	/* Adjust count and bias for loop.  */
+
+	.p2align 4
+L(zva_loop):
+	add	dst, dst, 64
+	dc	gva, dst
+	subs	count, count, 64
+	b.hi	L(zva_loop)
+	st2g	dstin, [dstend, -64]
+	st2g	dstin, [dstend, -32]
 	ret
+
+L(no_zva):
+	sub	dst, dstin, 32		/* Dst is biased by -32.  */
+	sub	count, count, 64	/* Adjust count for loop.  */
+L(no_zva_loop):
+	st2g	dstin, [dst, 32]
+	st2g	dstin, [dst, 64]!
+	subs	count, count, 64
+	b.hi	L(no_zva_loop)
+	st2g	dstin, [dstend, -64]
+	st2g	dstin, [dstend, -32]
+	ret
+
 END (__libc_mtag_tag_region)
 #endif /* USE_MTAG */
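The SKIP_ZVA_CHECK guard above enables the "dc gva" loop only when the
DC block size is 64 bytes. DCZID_EL0 bits [3:0] give log2 of the block
size in 4-byte words and bit [4] (DZP) prohibits DC ZVA/GVA, so masking
5 bits and comparing with 4 accepts exactly a 64-byte block with DZP
clear: 2^4 words * 4 bytes = 64. The check reads back in C as follows;
a sketch using GCC/Clang inline assembly, with zva_block_is_64 a
hypothetical name.

#include <stdbool.h>
#include <stdint.h>

static bool
zva_block_is_64 (void)
{
  uint64_t dczid;
  asm ("mrs %0, dczid_el0" : "=r" (dczid));
  /* Matches the assembly's and/cmp pair: because DZP lands inside the
     5-bit mask, a prohibited DC also falls back to the ST2G loop.  */
  return (dczid & 31) == 4;
}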