about summary refs log tree commit diff
path: root/stdlib
diff options
context:
space:
mode:
authorAdhemerval Zanella Netto <adhemerval.zanella@linaro.org>2022-07-21 10:05:02 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2022-07-22 11:58:27 -0300
commit4c128c7823e5a19058589cfac42aa96de3e15430 (patch)
treec214ca1a59aaa09294629a5b37d303bb639167b5 /stdlib
parent5d765ada01d140d8d1ecf94953a4751593af720d (diff)
downloadglibc-4c128c7823e5a19058589cfac42aa96de3e15430.tar.gz
glibc-4c128c7823e5a19058589cfac42aa96de3e15430.tar.xz
glibc-4c128c7823e5a19058589cfac42aa96de3e15430.zip
aarch64: Add optimized chacha20
It adds a vectorized ChaCha20 implementation based on libgcrypt's
cipher/chacha20-aarch64.S.  It is used by default, and only
little-endian is supported (BE uses the generic code).

As for the generic implementation, the last step that XORs with the
input is omitted.  The final state register clearing is also
omitted.

On a virtualized Linux on Apple M1 it shows the following
improvements (using formatted bench-arc4random data):

GENERIC                                    MB/s
-----------------------------------------------
arc4random [single-thread]               380.89
arc4random_buf(16) [single-thread]       500.73
arc4random_buf(32) [single-thread]       552.61
arc4random_buf(48) [single-thread]       566.82
arc4random_buf(64) [single-thread]       574.01
arc4random_buf(80) [single-thread]       581.02
arc4random_buf(96) [single-thread]       591.19
arc4random_buf(112) [single-thread]      592.29
arc4random_buf(128) [single-thread]      596.43
-----------------------------------------------

OPTIMIZED                                  MB/s
-----------------------------------------------
arc4random [single-thread]               569.60
arc4random_buf(16) [single-thread]       825.78
arc4random_buf(32) [single-thread]       987.03
arc4random_buf(48) [single-thread]      1042.39
arc4random_buf(64) [single-thread]      1075.50
arc4random_buf(80) [single-thread]      1094.68
arc4random_buf(96) [single-thread]      1130.16
arc4random_buf(112) [single-thread]     1129.58
arc4random_buf(128) [single-thread]     1137.91
-----------------------------------------------

Checked on aarch64-linux-gnu.
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/chacha20.c8
1 files changed, 6 insertions, 2 deletions
diff --git a/stdlib/chacha20.c b/stdlib/chacha20.c
index c47b8418f2..2745a81315 100644
--- a/stdlib/chacha20.c
+++ b/stdlib/chacha20.c
@@ -165,8 +165,9 @@ chacha20_block (uint32_t *state, uint8_t *dst, const uint8_t *src)
 }
 
 static void
-chacha20_crypt (uint32_t *state, uint8_t *dst, const uint8_t *src,
-		size_t bytes)
+__attribute_maybe_unused__
+chacha20_crypt_generic (uint32_t *state, uint8_t *dst, const uint8_t *src,
+			size_t bytes)
 {
   while (bytes >= CHACHA20_BLOCK_SIZE)
     {
@@ -185,3 +186,6 @@ chacha20_crypt (uint32_t *state, uint8_t *dst, const uint8_t *src,
       explicit_bzero (stream, sizeof stream);
     }
 }
+
+/* Get the architecture optimized version.  */
+#include <chacha20_arch.h>