From b7060acfe8e80fe832e3227020d1127f2d971d1c Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Netto Date: Thu, 21 Jul 2022 10:05:05 -0300 Subject: powerpc64: Add optimized chacha20 It adds a vectorized ChaCha20 implementation based on libgcrypt cipher/chacha20-ppc.c. It targets POWER8 and it is used by default for LE. On a POWER8 it shows the following improvements (using formatted bench-arc4random data): POWER8 GENERIC MB/s ----------------------------------------------- arc4random [single-thread] 138.77 arc4random_buf(16) [single-thread] 174.36 arc4random_buf(32) [single-thread] 228.11 arc4random_buf(48) [single-thread] 252.31 arc4random_buf(64) [single-thread] 270.11 arc4random_buf(80) [single-thread] 278.97 arc4random_buf(96) [single-thread] 287.78 arc4random_buf(112) [single-thread] 291.92 arc4random_buf(128) [single-thread] 295.25 POWER8 MB/s ----------------------------------------------- arc4random [single-thread] 198.06 arc4random_buf(16) [single-thread] 278.79 arc4random_buf(32) [single-thread] 448.89 arc4random_buf(48) [single-thread] 551.09 arc4random_buf(64) [single-thread] 646.12 arc4random_buf(80) [single-thread] 698.04 arc4random_buf(96) [single-thread] 756.06 arc4random_buf(112) [single-thread] 784.12 arc4random_buf(128) [single-thread] 808.04 ----------------------------------------------- Checked on powerpc64-linux-gnu and powerpc64le-linux-gnu. Reviewed-by: Paul E. 
Murphy --- sysdeps/powerpc/powerpc64/be/multiarch/Makefile | 4 +++ .../powerpc/powerpc64/be/multiarch/chacha20-ppc.c | 1 + .../powerpc/powerpc64/be/multiarch/chacha20_arch.h | 42 ++++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 sysdeps/powerpc/powerpc64/be/multiarch/Makefile create mode 100644 sysdeps/powerpc/powerpc64/be/multiarch/chacha20-ppc.c create mode 100644 sysdeps/powerpc/powerpc64/be/multiarch/chacha20_arch.h (limited to 'sysdeps/powerpc/powerpc64/be/multiarch') diff --git a/sysdeps/powerpc/powerpc64/be/multiarch/Makefile b/sysdeps/powerpc/powerpc64/be/multiarch/Makefile new file mode 100644 index 0000000000..8c75165f7f --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/multiarch/Makefile @@ -0,0 +1,4 @@ +ifeq ($(subdir),stdlib) +sysdep_routines += chacha20-ppc +CFLAGS-chacha20-ppc.c += -mcpu=power8 +endif diff --git a/sysdeps/powerpc/powerpc64/be/multiarch/chacha20-ppc.c b/sysdeps/powerpc/powerpc64/be/multiarch/chacha20-ppc.c new file mode 100644 index 0000000000..cf9e735326 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/multiarch/chacha20-ppc.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/powerpc/powerpc64/be/multiarch/chacha20_arch.h b/sysdeps/powerpc/powerpc64/be/multiarch/chacha20_arch.h new file mode 100644 index 0000000000..08494dc045 --- /dev/null +++ b/sysdeps/powerpc/powerpc64/be/multiarch/chacha20_arch.h @@ -0,0 +1,42 @@ +/* PowerPC optimization for ChaCha20. + Copyright (C) 2022 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#include + +unsigned int __chacha20_power8_blocks4 (uint32_t *state, uint8_t *dst, + const uint8_t *src, size_t nblks) + attribute_hidden; + +static void +chacha20_crypt (uint32_t *state, uint8_t *dst, + const uint8_t *src, size_t bytes) +{ + _Static_assert (CHACHA20_BUFSIZE % 4 == 0, + "CHACHA20_BUFSIZE not multiple of 4"); + _Static_assert (CHACHA20_BUFSIZE >= CHACHA20_BLOCK_SIZE * 4, + "CHACHA20_BUFSIZE < CHACHA20_BLOCK_SIZE * 4"); + + unsigned long int hwcap = GLRO(dl_hwcap); + unsigned long int hwcap2 = GLRO(dl_hwcap2); + if (hwcap2 & PPC_FEATURE2_ARCH_2_07 && hwcap & PPC_FEATURE_HAS_ALTIVEC) + __chacha20_power8_blocks4 (state, dst, src, + CHACHA20_BUFSIZE / CHACHA20_BLOCK_SIZE); + else + chacha20_crypt_generic (state, dst, src, bytes); +} -- cgit 1.4.1