From 3baddb72a4181e05d0d279a6b345635641e13a18 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 5 Oct 2012 14:46:47 -0700 Subject: Add Niagara-4 optimized memset/bzero implementation. * sysdeps/sparc/sparc64/multiarch/memset-niagara4.S: New file. * sysdeps/sparc/sparc32/sparcv9/multiarch/memset-niagara4.S: New file. * sysdeps/sparc/sparc64/multiarch/Makefile: Add to sysdep_routines. * sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: Likewise. * sysdeps/sparc/sparc64/multiarch/memset.S: Use Niagara-4 memset and bzero when HWCAP_SPARC_CRYPTO is present. --- sysdeps/sparc/sparc64/multiarch/Makefile | 2 +- sysdeps/sparc/sparc64/multiarch/memset-niagara4.S | 124 ++++++++++++++++++++++ sysdeps/sparc/sparc64/multiarch/memset.S | 30 +++++- 3 files changed, 151 insertions(+), 5 deletions(-) create mode 100644 sysdeps/sparc/sparc64/multiarch/memset-niagara4.S (limited to 'sysdeps/sparc/sparc64') diff --git a/sysdeps/sparc/sparc64/multiarch/Makefile b/sysdeps/sparc/sparc64/multiarch/Makefile index 377dfc7330..7358bdb167 100644 --- a/sysdeps/sparc/sparc64/multiarch/Makefile +++ b/sysdeps/sparc/sparc64/multiarch/Makefile @@ -1,4 +1,4 @@ ifeq ($(subdir),string) sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ - memset-niagara1 memcpy-niagara4 + memset-niagara1 memcpy-niagara4 memset-niagara4 endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S new file mode 100644 index 0000000000..c5a2f1befd --- /dev/null +++ b/sysdeps/sparc/sparc64/multiarch/memset-niagara4.S @@ -0,0 +1,124 @@ +/* Set a block of memory to some byte value. For SUN4V Niagara-4. + Copyright (C) 2012 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by David S. Miller (davem@davemloft.net) + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include + +#define ASI_BLK_INIT_QUAD_LDD_P 0xe2 + +#if !defined NOT_IN_libc + + .register %g2, #scratch + .register %g3, #scratch + + .text + .align 32 + +ENTRY(__memset_niagara4) + andcc %o1, 0xff, %o4 + be,pt %icc, 1f + mov %o2, %o1 + sllx %o4, 8, %g1 + or %g1, %o4, %o2 + sllx %o2, 16, %g1 + or %g1, %o2, %o2 + sllx %o2, 32, %g1 + ba,pt %icc, 1f + or %g1, %o2, %o4 +END(__memset_niagara4) + + .align 32 +ENTRY(__bzero_niagara4) + clr %o4 +1: cmp %o1, 16 + ble %icc, .Ltiny + mov %o0, %o3 + sub %g0, %o0, %g1 + and %g1, 0x7, %g1 + brz,pt %g1, .Laligned8 + sub %o1, %g1, %o1 +1: stb %o4, [%o0 + 0x00] + subcc %g1, 1, %g1 + bne,pt %icc, 1b + add %o0, 1, %o0 +.Laligned8: + cmp %o1, 64 + (64 - 8) + ble .Lmedium + sub %g0, %o0, %g1 + andcc %g1, (64 - 1), %g1 + brz,pn %g1, .Laligned64 + sub %o1, %g1, %o1 +1: stx %o4, [%o0 + 0x00] + subcc %g1, 8, %g1 + bne,pt %icc, 1b + add %o0, 0x8, %o0 +.Laligned64: + andn %o1, 64 - 1, %g1 + sub %o1, %g1, %o1 + brnz,pn %o4, .Lnon_bzero_loop + mov 0x20, %g2 +1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + subcc %g1, 0x40, %g1 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + bne,pt %icc, 1b + add %o0, 0x40, %o0 +.Lpostloop: + cmp %o1, 8 + bl,pn %icc, .Ltiny + membar #StoreStore|#StoreLoad +.Lmedium: + andn %o1, 0x7, %g1 + sub %o1, %g1, %o1 +1: stx %o4, [%o0 + 0x00] + subcc %g1, 0x8, %g1 + bne,pt %icc, 1b + add %o0, 0x08, %o0 + andcc %o1, 0x4, %g1 + be,pt %icc, .Ltiny + sub %o1, %g1, %o1 + stw %o4, [%o0 + 0x00] + add %o0, 0x4, %o0 +.Ltiny: + cmp %o1, 0 + be,pn %icc, .Lexit +1: subcc %o1, 1, %o1 + stb %o4, [%o0 + 0x00] + bne,pt %icc, 1b + add %o0, 1, %o0 +.Lexit: + retl + mov %o3, %o0 +.Lnon_bzero_loop: + mov 0x08, %g3 + mov 0x28, %o5 +1: stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + subcc %g1, 0x40, %g1 + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P + add %o0, 0x10, %o0 + stxa %o4, [%o0 + %g0] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g2] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %g3] ASI_BLK_INIT_QUAD_LDD_P + stxa %o4, [%o0 + %o5] ASI_BLK_INIT_QUAD_LDD_P + bne,pt %icc, 1b + add %o0, 0x30, %o0 + ba,a,pt %icc, .Lpostloop +END(__bzero_niagara4) + +#endif diff --git a/sysdeps/sparc/sparc64/multiarch/memset.S b/sysdeps/sparc/sparc64/multiarch/memset.S index 26cc669d6f..8b3faee915 100644 --- a/sysdeps/sparc/sparc64/multiarch/memset.S +++ b/sysdeps/sparc/sparc64/multiarch/memset.S @@ -26,8 +26,19 @@ ENTRY(memset) # ifdef SHARED SETUP_PIC_REG_LEAF(o3, o5) # endif - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 - be 9f + set HWCAP_SPARC_CRYPTO, %o1 + andcc %o0, %o1, %g0 + be 1f + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 +# ifdef SHARED + sethi %gdop_hix22(__memset_niagara4), %o1 + xor %o1, %gdop_lox10(__memset_niagara4), %o1 +# else + set __memset_niagara4, %o1 +# endif + ba 10f + nop +1: be 9f nop # ifdef SHARED sethi %gdop_hix22(__memset_niagara1), %o1 @@ -57,8 +68,19 @@ ENTRY(__bzero) # ifdef SHARED SETUP_PIC_REG_LEAF(o3, o5) # endif - andcc %o0, HWCAP_SPARC_BLKINIT, %g0 - be 9f + set HWCAP_SPARC_CRYPTO, %o1 + andcc %o0, %o1, %g0 + be 1f + andcc %o0, HWCAP_SPARC_BLKINIT, %g0 +# ifdef SHARED + sethi %gdop_hix22(__bzero_niagara4), %o1 + xor %o1, %gdop_lox10(__bzero_niagara4), %o1 +# else + set __bzero_niagara4, %o1 +# endif + ba 10f + nop +1: be 9f nop # ifdef SHARED sethi %gdop_hix22(__bzero_niagara1), %o1 -- cgit 1.4.1