From 88d85d4f001fac0fd7ef4e3d05ca8a7d50b0c98f Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Wed, 28 Mar 2012 22:35:26 -0700
Subject: Optimize mempcpy on sparc.

	* sysdeps/sparc/sparc32/memcpy.S: Implement mempcpy using a stub
	that branches into memcpy.
	* sysdeps/sparc/sparc64/memcpy.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy.S: Add mempcpy multiarch
	bits.
	* sysdeps/sparc/sparc64/rtld-memcpy.c: Include generic mempcpy
	implementation too.
	* sysdeps/sparc/mempcpy.S: New file.
---
 sysdeps/sparc/sparc64/memcpy.S                    | 23 ++++++---
 sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S | 10 +++-
 sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S | 10 +++-
 sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S   |  6 +++
 sysdeps/sparc/sparc64/multiarch/memcpy.S          | 60 +++++++++++++++++++++++
 sysdeps/sparc/sparc64/rtld-memcpy.c               |  1 +
 6 files changed, 99 insertions(+), 11 deletions(-)

(limited to 'sysdeps/sparc/sparc64')

diff --git a/sysdeps/sparc/sparc64/memcpy.S b/sysdeps/sparc/sparc64/memcpy.S
index a77c4e441f..668ebecef1 100644
--- a/sysdeps/sparc/sparc64/memcpy.S
+++ b/sysdeps/sparc/sparc64/memcpy.S
@@ -374,19 +374,24 @@ ENTRY(__memcpy_large)
 	mov		%g4, %o0
 END(__memcpy_large)
 
+ENTRY(__mempcpy)
+	ba,pt		%xcc, 210f
+	 add		%o0, %o2, %g4
+END(__mempcpy)
+
 	.align		32
 ENTRY(memcpy)
+	mov		%o0, %g4			/* IEU0 Group */
 210:
 #ifndef USE_BPR
-	srl		%o2, 0, %o2			/* IEU1 Group */
+	srl		%o2, 0, %o2			/* IEU1 */
 #endif
 	brz,pn		%o2, 209b			/* CTI Group */
-	 mov		%o0, %g4			/* IEU0 */
-218:	cmp		%o2, 15				/* IEU1 Group */
-	bleu,pn		%xcc, 208b			/* CTI */
-	 cmp		%o2, (64 * 6)			/* IEU1 Group */
-	bgeu,pn		%xcc, 200b			/* CTI */
-	 andcc		%o0, 7, %g2			/* IEU1 Group */
+218:	cmp		%o2, 15				/* IEU1 */
+	bleu,pn		%xcc, 208b			/* CTI Group */
+	 cmp		%o2, (64 * 6)			/* IEU1 */
+	bgeu,pn		%xcc, 200b			/* CTI Group */
+	 andcc		%o0, 7, %g2			/* IEU1 */
 	 sub		%o0, %o1, %g5			/* IEU0 */
 	andcc		%g5, 3, %o5			/* IEU1 Group */
 	bne,pn		%xcc, 212f			/* CTI */
@@ -569,3 +574,7 @@ ENTRY(memcpy)
 END(memcpy)
 
 libc_hidden_builtin_def (memcpy)
+
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
index 8bada0eeec..6ba1b0c6e9 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara1.S
@@ -43,13 +43,19 @@
 
 	.text
 
+ENTRY(__mempcpy_niagara1)
+	ba,pt		%XCC, 101f
+	 add		%o0, %o2, %g5
+END(__mempcpy_niagara1)
+
 	.align		32
 ENTRY(__memcpy_niagara1)
+100:	/* %o0=dst, %o1=src, %o2=len */
+	mov		%o0, %g5
+101:
 # ifndef USE_BPR
 	srl		%o2, 0, %o2
 # endif
-100:	/* %o0=dst, %o1=src, %o2=len */
-	mov		%o0, %g5
 	cmp		%o2, 0
 	be,pn		%XCC, 85f
 218:	 or		%o0, %o1, %o3
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
index ccbb0252f3..0e9442de5f 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
@@ -145,13 +145,19 @@
 
 	.text
 
+ENTRY(__mempcpy_niagara2)
+	ba,pt		%XCC, 101f
+	 add		%o0, %o2, %g5
+END(__mempcpy_niagara2)
+
 	.align		32
 ENTRY(__memcpy_niagara2)
+100:	/* %o0=dst, %o1=src, %o2=len */
+	mov		%o0, %g5
+101:
 # ifndef USE_BPR
 	srl		%o2, 0, %o2
 # endif
-100:	/* %o0=dst, %o1=src, %o2=len */
-	mov		%o0, %g5
 	cmp		%o2, 0
 	be,pn		%XCC, 85f
 218:	 or		%o0, %o1, %o3
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
index 7e21665d5f..0784ba9b5d 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-ultra3.S
@@ -38,6 +38,11 @@
 
 	.text
 
+ENTRY(__mempcpy_ultra3)
+	ba,pt		%XCC, 101f
+	 add		%o0, %o2, %g5
+END(__mempcpy_ultra3)
+
 /* Special/non-trivial issues of this code:
  *
  * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
@@ -57,6 +62,7 @@
 ENTRY(__memcpy_ultra3)
 100:	/* %o0=dst, %o1=src, %o2=len */
 	mov		%o0, %g5
+101:
 	cmp		%o2, 0
 	be,pn		%XCC, out
 218:	 or		%o0, %o1, %o3
diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy.S b/sysdeps/sparc/sparc64/multiarch/memcpy.S
index 0f3751e5e7..20c72d9bbd 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy.S
@@ -72,12 +72,72 @@ ENTRY(memcpy)
 	 mov	%o1, %o0
 END(memcpy)
 
+ENTRY(__mempcpy)
+	.type	__mempcpy, @gnu_indirect_function
+# ifdef SHARED
+	SETUP_PIC_REG_LEAF(o3, o5)
+# endif
+	andcc	%o0, HWCAP_SPARC_N2, %g0
+	be	1f
+	 andcc	%o0, HWCAP_SPARC_BLKINIT, %g0
+# ifdef SHARED
+	sethi	%gdop_hix22(__mempcpy_niagara2), %o1
+	xor	%o1, %gdop_lox10(__mempcpy_niagara2), %o1
+# else
+	set	__mempcpy_niagara2, %o1
+# endif
+	ba	10f
+	 nop
+1:	be	1f
+	 andcc	%o0, HWCAP_SPARC_ULTRA3, %g0
+# ifdef SHARED
+	sethi	%gdop_hix22(__mempcpy_niagara1), %o1
+	xor	%o1, %gdop_lox10(__mempcpy_niagara1), %o1
+# else
+	set	__mempcpy_niagara1, %o1
+# endif
+	ba	10f
+	 nop
+1:	be	9f
+	 nop
+# ifdef SHARED
+	sethi	%gdop_hix22(__mempcpy_ultra3), %o1
+	xor	%o1, %gdop_lox10(__mempcpy_ultra3), %o1
+# else
+	set	__mempcpy_ultra3, %o1
+# endif
+	ba	10f
+	 nop
+9:
+# ifdef SHARED
+	sethi	%gdop_hix22(__mempcpy_ultra1), %o1
+	xor	%o1, %gdop_lox10(__mempcpy_ultra1), %o1
+# else
+	set	__mempcpy_ultra1, %o1
+# endif
+10:
+# ifdef SHARED
+	add	%o3, %o1, %o1
+# endif
+	retl
+	 mov	%o1, %o0
+END(__mempcpy)
+
 libc_hidden_builtin_def (memcpy)
 
+libc_hidden_def (__mempcpy)
+weak_alias (__mempcpy, mempcpy)
+libc_hidden_builtin_def (mempcpy)
+
 #undef libc_hidden_builtin_def
 #define libc_hidden_builtin_def(name)
+#undef weak_alias
+#define weak_alias(x, y)
+#undef libc_hidden_def
+#define libc_hidden_def(name)
 
 #define memcpy __memcpy_ultra1
+#define __mempcpy __mempcpy_ultra1
 
 #endif
 
diff --git a/sysdeps/sparc/sparc64/rtld-memcpy.c b/sysdeps/sparc/sparc64/rtld-memcpy.c
index 5e50e6effe..b1b06479d2 100644
--- a/sysdeps/sparc/sparc64/rtld-memcpy.c
+++ b/sysdeps/sparc/sparc64/rtld-memcpy.c
@@ -1 +1,2 @@
 #include
+#include
-- 
cgit 1.4.1