From f230c29b40cc36ce62387664be92c3cf94119efe Mon Sep 17 00:00:00 2001
From: "David S. Miller"
Date: Thu, 31 May 2012 14:19:30 -0700
Subject: Avoid performance penalty in sparc optimized memcpy/memset.

fmovd clears the current exception field in the %fsr, fsrc2 does not
and therefore runs more efficiently on some cpus.

	* sysdeps/sparc/sparc64/memcpy.S: Use fsrc2 to move 64-bit values
	between float registers.
	* sysdeps/sparc/sparc64/memset.S: Likewise.
	* sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S: Likewise.
---
 sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S | 72 +++++++++++------------
 1 file changed, 36 insertions(+), 36 deletions(-)

(limited to 'sysdeps/sparc/sparc64/multiarch')

Reviewer notes (free text after "---"; not part of the commit message):
  * This is a path-limited view of the commit.  Only the
    memcpy-niagara2.S hunk is present; the memcpy.S and memset.S
    changes named in the changelog above are NOT included here.
  * The hunk rewrites every FREG_MOVE_1 .. FREG_MOVE_8 macro body from
    fmovd to fsrc2 with unchanged operands (1+2+...+8 = 36 lines).
  * Line structure was restored from a whitespace-collapsed copy.
    Tabs inside the lines and the single blank context line are
    reconstructed to match the hunk header (49 lines per side) --
    TODO confirm against upstream, or apply with whitespace ignored.
  * The author address is missing from the From: header in this copy.

diff --git a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
index 0e9442de5f..fb815e5e57 100644
--- a/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
+++ b/sysdeps/sparc/sparc64/multiarch/memcpy-niagara2.S
@@ -58,49 +58,49 @@
 	faligndata	%x7, %x8, %f14;
 
 #define FREG_MOVE_1(x0) \
-	fmovd		%x0, %f0;
+	fsrc2		%x0, %f0;
 #define FREG_MOVE_2(x0, x1) \
-	fmovd		%x0, %f0; \
-	fmovd		%x1, %f2;
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2;
 #define FREG_MOVE_3(x0, x1, x2) \
-	fmovd		%x0, %f0; \
-	fmovd		%x1, %f2; \
-	fmovd		%x2, %f4;
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4;
 #define FREG_MOVE_4(x0, x1, x2, x3) \
-	fmovd		%x0, %f0; \
-	fmovd		%x1, %f2; \
-	fmovd		%x2, %f4; \
-	fmovd		%x3, %f6;
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6;
 #define FREG_MOVE_5(x0, x1, x2, x3, x4) \
-	fmovd		%x0, %f0; \
-	fmovd		%x1, %f2; \
-	fmovd		%x2, %f4; \
-	fmovd		%x3, %f6; \
-	fmovd		%x4, %f8;
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8;
 #define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
-	fmovd		%x0, %f0; \
-	fmovd		%x1, %f2; \
-	fmovd		%x2, %f4; \
-	fmovd		%x3, %f6; \
-	fmovd		%x4, %f8; \
-	fmovd		%x5, %f10;
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8; \
+	fsrc2		%x5, %f10;
 #define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
-	fmovd		%x0, %f0; \
-	fmovd		%x1, %f2; \
-	fmovd		%x2, %f4; \
-	fmovd		%x3, %f6; \
-	fmovd		%x4, %f8; \
-	fmovd		%x5, %f10; \
-	fmovd		%x6, %f12;
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8; \
+	fsrc2		%x5, %f10; \
+	fsrc2		%x6, %f12;
 #define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
-	fmovd		%x0, %f0; \
-	fmovd		%x1, %f2; \
-	fmovd		%x2, %f4; \
-	fmovd		%x3, %f6; \
-	fmovd		%x4, %f8; \
-	fmovd		%x5, %f10; \
-	fmovd		%x6, %f12; \
-	fmovd		%x7, %f14;
+	fsrc2		%x0, %f0; \
+	fsrc2		%x1, %f2; \
+	fsrc2		%x2, %f4; \
+	fsrc2		%x3, %f6; \
+	fsrc2		%x4, %f8; \
+	fsrc2		%x5, %f10; \
+	fsrc2		%x6, %f12; \
+	fsrc2		%x7, %f14;
 #define FREG_LOAD_1(base, x0) \
 	LOAD(ldd, base + 0x00, %x0)
 #define FREG_LOAD_2(base, x0, x1) \
-- 
cgit 1.4.1