1 files changed, 172 insertions, 144 deletions
diff --git a/sysdeps/ia64/fpu/s_roundl.S b/sysdeps/ia64/fpu/s_roundl.S
index 79dff00c06..b30f590917 100644
--- a/sysdeps/ia64/fpu/s_roundl.S
+++ b/sysdeps/ia64/fpu/s_roundl.S
@@ -1,10 +1,11 @@
 .file "roundl.s"
 
-
-// Copyright (c) 2000 - 2003, Intel Corporation
+// Copyright (C) 2000, 2001, Intel Corporation
 // All rights reserved.
-//
-// Contributed 2000 by the Intel Numerics Group, Intel Corporation
+// 
+// Contributed 10/25/2000 by John Harrison, Cristina Iordache, Ted Kubaska,
+// Bob Norin, Tom Rowan, Shane Story, and Ping Tak Peter Tang of the
+// Computational Software Lab, Intel Corporation.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,202 +21,229 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// 
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at
-// http://www.intel.com/software/products/opensource/libraries/num.htm.
+// problem reports or change requests be submitted to it directly at 
+// http://developer.intel.com/opensource.
 //
 // History
 //==============================================================
-// 10/25/00 Initial version
-// 06/14/01 Changed cmp to an equivalent form
-// 05/20/02 Cleaned up namespace and sf0 syntax
-// 01/20/03 Improved performance and reduced code size
-// 04/18/03 Eliminate possible WAW dependency warning
+// 10/25/2000: Created
 //==============================================================
-
+//
 // API
 //==============================================================
 // long double roundl(long double x)
-//==============================================================
-
-// general input registers:
-// r14 - r19
+//
 
-rSignexp   = r14
-rExp       = r15
-rExpMask   = r16
-rBigexp    = r17
-rExpHalf   = r18
-rExpMHalf  = r19
+#include "libm_support.h"
 
-// floating-point registers:
-// f8 - f13
+// general registers used:
+// r14 - r19
+roundl_GR_half      = r14 // loaded with 0x0fffe, then fed to setf.exp to form +0.5
+roundl_GR_big       = r15 // loaded with 0x1003e, exponent at which x has no fraction
+roundl_GR_expmask   = r16 // loaded with 0x1ffff, mask that strips the sign bit
+roundl_GR_signexp   = r17 // sign and exponent of x (getf.exp)
+roundl_GR_exp       = r18 // signexp & expmask
+roundl_GR_expdiff   = r19 // exp - big
+
+// predicate registers used: 
+// p6 - p10
 
-fXtruncInt = f9
-fNormX     = f10
-fHalf      = f11
-fMHalf     = f12
-fRem       = f13
+// floating-point registers used: f8 (argument and result), f9 - f15
 
-// predicate registers used:
-// p6 - p10
+ROUNDL_NORM_f8        = f9                        // normalized copy of x
+ROUNDL_TRUNC_f8       = f10 // x converted to integer, truncating
+ROUNDL_RINT_f8        = f11 // x converted to integer, current rounding mode
+ROUNDL_FLOAT_TRUNC_f8 = f12 // ROUNDL_TRUNC_f8 converted back to floating point
+ROUNDL_FLOAT_RINT_f8  = f13 // ROUNDL_RINT_f8 converted back to floating point
+ROUNDL_REMAINDER      = f14 // distance between x and its truncated value
+ROUNDL_HALF           = f15 // +0.5
 
 // Overview of operation
 //==============================================================
+
 // long double roundl(long double x)
-// Return an integer value (represented as a long double) that is x
-// rounded to nearest integer, halfway cases rounded away from
-// zero.
+// Return an integer value (represented as a long double) that is x 
+// rounded to nearest integer, halfway cases rounded away from 
+// zero. 
 //  if x>0   result = trunc(x+0.5)
 //  if x<0   result = trunc(x-0.5)
-//
-//==============================================================
+// *******************************************************************************
+
+// Set denormal flag for denormal input and
+// take denormal fault if necessary.
 
-// double_extended
-// if the exponent is > 1003e => 3F(true) = 63(decimal)
-// we have a significand of 64 bits 1.63-bits.
-// If we multiply by 2^63, we no longer have a fractional part
-// So input is an integer value already.
+// If x is NAN, ZERO, INFINITY, or |x| >= 2^63 then return x (normalized)
 
-// double
-// if the exponent is >= 10033 => 34(true) = 52(decimal)
-// 34 + 3ff = 433
-// we have a significand of 53 bits 1.52-bits. (implicit 1)
-// If we multiply by 2^52, we no longer have a fractional part
-// So input is an integer value already.
+// qnan snan inf norm     unorm 0 -+
+// 1    1    1   0        0     1 11     0xe7
 
-// single
-// if the exponent is > 10016 => 17(true) = 23(decimal)
-// we have a significand of 24 bits 1.23-bits. (implicit 1)
-// If we multiply by 2^23, we no longer have a fractional part
-// So input is an integer value already.
 
+.align 32
+.global roundl#
 
 .section .text
-GLOBAL_LIBM_ENTRY(roundl)
+.proc  roundl#
+.align 32
+// roundl(x): x arrives in f8 and the rounded result is returned in f8.
 
+roundl:
+// Scratch used below: r14-r19, f9-f15, p6-p10.  No memory is accessed.
+// Get sign/exponent field of +0.5 (0x0fffe)
+// Truncate x to integer
 { .mfi
-      getf.exp         rSignexp  = f8        // Get signexp, recompute if unorm
-      fcvt.fx.trunc.s1 fXtruncInt  = f8      // Convert to int in significand
-      addl             rBigexp = 0x1003e, r0 // Set exponent at which is integer
-}
+      addl           roundl_GR_half  = 0x0fffe, r0
+      fcvt.fx.trunc.s1     ROUNDL_TRUNC_f8 = f8
+      nop.i 999
+}
+
+// Get signexp of x
+// Normalize input
+// Form exponent mask
 { .mfi
-      mov              rExpHalf    = 0x0FFFE // Form sign and exponent of 0.5
-      fnorm.s1         fNormX  = f8          // Normalize input
-      mov              rExpMask    = 0x1FFFF // Form exponent mask
+      getf.exp  roundl_GR_signexp = f8
+      fnorm     ROUNDL_NORM_f8 = f8
+      addl      roundl_GR_expmask  = 0x1ffff, r0 ;;
 }
-;;
 
-{ .mmf
-      setf.exp         fHalf = rExpHalf      // Form 0.5
-      mov              rExpMHalf   = 0x2FFFE // Form sign and exponent of -0.5
-      fclass.m         p7,p0 = f8, 0x0b      // Test x unorm
+// Form +0.5
+// Round x to integer
+{ .mfi
+      setf.exp    ROUNDL_HALF  = roundl_GR_half
+      fcvt.fx.s1  ROUNDL_RINT_f8 = f8
+      nop.i 999 ;;
 }
-;;
-
-{ .mfb
-      setf.exp         fMHalf = rExpMHalf    // Form -0.5
-      fclass.m         p6,p0 = f8, 0x1e3     // Test x natval, nan, inf
-(p7)  br.cond.spnt     ROUND_UNORM           // Branch if x unorm
+// Get exp of x
+// Test for NAN, INF, ZERO
+// Get exponent at which input has no fractional part
+{ .mfi
+      and         roundl_GR_exp = roundl_GR_expmask, roundl_GR_signexp
+      fclass.m    p8,p9 = f8,0xe7
+      addl        roundl_GR_big  = 0x1003e, r0 ;;
+}
+
+// Get exp-bigexp; expdiff >= 0 means exp >= 0x1003e, i.e. |x| >= 2^63
+// If exp is so big there is no fractional part, then turn on p8, off p9
+{ .mmi
+      sub    roundl_GR_expdiff = roundl_GR_exp, roundl_GR_big ;;
+#ifdef _LIBC
+(p9)  cmp.le.or.andcm  p8,p9 = r0, roundl_GR_expdiff  // 0 <= expdiff: same test as the #else form (lt missed exp == 0x1003e)
+#else
+(p9)  cmp.ge.or.andcm  p8,p9 = roundl_GR_expdiff, r0
+#endif
+      nop.i 999 ;;
+}
+
+// Set p6 if x<0, else set p7 (.unc clears both when p9 is off)
+{ .mfi
+      nop.m 999
+(p9)  fcmp.lt.unc  p6,p7 = f8,f0
+      nop.i 999
 }
-;;
-
-ROUND_COMMON:
-// Return here from ROUND_UNORM
+
+// If NAN, INF, ZERO, or no fractional part, result is just normalized input
 { .mfi
-      nop.m            0
-      fcmp.lt.s1       p8,p9 = f8, f0        // Test if x < 0
-      nop.i            0
+      nop.m 999
+(p8)  fnorm.s0  f8 = f8
+      nop.i 999 ;;
 }
-{ .mfb
-      and              rExp = rSignexp, rExpMask // Get biased exponent
-(p6)  fma.s0           f8 = f8, f1, f0       // Result if x natval, nan, inf
-(p6)  br.ret.spnt      b0                    // Exit if x natval, nan, inf
+
+// Float the truncated integer
+{ .mfi
+      nop.m 999
+(p9)  fcvt.xf     ROUNDL_FLOAT_TRUNC_f8 = ROUNDL_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
+// Float the rounded integer to get preliminary result
 { .mfi
-      cmp.lt           p6,p0 = rExp, rExpHalf // Is |x| < 0.5?
-      fcvt.xf          f8 = fXtruncInt        // Pre-Result if 0.5 <= |x| < 2^63
-      cmp.ge           p7,p0 = rExp, rBigexp  // Is |x| >= 2^63?
+      nop.m 999
+(p9)  fcvt.xf     ROUNDL_FLOAT_RINT_f8 = ROUNDL_RINT_f8
+      nop.i 999 ;;
+}
+
+// If x<0 and the difference of the truncated input minus the input is 0.5
+//    then result = truncated input - 1.0
+// Else if x>0 and the difference of the input minus truncated input is 0.5
+//    then result = truncated input + 1.0
+// Else
+//    result = rounded input
+// Endif
+{ .mfi
+      nop.m 999
+(p6)  fsub.s1   ROUNDL_REMAINDER = ROUNDL_FLOAT_TRUNC_f8, ROUNDL_NORM_f8
+      nop.i 999
 }
+
 { .mfi
-      cmp.lt           p10,p0 = rExp, rExpHalf // Is |x| < 0.5? 
-      nop.f            0
-      nop.i            0
+      nop.m 999
+(p7)  fsub.s1   ROUNDL_REMAINDER = ROUNDL_NORM_f8, ROUNDL_FLOAT_TRUNC_f8
+      nop.i 999 ;;
 }
-;;
 
-// We must correct result if |x| < 0.5, or |x| >= 2^63
-.pred.rel "mutex",p6,p7
+// Assume preliminary result is rounded integer
 { .mfi
-      nop.m            0
-(p6)  fmerge.s         f8 = fNormX, f0        // If |x| < 0.5, result sgn(x)*0
-      nop.i            0
+      nop.m 999
+(p9)  fnorm.s0  f8 = ROUNDL_FLOAT_RINT_f8
+      nop.i 999
 }
-{ .mfb
-(p7)  cmp.eq           p10,p0 = r0, r0        // Also turn on p10 if |x| >= 2^63
-(p7)  fma.s0           f8 = fNormX, f1, f0    // If |x| >= 2^63, result x
-(p10) br.ret.spnt      b0                     // Exit |x| < 0.5 or |x| >= 2^63
-}
-;;
 
-// Here if 0.5 <= |x| < 2^63
+// If x<0, test if result=0
 { .mfi
-      nop.m            0
-      fms.s1           fRem = fNormX, f1, f8  // Get remainder = x - trunc(x)
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p10,p0 = ROUNDL_FLOAT_RINT_f8,f0
+      nop.i 999 ;;
 }
-;;
 
+// If x<0 and result=0, set result=-0
 { .mfi
-      nop.m            0
-(p8)  fcmp.le.s1       p8,p0 = fRem, fMHalf
-      nop.i            0
+      nop.m 999
+(p10) fmerge.ns  f8 = f1,f8
+      nop.i 999
 }
+
+// If x<0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p9)  fcmp.ge.s1       p9,p0 = fRem, fHalf
-      nop.i            0
+      nop.m 999
+(p6)  fcmp.eq.unc  p6,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
+      nop.i 999 ;;
 }
-;;
-
-// If x < 0 and remainder <= -0.5, then subtract 1 from result
-// If x > 0 and remainder >= +0.5, then add 1 to result
-.pred.rel "mutex",p8,p9
+
+// If x>0, test if remainder=0.5
 { .mfi
-      nop.m            0
-(p8)  fms.s0           f8 = f8, f1, f1
-      nop.i            0
-}
-{ .mfb
-      nop.m            0
-(p9)  fma.s0           f8 = f8, f1, f1
-      br.ret.sptk      b0
+      nop.m 999
+(p7)  fcmp.eq.unc  p7,p0 = ROUNDL_REMAINDER, ROUNDL_HALF
+      nop.i 999 ;;
 }
-;;
 
-
-ROUND_UNORM:
-// Here if x unorm
+// If x<0 and remainder=0.5, result=truncated-1.0
+// If x>0 and remainder=0.5, result=truncated+1.0
+// Exit
+.pred.rel "mutex",p6,p7
+{ .mfi
+      nop.m 999
+(p6)  fsub.s0  f8 = ROUNDL_FLOAT_TRUNC_f8,f1
+      nop.i 999
+}
+
 { .mfb
-      getf.exp         rSignexp  = fNormX     // Get signexp, recompute if unorm
-      fcmp.eq.s0       p7,p0 = f8, f0         // Dummy op to set denormal flag
-      br.cond.sptk     ROUND_COMMON           // Return to main path
+      nop.m 999
+(p7)  fadd.s0  f8 = ROUNDL_FLOAT_TRUNC_f8,f1
+      br.ret.sptk  b0 ;;
 }
-;;
 
-GLOBAL_LIBM_END(roundl)
+.endp roundl
+ASM_SIZE_DIRECTIVE(roundl)