diff options
Diffstat (limited to 'sysdeps/ia64/fpu/s_nexttowardf.S')
-rw-r--r-- | sysdeps/ia64/fpu/s_nexttowardf.S | 276 |
1 files changed, 139 insertions, 137 deletions
diff --git a/sysdeps/ia64/fpu/s_nexttowardf.S b/sysdeps/ia64/fpu/s_nexttowardf.S index fb1adaea5b..b8b976271a 100644 --- a/sysdeps/ia64/fpu/s_nexttowardf.S +++ b/sysdeps/ia64/fpu/s_nexttowardf.S @@ -1,7 +1,7 @@ .file "nexttowardf.s" -// Copyright (c) 2001 - 2003, Intel Corporation +// Copyright (c) 2001 - 2004, Intel Corporation // All rights reserved. // // Contributed 2001 by the Intel Numerics Group, Intel Corporation @@ -43,6 +43,7 @@ // 08/23/01 Corrected error tag number // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. // // API //============================================================== @@ -52,21 +53,21 @@ // // Registers used //============================================================== -nexttoward_GR_max_pexp = r14 -nexttoward_GR_min_pexp = r15 -nexttoward_GR_exp = r16 -nexttoward_GR_sig = r17 -nexttoward_GR_lnorm_sig = r18 -nexttoward_GR_sign_mask = r19 -nexttoward_GR_exp_mask = r20 -nexttoward_GR_sden_sig = r21 -nexttoward_GR_new_sig = r22 -nexttoward_GR_new_exp = r23 -nexttoward_GR_lden_sig = r24 -nexttoward_GR_snorm_sig = r25 -nexttoward_GR_exp1 = r26 -nexttoward_GR_x_exp = r27 -nexttoward_GR_min_den_rexp = r28 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 +GR_min_den_rexp = r28 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -76,20 +77,21 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTTOWARD_lnorm_sig = f10 -NEXTTOWARD_lnorm_exp = f11 -NEXTTOWARD_lnorm = f12 -NEXTTOWARD_sden_sig = f13 -NEXTTOWARD_sden_exp = f14 -NEXTTOWARD_sden = f15 -NEXTTOWARD_save_f8 = f33 -NEXTTOWARD_new_exp = f34 -NEXTTOWARD_new_sig = f35 -NEXTTOWARD_lden_sig = f36 -NEXTTOWARD_snorm_sig = f37 
-NEXTTOWARD_exp1 = f38 -NEXTTOWARD_tmp = f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp = f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_sden_exp = f14 +FR_sden = f15 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -104,21 +106,21 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Extract signexp from x // Form smallest denormal significand = ulp size { .mlx - getf.exp nexttoward_GR_exp = f8 - movl nexttoward_GR_sden_sig = 0x0000010000000000 + getf.exp GR_exp = f8 + movl GR_sden_sig = 0x0000010000000000 } // Form largest normal exponent // Is x < y ? p10 if yes, p11 if no // Form smallest normal exponent { .mfi - addl nexttoward_GR_max_pexp = 0x1007e, r0 + addl GR_max_pexp = 0x1007e, r0 fcmp.lt.s1 p10,p11 = f8, f9 - addl nexttoward_GR_min_pexp = 0x0ff81, r0 ;; + addl GR_min_pexp = 0x0ff81, r0 ;; } // Is x=y? { .mfi - getf.sig nexttoward_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 nop.i 0 } @@ -126,14 +128,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Form largest normal significand { .mlx nop.m 0 - movl nexttoward_GR_lnorm_sig = 0xffffff0000000000 ;; + movl GR_lnorm_sig = 0xffffff0000000000 ;; } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTTOWARD_lnorm_sig = nexttoward_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 0 - addl nexttoward_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and signexp to fp regs @@ -142,14 +144,14 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTTOWARD_sden_sig = nexttoward_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi - setf.exp 
NEXTTOWARD_sden_exp = nexttoward_GR_min_pexp + setf.exp FR_sden_exp = GR_min_pexp nop.f 999 -(p11) cmp.ge p12,p13 = nexttoward_GR_exp, nexttoward_GR_sign_mask ;; +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -158,33 +160,33 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fnorm.s.s0 f8=f9 //Normalise - dep.z nexttoward_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } { .mlx -(p13) sub nexttoward_GR_new_sig = nexttoward_GR_sig, nexttoward_GR_sden_sig - movl nexttoward_GR_lden_sig = 0x7fffff0000000000 ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7fffff0000000000 ;; } // Move expected result significand and signexp to fp regs // Is y=nan? // Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTTOWARD_new_exp = nexttoward_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nexttoward_GR_exp1 = 1, nexttoward_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTTOWARD_new_sig = nexttoward_GR_new_sig -(p13) add nexttoward_GR_exp1 = -1, nexttoward_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTTOWARD_lnorm_exp = nexttoward_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } @@ -197,12 +199,12 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? 
{ .mfi - setf.exp NEXTTOWARD_exp1 = nexttoward_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nexttoward_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTTOWARD_snorm_sig = nexttoward_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -210,16 +212,16 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTTOWARD_lden_sig = nexttoward_GR_lden_sig - mov NEXTTOWARD_save_f8 = f8 -(p7) br.cond.spnt NEXTTOWARD_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nexttoward_GR_x_exp = nexttoward_GR_exp_mask, nexttoward_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTTOWARD_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -238,35 +240,35 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // // Form exponent of smallest float denormal (if normalized register format) { .mmi - adds nexttoward_GR_min_den_rexp = -23, nexttoward_GR_min_pexp -(p12) cmp.eq.unc p6,p0 = nexttoward_GR_new_sig, r0 -(p13) cmp.eq.unc p8,p10 = nexttoward_GR_new_sig, nexttoward_GR_lden_sig ;; + adds GR_min_den_rexp = -23, GR_min_pexp +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_max_pexp -(p8) cmp.gt.unc p8,p9 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp -(p10) cmp.eq.unc p10,p0 = nexttoward_GR_new_sig, r0 ;; +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; } // Create small normal in case need to generate underflow flag { .mfi -(p10) cmp.le.unc p10,p0 = nexttoward_GR_x_exp, 
nexttoward_GR_min_pexp - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig -(p9) cmp.gt.unc p9,p14 = nexttoward_GR_x_exp, nexttoward_GR_min_den_rexp +(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig +(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp } // Branch if cases 1, 2, 3 { .bbb -(p6) br.cond.spnt NEXTTOWARD_EXPUP -(p7) br.cond.spnt NEXTTOWARD_OVERFLOW -(p8) br.cond.spnt NEXTTOWARD_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5, 6 { .bbb -(p9) br.cond.spnt NEXTTOWARD_NORM_TO_DENORM -(p10) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO -(p14) br.cond.spnt NEXTTOWARD_UNDERFLOW_TO_ZERO ;; +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO +(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } // Here if no special cases @@ -274,68 +276,72 @@ GLOBAL_LIBM_ENTRY(nexttowardf) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p7) tbit.z p6,p0 = nexttoward_GR_new_sig, 63 ;; +(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTTOWARD_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - nop.i 999 ;; +(p6) fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.s.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.s.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTTOWARD_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = 
NEXTTOWARD_exp1, NEXTTOWARD_snorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nexttoward_GR_x_exp, nexttoward_GR_min_pexp - fmerge.se f8 = NEXTTOWARD_exp1, NEXTTOWARD_lnorm_sig - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTTOWARD_new_exp, NEXTTOWARD_lden_sig + fmerge.se f8 = FR_new_exp, FR_lden_sig nop.i 999 } // Force underflow and inexact { .mfb nop.m 999 - fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTTOWARD_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTTOWARD_save_f8,f0 - br.cond.sptk NEXTTOWARD_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTTOWARD_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest float @@ -343,17 +349,17 @@ NEXTTOWARD_INF: { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTTOWARD_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTTOWARD_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -362,76 +368,72 @@ NEXTTOWARD_ZERO: { .mfi nop.m 999 - fmerge.se NEXTTOWARD_sden = NEXTTOWARD_sden_exp,NEXTTOWARD_sden_sig + fmerge.se FR_sden = FR_sden_exp,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTTOWARD_tmp = NEXTTOWARD_sden_exp, NEXTTOWARD_lnorm_sig + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig nop.i 999 ;; } // 
Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = f9,NEXTTOWARD_sden - nop.i 999;; + fmerge.s f8 = f9,FR_sden + nop.i 999 ;; } // Force underflow and inexact flags { .mfb nop.m 999 - fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_tmp,NEXTTOWARD_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nexttowardf) -// Stack operations when calling error support. -// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 272 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTTOWARD_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTTOWARD_save_f8, f0 - nop.i 999 + frcpa.s1 f8,p6 = FR_save_f8, f0 + nop.i 999 ;; } -// Create largest float +// Create largest float { .mfi nop.m 999 - fmerge.se NEXTTOWARD_lnorm = NEXTTOWARD_lnorm_exp,NEXTTOWARD_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 200 // Error code - fma.s.s0 NEXTTOWARD_tmp = NEXTTOWARD_lnorm,NEXTTOWARD_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 200 // Error code + fma.s.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; 
+GLOBAL_LIBM_END(nexttowardf) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi add GR_Parameter_Y=-32,sp // Parameter 2 value @@ -458,7 +460,7 @@ NEXTTOWARD_OVERFLOW: .body // (3) { .mib - stfs [GR_Parameter_X] = NEXTTOWARD_save_f8 // STORE Parameter 1 on stack + stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } |