diff options
Diffstat (limited to 'sysdeps/ia64/fpu/s_nextafterf.S')
-rw-r--r-- | sysdeps/ia64/fpu/s_nextafterf.S | 274 |
1 files changed, 138 insertions, 136 deletions
diff --git a/sysdeps/ia64/fpu/s_nextafterf.S b/sysdeps/ia64/fpu/s_nextafterf.S index 6d2a92796d..0c269eca9e 100644 --- a/sysdeps/ia64/fpu/s_nextafterf.S +++ b/sysdeps/ia64/fpu/s_nextafterf.S @@ -1,7 +1,7 @@ .file "nextafterf.s" -// Copyright (c) 2000 - 2003, Intel Corporation +// Copyright (c) 2000 - 2004, Intel Corporation // All rights reserved. // // Contributed 2000 by the Intel Numerics Group, Intel Corporation @@ -51,6 +51,7 @@ // fixed flag settings for several cases // 05/20/02 Cleaned up namespace and sf0 syntax // 02/10/03 Reordered header: .section, .global, .proc, .align +// 12/14/04 Added error handling on underflow. // // API //============================================================== @@ -60,21 +61,21 @@ // // Registers used //============================================================== -nextafter_GR_max_pexp = r14 -nextafter_GR_min_pexp = r15 -nextafter_GR_exp = r16 -nextafter_GR_sig = r17 -nextafter_GR_lnorm_sig = r18 -nextafter_GR_sign_mask = r19 -nextafter_GR_exp_mask = r20 -nextafter_GR_sden_sig = r21 -nextafter_GR_new_sig = r22 -nextafter_GR_new_exp = r23 -nextafter_GR_lden_sig = r24 -nextafter_GR_snorm_sig = r25 -nextafter_GR_exp1 = r26 -nextafter_GR_x_exp = r27 -nextafter_GR_min_den_rexp = r28 +GR_max_pexp = r14 +GR_min_pexp = r15 +GR_exp = r16 +GR_sig = r17 +GR_lnorm_sig = r18 +GR_sign_mask = r19 +GR_exp_mask = r20 +GR_sden_sig = r21 +GR_new_sig = r22 +GR_new_exp = r23 +GR_lden_sig = r24 +GR_snorm_sig = r25 +GR_exp1 = r26 +GR_x_exp = r27 +GR_min_den_rexp = r28 // r36-39 parameters for libm_error_support GR_SAVE_B0 = r34 @@ -84,20 +85,21 @@ GR_SAVE_PFS = r32 GR_Parameter_X = r36 GR_Parameter_Y = r37 GR_Parameter_RESULT = r38 - -NEXTAFTER_lnorm_sig = f10 -NEXTAFTER_lnorm_exp = f11 -NEXTAFTER_lnorm = f12 -NEXTAFTER_sden_sig = f13 -NEXTAFTER_sden_exp = f14 -NEXTAFTER_sden = f15 -NEXTAFTER_save_f8 = f33 -NEXTAFTER_new_exp = f34 -NEXTAFTER_new_sig = f35 -NEXTAFTER_lden_sig = f36 -NEXTAFTER_snorm_sig = f37 -NEXTAFTER_exp1 = f38 -NEXTAFTER_tmp 
= f39 +GR_Parameter_TAG = r39 + +FR_lnorm_sig = f10 +FR_lnorm_exp = f11 +FR_lnorm = f12 +FR_sden_sig = f13 +FR_sden_exp = f14 +FR_sden = f15 +FR_save_f8 = f33 +FR_new_exp = f34 +FR_new_sig = f35 +FR_lden_sig = f36 +FR_snorm_sig = f37 +FR_exp1 = f38 +FR_tmp = f39 // // Overview of operation @@ -112,21 +114,21 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Extract signexp from x // Form smallest denormal significand = ulp size { .mlx - getf.exp nextafter_GR_exp = f8 - movl nextafter_GR_sden_sig = 0x0000010000000000 + getf.exp GR_exp = f8 + movl GR_sden_sig = 0x0000010000000000 } // Form largest normal exponent // Is x < y ? p10 if yes, p11 if no // Form smallest normal exponent { .mfi - addl nextafter_GR_max_pexp = 0x1007e, r0 + addl GR_max_pexp = 0x1007e, r0 fcmp.lt.s1 p10,p11 = f8, f9 - addl nextafter_GR_min_pexp = 0x0ff81, r0 ;; + addl GR_min_pexp = 0x0ff81, r0 ;; } // Is x=y? { .mfi - getf.sig nextafter_GR_sig = f8 + getf.sig GR_sig = f8 fcmp.eq.s0 p6,p0 = f8, f9 nop.i 0 } @@ -134,14 +136,14 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Form largest normal significand { .mlx nop.m 0 - movl nextafter_GR_lnorm_sig = 0xffffff0000000000 ;; + movl GR_lnorm_sig = 0xffffff0000000000 ;; } // Move largest normal significand to fp reg for special cases { .mfi - setf.sig NEXTAFTER_lnorm_sig = nextafter_GR_lnorm_sig + setf.sig FR_lnorm_sig = GR_lnorm_sig nop.f 0 - addl nextafter_GR_sign_mask = 0x20000, r0 ;; + addl GR_sign_mask = 0x20000, r0 ;; } // Move smallest denormal significand and signexp to fp regs @@ -150,14 +152,14 @@ GLOBAL_LIBM_ENTRY(nextafterf) // It increases (p12 set) if x<y and x>=0 or if x>y and x<0 // It decreases (p13 set) if x<y and x<0 or if x>y and x>=0 { .mfi - setf.sig NEXTAFTER_sden_sig = nextafter_GR_sden_sig + setf.sig FR_sden_sig = GR_sden_sig fclass.m p8,p0 = f8, 0xc3 -(p10) cmp.lt p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask +(p10) cmp.lt p12,p13 = GR_exp, GR_sign_mask } { .mfi - setf.exp NEXTAFTER_sden_exp = nextafter_GR_min_pexp + setf.exp FR_sden_exp = 
GR_min_pexp nop.f 999 -(p11) cmp.ge p12,p13 = nextafter_GR_exp, nextafter_GR_sign_mask ;; +(p11) cmp.ge p12,p13 = GR_exp, GR_sign_mask ;; } .pred.rel "mutex",p12,p13 @@ -166,33 +168,33 @@ GLOBAL_LIBM_ENTRY(nextafterf) // If x=y set result to y // Form smallest normal significand and largest denormal significand { .mfi -(p12) add nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig +(p12) add GR_new_sig = GR_sig, GR_sden_sig (p6) fmerge.s f8=f9,f9 - dep.z nextafter_GR_snorm_sig = 1,63,1 // 0x8000000000000000 + dep.z GR_snorm_sig = 1,63,1 // 0x8000000000000000 } { .mlx -(p13) sub nextafter_GR_new_sig = nextafter_GR_sig, nextafter_GR_sden_sig - movl nextafter_GR_lden_sig = 0x7fffff0000000000 ;; +(p13) sub GR_new_sig = GR_sig, GR_sden_sig + movl GR_lden_sig = 0x7fffff0000000000 ;; } // Move expected result significand and signexp to fp regs // Is y=nan? // Form new exponent in case result exponent needs incrementing or decrementing { .mfi - setf.exp NEXTAFTER_new_exp = nextafter_GR_exp + setf.exp FR_new_exp = GR_exp fclass.m p9,p0 = f9, 0xc3 -(p12) add nextafter_GR_exp1 = 1, nextafter_GR_exp +(p12) add GR_exp1 = 1, GR_exp } { .mib - setf.sig NEXTAFTER_new_sig = nextafter_GR_new_sig -(p13) add nextafter_GR_exp1 = -1, nextafter_GR_exp + setf.sig FR_new_sig = GR_new_sig +(p13) add GR_exp1 = -1, GR_exp (p6) br.ret.spnt b0 ;; // Exit if x=y } // Move largest normal signexp to fp reg for special cases // Is x=zero? { .mfi - setf.exp NEXTAFTER_lnorm_exp = nextafter_GR_max_pexp + setf.exp FR_lnorm_exp = GR_max_pexp fclass.m p7,p0 = f8, 0x7 nop.i 999 } @@ -205,12 +207,12 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Move exp+-1 and smallest normal significand to fp regs for special cases // Is x=inf? 
{ .mfi - setf.exp NEXTAFTER_exp1 = nextafter_GR_exp1 + setf.exp FR_exp1 = GR_exp1 fclass.m p6,p0 = f8, 0x23 - addl nextafter_GR_exp_mask = 0x1ffff, r0 + addl GR_exp_mask = 0x1ffff, r0 } { .mfb - setf.sig NEXTAFTER_snorm_sig = nextafter_GR_snorm_sig + setf.sig FR_snorm_sig = GR_snorm_sig (p9) fma.s0 f8 = f8,f1,f9 (p9) br.ret.spnt b0 ;; // Exit if y=nan } @@ -218,16 +220,16 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Move largest denormal significand to fp regs for special cases // Save x { .mfb - setf.sig NEXTAFTER_lden_sig = nextafter_GR_lden_sig - mov NEXTAFTER_save_f8 = f8 -(p7) br.cond.spnt NEXTAFTER_ZERO ;; // Exit if x=0 + setf.sig FR_lden_sig = GR_lden_sig + mov FR_save_f8 = f8 +(p7) br.cond.spnt NEXT_ZERO ;; // Exit if x=0 } // Mask off the sign to get x_exp { .mfb - and nextafter_GR_x_exp = nextafter_GR_exp_mask, nextafter_GR_exp + and GR_x_exp = GR_exp_mask, GR_exp nop.f 999 -(p6) br.cond.spnt NEXTAFTER_INF ;; // Exit if x=inf +(p6) br.cond.spnt NEXT_INF ;; // Exit if x=inf } // Check 6 special cases when significand rolls over: @@ -246,35 +248,35 @@ GLOBAL_LIBM_ENTRY(nextafterf) // // Form exponent of smallest float denormal (if normalized register format) { .mmi - adds nextafter_GR_min_den_rexp = -23, nextafter_GR_min_pexp -(p12) cmp.eq.unc p6,p0 = nextafter_GR_new_sig, r0 -(p13) cmp.eq.unc p8,p10 = nextafter_GR_new_sig, nextafter_GR_lden_sig ;; + adds GR_min_den_rexp = -23, GR_min_pexp +(p12) cmp.eq.unc p6,p0 = GR_new_sig, r0 +(p13) cmp.eq.unc p8,p10 = GR_new_sig, GR_lden_sig ;; } { .mmi -(p6) cmp.lt.unc p6,p7 = nextafter_GR_x_exp, nextafter_GR_max_pexp -(p8) cmp.gt.unc p8,p9 = nextafter_GR_x_exp, nextafter_GR_min_pexp -(p10) cmp.eq.unc p10,p0 = nextafter_GR_new_sig, r0 ;; +(p6) cmp.lt.unc p6,p7 = GR_x_exp, GR_max_pexp +(p8) cmp.gt.unc p8,p9 = GR_x_exp, GR_min_pexp +(p10) cmp.eq.unc p10,p0 = GR_new_sig, r0 ;; } // Create small normal in case need to generate underflow flag { .mfi -(p10) cmp.le.unc p10,p0 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se 
NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig -(p9) cmp.gt.unc p9,p14 = nextafter_GR_x_exp, nextafter_GR_min_den_rexp +(p10) cmp.le.unc p10,p0 = GR_x_exp, GR_min_pexp + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig +(p9) cmp.gt.unc p9,p14 = GR_x_exp, GR_min_den_rexp } // Branch if cases 1, 2, 3 { .bbb -(p6) br.cond.spnt NEXTAFTER_EXPUP -(p7) br.cond.spnt NEXTAFTER_OVERFLOW -(p8) br.cond.spnt NEXTAFTER_EXPDOWN ;; +(p6) br.cond.spnt NEXT_EXPUP +(p7) br.cond.spnt NEXT_OVERFLOW +(p8) br.cond.spnt NEXT_EXPDOWN ;; } // Branch if cases 4, 5, 6 { .bbb -(p9) br.cond.spnt NEXTAFTER_NORM_TO_DENORM -(p10) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO -(p14) br.cond.spnt NEXTAFTER_UNDERFLOW_TO_ZERO ;; +(p9) br.cond.spnt NEXT_NORM_TO_DENORM +(p10) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO +(p14) br.cond.spnt NEXT_UNDERFLOW_TO_ZERO ;; } // Here if no special cases @@ -282,68 +284,72 @@ GLOBAL_LIBM_ENTRY(nextafterf) // Case 1: x_exp=min_exp, x_sig=unnormalized // Case 2: x_exp<min_exp { .mfi - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_new_sig + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_new_exp, FR_new_sig nop.i 999 ;; } { .mfi nop.m 999 nop.f 999 -(p7) tbit.z p6,p0 = nextafter_GR_new_sig, 63 ;; +(p7) tbit.z p6,p0 = GR_new_sig, 63 ;; } -NEXTAFTER_COMMON_FINISH: +NEXT_COMMON_FINISH: // Force underflow and inexact if denormal result { .mfi nop.m 999 -(p6) fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - nop.i 999 ;; +(p6) fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + nop.i 999 +} +{ .mfb + nop.m 999 + fnorm.s.s0 f8 = f8 // Final normalization to result precision +(p6) br.cond.spnt NEXT_UNDERFLOW ;; } -// Final normalization to result precision and exit { .mfb nop.m 999 - fnorm.s.s0 f8 = f8 + nop.f 999 br.ret.sptk b0;; } //Special cases -NEXTAFTER_EXPUP: +NEXT_EXPUP: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_snorm_sig - br.cond.sptk 
NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_snorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_EXPDOWN: +NEXT_EXPDOWN: { .mfb - cmp.lt p6,p7 = nextafter_GR_x_exp, nextafter_GR_min_pexp - fmerge.se f8 = NEXTAFTER_exp1, NEXTAFTER_lnorm_sig - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + cmp.lt p6,p7 = GR_x_exp, GR_min_pexp + fmerge.se f8 = FR_exp1, FR_lnorm_sig + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_NORM_TO_DENORM: +NEXT_NORM_TO_DENORM: { .mfi nop.m 999 - fmerge.se f8 = NEXTAFTER_new_exp, NEXTAFTER_lden_sig + fmerge.se f8 = FR_new_exp, FR_lden_sig nop.i 999 } // Force underflow and inexact { .mfb nop.m 999 - fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -NEXTAFTER_UNDERFLOW_TO_ZERO: +NEXT_UNDERFLOW_TO_ZERO: { .mfb cmp.eq p6,p0 = r0,r0 - fmerge.s f8 = NEXTAFTER_save_f8,f0 - br.cond.sptk NEXTAFTER_COMMON_FINISH ;; + fmerge.s f8 = FR_save_f8,f0 + br.cond.sptk NEXT_COMMON_FINISH ;; } -NEXTAFTER_INF: +NEXT_INF: // Here if f8 is +- infinity // INF // if f8 is +inf, no matter what y is return largest float @@ -351,17 +357,17 @@ NEXTAFTER_INF: { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } { .mfb nop.m 999 - fmerge.s f8 = f8,NEXTAFTER_lnorm + fmerge.s f8 = f8,FR_lnorm br.ret.sptk b0 ;; } -NEXTAFTER_ZERO: +NEXT_ZERO: // Here if f8 is +- zero // ZERO @@ -370,76 +376,72 @@ NEXTAFTER_ZERO: { .mfi nop.m 999 - fmerge.se NEXTAFTER_sden = NEXTAFTER_sden_exp,NEXTAFTER_sden_sig + fmerge.se FR_sden = FR_sden_exp,FR_sden_sig nop.i 999 ;; } // Create small normal to generate underflow flag { .mfi nop.m 999 - fmerge.se NEXTAFTER_tmp = NEXTAFTER_sden_exp, NEXTAFTER_lnorm_sig + fmerge.se FR_tmp = FR_sden_exp, FR_lnorm_sig nop.i 999 ;; } // Add correct sign from direction arg { .mfi nop.m 999 - fmerge.s f8 = 
f9,NEXTAFTER_sden + fmerge.s f8 = f9,FR_sden nop.i 999 ;; } // Force underflow and inexact flags { .mfb nop.m 999 - fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_tmp,NEXTAFTER_tmp,f0 - br.ret.sptk b0 ;; + fma.s.s0 FR_tmp = FR_tmp,FR_tmp,f0 + br.cond.sptk NEXT_UNDERFLOW ;; } -GLOBAL_LIBM_END(nextafterf) -// Stack operations when calling error support. -// (1) (2) (3) (call) (4) -// sp -> + psp -> + psp -> + sp -> + -// | | | | -// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8 -// | | | | -// | <-GR_Y Y2->| Y2 ->| <- GR_Y | -// | | | | -// | | <- GR_X X1 ->| | -// | | | | -// sp-64 -> + sp -> + sp -> + + -// save ar.pfs save b0 restore gp -// save gp restore ar.pfs - - +NEXT_UNDERFLOW: +// Here if result is a denorm, or input is finite and result is zero +// Call error support to report possible range error +{ .mib + alloc r32=ar.pfs,2,2,4,0 + mov GR_Parameter_TAG = 269 // Error code + br.cond.sptk __libm_error_region // Branch to error call +} +;; -LOCAL_LIBM_ENTRY(__libm_error_region) -NEXTAFTER_OVERFLOW: -// Here if f8 is finite, but result will be infinite +NEXT_OVERFLOW: +// Here if input is finite, but result will be infinite // Use frcpa to generate infinity of correct sign // Call error support to report possible range error -.prologue - { .mfi alloc r32=ar.pfs,2,2,4,0 - frcpa.s1 f8,p6 = NEXTAFTER_save_f8, f0 - nop.i 999 + frcpa.s1 f8,p6 = FR_save_f8, f0 + nop.i 999 ;; } -// Create largest float +// Create largest float { .mfi nop.m 999 - fmerge.se NEXTAFTER_lnorm = NEXTAFTER_lnorm_exp,NEXTAFTER_lnorm_sig + fmerge.se FR_lnorm = FR_lnorm_exp,FR_lnorm_sig nop.i 999 ;; } // Force overflow and inexact flags to be set -{ .mfi - mov r39 = 155 // Error code - fma.s.s0 NEXTAFTER_tmp = NEXTAFTER_lnorm,NEXTAFTER_lnorm,f0 - nop.i 999 +{ .mfb + mov GR_Parameter_TAG = 155 // Error code + fma.s.s0 FR_tmp = FR_lnorm,FR_lnorm,f0 + br.cond.sptk __libm_error_region // Branch to error call } ;; +GLOBAL_LIBM_END(nextafterf) + + +LOCAL_LIBM_ENTRY(__libm_error_region) +.prologue + // (1) { .mfi 
add GR_Parameter_Y=-32,sp // Parameter 2 value @@ -466,7 +468,7 @@ NEXTAFTER_OVERFLOW: .body // (3) { .mib - stfs [GR_Parameter_X] = NEXTAFTER_save_f8 // STORE Parameter 1 on stack + stfs [GR_Parameter_X] = FR_save_f8 // STORE Parameter 1 on stack add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address nop.b 0 } |