about summary refs log tree commit diff
path: root/sysdeps/ia64/fpu/e_powl.S
diff options
context:
space:
mode:
Diffstat (limited to 'sysdeps/ia64/fpu/e_powl.S')
-rw-r--r--sysdeps/ia64/fpu/e_powl.S4076
1 files changed, 1717 insertions, 2359 deletions
diff --git a/sysdeps/ia64/fpu/e_powl.S b/sysdeps/ia64/fpu/e_powl.S
index d286e9abad..0896c19aac 100644
--- a/sysdeps/ia64/fpu/e_powl.S
+++ b/sysdeps/ia64/fpu/e_powl.S
@@ -1,10 +1,10 @@
 .file "powl.s"
 
-// Copyright (C) 2000, 2001, Intel Corporation
+
+// Copyright (c) 2000 - 2003, Intel Corporation
 // All rights reserved.
-// 
-// Contributed 2/2/2000 by John Harrison, Ted Kubaska, Bob Norin, Shane Story,
-// and Ping Tak Peter Tang of the Computational Software Lab, Intel Corporation.
+//
+// Contributed 2000 by the Intel Numerics Group, Intel Corporation
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
@@ -20,61 +20,69 @@
 // * The name of Intel Corporation may not be used to endorse or promote
 // products derived from this software without specific prior written
 // permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
-// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 // OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
-// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
-// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-// 
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
 // Intel Corporation is the author of this code, and requests that all
-// problem reports or change requests be submitted to it directly at 
-// http://developer.intel.com/opensource.
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
 //
-// *********************************************************************
+//*********************************************************************
 //
 // Function:   powl(x,y), where
-//                         y
+//                          y
 //             powl(x,y) = x , for double extended precision x and y values
 //
-// *********************************************************************
+//*********************************************************************
 //
-// History: 
-// 2/02/00  (Hand Optimized)
-// 4/04/00  Unwind support added
-// 8/15/00  Bundle added after call to __libm_error_support to properly
+// History:
+// 02/02/00 (Hand Optimized)
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
 //          set [the previously overwritten] GR_Parameter_RESULT.
-// 1/22/01  Corrected results for powl(1,inf), powl(1,nan), and
+// 01/22/01 Corrected results for powl(1,inf), powl(1,nan), and
 //          powl(snan,0) to be 1 per C99, not nan.  Fixed many flag settings.
-// 2/06/01  Call __libm_error support if over/underflow when y=2.
+// 02/06/01 Call __libm_error_support if over/underflow when y=2.
+// 04/17/01 Support added for y close to 1 and x a non-special value.
+//          Shared software under/overflow detection for all paths
+// 02/07/02 Corrected sf3 setting to disable traps
+// 05/13/02 Improved performance of all paths
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+//          used data8 for long double table values
+// 04/17/03 Added missing mutex directive
 //
-// *********************************************************************
+//*********************************************************************
 //
 // Resources Used:
 //
-//    Floating-Point Registers: 
-//                        f8  (Input and Return Value)
-//                        f9-f15,f32-f63,f99 
+//    Floating-Point Registers:
+//                        f8  (Input x and Return Value)
+//                        f9  (Input y)
+//                        f10-f15,f32-f79
 //
 //    General Purpose Registers:
-//                        Locals r32 - r61
+//                        Locals r14-r24,r32-r65
 //                        Parameters to __libm_error_support r62,r63,r64,r65
 //
 //    Predicate Registers: p6-p15
 //
-// *********************************************************************
+//*********************************************************************
 //
 //  Special Cases and IEEE special conditions:
 //
 //    Denormal fault raised on denormal inputs
-//    Overflow exceptions raised when appropriate for pow 
-//    Underflow exceptions raised when appropriate for pow 
+//    Overflow exceptions raised when appropriate for pow
+//    Underflow exceptions raised when appropriate for pow
 //    (Error Handling Routine called for overflow and Underflow)
 //    Inexact raised when appropriate by algorithm
 //
@@ -102,8 +110,8 @@
 //  22. X or Y denorm/unorm and denorm/unorm operand trap is enabled,
 //      generate denorm/unorm fault except if invalid or div_0 raised.
 //
-// *********************************************************************
-// 
+//*********************************************************************
+//
 //  Algorithm
 //  =========
 //
@@ -113,23 +121,23 @@
 //    If Y = 0.5,  return sqrt(X).
 //
 //  Compute log(X) to extra precision.
-//  
+//
 //  ker_log_80( X, logX_hi, logX_lo, Safe );
 //
-//   ...logX_hi + logX_lo approximates log(X) to roughly 80 
+//   ...logX_hi + logX_lo approximates log(X) to roughly 80
 //   ...significant bits of accuracy.
 //
 //  Compute Y*log(X) to extra precision.
 //
 //    P_hi := Y * logX_hi
-//    P_lo := Y * logX_hi - P_hi	...using FMA
-//    P_lo := Y * logX_lo + P_lo	...using FMA
+//    P_lo := Y * logX_hi - P_hi       ...using FMA
+//    P_lo := Y * logX_lo + P_lo       ...using FMA
 //
 //  Compute exp(P_hi + P_lo)
 //
-//    Flag := 2; 
+//    Flag := 2;
 //    Expo_Range := 2; (assuming double-extended power function)
-//    ker_exp_64( P_hi, P_lo, Flag, Expo_Range, 
+//    ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
 //                Z_hi, Z_lo, scale, Safe )
 //
 //    scale := sgn * scale
@@ -138,7 +146,7 @@
 //       return scale*Z_hi + (scale*Z_lo)
 //       quickly
 //    Else
-//       take necessary precaution in computing 
+//       take necessary precaution in computing
 //       scale*Z_hi + (scale*Z_lo)
 //       to set possible exceptions correctly.
 //    End If
@@ -152,8 +160,8 @@
 //   If Y is qNaN, return Y without exception.
 //   If X is qNaN, return X without exception.
 //
-//   At this point, X is real and Y is +-inf. 
-//   Thus |X| can only be 1, strictly bigger than 1, or 
+//   At this point, X is real and Y is +-inf.
+//   Thus |X| can only be 1, strictly bigger than 1, or
 //   strictly less than 1.
 //
 //   If |X| < 1, then
@@ -169,8 +177,8 @@
 //   ...Note that Y is real, finite, non-zero, and not +1.
 //
 //   If X is qNaN, return X without exception.
-//    
-//   If X is +-0, 
+//
+//   If X is +-0,
 //   return ( Y > 0 ? +0 : +inf )
 //
 //   If X is +inf
@@ -180,11 +188,11 @@
 //   return -0 ** -Y
 //   return ( Y > 0 ? +inf : +0 )
 //
-//  Case_Invalid 
+//  Case_Invalid
 //
 //   Return 0 * inf to generate a quiet NaN together
 //   with an invalid exception.
-// 
+//
 //  Implementation
 //  ==============
 //
@@ -193,15 +201,15 @@
 //
 //  STAGE 1
 //  -------
-//   This stage contains two threads. 
+//   This stage contains two threads.
 //
 //   Stage1.Thread1
 //
 //     fclass.m   X_excep,  X_ok   = X, (NatVal or s/qNaN) or
-//				  +-0, +-infinity
+//                              +-0, +-infinity
 //
 //     fclass.nm  X_unsupp, X_supp = X, (NatVal or s/qNaN) or
-//				  +-(0, unnorm, norm, infinity)
+//                              +-(0, unnorm, norm, infinity)
 //
 //     X_norm := fnorm( X ) with traps disabled
 //
@@ -209,26 +217,26 @@
 //     If (X_unsupp) goto Filtering (Step 2)
 //
 //     Stage1.Thread2
-//     ..............    
+//     ..............
 //
 //     fclass.m   Y_excep,  Y_ok   = Y, (NatVal or s/qNaN) or
-//				  +-0, +-infinity
+//                              +-0, +-infinity
 //
 //     fclass.nm  Y_unsupp, Y_supp = Y, (NatVal or s/qNaN) or
-//				  +-(0, unnorm, norm, infinity)
+//                              +-(0, unnorm, norm, infinity)
 //
 //     Y_norm := fnorm( Y ) with traps disabled
 //
 //     If (Y_excep)  goto Filtering (Step 2)
 //     If (Y_unsupp) goto Filtering (Step 2)
 //
-// 
+//
 //  STAGE 2
 //  -------
 //  This stage contains two threads.
 //
-//     Stage2.Thread1		
-//     ..............	
+//     Stage2.Thread1
+//     ..............
 //
 //     Set X_lt_0 if X < 0 (using fcmp)
 //     sgn := +1.0
@@ -245,14 +253,14 @@
 //   This stage contains two threads.
 //
 //
-//   Stage3.Thread1		
-//   .............. 	
+//   Stage3.Thread1
+//   ..............
 //
 //     X := fnorm(X) in prevailing traps
 //
 //
-//     Stage3.Thread2		
-//     ..............	
+//     Stage3.Thread2
+//     ..............
 //
 //     Y := fnorm(Y) in prevailing traps
 //
@@ -262,60 +270,56 @@
 //   Go to Case_Normal.
 //
 
-#include "libm_support.h"
-
-#ifdef _LIBC
-.rodata
-#else
-.data
-#endif
-
-// Inv_L, L_hi, L_lo 
-.align 64
-Constants_exp_64_Arg:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Arg,@object)
-data4 0x5C17F0BC,0xB8AA3B29,0x0000400B,0x00000000 
-data4 0x00000000,0xB17217F4,0x00003FF2,0x00000000
-data4 0xF278ECE6,0xF473DE6A,0x00003FD4,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Arg)
-
-.align 64
-Constants_exp_64_Exponents:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_Exponents,@object)
-data4 0x0000007E,0x00000000,0xFFFFFF83,0xFFFFFFFF
-data4 0x000003FE,0x00000000,0xFFFFFC03,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0x00003FFE,0x00000000,0xFFFFC003,0xFFFFFFFF
-data4 0xFFFFFFE2,0xFFFFFFFF,0xFFFFFFC4,0xFFFFFFFF
-data4 0xFFFFFFBA,0xFFFFFFFF,0xFFFFFFBA,0xFFFFFFFF
-ASM_SIZE_DIRECTIVE(Constants_exp_64_Exponents)
-
-.align 64
-Constants_exp_64_A:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_A,@object)
-// Reversed 
-data4 0xB1B736A0,0xAAAAAAAB,0x00003FFA,0x00000000
-data4 0x90CD6327,0xAAAAAAAB,0x00003FFC,0x00000000
-data4 0xFFFFFFFF,0xFFFFFFFF,0x00003FFD,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_exp_64_A)
-
-.align 64
-Constants_exp_64_P:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_P,@object)
-// Reversed 
-data4 0x43914A8A,0xD00D6C81,0x00003FF2,0x00000000
-data4 0x30304B30,0xB60BC4AC,0x00003FF5,0x00000000
-data4 0x7474C518,0x88888888,0x00003FF8,0x00000000
-data4 0x8DAE729D,0xAAAAAAAA,0x00003FFA,0x00000000
-data4 0xAAAAAF61,0xAAAAAAAA,0x00003FFC,0x00000000
-data4 0x000004C7,0x80000000,0x00003FFE,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_exp_64_P)
-
-.align 64
-Constants_exp_64_T1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T1,@object)
-data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 
-data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5 
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88   (expansion with additional low-order bits)
+// 3fff b8aa 3b29 5c17 f0bc                    (rounded to a 64-bit significand)
+// For speed the significand will be loaded directly with a movl and setf.sig
+//   and the exponent will be bias+63 instead of bias+0.  Thus subsequent
+//   computations need to scale appropriately.
+// The constant 2^12/ln(2) is needed for the computation of N.  This is also
+//   obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+//   1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
+//        This constant is added to x*1/ln2 to shift the integer part of
+//        x*2^12/ln2 into the rightmost bits of the significand.
+//        The result of this fma is N_signif.
+//   2. RSHF       = 1.1000..00 * 2^(63)
+//        This constant is subtracted from N_signif * 2^(-51) to give
+//        the integer part of N, N_fix, as a floating-point number.
+//        The result of this fms is float_N.
+RODATA
+
+.align 16
+// L_hi, L_lo
+LOCAL_OBJECT_START(Constants_exp_64_Arg)
+data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
+data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12
+LOCAL_OBJECT_END(Constants_exp_64_Arg)
+
+LOCAL_OBJECT_START(Constants_exp_64_A)
+// Reversed order; the three values are approximately 1/24, 1/6, 1/2
+data8 0xAAAAAAABB1B736A0,0x00003FFA
+data8 0xAAAAAAAB90CD6327,0x00003FFC
+data8 0xFFFFFFFFFFFFFFFF,0x00003FFD
+LOCAL_OBJECT_END(Constants_exp_64_A)
+
+LOCAL_OBJECT_START(Constants_exp_64_P)
+// Reversed order; the six values are approximately 1/5040, 1/720, 1/120, 1/24, 1/6, 1/2
+data8 0xD00D6C8143914A8A,0x00003FF2
+data8 0xB60BC4AC30304B30,0x00003FF5
+data8 0x888888887474C518,0x00003FF8
+data8 0xAAAAAAAA8DAE729D,0x00003FFA
+data8 0xAAAAAAAAAAAAAF61,0x00003FFC
+data8 0x80000000000004C7,0x00003FFE
+LOCAL_OBJECT_END(Constants_exp_64_P)
+
+LOCAL_OBJECT_START(Constants_exp_64_T1)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
 data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
 data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
 data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
@@ -330,274 +334,263 @@ data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
 data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
 data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
 data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T1)
-
-.align 64
-Constants_exp_64_T2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_T2,@object)
-data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 
-data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7 
-data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E 
-data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349 
-data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987 
-data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA 
-data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610 
-data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A 
-data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8 
-data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA 
-data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50 
-data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA 
-data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07 
-data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269 
-data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE 
+LOCAL_OBJECT_END(Constants_exp_64_T1)
+
+LOCAL_OBJECT_START(Constants_exp_64_T2)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
 data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
-ASM_SIZE_DIRECTIVE(Constants_exp_64_T2)
-  
-.align 64
-Constants_exp_64_W1:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W1,@object)
-data4 0x00000000,0x00000000,0x171EC4B4,0xBE384454
-data4 0x4AA72766,0xBE694741,0xD42518F8,0xBE5D32B6
-data4 0x3A319149,0x3E68D96D,0x62415F36,0xBE68F4DA
-data4 0xC9C86A3B,0xBE6DDA2F,0xF49228FE,0x3E6B2E50
-data4 0x1188B886,0xBE49C0C2,0x1A4C2F1F,0x3E64BFC2
-data4 0x2CB98B54,0xBE6A2FBB,0x9A55D329,0x3E5DC5DE
-data4 0x39A7AACE,0x3E696490,0x5C66DBA5,0x3E54728B
-data4 0xBA1C7D7D,0xBE62B0DB,0x09F1AF5F,0x3E576E04
-data4 0x1A0DD6A1,0x3E612500,0x795FBDEF,0xBE66A419
-data4 0xE1BD41FC,0xBE5CDE8C,0xEA54964F,0xBE621376
-data4 0x476E76EE,0x3E6370BE,0x3427EB92,0x3E390D1A 
-data4 0x2BF82BF8,0x3E1336DE,0xD0F7BD9E,0xBE5FF1CB 
-data4 0x0CEB09DD,0xBE60A355,0x0980F30D,0xBE5CA37E 
-data4 0x4C082D25,0xBE5C541B,0x3B467D29,0xBE5BBECA 
-data4 0xB9D946C5,0xBE400D8A,0x07ED374A,0xBE5E2A08 
-data4 0x365C8B0A,0xBE66CB28,0xD3403BCA,0x3E3AAD5B 
-data4 0xC7EA21E0,0x3E526055,0xE72880D6,0xBE442C75 
-data4 0x85222A43,0x3E58B2BB,0x522C42BF,0xBE5AAB79 
-data4 0x469DC2BC,0xBE605CB4,0xA48C40DC,0xBE589FA7 
-data4 0x1AA42614,0xBE51C214,0xC37293F4,0xBE48D087 
-data4 0xA2D673E0,0x3E367A1C,0x114F7A38,0xBE51BEBB 
-data4 0x661A4B48,0xBE6348E5,0x1D3B9962,0xBDF52643  
-data4 0x35A78A53,0x3E3A3B5E,0x1CECD788,0xBE46C46C 
-data4 0x7857D689,0xBE60B7EC,0xD14F1AD7,0xBE594D3D 
-data4 0x4C9A8F60,0xBE4F9C30,0x02DFF9D2,0xBE521873 
-data4 0x55E6D68F,0xBE5E4C88,0x667F3DC4,0xBE62140F 
-data4 0x3BF88747,0xBE36961B,0xC96EC6AA,0x3E602861 
-data4 0xD57FD718,0xBE3B5151,0xFC4A627B,0x3E561CD0 
-data4 0xCA913FEA,0xBE3A5217,0x9A5D193A,0x3E40A3CC 
-data4 0x10A9C312,0xBE5AB713,0xC5F57719,0x3E4FDADB 
-data4 0xDBDF59D5,0x3E361428,0x61B4180D,0x3E5DB5DB 
-data4 0x7408D856,0xBE42AD5F,0x31B2B707,0x3E2A3148
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W1)
-
-.align 64
-Constants_exp_64_W2:
-ASM_TYPE_DIRECTIVE(Constants_exp_64_W2,@object)
-data4 0x00000000,0x00000000,0x37A3D7A2,0xBE641F25 
-data4 0xAD028C40,0xBE68DD57,0xF212B1B6,0xBE5C77D8 
-data4 0x1BA5B070,0x3E57878F,0x2ECAE6FE,0xBE55A36A 
-data4 0x569DFA3B,0xBE620608,0xA6D300A3,0xBE53B50E 
-data4 0x223F8F2C,0x3E5B5EF2,0xD6DE0DF4,0xBE56A0D9 
-data4 0xEAE28F51,0xBE64EEF3,0x367EA80B,0xBE5E5AE2 
-data4 0x5FCBC02D,0x3E47CB1A,0x9BDAFEB7,0xBE656BA0 
-data4 0x805AFEE7,0x3E6E70C6,0xA3415EBA,0xBE6E0509 
-data4 0x49BFF529,0xBE56856B,0x00508651,0x3E66DD33 
-data4 0xC114BC13,0x3E51165F,0xC453290F,0x3E53333D 
-data4 0x05539FDA,0x3E6A072B,0x7C0A7696,0xBE47CD87 
-data4 0xEB05C6D9,0xBE668BF4,0x6AE86C93,0xBE67C3E3 
-data4 0xD0B3E84B,0xBE533904,0x556B53CE,0x3E63E8D9 
-data4 0x63A98DC8,0x3E212C89,0x032A7A22,0xBE33138F 
-data4 0xBC584008,0x3E530FA9,0xCCB93C97,0xBE6ADF82 
-data4 0x8370EA39,0x3E5F9113,0xFB6A05D8,0x3E5443A4 
-data4 0x181FEE7A,0x3E63DACD,0xF0F67DEC,0xBE62B29D 
-data4 0x3DDE6307,0x3E65C483,0xD40A24C1,0x3E5BF030  
-data4 0x14E437BE,0x3E658B8F,0xED98B6C7,0xBE631C29 
-data4 0x04CF7C71,0x3E6335D2,0xE954A79D,0x3E529EED
-data4 0xF64A2FB8,0x3E5D9257,0x854ED06C,0xBE6BED1B 
-data4 0xD71405CB,0x3E5096F6,0xACB9FDF5,0xBE3D4893 
-data4 0x01B68349,0xBDFEB158,0xC6A463B9,0x3E628D35 
-data4 0xADE45917,0xBE559725,0x042FC476,0xBE68C29C 
-data4 0x01E511FA,0xBE67593B,0x398801ED,0xBE4A4313 
-data4 0xDA7C3300,0x3E699571,0x08062A9E,0x3E5349BE 
-data4 0x755BB28E,0x3E5229C4,0x77A1F80D,0x3E67E426 
-data4 0x6B69C352,0xBE52B33F,0x084DA57F,0xBE6B3550 
-data4 0xD1D09A20,0xBE6DB03F,0x2161B2C1,0xBE60CBC4 
-data4 0x78A2B771,0x3E56ED9C,0x9D0FA795,0xBE508E31 
-data4 0xFD1A54E9,0xBE59482A,0xB07FD23E,0xBE2A17CE 
-data4 0x17365712,0x3E68BF5C,0xB3785569,0x3E3956F9
-ASM_SIZE_DIRECTIVE(Constants_exp_64_W2)
-
-.align 64
-Constants_log_80_P:
-ASM_TYPE_DIRECTIVE(Constants_log_80_P,@object)
-// 1/2, P_8, P_7, ..., P_1  
-data4 0x00000000, 0x80000000, 0x00003FFE, 0x00000000
-data4 0x3B1042BC, 0xCCCE8B88, 0x0000BFFB, 0x00000000
-data4 0xCADC2149, 0xE38997B7, 0x00003FFB, 0x00000000
-data4 0xB1ACB090, 0xFFFFFFFE, 0x0000BFFB, 0x00000000
-data4 0x06481C81, 0x92492498, 0x00003FFC, 0x00000000
-data4 0xAAAAB0EF, 0xAAAAAAAA, 0x0000BFFC, 0x00000000
-data4 0xCCC91416, 0xCCCCCCCC, 0x00003FFC, 0x00000000
-data4 0x00000000, 0x80000000, 0x0000BFFD, 0x00000000
-data4 0xAAAAAAAB, 0xAAAAAAAA, 0x00003FFD
-ASM_SIZE_DIRECTIVE(Constants_log_80_P)
-
-.align 64
-Constants_log_80_Q:
-ASM_TYPE_DIRECTIVE(Constants_log_80_Q,@object)
-// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1 
-data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
-data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
-data4 0xA51BE0AF,0x92492453,0x00003FFC,0x00000000
-data4 0xA0CFD29F,0xAAAAAB73,0x0000BFFC,0x00000000
-data4 0xCCCE3872,0xCCCCCCCC,0x00003FFC,0x00000000
-data4 0xFFFFB4FB,0xFFFFFFFF,0x0000BFFC,0x00000000
-data4 0xAAAAAAAB,0xAAAAAAAA,0x00003FFD,0x00000000
-data4 0x00000000,0x80000000,0x0000BFFE,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_log_80_Q)
-
-.align 64
-Constants_log_80_Z_G_H_h1:
-ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h1,@object)
-// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double   
+LOCAL_OBJECT_END(Constants_exp_64_T2)
+
+LOCAL_OBJECT_START(Constants_exp_64_W1)
+data8 0x0000000000000000, 0xBE384454171EC4B4
+data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
+data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
+data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
+data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
+data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
+data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
+data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
+data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
+data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
+data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
+data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
+data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
+data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
+data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
+data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
+data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
+data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
+data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
+data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
+data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
+data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
+data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
+data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
+data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
+data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
+data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
+data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
+data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
+data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
+data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
+data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
+LOCAL_OBJECT_END(Constants_exp_64_W1)
+
+LOCAL_OBJECT_START(Constants_exp_64_W2)
+data8 0x0000000000000000, 0xBE641F2537A3D7A2
+data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
+data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
+data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
+data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
+data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
+data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
+data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
+data8 0xBE56856B49BFF529, 0x3E66DD3300508651
+data8 0x3E51165FC114BC13, 0x3E53333DC453290F
+data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
+data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
+data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
+data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
+data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
+data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
+data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
+data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
+data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
+data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
+data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
+data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
+data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
+data8 0xBE559725ADE45917, 0xBE68C29C042FC476
+data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
+data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
+data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
+data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
+data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
+data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
+data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
+data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
+LOCAL_OBJECT_END(Constants_exp_64_W2)
+
+LOCAL_OBJECT_START(Constants_log_80_P)
+// P_8, P_7, ..., P_1
+data8 0xCCCE8B883B1042BC, 0x0000BFFB // P_8
+data8 0xE38997B7CADC2149, 0x00003FFB // P_7
+data8 0xFFFFFFFEB1ACB090, 0x0000BFFB // P_6
+data8 0x9249249806481C81, 0x00003FFC // P_5
+data8 0x0000000000000000, 0x00000000 // Pad for bank conflicts
+data8 0xAAAAAAAAAAAAB0EF, 0x0000BFFC // P_4
+data8 0xCCCCCCCCCCC91416, 0x00003FFC // P_3
+data8 0x8000000000000000, 0x0000BFFD // P_2
+data8 0xAAAAAAAAAAAAAAAB, 0x00003FFD // P_1
+LOCAL_OBJECT_END(Constants_log_80_P)
+
+LOCAL_OBJECT_START(Constants_log_80_Q)
+// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1
+data8 0xB172180000000000,0x00003FFE
+data8 0x82E308654361C4C6,0x0000BFE2
+data8 0x92492453A51BE0AF,0x00003FFC
+data8 0xAAAAAB73A0CFD29F,0x0000BFFC
+data8 0xCCCCCCCCCCCE3872,0x00003FFC
+data8 0xFFFFFFFFFFFFB4FB,0x0000BFFC
+data8 0xAAAAAAAAAAAAAAAB,0x00003FFD
+data8 0x8000000000000000,0x0000BFFE
+LOCAL_OBJECT_END(Constants_log_80_Q)
+
+LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h1)
+// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double extended
 data4 0x00008000,0x3F800000,0x00000000,0x00000000
-data4 0x00000000,0x00000000,0x00000000,0x00000000 
+data4 0x00000000,0x00000000,0x00000000,0x00000000
 data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
 data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
 data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
 data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
 data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
-data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000  
+data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
 data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
-data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000  
-data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000 
-data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000 
-data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000 
+data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
+data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
 data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
-data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000 
-data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000 
-data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000 
-data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000 
-data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000 
-data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000 
-data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000 
-data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000 
-data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000 
-data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000 
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
+data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
+data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
+data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
+data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
+data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
 data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
-data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000    
-data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000 
-data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000  
-data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000 
-data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000 
+data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
+data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
+data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
 data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
-data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000 
-ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h1)
-
-.align 64
-Constants_log_80_Z_G_H_h2:
-ASM_TYPE_DIRECTIVE(Constants_log_80_Z_G_H_h2,@object)
-// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double   
-data4 0x00008000,0x3F800000,0x00000000,0x00000000 
-data4 0x00000000,0x00000000,0x00000000,0x00000000 
-data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000 
+data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
+LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h1)
+
+LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h2)
+// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double extended
+data4 0x00008000,0x3F800000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
 data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
-data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000 
-data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000 
-data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000 
-data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000 
-data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000 
-data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000 
-data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000 
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
+data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
+data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
+data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
 data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
-data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000 
-data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000 
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
+data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
 data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
-data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000 
-data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000 
-data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000  
-data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000 
-data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000 
-data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000 
-data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000 
-data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000 
-data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000  
-data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000 
-data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000  
-data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000 
-data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000 
-data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000 
-data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000 
-data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000 
+data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
+data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
+data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
+data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
+data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
+data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
+data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
+data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
 data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
-ASM_SIZE_DIRECTIVE(Constants_log_80_Z_G_H_h2)
- 
-.align 64
-Constants_log_80_h3_G_H:
-ASM_TYPE_DIRECTIVE(Constants_log_80_h3_G_H,@object)
-// h3 IEEE double extended, H3 and G3 IEEE single   
-data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00 
+LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h2)
+
+LOCAL_OBJECT_START(Constants_log_80_h3_G_H)
+// 32 entries of 16 bytes each: h3 (IEEE double extended, first 3 words) then G3 (IEEE single, 4th word)
+data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00 // entry 0; G3 sits at byte offset 12 of each entry
 data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
-data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00 
-data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400 
+data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
+data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
 data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
-data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400 
-data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08 
-data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408 
-data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10 
-data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410 
-data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18 
+data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
+data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
+data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
+data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
+data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
+data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
 data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
-data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20 
-data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428 
-data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30 
-data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438 
-data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40 
-data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448 
-data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50 
-data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458 
-data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68 
-data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470 
-data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78 
+data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
+data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
+data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
+data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
+data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
+data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
+data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
+data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
+data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
+data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
+data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
 data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
-data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90 
-data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0 
-data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8 
-data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8 
-data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8 
-data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8 
-data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0 
-data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0 
-data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H's start here 
-data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D 
+data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
+data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
+data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
+data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
+data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
+data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
+data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
+data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0 // entry 31, last h3/G3 pair (32 x 16 = 0x200 bytes so far)
+data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H3 values start here (offset 0x200): 32 IEEE singles, 4 per line, indexed by Index3 x 4
+data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
 data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
-data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED 
-data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766 
-data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6 
-data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620 
-data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D 
-ASM_SIZE_DIRECTIVE(Constants_log_80_h3_G_H)
-
-.align 64
-Constant_half:
-ASM_TYPE_DIRECTIVE(Constant_half,@object)
-data4 0x00000000,0x80000000,0x00003FFE
-ASM_SIZE_DIRECTIVE(Constant_half)
-
-GR_Expo_Range       = r32
-GR_Flag             = r33
+data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
+data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
+data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
+data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
+data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
+LOCAL_OBJECT_END(Constants_log_80_h3_G_H)
+
+GR_sig_inv_ln2      = r14 // r14-r24 lie below r32, i.e. outside the frame set up by "alloc r32 = ar.pfs,0,30,4,0"
+GR_rshf_2to51       = r15
+GR_exp_2tom51       = r16
+GR_rshf             = r17
+GR_exp_half         = r18 // set to 0xffff - 1 at entry; setf.exp turns it into FR_Half (0.5)
+GR_sign_mask        = r19 // set to 0x20000 at entry
+GR_exp_square_oflow = r20
+GR_exp_square_uflow = r21
+GR_exp_ynear1_oflow = r22
+GR_exp_ynear1_uflow = r23
+GR_signif_Z         = r24 // significand of x (getf.sig of FR_Input_X); Index1 and X_0 are extracted from it
+
+GR_signexp_x        = r32
+
+GR_exp_x            = r33
+
 GR_Table_Ptr        = r34
 
 GR_Table_Ptr1       = r35
-GR_BIAS             = r35
 
 GR_Index1           = r36
-GR_sign_mask        = r36
 
 GR_Index2           = r37
 GR_Expo_X           = r37
 
-GR_signif_Z         = r38
 GR_M                = r38
 
 GR_X_0              = r39
@@ -620,45 +613,49 @@ GR_k                = r44
 
 GR_Big_Pos_Exp      = r45
 
+GR_exp_pos_max      = r46
 
-GR_BIAS_p_k         = r47
-GR_BIASed_exp_y     = r47
+GR_exp_bias_p_k     = r47
 
-GR_Big_Neg_Exp      = r48
 GR_Index3           = r48
 GR_temp             = r48
 
 GR_vsm_expo         = r49
-GR_y_sign           = r49
 
 GR_T1_ptr           = r50
+GR_P_ptr1           = r50
 GR_T2_ptr           = r51
+GR_P_ptr2           = r51
 GR_N_fix            = r52
 GR_exp_y            = r53
 GR_signif_y         = r54
-GR_exp_and_sign_y   = r55
+GR_signexp_y        = r55
+GR_fraction_y       = r55
 GR_low_order_bit    = r56
-GR_get_exp_mask     = r57
-GR_exponent_zero    = r58
-
-// ** Registers for unwind support
+GR_exp_mask         = r57
+GR_exp_bias         = r58
+GR_y_sign           = r59
+GR_table_base       = r60
+GR_ptr_exp_Arg      = r61
+GR_Delta_Exp        = r62
+GR_Special_Exp      = r63
+GR_exp_neg_max      = r64
+GR_Big_Neg_Exp      = r65
+
+//** Registers for unwind support
 
 GR_SAVE_PFS         = r59
 GR_SAVE_B0          = r60
 GR_SAVE_GP          = r61
-GR_Parameter_X      = r62 
-GR_Parameter_Y      = r63 
-GR_Parameter_RESULT = r64 
-GR_Parameter_TAG    = r65 
-
-FR_X      = f8
-FR_Y      = f9
-FR_RESULT = f99
+GR_Parameter_X      = r62
+GR_Parameter_Y      = r63
+GR_Parameter_RESULT = r64
+GR_Parameter_TAG    = r65
 
-// **
+//**
 
 FR_Input_X          = f8
-FR_Output           = f8
+FR_Result           = f8
 FR_Input_Y          = f9
 
 FR_Neg              = f10
@@ -671,7 +668,6 @@ FR_poly_hi          = f11
 
 FR_Sgn              = f12
 
-FR_Neg_X            = f13
 FR_half_W           = f13
 
 FR_X_cor            = f14
@@ -698,13 +694,11 @@ FR_Scale            = f36
 FR_G_1              = f37
 FR_G                = f37
 FR_Wsq              = f37
-FR_L_Inv            = f37
 FR_temp             = f37
 
 FR_H_1              = f38
 FR_H                = f38
 FR_W4               = f38
-FR_float_N          = f38
 
 FR_h                = f39
 FR_h_1              = f39
@@ -720,9 +714,7 @@ FR_L_lo             = f41
 FR_A_1              = f41
 
 FR_h_2              = f42
-FR_P_6              = f42
 
-FR_abs_W            = f43
 FR_W1               = f43
 
 FR_G_3              = f44
@@ -740,7 +732,6 @@ FR_H_3              = f47
 
 FR_float_N          = f48
 
-FR_P_4              = f49
 FR_A_2              = f49
 
 FR_Q_4              = f50
@@ -768,7 +759,6 @@ FR_Two              = f56
 FR_Big              = f57
 
 FR_neg_2_mK         = f58
-FR_NBig             = f58
 
 FR_r                = f59
 
@@ -777,1652 +767,1253 @@ FR_poly_lo          = f60
 FR_poly             = f61
 
 FR_P_5              = f62
+FR_Result_small     = f62
 
 FR_rsq              = f63
 
-FR_Result           = f99
-FR_Result_small     = f100
-FR_Result_big       = f101
+FR_Delta            = f64
 
-.section .text
-.proc powl#
-.global powl#
-.align 64 
+FR_save_Input_X     = f65
+FR_norm_X           = f66
+FR_norm_Y           = f67
+FR_Y_lo_2           = f68
 
-powl: 
-{ .mfi
-alloc  GR_Expo_Range = ar.pfs,0,30,4,0
-(p0)   fclass.m.unc p7, p13 =  FR_Input_Y, 0x1E7 
-nop.i 0
-}
-{ .mfi
-(p0)   getf.exp GR_exp_and_sign_y = FR_Input_Y 
+FR_P_6              = f69
+FR_Result_big       = f69
+
+FR_RSHF_2TO51       = f70
+FR_INV_LN2_2TO63    = f71
+FR_2TOM51           = f72
+FR_RSHF             = f73
+FR_TMP1             = f74
+FR_TMP2             = f75
+FR_TMP3             = f76
+FR_Tscale           = f77
+FR_P_4              = f78
+FR_NBig             = f79
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(powl)
 //
-//     Save State
+//     Get significand of x.  It is the critical path.
 //
-(p0)   fclass.m.unc p6, p12 =  FR_Input_X, 0x1E7 
-nop.i 0
-};;
 { .mfi
-(p0)   getf.sig GR_signif_y = FR_Input_Y 
-(p0)   fcmp.eq.unc.s1 p12, p13 =  FR_Input_X, f1 
-nop.i 0
+      getf.sig GR_signif_Z = FR_Input_X    // Get significand of x
+      fclass.m p11, p12 = FR_Input_X, 0x0b // Test x unorm
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-//
-//     Check for y = 1 
-//     Identify EM unsupporteds.
-//     Load FR_half = .5
-//
-(p0)   fadd.s1 FR_Two = f1, f1 
-//
-//     Load 1/2 in GP register
-//
-nop.i 0 
+      nop.m 999
+      fnorm.s1 FR_norm_X = FR_Input_X      // Normalize x
+      mov GR_exp_half = 0xffff - 1         // Exponent for 0.5
 }
 ;;
 
-{ .mmi
-      nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constant_half#), gp
-      nop.i 999
+{ .mfi
+      alloc  r32 = ar.pfs,0,30,4,0
+      fclass.m p7, p0 =  FR_Input_Y, 0x1E7 // Test y natval, nan, inf, zero
+      mov GR_exp_pos_max = 0x13fff         // Max exponent for pos oflow test
+}
+{ .mfi
+      addl GR_table_base = @ltoff(Constants_exp_64_Arg#), gp // Ptr to tables
+      fnorm.s1 FR_norm_Y = FR_Input_Y      // Normalize y
+      mov GR_exp_neg_max = 0x33fff         // Max exponent for neg oflow test
 }
 ;;
 
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
-      nop.m 999
-      nop.i 999
+{ .mfi
+      getf.exp GR_signexp_y = FR_Input_Y   // Get sign and exp of y
+(p12) fclass.m p11, p0 =  FR_Input_Y, 0x0b // Test y unorm
+      mov GR_sign_mask = 0x20000           // Sign mask
+}
+{ .mfi
+      ld8 GR_table_base = [GR_table_base]  // Get base address for tables
+      fadd.s1 FR_Two = f1, f1              // Form 2.0 for square test
+      mov GR_exp_mask = 0x1FFFF            // Exponent mask
 }
 ;;
 
-{ .mlx
-(p0)   ldfe FR_Half =[GR_Table_Ptr],0
-(p0)   movl GR_get_exp_mask = 0x1FFFF ;; 
+{ .mfi
+      getf.sig GR_signif_y = FR_Input_Y    // Get significand of y
+      fclass.m p6, p0 =  FR_Input_X, 0x1E7 // Test x natval, nan, inf, zero
+      nop.i 999
 }
+;;
 
 { .mfi
-	nop.m 999
-(p0)   fclass.nm.unc p9, p15 =  FR_Input_Y, 0x1FF 
-//
-//     Create FR_Two = 2
-//     Get exp and significand of Y
-//     Crate Masks
-//     sgn = 1
-//
-(p0)   and GR_exp_y = GR_get_exp_mask,GR_exp_and_sign_y
+      getf.exp GR_signexp_x = FR_Input_X   // Get signexp of x
+      fmerge.s FR_save_Input_X = FR_Input_X, FR_Input_X
+      extr.u GR_Index1 = GR_signif_Z, 59, 4  // Extract upper 4 signif bits of x
 }
-{ .mlx
-	nop.m 999
-(p0)   movl GR_exponent_zero = 0xFFFF ;; 
+{ .mfb
+      setf.exp FR_Half = GR_exp_half       // Load half
+      nop.f 999
+(p11) br.cond.spnt  POWL_DENORM            // Branch if x or y denorm/unorm
 }
+;;
+
+// Return here from POWL_DENORM
+POWL_COMMON:
 { .mfi
-	nop.m 999
-(p0)   mov FR_Sgn = f1 
-	nop.i 999
+      setf.exp FR_Big = GR_exp_pos_max     // Form big pos value for oflow test
+      fclass.nm p11, p0 = FR_Input_Y, 0x1FF // Test Y unsupported
+      shl GR_Index1 = GR_Index1,5          // Adjust index1 pointer x 32
 }
 { .mfi
-	nop.m 999
-(p0)   fcmp.eq.unc.s1 p10, p11 =  FR_Input_Y, f1 
-	nop.i 999 ;;
+      add GR_Table_Ptr = 0x7c0, GR_table_base // Constants_log_80_Z_G_H_h1
+      fma.s1 FR_Sgn = f1,f1,f0             // Assume result positive
+      mov GR_exp_bias = 0xFFFF             // Form exponent bias
 }
-{ .mfb
-	nop.m 999
+;;
+
 //
 //     Identify NatVals, NaNs, Infs, and Zeros.
-//     Load Half
 //
-(p0)   fclass.nm.unc p8, p14 =  FR_Input_X, 0x1FF 
-//      
-//     Remove sign bit from exponent of y.
-//     Check for x = 1 
-//
-(p6)   br.cond.spnt L(POWL_64_SPECIAL) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p7)   br.cond.spnt L(POWL_64_SPECIAL) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p8)   br.cond.spnt L(POWL_64_UNSUPPORT) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p9)   br.cond.spnt L(POWL_64_UNSUPPORT) ;; 
-}
-{ .mfi
-(p0)   cmp.lt.unc  p9, p0 = GR_exp_y,GR_exponent_zero 
-(p0)   fcmp.lt.unc.s1 p6, p13  =  FR_Input_X, f0 
 //
+//     Remove sign bit from exponent of y.
+//     Check for x = 1
 //     Branch on Infs, Nans, Zeros, and Natvals
 //     Check to see that exponent < 0
 //
-(p0)   sub GR_exp_y = GR_exp_y,GR_exponent_zero
-}
-//     x not zero, is y ==2? 
 { .mfi
-	nop.m 999
-(p11)  fcmp.eq.unc.s1 p7, p14 =  FR_Input_Y, FR_Two 
-	nop.i 999 ;;
+      setf.exp FR_NBig = GR_exp_neg_max    // Form big neg value for oflow test
+      fclass.nm p8, p0 =  FR_Input_X, 0x1FF  // Test X unsupported
+      and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
 }
 { .mfb
-	nop.m 999
-(p9)   fcmp.lt.unc.s1 p9, p0   =  FR_Input_X, f0 
-(p7)   br.cond.spnt L(POWL_64_SQUARE) ;;   // Branch if x not zero and y=2
+      add GR_Index1 = GR_Index1,GR_Table_Ptr
+      nop.f 999
+(p6)  br.cond.spnt POWL_64_SPECIAL         // Branch if x natval, nan, inf, zero
 }
-{ .mfi
-	nop.m 999
-(p6)   fmerge.ns FR_Neg_X = FR_Input_X, FR_Input_X 
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
-(p10)  fmpy.s0 FR_Result = FR_Input_X, f1 
-//
-//     For y = 1, compute result = x 
-//     For x = 1, compute 1 
-//     When Y is one return X and possible raise 
-//     denormal operand exception.
-//     Remove exponent BIAS
+;;
+
+//     load Z_1 from Index1
+
+// There is logic starting here to determine if y is an integer when x < 0.
+// If 0 < |y| < 1 then clearly y is not an integer.
+// If |y| > 1, then the significand of y is shifted left by the size of
+//    the exponent of y.  This preserves the lsb of the integer part + the
+//    fractional bits.  The lsb of the integer can be tested to determine if
+//    the integer is even or odd.  The fractional bits can be tested.  If zero,
+//    then y is an integer.
 //
-(p6)   shl GR_exp_and_sign_y=  GR_signif_y,GR_exp_y ;; 
-}
 { .mfi
-(p9)   or  GR_exp_and_sign_y = 0xF,GR_signif_y 
-(p12)  fma.s0 FR_Result = FR_Input_Y, f0, f1 
-	nop.i 999 ;;
+      ld2 GR_Z_1 =[GR_Index1],4            // Load Z_1
+      fmerge.s FR_Z = f0, FR_norm_X        // Z = |x|
+      extr.u GR_X_0 = GR_signif_Z, 49, 15  // Extract X_0 from significand
 }
-{ .mii
-	nop.m 999
-(p6)   extr.u GR_exp_y = GR_exp_and_sign_y,63,1 ;; 
-(p6)   cmp.ne.unc  p9, p0 =  GR_exp_y, r0 
+{ .mfb
+      cmp.lt p9, p0 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
+      nop.f 999
+(p7)  br.cond.spnt POWL_64_SPECIAL         // Branch if y natval, nan, inf, zero
 }
-{ .mii
-	nop.m 999
-//
-//     Both predicates can be set. 
-//     Don't consider  y's < 1.
-//
-(p6)   shl GR_signif_y=  GR_exp_and_sign_y,1 ;; 
-//
-//     Is shift off integer part of y.
-//     Get y's  even or odd bit.
-//
-(p6)   cmp.ne.unc  p8, p0  =  GR_signif_y, r0 
+;;
+
+{ .mfb
+      ldfs  FR_G_1 = [GR_Index1],4         // Load G_1
+      fcmp.eq.s1 p10, p0 =  FR_Input_Y, f1 // Test Y = +1.0
+(p8)  br.cond.spnt POWL_64_UNSUPPORT       // Branch if x unsupported
 }
-{ .mib
-	nop.m 999
-	nop.i 999
+;;
+
 //
-//     Is the fractional part of the y = 0?
-//     Is the integer even or odd. 
+//     X_0  = High order 15 bits of Z
 //
-(p10)  br.cond.spnt L(POWL_64_RETURN) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p12)  br.cond.spnt L(POWL_64_RETURN) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p8)   br.cond.spnt L(POWL_64_XNEG) ;;
+{ .mfb
+      ldfs  FR_H_1 = [GR_Index1],8             // Load H_1
+(p9)  fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0   // Test x<0, 0 <|y|<1
+(p11) br.cond.spnt POWL_64_UNSUPPORT           // Branch if y unsupported
 }
+;;
+
 { .mfi
-	nop.m 999
-(p9)   fmerge.ns FR_Sgn = FR_Sgn, FR_Sgn 
-	nop.i 999
+      ldfe FR_h_1 = [GR_Index1]                // Load h_1
+      fcmp.eq.s1 p7, p0 =  FR_Input_Y, FR_Two  // Test y = 2.0
+      pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15     // X_1 = X_0 * Z_1 (bits 15-30)
+                                               // Wait 4 cycles to use result
 }
 { .mfi
-	nop.m 999
-(p0)   fcmp.eq.unc.s0 p11, p0 =  FR_Input_Y, FR_Half 
-	nop.i 999 ;;
+      add GR_Table_Ptr = 0x9c0, GR_table_base  // Constants_log_80_Z_G_H_h2
+      nop.f 999
+      sub GR_exp_y = GR_exp_y,GR_exp_bias      // Get true exponent of y
 }
+;;
+
 //
-//     Raise possible denormal operand exception for both
-//     X and Y.
+//      Branch for (x < 0) and Y not an integer.
 //
 { .mfb
-	nop.m 999
-//
-//     Branch for (x < 0) and Y not an integer.
-//
-(p0)   fcmp.eq.unc.s0 p12, p0 =  FR_Input_X, f1 
-//
-//     For x < 0 and y integer, make x positive 
-//     For x < 0 and y odd integer,, set sign = -1.
-//
-(p11)  br.cond.spnt L(POWL_64_SQRT) ;; 
-}
-{ .mmf
-(p0)   cmp.eq.unc  p15, p14 =  r0, r0 
-	nop.m 999
-(p13)  fnorm.s1 FR_Z = FR_Input_X ;; 
-}
-{ .mfi
-	nop.m 999
-(p6)   fnorm.s1 FR_Z = FR_Neg_X 
-	nop.i 999
+      nop.m 999
+      fcmp.lt.s1 p6, p0  =  FR_Input_X, f0     // Test x < 0
+(p9)  br.cond.spnt POWL_64_XNEG                // Branch if x < 0, 0 < |y| < 1
 }
 ;;
 
-//
-//     Branch to embedded sqrt(x)
-//
-//
-//     Computes ln( x ) to extra precision 
-//     Input  FR 1: FR_X            
-//     Output FR 2: FR_Y_hi  
-//     Output FR 3: FR_Y_lo  
-//     Output PR 1: PR_Safe  
-//
-
-{ .mmi
+{ .mfi
       nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_Z_G_H_h1#), gp
+      fcmp.eq.s1 p12, p0 =  FR_Input_X, f1     // Test x=+1.0
       nop.i 999
 }
+{ .mfb
+      nop.m 999
+      fsub.s1 FR_W = FR_Z, f1                  // W = Z - 1
+(p7)  br.cond.spnt POWL_64_SQUARE              // Branch if y=2
+}
 ;;
 
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
       nop.m 999
-      nop.i 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f1       // If y=+1.0, result=x
+(p6)  shl GR_fraction_y=  GR_signif_y,GR_exp_y // Get lsb of int + fraction
+                                               // Wait 4 cycles to use result
 }
 ;;
 
-
-{ .mlx
-	nop.m 999
-(p0)   movl GR_BIAS = 0x000000000000FFFF ;; 
-}
 { .mfi
-	nop.m 999
-(p0)   fsub.s1 FR_W = FR_Z, f1 
-	nop.i 999 ;;
-}
-//     
-//     Z = Norm(X) - both + and - case 
-//     Set Safe = True
-//     
-{ .mmb
-(p0)   getf.sig GR_signif_Z = FR_Z 
-(p0)   getf.exp GR_N =  FR_Z 
-	nop.b 999 ;;
-}
-{ .mii
-	nop.m 999
-//     
-//     Get significand of Z 
-//     W = Z - 1
-//     
-(p0)   extr.u GR_Index1 = GR_signif_Z, 59, 4 ;;  
-//     
-//     Index1 = High order 4 bits of Z
-//     X_0  = High order 15 bit of Z 
-//
-(p0)   shl GR_Index1 = GR_Index1,5 ;; 
-}
-{ .mfi
-	nop.m 999
-//     
-//     Add offset to Index1 ptr.
-//     
-(p0)   fabs FR_abs_W =  FR_W 
-//     
-//     BIAS = 0x000...FFFF
-//     Adjust Index1 ptr ( x 32) .
-//     
-(p0)   add GR_Index1 = GR_Index1,GR_Table_Ptr  
+      nop.m 999
+(p12) fma.s0 FR_Result = FR_Input_Y, f0, f1    // If x=1.0, result=1, chk denorm
+      extr.u GR_Index2 = GR_X_1, 6, 4          // Extract index2
 }
-{ .mmi
-	nop.m 999 ;;
-(p0)   ld2 GR_Z_1 =[GR_Index1],4
-(p0)   extr.u GR_X_0 = GR_signif_Z, 49, 15  
+;;
+
+//
+//     N = exponent of Z
+//
+{ .mib
+      getf.exp GR_N =  FR_Z                    // Get exponent of Z (also x)
+      shl GR_Index2=GR_Index2,5                // Index2  x 32 bytes
+(p10) br.ret.spnt  b0                          // Exit if y=+1.0
 }
 ;;
 
-{ .mmi
-      nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_Z_G_H_h2#), gp
+{ .mib
+      add GR_Index2 = GR_Index2, GR_Table_Ptr  // Pointer to table 2
       nop.i 999
+(p12) br.ret.spnt  b0                          // Exit if x=+1.0
 }
 ;;
 
 { .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
-      nop.m 999
+      ld2 GR_Z_2 =[GR_Index2],4                // Load Z_2
+;;
+      ldfs  FR_G_2 = [GR_Index2],4             // Load G_2
       nop.i 999
 }
 ;;
 
-
-{ .mmi
-(p0)   ldfs  FR_G_1 = [GR_Index1],4 ;; 
-(p0)   ldfs  FR_H_1 = [GR_Index1],8 
-	nop.i 999 ;;
+{ .mii
+      ldfs  FR_H_2 = [GR_Index2],8             // Load H_2
+(p6)  tbit.nz.unc p9, p0 = GR_fraction_y, 63   // Test x<0 and y odd integer
+      add GR_Table_Ptr = 0xbcc, GR_table_base  // Constants_log_80_h3_G_H, G_3
 }
+;;
+
 //
-//     Adjust Index2 (x 32). 
+//      For x < 0 and y odd integer, set sign = -1.
 //
 { .mfi
-(p0)   ldfe FR_h_1 = [GR_Index1],0 
-	nop.f 999
-(p0)   pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 ;; 
-}
-{ .mmi
-	nop.m 999 ;;
-//
-//     load Z_1 from Index1
-//     abs_W = |W|
-//     Point to Table2 
-//
-(p0)   getf.exp GR_M = FR_abs_W 
-//
-//     M = M - BIAS 
-//     Load G_1 
-//     N = exponent of Z 
-//
-       nop.i 999;;
+      getf.exp GR_M = FR_W                      // Get signexp of W
+      nop.f 999
+      pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15      // X_2 = X_1 * Z_2 (bits 15-30)
 }
-{ .mmi
-       nop.m 999
-       nop.m 999
-       nop.i 999;;
+{ .mfi
+      ldfe FR_h_2 = [GR_Index2]                // Load h_2
+(p9)  fnma.s1 FR_Sgn = f1, f1, f0          // If x<0, y odd int, result negative
+      sub GR_N = GR_N, GR_exp_bias             // Get true exponent of x = N
 }
-{ .mmi
-       nop.m 999
-       nop.m 999
-       nop.i 999;;
+;;
+
+{ .mfi
+      add GR_Table_Ptr1 = 0xdc0, GR_table_base // Ptr to H_3
+      fcmp.eq.s0 p11, p0 = FR_Input_Y, FR_Half // Test y=0.5, also set denorm
+(p6)  shl GR_fraction_y=  GR_fraction_y, 1     // Shift left 1 to get fraction
 }
-{ .mmi
-        nop.m 999
-        nop.m 999
-(p0)   extr.u GR_Index2 = GR_X_1, 6, 4 ;;  
+;;
+
+{ .mmb
+      setf.sig FR_float_N = GR_N
+(p6)  cmp.ne.unc p8, p0 = GR_fraction_y, r0    // Test x<0 and y not integer
+(p8)  br.cond.spnt POWL_64_XNEG                // Branch if x<0 and y not int
 }
-{ .mii
-	nop.m 999
-//     
-//     Extract Index2 
-//     Load H_1
-//     Is -8 > M ?
+;;
+
 //
-(p0)   shl GR_Index2=GR_Index2,5 ;; 
-(p0)   add GR_Index2 = GR_Index2, GR_Table_Ptr  
-}
+//      Raise possible denormal operand exception for both X and Y.
+//      Set pointers in case |x| near 1
+//      Branch to embedded sqrt(x) if y=0.5
 //
-//     M = exponent of abs_W
-//     X_1 = X_0 * Z_1 
-//     
-{ .mii
-(p0)   sub GR_M = GR_M, GR_BIAS  
-	nop.i 999 ;;
-(p0)   cmp.gt.unc  p7, p14 =  -8, GR_M 
+{ .mfi
+      add GR_P_ptr1 = 0x6b0, GR_table_base // Constants_log_80_P, P8, NEAR path
+      fcmp.eq.s0 p12, p0 =  FR_Input_X, FR_Input_Y // Dummy to set denormal
+      add GR_P_ptr2 = 0x700, GR_table_base // Constants_log_80_P, P4, NEAR path
 }
-{ .mib
-	nop.m 999
-	nop.i 999
-(p7)   br.cond.spnt L(LOGL80_NEAR) ;; 
+{ .mfb
+      cmp.eq p15, p14 =  r0, r0            // Assume result safe (no over/under)
+      fsub.s1  FR_Delta = FR_Input_Y,f1    // Delta = y - 1.0
+(p11) br.cond.spnt POWL_64_SQRT            // Branch if y=0.5
 }
+;;
+
 //
-//     Load h_1
-//     Possible branch out.  
-//     Add offset of table to Index2 
+//     Computes ln( x ) to extra precision
+//     Input  FR 1: FR_X
+//     Output FR 2: FR_Y_hi
+//     Output FR 3: FR_Y_lo
+//     Output PR 1: PR_Safe
 //
 { .mfi
-(p0)   ld2 GR_Z_2 =[GR_Index2],4
-(p0)   fmerge.se FR_S =  f1,FR_Z
-(p0)   sub GR_N = GR_N, GR_BIAS  
+      and GR_M = GR_exp_mask, GR_M            // Mask to get exponent of W
+      nop.f 999
+      extr.u GR_Index3 = GR_X_2, 1, 5         // Get index3
 }
 ;;
 
 { .mmi
-      nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_h3_G_H#), gp
-      nop.i 999
+      shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 // Ptr to H_3
+      shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr      // Ptr to G_3
+      sub GR_M = GR_M, GR_exp_bias            // Get true exponent of W
 }
 ;;
 
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
-      nop.m 999
-      nop.i 999
+{ .mib
+      ldfs FR_G_3 = [GR_Index3],-12           // Load G_3
+      cmp.gt  p7, p14 =  -8, GR_M             // Test if |x-1| < 2^-8
+(p7)  br.cond.spnt LOGL80_NEAR                // Branch if |x-1| < 2^-8
 }
 ;;
 
-//     
-//     load Z_2 
-//     N - BIAS 
-//     Point to Table 3.
-//     S = merging of Z and 1.0
-//     
-{ .mmi
-(p0)   ldfs  FR_G_2 = [GR_Index2],4 
-(p0)   setf.sig FR_float_N = GR_N 
-(p0)   add GR_Table_Ptr1 = 0x200,GR_Table_Ptr ;;  
-}
-//
-//     load G_2 
-//     X_2 = X_1 * Z_2 
-//     Add offset to Table 2 ptr.
-//     float_N = significand of N
-//
-{ .mmi
-(p0)   ldfs  FR_H_2 = [GR_Index2],8 ;; 
-//
-//     load H_2 
-//     G = G * G_2
-//
-(p0)   ldfe FR_h_2 = [GR_Index2],0 
-(p0)   pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 ;; 
-}
-{ .mmi
-       nop.m 999
-       nop.m 999
-       nop.i 999;;
-}
-{ .mmi
-       nop.m 999
-       nop.m 999
-       nop.i 999;;
-}
-{ .mmi
-        nop.m 999
-        nop.m 999
-        nop.i 999;;
+// Here if |x-1| >= 2^-8
+{ .mmf
+      ldfs FR_H_3 = [GR_Table_Ptr1]           // Load H_3
+      nop.m 999
+      nop.f 999
 }
-{ .mii
-	nop.m 999
-	nop.i 999 ;;
-(p0)   extr.u GR_Index3 = GR_X_2, 1, 5 ;;  
+;;
+
+{ .mfi
+      ldfe FR_h_3 = [GR_Index3]               // Load h_3
+      fmerge.se FR_S =  f1,FR_Z               // S = merge of 1.0 and signif(Z)
+      nop.i 999
 }
 { .mfi
-(p0)   shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 
-	nop.f 999
+      add GR_Table_Ptr = 0x740, GR_table_base // Constants_log_80_Q
+      fmpy.s1 FR_G = FR_G_1, FR_G_2           // G = G_1 * G_2
+      nop.i 999
+}
+;;
+
 //
-//     h = h_1 + h_2  
-//     Adjust Index3 
+//     Begin Loading Q's -  load log2_hi part
 //
-(p0)   shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr ;; 
-}
-{ .mmb
-	nop.m 999
-(p0)   ldfe FR_h_3 = [GR_Index3],12 
-	nop.b 999 ;;
-}
-{ .mmf
-(p0)   ldfs FR_H_3 = [GR_Table_Ptr1],0 
+{ .mfi
+      ldfe FR_log2_hi = [GR_Table_Ptr],16     // Load log2_hi
+      fadd.s1 FR_H = FR_H_1, FR_H_2           // H = H_1 + H_2
+      nop.i 999
+};;
+
 //
-//     float_N = Make N a fp number
-//     Load h_3
-//     Get pointer to Q table.     
+//     h = h_1 + h_2
 //
-(p0)   ldfs  FR_G_3 = [GR_Index3],0 
-(p0)   fmpy.s1 FR_G = FR_G_1, FR_G_2 
+{ .mfi
+      ldfe FR_log2_lo = [GR_Table_Ptr],16     // Load log2_lo
+      fadd.s1 FR_h = FR_h_1, FR_h_2           // h = h_1 + h_2
+      nop.i 999
 }
 ;;
 
-{ .mmi
-      nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_Q#), gp
+{ .mfi
+      ldfe FR_Q_6 = [GR_Table_Ptr],16         // Load Q_6
+      fcvt.xf FR_float_N = FR_float_N
       nop.i 999
 }
 ;;
 
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
-      nop.m 999
+{ .mfi
+      ldfe FR_Q_5 = [GR_Table_Ptr],16         // Load Q_5
+      nop.f 999
       nop.i 999
 }
 ;;
 
-
-
-{ .mfi
-(p0)   ldfe FR_log2_hi = [GR_Table_Ptr],16
-(p0)   fadd.s1 FR_H = FR_H_1, FR_H_2 
-	nop.i 999 ;;
-}
-{ .mmf
-	nop.m 999
-//
-//     G = G_1 * G_2 * G_3 
-//
-(p0)   ldfe FR_log2_lo = [GR_Table_Ptr],16 
-//
-//     load h_2 
-//     H = H_1 + H_2 
-//     Get Index3
 //
-(p0)   fadd.s1 FR_h = FR_h_1, FR_h_2 ;; 
-}
-//
-//     Load log2_lo part
-//     r = G*S -1
+//     G = G_1 * G_2 * G_3
 //
 { .mfi
-(p0)   ldfe FR_Q_6 = [GR_Table_Ptr],16 
-//
-//     Load H_3
-//
-(p0)   fcvt.xf FR_float_N = FR_float_N 
-	nop.i 999 ;;
+      ldfe FR_Q_4 = [GR_Table_Ptr],16         // Load Q_4
+      fmpy.s1 FR_G = FR_G, FR_G_3
+      nop.i 999
 }
+;;
+
 //
-//     Load Q_6
+//     H = H_1 + H_2 + H_3
 //
-{ .mmi
-(p0)   ldfe FR_Q_5 = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_Q_4 = [GR_Table_Ptr],16 
-	nop.i 999 ;;
-}
-{ .mmi
-(p0)   ldfe FR_Q_3 = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_Q_2 = [GR_Table_Ptr],16 
-	nop.i 999 ;;
+{ .mfi
+      ldfe FR_Q_3 = [GR_Table_Ptr],16         // Load Q_3
+      fadd.s1 FR_H = FR_H, FR_H_3
+      nop.i 999
 }
-{ .mmf
-	nop.m 999
-//
-//     poly_lo = Q_5 + r * Q_6
-//     Load Q_2
-//     rsq = r * r 
+;;
+
 //
-(p0)   ldfe FR_Q_1 = [GR_Table_Ptr],16 
+//     Y_lo = poly + Y_lo
 //
-//     h = h_1 + h_2 + h_3   
-//     H = H_1 + H_2 + H_3 
-//     Load G_3.
-//     Begin Loading Q's -  load log2_hi part
+//     h = h_1 + h_2 + h_3
 //
-(p0)   fmpy.s1 FR_G = FR_G, FR_G_3 
-}
 { .mfi
-	nop.m 999
-(p0)   fadd.s1 FR_H = FR_H, FR_H_3 
-	nop.i 999 
+      ldfe FR_Q_2 = [GR_Table_Ptr],16         // Load Q_2
+      fadd.s1 FR_h = FR_h, FR_h_3
+      nop.i 999
 }
 ;;
 
 //
-//     Y_lo = poly + Y_lo 
+//     GS_hi = G*S
+//     r = G*S -1
 //
-
-{ .mmi
-      nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Arg#), gp
+{ .mfi
+      ldfe FR_Q_1 = [GR_Table_Ptr],16         // Load Q_1
+      fmpy.s1 FR_GS_hi = FR_G, FR_S
       nop.i 999
 }
-;;
-
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
       nop.m 999
+      fms.s1 FR_r = FR_G, FR_S, f1
       nop.i 999
 }
 ;;
 
-
-{ .mfi
-	nop.m 999
-(p0)   fadd.s1 FR_h = FR_h, FR_h_3 
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
 //
-//     Load Q_5
+//     poly_lo = Q_5 + r * Q_6
 //
-(p0)   fmpy.s1 FR_GS_hi = FR_G, FR_S 
-	nop.i 999
-}
-{ .mfi
-	nop.m 999
-(p0)   fms.s1 FR_r = FR_G, FR_S, f1 
-	nop.i 999 ;;
-}
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5 
-	nop.i 999
+      getf.exp GR_Delta_Exp =  FR_Delta     // Get signexp of y-1 for exp calc
+      fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
 //
-//     GS_hi = G*S
-//     Load Q_4
+//     r_cor = GS_hi -1
 //
-(p0)   fsub.s1 FR_r_cor = FR_GS_hi, f1 
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
-(p0)   fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi 
-	nop.i 999
-}
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1 
-	nop.i 999 ;;
+      nop.m 999
+      fsub.s1 FR_r_cor = FR_GS_hi, f1
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     Load Q_3
-//     r_cor = GS_hi -1
 //     GS_lo  = G*S - GS_hi
 //
-(p0)   fmpy.s1 FR_rsq = FR_r, FR_r 
-	nop.i 999
-}
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H 
-	nop.i 999 ;;
+      nop.m 999
+      fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     poly = poly_hi + rsq * poly_lo 
-//     Tbl = float_N*log2_hi + H
+//     rsq = r * r
 //
-(p0)   fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h 
-	nop.i 999 ;;
-}
 { .mfi
-	nop.m 999
-//
-//     r_cor = r_cor - r
-//     poly_hi = r * Q_2 + Q_1
-//
-(p0)   fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4 
-	nop.i 999
+      nop.m 999
+      fmpy.s1 FR_rsq = FR_r, FR_r
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
 //
-//     Load Q_1
+//     G = float_N*log2_hi + H
 //
-(p0)   fsub.s1 FR_r_cor = FR_r_cor, FR_r 
-	nop.i 999 ;;
-}
 { .mfi
-	nop.m 999
-// 
-//     Y_lo = float_N*log2_lo + h
-// 
-(p0)   fadd.s1 FR_Y_hi = FR_G, FR_r 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     poly_lo = Q_4 + r * poly_lo;;
-//     r_cor = r_cor + GS_lo;;
+//     Y_lo = float_N*log2_lo + h
 //
-(p0)   fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3 
-	nop.i 999
-}
 { .mfi
-	nop.m 999
-(p0)   fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo 
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
-(p0)   fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo 
-	nop.i 999
+      nop.m 999
+      fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     poly_lo = Q_3 + r * poly_lo;;
+//      poly_lo = Q_4 + r * poly_lo
+//      r_cor = r_cor - r
 //
-(p0)   fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly 
-	nop.i 999 ;;
-}
 { .mfi
-	nop.m 999
-(p0)   fsub.s1 FR_Y_lo = FR_G, FR_Y_hi 
-	nop.i 999
-}
-{ .mmi
-(p0)   ldfe FR_L_Inv = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_L_hi = [GR_Table_Ptr],16 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
+      nop.i 999
 }
 { .mfi
-(p0)   ldfe FR_L_lo = [GR_Table_Ptr],16 
-	nop.f 999
-	nop.i 999 ;;
+      nop.m 999
+      fsub.s1 FR_r_cor = FR_r_cor, FR_r
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     Y_hi = Tbl + r 
-//     r_cor = r_cor + Y_lo 
+//      poly_hi = r * Q_2 + Q_1
+//      Y_hi = G + r
 //
-(p0)   fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor 
-	nop.i 999 ;;
-}
 { .mfi
-	nop.m 999
-//     Y_lo = Tbl - Y_hi 
-//     poly = rsq * poly + r_cor
-//
-(p0)   fadd.s1 FR_Y_lo = FR_Y_lo, FR_r 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
+      nop.i 999
 }
-{ .mfb
-	nop.m 999
-//
-//     Y_lo =  Y_lo + r  
-//
-(p0)   fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly 
-//
-//     Load L_Inv
-//     Load L_hi
-//     Load L_lo
-//     all long before they are needed.
-//     They are used in LOGL_RETURN PATH
-//
-br.cond.sptk L(LOGL_RETURN) ;; 
+{ .mfi
+      nop.m 999
+      fadd.s1 FR_Y_hi = FR_G, FR_r
+      nop.i 999
 }
-L(LOGL80_NEAR): 
+;;
+
 //
-//     Branch LOGL80_NEAR
+//      poly_lo = Q_3 + r * poly_lo
+//      r_cor = r_cor + GS_lo
 //
-
-{ .mmi
+{ .mfi
       nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_log_80_P#), gp
+      fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
       nop.i 999
 }
-;;
-
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
       nop.m 999
+      fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
       nop.i 999
 }
 ;;
 
-{ .mfi
-	nop.m 999
-(p0)   fmpy.s1 FR_Wsq = FR_W, FR_W 
-(p0)   add GR_Table_Ptr1 = 0x50,GR_Table_Ptr  
-}
 //
-//     Adjust ptr to 1/2 
-//     Adjust Ptr1 to P_4
+//      Y_lo = G - Y_hi
 //
-{ .mmi
-(p0)   ldfe FR_Half = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_P_4 = [GR_Table_Ptr1],16 
-	nop.i 999
+{ .mfi
+      nop.m 999
+      fsub.s1 FR_Y_lo_2 = FR_G, FR_Y_hi
+      nop.i 999
 }
+;;
+
 //
-//     Load 1/2 
+//      r_cor = r_cor + Y_lo
+//      poly = poly_hi + rsq * poly_lo
 //
-{ .mmi
-(p0)   ldfe FR_P_8 = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_P_3 = [GR_Table_Ptr1],16 
-	nop.i 999
+{ .mfi
+      add  GR_Table_Ptr   = 0x0, GR_table_base   // Constants_exp_64_Arg
+      fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
+      nop.i 999
 }
-{ .mmi
-(p0)   ldfe FR_P_7 = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_P_2 = [GR_Table_Ptr1],16 
-	nop.i 999
+{ .mfi
+      nop.m 999
+      fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
+      nop.i 999
 }
+;;
+
 //
-//     Load P_7
-//     half_W = .5 * W
-//     Load P_3
-//
-{ .mmi
-(p0)   ldfe FR_P_6 = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_P_1 = [GR_Table_Ptr1],16 
-	nop.i 999 ;;
-}
+//      Load L_hi
+//      Load L_lo
+//      all long before they are needed.
+//      They are used in LOGL_RETURN PATH
 //
-//     Load P_6
-//     Wsq = w * w
-//     poly = w*P_4 + P_3 
-//     Load P_2
+//      Y_lo =  Y_lo + r
+//      poly = rsq * poly + r_cor
 //
 { .mfi
-(p0)   ldfe FR_P_5 = [GR_Table_Ptr],16 
-//
-//     Load P_5
-//     poly_lo =  w * P_8 + P_7 
-//     Y_hi = w - (1/2)w*w
-//     Load P_1
-//
-(p0)   fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq 
-	nop.i 999
+      ldfe FR_L_hi = [GR_Table_Ptr],16           // Load L_hi
+      fadd.s1 FR_Y_lo = FR_Y_lo_2, FR_r
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p0)   fmpy.s1 FR_W3 = FR_Wsq, FR_W 
-	nop.i 999 
+      nop.m 999
+      fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
+      nop.i 999
 }
 ;;
 
+{ .mfb
+      ldfe FR_L_lo = [GR_Table_Ptr],16           // Load L_lo
+      fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
+      br.cond.sptk LOGL_RETURN                   // Branch to common code
+}
+;;
+
+
+LOGL80_NEAR:
+// Here if |x-1| < 2^-8
 //
-//     Y_lo = W3 * poly + Y_lo
+//     Branch LOGL80_NEAR
 //
 
+{ .mmf
+      ldfe FR_P_8 = [GR_P_ptr1],16           // Load P_8
+      ldfe FR_P_4 = [GR_P_ptr2],16           // Load P_4
+      fmpy.s1 FR_Wsq = FR_W, FR_W
+}
+;;
+
 { .mmi
-      nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Arg#), gp
+      ldfe FR_P_7 = [GR_P_ptr1],16           // Load P_7
+      ldfe FR_P_3 = [GR_P_ptr2],16           // Load P_3
       nop.i 999
 }
 ;;
 
 { .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
-      nop.m 999
+      ldfe FR_P_6 = [GR_P_ptr1],16           // Load P_6
+      ldfe FR_P_2 = [GR_P_ptr2],16           // Load P_2
       nop.i 999
 }
 ;;
 
-
 { .mmi
-(p0)   ldfe FR_L_Inv = [GR_Table_Ptr],16 ;; 
-(p0)   ldfe FR_L_hi = [GR_Table_Ptr],16 
-	nop.i 999 ;;
-}
-{ .mfi
-(p0)   ldfe FR_L_lo = [GR_Table_Ptr],16 
-//
-//     Load P_8
-//     Load P_4
-//
-(p0)   fmpy.s1 FR_half_W = FR_Half, FR_W 
-	nop.i 999 ;;
+      ldfe FR_P_5 = [GR_P_ptr1],16           // Load P_5
+      ldfe FR_P_1 = [GR_P_ptr2],16           // Load P_1
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7 
-	nop.i 999
+      getf.exp GR_Delta_Exp =  FR_Delta      // Get signexp of y-1 for exp calc
+      fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3 
-	nop.i 999 ;;
+      add  GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
+      fmpy.s1 FR_W3 = FR_Wsq, FR_W
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W 
-	nop.i 999 ;;
+      nop.m 999
+      fmpy.s1 FR_half_W = FR_Half, FR_W
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     W4 = Wsq * Wsq
-//     poly = w *poly + P_2
-//
-(p0)   fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6 
-	nop.i 999
+      ldfe FR_L_hi = [GR_Table_Ptr],16
+      fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly = FR_W, FR_poly, FR_P_2 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fsub.s1 FR_Y_lo = FR_W, FR_Y_hi 
-	nop.i 999 ;;
+      ldfe FR_L_lo = [GR_Table_Ptr],16
+      fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     poly = w * poly + P_1
-//     w3 = wsq * w
-//
-(p0)   fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5 
-	nop.i 999
+      nop.m 999
+      fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-//
-//     poly_lo = w * poly_lo + P_6
-//     Y_lo = W - Y_hi
-//
-(p0)   fma.s1 FR_poly = FR_W, FR_poly, FR_P_1 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo 
-	nop.i 999 ;;
+      nop.m 999
+      fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     poly_lo = w * poly_lo + 
-//     Y_lo = Y_lo - w * (1/2)w
-//
-(p0)   fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-//
-//     Y_lo = (W-Y_hi) - w * (1/2)w
-//     poly =  W4* poly_lo + poly 
-//
-(p0)   fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
+      nop.i 999
 }
-L(LOGL_RETURN): 
+;;
+
 { .mfi
-(p0)   add GR_Expo_Range = 0x2,r0  
-//
-//     Load L_Inv
-//     Load L_hi
-//     Load L_lo
-//     all long before they are needed.
-//
-//
-//     kernel_log_80 computed ln(X)
-//     and return logX_hi and logX_lo as results.
-//     PR_pow_Safe set as well. 
-//
-(p0)   fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo 
-//
-//     Compute Y * (logX_hi + logX_lo) 
-//     P_hi -> X 
-//     P_lo -> X_cor 
-//     (Manipulate names so that inputs are in
-//     the place kernel_exp expects them)
-//     Set GR_Flag to 2 
-//     Set GR_Expo_Range to Double
-// 
-//     This function computes exp( x  + x_cor) 
-//     Input  FR 1: FR_X            
-//     Input  FR 2: FR_X_cor  
-//     Input  GR 1: GR_Flag  
-//     Input  GR 2: GR_Expo_Range  
-//     Output FR 3: FR_Y_hi  
-//     Output FR 4: FR_Y_lo  
-//     Output FR 5: FR_Scale  
-//     Output PR 1: PR_Safe  
-// 
-(p0)   cmp.eq.unc  p15, p0 =  r0, r0 
+      nop.m 999
+      fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
+      nop.i 999
 }
 ;;
 
-{ .mmi
-(p0)  addl           GR_W1_ptr   = @ltoff(Constants_exp_64_W1#), gp
-(p0)  addl           GR_W2_ptr   = @ltoff(Constants_exp_64_W2#), gp
-(p0)  add GR_Flag = 0x2,r0  
+{ .mfi
+      nop.m 999
+      fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
+      nop.i 999
 }
 ;;
 
-{ .mmi
-      ld8 GR_W1_ptr = [GR_W1_ptr]
-      ld8 GR_W2_ptr = [GR_W2_ptr]
-(p0)   cmp.ne.unc  p7, p0 =  0x1, GR_Flag 
+{ .mfi
+      nop.m 999
+      fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
+      nop.i 999
 }
 ;;
 
-{ .mlx
-	nop.m 999
-(p0)   movl GR_Mask = 0x1FFFF ;; 
-}
 
+LOGL_RETURN:
+// Common code for completion of both logx paths
 
-{ .mlx
-	nop.m 999
-(p0)   movl GR_BIAS = 0x0FFFF ;; 
-}
-{ .mfi
-	nop.m 999
 //
-//     X_lo =  Y * logX_lo
+//     L_hi, L_lo already loaded.
 //
-(p0)   fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo 
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
 //
-//     Set Safe=True 
-//     Flag is always 2 for this routine
+//     kernel_log_80 computed ln(X)
+//     and return logX_hi and logX_lo as results.
+//     PR_pow_Safe set as well.
 //
-(p0)   fmpy.s1 FR_float_N = FR_X, FR_L_Inv 
-	nop.i 999
-}
-{ .mfi
-	nop.m 999
 //
-//     X_hi  = Y * logX_hi + X_lo
-//     Set GR_Flag = 2 for exp(x + xcor)
+//     Compute Y * (logX_hi + logX_lo)
+//     P_hi -> X
+//     P_lo -> X_cor
+//     (Manipulate names so that inputs are in
+//     the place kernel_exp expects them)
 //
-(p0)   fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi 
-	nop.i 999 ;;
+//     This function computes exp( x  + x_cor)
+//     Input  FR 1: FR_X
+//     Input  FR 2: FR_X_cor
+//     Output FR 3: FR_Y_hi
+//     Output FR 4: FR_Y_lo
+//     Output FR 5: FR_Scale
+//     Output PR 1: PR_Safe
+//
+//     P15 is True
+//
+// Load constants used in computing N using right-shift technique
+{ .mlx
+      mov GR_exp_2tom51 = 0xffff-51
+      movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc  // significand of 1/ln2
 }
-{ .mmi
-	nop.m 999 ;;
-(p0)   getf.exp GR_Expo_X = FR_X 
-	nop.i 999 ;;
+{ .mlx
+      add  GR_Special_Exp = -50,GR_exp_bias
+      movl GR_rshf_2to51 = 0x4718000000000000   // 1.10000 2^(63+51)
 }
-{ .mfi
-(p0)   and GR_Expo_X = GR_Expo_X, GR_Mask  
+;;
+
 //
-//     Calculate unBIASed exponent of X
 //     Point to Table of W1s
 //     Point to Table of W2s
 //
-(p0)   fcvt.fx.s1 FR_N = FR_float_N 
-	nop.i 999 ;;
-}
+{ .mmi
+      add GR_W1_ptr   = 0x2b0, GR_table_base    // Constants_exp_64_W1
+      add GR_W2_ptr   = 0x4b0, GR_table_base    // Constants_exp_64_W2
+      cmp.le p6,p0= GR_Delta_Exp,GR_Special_Exp
+};;
+
+// Form two constants we need
+//  1/ln2 * 2^63  to compute  w = x * 1/ln2 * 128
+//  1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
+
 { .mfi
-	nop.m 999
-(p0)   fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo 
-//
-//     Float_N = X * L_Inv
-//     Create exponent BIAS
-//     Get BIASed exponent of X
-//
-(p0)   sub GR_Expo_X = GR_Expo_X, GR_BIAS ;;  
+      setf.sig  FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63
+      nop.f 999
+      and GR_Delta_Exp=GR_Delta_Exp,GR_exp_mask  // Get exponent of y-1
 }
-{ .mib
-(p0)   cmp.gt.unc  p9, p0  =  -6, GR_Expo_X 
-	nop.i 999
-//
-//     N = fcvt.fx(float_N)
-//     If -6 > Expo_X, set P9
-//
-(p9)   br.cond.spnt L(EXPL_SMALL) 
+{ .mlx
+      setf.d  FR_RSHF_2TO51 = GR_rshf_2to51    // Form const 1.1000 * 2^(63+51)
+      movl GR_rshf = 0x43e8000000000000        // 1.10000 2^63 for right shift
 }
 ;;
 
-//
-//     If expo_X < -6 goto exp_small
-//
-{ .mmi
+{ .mfi
       nop.m 999
-(p0)  addl           GR_T1_ptr   = @ltoff(Constants_exp_64_T1#), gp
-(p0)  cmp.lt.unc  p10, p0 =  14, GR_Expo_X 
+      fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo // logx_lo is Y_lo
+      cmp.eq  p15, p0=  r0, r0                 // Set p15, assume safe
+};;
+
+{ .mmi
+      setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N
+      setf.d  FR_RSHF = GR_rshf          // Form right shift const 1.1000 * 2^63
+      add GR_Table_Ptr1   = 0x50, GR_table_base // Constants_exp_64_P for
+                                                // EXPL_SMALL path
 }
 ;;
 
 { .mmi
-      ld8 GR_T1_ptr = [GR_T1_ptr]
-      nop.m 999
+      ldfe FR_P_6 = [GR_Table_Ptr1],16          // Load P_6 for EXPL_SMALL path
+;;
+      ldfe FR_P_5 = [GR_Table_Ptr1],16          // Load P_5 for EXPL_SMALL path
       nop.i 999
 }
 ;;
 
-{ .mib
-	nop.m 999
-	nop.i 999
-//
-//     If 14 < Expo_X, set P10
-//     Create pointer to T1 table
-//  
-(p10)  br.cond.spnt L(EXPL_HUGE) ;;
+{ .mfi
+      ldfe FR_P_4 = [GR_Table_Ptr1],16          // Load P_4 for EXPL_SMALL path
+      fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo  // logx_hi is Y_hi
+      nop.i 999
 }
-
+;;
 
 { .mmi
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Exponents#), gp
-(p0)  addl           GR_T2_ptr   = @ltoff(Constants_exp_64_T2#), gp
+      ldfe FR_P_3 = [GR_Table_Ptr1],16          // Load P_3 for EXPL_SMALL path
+;;
+      ldfe FR_P_2 = [GR_Table_Ptr1],16          // Load P_2 for EXPL_SMALL path
       nop.i 999
 }
 ;;
 
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
-      ld8 GR_T2_ptr = [GR_T2_ptr]
+// N = X * Inv_log2_by_2^12
+// By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.
+// We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.
+{ .mfi
+      ldfe FR_P_1 = [GR_Table_Ptr1]             // Load P_1 for EXPL_SMALL path
+      fma.s1 FR_N = FR_X, FR_INV_LN2_2TO63, FR_RSHF_2TO51
       nop.i 999
 }
+{ .mfb
+      nop.m 999
+      fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi  // P_hi is X
+(p6)  br.cond.spnt POWL_Y_ALMOST_1              // Branch if |y-1| < 2^-50
+}
 ;;
 
-
 { .mmi
-(p0)   shladd GR_Table_Ptr = GR_Expo_Range,4,GR_Table_Ptr ;;  
-//
-//     Adjust T1_ptr by x 4 for single-precision values
-//     Adjust T2_ptr by x 4 for single-precision values
-//
-(p0)   ld8 GR_Big_Pos_Exp = [GR_Table_Ptr],8
-	nop.i 999 ;;
-}
-//
-//     Load double W1
-//     Load +max exponent
-//
-{ .mfi
-(p0)   ld8 GR_Big_Neg_Exp = [GR_Table_Ptr],0
-//
-//     If 14 < Expo_X, goto exp_huge
-//
-(p0)   fcvt.xf FR_float_N = FR_N 
-	nop.i 999 
+      getf.exp GR_Expo_X = FR_X
+      add GR_T1_ptr   = 0x0b0, GR_table_base    // Constants_exp_64_T1
+      add GR_T2_ptr   = 0x1b0, GR_table_base    // Constants_exp_64_T2
 }
 ;;
 
-//
-//     Load double W2
-//     Load -max exponent
-//     Load ptr to A's
-//
+// float_N = round_int(N)
+// The significand of N contains the rounded integer part of X * 2^12/ln2,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into GR_N_fix.
 
-{ .mmi
-(p0)  getf.sig GR_N_fix = FR_N 
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_A#), gp
+// Since N is scaled by 2^51, it must be multiplied by 2^-51
+// before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.
+// Thus, float_N contains the floating point version of N
+
+
+{ .mfi
+      add  GR_Table_Ptr   = 0x20, GR_table_base    // Constants_exp_64_A
+      fms.s1 FR_float_N = FR_N, FR_2TOM51, FR_RSHF // Form float_N
       nop.i 999
 }
-;;
+//     Create low part of Y(ln(x)_hi + ln(x)_lo) as P_lo
+{ .mfi
+      mov GR_Big_Pos_Exp = 0x3ffe               // 16382, largest safe exponent
+      fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
+      mov GR_Big_Neg_Exp = -0x3ffd              // -16381 smallest safe exponent
+};;
 
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
+{ .mfi
       nop.m 999
-      nop.i 999
+      fmpy.s1 FR_rsq = FR_X, FR_X               // rsq = X*X for EXPL_SMALL path
+      mov GR_vsm_expo = -70                     // Exponent for very small path
+}
+{ .mfi
+      nop.m 999
+      fma.s1 FR_poly_lo = FR_P_6, FR_X, FR_P_5  // poly_lo for EXPL_SMALL path
+      add GR_temp = 0x1,r0                      // For tiny signif if small path
 }
 ;;
 
 //
-//     Load single T1
-//     Load single T2
-//     W_1_p1 = W_1 + 1
-//
-{ .mmi
-(p0)   ldfe FR_A_3 = [GR_Table_Ptr],16 ;; 
-//
-//     Load A_3
-//     if k > big_pos_exp, set p14 and Safe=False
-//
-(p0)   ldfe FR_A_2 = [GR_Table_Ptr],16 
-(p0)   extr.u GR_M1 = GR_N_fix, 6, 6  
-}
-{ .mmi
-	nop.m 999 ;;
-(p0)   shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr  
-//
-//     float_N = fcvt.xf(N) 
-//     N_fix = significand of N
-//     Create pointer to T2 table
-//
-(p0)   extr.u GR_M2 = GR_N_fix, 0, 6  
-}
-//
-//     r = r + X_cor
-//     Adjust W1_ptr by x 8 for double-precision values
-//     Adjust W2_ptr by x 8 for double-precision values
-//     Adjust Table_ptr by Expo_Rangex16 
+//      If expo_X < -6 goto exp_small
 //
 { .mmi
-(p0)   shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr ;;  
-(p0)   ldfd  FR_W1 = [GR_W1_ptr],0 
-(p0)   shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr  
+      getf.sig GR_N_fix = FR_N
+      ldfe FR_A_3 = [GR_Table_Ptr],16         // Load A_3
+      and GR_Expo_X = GR_Expo_X, GR_exp_mask  // Get exponent of X
 }
-//
-//     Load ptr to A's
-//
+;;
+
 { .mfi
-(p0)   ldfs  FR_T1 = [GR_T1_ptr],0 
-(p0)   fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X 
-(p0)   shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr ;; 
+      ldfe FR_A_2 = [GR_Table_Ptr],16         // Load A_2
+      nop.f 999
+      sub GR_Expo_X = GR_Expo_X, GR_exp_bias  // Get true exponent of X
 }
-{ .mmi
-(p0)   ldfd  FR_W2 = [GR_W2_ptr],0 
-(p0)   ldfs  FR_T2 = [GR_T2_ptr],0 
+;;
+
 //
-//     r = x - L_hi * float_N
-//     M2 = extr.u(N_fix,0,6)
-//     M1 = extr.u(N_fix,6,6)
+//     If -6 > Expo_X, set P9 and branch
 //
-(p0)   extr GR_k = GR_N_fix, 12, 52 ;;  
+{ .mfb
+      cmp.gt  p9, p0  =  -6, GR_Expo_X
+      fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X // r = X - L_hi * float_N
+(p9)  br.cond.spnt EXPL_SMALL                  // Branch if |X| < 2^-6
 }
+;;
+
 //
-//     Load A_1
-//     poly = A_3 * r + A_2
-//     rsq = r*r
+//     If 14 <= Expo_X, set P10
 //
-{ .mii
-(p0)   add GR_BIAS_p_k = GR_BIAS, GR_k  
-(p0)   cmp.gt.unc  p14,p15 = GR_k,GR_Big_Pos_Exp ;; 
-(p15)  cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
+{ .mib
+      cmp.le  p10, p0 =  14, GR_Expo_X
+      nop.i 999
+(p10) br.cond.spnt EXPL_HUGE                   // Branch if |X| >= 2^14
 }
+;;
+
 //
-//     BIAS_p_K = BIAS + k
-//     T = T1 * T2
+//      Load single T1
+//      Load single T2
+//      W_1_p1 = W_1 + 1
 //
-{ .mfi
-(p0)   setf.exp FR_Scale = GR_BIAS_p_k  
-	nop.f 999
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
-(p0)   fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r 
-	nop.i 999
+{ .mmi
+      nop.m 999
+      nop.m 999
+      extr.u GR_M1 = GR_N_fix, 6, 6            // Extract index M_1
 }
+;;
+
 //
-//     W = W_1_p1 * W2 + W1
+//      M2 = extr.u(N_fix,0,6)
 //
-{ .mfi
-(p0)   ldfe FR_A_1 = [GR_Table_Ptr],16 
-	nop.f 999
-	nop.i 999 ;;
+{ .mmi
+      shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr     // Point to W1
+      shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr     // Point to T1
+      extr.u GR_M2 = GR_N_fix, 0, 6            // Extract index M_2
 }
-{ .mfi
-	nop.m 999
-(p0)   fadd.s1 FR_W_1_p1 = FR_W1, f1 
-	nop.i 999 ;;
+;;
+
+// N_fix is only correct up to 50 bits because of our right shift technique.
+// Actually in the normal path we will have restricted K to about 14 bits.
+// Somewhat arbitrarily we extract 32 bits.
+{ .mmi
+      ldfd  FR_W1 = [GR_W1_ptr]
+      shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr     // Point to W2
+      extr GR_k = GR_N_fix, 12, 32             // Extract k
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     k = extr.u(N_fix,0,6)
-//     r = r - N * L_lo
-//     Load ptr to Table of exponent thresholds.
-//
-(p0)   fadd.s1 FR_r = FR_r, FR_X_cor 
-	nop.i 999
+      ldfs  FR_T1 = [GR_T1_ptr]
+      fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
+      shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr     // Point to T2
 }
 { .mfi
-	nop.m 999
-(p0)   fmpy.s1 FR_T = FR_T1, FR_T2 
-	nop.i 999 ;;
+      add GR_exp_bias_p_k = GR_exp_bias, GR_k
+      nop.f 999
+      cmp.gt  p14,p15 = GR_k,GR_Big_Pos_Exp
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     if k < big_neg_exp, set p14 and Safe=False
-//     Load A_2
+//      if k < big_neg_exp, set p14 and Safe=False
 //
-(p0)   fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1 
-	nop.i 999 ;;
+{ .mmi
+      ldfs  FR_T2 = [GR_T2_ptr]
+(p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2 
-	nop.i 999
+;;
+
+{ .mmi
+      setf.exp FR_Scale = GR_exp_bias_p_k
+      ldfd  FR_W2 = [GR_W2_ptr]
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fmpy.s1 FR_rsq = FR_r, FR_r 
-	nop.i 999 ;;
+      ldfe FR_A_1 = [GR_Table_Ptr],16
+      fadd.s1 FR_r = FR_r, FR_X_cor
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   mov FR_Y_hi = FR_T 
-	nop.i 999 ;;
+      nop.m 999
+      fadd.s1 FR_W_1_p1 = FR_W1, f1
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     Scale = set_exp(BIAS_p_k)
-//     poly = r * poly + A_1
-//
-(p0)   fadd.s1 FR_Wp1 = FR_W, f1 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly = FR_r, FR_poly, FR_A_1 
-	nop.i 999 ;;
+      nop.m 999
+      fmpy.s1 FR_rsq = FR_r, FR_r
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly = FR_rsq, FR_poly,FR_r  
-	nop.i 999 ;;
+      nop.m 999
+      fmpy.s1 FR_T = FR_T1, FR_T2
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     Wp1 =  W + 1
-//     poly = rsq * poly + rk
-//
-(p0)   fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W 
-	nop.i 999 ;;
-}
-{ .mfb
-	nop.m 999
-//
-//     Y_lo = poly * Wp1 + W 
-//     Y_hi = T
-//
-(p0)   fmpy.s1 FR_Y_lo = FR_Y_lo, FR_T 
-//
-//     Y_lo = T * Y_lo
-//
-(p0)   br.cond.sptk L(EXPL_RETURN) ;; 
+      nop.m 999
+      fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
+      nop.i 999
 }
+;;
 
-L(EXPL_SMALL): 
-
-//
-//     r4 = rsq * rsq
-//
-
-{ .mmi
+{ .mfi
       nop.m 999
-(p0)  addl           GR_Table_Ptr1   = @ltoff(Constants_exp_64_P), gp
+      fma.s1 FR_TMP1 = FR_Scale, FR_Sgn, f0
       nop.i 999
 }
 ;;
 
-{ .mmi
-      ld8 GR_Table_Ptr1 = [GR_Table_Ptr1]
+{ .mfi
       nop.m 999
+      fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
       nop.i 999
 }
 ;;
 
-{ .mmf
-	nop.m 999
-(p0)   ldfe FR_P_6 = [GR_Table_Ptr1],16 
-//
-//     Return  
-//
-(p0)   fadd.s1 FR_r = FR_X,f0 ;; 
+{ .mfi
+      nop.m 999
+      fma.s1 FR_TMP2 = FR_T, f1, f0            // TMP2 = Y_hi = T
+      nop.i 999
 }
+;;
 
-{ .mmi
+{ .mfi
       nop.m 999
-(p0)  addl           GR_Table_Ptr   = @ltoff(Constants_exp_64_Exponents#), gp
+      fadd.s1 FR_Wp1 = FR_W, f1
       nop.i 999
 }
 ;;
 
-{ .mmi
-      ld8 GR_Table_Ptr = [GR_Table_Ptr]
-(p0)  ldfe FR_P_5 = [GR_Table_Ptr1],16 
+{ .mfi
+      nop.m 999
+      fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
       nop.i 999
 }
 ;;
 
-//
-//     Is input very small? 
-//     Load P_5
-//
-{ .mii
-(p0)   ldfe FR_P_4 = [GR_Table_Ptr1],16 
-(p0)   add GR_Table_Ptr = 0x040,GR_Table_Ptr ;;  
-(p0)   shladd GR_Table_Ptr = GR_Expo_Range,3,GR_Table_Ptr ;;  
-}
-{ .mmb
-(p0)   ldfe FR_P_3 = [GR_Table_Ptr1],16 
-//
-//     Adjust ptr.
-//
-(p0)   ld8  GR_vsm_expo = [GR_Table_Ptr],0
-	nop.b 999 ;;
-}
 { .mfi
-	nop.m 999
-//
-//     r = X (don't seem to need X_Cor) 
-//     Load the threshold exponents
-//
-(p0)   fmpy.s1 FR_rsq = FR_r, FR_r 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_Tscale = FR_T, FR_TMP1, f0    // Scale * Sgn * T
+      nop.i 999
 }
-//
-//     Load the negative integer
-//     Load P_5
-//
 { .mfi
-(p0)   cmp.lt.unc  p12, p0 =  GR_Expo_X, GR_vsm_expo 
-	nop.f 999
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
+      nop.i 999
 }
+;;
+
 { .mfb
-	nop.m 999
-//
-//     rsq = r * r
-//     Offset into exponents
-//
-(p0)   fmpy.s1 FR_r4 = FR_rsq, FR_rsq 
-(p12)  br.cond.spnt L(EXPL_VERY_SMALL) ;; 
+      nop.m 999
+      fmpy.s1 FR_TMP3 = FR_Y_lo, FR_Tscale
+      br.cond.sptk POWL_64_SHARED
 }
-{ .mfi
-(p0)   ldfe FR_P_2 = [GR_Table_Ptr1],16 
-//
-//     Load p4,p3,p2,p1
-//
-(p0)   fma.s1 FR_poly_lo = FR_P_6, FR_r, FR_P_5 
+;;
+
+
+EXPL_SMALL:
+// Here if |ylogx| < 2^-6
 //
-//     Y_lo = r4 * poly_lo + poly_hi
-//     Scale = 1.0
+//     Begin creating lsb to perturb final result
 //
-(p0)   add GR_temp = 0x1,r0 ;;  
+{ .mfi
+      setf.sig FR_temp = GR_temp
+      fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_4
+      cmp.lt  p12, p0 =  GR_Expo_X, GR_vsm_expo   // Test |ylogx| < 2^-70
 }
-{ .mmf
-	nop.m 999
-(p0)   ldfe FR_P_1 = [GR_Table_Ptr1],0 
-(p0)   mov FR_Scale = f1 
+{ .mfi
+      nop.m 999
+      fma.s1 FR_poly_hi = FR_P_2, FR_X, FR_P_1
+      nop.i 999
 }
-//
-//     Begin creating lsb to perturb final result
-//
+;;
+
 { .mfi
-(p0)   setf.sig FR_temp = GR_temp 
-(p0)   mov FR_Y_hi = f1 
-	nop.i 999 ;;
+      nop.m 999
+      fmpy.s1 FR_TMP2 = f1, f1
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-//
-//     poly_lo = p_5 + p_6 * r
-//     poly_hi = p_1 + p_2 * r
-//
-(p0)   fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_4 
-	nop.i 999 ;;
+      nop.m 999
+      fmpy.s1 FR_TMP1 = FR_Sgn, f1
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     poly_lo = p_4 + poly_lo * r
-//     poly_hi = r + poly_hi * rsq
-//
-(p0)   fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_P_3 
-	nop.i 999
+      nop.m 999
+      fmpy.s1 FR_r4 = FR_rsq, FR_rsq
+(p12) cmp.eq  p15, p0 =  r0, r0                   // Set safe if |ylogx| < 2^-70
 }
+{ .mfb
+      nop.m 999
+(p12) fmpy.s1 FR_TMP3 = FR_Sgn, FR_X
+(p12) br.cond.spnt POWL_64_SHARED                 // Branch if |ylogx| < 2^-70
+}
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly_hi = FR_P_2, FR_r, FR_P_1 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_3
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p0)   fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_r 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_X
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//  
-//     poly_lo = p_3 + poly_lo * r
-//     Y_hi = 1, always
-//  
-(p0)   fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi 
-	nop.i 999 ;;
+      nop.m 999
+      fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     Set lsb in fp register
-// 
-(p0)   for FR_temp = FR_Y_lo,FR_temp 
-	nop.i 999 ;;
+      nop.m 999
+      fmpy.s1 FR_TMP3 = FR_Y_lo, FR_TMP1      // Add sign info
+      nop.i 999
 }
-{ .mfb
-	nop.m 999
+;;
+
 //
 //     Toggle on last bit of Y_lo
-//
-(p0)   fmerge.se FR_Y_lo = FR_Y_lo,FR_temp 
-//
 //     Set lsb of Y_lo to 1
 //
-(p0)   br.cond.sptk L(EXPL_RETURN) ;; 
-}
-L(EXPL_VERY_SMALL): 
 { .mfi
-	nop.m 999
-(p0)   mov FR_Y_lo = FR_r 
-(p0)   cmp.eq.unc  p15, p0 =  r0, r0 
+      nop.m 999
+      for FR_temp = FR_Y_lo,FR_temp
+      nop.i 999
 }
-{ .mfi
-       nop.m 999
-(p0)   mov FR_Scale = f1
-       nop.i 999
-};;
+;;
+
 { .mfb
-	nop.m 999
-(p0)   mov FR_Y_hi = f1 
-//
-//     If flag_not_1, 
-//     Y_hi = 1.0 
-//     Y_lo = X + X_cor
-//     PR_Safe = true
-//
-(p0)   br.cond.sptk L(EXPL_RETURN) ;; 
+      nop.m 999
+      fmerge.se FR_TMP3 = FR_TMP3,FR_temp
+      br.cond.sptk POWL_64_SHARED
 }
-L(EXPL_HUGE): 
+;;
+
+
+EXPL_HUGE:
+// Here if |ylogx| >= 2^14
 { .mfi
-	nop.m 999
-//
-//     Return for flag=2 
-//
-(p0)   fcmp.gt.unc.s1 p12, p13 =  FR_X, f0 
-(p0)   cmp.eq.unc  p14, p15 =  r0, r0 ;; 
+      mov GR_temp = 0x0A1DC               // If X < 0, exponent -24100
+      fcmp.gt.s1 p12, p13 =  FR_X, f0     // Test X > 0
+      cmp.eq  p14, p15 =  r0, r0          // Set Safe to false
 }
-{ .mlx
-	nop.m 999
-//
-//     Set Safe to false
-//     Is x > 0
-//
-(p12)  movl GR_Mask = 0x15DC0 ;; 
-}
-{ .mlx
-(p12)  setf.exp FR_Y_hi = GR_Mask 
-(p13)  movl GR_Mask = 0xA240 ;; 
+;;
+
+{ .mmi
+(p12) mov GR_Mask = 0x15DC0               // If X > 0, exponent +24000
+(p13) mov GR_Mask = 0x0A240               // If X < 0, exponent -24000
+      nop.i 999
 }
-{ .mlx
-(p13)  setf.exp FR_Y_hi = GR_Mask 
-//     
-//     x > 0: Create mask for Y_hi = 2**(24,000) 
-//     x <= 0: Create mask for Y_hi = 2**(-24,000) 
-//
-(p13)  movl GR_temp = 0xA1DC ;; 
+;;
+
+{ .mmf
+      setf.exp FR_TMP2 = GR_Mask          // Form Y_hi = TMP2
+(p13) setf.exp FR_Y_lo = GR_temp          // If X < 0, Y_lo = 2^-24100
+(p12) mov FR_Y_lo = f1                    // If X > 0, Y_lo = 1.0
 }
+;;
+
 { .mfi
-(p13)  setf.exp FR_Y_lo = GR_temp 
-//
-//     x < =0: Create mask for 2**(-24,100)
-//     x <= 0: Y_lo = w**(-24,100)
-//
-(p12)  mov FR_Y_lo = f1 
-	nop.i 999 ;;
+      nop.m 999
+      fmpy.s1 FR_TMP1 = FR_TMP2, FR_Sgn   // TMP1 = Y_hi * Sgn
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
-(p12)  mov FR_Scale =  FR_Y_hi 
-	nop.i 999 ;;
+;;
+
+{ .mfb
+      nop.m 999
+      fmpy.s1 FR_TMP3 = FR_Y_lo,FR_TMP1   // TMP3 = Y_lo * (Y_hi * Sgn)
+      br.cond.sptk POWL_64_SHARED
 }
-{ .mfi
-	nop.m 999
+;;
+
+POWL_Y_ALMOST_1:
+// Here if delta = |y-1| < 2^-50
 //
-//     x > 0: Y_lo = 1.0
-//     x > 0: Scale = 2**(24,000) 
+//  x**(1 + delta) = x * e**(ln(x)*delta) ~= x * (1 + ln(x)*delta)
 //
-(p13)  mov FR_Scale = FR_Y_hi 
-	nop.i 999 ;;
-}
-L(EXPL_RETURN): 
+// Computation will be safe for 2^-16381 <= x < 2^16383
+
 { .mfi
-	nop.m 999
-//
-//     Scale = 2**(24,000)
-//
-//
-//     exp(y *ln(x)) almost complete 
-//     FR_Scale is Scale
-//     f34 is Z_hi 
-//     f35 is Z_lo 
-//
-(p0)   fmpy.s1 FR_Sgn = FR_Scale, FR_Sgn  
-	nop.i 999 ;;
+       mov GR_exp_ynear1_oflow = 0xffff + 16383
+       fma.s1 FR_TMP1 = FR_Input_X,FR_Delta,f0
+       and GR_exp_x = GR_exp_mask, GR_signexp_x
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     sgn * scale 
-//
-(p0)   fmpy.s1 FR_Y_lo = FR_Y_lo,FR_Sgn   
-	nop.i 999 ;;
+       cmp.lt  p15, p14 =  GR_exp_x, GR_exp_ynear1_oflow
+       fma.s1 FR_TMP2 = FR_logx_hi,f1,FR_X_lo
+       mov GR_exp_ynear1_uflow = 0xffff - 16381
 }
+;;
+
 { .mfb
-	nop.m 999
-//
-//     Z_lo * (sgn * scale) 
+(p15)  cmp.ge  p15, p14 =  GR_exp_x, GR_exp_ynear1_uflow
+       fma.s1 FR_TMP3 = FR_Input_X,f1,f0
+       br.cond.sptk POWL_64_SHARED
+};;
+
+POWL_64_SQUARE:
 //
-(p0)   fma.s0 FR_Result = FR_Y_hi, FR_Sgn, FR_Y_lo  
+//      Here if x not zero and y=2.
 //
-//     Z_hi * (sgn * scale)  + Z_lo
+//      Setup for multipath code
 //
-(p15)  br.cond.sptk L(POWL_64_RETURN) ;;
-}
 { .mfi
-	nop.m 999
-(p0)   fsetc.s3 0x7F,0x01
-	nop.i 999
-}
-{ .mlx
-	nop.m 999
-//
-//     Z_hi * (sgn * scale)  + Z_lo with wre & td
-//     Z_hi * (sgn * scale)  + Z_lo with fz  & td
-//
-(p0)   movl GR_T1_ptr = 0x00000000013FFF ;;
+      mov GR_exp_square_oflow = 0xffff + 8192   // Exponent where x*x overflows
+      fmerge.se FR_TMP1 = FR_Input_X, FR_Input_X
+      and GR_exp_x = GR_exp_mask, GR_signexp_x  // Get exponent of x
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fma.s3  FR_Result_small = FR_Y_hi, FR_Sgn, FR_Y_lo  
-	nop.i 999
+      cmp.lt  p15, p14 =  GR_exp_x, GR_exp_square_oflow // Decide safe/unsafe
+      fmerge.se FR_TMP2 = FR_Input_X, FR_Input_X
+      mov GR_exp_square_uflow = 0xffff - 8191   // Exponent where x*x underflows
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fsetc.s3 0x7F,0x40
-	nop.i 999 ;;
+(p15) cmp.ge  p15, p14 =  GR_exp_x, GR_exp_square_uflow // Decide safe/unsafe
+      fma.s1 FR_TMP3 = f0,f0,f0
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     Return if no danger of over of underflow.
+//      This is the shared path that will set overflow and underflow.
 //
-(p0)   fsetc.s2 0x7F,0x42
-	nop.i 999;;
-}
-{ .mfi
-	nop.m 999
+POWL_64_SHARED:
+
 //
-//     S0 user supplied status
-//     S2 user supplied status + WRE + TD  (Overflows)
-//     S3 user supplied status + FZ + TD   (Underflows)
+//      Return if no danger of over or underflow.
 //
-(p0)   fma.s2  FR_Result_big = FR_Y_hi, FR_Sgn, FR_Y_lo  
-	nop.i 999 ;;
+{ .mfb
+      nop.m 999
+      fma.s0 FR_Result = FR_TMP1, FR_TMP2, FR_TMP3
+(p15) br.ret.sptk  b0      // Main path return if certain no over/underflow
 }
+;;
+
 //
-//     S0 user supplied status
-//     S2 user supplied status + WRE + TD  (Overflows)
-//     S3 user supplied status + FZ + TD   (Underflows)
+//      S0 user supplied status
+//      S2 user supplied status + WRE + TD  (Overflows)
+//      S2 user supplied status + FZ + TD   (Underflows)
 //
 //
 //     If (Safe) is true, then
@@ -2430,973 +2021,741 @@ L(EXPL_RETURN):
 //        No overflow or underflow here, but perhaps inexact.
 //        Return
 //     Else
-//       Determine if overflow or underflow  was raised.
-//       Fetch +/- overflow threshold for IEEE single, double,
-//       double extended
-//
-{ .mfi
-(p0)   setf.exp FR_Big = GR_T1_ptr
-(p0)   fsetc.s2 0x7F,0x40
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
-(p0)   fclass.m.unc   p11, p0 =  FR_Result_small, 0x00F
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
-(p0)   fmerge.ns FR_NBig = FR_Big, FR_Big
-	nop.i 999
-}
-{ .mfi
-	nop.m 999
-//
-//     Create largest double exponent + 1.
-//     Create smallest double exponent - 1.
-//     Identify denormals
-//
-(p0)   fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
-	nop.i 999 ;;
-}
-{ .mii
-	nop.m 999
-	nop.i 999 ;;
-//
-//     fcmp:   resultS2 <= - overflow threshold  
-//     fclass: resultS3 is denorm/unorm/0       
-//
-(p8)   mov   GR_Parameter_TAG = 18 ;;
-}
-{ .mfb
-	nop.m 999
-//
-//     fcmp:   resultS2 >= + overflow threshold  
-//
-(p0)   fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
-(p8)   br.cond.spnt __libm_error_region ;;
-}
-{ .mii
-	nop.m 999
-	nop.i 999 ;;
-(p9)   mov   GR_Parameter_TAG = 18
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p9)   br.cond.spnt __libm_error_region ;;
-}
-//
-//     Report that pow overflowed - either +Inf, or -Inf
-//
-{ .mmb
-(p11)  mov   GR_Parameter_TAG = 19
-	nop.m 999
-(p11)  br.cond.spnt __libm_error_region ;;
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-//
-//     Report that pow underflowed
-//
-(p0)   br.cond.sptk L(POWL_64_RETURN) ;;
-}
-
+//       Determine if overflow or underflow was raised.
+//       Fetch +/- overflow threshold for IEEE double extended
 
-L(POWL_64_SQUARE):
-// Here if x not zero and y=2.
-// Must call __libm_error_support for overflow or underflow
-//
-//     S0 user supplied status
-//     S2 user supplied status + WRE + TD  (Overflows)
-//     S3 user supplied status + FZ + TD   (Underflows)
-//
 { .mfi
-	nop.m 999
-(p0)   fma.s0 FR_Result = FR_Input_X, FR_Input_X, f0
-	nop.i 999
-}
-{ .mfi
-	nop.m 999
-(p0)   fsetc.s3 0x7F,0x01
-	nop.i 999
-}
-{ .mlx
-	nop.m 999
-(p0)   movl GR_T1_ptr = 0x00000000013FFF ;;
-}
-{ .mfi
-	nop.m 999
-(p0)   fma.s3 FR_Result_small = FR_Input_X, FR_Input_X, f0
-	nop.i 999
-}
-{ .mfi
-	nop.m 999
-(p0)   fsetc.s3 0x7F,0x40
-	nop.i 999 ;;
+      nop.m 999
+      fsetc.s2 0x7F,0x41       // For underflow test, set S2=User+TD+FTZ
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     Return if no danger of over of underflow.
-//
-(p0)   fsetc.s2 0x7F,0x42
-	nop.i 999;;
+      nop.m 999
+      fma.s2 FR_Result_small = FR_TMP1, FR_TMP2, FR_TMP3
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fma.s2 FR_Result_big = FR_Input_X, FR_Input_X, f0
-	nop.i 999 ;;
+      nop.m 999
+      fsetc.s2 0x7F,0x42       // For overflow test, set S2=User+TD+WRE
+      nop.i 999
 }
-//
-//     S0 user supplied status
-//     S2 user supplied status + WRE + TD  (Overflows)
-//     S3 user supplied status + FZ + TD   (Underflows)
-//
-//
-//     If (Safe) is true, then
-//        Compute result using user supplied status field.
-//        No overflow or underflow here, but perhaps inexact.
-//        Return
-//     Else
-//       Determine if overflow or underflow  was raised.
-//       Fetch +/- overflow threshold for IEEE single, double,
-//       double extended
-//
+;;
+
 { .mfi
-(p0)   setf.exp FR_Big = GR_T1_ptr
-(p0)   fsetc.s2 0x7F,0x40
-	nop.i 999 ;;
+      nop.m 999
+      fma.s2 FR_Result_big = FR_TMP1, FR_TMP2,FR_TMP3
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc   p11, p0 =  FR_Result_small, 0x00F
-	nop.i 999 ;;
+      nop.m 999
+      fsetc.s2 0x7F,0x40       // Reset S2=User
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fmerge.ns FR_NBig = FR_Big, FR_Big
-	nop.i 999
+      nop.m 999
+      fclass.m p11, p0 = FR_Result_small, 0x00F // Test small result unorm/zero
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     Create largest double exponent + 1.
-//     Create smallest double exponent - 1.
-//     Identify denormals
-//
-(p0)   fcmp.ge.unc.s1 p8, p0 = FR_Result_big , FR_Big
-	nop.i 999 ;;
-}
-{ .mii
-	nop.m 999
-	nop.i 999 ;;
-//
-//     fcmp:   resultS2 <= - overflow threshold  
-//     fclass: resultS3 is denorm/unorm/0       
-//
-(p8)   mov   GR_Parameter_TAG = 18 ;;
+      nop.m 999
+      fcmp.ge.s1 p8, p0 = FR_Result_big , FR_Big // Test >= + oflow threshold
+      nop.i 999
 }
+;;
+
 { .mfb
-	nop.m 999
-//
-//     fcmp:   resultS2 >= + overflow threshold  
-//
-(p0)   fcmp.le.unc.s1 p9, p0 = FR_Result_big, FR_NBig
-(p8)   br.cond.spnt __libm_error_region ;;
-}
-{ .mii
-	nop.m 999
-	nop.i 999 ;;
-(p9)   mov   GR_Parameter_TAG = 18
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p9)   br.cond.spnt __libm_error_region ;;
-}
-//
-//     Report that pow overflowed - either +Inf, or -Inf
-//
-{ .mmb
-(p11)  mov   GR_Parameter_TAG = 19
-	nop.m 999
-(p11)  br.cond.spnt __libm_error_region ;;
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-//
-//     Report that pow underflowed
-//
-(p0)   br.cond.sptk L(POWL_64_RETURN) ;;
+(p11) mov   GR_Parameter_TAG = 19                // Set tag for underflow
+      fcmp.le.s1 p9, p0 = FR_Result_big, FR_NBig // Test <= - oflow threshold
+(p11) br.cond.spnt __libm_error_region           // Branch if pow underflowed
 }
+;;
 
+{ .mfb
+(p8)  mov   GR_Parameter_TAG = 18                // Set tag for overflow
+      nop.f 999
+(p8)  br.cond.spnt __libm_error_region           // Branch if pow +overflow
+}
+;;
 
+{ .mbb
+(p9)  mov   GR_Parameter_TAG = 18                // Set tag for overflow
+(p9)  br.cond.spnt __libm_error_region           // Branch if pow -overflow
+      br.ret.sptk  b0                            // Branch if result really ok
+}
+;;
 
 
-L(POWL_64_SPECIAL): 
+POWL_64_SPECIAL:
+// Here if x or y is NatVal, nan, inf, or zero
 { .mfi
-	nop.m 999
-(p0)   fcmp.eq.s1 p15, p0 =  FR_Input_X, f1  // Is x=+1
-	nop.i 999 ;;
+      nop.m 999
+      fcmp.eq.s1 p15, p0 =  FR_Input_X, f1  // Test x=+1
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc p14, p0 =  FR_Input_Y, 0x023 
-	nop.i 999 ;;
+      nop.m 999
+      fclass.m p8, p0 =  FR_Input_X, 0x143  // Test x natval, snan
+      nop.i 999
 }
+;;
 
 { .mfi
-	nop.m 999
-(p15)   fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
-	nop.i 999 
+      nop.m 999
+(p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
+      nop.i 999
 }
 { .mfb
-	nop.m 999
-(p15)   fmpy.s0 FR_Result = f1,f1        // If x=1, result=1
-(p15)   br.cond.spnt L(POWL_64_RETURN) ;;   // Exit if x=1
+      nop.m 999
+(p15) fmpy.s0 FR_Result = f1,f1             // If x=1, result=1
+(p15) br.ret.spnt b0                        // Exit if x=1
 }
+;;
 
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc p13, p0 =  FR_Input_X, 0x023 
-	nop.i 999 ;;
-}
-{ .mfi
-	nop.m 999
-(p0)   fclass.m.unc p8, p0 =  FR_Input_X, 0x143 
-	nop.i 999
+      nop.m 999
+      fclass.m p6, p0 =  FR_Input_Y, 0x007  // Test y zero
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc p9, p0 =  FR_Input_Y, 0x143 
-	nop.i 999 ;;
+      nop.m 999
+      fclass.m p9, p0 =  FR_Input_Y, 0x143  // Test y natval, snan
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc p10, p0 =  FR_Input_X, 0x083 
-	nop.i 999
+      nop.m 999
+      fclass.m p10, p0 =  FR_Input_X, 0x083 // Test x qnan
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc p11, p0 =  FR_Input_Y, 0x083 
-	nop.i 999 ;;
+      nop.m 999
+(p8)  fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If x=snan, result=qnan
+(p6)  cmp.ne p8,p0 = r0,r0     // Don't exit if x=snan, y=0 ==> result=+1
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc p6, p0 =  FR_Input_Y, 0x007 
-	nop.i 999
+      nop.m 999
+(p6)  fclass.m.unc p15, p0 =  FR_Input_X,0x007   // Test x=0, y=0
+      nop.i 999
 }
-{ .mfi
-	nop.m 999
-(p0)   fcmp.eq.unc.s1 p7, p0 =  FR_Input_Y, f1 
-	nop.i 999 ;;
+{ .mfb
+      nop.m 999
+(p9)  fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If y=snan, result=qnan
+(p8)  br.ret.spnt b0                             // Exit if x=snan, y not 0,
+                                                 //   result=qnan
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     set p13 if x +/- Inf 
-//     set p14 if y +/- Inf 
-//     set p8  if x  Natval or +/-SNaN  
-//     set p9  if y  Natval or +/-SNaN  
-//     set p10 if x QNaN
-//     set p11 if y QNaNs
-//     set p6  if y is +/-0 
-//     set p7  if y is 1
-//
-(p8)   fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X 
-(p6)   cmp.ne p8,p0 = r0,r0 ;;  // Don't exit if x=snan, y=0 ==> result=+1
-}
-{ .mfb
-	nop.m 999
-(p9)   fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X 
-(p8)   br.cond.spnt L(POWL_64_RETURN) ;; 
+      nop.m 999
+      fcmp.eq.s1 p7, p0 =  FR_Input_Y, f1        // Test y +1.0
+      nop.i 999
 }
 { .mfb
-	nop.m 999
-(p10)  fmpy.s0 FR_Result = FR_Input_X, f0 
-(p9)   br.cond.spnt L(POWL_64_RETURN) ;; 
-}
-{ .mfi
-	nop.m 999
-//
-//     Produce result for SNaN and NatVals and return
-//
-(p6)   fclass.m.unc p15, p0 =  FR_Input_X,0x007 
-	nop.i 999
+      nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f0         // If x=qnan, result=qnan
+(p9)  br.ret.spnt b0                             // Exit if y=snan, result=qnan
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     If Y +/- 0, set p15 if x +/- 0
-//
-(p6)   fclass.m.unc p8, p0 =  FR_Input_X,0x0C3 
-	nop.i 999 ;;
+      nop.m 999
+(p6)  fclass.m.unc p8, p0 =  FR_Input_X,0x0C3    // Test x=nan, y=0
+      nop.i 999
 }
+;;
 
 { .mfi
-	nop.m 999
-(p6)   fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
-	nop.i 999
+      nop.m 999
+(p6)  fcmp.eq.s0 p9,p0 = FR_Input_X, f0          // If y=0, flag if x denormal
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p6)   fadd.s0 FR_Result = f1, f0
-	nop.i 999 ;;
+      nop.m 999
+(p6)  fadd.s0 FR_Result = f1, f0                 // If y=0, result=1
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-// 
-//     Set p8 if y = +/-0 and X is a QNaN/SNaN
-//     If y = +/-0, let result = 1.0
-// 
-(p7)   fmpy.s0 FR_Result = FR_Input_X,f1
-//
-//     If y == 1, result = x * 1 
-//
-(p15)  mov GR_Parameter_TAG = 20 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p15)  br.cond.spnt __libm_error_region ;;
-}
-{ .mib
-	nop.m 999
-//
-//     If x and y are both zero, result = 1.0 and call error
-//     support. 
-//
-(p8)   mov GR_Parameter_TAG = 23 
-(p8)   br.cond.spnt __libm_error_region ;;
+      nop.m 999
+      fclass.m p11, p0 =  FR_Input_Y, 0x083      // Test y qnan
+      nop.i 999
 }
-{ .mib
-	nop.m 999
-	nop.i 999
-//
-//     If y = +/-0 and x is a QNaN, result = 1.0 and call error
-//     support. 
-//
-(p6)   br.cond.spnt L(POWL_64_RETURN) ;; 
+{ .mfb
+(p15) mov GR_Parameter_TAG = 20                  // Error tag for x=0, y=0
+(p7)  fmpy.s0 FR_Result = FR_Input_X,f1          // If y=1, result=x
+(p15) br.cond.spnt __libm_error_region           // Branch if x=0, y=0, result=1
 }
+;;
 
-// If x=0, y=-inf, go to the X_IS_ZERO path
 { .mfb
-	nop.m 999
-(p14)  fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0 
-(p7)   br.cond.spnt L(POWL_64_RETURN) ;; 
+(p8)  mov GR_Parameter_TAG = 23                  // Error tag for x=nan, y=0
+      fclass.m p14, p0 =  FR_Input_Y, 0x023      // Test y inf
+(p8)  br.cond.spnt __libm_error_region           // Branch if x=nan, y=0,
+                                                 //   result=1
 }
+;;
 
-{ .mfi
-	nop.m 999
-//
-//     Produce all results for x**0 and x**1 
-//     Let all the result x ** 0 == 1 and return
-//     Let all x ** 1 == x and return
-//
-(p10)  fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
-	nop.i 999 ;;
-}
 { .mfb
-	nop.m 999
-(p11)  fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X
-(p10)  br.cond.spnt L(POWL_64_RETURN) ;;
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p11)  br.cond.spnt L(POWL_64_RETURN) ;;
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-//
-//     Return result for x or y QNaN input with QNaN result 
-//
-(p14)  br.cond.spnt L(POWL_64_Y_IS_INF) ;;
+      nop.m 999
+      fclass.m p13, p0 =  FR_Input_X, 0x023      // Test x inf
+(p6)  br.ret.spnt b0                             // Exit y=0, x not nan or 0,
+                                                 //   result=1
 }
-{ .mib
-	nop.m 999
-	nop.i 999
-(p13)  br.cond.spnt L(POWL_64_X_IS_INF) ;;
+;;
+
+{ .mfb
+      nop.m 999
+(p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0      // Test x not 0, y=inf
+(p7)  br.ret.spnt b0                             // Exit y=1, x not snan,
+                                                 //   result=x
 }
-L(POWL_64_X_IS_ZERO): 
-{ .mmb
-(p0)   getf.sig GR_signif_y = FR_Input_Y 
-(p0)   getf.exp GR_BIASed_exp_y = FR_Input_Y
-	nop.b 999 ;;
+;;
+
+{ .mfb
+      nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X  // If x=qnan, y not snan,
+                                                 //   result=qnan
+(p10) br.ret.spnt b0                             // Exit x=qnan, y not snan,
+                                                 //   result=qnan
 }
-{ .mlx
-	nop.m 999
-(p0)   movl GR_Mask = 0x1FFFF
+;;
+
+{ .mfb
+      nop.m 999
+(p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X  // If y=qnan, x not nan or 1,
+                                                 //   result=qnan
+(p11) br.ret.spnt b0                             // Exit y=qnan, x not nan or 1,
+                                                 //   result=qnan
 }
-{ .mlx
-	nop.m 999
-(p0)   movl GR_y_sign = 0x20000 ;;
+;;
+
+{ .mbb
+      nop.m 999
+(p14) br.cond.spnt POWL_64_Y_IS_INF           // Branch if y=inf, x not 1 or nan
+(p13) br.cond.spnt POWL_64_X_IS_INF           // Branch if x=inf, y not 1 or nan
 }
-//
-//     Get BIASed exp and significand of y
+;;
+
+
+POWL_64_X_IS_ZERO:
+// Here if x=0, y not nan or 1 or inf or 0
+
+// There is logic starting here to determine if y is an integer when x = 0.
+// If 0 < |y| < 1 then clearly y is not an integer.
+// If |y| > 1, then the significand of y is shifted left by the size of
+//    the exponent of y.  This preserves the lsb of the integer part + the
+//    fractional bits.  The lsb of the integer can be tested to determine if
+//    the integer is even or odd.  The fractional bits can be tested.  If zero,
+//    then y is an integer.
 //
 { .mfi
-(p0)   and GR_exp_y = GR_Mask,GR_BIASed_exp_y
-	nop.f 999
-(p0)   and GR_y_sign = GR_y_sign,GR_BIASed_exp_y
-}
-{ .mlx
-	nop.m 999
-(p0)   movl GR_BIAS = 0xFFFF ;;
+      and GR_exp_y = GR_exp_mask,GR_signexp_y   // Get biased exponent of y
+      nop.f 999
+      and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
 }
-{ .mfi
-(p0)   cmp.lt.unc  p9, p8 = GR_exp_y,GR_BIAS
-	nop.f 999
+;;
+
 //
 //     Maybe y is < 1 already, so
 //     can never be an integer.
-//     Remove sign bit from exponent.
-//
-(p0)   sub GR_exp_y = GR_exp_y,GR_BIAS ;;
-}
-{ .mii
-	nop.m 999
-	nop.i 999 ;;
-//
-//     Remove exponent BIAS
 //
-(p8)   shl GR_exp_y=  GR_signif_y,GR_exp_y ;;
-}
 { .mfi
-(p9)   or  GR_exp_y=  0xF,GR_signif_y
-	nop.f 999
-	nop.i 999 ;;
+      cmp.lt  p9, p8 = GR_exp_y,GR_exp_bias     // Test 0 < |y| < 1
+      nop.f 999
+      sub GR_exp_y = GR_exp_y,GR_exp_bias       // Get true exponent of y
 }
-{ .mii
-	nop.m 999
+;;
+
 //
 //     Shift significand of y looking for nonzero bits
 //     For y > 1, shift signif_y exp_y bits to the left
-//     For y < 1, turn on 4 low order bits of significand of y 
+//     For y < 1, turn on 4 low order bits of significand of y
 //     so that the fraction will always be non-zero
 //
-(p0)   shl GR_signif_y=  GR_exp_y,1 ;;
-(p0)   extr.u GR_low_order_bit = GR_exp_y,63,1
+{ .mmi
+(p9)  or  GR_exp_y=  0xF,GR_signif_y            // Force nonzero fraction if y<1
+;;
+      nop.m 999
+(p8)  shl GR_exp_y=  GR_signif_y,GR_exp_y       // Get lsb of int + fraction
+                                                // Wait 4 cycles to use result
+}
+;;
+
+{ .mmi
+      nop.m 999
+;;
+      nop.m 999
+      nop.i 999
+}
+;;
+
+{ .mmi
+      nop.m 999
+;;
+      nop.m 999
+      shl GR_fraction_y=  GR_exp_y,1            // Shift left 1 to get fraction
 }
+;;
+
 //
 //     Integer part of y  shifted off.
 //     Get y's low even or odd bit - y might not be an int.
 //
 { .mii
-(p0)   cmp.eq.unc  p13,p0  =  GR_signif_y, r0
-(p0)   cmp.eq.unc  p8,p9 =  GR_y_sign, r0 ;;
-//
-//     Is y an int?
-//     Is y positive
-//
-(p13)  cmp.ne.unc  p13,p0 =  GR_low_order_bit, r0 ;;
+      cmp.eq  p13,p0  =  GR_fraction_y, r0      // Test for y integer
+      cmp.eq  p8,p0 =  GR_y_sign, r0            // Test for y > 0
+;;
+(p13) tbit.nz.unc p13,p0 = GR_exp_y, 63         // Test if y an odd integer
 }
+;;
+
+{ .mfi
+(p13) cmp.eq.unc p13,p14 =  GR_y_sign, r0   // Test y pos odd integer
+(p8)  fcmp.eq.s0 p12,p0 = FR_Input_Y, f0    // If x=0 and y>0 flag if y denormal
+      nop.i 999
+}
+;;
+
 //
-//     Is y and int and odd?
+//     Return +/-0 when x=+/-0 and y is positive odd integer
 //
 { .mfb
-(p13)  cmp.eq.unc  p13,p14 =  GR_y_sign, r0
-(p8)   fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
-	nop.b 999 ;;
+      nop.m 999
+(p13) mov FR_Result = FR_Input_X            // If x=0,  y pos odd int, result=x
+(p13) br.ret.spnt b0                        // Exit x=0, y pos odd int, result=x
 }
-{ .mfb
-	nop.m 999
+;;
+
 //
-//     Is y and int and odd and positive?
+//     Return +/-inf when x=+/-0 and y is negative odd int
 //
-(p13)  mov FR_Result = FR_Input_X 
-(p13)  br.cond.sptk L(POWL_64_RETURN) ;;
+{ .mfb
+(p14) mov GR_Parameter_TAG = 21
+(p14) frcpa.s0 FR_Result, p0 = f1, FR_Input_X  // Result +-inf, set Z flag
+(p14) br.cond.spnt __libm_error_region
 }
-{ .mfi
-	nop.m 999
+;;
+
 //
-//     Return +/-0 when x=+/-0 and y is and odd pos. int
+//     Return +0 when x=+/-0 and y positive and not an odd integer
 //
-(p14)  frcpa.s0 FR_Result, p10 = f1, FR_Input_X
-(p14)  mov GR_Parameter_TAG = 21
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p14)  br.cond.spnt __libm_error_region ;;
+{ .mfb
+      nop.m 999
+(p8)  mov FR_Result = f0      // If x=0, y>0 and not odd integer, result=+0
+(p8)  br.ret.sptk b0          // Exit x=0, y>0 and not odd integer, result=+0
 }
+;;
 
-{ .mfb
-	nop.m 999
 //
-//     Return +/-0 when x=+/-Inf and y is and odd neg int
-//     and raise dz exception
+//     Return +inf when x=+/-0 and y is negative and not odd int
 //
-(p8)   mov FR_Result = f0
-(p8)   br.cond.sptk L(POWL_64_RETURN) ;;
+{ .mfb
+      mov GR_Parameter_TAG = 21
+      frcpa.s0 FR_Result, p10 = f1,f0   // Result +inf, raise Z flag
+      br.cond.sptk __libm_error_region
 }
-{ .mfi
-	nop.m 999
+;;
+
+
+POWL_64_X_IS_INF:
 //
-//     Return +0 when x=+/-0 and y > 0  and not odd.
+// Here if x=inf, y not 1 or nan
 //
-(p9)   frcpa.s0 FR_Result, p10 = f1,f0
-(p9)   mov GR_Parameter_TAG = 21
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p9)   br.cond.sptk __libm_error_region ;;
-}
-L(POWL_64_X_IS_INF): 
 { .mfi
-(p0)   getf.exp GR_exp_y = FR_Input_Y
-(p0)   fclass.m.unc p13, p0 =  FR_Input_X,0x022 
-(p0)   mov GR_Mask = 0x1FFFF ;;
+      and GR_exp_y = GR_exp_mask,GR_signexp_y   // Get biased exponent y
+      fclass.m p13, p0 =  FR_Input_X,0x022      // Test x=-inf
+      nop.i 999
 }
+;;
 
 { .mfi
-(p0)   getf.sig GR_signif_y = FR_Input_Y
-(p0)   fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Flag if y denormal
-       nop.i 999 ;;
+      and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
+      fcmp.eq.s0 p9,p0 = FR_Input_Y, f0         // Dummy to set flag if y denorm
+      nop.i 999
 }
+;;
 
 //
-//     Get exp and significand of y
-//     Create exponent mask and sign mask
+//     Maybe y is < 1 already, so
+//     isn't an int.
 //
-{ .mlx
-(p0)   and GR_low_order_bit = GR_Mask,GR_exp_y
-(p0)   movl GR_BIAS = 0xFFFF
+{ .mfi
+(p13) cmp.lt.unc  p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1 if x=-inf
+      fclass.m p11, p0 =  FR_Input_X,0x021      // Test x=+inf
+      sub GR_exp_y = GR_exp_y,GR_exp_bias       // Get true exponent y
 }
-{ .mmi
-	nop.m 999 ;;
+;;
+
 //
-//     Remove sign bit from exponent.
+//     Shift significand of y looking for nonzero bits
+//     For y > 1, shift signif_y exp_y bits to the left
+//     For y < 1, turn on 4 low order bits of significand of y
+//     so that the fraction will always be non-zero
 //
-(p0)   cmp.lt.unc  p9, p8 = GR_low_order_bit,GR_BIAS
+{ .mmi
+(p9)  or  GR_exp_y=  0xF,GR_signif_y          // Force nonzero fraction if y<1
+;;
+(p11) cmp.eq.unc  p14,p12 = GR_y_sign, r0     // Test x=+inf, y>0
+(p8)  shl GR_exp_y=  GR_signif_y,GR_exp_y     // Get lsb of int + fraction
+                                              // Wait 4 cycles to use result
+}
+;;
+
 //
-//     Maybe y is < 1 already, so 
-//     isn't an int.
+//     Return +inf for x=+inf, y > 0
+//     Return +0   for x=+inf, y < 0
 //
-(p0)   sub GR_low_order_bit = GR_low_order_bit,GR_BIAS
+{ .mfi
+      nop.m 999
+(p12) mov FR_Result = f0                      // If x=+inf, y<0, result=+0
+      nop.i 999
 }
-{ .mlx
-	nop.m 999
-(p0)   movl GR_sign_mask = 0x20000 ;;
+{ .mfb
+      nop.m 999
+(p14) fma.s0 FR_Result = FR_Input_X,f1,f0     // If x=+inf, y>0, result=+inf
+(p11) br.ret.sptk b0                          // Exit x=+inf
 }
-{ .mfi
-(p0)   and GR_sign_mask = GR_sign_mask,GR_exp_y
+;;
+
 //
-//     Return +Inf when x=+/-0 and y < 0 and not odd and raise
-//     divide-by-zero exception.
+// Here only if x=-inf.  Wait until can use result of shl...
 //
-(p0)   fclass.m.unc p11, p0 =  FR_Input_X,0x021 
-	nop.i 999 ;;
-}
 { .mmi
-	nop.m 999 ;;
-//
-//     Is shift off integer part of y.
-//     Get y's even or odd bit - y might not be an int.
-//
-(p11)  cmp.eq.unc  p11,p12 = GR_sign_mask, r0
-//
-//     Remove exponent BIAS
-//
-(p8)   shl GR_exp_y = GR_signif_y,GR_low_order_bit ;;
+      nop.m 999
+;;
+      nop.m 999
+      nop.i 999
 }
+;;
+
 { .mfi
-(p9)   or  GR_exp_y = 0xF,GR_signif_y
-//
-//     Is y positive or negative when x is +Inf?
-//     Is y and int when x = -Inf 
-//
-(p11)  mov FR_Result = FR_Input_X 
-	nop.i 999 ;;
+      cmp.eq  p8,p9 = GR_y_sign, r0           // Test y pos
+      nop.f 999
+      shl GR_fraction_y = GR_exp_y,1          // Shift left 1 to get fraction
 }
-{ .mfi
-	nop.m 999
-(p12)  mov FR_Result = f0
-	nop.i 999 ;;
+;;
+
+{ .mmi
+      cmp.eq  p13,p0 = GR_fraction_y, r0      // Test y integer
+;;
+      nop.m 999
+(p13) tbit.nz.unc  p13,p0 = GR_exp_y, 63      // Test y odd integer
 }
-{ .mii
-	nop.m 999
+;;
+
 //
-//     Shift signficand looking for nonzero bits 
-//     For y non-ints, upset the significand.
+//     Is y even or odd?
 //
-(p0)   shl GR_signif_y = GR_exp_y,1 ;;
-(p13)  cmp.eq.unc  p13,p0  = GR_signif_y, r0
-}
 { .mii
-	nop.m 999
-(p0)   extr.u GR_low_order_bit = GR_exp_y,63,1 ;;
-(p13)  cmp.ne.unc  p13,p0 = GR_low_order_bit, r0
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p11)  br.cond.sptk L(POWL_64_RETURN) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p12)  br.cond.sptk L(POWL_64_RETURN) ;; 
+(p13) cmp.eq.unc  p14,p10 = GR_y_sign, r0     // Test x=-inf, y pos odd int
+(p13) cmp.ne.and  p8,p9 = r0,r0               // If y odd int, turn off p8,p9
+      nop.i 999
 }
+;;
+
 //
-//     Return Inf for y > 0
-//     Return +0  for y < 0
-//     Is y even or odd?
+//     Return -0   for x = -inf and y < 0 and odd int.
+//     Return -Inf for x = -inf and y > 0 and odd int.
 //
-{ .mii
-(p13)  cmp.eq.unc  p13,p10 = GR_sign_mask, r0
-(p0)   cmp.eq.unc  p8,p9 = GR_sign_mask, r0 ;;
-	nop.i 999
+{ .mfi
+      nop.m 999
+(p10) fmerge.ns FR_Result = f0, f0      // If x=-inf, y neg odd int, result=-0
+      nop.i 999
 }
 { .mfi
-	nop.m 999
+      nop.m 999
+(p14) fmpy.s0 FR_Result = FR_Input_X,f1 // If x=-inf, y pos odd int, result=-inf
+      nop.i 999
+}
+;;
+
 //
-//     For x = -inf, y is and int, positive  
-//     and odd 
-//     Is y positive in general?
+//     Return Inf for x = -inf and y > 0 not an odd int.
+//     Return +0  for x = -inf and y < 0 not an odd int.
 //
-(p13)  mov FR_Result = FR_Input_X
-	nop.i 999 ;;
+.pred.rel "mutex",p8,p9
+{ .mfi
+      nop.m 999
+(p8)  fmerge.ns FR_Result = FR_Input_X, FR_Input_X // If x=-inf, y>0 not odd int
+                                                   //   result=+inf
+      nop.i 999
 }
 { .mfb
-	nop.m 999
-(p10)  fmerge.ns FR_Result = f0, f0 
-(p13)  br.cond.sptk L(POWL_64_RETURN) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-(p10)  br.cond.sptk L(POWL_64_RETURN) ;; 
+      nop.m 999
+(p9)  fmpy.s0 FR_Result = f0,f0                    // If x=-inf, y<0 not odd int
+                                                   //   result=+0
+      br.ret.sptk b0                               // Exit for x=-inf
 }
-{ .mfi
-	nop.m 999
+;;
+
+
+POWL_64_Y_IS_INF:
+// Here if y=inf, x not 1 or nan
 //
-//     Return -Inf for x = -inf and y > 0 and odd int.
-//     Return -0   for x = -inf and y < 0 and odd int.
+//     For y = +Inf and |x| < 1  returns 0
+//     For y = +Inf and |x| > 1  returns Inf
+//     For y = -Inf and |x| < 1  returns Inf
+//     For y = -Inf and |x| > 1  returns 0
+//     For y =  Inf and |x| = 1  returns 1
 //
-(p8)   fmerge.ns FR_Result = FR_Input_X, FR_Input_X 
-	nop.i 999 ;;
-}
-{ .mfb
-	nop.m 999
-(p9)   mov FR_Result = f0
-(p8)   br.cond.sptk L(POWL_64_RETURN) ;; 
+{ .mfi
+      nop.m 999
+      fclass.m p8, p0 =  FR_Input_Y, 0x021    // Test y=+inf
+      nop.i 999
 }
-{ .mib
-	nop.m 999
-	nop.i 999
-(p9)   br.cond.sptk L(POWL_64_RETURN) ;; 
+;;
+
+{ .mfi
+      nop.m 999
+      fclass.m p9, p0 =  FR_Input_Y, 0x022    // Test y=-inf
+      nop.i 999
 }
-L(POWL_64_Y_IS_INF): 
+;;
+
 { .mfi
-	nop.m 999
-//
-//     Return Inf for x = -inf and y > 0 not an odd int.
-//     Return +0  for x = -inf and y < 0 and not an odd int.
-//
-(p0)   fclass.m.unc p8, p0 =  FR_Input_Y, 0x021
-	nop.i 999
+      nop.m 999
+      fabs FR_X = FR_Input_X                  // Form |x|
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fclass.m.unc p9, p0 =  FR_Input_Y, 0x022
-	nop.i 999 ;;
+      nop.m 999
+      fcmp.eq.s0 p10,p0 = FR_Input_X, f0      // flag if x denormal
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p0)   fabs FR_X = FR_Input_X
-	nop.i 999 ;;
+      nop.m 999
+(p8)  fcmp.lt.unc.s1 p6, p0  =  FR_X, f1      // Test y=+inf, |x|<1
+      nop.i 999
 }
+;;
 
 { .mfi
-	nop.m 999
-(p0)   fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
-	nop.i 999 ;;
+      nop.m 999
+(p8)  fcmp.gt.unc.s1 p7, p0  =  FR_X, f1      // Test y=+inf, |x|>1
+      nop.i 999
 }
+;;
 
 { .mfi
-	nop.m 999
-//
-//     Find y = +/- Inf
-//     Compute |x|
-//
-(p8)   fcmp.lt.unc.s1 p6, p0  =  FR_X, f1
-	nop.i 999
+      nop.m 999
+(p9)  fcmp.lt.unc.s1 p12, p0 =  FR_X, f1      // Test y=-inf, |x|<1
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p8)   fcmp.gt.unc.s1 p7, p0  =  FR_X, f1
-	nop.i 999 ;;
+      nop.m 999
+(p6)  fmpy.s0 FR_Result = f0,f0               // If y=+inf, |x|<1, result=+0
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p9)   fcmp.lt.unc.s1 p12, p0 =  FR_X, f1
-	nop.i 999
+      nop.m 999
+(p9)  fcmp.gt.unc.s1 p13, p0 =  FR_X, f1      // Test y=-inf, |x|>1
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p9)   fcmp.gt.unc.s1 p13, p0 =  FR_X, f1
-	nop.i 999 ;;
+      nop.m 999
+(p7)  fmpy.s0 FR_Result = FR_Input_Y, f1      // If y=+inf, |x|>1, result=+inf
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-//
-//     For y = +Inf and |x| < 1  returns 0
-//     For y = +Inf and |x| > 1  returns Inf
-//     For y = -Inf and |x| < 1  returns Inf
-//     For y = -Inf and |x| > 1  returns 0
-//
-(p6)   mov FR_Result = f0
-	nop.i 999 ;;
+      nop.m 999
+      fcmp.eq.s1 p14, p0 =  FR_X, f1          // Test y=inf, |x|=1
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-(p7)   mov FR_Result = FR_Input_Y 
-	nop.i 999 ;;
+      nop.m 999
+(p12) fnma.s0 FR_Result = FR_Input_Y, f1, f0  // If y=-inf, |x|<1, result=+inf
+      nop.i 999
 }
+;;
+
 { .mfi
-	nop.m 999
-(p12)  fmpy.s0 FR_Result = FR_Input_Y, FR_Input_Y
-	nop.i 999 ;;
+      nop.m 999
+(p13) mov FR_Result = f0                      // If y=-inf, |x|>1, result=+0
+      nop.i 999
 }
+;;
+
 { .mfb
-	nop.m 999
-(p13)  mov FR_Result = f0
-//
-//     Produce x ** +/- Inf results
-//
-(p6)   br.cond.spnt L(POWL_64_RETURN) ;;
+      nop.m 999
+(p14) fmpy.s0 FR_Result = f1,f1               // If y=inf, |x|=1, result=+1
+      br.ret.sptk b0                          // Common return for y=inf
 }
-{ .mib
-	nop.m 999
-	nop.i 999
-(p7)   br.cond.spnt L(POWL_64_RETURN) ;;
+;;
+
+
+// Here if x or y denorm/unorm
+POWL_DENORM:
+{ .mmi
+      getf.sig GR_signif_Z = FR_norm_X   // Get significand of x
+;;
+      getf.exp GR_signexp_y = FR_norm_Y  // Get sign and exp of y
+      nop.i 999
 }
-{ .mib
-	nop.m 999
-	nop.i 999
-(p12)  br.cond.spnt L(POWL_64_RETURN) ;;
+;;
+
+{ .mfi
+      getf.sig GR_signif_y = FR_norm_Y   // Get significand of y
+      nop.f 999
+      nop.i 999
 }
+;;
+
 { .mib
-	nop.m 999
-	nop.i 999
-(p13)  br.cond.spnt L(POWL_64_RETURN) ;;
+      getf.exp GR_signexp_x = FR_norm_X  // Get sign and exp of x
+      extr.u GR_Index1 = GR_signif_Z, 59, 4  // Extract upper 4 signif bits of x
+      br.cond.sptk  POWL_COMMON          // Branch back to main path
 }
-{ .mfb
-	nop.m 999
+;;
+
+
+POWL_64_UNSUPPORT:
 //
-//     +/-1 ** +/-Inf, result is +1
+//     Raise exceptions for specific
+//     values - pseudo NaN and
+//     infinities.
+//     Return NaN and raise invalid
 //
-(p0)   fmpy.s0 FR_Result = f1,f1
-(p0)   br.cond.sptk L(POWL_64_RETURN) ;;
-}
-L(POWL_64_UNSUPPORT): 
 { .mfb
-	nop.m 999
+      nop.m 999
+      fmpy.s0 FR_Result = FR_Input_X,f0
+      br.ret.sptk b0
+}
+;;
+
+POWL_64_XNEG:
 //
-//     Return NaN and raise invalid    
+//     Raise invalid for x < 0  and
+//     y not an integer
 //
-(p0)   fmpy.s0 FR_Result = FR_Input_X,f0
-// 
-//     Raise exceptions for specific
-//     values - pseudo NaN and
-//     infinities.  
-// 
-(p0)   br.cond.sptk L(POWL_64_RETURN) ;; 
-}
-L(POWL_64_XNEG): 
 { .mfi
-	nop.m 999
-(p0)   frcpa.s0 FR_Result, p8 =  f0, f0
-// 
-//     Raise invalid for x < 0  and
-//     y not an integer and
-// 
-(p0)   mov GR_Parameter_TAG = 22
+      nop.m 999
+      frcpa.s0 FR_Result, p8 =  f0, f0
+      mov GR_Parameter_TAG = 22
 }
 { .mib
-	nop.m 999
-	nop.i 999
-(p0)   br.cond.sptk __libm_error_region ;; 
+      nop.m 999
+      nop.i 999
+      br.cond.sptk __libm_error_region
 }
-L(POWL_64_SQRT): 
+;;
+
+POWL_64_SQRT:
 { .mfi
-	nop.m 999
-(p0)   frsqrta.s0 FR_Result,p10 = FR_Input_X
-	nop.i 999 ;;
+      nop.m 999
+      frsqrta.s0 FR_Result,p10 = FR_save_Input_X
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-(p10)  fma.s1   f62=FR_Half,FR_Input_X,f0
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   f62=FR_Half,FR_save_Input_X,f0
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (2)
-//     h = 1/2 * a in f9
-//
-(p10)  fma.s1   f63=FR_Result,FR_Result,f0
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   f63=FR_Result,FR_Result,f0
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (3)
-//     t1 = y0 * y0 in f10
-//
-(p10)  fnma.s1  f32=f63,f62,f11
-	nop.i 999 ;;
+      nop.m 999
+(p10) fnma.s1  f32=f63,f62,FR_Half
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (4)
-//     t2 = 1/2 - t1 * h in f10
-//
-(p10)  fma.s1   f33=f32,FR_Result,FR_Result
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   f33=f32,FR_Result,FR_Result
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (5)
-//     y1 = y0 + t2 * y0 in f13
-//
-(p10)  fma.s1   f34=f33,f62,f0
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   f34=f33,f62,f0
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (6)
-//     t3 = y1 * h in f10
-//
-(p10)  fnma.s1  f35=f34,f33,f11
-	nop.i 999 ;;
+      nop.m 999
+(p10) fnma.s1  f35=f34,f33,FR_Half
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (7)
-//     t4 = 1/2 - t3 * y1 in f10
-//
-(p10)  fma.s1   f63=f35,f33,f33
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   f63=f35,f33,f33
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (8)
-//     y2 = y1 + t4 * y1 in f13
-//
-(p10)  fma.s1   f32=FR_Input_X,f63,f0
-	nop.i 999
+      nop.m 999
+(p10) fma.s1   f32=FR_save_Input_X,f63,f0
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-//
-//     Step (9)
-//     S = a * y2 in f10
-//
-(p10)  fma.s1   FR_Result=f63,f62,f0
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   FR_Result=f63,f62,f0
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (10)
-//     t5 = y2 * h in f9
-//
-(p10)  fma.s1   f33=f11,f63,f0
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   f33=FR_Half,f63,f0   // H = 1/2 * y2; use named 1/2 constant
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (11)
-//     H = 1/2 * y2 in f11
-//
-(p10)  fnma.s1  f34=f32,f32,f8
-	nop.i 999
+      nop.m 999
+(p10) fnma.s1  f34=f32,f32,FR_save_Input_X
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-//
-//     Step (12)
-//     d = a - S * S in f12
-//
-(p10)  fnma.s1  f35=FR_Result,f63,f11
-	nop.i 999 ;;
+      nop.m 999
+(p10) fnma.s1  f35=FR_Result,f63,FR_Half
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (13)
-//     t6 = 1/2 - t5 * y2 in f7
-//
-(p10)  fma.s1   f62=f33,f34,f32
-	nop.i 999
+      nop.m 999
+(p10) fma.s1   f62=f33,f34,f32
+      nop.i 999
 }
 { .mfi
-	nop.m 999
-//
-//     Step (14)
-//     S1 = S + d * H in f13
-//
-(p10)  fma.s1   f63=f33,f35,f33
-	nop.i 999 ;;
+      nop.m 999
+(p10) fma.s1   f63=f33,f35,f33
+      nop.i 999 ;;
 }
 { .mfi
-	nop.m 999
-//
-//     Step (15)
-//     H1 = H + t6 * h in f7
-//
-(p10)  fnma.s1  f32=f62,f62,FR_Input_X
-	nop.i 999 ;;
+      nop.m 999
+(p10) fnma.s1  f32=f62,f62,FR_save_Input_X
+      nop.i 999 ;;
 }
 { .mfb
-	nop.m 999
-//
-//     Step (16)
-//     d1 = a - S1 * S1 
-//
-(p10)  fma.s0 FR_Result=f32,f63,f62
-//
-//     Step (17)
-//     R = S1 + d1 * H1 
-//
-(p10)  br.cond.sptk L(POWL_64_RETURN) ;; 
-}
-{ .mib
-	nop.m 999
-	nop.i 999
-//
-//     Do the Newton-Raphson iteration from the EAS.
-//
-(p0)   br.cond.sptk L(POWL_64_RETURN) ;; 
+      nop.m 999
+(p10) fma.s0 FR_Result=f32,f63,f62
+      br.ret.sptk   b0                // Exit for x > 0, y = 0.5
 }
-//
-//     Take care of the degenerate cases.
-//
+;;
 
-L(POWL_64_RETURN):
-{ .mfb
-       nop.m 999
-(p0)   mov   FR_Output = FR_Result
-(p0)   br.ret.sptk   b0 ;;
-}
-.endp powl
-ASM_SIZE_DIRECTIVE(powl)
+GLOBAL_LIBM_END(powl)
 
-.proc __libm_error_region
-__libm_error_region:
+LOCAL_LIBM_ENTRY(__libm_error_region)
 .prologue
 { .mfi
         add   GR_Parameter_Y=-32,sp             // Parameter 2 value
@@ -3411,32 +2770,32 @@ __libm_error_region:
         mov GR_SAVE_GP=gp                       // Save gp
 };;
 { .mmi
-        stfe [GR_Parameter_Y] = FR_Y,16         // Save Parameter 2 on stack
+        stfe [GR_Parameter_Y] = FR_Input_Y,16   // Save Parameter 2 on stack, then post-increment GR_Parameter_Y by 16
         add GR_Parameter_X = 16,sp              // Parameter 1 address
 .save   b0, GR_SAVE_B0
         mov GR_SAVE_B0=b0                       // Save b0
 };;
 .body
 { .mib
-        stfe [GR_Parameter_X] = FR_X            // Store Parameter 1 on stack
+        stfe [GR_Parameter_X] = FR_save_Input_X // Store Parameter 1 on stack at sp+16 (address computed above)
         add   GR_Parameter_RESULT = 0,GR_Parameter_Y
         nop.b 0                                 // Parameter 3 address
 }
 { .mib
-        stfe [GR_Parameter_Y] = FR_RESULT      // Store Parameter 3 on stack
+        stfe [GR_Parameter_Y] = FR_Result       // Store Parameter 3 on stack, at the address just copied into GR_Parameter_RESULT
         add   GR_Parameter_Y = -16,GR_Parameter_Y
-        br.call.sptk b0=__libm_error_support#  // Call error handling function
+        br.call.sptk b0=__libm_error_support#   // Call error handling function; return link goes in b0
 };;
 { .mmi
-        nop.m 0
-        nop.m 0
         add   GR_Parameter_RESULT = 48,sp
+        nop.m 0                                 // Filler for the second M slot of this .mmi bundle
+        nop.i 0                                 // Filler for the I slot of this .mmi bundle
 };;
 { .mmi
-        ldfe  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+        ldfe  f8 = [GR_Parameter_RESULT]        // Get return result off stack (sp+48) into f8
 .restore sp
-        add   sp = 64,sp                       // Restore stack pointer
-        mov   b0 = GR_SAVE_B0                  // Restore return address
+        add   sp = 64,sp                        // Restore stack pointer (add 64 back to sp)
+        mov   b0 = GR_SAVE_B0                   // Restore return address saved in GR_SAVE_B0 before the call
 };;
 { .mib
         mov   gp = GR_SAVE_GP                  // Restore gp
@@ -3444,7 +2803,6 @@ __libm_error_region:
         br.ret.sptk     b0                     // Return
 };;
 
-.endp __libm_error_region
-ASM_SIZE_DIRECTIVE(__libm_error_region)
+.endp // NOTE(review): unnamed .endp; presumably closes __libm_error_region opened by LOCAL_LIBM_ENTRY - confirm
 .type   __libm_error_support#,@function
 .global __libm_error_support#
 .type   __libm_error_support#,@function
 .global __libm_error_support#