about summary refs log tree commit diff
path: root/REORG.TODO/sysdeps/ia64/fpu/w_tgammaf_compat.S
diff options
context:
space:
mode:
authorZack Weinberg <zackw@panix.com>2017-06-08 15:39:03 -0400
committerZack Weinberg <zackw@panix.com>2017-06-08 15:39:03 -0400
commit5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 (patch)
tree4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/sysdeps/ia64/fpu/w_tgammaf_compat.S
parent199fc19d3aaaf57944ef036e15904febe877fc93 (diff)
downloadglibc-5046dbb4a7eba5eccfd258f92f4735c9ffc8d069.tar.gz
glibc-5046dbb4a7eba5eccfd258f92f4735c9ffc8d069.tar.xz
glibc-5046dbb4a7eba5eccfd258f92f4735c9ffc8d069.zip
Prepare for radical source tree reorganization. zack/build-layout-experiment
All top-level files and directories are moved into a temporary storage
directory, REORG.TODO, except for files that will certainly still
exist in their current form at top level when we're done (COPYING,
COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which
are moved to the new directory OldChangeLogs, instead), and the
generated file INSTALL (which is just deleted; in the new order, there
will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/sysdeps/ia64/fpu/w_tgammaf_compat.S')
-rw-r--r--REORG.TODO/sysdeps/ia64/fpu/w_tgammaf_compat.S1330
1 files changed, 1330 insertions, 0 deletions
diff --git a/REORG.TODO/sysdeps/ia64/fpu/w_tgammaf_compat.S b/REORG.TODO/sysdeps/ia64/fpu/w_tgammaf_compat.S
new file mode 100644
index 0000000000..0fbd840adb
--- /dev/null
+++ b/REORG.TODO/sysdeps/ia64/fpu/w_tgammaf_compat.S
@@ -0,0 +1,1330 @@
+.file "tgammaf.s"
+
+
+// Copyright (c) 2001 - 2005, Intel Corporation
+// All rights reserved.
+//
+// Contributed 2001 by the Intel Numerics Group, Intel Corporation
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
+// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
+// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code,and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// History:
+// 11/30/01  Initial version
+// 05/20/02  Cleaned up namespace and sf0 syntax
+// 02/10/03  Reordered header: .section, .global, .proc, .align
+// 04/04/03  Changed error codes for overflow and negative integers
+// 04/10/03  Changed code for overflow near zero handling
+// 12/16/03  Fixed parameter passing to/from error handling routine
+// 03/31/05  Reformatted delimiters between data tables
+//
+//*********************************************************************
+//
+//*********************************************************************
+//
+// Function: tgammaf(x) computes the principle value of the GAMMA
+// function of x.
+//
+//*********************************************************************
+//
+// Resources Used:
+//
+//    Floating-Point Registers: f8-f15
+//                              f33-f75
+//
+//    General Purpose Registers:
+//      r8-r11
+//      r14-r29
+//      r32-r36
+//      r37-r40 (Used to pass arguments to error handling routine)
+//
+//    Predicate Registers:      p6-p15
+//
+//*********************************************************************
+//
+// IEEE Special Conditions:
+//
+//    tgammaf(+inf) = +inf
+//    tgammaf(-inf) = QNaN
+//    tgammaf(+/-0) = +/-inf
+//    tgammaf(x<0, x - integer) = QNaN
+//    tgammaf(SNaN) = QNaN
+//    tgammaf(QNaN) = QNaN
+//
+//*********************************************************************
+//
+// Overview
+//
+// The method consists of three cases.
+//
+// If       2 <= x < OVERFLOW_BOUNDARY   use case tgamma_regular;
+// else if  0 < x < 2                    use case tgamma_from_0_to_2;
+// else if  -(i+1) <  x < -i, i = 0...43 use case tgamma_negatives;
+//
+// Case 2 <= x < OVERFLOW_BOUNDARY
+// -------------------------------
+//   Here we use algorithm based on the recursive formula
+//   GAMMA(x+1) = x*GAMMA(x). For that we subdivide interval
+//   [2; OVERFLOW_BOUNDARY] into intervals [8*n; 8*(n+1)] and
+//   approximate GAMMA(x) by polynomial of 22th degree on each
+//   [8*n; 8*n+1], recursive formula is used to expand GAMMA(x)
+//   to [8*n; 8*n+1]. In other words we need to find n, i and r
+//   such that x = 8 * n + i + r where n and i are integer numbers
+//   and r is fractional part of x. So GAMMA(x) = GAMMA(8*n+i+r) =
+//   = (x-1)*(x-2)*...*(x-i)*GAMMA(x-i) =
+//   = (x-1)*(x-2)*...*(x-i)*GAMMA(8*n+r) ~
+//   ~ (x-1)*(x-2)*...*(x-i)*P12n(r).
+//
+//   Step 1: Reduction
+//   -----------------
+//    N = [x] with truncate
+//    r = x - N, note 0 <= r < 1
+//
+//    n = N & ~0xF - index of table that contains coefficient of
+//                   polynomial approximation
+//    i = N & 0xF  - is used in recursive formula
+//
+//
+//   Step 2: Approximation
+//   ---------------------
+//    We use factorized minimax approximation polynomials
+//    P12n(r) = A12*(r^2+C01(n)*r+C00(n))*
+//              *(r^2+C11(n)*r+C10(n))*...*(r^2+C51(n)*r+C50(n))
+//
+//   Step 3: Recursion
+//   -----------------
+//    In case when i > 0 we need to multiply P12n(r) by product
+//    R(i,x)=(x-1)*(x-2)*...*(x-i). To reduce number of fp-instructions
+//    we can calculate R as follow:
+//    R(i,x) = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-1))*(x-i)) if i is
+//    even or R = ((x-1)*(x-2))*((x-3)*(x-4))*...*((x-(i-2))*(x-(i-1)))*
+//    *(i-1) if i is odd. In both cases we need to calculate
+//    R2(i,x) = (x^2-3*x+2)*(x^2-7*x+12)*...*(x^2+x+2*j*(2*j-1)) =
+//    = ((x^2-x)+2*(1-x))*((x^2-x)+6*(2-x))*...*((x^2-x)+2*(2*j-1)*(j-x)) =
+//    = (RA+2*RB)*(RA+6*(1-RB))*...*(RA+2*(2*j-1)*(j-1+RB))
+//    where j = 1..[i/2], RA = x^2-x, RB = 1-x.
+//
+//   Step 4: Reconstruction
+//   ----------------------
+//    Reconstruction is just simple multiplication i.e.
+//    GAMMA(x) = P12n(r)*R(i,x)
+//
+// Case 0 < x < 2
+// --------------
+//    To calculate GAMMA(x) on this interval we do following
+//        if 1.0  <= x < 1.25  than  GAMMA(x) = P7(x-1)
+//        if 1.25 <= x < 1.5   than  GAMMA(x) = P7(x-x_min) where
+//              x_min is point of local minimum on [1; 2] interval.
+//        if 1.5  <= x < 1.75  than  GAMMA(x) = P7(x-1.5)
+//        if 1.75 <= x < 2.0   than  GAMMA(x) = P7(x-1.5)
+//    and
+//        if 0 < x < 1 than GAMMA(x) = GAMMA(x+1)/x
+//
+// Case -(i+1) <  x < -i, i = 0...43
+// ----------------------------------
+//    Here we use the fact that GAMMA(-x) = PI/(x*GAMMA(x)*sin(PI*x)) and
+//    so we need to calculate GAMMA(x), sin(PI*x)/PI. Calculation of
+//    GAMMA(x) is described above.
+//
+//   Step 1: Reduction
+//   -----------------
+//    Note that period of sin(PI*x) is 2 and range reduction for
+//    sin(PI*x) is like to range reduction for GAMMA(x)
+//    i.e rs = x - round(x) and |rs| <= 0.5.
+//
+//   Step 2: Approximation
+//   ---------------------
+//    To approximate sin(PI*x)/PI = sin(PI*(2*n+rs))/PI =
+//    = (-1)^n*sin(PI*rs)/PI Taylor series is used.
+//    sin(PI*rs)/PI ~ S17(rs).
+//
+//   Step 3: Division
+//   ----------------
+//    To calculate 1/x and 1/(GAMMA(x)*S12(rs)) we use frcpa
+//    instruction with following Newton-Raphson interations.
+//
+//
+//*********************************************************************
+
+GR_ad_Data              = r8
+GR_TAG                  = r8
+GR_SignExp              = r9
+GR_Sig                  = r10
+GR_ArgNz                = r10
+GR_RqDeg                = r11
+
+GR_NanBound             = r14
+GR_ExpOf025             = r15
+GR_ExpOf05              = r16
+GR_ad_Co                = r17
+GR_ad_Ce                = r18
+GR_TblOffs              = r19
+GR_Arg                  = r20
+GR_Exp2Ind              = r21
+GR_TblOffsMask          = r21
+GR_Offs                 = r22
+GR_OvfNzBound           = r23
+GR_ZeroResBound         = r24
+GR_ad_SinO              = r25
+GR_ad_SinE              = r26
+GR_Correction           = r27
+GR_Tbl12Offs            = r28
+GR_NzBound              = r28
+GR_ExpOf1               = r29
+GR_fpsr                 = r29
+
+GR_SAVE_B0              = r33
+GR_SAVE_PFS             = r34
+GR_SAVE_GP              = r35
+GR_SAVE_SP              = r36
+
+GR_Parameter_X          = r37
+GR_Parameter_Y          = r38
+GR_Parameter_RESULT     = r39
+GR_Parameter_TAG        = r40
+
+
+FR_X                    = f10
+FR_Y                    = f1
+FR_RESULT               = f8
+
+FR_iXt                  = f11
+FR_Xt                   = f12
+FR_r                    = f13
+FR_r2                   = f14
+FR_r4                   = f15
+
+FR_C01                  = f33
+FR_A7                   = f33
+FR_C11                  = f34
+FR_A6                   = f34
+FR_C21                  = f35
+FR_A5                   = f35
+FR_C31                  = f36
+FR_A4                   = f36
+FR_C41                  = f37
+FR_A3                   = f37
+FR_C51                  = f38
+FR_A2                   = f38
+
+FR_C00                  = f39
+FR_A1                   = f39
+FR_C10                  = f40
+FR_A0                   = f40
+FR_C20                  = f41
+FR_C30                  = f42
+FR_C40                  = f43
+FR_C50                  = f44
+FR_An                   = f45
+FR_OvfBound             = f46
+FR_InvAn                = f47
+
+FR_Multplr              = f48
+FR_NormX                = f49
+FR_X2mX                 = f50
+FR_1mX                  = f51
+FR_Rq0                  = f51
+FR_Rq1                  = f52
+FR_Rq2                  = f53
+FR_Rq3                  = f54
+
+FR_Rcp0                 = f55
+FR_Rcp1                 = f56
+FR_Rcp2                 = f57
+
+FR_InvNormX1            = f58
+FR_InvNormX2            = f59
+
+FR_rs                   = f60
+FR_rs2                  = f61
+
+FR_LocalMin             = f62
+FR_10                   = f63
+
+FR_05                   = f64
+
+FR_S32                  = f65
+FR_S31                  = f66
+FR_S01                  = f67
+FR_S11                  = f68
+FR_S21                  = f69
+FR_S00                  = f70
+FR_S10                  = f71
+FR_S20                  = f72
+
+FR_GAMMA                = f73
+FR_2                    = f74
+FR_6                    = f75
+
+
+
+
+// Data tables
+//==============================================================
+RODATA
+.align 16
+LOCAL_OBJECT_START(tgammaf_data)
+data8 0x3FDD8B618D5AF8FE // local minimum (0.461632144968362356785)
+data8 0x4024000000000000 // 10.0
+data8 0x3E90FC992FF39E13 // S32
+data8 0xBEC144B2760626E2 // S31
+//
+//[2; 8)
+data8 0x4009EFD1BA0CB3B4 // C01
+data8 0x3FFFB35378FF4822 // C11
+data8 0xC01032270413B896 // C41
+data8 0xC01F171A4C0D6827 // C51
+data8 0x40148F8E197396AC // C20
+data8 0x401C601959F1249C // C30
+data8 0x3EE21AD881741977 // An
+data8 0x4041852200000000 // overflow boundary (35.04010009765625)
+data8 0x3FD9CE68F695B198 // C21
+data8 0xBFF8C30AC900DA03 // C31
+data8 0x400E17D2F0535C02 // C00
+data8 0x4010689240F7FAC8 // C10
+data8 0x402563147DDCCF8D // C40
+data8 0x4033406D0480A21C // C50
+//
+//[8; 16)
+data8 0x4006222BAE0B793B // C01
+data8 0x4002452733473EDA // C11
+data8 0xC0010EF3326FDDB3 // C41
+data8 0xC01492B817F99C0F // C51
+data8 0x40099C905A249B75 // C20
+data8 0x4012B972AE0E533D // C30
+data8 0x3FE6F6DB91D0D4CC // An
+data8 0x4041852200000000 // overflow boundary
+data8 0x3FF545828F7B73C5 // C21
+data8 0xBFBBD210578764DF // C31
+data8 0x4000542098F53CFC // C00
+data8 0x40032C1309AD6C81 // C10
+data8 0x401D7331E19BD2E1 // C40
+data8 0x402A06807295EF57 // C50
+//
+//[16; 24)
+data8 0x4000131002867596 // C01
+data8 0x3FFAA362D5D1B6F2 // C11
+data8 0xBFFCB6985697DB6D // C41
+data8 0xC0115BEE3BFC3B3B // C51
+data8 0x3FFE62FF83456F73 // C20
+data8 0x4007E33478A114C4 // C30
+data8 0x41E9B2B73795ED57 // An
+data8 0x4041852200000000 // overflow boundary
+data8 0x3FEEB1F345BC2769 // C21
+data8 0xBFC3BBE6E7F3316F // C31
+data8 0x3FF14E07DA5E9983 // C00
+data8 0x3FF53B76BF81E2C0 // C10
+data8 0x4014051E0269A3DC // C40
+data8 0x40229D4227468EDB // C50
+//
+//[24; 32)
+data8 0x3FFAF7BD498384DE // C01
+data8 0x3FF62AD8B4D1C3D2 // C11
+data8 0xBFFABCADCD004C32 // C41
+data8 0xC00FADE97C097EC9 // C51
+data8 0x3FF6DA9ED737707E // C20
+data8 0x4002A29E9E0C782C // C30
+data8 0x44329D5B5167C6C3 // An
+data8 0x4041852200000000 // overflow boundary
+data8 0x3FE8943CBBB4B727 // C21
+data8 0xBFCB39D466E11756 // C31
+data8 0x3FE879AF3243D8C1 // C00
+data8 0x3FEEC7DEBB14CE1E // C10
+data8 0x401017B79BA80BCB // C40
+data8 0x401E941DC3C4DE80 // C50
+//
+//[32; 40)
+data8 0x3FF7ECB3A0E8FE5C // C01
+data8 0x3FF3815A8516316B // C11
+data8 0xBFF9ABD8FCC000C3 // C41
+data8 0xC00DD89969A4195B // C51
+data8 0x3FF2E43139CBF563 // C20
+data8 0x3FFF96DC3474A606 // C30
+data8 0x46AFF4CA9B0DDDF0 // An
+data8 0x4041852200000000 // overflow boundary
+data8 0x3FE4CE76DA1B5783 // C21
+data8 0xBFD0524DB460BC4E // C31
+data8 0x3FE35852DF14E200 // C00
+data8 0x3FE8C7610359F642 // C10
+data8 0x400BCF750EC16173 // C40
+data8 0x401AC14E02EA701C // C50
+//
+//[40; 48)
+data8 0x3FF5DCE4D8193097 // C01
+data8 0x3FF1B0D8C4974FFA // C11
+data8 0xBFF8FB450194CAEA // C41
+data8 0xC00C9658E030A6C4 // C51
+data8 0x3FF068851118AB46 // C20
+data8 0x3FFBF7C7BB46BF7D // C30
+data8 0x3FF0000000000000 // An
+data8 0x4041852200000000 // overflow boundary
+data8 0x3FE231DEB11D847A // C21
+data8 0xBFD251ECAFD7E935 // C31
+data8 0x3FE0368AE288F6BF // C00
+data8 0x3FE513AE4215A70C // C10
+data8 0x4008F960F7141B8B // C40
+data8 0x40183BA08134397B // C50
+//
+//[1.0; 1.25)
+data8 0xBFD9909648921868 // A7
+data8 0x3FE96FFEEEA8520F // A6
+data8 0xBFED0800D93449B8 // A3
+data8 0x3FEFA648D144911C // A2
+data8 0xBFEE3720F7720B4D // A5
+data8 0x3FEF4857A010CA3B // A4
+data8 0xBFE2788CCD545AA4 // A1
+data8 0x3FEFFFFFFFE9209E // A0
+//
+//[1.25; 1.5)
+data8 0xBFB421236426936C // A7
+data8 0x3FAF237514F36691 // A6
+data8 0xBFC0BADE710A10B9 // A3
+data8 0x3FDB6C5465BBEF1F // A2
+data8 0xBFB7E7F83A546EBE // A5
+data8 0x3FC496A01A545163 // A4
+data8 0xBDEE86A39D8452EB // A1
+data8 0x3FEC56DC82A39AA2 // A0
+//
+//[1.5; 1.75)
+data8 0xBF94730B51795867 // A7
+data8 0x3FBF4203E3816C7B // A6
+data8 0xBFE85B427DBD23E4 // A3
+data8 0x3FEE65557AB26771 // A2
+data8 0xBFD59D31BE3AB42A // A5
+data8 0x3FE3C90CC8F09147 // A4
+data8 0xBFE245971DF735B8 // A1
+data8 0x3FEFFC613AE7FBC8 // A0
+//
+//[1.75; 2.0)
+data8 0xBF7746A85137617E // A7
+data8 0x3FA96E37D09735F3 // A6
+data8 0xBFE3C24AC40AC0BB // A3
+data8 0x3FEC56A80A977CA5 // A2
+data8 0xBFC6F0E707560916 // A5
+data8 0x3FDB262D949175BE // A4
+data8 0xBFE1C1AEDFB25495 // A1
+data8 0x3FEFEE1E644B2022 // A0
+//
+// sin(pi*x)/pi
+data8 0xC026FB0D377656CC // S01
+data8 0x3FFFB15F95A22324 // S11
+data8 0x406CE58F4A41C6E7 // S10
+data8 0x404453786302C61E // S20
+data8 0xC023D59A47DBFCD3 // S21
+data8 0x405541D7ABECEFCA // S00
+//
+// 1/An for [40; 48)
+data8 0xCAA7576DE621FCD5, 0x3F68
+LOCAL_OBJECT_END(tgammaf_data)
+
+//==============================================================
+// Code
+//==============================================================
+
+.section .text
+GLOBAL_LIBM_ENTRY(tgammaf)
+{ .mfi
+      getf.exp      GR_SignExp = f8
+      fma.s1        FR_NormX = f8,f1,f0
+      addl          GR_ad_Data = @ltoff(tgammaf_data), gp
+}
+{ .mfi
+      mov           GR_ExpOf05 = 0xFFFE
+      fcvt.fx.trunc.s1 FR_iXt = f8 // [x]
+      mov           GR_Offs = 0 // 2 <= x < 8
+};;
+{ .mfi
+      getf.d        GR_Arg = f8
+      fcmp.lt.s1    p14,p15 = f8,f0
+      mov           GR_Tbl12Offs = 0
+}
+{ .mfi
+      setf.exp      FR_05 = GR_ExpOf05
+      fma.s1        FR_2 = f1,f1,f1 // 2
+      mov           GR_Correction = 0
+};;
+{ .mfi
+      ld8           GR_ad_Data = [GR_ad_Data]
+      fclass.m      p10,p0 = f8,0x1E7 // is x  NaTVal, NaN, +/-0 or +/-INF?
+      tbit.z        p12,p13 = GR_SignExp,16 // p13 if |x| >= 2
+}
+{ .mfi
+      mov           GR_ExpOf1 = 0xFFFF
+      fcvt.fx.s1    FR_rs = f8 // round(x)
+      and           GR_Exp2Ind = 7,GR_SignExp
+};;
+.pred.rel "mutex",p14,p15
+{ .mfi
+(p15) cmp.eq.unc    p11,p0 = GR_ExpOf1,GR_SignExp // p11 if 1 <= x < 2
+(p14) fma.s1        FR_1mX = f1,f1,f8 // 1 - |x|
+      mov           GR_Sig = 0 // if |x| < 2
+}
+{ .mfi
+(p13) cmp.eq.unc    p7,p0 = 2,GR_Exp2Ind
+(p15) fms.s1        FR_1mX = f1,f1,f8 // 1 - |x|
+(p13) cmp.eq.unc    p8,p0 = 3,GR_Exp2Ind
+};;
+.pred.rel "mutex",p7,p8
+{ .mfi
+(p7)  mov           GR_Offs = 0x7    // 8 <= |x| < 16
+      nop.f         0
+(p8)  tbit.z.unc    p0,p6 = GR_Arg,51
+}
+{ .mib
+(p13) cmp.lt.unc    p9,p0 = 3,GR_Exp2Ind
+(p8)  mov           GR_Offs = 0xE // 16 <= |x| < 32
+      // jump if x is NaTVal, NaN, +/-0 or +/-INF?
+(p10) br.cond.spnt  tgammaf_spec_args
+};;
+.pred.rel "mutex",p14,p15
+.pred.rel "mutex",p6,p9
+{ .mfi
+(p9)  mov           GR_Offs = 0x1C // 32 <= |x|
+(p14) fma.s1        FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
+(p9)  tbit.z.unc    p0,p8 = GR_Arg,50
+}
+{ .mfi
+      ldfpd         FR_LocalMin,FR_10 = [GR_ad_Data],16
+(p15) fms.s1        FR_X2mX = FR_NormX,FR_NormX,FR_NormX // x^2-|x|
+(p6)  add           GR_Offs = 0x7,GR_Offs // 24 <= x < 32
+};;
+.pred.rel "mutex",p8,p12
+{ .mfi
+      add           GR_ad_Ce = 0x50,GR_ad_Data
+(p15) fcmp.lt.unc.s1 p10,p0 = f8,f1 // p10 if 0 <= x < 1
+      mov           GR_OvfNzBound = 2
+}
+{ .mib
+      ldfpd         FR_S32,FR_S31 = [GR_ad_Data],16
+(p8)  add           GR_Offs = 0x7,GR_Offs // 40 <= |x|
+      // jump if 1 <= x < 2
+(p11) br.cond.spnt  tgammaf_from_1_to_2
+};;
+{ .mfi
+      shladd        GR_ad_Ce = GR_Offs,4,GR_ad_Ce
+      fcvt.xf       FR_Xt = FR_iXt // [x]
+(p13) cmp.eq.unc    p7,p0 = r0,GR_Offs // p7 if 2 <= |x| < 8
+}
+{ .mfi
+      shladd        GR_ad_Co = GR_Offs,4,GR_ad_Data
+      fma.s1        FR_6 = FR_2,FR_2,FR_2
+      mov           GR_ExpOf05 = 0x7FC
+};;
+{ .mfi
+(p13) getf.sig      GR_Sig = FR_iXt // if |x| >= 2
+      frcpa.s1      FR_Rcp0,p0 = f1,FR_NormX
+(p10) shr           GR_Arg = GR_Arg,51
+}
+{ .mib
+      ldfpd         FR_C01,FR_C11 = [GR_ad_Co],16
+(p7)  mov           GR_Correction = 2
+      // jump if 0 < x < 1
+(p10) br.cond.spnt  tgammaf_from_0_to_1
+};;
+{ .mfi
+      ldfpd         FR_C21,FR_C31 = [GR_ad_Ce],16
+      fma.s1        FR_Rq2 = f1,f1,FR_1mX // 2 - |x|
+(p14) sub           GR_Correction = r0,GR_Correction
+}
+{ .mfi
+      ldfpd         FR_C41,FR_C51 = [GR_ad_Co],16
+(p14) fcvt.xf       FR_rs = FR_rs
+(p14) add           GR_ad_SinO = 0x3A0,GR_ad_Data
+};;
+.pred.rel "mutex",p14,p15
+{ .mfi
+      ldfpd         FR_C00,FR_C10 = [GR_ad_Ce],16
+      nop.f         0
+(p14) sub           GR_Sig = GR_Correction,GR_Sig
+}
+{ .mfi
+      ldfpd         FR_C20,FR_C30 = [GR_ad_Co],16
+      fma.s1        FR_Rq1 = FR_1mX,FR_2,FR_X2mX // (x-1)*(x-2)
+(p15) sub           GR_Sig = GR_Sig,GR_Correction
+};;
+{ .mfi
+(p14) ldfpd         FR_S01,FR_S11 = [GR_ad_SinO],16
+      fma.s1        FR_Rq3 = FR_2,f1,FR_1mX // 3 - |x|
+      and           GR_RqDeg = 0x6,GR_Sig
+}
+{ .mfi
+      ldfpd         FR_C40,FR_C50 = [GR_ad_Ce],16
+(p14) fma.d.s0      FR_X = f0,f0,f8 // set deno flag
+      mov           GR_NanBound = 0x30016 // -2^23
+};;
+.pred.rel "mutex",p14,p15
+{ .mfi
+(p14) add           GR_ad_SinE = 0x3C0,GR_ad_Data
+(p15) fms.s1        FR_r = FR_NormX,f1,FR_Xt // r = x - [x]
+      cmp.eq        p8,p0 = 2,GR_RqDeg
+}
+{ .mfi
+      ldfpd         FR_An,FR_OvfBound = [GR_ad_Co]
+(p14) fms.s1        FR_r = FR_Xt,f1,FR_NormX // r = |x - [x]|
+      cmp.eq        p9,p0 = 4,GR_RqDeg
+};;
+.pred.rel "mutex",p8,p9
+{ .mfi
+(p14) ldfpd         FR_S21,FR_S00 = [GR_ad_SinE],16
+(p8)  fma.s1        FR_Rq0 = FR_2,f1,FR_1mX // (3-x)
+      tbit.z        p0,p6 = GR_Sig,0
+}
+{ .mfi
+(p14) ldfpd         FR_S10,FR_S20 = [GR_ad_SinO],16
+(p9)  fma.s1        FR_Rq0 = FR_2,FR_2,FR_1mX // (5-x)
+      cmp.eq        p10,p0 = 6,GR_RqDeg
+};;
+{ .mfi
+(p14) getf.s        GR_Arg = f8
+(p14) fcmp.eq.unc.s1 p13,p0 = FR_NormX,FR_Xt
+(p14) mov           GR_ZeroResBound = 0xC22C // -43
+}
+{ .mfi
+(p14) ldfe          FR_InvAn = [GR_ad_SinE]
+(p10) fma.s1        FR_Rq0 = FR_6,f1,FR_1mX // (7-x)
+      cmp.eq        p7,p0 = r0,GR_RqDeg
+};;
+{ .mfi
+(p14) cmp.ge.unc    p11,p0 = GR_SignExp,GR_NanBound
+      fma.s1        FR_Rq2 = FR_Rq2,FR_6,FR_X2mX // (x-3)*(x-4)
+(p14) shl           GR_ZeroResBound = GR_ZeroResBound,16
+}
+{ .mfb
+(p14) mov           GR_OvfNzBound = 0x802
+(p14) fms.s1        FR_rs = FR_rs,f1,FR_NormX // rs = round(x) - x
+      // jump if  x < -2^23 i.e. x is negative integer
+(p11) br.cond.spnt  tgammaf_singularity
+};;
+{ .mfi
+      nop.m         0
+(p7)  fma.s1        FR_Rq1 = f0,f0,f1
+(p14) shl           GR_OvfNzBound = GR_OvfNzBound,20
+}
+{ .mfb
+      nop.m         0
+      fma.s1        FR_Rq3 = FR_Rq3,FR_10,FR_X2mX // (x-5)*(x-6)
+      // jump if x is negative integer such that -2^23 < x < 0
+(p13) br.cond.spnt  tgammaf_singularity
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C01 = FR_C01,f1,FR_r
+(p14) mov           GR_ExpOf05 = 0xFFFE
+}
+{ .mfi
+(p14) cmp.eq.unc    p7,p0 = GR_Arg,GR_OvfNzBound
+      fma.s1        FR_C11 = FR_C11,f1,FR_r
+(p14) cmp.ltu.unc   p11,p0 = GR_Arg,GR_OvfNzBound
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C21 = FR_C21,f1,FR_r
+(p14) cmp.ltu.unc   p9,p0 = GR_ZeroResBound,GR_Arg
+}
+{ .mfb
+      nop.m         0
+      fma.s1        FR_C31 = FR_C31,f1,FR_r
+      // jump if argument is close to 0 negative
+(p11) br.cond.spnt  tgammaf_overflow
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C41 = FR_C41,f1,FR_r
+      nop.i         0
+}
+{ .mfb
+      nop.m         0
+      fma.s1        FR_C51 = FR_C51,f1,FR_r
+      // jump if x is negative noninteger such that -2^23 < x < -43
+(p9)  br.cond.spnt  tgammaf_underflow
+};;
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_rs2 = FR_rs,FR_rs,f0
+      nop.i         0
+}
+{ .mfb
+      nop.m         0
+(p14) fma.s1        FR_S01 = FR_rs,FR_rs,FR_S01
+      // jump if argument is 0x80200000
+(p7)  br.cond.spnt  tgammaf_overflow_near0_bound
+};;
+{ .mfi
+      nop.m         0
+(p6)  fnma.s1       FR_Rq1 = FR_Rq1,FR_Rq0,f0
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p10) fma.s1        FR_Rq2 = FR_Rq2,FR_Rq3,f0
+      and           GR_Sig = 0x7,GR_Sig
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C01 = FR_C01,FR_r,FR_C00
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C11 = FR_C11,FR_r,FR_C10
+      cmp.eq        p6,p7 = r0,GR_Sig // p6 if |x| from one of base intervals
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C21 = FR_C21,FR_r,FR_C20
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C31 = FR_C31,FR_r,FR_C30
+(p7)  cmp.lt.unc    p9,p0 = 2,GR_RqDeg
+};;
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S11 = FR_rs,FR_rs,FR_S11
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S21 = FR_rs,FR_rs,FR_S21
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C41 = FR_C41,FR_r,FR_C40
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S32 = FR_rs2,FR_S32,FR_S31
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+(p9)  fma.s1        FR_Rq1 = FR_Rq1,FR_Rq2,f0
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C51 = FR_C51,FR_r,FR_C50
+      nop.i         0
+};;
+{ .mfi
+(p14) getf.exp      GR_SignExp = FR_rs
+      fma.s1        FR_C01 = FR_C01,FR_C11,f0
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S01 = FR_S01,FR_rs2,FR_S00
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C21 = FR_C21,FR_C31,f0
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      // NR-iteration
+(p14) fnma.s1       FR_InvNormX1 = FR_Rcp0,FR_NormX,f1
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S11 = FR_S11,FR_rs2,FR_S10
+(p14) tbit.z.unc    p11,p12 = GR_SignExp,17
+}
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S21 = FR_S21,FR_rs2,FR_S20
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+(p15) fcmp.lt.unc.s1 p0,p13 = FR_NormX,FR_OvfBound
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S32 = FR_rs2,FR_S32,f0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C41 = FR_C41,FR_C51,f0
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p7)  fma.s1        FR_An = FR_Rq1,FR_An,f0
+      nop.i         0
+};;
+{ .mfb
+      nop.m         0
+      nop.f         0
+      // jump if x > 35.04010009765625
+(p13) br.cond.spnt  tgammaf_overflow
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+(p14) fma.s1        FR_InvNormX1 = FR_Rcp0,FR_InvNormX1,FR_Rcp0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S01 = FR_S01,FR_S11,f0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_S21 = FR_S21,FR_S32,f0
+      nop.i         0
+};;
+{ .mfi
+(p14) getf.exp      GR_SignExp = FR_NormX
+      fma.s1        FR_C01 = FR_C01,FR_C21,f0
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      fma.s1        FR_C41 = FR_C41,FR_An,f0
+(p14) mov           GR_ExpOf1 = 0x2FFFF
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+(p14) fnma.s1       FR_InvNormX2 = FR_InvNormX1,FR_NormX,f1
+      nop.i         0
+};;
+.pred.rel "mutex",p11,p12
+{ .mfi
+      nop.m         0
+(p12) fnma.s1       FR_S01 = FR_S01,FR_S21,f0
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p11) fma.s1        FR_S01 = FR_S01,FR_S21,f0
+      nop.i         0
+};;
+
+{ .mfi
+      nop.m         0
+(p14) fma.s1        FR_GAMMA = FR_C01,FR_C41,f0
+(p14) tbit.z.unc    p6,p7 = GR_Sig,0
+}
+{ .mfb
+      nop.m         0
+(p15) fma.s.s0      f8 = FR_C01,FR_C41,f0
+(p15) br.ret.spnt   b0 // exit for positives
+};;
+.pred.rel "mutex",p11,p12
+{ .mfi
+      nop.m         0
+(p12) fms.s1        FR_S01 = FR_rs,FR_S01,FR_rs
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p11) fma.s1        FR_S01 = FR_rs,FR_S01,FR_rs
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+      fma.s1        FR_InvNormX2 = FR_InvNormX1,FR_InvNormX2,FR_InvNormX1
+      cmp.eq        p10,p0 = 0x23,GR_Offs
+};;
+.pred.rel "mutex",p6,p7
+{ .mfi
+      nop.m         0
+(p6)  fma.s1        FR_GAMMA = FR_S01,FR_GAMMA,f0
+      cmp.gtu       p8,p0 = GR_SignExp,GR_ExpOf1
+}
+{ .mfi
+      nop.m         0
+(p7)  fnma.s1       FR_GAMMA = FR_S01,FR_GAMMA,f0
+      cmp.eq        p9,p0 = GR_SignExp,GR_ExpOf1
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+      fnma.s1       FR_InvNormX1 = FR_InvNormX2,FR_NormX,f1
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p10) fma.s1        FR_InvNormX2 = FR_InvNormX2,FR_InvAn,f0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      frcpa.s1      FR_Rcp0,p0 = f1,FR_GAMMA
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fms.s1        FR_Multplr = FR_NormX,f1,f1 // x - 1
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+      fnma.s1       FR_Rcp1 = FR_Rcp0,FR_GAMMA,f1
+      nop.i         0
+};;
+.pred.rel "mutex",p8,p9
+{ .mfi
+      nop.m         0
+      // 1/x or 1/(An*x)
+(p8)  fma.s1        FR_Multplr = FR_InvNormX2,FR_InvNormX1,FR_InvNormX2
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p9)  fma.s1        FR_Multplr = f1,f1,f0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+      fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+      fnma.s1       FR_Rcp2 = FR_Rcp1,FR_GAMMA,f1
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      // NR-iteration
+      fma.s1        FR_Rcp1 = FR_Rcp1,FR_Multplr,f0
+      nop.i         0
+};;
+{ .mfb
+      nop.m         0
+      fma.s.s0      f8 = FR_Rcp1,FR_Rcp2,FR_Rcp1
+      br.ret.sptk   b0
+};;
+
+// here if 0 < x < 1
+//--------------------------------------------------------------------
+.align 32
+tgammaf_from_0_to_1:
+{ .mfi
+      cmp.lt        p7,p0 = GR_Arg,GR_ExpOf05
+      // NR-iteration
+      fnma.s1       FR_Rcp1 = FR_Rcp0,FR_NormX,f1
+      cmp.eq        p8,p0 = GR_Arg,GR_ExpOf05
+}
+{ .mfi
+      cmp.gt        p9,p0 = GR_Arg,GR_ExpOf05
+      fma.s1        FR_r = f0,f0,FR_NormX // reduced arg for (0;1)
+      mov           GR_ExpOf025 = 0x7FA
+};;
+{ .mfi
+      getf.s        GR_ArgNz = f8
+      fma.d.s0      FR_X = f0,f0,f8 // set deno flag
+      shl           GR_OvfNzBound = GR_OvfNzBound,20
+}
+{ .mfi
+(p8)  mov           GR_Tbl12Offs = 0x80 // 0.5 <= x < 0.75
+      nop.f         0
+(p7)  cmp.ge.unc    p6,p0 = GR_Arg,GR_ExpOf025
+};;
+.pred.rel "mutex",p6,p9
+{ .mfi
+(p9)  mov           GR_Tbl12Offs = 0xC0 // 0.75 <= x < 1
+      nop.f         0
+(p6)  mov           GR_Tbl12Offs = 0x40 // 0.25 <= x < 0.5
+}
+{ .mfi
+      add           GR_ad_Ce = 0x2C0,GR_ad_Data
+      nop.f         0
+      add           GR_ad_Co = 0x2A0,GR_ad_Data
+};;
+{ .mfi
+      add           GR_ad_Co = GR_ad_Co,GR_Tbl12Offs
+      nop.f         0
+      cmp.lt        p12,p0 = GR_ArgNz,GR_OvfNzBound
+}
+{ .mib
+      add           GR_ad_Ce = GR_ad_Ce,GR_Tbl12Offs
+      cmp.eq        p7,p0 = GR_ArgNz,GR_OvfNzBound
+      // jump if argument is 0x00200000
+(p7)  br.cond.spnt  tgammaf_overflow_near0_bound
+};;
+{ .mmb
+      ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
+      ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
+      // jump if argument is close to 0 positive
+(p12) br.cond.spnt  tgammaf_overflow
+};;
+{ .mfi
+      ldfpd         FR_A3,FR_A2 = [GR_ad_Co],16
+      // NR-iteration
+      fma.s1        FR_Rcp1 = FR_Rcp0,FR_Rcp1,FR_Rcp0
+      nop.i         0
+}
+{ .mfb
+      ldfpd         FR_A1,FR_A0 = [GR_ad_Ce],16
+      nop.f         0
+      br.cond.sptk  tgamma_from_0_to_2
+};;
+
+// here if 1 < x < 2
+//--------------------------------------------------------------------
+.align 32
+tgammaf_from_1_to_2:
+{ .mfi
+      add           GR_ad_Co = 0x2A0,GR_ad_Data
+      fms.s1        FR_r = f0,f0,FR_1mX
+      shr           GR_TblOffs = GR_Arg,47
+}
+{ .mfi
+      add           GR_ad_Ce = 0x2C0,GR_ad_Data
+      nop.f         0
+      mov           GR_TblOffsMask = 0x18
+};;
+{ .mfi
+      nop.m         0
+      nop.f         0
+      and           GR_TblOffs = GR_TblOffs,GR_TblOffsMask
+};;
+{ .mfi
+      shladd        GR_ad_Co = GR_TblOffs,3,GR_ad_Co
+      nop.f         0
+      nop.i         0
+}
+{ .mfi
+      shladd        GR_ad_Ce = GR_TblOffs,3,GR_ad_Ce
+      nop.f         0
+      cmp.eq        p6,p7 = 8,GR_TblOffs
+};;
+{ .mmi
+      ldfpd         FR_A7,FR_A6 = [GR_ad_Co],16
+      ldfpd         FR_A5,FR_A4 = [GR_ad_Ce],16
+      nop.i         0
+};;
+{ .mmi
+      ldfpd         FR_A3,FR_A2 = [GR_ad_Co],16
+      ldfpd         FR_A1,FR_A0 = [GR_ad_Ce],16
+      nop.i         0
+};;
+
+.align 32
+tgamma_from_0_to_2:
+{ .mfi
+      nop.m         0
+(p6)  fms.s1        FR_r = FR_r,f1,FR_LocalMin
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+(p10) fnma.s1       FR_Rcp2 = FR_Rcp1,FR_NormX,f1
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fms.s1        FR_r2 = FR_r,FR_r,f0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_A7 = FR_A7,FR_r,FR_A6
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      fma.s1        FR_A5 = FR_A5,FR_r,FR_A4
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_A3 = FR_A3,FR_r,FR_A2
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      fma.s1        FR_A1 = FR_A1,FR_r,FR_A0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      // NR-iteration
+(p10) fma.s1        FR_Rcp2 = FR_Rcp1,FR_Rcp2,FR_Rcp1
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_A7 = FR_A7,FR_r2,FR_A5
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+      fma.s1        FR_r4 = FR_r2,FR_r2,f0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fma.s1        FR_A3 = FR_A3,FR_r2,FR_A1
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+(p10) fma.s1        FR_GAMMA = FR_A7,FR_r4,FR_A3
+      nop.i         0
+}
+{ .mfi
+      nop.m         0
+(p11) fma.s.s0      f8 = FR_A7,FR_r4,FR_A3
+      nop.i         0
+};;
+{ .mfb
+      nop.m         0
+(p10) fma.s.s0      f8 = FR_GAMMA,FR_Rcp2,f0
+      br.ret.sptk   b0
+};;
+
+
+// overflow
+//--------------------------------------------------------------------
+.align 32
+tgammaf_overflow_near0_bound:
+.pred.rel "mutex",p14,p15
+{ .mfi
+	  mov           GR_fpsr = ar.fpsr
+	  nop.f         0
+(p15) mov           r8 = 0x7f8
+}
+{ .mfi
+      nop.m         0
+      nop.f         0
+(p14) mov           r8 = 0xff8
+};;
+{ .mfi
+	  nop.m         0
+	  nop.f         0
+	  shl           r8 = r8,20
+};;
+{ .mfi
+      sub           r8 = r8,r0,1
+      nop.f         0
+	  extr.u        GR_fpsr = GR_fpsr,10,2 // rounding mode
+};;
+.pred.rel "mutex",p14,p15
+{ .mfi
+      // set p8 to 0 in case of overflow and to 1 otherwise
+	  // for negative arg:
+	  //    no overflow if rounding mode either Z or +Inf, i.e.
+	  //    GR_fpsr > 1
+(p14) cmp.lt        p8,p0 = 1,GR_fpsr
+      nop.f         0
+	  // for positive arg:
+	  //    no overflow if rounding mode either Z or -Inf, i.e.
+	  //    (GR_fpsr & 1) == 0
+(p15) tbit.z        p0,p8 = GR_fpsr,0
+};;
+{ .mib
+(p8)  setf.s        f8 = r8 // set result to 0x7f7fffff without
+                            // OVERFLOW flag raising
+      nop.i         0
+(p8)  br.ret.sptk   b0
+};;
+
+.align 32
+tgammaf_overflow:
+{ .mfi
+      nop.m         0
+      nop.f         0
+      mov           r8 = 0x1FFFE
+};;
+{ .mfi
+      setf.exp      f9 = r8
+      fmerge.s      FR_X = f8,f8
+      nop.i         0
+};;
+.pred.rel "mutex",p14,p15
+{ .mfi
+      nop.m         0
+(p14) fnma.s.s0     f8 = f9,f9,f0 // set I,O and -INF result
+      mov           GR_TAG = 261 // overflow
+}
+{ .mfb
+      nop.m         0
+(p15) fma.s.s0      f8 = f9,f9,f0 // set I,O and +INF result
+      br.cond.sptk  tgammaf_libm_err
+};;
+
+// x is negative integer or +/-0
+//--------------------------------------------------------------------
+.align 32
+tgammaf_singularity:
+{ .mfi
+      nop.m         0
+      fmerge.s      FR_X = f8,f8
+      mov           GR_TAG = 262 // negative
+}
+{ .mfb
+      nop.m         0
+      frcpa.s0      f8,p0 = f0,f0
+      br.cond.sptk  tgammaf_libm_err
+};;
+// x is negative noninteger with big absolute value
+//--------------------------------------------------------------------
+.align 32
+tgammaf_underflow:
+{ .mfi
+      mov           r8 = 0x00001
+      nop.f         0
+      tbit.z        p6,p7 = GR_Sig,0
+};;
+{ .mfi
+      setf.exp      f9 = r8
+      nop.f         0
+      nop.i         0
+};;
+.pred.rel "mutex",p6,p7
+{ .mfi
+      nop.m         0
+(p6)  fms.s.s0      f8 = f9,f9,f9
+      nop.i         0
+}
+{ .mfb
+      nop.m         0
+(p7)  fma.s.s0      f8 = f9,f9,f9
+      br.ret.sptk   b0
+};;
+
+//  x for natval, nan, +/-inf or +/-0
+//--------------------------------------------------------------------
+.align 32
+tgammaf_spec_args:
+{ .mfi
+      nop.m         0
+      fclass.m      p6,p0 =  f8,0x1E1 // Test x for natval, nan, +inf
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fclass.m      p7,p8 =  f8,0x7 // +/-0
+      nop.i         0
+};;
+{ .mfi
+      nop.m         0
+      fmerge.s      FR_X = f8,f8
+      nop.i         0
+}
+{ .mfb
+      nop.m         0
+(p6)  fma.s.s0      f8 = f8,f1,f8
+(p6)  br.ret.spnt   b0
+};;
+.pred.rel "mutex",p7,p8
+{ .mfi
+(p7)  mov           GR_TAG = 262 // negative
+(p7)  frcpa.s0      f8,p0 = f1,f8
+      nop.i         0
+}
+{ .mib
+      nop.m         0
+      nop.i         0
+(p8)  br.cond.spnt  tgammaf_singularity
+};;
+
+.align 32
+tgammaf_libm_err:
+{ .mfi
+      alloc        r32 = ar.pfs,1,4,4,0
+      nop.f        0
+      mov          GR_Parameter_TAG = GR_TAG
+};;
+
+GLOBAL_LIBM_END(tgammaf)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+        add   GR_Parameter_Y=-32,sp             // Parameter 2 value
+        nop.f 0
+.save   ar.pfs,GR_SAVE_PFS
+        mov  GR_SAVE_PFS=ar.pfs                 // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+        add sp=-64,sp                           // Create new stack
+        nop.f 0
+        mov GR_SAVE_GP=gp                       // Save gp
+};;
+{ .mmi
+        stfs [GR_Parameter_Y] = FR_Y,16         // STORE Parameter 2 on stack
+        add GR_Parameter_X = 16,sp              // Parameter 1 address
+.save   b0, GR_SAVE_B0
+        mov GR_SAVE_B0=b0                       // Save b0
+};;
+.body
+{ .mib
+        stfs [GR_Parameter_X] = FR_X           // STORE Parameter 1 on stack
+        add   GR_Parameter_RESULT = 0,GR_Parameter_Y  // Parameter 3 address
+        nop.b 0
+}
+{ .mib
+        stfs [GR_Parameter_Y] = FR_RESULT      // STORE Parameter 3 on stack
+        add   GR_Parameter_Y = -16,GR_Parameter_Y
+        br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+        nop.m 0
+        nop.m 0
+        add   GR_Parameter_RESULT = 48,sp
+};;
+{ .mmi
+        ldfs  f8 = [GR_Parameter_RESULT]       // Get return result off stack
+.restore sp
+        add   sp = 64,sp                       // Restore stack pointer
+        mov   b0 = GR_SAVE_B0                  // Restore return address
+};;
+{ .mib
+        mov   gp = GR_SAVE_GP                  // Restore gp
+        mov   ar.pfs = GR_SAVE_PFS             // Restore ar.pfs
+        br.ret.sptk     b0                     // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type   __libm_error_support#,@function
+.global __libm_error_support#