author | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400
---|---|---
committer | Zack Weinberg <zackw@panix.com> | 2017-06-08 15:39:03 -0400
commit | 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 |
tree | 4470480d904b65cf14ca524f96f79eca818c3eaf /REORG.TODO/sysdeps/ia64/ia64libgcc.S |
parent | 199fc19d3aaaf57944ef036e15904febe877fc93 |
Prepare for radical source tree reorganization.
All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which are moved to the new directory OldChangeLogs, instead), and the generated file INSTALL (which is just deleted; in the new order, there will be no generated files checked into version control).
Diffstat (limited to 'REORG.TODO/sysdeps/ia64/ia64libgcc.S')
-rw-r--r-- | REORG.TODO/sysdeps/ia64/ia64libgcc.S | 350
1 file changed, 350 insertions(+), 0 deletions(-)
diff --git a/REORG.TODO/sysdeps/ia64/ia64libgcc.S b/REORG.TODO/sysdeps/ia64/ia64libgcc.S
new file mode 100644
index 0000000000..3f77b06a5a
--- /dev/null
+++ b/REORG.TODO/sysdeps/ia64/ia64libgcc.S
@@ -0,0 +1,350 @@
+/* From the Intel IA-64 Optimization Guide, choose the minimum latency
+   alternative.  */
+
+#include <sysdep.h>
+#undef ret
+
+#include <shlib-compat.h>
+
+#if SHLIB_COMPAT(libc, GLIBC_2_2, GLIBC_2_2_6)
+
+/* __divtf3
+   Compute a 80-bit IEEE double-extended quotient.
+   farg0 holds the dividend.  farg1 holds the divisor.  */
+
+ENTRY(___divtf3)
+        cmp.eq p7, p0 = r0, r0
+        frcpa.s0 f10, p6 = farg0, farg1
+        ;;
+(p6)    cmp.ne p7, p0 = r0, r0
+        .pred.rel.mutex p6, p7
+(p6)    fnma.s1 f11 = farg1, f10, f1
+(p6)    fma.s1 f12 = farg0, f10, f0
+        ;;
+(p6)    fma.s1 f13 = f11, f11, f0
+(p6)    fma.s1 f14 = f11, f11, f11
+        ;;
+(p6)    fma.s1 f11 = f13, f13, f11
+(p6)    fma.s1 f13 = f14, f10, f10
+        ;;
+(p6)    fma.s1 f10 = f13, f11, f10
+(p6)    fnma.s1 f11 = farg1, f12, farg0
+        ;;
+(p6)    fma.s1 f11 = f11, f10, f12
+(p6)    fnma.s1 f12 = farg1, f10, f1
+        ;;
+(p6)    fma.s1 f10 = f12, f10, f10
+(p6)    fnma.s1 f12 = farg1, f11, farg0
+        ;;
+(p6)    fma.s0 fret0 = f12, f10, f11
+(p7)    mov fret0 = f10
+        br.ret.sptk rp
+END(___divtf3)
+        .symver ___divtf3, __divtf3@GLIBC_2.2
+
+/* __divdf3
+   Compute a 64-bit IEEE double quotient.
+   farg0 holds the dividend.  farg1 holds the divisor.  */
+
+ENTRY(___divdf3)
+        cmp.eq p7, p0 = r0, r0
+        frcpa.s0 f10, p6 = farg0, farg1
+        ;;
+(p6)    cmp.ne p7, p0 = r0, r0
+        .pred.rel.mutex p6, p7
+(p6)    fmpy.s1 f11 = farg0, f10
+(p6)    fnma.s1 f12 = farg1, f10, f1
+        ;;
+(p6)    fma.s1 f11 = f12, f11, f11
+(p6)    fmpy.s1 f13 = f12, f12
+        ;;
+(p6)    fma.s1 f10 = f12, f10, f10
+(p6)    fma.s1 f11 = f13, f11, f11
+        ;;
+(p6)    fmpy.s1 f12 = f13, f13
+(p6)    fma.s1 f10 = f13, f10, f10
+        ;;
+(p6)    fma.d.s1 f11 = f12, f11, f11
+(p6)    fma.s1 f10 = f12, f10, f10
+        ;;
+(p6)    fnma.d.s1 f8 = farg1, f11, farg0
+        ;;
+(p6)    fma.d fret0 = f8, f10, f11
+(p7)    mov fret0 = f10
+        br.ret.sptk rp
+        ;;
+END(___divdf3)
+        .symver ___divdf3, __divdf3@GLIBC_2.2
+
+/* __divsf3
+   Compute a 32-bit IEEE float quotient.
+   farg0 holds the dividend.  farg1 holds the divisor.  */
+
+ENTRY(___divsf3)
+        cmp.eq p7, p0 = r0, r0
+        frcpa.s0 f10, p6 = farg0, farg1
+        ;;
+(p6)    cmp.ne p7, p0 = r0, r0
+        .pred.rel.mutex p6, p7
+(p6)    fmpy.s1 f8 = farg0, f10
+(p6)    fnma.s1 f9 = farg1, f10, f1
+        ;;
+(p6)    fma.s1 f8 = f9, f8, f8
+(p6)    fmpy.s1 f9 = f9, f9
+        ;;
+(p6)    fma.s1 f8 = f9, f8, f8
+(p6)    fmpy.s1 f9 = f9, f9
+        ;;
+(p6)    fma.d.s1 f10 = f9, f8, f8
+        ;;
+(p6)    fnorm.s.s0 fret0 = f10
+(p7)    mov fret0 = f10
+        br.ret.sptk rp
+        ;;
+END(___divsf3)
+        .symver ___divsf3, __divsf3@GLIBC_2.2
+
+/* __divdi3
+   Compute a 64-bit integer quotient.
+   in0 holds the dividend.  in1 holds the divisor.  */
+
+ENTRY(___divdi3)
+        .regstk 2,0,0,0
+        /* Transfer inputs to FP registers.  */
+        setf.sig f8 = in0
+        setf.sig f9 = in1
+        ;;
+        /* Convert the inputs to FP, so that they won't be treated as
+           unsigned.  */
+        fcvt.xf f8 = f8
+        fcvt.xf f9 = f9
+        ;;
+        /* Compute the reciprocal approximation.  */
+        frcpa.s1 f10, p6 = f8, f9
+        ;;
+        /* 3 Newton-Raphson iterations.  */
+(p6)    fnma.s1 f11 = f9, f10, f1
+(p6)    fmpy.s1 f12 = f8, f10
+        ;;
+(p6)    fmpy.s1 f13 = f11, f11
+(p6)    fma.s1 f12 = f11, f12, f12
+        ;;
+(p6)    fma.s1 f10 = f11, f10, f10
+(p6)    fma.s1 f11 = f13, f12, f12
+        ;;
+(p6)    fma.s1 f10 = f13, f10, f10
+(p6)    fnma.s1 f12 = f9, f11, f8
+        ;;
+(p6)    fma.s1 f10 = f12, f10, f11
+        ;;
+        /* Round quotient to an integer.  */
+        fcvt.fx.trunc.s1 f10 = f10
+        ;;
+        /* Transfer result to GP registers.  */
+        getf.sig ret0 = f10
+        br.ret.sptk rp
+        ;;
+END(___divdi3)
+        .symver ___divdi3, __divdi3@GLIBC_2.2
+
+/* __moddi3
+   Compute a 64-bit integer modulus.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(___moddi3)
+        .regstk 2,0,0,0
+        /* Transfer inputs to FP registers.  */
+        setf.sig f14 = in0
+        setf.sig f9 = in1
+        ;;
+        /* Convert the inputs to FP, so that they won't be treated as
+           unsigned.  */
+        fcvt.xf f8 = f14
+        fcvt.xf f9 = f9
+        ;;
+        /* Compute the reciprocal approximation.  */
+        frcpa.s1 f10, p6 = f8, f9
+        ;;
+        /* 3 Newton-Raphson iterations.  */
+(p6)    fmpy.s1 f12 = f8, f10
+(p6)    fnma.s1 f11 = f9, f10, f1
+        ;;
+(p6)    fma.s1 f12 = f11, f12, f12
+(p6)    fmpy.s1 f13 = f11, f11
+        ;;
+(p6)    fma.s1 f10 = f11, f10, f10
+(p6)    fma.s1 f11 = f13, f12, f12
+        ;;
+        sub in1 = r0, in1
+(p6)    fma.s1 f10 = f13, f10, f10
+(p6)    fnma.s1 f12 = f9, f11, f8
+        ;;
+        setf.sig f9 = in1
+(p6)    fma.s1 f10 = f12, f10, f11
+        ;;
+        fcvt.fx.trunc.s1 f10 = f10
+        ;;
+        /* r = q * (-b) + a  */
+        xma.l f10 = f10, f9, f14
+        ;;
+        /* Transfer result to GP registers.  */
+        getf.sig ret0 = f10
+        br.ret.sptk rp
+        ;;
+END(___moddi3)
+        .symver ___moddi3, __moddi3@GLIBC_2.2
+
+/* __udivdi3
+   Compute a 64-bit unsigned integer quotient.
+   in0 holds the dividend.  in1 holds the divisor.  */
+
+ENTRY(___udivdi3)
+        .regstk 2,0,0,0
+        /* Transfer inputs to FP registers.  */
+        setf.sig f8 = in0
+        setf.sig f9 = in1
+        ;;
+        /* Convert the inputs to FP, to avoid FP software-assist faults.  */
+        fcvt.xuf.s1 f8 = f8
+        fcvt.xuf.s1 f9 = f9
+        ;;
+        /* Compute the reciprocal approximation.  */
+        frcpa.s1 f10, p6 = f8, f9
+        ;;
+        /* 3 Newton-Raphson iterations.  */
+(p6)    fnma.s1 f11 = f9, f10, f1
+(p6)    fmpy.s1 f12 = f8, f10
+        ;;
+(p6)    fmpy.s1 f13 = f11, f11
+(p6)    fma.s1 f12 = f11, f12, f12
+        ;;
+(p6)    fma.s1 f10 = f11, f10, f10
+(p6)    fma.s1 f11 = f13, f12, f12
+        ;;
+(p6)    fma.s1 f10 = f13, f10, f10
+(p6)    fnma.s1 f12 = f9, f11, f8
+        ;;
+(p6)    fma.s1 f10 = f12, f10, f11
+        ;;
+        /* Round quotient to an unsigned integer.  */
+        fcvt.fxu.trunc.s1 f10 = f10
+        ;;
+        /* Transfer result to GP registers.  */
+        getf.sig ret0 = f10
+        br.ret.sptk rp
+        ;;
+END(___udivdi3)
+        .symver ___udivdi3, __udivdi3@GLIBC_2.2
+
+/* __umoddi3
+   Compute a 64-bit unsigned integer modulus.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(___umoddi3)
+        .regstk 2,0,0,0
+        /* Transfer inputs to FP registers.  */
+        setf.sig f14 = in0
+        setf.sig f9 = in1
+        ;;
+        /* Convert the inputs to FP, to avoid FP software assist faults.  */
+        fcvt.xuf.s1 f8 = f14
+        fcvt.xuf.s1 f9 = f9
+        ;;
+        /* Compute the reciprocal approximation.  */
+        frcpa.s1 f10, p6 = f8, f9
+        ;;
+        /* 3 Newton-Raphson iterations.  */
+(p6)    fmpy.s1 f12 = f8, f10
+(p6)    fnma.s1 f11 = f9, f10, f1
+        ;;
+(p6)    fma.s1 f12 = f11, f12, f12
+(p6)    fmpy.s1 f13 = f11, f11
+        ;;
+(p6)    fma.s1 f10 = f11, f10, f10
+(p6)    fma.s1 f11 = f13, f12, f12
+        ;;
+        sub in1 = r0, in1
+(p6)    fma.s1 f10 = f13, f10, f10
+(p6)    fnma.s1 f12 = f9, f11, f8
+        ;;
+        setf.sig f9 = in1
+(p6)    fma.s1 f10 = f12, f10, f11
+        ;;
+        /* Round quotient to an unsigned integer.  */
+        fcvt.fxu.trunc.s1 f10 = f10
+        ;;
+        /* r = q * (-b) + a  */
+        xma.l f10 = f10, f9, f14
+        ;;
+        /* Transfer result to GP registers.  */
+        getf.sig ret0 = f10
+        br.ret.sptk rp
+        ;;
+END(___umoddi3)
+        .symver ___umoddi3, __umoddi3@GLIBC_2.2
+
+/* __multi3
+   Compute a 128-bit multiply of 128-bit multiplicands.
+   in0/in1 holds one multiplicand (a), in2/in3 holds the other one (b).  */
+
+ENTRY(___multi3)
+        .regstk 4,0,0,0
+        setf.sig f6 = in1
+        movl r19 = 0xffffffff
+        setf.sig f7 = in2
+        ;;
+        and r14 = r19, in0
+        ;;
+        setf.sig f10 = r14
+        and r14 = r19, in2
+        xmpy.l f9 = f6, f7
+        ;;
+        setf.sig f6 = r14
+        shr.u r14 = in0, 32
+        ;;
+        setf.sig f7 = r14
+        shr.u r14 = in2, 32
+        ;;
+        setf.sig f8 = r14
+        xmpy.l f11 = f10, f6
+        xmpy.l f6 = f7, f6
+        ;;
+        getf.sig r16 = f11
+        xmpy.l f7 = f7, f8
+        ;;
+        shr.u r14 = r16, 32
+        and r16 = r19, r16
+        getf.sig r17 = f6
+        setf.sig f6 = in0
+        ;;
+        setf.sig f11 = r14
+        getf.sig r21 = f7
+        setf.sig f7 = in3
+        ;;
+        xma.l f11 = f10, f8, f11
+        xma.l f6 = f6, f7, f9
+        ;;
+        getf.sig r18 = f11
+        ;;
+        add r18 = r18, r17
+        ;;
+        and r15 = r19, r18
+        cmp.ltu p7, p6 = r18, r17
+        ;;
+        getf.sig r22 = f6
+(p7)    adds r14 = 1, r19
+        ;;
+(p7)    add r21 = r21, r14
+        shr.u r14 = r18, 32
+        shl r15 = r15, 32
+        ;;
+        add r20 = r21, r14
+        ;;
+        add ret0 = r15, r16
+        add ret1 = r22, r20
+        br.ret.sptk rp
+        ;;
+END(___multi3)
+        .symver ___multi3, __multi3@GLIBC_2.2
+
+#endif
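All four divide routines above share one scheme: `frcpa` supplies a reciprocal approximation good to roughly 8 bits, a few Newton-Raphson iterations refine it (each pass roughly doubles the number of correct bits), and a final fused multiply-add corrects the quotient against the exact residual. A minimal C sketch of that scheme, with a deliberately low-precision starting guess standing in for `frcpa` (the function names here are illustrative, not glibc API):

```c
#include <stdio.h>

/* One Newton-Raphson step for a reciprocal: given y ~= 1/b, compute
   e = 1 - b*y (cf. fnma.s1) and return y + y*e (cf. fma.s1), which
   roughly doubles the number of correct bits. */
static double refine_reciprocal(double b, double y)
{
    double e = 1.0 - b * y;
    return y + y * e;
}

static double nr_divide(double a, double b)
{
    /* Low-precision initial guess; stands in for frcpa's ~8-bit
       approximation (here we cheat and use a float division). */
    double y = (double)(1.0f / (float)b);
    y = refine_reciprocal(b, y);
    y = refine_reciprocal(b, y);
    y = refine_reciprocal(b, y);
    double q = a * y;              /* initial quotient */
    return q + (a - b * q) * y;    /* final residual correction */
}

int main(void)
{
    printf("%.17g\n", nr_divide(355.0, 113.0));   /* ~3.14159292... */
    return 0;
}
```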
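The modulus routines never issue a second division: while the quotient refinement is still in flight, they negate the divisor in the integer unit (`sub in1 = r0, in1`), so the remainder drops out of a single `xma.l` as r = q * (-b) + a, exactly as the comment says. The same identity in portable C (the function name is illustrative):

```c
#include <stdint.h>
#include <stdio.h>

/* Remainder from an already-computed truncated quotient:
   r = q*(-b) + a, i.e. r = a - q*b, one multiply-add (cf. xma.l).
   Here q comes from C's division operator; in the assembly it comes
   from the Newton-Raphson sequence. */
static int64_t mod_from_quotient(int64_t a, int64_t b)
{
    int64_t q = a / b;      /* truncated quotient */
    return q * (-b) + a;
}

int main(void)
{
    /* C99 division truncates toward zero, matching fcvt.fx.trunc:
       -7 / 3 == -2, so the remainder is -1. */
    printf("%lld\n", (long long)mod_from_quotient(-7, 3));
    return 0;
}
```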
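In `__multi3`, the bulk of the work is a 64x64->128-bit multiply of the operands' low words, assembled from four 32-bit partial products because `xmpy.l` returns only the low 64 bits of a product; the `cmp.ltu` / `adds r14 = 1, r19` pair propagates the carry out of the middle column. A C sketch of that decomposition without `__int128` (the function name is illustrative):

```c
#include <stdint.h>
#include <stdio.h>

/* 64x64 -> 128-bit multiply from 32-bit halves:
   a*b = (ah*bh << 64) + ((ah*bl + al*bh) << 32) + al*bl. */
static void mul64x64(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
    uint64_t al = a & 0xffffffffu, ah = a >> 32;
    uint64_t bl = b & 0xffffffffu, bh = b >> 32;

    uint64_t ll = al * bl;
    uint64_t lh = al * bh;
    uint64_t hl = ah * bl;
    uint64_t hh = ah * bh;

    /* Middle column; if the sum wraps, the lost bit is worth 2^96,
       i.e. 1 << 32 in the high word (cf. cmp.ltu / adds r14 = 1, r19,
       which forms 0x100000000). */
    uint64_t mid = lh + hl;
    uint64_t mid_carry = (mid < lh) ? ((uint64_t)1 << 32) : 0;

    uint64_t low = ll + (mid << 32);
    uint64_t low_carry = (low < ll);   /* carry into the high word */

    *lo = low;
    *hi = hh + (mid >> 32) + mid_carry + low_carry;
}

int main(void)
{
    uint64_t hi, lo;
    mul64x64(0xdeadbeefcafebabeULL, 0x123456789abcdef0ULL, &hi, &lo);
    printf("0x%016llx%016llx\n", (unsigned long long)hi, (unsigned long long)lo);
    return 0;
}
```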