From 5046dbb4a7eba5eccfd258f92f4735c9ffc8d069 Mon Sep 17 00:00:00 2001 From: Zack Weinberg Date: Thu, 8 Jun 2017 15:39:03 -0400 Subject: Prepare for radical source tree reorganization. All top-level files and directories are moved into a temporary storage directory, REORG.TODO, except for files that will certainly still exist in their current form at top level when we're done (COPYING, COPYING.LIB, LICENSES, NEWS, README), all old ChangeLog files (which are moved to the new directory OldChangeLogs, instead), and the generated file INSTALL (which is just deleted; in the new order, there will be no generated files checked into version control). --- REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c | 214 ++++++++++++++++++++++++++++ 1 file changed, 214 insertions(+) create mode 100644 REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c (limited to 'REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c') diff --git a/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c new file mode 100644 index 0000000000..0a0f7175b4 --- /dev/null +++ b/REORG.TODO/sysdeps/powerpc/power4/fpu/mpa.c @@ -0,0 +1,214 @@ + +/* + * IBM Accurate Mathematical Library + * written by International Business Machines Corp. + * Copyright (C) 2001-2017 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 2.1 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see . + */ + +/* Define __mul and __sqr and use the rest from generic code. */ +#define NO__MUL +#define NO__SQR + +#include + +/* Multiply *X and *Y and store result in *Z. X and Y may overlap but not X + and Z or Y and Z. For P in [1, 2, 3], the exact result is truncated to P + digits. In case P > 3 the error is bounded by 1.001 ULP. */ +void +__mul (const mp_no *x, const mp_no *y, mp_no *z, int p) +{ + long i, i1, i2, j, k, k2; + long p2 = p; + double u, zk, zk2; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] * Y[0] == 0)) + { + Z[0] = 0; + return; + } + + /* Multiply, add and carry */ + k2 = (p2 < 3) ? p2 + p2 : p2 + 3; + zk = Z[k2] = 0; + for (k = k2; k > 1;) + { + if (k > p2) + { + i1 = k - p2; + i2 = p2 + 1; + } + else + { + i1 = 1; + i2 = k; + } +#if 1 + /* Rearrange this inner loop to allow the fmadd instructions to be + independent and execute in parallel on processors that have + dual symmetrical FP pipelines. */ + if (i1 < (i2 - 1)) + { + /* Make sure we have at least 2 iterations. */ + if (((i2 - i1) & 1L) == 1L) + { + /* Handle the odd iterations case. */ + zk2 = x->d[i2 - 1] * y->d[i1]; + } + else + zk2 = 0.0; + /* Do two multiply/adds per loop iteration, using independent + accumulators; zk and zk2. */ + for (i = i1, j = i2 - 1; i < i2 - 1; i += 2, j -= 2) + { + zk += x->d[i] * y->d[j]; + zk2 += x->d[i + 1] * y->d[j - 1]; + } + zk += zk2; /* Final sum. */ + } + else + { + /* Special case when iterations is 1. */ + zk += x->d[i1] * y->d[i1]; + } +#else + /* The original code. */ + for (i = i1, j = i2 - 1; i < i2; i++, j--) + zk += X[i] * Y[j]; +#endif + + u = (zk + CUTTER) - CUTTER; + if (u > zk) + u -= RADIX; + Z[k] = zk - u; + zk = u * RADIXI; + --k; + } + Z[k] = zk; + + int e = EX + EY; + /* Is there a carry beyond the most significant digit? */ + if (Z[1] == 0) + { + for (i = 1; i <= p2; i++) + Z[i] = Z[i + 1]; + e--; + } + + EZ = e; + Z[0] = X[0] * Y[0]; +} + +/* Square *X and store result in *Y. X and Y may not overlap. For P in + [1, 2, 3], the exact result is truncated to P digits. In case P > 3 the + error is bounded by 1.001 ULP. This is a faster special case of + multiplication. */ +void +__sqr (const mp_no *x, mp_no *y, int p) +{ + long i, j, k, ip; + double u, yk; + + /* Is z=0? */ + if (__glibc_unlikely (X[0] == 0)) + { + Y[0] = 0; + return; + } + + /* We need not iterate through all X's since it's pointless to + multiply zeroes. */ + for (ip = p; ip > 0; ip--) + if (X[ip] != 0) + break; + + k = (__glibc_unlikely (p < 3)) ? p + p : p + 3; + + while (k > 2 * ip + 1) + Y[k--] = 0; + + yk = 0; + + while (k > p) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* In __mul, this loop (and the one within the next while loop) run + between a range to calculate the mantissa as follows: + + Z[k] = X[k] * Y[n] + X[k+1] * Y[n-1] ... + X[n-1] * Y[k+1] + + X[n] * Y[k] + + For X == Y, we can get away with summing halfway and doubling the + result. For cases where the range size is even, the mid-point needs + to be added separately (above). */ + for (i = k - p, j = p; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + + while (k > 1) + { + double yk2 = 0.0; + long lim = k / 2; + + if (k % 2 == 0) + { + yk += X[lim] * X[lim]; + lim--; + } + + /* Likewise for this loop. */ + for (i = 1, j = k - 1; i <= lim; i++, j--) + yk2 += X[i] * X[j]; + + yk += 2.0 * yk2; + + u = (yk + CUTTER) - CUTTER; + if (u > yk) + u -= RADIX; + Y[k--] = yk - u; + yk = u * RADIXI; + } + Y[k] = yk; + + /* Squares are always positive. */ + Y[0] = 1.0; + + int e = EX * 2; + /* Is there a carry beyond the most significant digit? */ + if (__glibc_unlikely (Y[1] == 0)) + { + for (i = 1; i <= p; i++) + Y[i] = Y[i + 1]; + e--; + } + EY = e; +} -- cgit 1.4.1