diff options
author | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2018-06-13 17:57:20 +0100 |
---|---|---|
committer | Szabolcs Nagy <szabolcs.nagy@arm.com> | 2018-09-19 10:04:51 +0100 |
commit | 424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448 (patch) | |
tree | 52fbd60de3d3b1e99208b3018cf79ee8a230a878 /sysdeps/ieee754/dbl-64/upow.h | |
parent | dab9c3488e86d5304f3e4b778933760374494a82 (diff) | |
download | glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.tar.gz glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.tar.xz glibc-424c4f60ed6190e2ea0e72e0873bf3ebcbbf5448.zip |
Add new pow implementation
The algorithm is exp(y * log(x)), where log(x) is computed with about 1.3*2^-68 relative error (1.5*2^-68 without fma), returning the result in two doubles, and the exp part uses the same algorithm (and lookup tables) as exp, but takes the input as two doubles and a sign (to handle negative bases with odd integer exponent). The __exp1 internal symbol is no longer necessary. There is a separate code path when fma is not available, but the worst-case error is about 0.54 ULP in both cases. The lookup table and consts for log are 4168 bytes. The .rodata+.text is decreased by 37908 bytes on aarch64. The non-nearest rounding error is less than 1 ULP. Improvements on Cortex-A72 compared to current glibc master: pow throughput: 2.40x in [0.01 11.1]x[0.01 11.1]; pow latency: 1.84x in [0.01 11.1]x[0.01 11.1]. Tested on aarch64-linux-gnu (defined __FP_FAST_FMA, TOINT_INTRINSICS) and arm-linux-gnueabihf (!defined __FP_FAST_FMA, !TOINT_INTRINSICS) and x86_64-linux-gnu (!defined __FP_FAST_FMA, !TOINT_INTRINSICS) and powerpc64le-linux-gnu (defined __FP_FAST_FMA, !TOINT_INTRINSICS) targets. * NEWS: Mention pow improvements. * math/Makefile (type-double-routines): Add e_pow_log_data. * sysdeps/generic/math_private.h (__exp1): Remove. * sysdeps/i386/fpu/e_pow_log_data.c: New file. * sysdeps/ia64/fpu/e_pow_log_data.c: New file. * sysdeps/ieee754/dbl-64/Makefile (CFLAGS-e_pow.c): Allow fma contraction. * sysdeps/ieee754/dbl-64/e_exp.c (__exp1): Remove. (exp_inline): Remove. (__ieee754_exp): Only single double input is handled. * sysdeps/ieee754/dbl-64/e_pow.c: Rewrite. * sysdeps/ieee754/dbl-64/e_pow_log_data.c: New file. * sysdeps/ieee754/dbl-64/math_config.h (issignaling_inline): Define. (__pow_log_data): Define. * sysdeps/ieee754/dbl-64/upow.h: Remove. * sysdeps/ieee754/dbl-64/upow.tbl: Remove. * sysdeps/m68k/m680x0/fpu/e_pow_log_data.c: New file. * sysdeps/x86_64/fpu/multiarch/Makefile (CFLAGS-e_pow-fma.c): Allow fma contraction. (CFLAGS-e_pow-fma4.c): Likewise.
Diffstat (limited to 'sysdeps/ieee754/dbl-64/upow.h')
-rw-r--r-- | sysdeps/ieee754/dbl-64/upow.h | 76 |
1 files changed, 0 insertions, 76 deletions
diff --git a/sysdeps/ieee754/dbl-64/upow.h b/sysdeps/ieee754/dbl-64/upow.h deleted file mode 100644 index c1b9d8e3cc..0000000000 --- a/sysdeps/ieee754/dbl-64/upow.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * IBM Accurate Mathematical Library - * Written by International Business Machines Corp. - * Copyright (C) 2001-2018 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. 
- */ - -/******************************************************************/ -/* */ -/* MODULE_NAME:upow.h */ -/* */ -/* common data and variables prototype and definition */ -/******************************************************************/ - -#ifndef UPOW_H -#define UPOW_H - -#include "mydefs.h" - -#ifdef BIG_ENDI - const static mynumber -/**/ nZERO = {{0x80000000, 0}}, /* -0.0 */ -/**/ INF = {{0x7ff00000, 0x00000000}}, /* INF */ -/**/ nINF = {{0xfff00000, 0x00000000}}, /* -INF */ -/**/ ln2a = {{0x3fe62e42, 0xfefa3800}}, /* ln(2) 43 bits */ -/**/ ln2b = {{0x3d2ef357, 0x93c76730}}, /* ln(2)-ln2a */ -/**/ bigu = {{0x4297ffff, 0xfffffd2c}}, /* 1.5*2**42 -724*2**-10 */ -/**/ bigv = {{0x4207ffff, 0xfff8016a}}, /* 1.5*2**33-1+362*2**-19 */ -/**/ t52 = {{0x43300000, 0x00000000}}, /* 2**52 */ -/**/ two52e = {{0x43300000, 0x000003ff}}; /* 2**52' */ - -#else -#ifdef LITTLE_ENDI - const static mynumber -/**/ nZERO = {{0, 0x80000000}}, /* -0.0 */ -/**/ INF = {{0x00000000, 0x7ff00000}}, /* INF */ -/**/ nINF = {{0x00000000, 0xfff00000}}, /* -INF */ -/**/ ln2a = {{0xfefa3800, 0x3fe62e42}}, /* ln(2) 43 bits */ -/**/ ln2b = {{0x93c76730, 0x3d2ef357}}, /* ln(2)-ln2a */ -/**/ bigu = {{0xfffffd2c, 0x4297ffff}}, /* 1.5*2**42 -724*2**-10 */ -/**/ bigv = {{0xfff8016a, 0x4207ffff}}, /* 1.5*2**33-1+362*2**-19 */ -/**/ t52 = {{0x00000000, 0x43300000}}, /* 2**52 */ -/**/ two52e = {{0x000003ff, 0x43300000}}; /* 2**52' */ - -#endif -#endif - -const static double p2=-0.5, p3 = 3.3333333333333333333e-1, p4 = -0.25, - q2 = -0.5, q3 = 3.3333333333331404e-01, q4 = -2.4999999999996436e-01, - q5 = 2.0000010500004459e-01, q6 = -1.6666678916688004e-01, - r3 = 3.33333333333333333372884096563030E-01, - r4 = -2.50000000000000000213574153875908E-01, - r5 = 1.99999999999683593814072199830603E-01, - r6 = -1.66666666666065494878165510225378E-01, - r7 = 1.42857517857114380606360005067609E-01, - r8 = -1.25000449999974370683775964001702E-01, - s3 = 0.333251953125000000e0, - ss3 = 8.138020833333333333e-05, - 
s4 = -2.500000000000000000e-01, - s5 = 1.999999999999960937e-01, - s6 = -1.666666666666592447e-01, - s7 = 1.428571845238194705e-01, - s8 = -1.250000500000149097e-01; -#endif |