powerpc: Refactor powerpc32 lround/lroundf/llround/llroundf

This patch consolidates all the powerpc llround{f} implementations on the generic sysdeps/powerpc/powerpc32/fpu/s_llround{f}. Checked on powerpc-linux-gnu (built without --with-cpu, with --with-cpu=power4 and with --with-cpu=power5+ and --disable-multi-arch), powerpc64-linux-gnu (built without --with-cpu and with --with-cpu=power5+ and --disable-multi-arch). * sysdeps/powerpc/powerpc32/fpu/Makefile [$(subdir) == math] (CFLAGS-s_lround.c): New rule. * sysdeps/powerpc/powerpc32/fpu/s_llround.c (__llround): Add power5+ and fctidz optimization. * sysdeps/powerpc/powerpc32/fpu/s_lround.S: Remove file. * sysdeps/powerpc/powerpc32/fpu/s_lround.c: New file. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/Makefile (CFLAGS-s_llround-power6.c, CFLAGS-s_llround-power5+.c, CFLAGS-s_llround-ppc32.c, CFLAGS-s_lround-ppc32.c, CFLAGS-s_lround-power5+.c): New rule. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.c: New file. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.c: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.c: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.c: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power5+.S: Remove file. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-power6.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_llround-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-power5+.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/multiarch/s_lround-ppc32.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/s_llround.S: Likewise. * sysdeps/powerpc/powerpc32/power4/fpu/s_llroundf.S: Likewise. * sysdeps/powerpc/powerpc32/power5+/fpu/s_llround.S: Likewise. * sysdeps/powerpc/powerpc32/power5+/fpu/s_llroundf.S: Likewise. * sysdeps/powerpc/powerpc32/power5+/fpu/s_lround.S: Likewise. 
* sysdeps/powerpc/powerpc32/power6/fpu/s_llround.S: Likewise. * sysdeps/powerpc/powerpc32/power6/fpu/s_llroundf.S: Likewise. Reviewed-by: Gabriel F. T. Gomes <gabriel@inconstante.eti.br>
author: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2019-03-22 16:47:34 +0000
committer: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2019-06-26 14:32:45 -0300
commit: 9d5d214e86db30633e905907f7b3cf56b8fee16e (patch)
tree: a92139b5d32f8c23de24da0bbeb2d58541ff1b2e /sysdeps/powerpc/powerpc32/fpu
parent: a63b96fbddbf97feaa068a9efed3b5623a1a1e78 (diff)
download: glibc-9d5d214e86db30633e905907f7b3cf56b8fee16e.tar.gz
glibc-9d5d214e86db30633e905907f7b3cf56b8fee16e.tar.xz
glibc-9d5d214e86db30633e905907f7b3cf56b8fee16e.zip
4 files changed, 127 insertions, 125 deletions
diff --git a/sysdeps/powerpc/powerpc32/fpu/Makefile b/sysdeps/powerpc/powerpc32/fpu/Makefile
index c79b192f60..b8b6bb0fa2 100644
--- a/sysdeps/powerpc/powerpc32/fpu/Makefile
+++ b/sysdeps/powerpc/powerpc32/fpu/Makefile
@@ -2,6 +2,7 @@ ifeq ($(subdir),math)
 # lrint is aliased to lrintf, so suppress compiler builtins to
 # avoid mismatched signatures.
 CFLAGS-s_lrint.c += -fno-builtin-lrintf
+CFLAGS-s_lround.c += -fno-builtin-lroundf
 endif
 
 ifeq ($(subdir),misc)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_llround.c b/sysdeps/powerpc/powerpc32/fpu/s_llround.c
index 14d10e1e63..989d9b5763 100644
--- a/sysdeps/powerpc/powerpc32/fpu/s_llround.c
+++ b/sysdeps/powerpc/powerpc32/fpu/s_llround.c
@@ -18,10 +18,10 @@
 
 #include <limits.h>
 #include <math.h>
-#include <math_ldbl_opt.h>
 #include <math_private.h>
 #include <stdint.h>
 #include <libm-alias-double.h>
+#include <math-barriers.h>
 
 /* Round to the nearest integer, with values exactly on a 0.5 boundary
    rounded away from zero, regardless of the current rounding mode.
@@ -31,9 +31,53 @@
 long long int
 __llround (double x)
 {
+#ifdef _ARCH_PWR5X
+  x = round (x);
+  /* The barrier prevents the compiler from optimizing this into a call
+     to llround when compiled with -fno-math-errno.  */
+  math_opt_barrier (x);
+  return x;
+#else
   long long xr;
   if (HAVE_PPC_FCTIDZ)
-    xr = (long long) x;
+    {
+       /* IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+	  integer value, rounding halfway cases away from zero, regardless of
+	  the current rounding mode."  However PowerPC Architecture defines
+	  "round to Nearest" as "Choose the best approximation. In case of a
+	  tie, choose the one that is even (least significant bit 0).".
+	  So we can't use the PowerPC "round to Nearest" mode. Instead we set
+	  "round toward Zero" mode and round by adding +-0.5 before rounding
+	  to the integer value.
+
+	  It is necessary to detect when x is (+-)0x1.fffffffffffffp-2
+	  because adding +-0.5 in this case will cause an erroneous shift,
+	  carry and round.  We simply return 0 if 0.5 > x > -0.5.  Likewise
+	  if x is an odd number between +-(2^52 and 2^53-1) a shift and
+	  carry will erroneously round if biased with +-0.5.  Therefore if x
+	  is greater/less than +-2^52 we don't need to bias the number with
+	  +-0.5.  */
+      double ax = fabs (x);
+
+      if (ax < 0.5)
+	return 0;
+
+      if (ax < 0x1p+52)
+	{
+	  /* Test whether an integer to avoid spurious "inexact".  */
+	  double t = ax + 0x1p+52;
+	  t = t - 0x1p+52;
+	  if (ax != t)
+	    {
+	      ax = ax + 0.5;
+	      if (x < 0.0)
+		ax = -fabs (ax);
+	      x = ax;
+	    }
+        }
+
+      return x;
+    }
   else
     {
       /* Avoid incorrect exceptions from libgcc conversions (as of GCC
@@ -80,5 +124,8 @@ __llround (double x)
 	xr -= (long long) ((unsigned long long) xr - 1) < 0;
     }
   return xr;
+#endif
 }
+#ifndef __llround
 libm_alias_double (__llround, llround)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lround.S b/sysdeps/powerpc/powerpc32/fpu/s_lround.S
deleted file mode 100644
index 2d9540ceed..0000000000
--- a/sysdeps/powerpc/powerpc32/fpu/s_lround.S
+++ /dev/null
@@ -1,123 +0,0 @@
-/* lround function.  PowerPC32 version.
-   Copyright (C) 2004-2019 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-#include <math_ldbl_opt.h>
-#include <libm-alias-float.h>
-#include <libm-alias-double.h>
-
-	.section	.rodata.cst4,"aM",@progbits,4
-	.align	2
-.LC0:	/* 0.5 */
-	.long 0x3f000000
-.LC1:	/* 2^52.  */
-	.long 0x59800000
-	.section	.rodata.cst8,"aM",@progbits,8
-	.align	3
-.LC2:	/* 0x7fffffff.8p0.  */
-	.long 0x41dfffff
-	.long 0xffe00000
-.LC3:	/* -0x80000000.8p0.  */
-	.long 0xc1e00000
-	.long 0x00100000
-	.section	".text"
-
-/* long [r3] lround (float x [fp1])
-   IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
-   integer value, rounding halfway cases away from zero, regardless of
-   the current rounding mode."  However PowerPC Architecture defines
-   "round to Nearest" as "Choose the best approximation. In case of a
-   tie, choose the one that is even (least significant bit o).".
-   So we can't use the PowerPC "round to Nearest" mode. Instead we set
-   "round toward Zero" mode and round by adding +-0.5 before rounding
-   to the integer value.  It is necessary to detect when x is
-   (+-)0x1.fffffffffffffp-2 because adding +-0.5 in this case will
-   cause an erroneous shift, carry and round.  We simply return 0 if
-   0.5 > x > -0.5.  */
-
-ENTRY (__lround)
-	stwu    r1,-16(r1)
-	cfi_adjust_cfa_offset (16)
-#ifdef SHARED
-	mflr	r11
-	cfi_register(lr,r11)
-	SETUP_GOT_ACCESS(r9,got_label)
-	addis	r10,r9,.LC0-got_label@ha
-	lfs	fp10,.LC0-got_label@l(r10)
-	addis	r10,r9,.LC1-got_label@ha
-	lfs	fp11,.LC1-got_label@l(r10)
-	addis	r10,r9,.LC2-got_label@ha
-	lfd	fp9,.LC2-got_label@l(r10)
-	addis	r10,r9,.LC3-got_label@ha
-	lfd	fp8,.LC3-got_label@l(r10)
-	mtlr	r11
-	cfi_same_value (lr)
-#else
-	lis	r9,.LC0@ha
-	lfs	fp10,.LC0@l(r9)
-	lis	r9,.LC1@ha
-	lfs	fp11,.LC1@l(r9)
-	lis	r9,.LC2@ha
-	lfd	fp9,.LC2@l(r9)
-	lis	r9,.LC3@ha
-	lfd	fp8,.LC3@l(r9)
-#endif
-	fabs	fp2, fp1	/* Get the absolute value of x.  */
-	fsub	fp12,fp10,fp10	/* Compute 0.0.  */
-	fcmpu	cr6, fp2, fp10	/* if |x| < 0.5  */
-	fcmpu	cr5, fp1, fp9	/* if x >= 0x7fffffff.8p0  */
-	fcmpu	cr1, fp1, fp8	/* if x <= -0x80000000.8p0  */
-	fcmpu	cr7, fp1, fp12	/* x is negative? x < 0.0  */
-	blt-	cr6,.Lretzero
-	bge-	cr5,.Loflow
-	ble-	cr1,.Loflow
-	/* Test whether an integer to avoid spurious "inexact".  */
-	fadd	fp3,fp2,fp11
-	fsub	fp3,fp3,fp11
-	fcmpu	cr5, fp2, fp3
-	beq	cr5,.Lnobias
-	fadd	fp3,fp2,fp10	/* |x|+=0.5 bias to prepare to round.  */
-	bge	cr7,.Lconvert	/* x is positive so don't negate x.  */
-	fnabs	fp3,fp3		/* -(|x|+=0.5)  */
-.Lconvert:
-	fctiwz	fp4,fp3		/* Convert to Integer word lround toward 0.  */
-	stfd	fp4,8(r1)
-	nop	/* Ensure the following load is in a different dispatch  */
-	nop	/* group to avoid pipe stall on POWER4&5.  */
-	nop
-	lwz	r3,8+LOWORD(r1)	/* Load return as integer.  */
-.Lout:
-	addi	r1,r1,16
-	blr
-.Lretzero:			/* when 0.5 > x > -0.5  */
-	li	r3,0		/* return 0.  */
-	b	.Lout
-.Lnobias:
-	fmr	fp3,fp1
-	b	.Lconvert
-.Loflow:
-	fmr	fp3,fp11
-	bge	cr7,.Lconvert
-	fnabs	fp3,fp3
-	b	.Lconvert
-	END (__lround)
-
-libm_alias_double (__lround, lround)
-
-strong_alias (__lround, __lroundf)
-libm_alias_float (__lround, lround)
diff --git a/sysdeps/powerpc/powerpc32/fpu/s_lround.c b/sysdeps/powerpc/powerpc32/fpu/s_lround.c
new file mode 100644
index 0000000000..28c06d25d2
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/fpu/s_lround.c
@@ -0,0 +1,77 @@
+/* lround function.  PowerPC32 version.
+   Copyright (C) 2004-2019 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#define lroundf __redirect_lroundf
+#define __lroundf __redirect___lroundf
+#include <math.h>
+#undef lroundf
+#undef __lroundf
+#include <libm-alias-float.h>
+#include <libm-alias-double.h>
+
+long int
+__lround (double x)
+{
+#ifdef _ARCH_PWR5X
+  x = round (x);
+#else
+  /* IEEE 1003.1 lround function.  IEEE specifies "round to the nearest
+     integer value, rounding halfway cases away from zero, regardless of
+     the current rounding mode."  However PowerPC Architecture defines
+     "round to Nearest" as "Choose the best approximation. In case of a
+     tie, choose the one that is even (least significant bit 0).".
+     So we can't use the PowerPC "round to Nearest" mode. Instead we set
+     "round toward Zero" mode and round by adding +-0.5 before rounding
+     to the integer value.  It is necessary to detect when x is
+     (+-)0x1.fffffffffffffp-2 because adding +-0.5 in this case will
+     cause an erroneous shift, carry and round.  We simply return 0 if
+     0.5 > x > -0.5.  */
+
+  double ax = fabs (x);
+
+  if (ax < 0.5)
+    return 0;
+
+  if (x >= 0x7fffffff.8p0 || x <= -0x80000000.8p0)
+    x = (x < 0.0) ? -0x1p+52 : 0x1p+52;
+  else
+    {
+      /* Test whether an integer to avoid spurious "inexact".  */
+      double t = ax + 0x1p+52;
+      t = t - 0x1p+52;
+      if (ax != t)
+        {
+	  ax = ax + 0.5;
+	  if (x < 0.0)
+	    ax = -fabs (ax);
+	  x = ax;
+        }
+    }
+#endif
+  /* Force evaluation of values larger than long int, so invalid
+     exceptions are raised.  */
+  long long int ret;
+  asm ("fctiwz %0, %1" : "=d" (ret) : "d" (x));
+  return ret;
+}
+#ifndef __lround
+libm_alias_double (__lround, lround)
+
+strong_alias (__lround, __lroundf)
+libm_alias_float (__lround, lround)
+#endif
author	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-03-22 16:47:34 +0000
committer	Adhemerval Zanella <adhemerval.zanella@linaro.org>	2019-06-26 14:32:45 -0300
commit	9d5d214e86db30633e905907f7b3cf56b8fee16e (patch)
tree	a92139b5d32f8c23de24da0bbeb2d58541ff1b2e /sysdeps/powerpc/powerpc32/fpu
parent	a63b96fbddbf97feaa068a9efed3b5623a1a1e78 (diff)
download	glibc-9d5d214e86db30633e905907f7b3cf56b8fee16e.tar.gz glibc-9d5d214e86db30633e905907f7b3cf56b8fee16e.tar.xz glibc-9d5d214e86db30633e905907f7b3cf56b8fee16e.zip