/* Internal libc stuff for floating point environment routines.
   Copyright (C) 1997-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef _FENV_LIBC_H
#define _FENV_LIBC_H	1

#include <fenv.h>
#include <ldsodefs.h>
#include <sysdep.h>

extern const fenv_t *__fe_nomask_env_priv (void);

extern const fenv_t *__fe_mask_env (void) attribute_hidden;

/* If the old env had any enabled exceptions and the new env has no enabled
   exceptions, then mask SIGFPE in the MSR FE0/FE1 bits.  This may allow the
   FPU to run faster because it always takes the default action and can not
   generate SIGFPE.  */
#define __TEST_AND_ENTER_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) != 0 && ((new) & FPSCR_ENABLES_MASK) == 0) \
      (void) __fe_mask_env (); \
  } while (0)

/* If the old env has no enabled exceptions and the new env has any enabled
   exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
   hardware into "precise mode" and may cause the FPU to run slower on some
   hardware.  */
#define __TEST_AND_EXIT_NON_STOP(old, new) \
  do { \
    if (((old) & FPSCR_ENABLES_MASK) == 0 && ((new) & FPSCR_ENABLES_MASK) != 0) \
      (void) __fe_nomask_env_priv (); \
  } while (0)

/* The sticky bits in the FPSCR indicating exceptions have occurred.  */
#define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)

/* Equivalent to fegetenv, but returns a fenv_t instead of taking a
   pointer.  */
#define fegetenv_register() __builtin_mffs ()

/* Equivalent to fegetenv_register, but only returns bits for
   status, exception enables, and mode.
   Nicely, it turns out that the 'mffsl' instruction will decode to
   'mffs' on architectures older than "power9" because the additional
   bits set for 'mffsl' are "don't care" for 'mffs'.  'mffs' is a
   superset of 'mffsl'.  */
#define fegetenv_control() \
  ({register double __fr; \
    __asm__ __volatile__ ( \
      ".machine push; .machine \"power9\"; mffsl %0; .machine pop" \
      : "=f" (__fr)); \
    __fr; \
  })

/* Starting with GCC 14, __builtin_set_fpscr_rn can be used to return the
   FPSCR fields as a double.  This support is available on Power9 when the
   __SET_FPSCR_RN_RETURNS_FPSCR__ macro is defined.  To retain backward
   compatibility with older GCC, we still retain the old inline assembly
   implementation.  */
#ifdef __SET_FPSCR_RN_RETURNS_FPSCR__
#define __fe_mffscrn(rn) __builtin_set_fpscr_rn (rn)
#else
#define __fe_mffscrn(rn) \
  ({register fenv_union_t __fr; \
    if (__builtin_constant_p (rn)) \
      __asm__ __volatile__ ( \
	".machine push; .machine \"power9\"; mffscrni %0,%1; .machine pop" \
	: "=f" (__fr.fenv) : "n" (rn)); \
    else \
    { \
      __fr.l = (rn); \
      __asm__ __volatile__ ( \
	".machine push; .machine \"power9\"; mffscrn %0,%1; .machine pop" \
	: "=f" (__fr.fenv) : "f" (__fr.fenv)); \
    } \
    __fr.fenv; \
  })
#endif
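
/* Usage sketch (illustrative only, not part of this header's interface):
   a fesetenv-style routine would normally apply both macros above when
   switching environments, so that the MSR FE0/FE1 bits only change when
   the set of enabled exceptions goes from non-empty to empty or the other
   way around.  Assuming a hypothetical argument 'envp' of type
   const fenv_t *, and using fenv_union_t and fesetenv_register (both
   defined further below):

     fenv_union_t old, new;
     old.fenv = fegetenv_register ();
     new.fenv = *envp;
     __TEST_AND_EXIT_NON_STOP (old.l, new.l);
     __TEST_AND_ENTER_NON_STOP (old.l, new.l);
     fesetenv_register (new.fenv);

   At most one of the two macros can fire for a given (old, new) pair,
   since their conditions are mutually exclusive.  */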
/* Like fegetenv_control, but also sets the rounding mode.  */
#ifdef _ARCH_PWR9
#define fegetenv_and_set_rn(rn) __fe_mffscrn (rn)
#else
/* 'mffscrn' will decode to 'mffs' on ARCH < 3_00, which is still necessary
   but not sufficient, because it does not set the rounding mode.
   Explicitly set the rounding mode when 'mffscrn' actually doesn't.  */
#define fegetenv_and_set_rn(rn) \
  ({register fenv_union_t __fr; \
    __fr.fenv = __fe_mffscrn (rn); \
    if (__glibc_unlikely (!(GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))) \
      __fesetround_inline (rn); \
    __fr.fenv; \
  })
#endif

/* Equivalent to fesetenv, but takes a fenv_t instead of a pointer.  */
#define fesetenv_register(env) \
  do { \
    double d = (env); \
    if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; " \
		    ".machine \"power6\"; " \
		    "mtfsf 0xff,%0,1,0; " \
		    ".machine pop" : : "f" (d)); \
    else \
      __builtin_mtfsf (0xff, d); \
  } while (0)

/* Set the last 2 nibbles of the FPSCR, which contain the exception enables
   and the rounding mode.
   'fegetenv_control' retrieves these bits by reading the FPSCR.  */
#define fesetenv_control(env) __builtin_mtfsf (0b00000011, (env));

/* This very handy macro:
   - Sets the rounding mode to 'round to nearest';
   - Sets the processor into IEEE mode; and
   - Prevents exceptions from being raised for inexact results.
   These things happen to be exactly what you need for typical elementary
   functions.  */
#define relax_fenv_state() \
  do { \
    if (GLRO(dl_hwcap) & PPC_FEATURE_HAS_DFP) \
      asm volatile (".machine push; .machine \"power6\"; " \
		    "mtfsfi 7,0,1; .machine pop"); \
    asm volatile ("mtfsfi 7,0"); \
  } while (0)

/* Set/clear a particular FPSCR bit (for instance,
   reset_fpscr_bit (FPSCR_VE);
   prevents INVALID exceptions from being raised).  */
#define set_fpscr_bit(x) asm volatile ("mtfsb1 %0" : : "n" (x))
#define reset_fpscr_bit(x) asm volatile ("mtfsb0 %0" : : "n" (x))

typedef union
{
  fenv_t fenv;
  unsigned long long l;
} fenv_union_t;

static inline int
__fesetround_inline (int round)
{
#ifdef _ARCH_PWR9
  __fe_mffscrn (round);
#else
  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
    __fe_mffscrn (round);
  else if ((unsigned int) round < 2)
    {
      asm volatile ("mtfsb0 30");
      if ((unsigned int) round == 0)
	asm volatile ("mtfsb0 31");
      else
	asm volatile ("mtfsb1 31");
    }
  else
    {
      asm volatile ("mtfsb1 30");
      if ((unsigned int) round == 2)
	asm volatile ("mtfsb0 31");
      else
	asm volatile ("mtfsb1 31");
    }
#endif
  return 0;
}
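
/* Worked example (illustrative): with the usual powerpc <fenv.h> encoding
   (FE_TONEAREST == 0, FE_TOWARDZERO == 1, FE_UPWARD == 2, FE_DOWNWARD == 3),
   the rounding mode value is exactly the FPSCR[RN] field, i.e. FPSCR bits
   30 and 31.  On hardware without ISA 3.0 (and when not built for
   _ARCH_PWR9), __fesetround_inline (FE_UPWARD) therefore executes

     mtfsb1 30
     mtfsb0 31

   leaving RN == 0b10 (round toward +infinity) without touching any other
   FPSCR field.  */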
/* Same as __fesetround_inline, but without the runtime check for whether to
   use the DFP 'mtfsfi' syntax (as relax_fenv_state does) or whether the
   round value is valid.  */
static inline void
__fesetround_inline_nocheck (const int round)
{
#ifdef _ARCH_PWR9
  __fe_mffscrn (round);
#else
  if (__glibc_likely (GLRO(dl_hwcap2) & PPC_FEATURE2_ARCH_3_00))
    __fe_mffscrn (round);
  else
    asm volatile ("mtfsfi 7,%0" : : "n" (round));
#endif
}

#define FPSCR_MASK(bit) (1 << (31 - (bit)))

/* Definitions of all the FPSCR bit numbers.  */
enum {
  FPSCR_FX = 0,    /* exception summary */
#define FPSCR_FX_MASK (FPSCR_MASK (FPSCR_FX))
  FPSCR_FEX,       /* enabled exception summary */
#define FPSCR_FEX_MASK (FPSCR_MASK (FPSCR_FEX))
  FPSCR_VX,        /* invalid operation summary */
#define FPSCR_VX_MASK (FPSCR_MASK (FPSCR_VX))
  FPSCR_OX,        /* overflow */
#define FPSCR_OX_MASK (FPSCR_MASK (FPSCR_OX))
  FPSCR_UX,        /* underflow */
#define FPSCR_UX_MASK (FPSCR_MASK (FPSCR_UX))
  FPSCR_ZX,        /* zero divide */
#define FPSCR_ZX_MASK (FPSCR_MASK (FPSCR_ZX))
  FPSCR_XX,        /* inexact */
#define FPSCR_XX_MASK (FPSCR_MASK (FPSCR_XX))
  FPSCR_VXSNAN,    /* invalid operation for sNaN */
#define FPSCR_VXSNAN_MASK (FPSCR_MASK (FPSCR_VXSNAN))
  FPSCR_VXISI,     /* invalid operation for Inf-Inf */
#define FPSCR_VXISI_MASK (FPSCR_MASK (FPSCR_VXISI))
  FPSCR_VXIDI,     /* invalid operation for Inf/Inf */
#define FPSCR_VXIDI_MASK (FPSCR_MASK (FPSCR_VXIDI))
  FPSCR_VXZDZ,     /* invalid operation for 0/0 */
#define FPSCR_VXZDZ_MASK (FPSCR_MASK (FPSCR_VXZDZ))
  FPSCR_VXIMZ,     /* invalid operation for Inf*0 */
#define FPSCR_VXIMZ_MASK (FPSCR_MASK (FPSCR_VXIMZ))
  FPSCR_VXVC,      /* invalid operation for invalid compare */
#define FPSCR_VXVC_MASK (FPSCR_MASK (FPSCR_VXVC))
  FPSCR_FR,        /* fraction rounded [fraction was incremented by round] */
#define FPSCR_FR_MASK (FPSCR_MASK (FPSCR_FR))
  FPSCR_FI,        /* fraction inexact */
#define FPSCR_FI_MASK (FPSCR_MASK (FPSCR_FI))
  FPSCR_FPRF_C,    /* result class descriptor */
#define FPSCR_FPRF_C_MASK (FPSCR_MASK (FPSCR_FPRF_C))
  FPSCR_FPRF_FL,   /* result less than (usually, less than 0) */
#define FPSCR_FPRF_FL_MASK (FPSCR_MASK (FPSCR_FPRF_FL))
  FPSCR_FPRF_FG,   /* result greater than */
#define FPSCR_FPRF_FG_MASK (FPSCR_MASK (FPSCR_FPRF_FG))
  FPSCR_FPRF_FE,   /* result equal to */
#define FPSCR_FPRF_FE_MASK (FPSCR_MASK (FPSCR_FPRF_FE))
  FPSCR_FPRF_FU,   /* result unordered */
#define FPSCR_FPRF_FU_MASK (FPSCR_MASK (FPSCR_FPRF_FU))
  FPSCR_20,        /* reserved */
  FPSCR_VXSOFT,    /* invalid operation set by software */
#define FPSCR_VXSOFT_MASK (FPSCR_MASK (FPSCR_VXSOFT))
  FPSCR_VXSQRT,    /* invalid operation for square root */
#define FPSCR_VXSQRT_MASK (FPSCR_MASK (FPSCR_VXSQRT))
  FPSCR_VXCVI,     /* invalid operation for invalid integer convert */
#define FPSCR_VXCVI_MASK (FPSCR_MASK (FPSCR_VXCVI))
  FPSCR_VE,        /* invalid operation exception enable */
#define FPSCR_VE_MASK (FPSCR_MASK (FPSCR_VE))
  FPSCR_OE,        /* overflow exception enable */
#define FPSCR_OE_MASK (FPSCR_MASK (FPSCR_OE))
  FPSCR_UE,        /* underflow exception enable */
#define FPSCR_UE_MASK (FPSCR_MASK (FPSCR_UE))
  FPSCR_ZE,        /* zero divide exception enable */
#define FPSCR_ZE_MASK (FPSCR_MASK (FPSCR_ZE))
  FPSCR_XE,        /* inexact exception enable */
#define FPSCR_XE_MASK (FPSCR_MASK (FPSCR_XE))
#ifdef _ARCH_PWR6
  FPSCR_29,        /* Reserved in ISA 2.05 */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_29))
#else
  FPSCR_NI,        /* non-IEEE mode (typically, no denormalised numbers) */
#define FPSCR_NI_MASK (FPSCR_MASK (FPSCR_NI))
#endif /* _ARCH_PWR6 */
  /* The remaining two least-significant bits keep the rounding mode.  */
  FPSCR_RN_hi,
#define FPSCR_RN_hi_MASK (FPSCR_MASK (FPSCR_RN_hi))
  FPSCR_RN_lo
#define FPSCR_RN_lo_MASK (FPSCR_MASK (FPSCR_RN_lo))
};

#define FPSCR_RN_MASK (FPSCR_RN_hi_MASK|FPSCR_RN_lo_MASK)
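
/* Worked example (illustrative): FPSCR_MASK follows the IBM bit numbering
   used in the enum above, where bit 0 is the most significant bit of the
   32-bit FPSCR image, so e.g.

     FPSCR_VE_MASK == 1 << (31 - 24) == 0x80
     FPSCR_OX_MASK == 1 << (31 - 3)  == 0x10000000

   The five enable bits together give FPSCR_ENABLES_MASK == 0xf8 (defined
   just below), and each exception flag bit sits exactly 22 positions above
   its enable bit (0x10000000 == 0x40 << 22), which is the relationship that
   FPSCR_EXCEPT_TO_ENABLE_SHIFT (further below) relies on.  */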
#define FPSCR_ENABLES_MASK \
  (FPSCR_VE_MASK|FPSCR_OE_MASK|FPSCR_UE_MASK|FPSCR_ZE_MASK|FPSCR_XE_MASK)
#define FPSCR_BASIC_EXCEPTIONS_MASK \
  (FPSCR_VX_MASK|FPSCR_OX_MASK|FPSCR_UX_MASK|FPSCR_ZX_MASK|FPSCR_XX_MASK)
#define FPSCR_EXCEPTIONS_MASK (FPSCR_BASIC_EXCEPTIONS_MASK| \
  FPSCR_VXSNAN_MASK|FPSCR_VXISI_MASK|FPSCR_VXIDI_MASK|FPSCR_VXZDZ_MASK| \
  FPSCR_VXIMZ_MASK|FPSCR_VXVC_MASK|FPSCR_VXSOFT_MASK|FPSCR_VXSQRT_MASK| \
  FPSCR_VXCVI_MASK)
#define FPSCR_FPRF_MASK \
  (FPSCR_FPRF_C_MASK|FPSCR_FPRF_FL_MASK|FPSCR_FPRF_FG_MASK| \
   FPSCR_FPRF_FE_MASK|FPSCR_FPRF_FU_MASK)
#define FPSCR_CONTROL_MASK (FPSCR_ENABLES_MASK|FPSCR_NI_MASK|FPSCR_RN_MASK)
#define FPSCR_STATUS_MASK (FPSCR_FR_MASK|FPSCR_FI_MASK|FPSCR_FPRF_MASK)

/* The bits in the FENV(1) ABI for exceptions correspond one-to-one with bits
   in the FPSCR, albeit shifted to different but corresponding locations.
   Similarly, the exception indicator bits in the FPSCR correspond one-to-one
   with the exception enable bits.  It is thus possible to map the FENV(1)
   exceptions directly to the FPSCR enables with a simple mask and shift,
   and vice versa.  */
#define FPSCR_EXCEPT_TO_ENABLE_SHIFT 22

static inline int
fenv_reg_to_exceptions (unsigned long long l)
{
  return (((int)l) & FPSCR_ENABLES_MASK) << FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}

static inline unsigned long long
fenv_exceptions_to_reg (int excepts)
{
  return (unsigned long long)
    (excepts & FE_ALL_EXCEPT) >> FPSCR_EXCEPT_TO_ENABLE_SHIFT;
}

#ifdef _ARCH_PWR6
  /* Not supported in ISA 2.05.  Provided for source compat only.  */
# define FPSCR_NI 29
#endif /* _ARCH_PWR6 */

/* This operation (i) sets the appropriate FPSCR bits for its
   parameter, (ii) converts sNaN to the corresponding qNaN, and (iii)
   otherwise passes its parameter through unchanged (in particular, -0
   and +0 stay as they were).  The `obvious' way to do this is optimised
   out by gcc.  */
#define f_wash(x) \
  ({ double d; asm volatile ("fmul %0,%1,%2" \
			     : "=f"(d) \
			     : "f" (x), "f"((float)1.0)); d; })
#define f_washf(x) \
  ({ float f; asm volatile ("fmuls %0,%1,%2" \
			    : "=f"(f) \
			    : "f" (x), "f"((float)1.0)); f; })

#endif /* fenv_libc.h */